Compare commits

...

116 Commits

Author SHA1 Message Date
0d56eddc59 release 2017.12.02 2017-12-02 21:34:34 +07:00
e25ee72657 [ChangeLog] Actualize 2017-12-02 21:29:06 +07:00
78593e294c Add references for #14844 2017-12-02 21:22:43 +07:00
593f2f7989 [downloader/fragment] Commit part file after each fragment
In order to obtain correct resume_len on next iteration
2017-12-02 21:21:11 +07:00
603fc4e0ea [extractor/common] Add durations for DASH fragments with bare SegmentURLs 2017-12-02 21:21:01 +07:00
41bf647e89 [extractor/common] Add support for DASH manifests with SegmentLists with bare SegmentURLs 2017-12-02 21:16:36 +07:00
fea92aa65d [xhamster] Fix extraction (closes #14884) 2017-12-02 19:04:59 +07:00
0981585bef [youku] Update ccode (closes #14872) 2017-12-02 18:16:22 +07:00
f5ac68d88f [mnet] fix format extraction(fixes #14883) 2017-11-30 23:45:33 +01:00
1663b32946 [xiami] add Referer header to api request 2017-11-29 20:36:55 +01:00
5ea765fb72 [mtv] correct scc extention in extracted subtitles(closes #13730) 2017-11-29 17:50:38 +01:00
fb61b57d0f [vvvvid] fix extraction for kenc videos(fixes #13406) 2017-11-29 16:09:45 +01:00
07cf18b9c5 [br] add support for BR Mediathek videos(fixes #14560)(fixes #14788) 2017-11-29 14:21:38 +01:00
5f699251e9 [daisuki] add support for motto.daisuki.com(fixes #14681) 2017-11-28 10:57:22 +01:00
a3474aa59e [Odnoklassniki] fix api metadata request(fixes #14862) 2017-11-28 09:04:51 +01:00
115afb77ec [itv] update hls formats extraction 2017-11-27 21:59:27 +01:00
53f024e7c5 [pbs] add another media id regex 2017-11-27 16:55:27 +01:00
ffe6979ef9 [utils] add hvc1 codec code to parse_codecs 2017-11-27 16:55:27 +01:00
dafb4c6647 [Makefile] Include setup.cfg in the tarball (closes #14857) 2017-11-27 22:49:35 +08:00
82a62de192 [Makefile,devscripts/run_tests.sh] Actually exclude network tests
Closes #14858
2017-11-27 21:32:06 +08:00
f58a506044 [test_InfoExtractor] Fix flake8 2017-11-27 21:30:47 +08:00
5ddeb7702a release 2017.11.26 2017-11-26 21:49:22 +07:00
6c07f0b288 [ChangeLog] Actualize 2017-11-26 21:37:27 +07:00
e94d1adc36 Add testdata to youtube-dl.tar.gz (closes #14854) 2017-11-26 21:10:32 +07:00
d08dcd2dbd [test_YoutubeDL] Fix typo (closes #14856) 2017-11-26 21:06:14 +07:00
7512aa986f Fix some only matching tests (closes #14855) 2017-11-26 20:53:10 +07:00
93f3f10cdc [fczenit] fix extraction 2017-11-25 19:28:26 +01:00
87dac57cf6 [firstpost] remove extractor 2017-11-25 18:50:15 +01:00
b485d5d6bf [nexx] make http format ids more consistent 2017-11-25 18:36:31 +01:00
a238a868ba [freespeech] fix extraction 2017-11-25 18:25:00 +01:00
c0f647a179 [nexx] extract more formats 2017-11-25 18:13:26 +01:00
6ff27b8d5a [openload] Don't use bare except when removing temp files 2017-11-26 00:05:28 +07:00
9ef909f2b2 [openload] Add support for openload.link 2017-11-26 00:04:13 +07:00
8cfbcfab9a [tnaflix] Extract common parts of tnaflix and empflix 2017-11-25 23:42:20 +07:00
b7785cf156 [empflix] Relax _VALID_URL 2017-11-25 23:42:20 +07:00
9105523818 [empflix] Fix extractrion 2017-11-25 23:42:20 +07:00
dbb25af657 [tnaflix] Don't modify download URLs (closes #14811) 2017-11-25 23:42:20 +07:00
fe4bfe36e1 [gamersyde] remove extractor 2017-11-25 15:58:28 +01:00
6f5c598a28 [france2.fr:generation-what] fix extraction 2017-11-25 15:49:49 +01:00
cd9ff4ec5b [massengeschmacktv] Add support for Massengeschmack TV(replaces Fernseh Kritik TV) 2017-11-24 20:00:01 +01:00
c6c6a64aa5 [fox9] fix extraction 2017-11-24 19:00:56 +01:00
e0a8686f48 [faz] fix extraction and add support for Perform Group embeds(fixes #14714) 2017-11-24 18:42:41 +01:00
6049176471 [ChangeLog] Update after #14828
[skip ci]
2017-11-24 21:40:51 +08:00
805f5bf759 [Generic] ie_key in JWPlatform test 72 2017-11-24 21:39:55 +08:00
32ad4f3faf [JWPlatform] Use non-capturing group in RE
Per @yan12125.
2017-11-24 21:39:55 +08:00
6899b1d9e8 [Generic] Update test 69 (suffolk/sjc)
suffolk.edu/sjc => suffolk.edu/sjc/live.php

Unfortunately it only transmits video a few mornings per month, so
leaving the 'skip' is probably appropriate. Updating the 'skip- to
include the calendar information though.
2017-11-24 21:39:55 +08:00
939be9adfe [JWPlatform] Support iframes
Support content.jwplatform... src attributes inside <iframe> tags in
addition to <script> tags. Just a regexp change.

Add a test (currently Generic_72).
2017-11-24 21:39:55 +08:00
2688664762 [culturebox] Fix extraction (closes #14827) 2017-11-23 06:39:11 +07:00
8f63941104 [youku] Fix extraction; update ccode (closes #14815) 2017-11-22 22:49:48 +08:00
a9efdf3d4a [livestream] make smil extraction non fatal(fixes #14792) 2017-11-19 12:59:31 +01:00
f610dbb05f [extractor/common] Use final URL when dumping request (closes #14769) 2017-11-18 19:04:56 +07:00
38db52adf3 [drtuber] Add support for mobile URLs 2017-11-17 01:50:07 +07:00
3192d4bc7a [spankbang] Add support for mobile URLs and fix test 2017-11-17 01:05:04 +07:00
9cbd4dda10 [instagram] Fix description, timestamp and counters extraction (closes #14755) 2017-11-15 22:14:54 +07:00
08e45b39e7 release 2017.11.15 2017-11-15 00:15:42 +07:00
fae0eb42ec [ChangeLog] Actualize 2017-11-15 00:02:54 +07:00
ea2295842f [common] skip Apple FairPlay m3u8 manifests(closes #14741) 2017-11-14 17:41:30 +01:00
a2b6aba8de [vshare] Improve extraction, fix formats sorting and carry long lines 2017-11-14 22:50:15 +07:00
ff31f2d5c3 [vshare] Capture and output error message 2017-11-14 22:39:54 +07:00
0987f2ddb2 [vshare] Fix extraction (closes #14473) 2017-11-14 22:34:45 +07:00
5871ebac47 [YoutubeDL] Fix playlist range optimization for --playlist-items (closes #14740) 2017-11-14 01:43:20 +07:00
05dee6c520 [crunchyroll] extract old rtmp formats 2017-11-13 19:15:49 +01:00
27adc9ec65 [tva] fix extraction(closes #14736) 2017-11-13 11:24:15 +01:00
388beb86e0 [gamespot] add test for #14652 2017-11-13 10:30:12 +01:00
d4e31b72b9 [gamespot] lower the preference of http formats(#14652) 2017-11-13 10:24:35 +01:00
5fc12b9549 [instagram:user] Fix extraction (closes #14699) 2017-11-12 18:36:18 +07:00
af85ce29c6 [ccma] Fix typo 2017-11-12 13:25:21 +07:00
e4d9586562 Remove sensitive data from logging in messages 2017-11-11 20:52:12 +07:00
79d1f8ed68 [gamespot] add support for article URLS(closes #14652) 2017-11-11 13:03:16 +01:00
a5203935d6 [gamespot] skip Brightcove Once http formats(#14652) 2017-11-11 13:03:16 +01:00
59d2e6d04f [cartoonnetwork] Update tokenizer_src (closes #14666) 2017-11-11 04:59:48 +07:00
a9543e37c8 [wsj] Recognize another URL pattern (closes #14704) 2017-11-11 00:29:08 +08:00
61fb07e156 [pandatv] Modernize (closes #14693) 2017-11-09 23:30:25 +07:00
4222346fb2 [pandatv] Update API URL and sign format URLs 2017-11-09 23:26:46 +07:00
cc6a960e13 use older login method(closes #11572) 2017-11-08 20:30:05 +01:00
f34b841b51 release 2017.11.06 2017-11-06 22:39:24 +07:00
e0998333fa [ChangeLog] Actualize 2017-11-06 22:36:46 +07:00
909191de91 [hotstar:playlist] Fix issues and improve (closes #12465) 2017-11-05 19:15:40 +07:00
477c97f86b [hotstar:playlist] Add extractor 2017-11-05 19:15:34 +07:00
6e71bbf4ab [hotstar] Bypass geo restriction (closes #14672) 2017-11-05 16:12:56 +07:00
181e381fda [test_InfoExtractor] Add test for #14660 2017-11-04 22:15:58 +07:00
187ee66c94 [extractor/common] Add protocol for f4m formats 2017-11-04 22:11:39 +07:00
48107c198b [f4m] Prefer baseURL for relative URLs (closes #14660) 2017-11-04 22:10:55 +07:00
cd670befc4 [22tracks] Remove extractor (closes #11024) 2017-11-02 23:48:43 +07:00
44cca168cc [skysport] add support ooyala embed_token protected videos(fixes #14641) 2017-11-02 14:16:15 +01:00
b0f4331002 [gamespot] extract formats referenced with new data fields(#14652) 2017-11-02 13:30:50 +01:00
044eeb1455 [extractor/common] Respect URL query in _extract_wowza_formats (closes #14645) 2017-11-01 23:39:26 +07:00
8fe767e072 [spankbang] Detect unavailable videos (closes #14644) 2017-10-31 23:05:25 +07:00
6d0630d880 release 2017.10.29 2017-10-29 07:22:53 +07:00
518d357b46 [ChangeLog] Actualize 2017-10-29 07:21:33 +07:00
514e8aefd4 [egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00
9211e3319e [extractor/common] Prefix format id for audio only HLS formats 2017-10-29 07:05:55 +07:00
056653bbb1 [utils] Add support for zero years and months in parse_duration 2017-10-29 07:04:48 +07:00
c3206d02e9 [fxnetworks] Extract series metadata 2017-10-29 05:20:18 +07:00
eb4b5818e2 [younow] Fix issues and improve extraction (closes #9255, closes #9432, closes #12436) 2017-10-29 04:18:43 +07:00
47a8587915 [younow] Add extractor 2017-10-29 04:17:03 +07:00
8e01f3ca81 [dctptv] Fix extraction (closes #14599) 2017-10-28 22:58:01 +07:00
f2332f18e6 [youtube] Restrict embed regex (#14600) 2017-10-27 22:26:43 +07:00
7c1f419341 [vimeo] Restrict iframe embed regex (closes #14600) 2017-10-27 22:21:47 +07:00
30e6161799 [soundgasm] Improve extraction (closes #14588) 2017-10-26 23:16:16 +07:00
dc24a7d4a2 [myvideo] Remove extractor (closes #8557)
Redirects to store.maxdome.de
2017-10-25 23:27:55 +07:00
d673ab6562 [nbc] Add support for classic-tv videos 2017-10-25 23:23:27 +07:00
b8c6ffc518 [vrtnu] Add support for cookies authentication and simplify (#11873) 2017-10-25 23:21:51 +07:00
7913e0fca7 [canvas] Add support for vrt.be/vrtnu (closes #11873) 2017-10-25 23:17:28 +07:00
cdd1ce92c4 [twitch:clips] Fix title extraction 2017-10-23 23:12:50 +07:00
55c727a547 [ndtv] Add support for sub-sites 2017-10-22 08:32:20 +07:00
36e2d3ca43 [dramafever] Fix login error message extraction 2017-10-22 08:16:30 +07:00
f7a5038305 [travis] Disable IRC notifications 2017-10-22 02:46:28 +07:00
9ff6273cae [nickru] Add support for more sites 2017-10-22 01:51:01 +07:00
f03ee0b372 [nickde] Add support for nickelodeon.be 2017-10-22 01:42:44 +07:00
cf6bda312b [nickde] Add support for nick.ch 2017-10-22 01:30:35 +07:00
3ebbd9991e [nick] Add support for more nickelodeon sites (closes #14553) 2017-10-22 01:26:58 +07:00
21ce434051 [travis] Enable IRC notifications
Let's see how is it verbose now
2017-10-21 02:14:25 +07:00
5c0e5bc4df [README.md] Add build status bagde 2017-10-21 02:11:11 +07:00
9a9de2d7b2 [travis] Allow download tests to fail and fast finish 2017-10-21 01:58:45 +07:00
424505df76 [azmedien] Fix test 2017-10-21 01:10:56 +07:00
93 changed files with 2322 additions and 990 deletions

View File

@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.10.20*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.10.20**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.12.02*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.12.02**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2017.10.20
[debug] youtube-dl version 2017.12.02
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@ -11,12 +11,12 @@ sudo: false
env:
- YTDL_TEST_SET=core
- YTDL_TEST_SET=download
matrix:
fast_finish: true
allow_failures:
- env: YTDL_TEST_SET=download
script: ./devscripts/run_tests.sh
notifications:
email:
- filippo.valsorda@gmail.com
- yasoob.khld@gmail.com
# irc:
# channels:
# - "irc.freenode.org#youtube-dl"
# skip_join: true

117
ChangeLog
View File

@ -1,3 +1,120 @@
version 2017.12.02
Core
+ [downloader/fragment] Commit part file after each fragment
+ [extractor/common] Add durations for DASH fragments with bare SegmentURLs
+ [extractor/common] Add support for DASH manifests with SegmentLists with
bare SegmentURLs (#14844)
+ [utils] Add hvc1 codec code to parse_codecs
Extractors
* [xhamster] Fix extraction (#14884)
* [youku] Update ccode (#14872)
* [mnet] Fix format extraction (#14883)
+ [xiami] Add Referer header to API request
* [mtv] Correct scc extention in extracted subtitles (#13730)
* [vvvvid] Fix extraction for kenc videos (#13406)
+ [br] Add support for BR Mediathek videos (#14560, #14788)
+ [daisuki] Add support for motto.daisuki.com (#14681)
* [odnoklassniki] Fix API metadata request (#14862)
* [itv] Fix HLS formats extraction
+ [pbs] Add another media id regular expression
version 2017.11.26
Core
* [extractor/common] Use final URL when dumping request (#14769)
Extractors
* [fczenit] Fix extraction
- [firstpost] Remove extractor
* [freespeech] Fix extraction
* [nexx] Extract more formats
+ [openload] Add support for openload.link (#14763)
* [empflix] Relax URL regular expression
* [empflix] Fix extractrion
* [tnaflix] Don't modify download URLs (#14811)
- [gamersyde] Remove extractor
* [francetv:generationwhat] Fix extraction
+ [massengeschmacktv] Add support for Massengeschmack TV
* [fox9] Fix extraction
* [faz] Fix extraction and add support for Perform Group embeds (#14714)
+ [performgroup] Add support for performgroup.com
+ [jwplatform] Add support for iframes (#14828)
* [culturebox] Fix extraction (#14827)
* [youku] Fix extraction; update ccode (#14815)
* [livestream] Make SMIL extraction non fatal (#14792)
+ [drtuber] Add support for mobile URLs (#14772)
+ [spankbang] Add support for mobile URLs (#14771)
* [instagram] Fix description, timestamp and counters extraction (#14755)
version 2017.11.15
Core
* [common] Skip Apple FairPlay m3u8 manifests (#14741)
* [YoutubeDL] Fix playlist range optimization for --playlist-items (#14740)
Extractors
* [vshare] Capture and output error message
* [vshare] Fix extraction (#14473)
* [crunchyroll] Extract old RTMP formats
* [tva] Fix extraction (#14736)
* [gamespot] Lower preference of HTTP formats (#14652)
* [instagram:user] Fix extraction (#14699)
* [ccma] Fix typo (#14730)
- Remove sensitive data from logging in messages
* [instagram:user] Fix extraction (#14699)
+ [gamespot] Add support for article URLs (#14652)
* [gamespot] Skip Brightcove Once HTTP formats (#14652)
* [cartoonnetwork] Update tokenizer_src (#14666)
+ [wsj] Recognize another URL pattern (#14704)
* [pandatv] Update API URL and sign format URLs (#14693)
* [crunchyroll] Use old login method (#11572)
version 2017.11.06
Core
+ [extractor/common] Add protocol for f4m formats
* [f4m] Prefer baseURL for relative URLs (#14660)
* [extractor/common] Respect URL query in _extract_wowza_formats (14645)
Extractors
+ [hotstar:playlist] Add support for playlists (#12465)
* [hotstar] Bypass geo restriction (#14672)
- [22tracks] Remove extractor (#11024, #14628)
+ [skysport] Sdd support ooyala videos protected with embed_token (#14641)
* [gamespot] Extract formats referenced with new data fields (#14652)
* [spankbang] Detect unavailable videos (#14644)
version 2017.10.29
Core
* [extractor/common] Prefix format id for audio only HLS formats
+ [utils] Add support for zero years and months in parse_duration
Extractors
* [egghead] Fix extraction (#14388)
+ [fxnetworks] Extract series metadata (#14603)
+ [younow] Add support for younow.com (#9255, #9432, #12436)
* [dctptv] Fix extraction (#14599)
* [youtube] Restrict embed regex (#14600)
* [vimeo] Restrict iframe embed regex (#14600)
* [soundgasm] Improve extraction (#14588)
- [myvideo] Remove extractor (#8557)
+ [nbc] Add support for classic-tv videos (#14575)
+ [vrtnu] Add support for cookies authentication and simplify (#11873)
+ [canvas] Add support for vrt.be/vrtnu (#11873)
* [twitch:clips] Fix title extraction (#14566)
+ [ndtv] Add support for sub-sites (#14534)
* [dramafever] Fix login error message extraction
+ [nick] Add support for more nickelodeon sites (no, dk, se, ch, fr, es, pt,
ro, hu) (#14553)
version 2017.10.20
Core

View File

@ -36,8 +36,17 @@ test:
ot: offlinetest
# Keep this list in sync with devscripts/run_tests.sh
offlinetest: codetest
$(PYTHON) -m nose --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py --exclude test_socks.py
$(PYTHON) -m nose --verbose test \
--exclude test_age_restriction.py \
--exclude test_download.py \
--exclude test_iqiyi_sdk_interpreter.py \
--exclude test_socks.py \
--exclude test_subtitles.py \
--exclude test_write_annotations.py \
--exclude test_youtube_lists.py \
--exclude test_youtube_signature.py
tar: youtube-dl.tar.gz
@ -110,11 +119,10 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
--exclude '*~' \
--exclude '__pycache__' \
--exclude '.git' \
--exclude 'testdata' \
--exclude 'docs/_build' \
-- \
bin devscripts test youtube_dl docs \
ChangeLog LICENSE README.md README.txt \
Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion \
youtube-dl.zsh youtube-dl.fish setup.py \
youtube-dl.zsh youtube-dl.fish setup.py setup.cfg \
youtube-dl

View File

@ -1,3 +1,5 @@
[![Build Status](https://travis-ci.org/rg3/youtube-dl.svg?branch=master)](https://travis-ci.org/rg3/youtube-dl)
youtube-dl - download videos from youtube.com or other video platforms
- [INSTALLATION](#installation)

View File

@ -1,6 +1,7 @@
#!/bin/bash
DOWNLOAD_TESTS="age_restriction|download|subtitles|write_annotations|iqiyi_sdk_interpreter|youtube_lists"
# Keep this list in sync with the `offlinetest` target in Makefile
DOWNLOAD_TESTS="age_restriction|download|iqiyi_sdk_interpreter|socks|subtitles|write_annotations|youtube_lists|youtube_signature"
test_set=""
multiprocess_args=""

View File

@ -3,8 +3,6 @@
- **1up.com**
- **20min**
- **220.ro**
- **22tracks:genre**
- **22tracks:track**
- **24video**
- **3qsdn**: 3Q SDN
- **3sat**
@ -114,11 +112,12 @@
- **BokeCC**
- **BostonGlobe**
- **Bpb**: Bundeszentrale für politische Bildung
- **BR**: Bayerischer Rundfunk Mediathek
- **BR**: Bayerischer Rundfunk
- **BravoTV**
- **Break**
- **brightcove:legacy**
- **brightcove:new**
- **BRMediathek**: Bayerischer Rundfunk Mediathek
- **bt:article**: Bergens Tidende Articles
- **bt:vestlendingen**: Bergens Tidende - Vestlendingen
- **BuzzFeed**
@ -200,8 +199,8 @@
- **dailymotion:playlist**
- **dailymotion:user**
- **DailymotionCloud**
- **Daisuki**
- **DaisukiPlaylist**
- **DaisukiMotto**
- **DaisukiMottoPlaylist**
- **daum.net**
- **daum.net:clip**
- **daum.net:playlist**
@ -268,10 +267,8 @@
- **fc2**
- **fc2:embed**
- **Fczenit**
- **fernsehkritik.tv**
- **filmon**
- **filmon:channel**
- **Firstpost**
- **FiveTV**
- **Flickr**
- **Flipagram**
@ -285,7 +282,7 @@
- **foxnews:article**
- **foxnews:insider**
- **FoxSports**
- **france2.fr:generation-quoi**
- **france2.fr:generation-what**
- **FranceCulture**
- **FranceInter**
- **FranceTV**
@ -303,7 +300,6 @@
- **GameInformer**
- **GameOne**
- **gameone:playlist**
- **Gamersyde**
- **GameSpot**
- **GameStar**
- **Gaskrank**
@ -342,6 +338,7 @@
- **HornBunny**
- **HotNewHipHop**
- **HotStar**
- **hotstar:playlist**
- **Howcast**
- **HowStuffWorks**
- **HRTi**
@ -442,6 +439,7 @@
- **mangomolo:live**
- **mangomolo:video**
- **ManyVids**
- **massengeschmack.tv**
- **MatchTV**
- **MDR**: MDR.DE and KiKA
- **media.ccc.de**
@ -498,7 +496,6 @@
- **MySpace:album**
- **MySpass**
- **Myvi**
- **myvideo** (Currently broken)
- **MyVidster**
- **n-tv.de**
- **natgeo**
@ -610,6 +607,7 @@
- **pcmag**
- **PearVideo**
- **People**
- **PerformGroup**
- **periscope**: Periscope
- **periscope:user**: Periscope user videos
- **PhilharmonieDeParis**: Philharmonie de Paris
@ -977,6 +975,7 @@
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **Vrak**
- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
- **VrtNU**: VrtNU.be
- **vrv**
- **vrv:series**
- **VShare**
@ -1035,6 +1034,9 @@
- **YouJizz**
- **youku**: 优酷
- **youku:show**
- **YouNowChannel**
- **YouNowLive**
- **YouNowMoment**
- **YouPorn**
- **YourUpload**
- **youtube**: YouTube.com

View File

@ -562,7 +562,89 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
'width': 1920,
'height': 1080,
}]
),
), (
# https://github.com/rg3/youtube-dl/pull/14844
'urls_only',
'http://unknown/manifest.mpd',
[{
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': 'h264_aac_144p_m4s',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'mp4a.40.2',
'vcodec': 'avc3.42c01e',
'tbr': 200,
'width': 256,
'height': 144,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': 'h264_aac_240p_m4s',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'mp4a.40.2',
'vcodec': 'avc3.42c01e',
'tbr': 400,
'width': 424,
'height': 240,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': 'h264_aac_360p_m4s',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'mp4a.40.2',
'vcodec': 'avc3.42c01e',
'tbr': 800,
'width': 640,
'height': 360,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': 'h264_aac_480p_m4s',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'mp4a.40.2',
'vcodec': 'avc3.42c01e',
'tbr': 1200,
'width': 856,
'height': 480,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': 'h264_aac_576p_m4s',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'mp4a.40.2',
'vcodec': 'avc3.42c01e',
'tbr': 1600,
'width': 1024,
'height': 576,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': 'h264_aac_720p_m4s',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'mp4a.40.2',
'vcodec': 'avc3.42c01e',
'tbr': 2400,
'width': 1280,
'height': 720,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': 'h264_aac_1080p_m4s',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'mp4a.40.2',
'vcodec': 'avc3.42c01e',
'tbr': 4400,
'width': 1920,
'height': 1080,
}]
)
]
for mpd_file, mpd_url, expected_formats in _TEST_CASES:
@ -574,6 +656,33 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
def test_parse_f4m_formats(self):
_TEST_CASES = [
(
# https://github.com/rg3/youtube-dl/issues/14660
'custom_base_url',
'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m',
[{
'manifest_url': 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m',
'ext': 'flv',
'format_id': '2148',
'protocol': 'f4m',
'tbr': 2148,
'width': 1280,
'height': 720,
}]
),
]
for f4m_file, f4m_url, expected_formats in _TEST_CASES:
with io.open('./test/testdata/f4m/%s.f4m' % f4m_file,
mode='r', encoding='utf-8') as f:
formats = self.ie._parse_f4m_formats(
compat_etree_fromstring(f.read().encode('utf-8')),
f4m_url, None)
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
if __name__ == '__main__':
unittest.main()

View File

@ -466,11 +466,11 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL({'simulate': True})
self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best')
ydl = YDL({'is_live': True})
self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
ydl = YDL({})
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
ydl = YDL({'simulate': True, 'is_live': True})
self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best')
ydl = YDL({'simulate': True})
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'bestvideo+bestaudio/best')
ydl = YDL({'outtmpl': '-'})
self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')

View File

@ -540,6 +540,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(parse_duration('87 Min.'), 5220)
self.assertEqual(parse_duration('PT1H0.040S'), 3600.04)
self.assertEqual(parse_duration('PT00H03M30SZ'), 210)
self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88)
def test_fix_xml_ampersands(self):
self.assertEqual(

10
test/testdata/f4m/custom_base_url.f4m vendored Normal file
View File

@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<manifest xmlns="http://ns.adobe.com/f4m/1.0">
<streamType>recorded</streamType>
<baseURL>http://vod.livestream.com/events/0000000000673980/</baseURL>
<duration>269.293</duration>
<bootstrapInfo profile="named" id="bootstrap_1">AAAAm2Fic3QAAAAAAAAAAQAAAAPoAAAAAAAEG+0AAAAAAAAAAAAAAAAAAQAAABlhc3J0AAAAAAAAAAABAAAAAQAAAC4BAAAAVmFmcnQAAAAAAAAD6AAAAAAEAAAAAQAAAAAAAAAAAAAXcAAAAC0AAAAAAAQHQAAAE5UAAAAuAAAAAAAEGtUAAAEYAAAAAAAAAAAAAAAAAAAAAAA=</bootstrapInfo>
<media url="b90f532f-b0f6-4f4e-8289-706d490b2fd8_2292" bootstrapInfoId="bootstrap_1" bitrate="2148" width="1280" height="720" videoCodec="avc1.4d401f" audioCodec="mp4a.40.2">
<metadata>AgAKb25NZXRhRGF0YQgAAAAIAAhkdXJhdGlvbgBAcNSwIMSbpgAFd2lkdGgAQJQAAAAAAAAABmhlaWdodABAhoAAAAAAAAAJZnJhbWVyYXRlAEA4/7DoLwW3AA12aWRlb2RhdGFyYXRlAECe1DLgjcobAAx2aWRlb2NvZGVjaWQAQBwAAAAAAAAADWF1ZGlvZGF0YXJhdGUAQGSimlvaPKQADGF1ZGlvY29kZWNpZABAJAAAAAAAAAAACQ==</metadata>
</media>
</manifest>

218
test/testdata/mpd/urls_only.mpd vendored Normal file
View File

@ -0,0 +1,218 @@
<?xml version="1.0" ?>
<MPD maxSegmentDuration="PT0H0M10.000S" mediaPresentationDuration="PT0H4M1.728S" minBufferTime="PT1.500S" profiles="urn:mpeg:dash:profile:isoff-main:2011" type="static" xmlns="urn:mpeg:dash:schema:mpd:2011">
<Period duration="PT0H4M1.728S">
<AdaptationSet bitstreamSwitching="true" lang="und" maxHeight="1080" maxWidth="1920" par="16:9" segmentAlignment="true">
<ContentComponent contentType="video" id="1"/>
<Representation audioSamplingRate="44100" bandwidth="200000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="144" id="h264_aac_144p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="256">
<SegmentList duration="10000" timescale="1000">
<Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/init/432f65a0.mp4"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/0/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/1/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/2/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/3/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/4/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/5/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/6/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/7/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/8/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/9/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/10/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/11/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/12/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/13/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/14/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/15/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/16/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/17/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/18/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/19/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/20/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/21/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/22/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/23/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/24/432f65a0.m4s"/>
</SegmentList>
</Representation>
<Representation audioSamplingRate="44100" bandwidth="400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="240" id="h264_aac_240p_m4s" mimeType="video/mp4" sar="160:159" startWithSAP="1" width="424">
<SegmentList duration="10000" timescale="1000">
<Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/init/432f65a0.mp4"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/0/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/1/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/2/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/3/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/4/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/5/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/6/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/7/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/8/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/9/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/10/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/11/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/12/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/13/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/14/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/15/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/16/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/17/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/18/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/19/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/20/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/21/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/22/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/23/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/24/432f65a0.m4s"/>
</SegmentList>
</Representation>
<Representation audioSamplingRate="44100" bandwidth="800000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="360" id="h264_aac_360p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="640">
<SegmentList duration="10000" timescale="1000">
<Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/init/432f65a0.mp4"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/0/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/1/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/2/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/3/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/4/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/5/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/6/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/7/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/8/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/9/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/10/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/11/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/12/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/13/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/14/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/15/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/16/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/17/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/18/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/19/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/20/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/21/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/22/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/23/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/24/432f65a0.m4s"/>
</SegmentList>
</Representation>
<Representation audioSamplingRate="44100" bandwidth="1200000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="480" id="h264_aac_480p_m4s" mimeType="video/mp4" sar="320:321" startWithSAP="1" width="856">
<SegmentList duration="10000" timescale="1000">
<Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/init/432f65a0.mp4"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/0/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/1/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/2/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/3/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/4/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/5/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/6/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/7/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/8/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/9/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/10/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/11/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/12/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/13/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/14/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/15/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/16/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/17/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/18/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/19/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/20/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/21/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/22/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/23/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/24/432f65a0.m4s"/>
</SegmentList>
</Representation>
<Representation audioSamplingRate="44100" bandwidth="1600000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="576" id="h264_aac_576p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1024">
<SegmentList duration="10000" timescale="1000">
<Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/init/432f65a0.mp4"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/0/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/1/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/2/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/3/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/4/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/5/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/6/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/7/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/8/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/9/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/10/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/11/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/12/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/13/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/14/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/15/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/16/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/17/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/18/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/19/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/20/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/21/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/22/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/23/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/24/432f65a0.m4s"/>
</SegmentList>
</Representation>
<Representation audioSamplingRate="44100" bandwidth="2400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="720" id="h264_aac_720p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1280">
<SegmentList duration="10000" timescale="1000">
<Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/init/432f65a0.mp4"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/0/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/1/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/2/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/3/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/4/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/5/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/6/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/7/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/8/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/9/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/10/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/11/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/12/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/13/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/14/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/15/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/16/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/17/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/18/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/19/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/20/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/21/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/22/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/23/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/24/432f65a0.m4s"/>
</SegmentList>
</Representation>
<Representation audioSamplingRate="44100" bandwidth="4400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="1080" id="h264_aac_1080p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1920">
<SegmentList duration="10000" timescale="1000">
<Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/init/432f65a0.mp4"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/0/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/1/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/2/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/3/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/4/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/5/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/6/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/7/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/8/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/9/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/10/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/11/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/12/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/13/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/14/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/15/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/16/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/17/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/18/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/19/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/20/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/21/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/22/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/23/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/24/432f65a0.m4s"/>
</SegmentList>
</Representation>
</AdaptationSet>
</Period>
</MPD>

View File

@ -948,7 +948,8 @@ class YoutubeDL(object):
report_download(n_entries)
else: # iterable
if playlistitems:
entries = make_playlistitems_entries(list(ie_entries))
entries = make_playlistitems_entries(list(itertools.islice(
ie_entries, 0, max(playlistitems))))
else:
entries = list(itertools.islice(
ie_entries, playliststart, playlistend))

View File

@ -243,8 +243,17 @@ def remove_encrypted_media(media):
media))
def _add_ns(prop):
return '{http://ns.adobe.com/f4m/1.0}%s' % prop
def _add_ns(prop, ver=1):
return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop)
def get_base_url(manifest):
base_url = xpath_text(
manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)],
'base URL', default=None)
if base_url:
base_url = base_url.strip()
return base_url
class F4mFD(FragmentFD):
@ -330,13 +339,13 @@ class F4mFD(FragmentFD):
rate, media = list(filter(
lambda f: int(f[0]) == requested_bitrate, formats))[0]
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
# Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
man_base_url = get_base_url(doc) or man_url
base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url'])
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
# From Adobe F4M 3.0 spec:
# The <baseURL> element SHALL be the base URL for all relative
# (HTTP-based) URLs in the manifest. If <baseURL> is not present, said
# URLs should be relative to the location of the containing document.
boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, man_url)
boot_info, bootstrap_url = self._parse_bootstrap_node(
bootstrap_node, man_base_url)
live = boot_info['live']
metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None:

View File

@ -107,6 +107,7 @@ class FragmentFD(FileDownloader):
def _append_fragment(self, ctx, frag_content):
try:
ctx['dest_stream'].write(frag_content)
ctx['dest_stream'].flush()
finally:
if self.__do_ytdl_file(ctx):
self._write_ytdl_file(ctx)

View File

@ -78,7 +78,7 @@ class AnimeOnDemandIE(InfoExtractor):
post_url = urljoin(self._LOGIN_URL, post_url)
response = self._download_webpage(
post_url, None, 'Logging in as %s' % username,
post_url, None, 'Logging in',
data=urlencode_postdata(login_form), headers={
'Referer': self._LOGIN_URL,
})

View File

@ -87,7 +87,7 @@ class AtresPlayerIE(InfoExtractor):
self._LOGIN_URL, urlencode_postdata(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
response = self._download_webpage(
request, None, 'Logging in as %s' % username)
request, None, 'Logging in')
error = self._html_search_regex(
r'(?s)<ul[^>]+class="[^"]*\blist_error\b[^"]*">(.+?)</ul>',

View File

@ -47,7 +47,7 @@ class AZMedienIE(AZMedienBaseIE):
'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom',
'info_dict': {
'id': '1_2444peh4',
'ext': 'mov',
'ext': 'mp4',
'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom',
'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8',
'uploader_id': 'TeleZ?ri',

View File

@ -59,7 +59,7 @@ class BambuserIE(InfoExtractor):
self._LOGIN_URL, urlencode_postdata(login_form))
request.add_header('Referer', self._LOGIN_URL)
response = self._download_webpage(
request, None, 'Logging in as %s' % username)
request, None, 'Logging in')
login_error = self._html_search_regex(
r'(?s)<div class="messages error">(.+?)</div>',

View File

@ -1,20 +1,23 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
parse_duration,
parse_iso8601,
xpath_element,
xpath_text,
)
class BRIE(InfoExtractor):
IE_DESC = 'Bayerischer Rundfunk Mediathek'
IE_DESC = 'Bayerischer Rundfunk'
_VALID_URL = r'(?P<base_url>https?://(?:www\.)?br(?:-klassik)?\.de)/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html'
_TESTS = [
@ -123,10 +126,10 @@ class BRIE(InfoExtractor):
for asset in assets.findall('asset'):
format_url = xpath_text(asset, ['downloadUrl', 'url'])
asset_type = asset.get('type')
if asset_type == 'HDS':
if asset_type.startswith('HDS'):
formats.extend(self._extract_f4m_formats(
format_url + '?hdcore=3.2.0', media_id, f4m_id='hds', fatal=False))
elif asset_type == 'HLS':
elif asset_type.startswith('HLS'):
formats.extend(self._extract_m3u8_formats(
format_url, media_id, 'mp4', 'm3u8_native', m3u8_id='hds', fatal=False))
else:
@ -169,3 +172,140 @@ class BRIE(InfoExtractor):
} for variant in variants.findall('variant') if xpath_text(variant, 'url')]
thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True)
return thumbnails
class BRMediathekIE(InfoExtractor):
IE_DESC = 'Bayerischer Rundfunk Mediathek'
_VALID_URL = r'https?://(?:www\.)?br\.de/mediathek/video/[^/?&#]*?-(?P<id>av:[0-9a-f]{24})'
_TESTS = [{
'url': 'https://www.br.de/mediathek/video/gesundheit-die-sendung-vom-28112017-av:5a1e6a6e8fce6d001871cc8e',
'md5': 'fdc3d485835966d1622587d08ba632ec',
'info_dict': {
'id': 'av:5a1e6a6e8fce6d001871cc8e',
'ext': 'mp4',
'title': 'Die Sendung vom 28.11.2017',
'description': 'md5:6000cdca5912ab2277e5b7339f201ccc',
'timestamp': 1511942766,
'upload_date': '20171129',
}
}]
def _real_extract(self, url):
clip_id = self._match_id(url)
clip = self._download_json(
'https://proxy-base.master.mango.express/graphql',
clip_id, data=json.dumps({
"query": """{
viewer {
clip(id: "%s") {
title
description
duration
createdAt
ageRestriction
videoFiles {
edges {
node {
publicLocation
fileSize
videoProfile {
width
height
bitrate
encoding
}
}
}
}
captionFiles {
edges {
node {
publicLocation
}
}
}
teaserImages {
edges {
node {
imageFiles {
edges {
node {
publicLocation
width
height
}
}
}
}
}
}
}
}
}""" % clip_id}).encode(), headers={
'Content-Type': 'application/json',
})['data']['viewer']['clip']
title = clip['title']
formats = []
for edge in clip.get('videoFiles', {}).get('edges', []):
node = edge.get('node', {})
n_url = node.get('publicLocation')
if not n_url:
continue
ext = determine_ext(n_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
n_url, clip_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
else:
video_profile = node.get('videoProfile', {})
tbr = int_or_none(video_profile.get('bitrate'))
format_id = 'http'
if tbr:
format_id += '-%d' % tbr
formats.append({
'format_id': format_id,
'url': n_url,
'width': int_or_none(video_profile.get('width')),
'height': int_or_none(video_profile.get('height')),
'tbr': tbr,
'filesize': int_or_none(node.get('fileSize')),
})
self._sort_formats(formats)
subtitles = {}
for edge in clip.get('captionFiles', {}).get('edges', []):
node = edge.get('node', {})
n_url = node.get('publicLocation')
if not n_url:
continue
subtitles.setdefault('de', []).append({
'url': n_url,
})
thumbnails = []
for edge in clip.get('teaserImages', {}).get('edges', []):
for image_edge in edge.get('node', {}).get('imageFiles', {}).get('edges', []):
node = image_edge.get('node', {})
n_url = node.get('publicLocation')
if not n_url:
continue
thumbnails.append({
'url': n_url,
'width': int_or_none(node.get('width')),
'height': int_or_none(node.get('height')),
})
return {
'id': clip_id,
'title': title,
'description': clip.get('description'),
'duration': int_or_none(clip.get('duration')),
'timestamp': parse_iso8601(clip.get('createdAt')),
'age_limit': int_or_none(clip.get('ageRestriction')),
'formats': formats,
'subtitles': subtitles,
'thumbnails': thumbnails,
}

View File

@ -1,16 +1,22 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from .gigya import GigyaBaseIE
from ..compat import compat_HTTPError
from ..utils import (
float_or_none,
ExtractorError,
strip_or_none,
float_or_none,
int_or_none,
parse_iso8601,
)
class CanvasIE(InfoExtractor):
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet)/assets/(?P<id>m[dz]-ast-[^/?#&]+)'
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrtvideo)/assets/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'md5': '90139b746a0a9bd7bb631283f6e2a64e',
@ -166,3 +172,139 @@ class CanvasEenIE(InfoExtractor):
'title': title,
'description': self._og_search_description(webpage),
}
class VrtNUIE(GigyaBaseIE):
IE_DESC = 'VrtNU.be'
_VALID_URL = r'https?://(?:www\.)?vrt\.be/(?P<site_id>vrtnu)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/',
'info_dict': {
'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
'ext': 'flv',
'title': 'De zwarte weduwe',
'description': 'md5:d90c21dced7db869a85db89a623998d4',
'duration': 1457.04,
'thumbnail': r're:^https?://.*\.jpg$',
'season': '1',
'season_number': 1,
'episode_number': 1,
},
'skip': 'This video is only available for registered users'
}]
_NETRC_MACHINE = 'vrtnu'
_APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
_CONTEXT_ID = 'R3595707040'
def _real_initialize(self):
self._login()
def _login(self):
username, password = self._get_login_info()
if username is None:
return
auth_data = {
'APIKey': self._APIKEY,
'targetEnv': 'jssdk',
'loginID': username,
'password': password,
'authMode': 'cookie',
}
auth_info = self._gigya_login(auth_data)
# Sometimes authentication fails for no good reason, retry
login_attempt = 1
while login_attempt <= 3:
try:
# When requesting a token, no actual token is returned, but the
# necessary cookies are set.
self._request_webpage(
'https://token.vrt.be',
None, note='Requesting a token', errnote='Could not get a token',
headers={
'Content-Type': 'application/json',
'Referer': 'https://www.vrt.be/vrtnu/',
},
data=json.dumps({
'uid': auth_info['UID'],
'uidsig': auth_info['UIDSignature'],
'ts': auth_info['signatureTimestamp'],
'email': auth_info['profile']['email'],
}).encode('utf-8'))
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
login_attempt += 1
self.report_warning('Authentication failed')
self._sleep(1, None, msg_template='Waiting for %(timeout)s seconds before trying again')
else:
raise e
else:
break
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
title = self._html_search_regex(
r'(?ms)<h1 class="content__heading">(.+?)</h1>',
webpage, 'title').strip()
description = self._html_search_regex(
r'(?ms)<div class="content__description">(.+?)</div>',
webpage, 'description', default=None)
season = self._html_search_regex(
[r'''(?xms)<div\ class="tabs__tab\ tabs__tab--active">\s*
<span>seizoen\ (.+?)</span>\s*
</div>''',
r'<option value="seizoen (\d{1,3})" data-href="[^"]+?" selected>'],
webpage, 'season', default=None)
season_number = int_or_none(season)
episode_number = int_or_none(self._html_search_regex(
r'''(?xms)<div\ class="content__episode">\s*
<abbr\ title="aflevering">afl</abbr>\s*<span>(\d+)</span>
</div>''',
webpage, 'episode_number', default=None))
release_date = parse_iso8601(self._html_search_regex(
r'(?ms)<div class="content__broadcastdate">\s*<time\ datetime="(.+?)"',
webpage, 'release_date', default=None))
# If there's a ? or a # in the URL, remove them and everything after
clean_url = url.split('?')[0].split('#')[0].strip('/')
securevideo_url = clean_url + '.mssecurevideo.json'
try:
video = self._download_json(securevideo_url, display_id)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
self.raise_login_required()
raise
# We are dealing with a '../<show>.relevant' URL
redirect_url = video.get('url')
if redirect_url:
return self.url_result(self._proto_relative_url(redirect_url, 'https:'))
# There is only one entry, but with an unknown key, so just get
# the first one
video_id = list(video.values())[0].get('videoid')
return {
'_type': 'url_transparent',
'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
'ie_key': CanvasIE.ie_key(),
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'season': season,
'season_number': season_number,
'episode_number': episode_number,
'release_date': release_date,
}

View File

@ -31,7 +31,7 @@ class CartoonNetworkIE(TurnerBaseIE):
'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?networkName=CN2&' + query, video_id, {
'secure': {
'media_src': 'http://androidhls-secure.cdn.turner.com/toon/big',
'tokenizer_src': 'http://www.cartoonnetwork.com/cntv/mvpd/processors/services/token_ipadAdobe.do',
'tokenizer_src': 'https://token.vgtf.net/token/token_mobile',
},
}, {
'url': url,

View File

@ -93,7 +93,7 @@ class CCMAIE(InfoExtractor):
'description': clean_html(informacio.get('descripcio')),
'duration': duration,
'timestamp': timestamp,
'thumnails': thumbnails,
'thumbnails': thumbnails,
'subtitles': subtitles,
'formats': formats,
}

View File

@ -29,7 +29,10 @@ from ..compat import (
compat_urlparse,
compat_xml_parse_error,
)
from ..downloader.f4m import remove_encrypted_media
from ..downloader.f4m import (
get_base_url,
remove_encrypted_media,
)
from ..utils import (
NO_DEFAULT,
age_restricted,
@ -589,19 +592,11 @@ class InfoExtractor(object):
if not encoding:
encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
if self._downloader.params.get('dump_intermediate_pages', False):
try:
url = url_or_request.get_full_url()
except AttributeError:
url = url_or_request
self.to_screen('Dumping request to ' + url)
self.to_screen('Dumping request to ' + urlh.geturl())
dump = base64.b64encode(webpage_bytes).decode('ascii')
self._downloader.to_screen(dump)
if self._downloader.params.get('write_pages', False):
try:
url = url_or_request.get_full_url()
except AttributeError:
url = url_or_request
basen = '%s_%s' % (video_id, url)
basen = '%s_%s' % (video_id, urlh.geturl())
if len(basen) > 240:
h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
basen = basen[:240 - len(h)] + h
@ -1239,11 +1234,8 @@ class InfoExtractor(object):
media_nodes = remove_encrypted_media(media_nodes)
if not media_nodes:
return formats
base_url = xpath_text(
manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'],
'base URL', default=None)
if base_url:
base_url = base_url.strip()
manifest_base_url = get_base_url(manifest)
bootstrap_info = xpath_element(
manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
@ -1275,7 +1267,7 @@ class InfoExtractor(object):
continue
manifest_url = (
media_url if media_url.startswith('http://') or media_url.startswith('https://')
else ((base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
else ((manifest_base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
# If media_url is itself a f4m manifest do the recursive extraction
# since bitrates in parent manifest (this one) and media_url manifest
# may differ leading to inability to resolve the format by requested
@ -1310,6 +1302,7 @@ class InfoExtractor(object):
'url': manifest_url,
'manifest_url': manifest_url,
'ext': 'flv' if bootstrap_info is not None else None,
'protocol': 'f4m',
'tbr': tbr,
'width': width,
'height': height,
@ -1355,6 +1348,9 @@ class InfoExtractor(object):
if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
return []
if re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc): # Apple FairPlay
return []
formats = []
format_url = lambda u: (
@ -1401,7 +1397,7 @@ class InfoExtractor(object):
media_url = media.get('URI')
if media_url:
format_id = []
for v in (group_id, name):
for v in (m3u8_id, group_id, name):
if v:
format_id.append(v)
f = {
@ -1979,6 +1975,22 @@ class InfoExtractor(object):
})
segment_index += 1
representation_ms_info['fragments'] = fragments
elif 'segment_urls' in representation_ms_info:
# Segment URLs with no SegmentTimeline
# Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
# https://github.com/rg3/youtube-dl/pull/14844
fragments = []
segment_duration = float_or_none(
representation_ms_info['segment_duration'],
representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
for segment_url in representation_ms_info['segment_urls']:
fragment = {
location_key(segment_url): segment_url,
}
if segment_duration:
fragment['duration'] = segment_duration
fragments.append(fragment)
representation_ms_info['fragments'] = fragments
# NB: MPD manifest may contain direct URLs to unfragmented media.
# No fragments key is present in this case.
if 'fragments' in representation_ms_info:
@ -2233,27 +2245,35 @@ class InfoExtractor(object):
return formats
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
query = compat_urlparse.urlparse(url).query
url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
url_base = self._search_regex(
r'(?:(?:https?|rtmp|rtsp):)?(//[^?]+)', url, 'format url')
http_base_url = '%s:%s' % ('http', url_base)
formats = []
def manifest_url(manifest):
m_url = '%s/%s' % (http_base_url, manifest)
if query:
m_url += '?%s' % query
return m_url
if 'm3u8' not in skip_protocols:
formats.extend(self._extract_m3u8_formats(
http_base_url + '/playlist.m3u8', video_id, 'mp4',
manifest_url('playlist.m3u8'), video_id, 'mp4',
m3u8_entry_protocol, m3u8_id='hls', fatal=False))
if 'f4m' not in skip_protocols:
formats.extend(self._extract_f4m_formats(
http_base_url + '/manifest.f4m',
manifest_url('manifest.f4m'),
video_id, f4m_id='hds', fatal=False))
if 'dash' not in skip_protocols:
formats.extend(self._extract_mpd_formats(
http_base_url + '/manifest.mpd',
manifest_url('manifest.mpd'),
video_id, mpd_id='dash', fatal=False))
if re.search(r'(?:/smil:|\.smil)', url_base):
if 'smil' not in skip_protocols:
rtmp_formats = self._extract_smil_formats(
http_base_url + '/jwplayer.smil',
manifest_url('jwplayer.smil'),
video_id, fatal=False)
for rtmp_format in rtmp_formats:
rtsp_format = rtmp_format.copy()

View File

@ -38,11 +38,32 @@ class CrunchyrollBaseIE(InfoExtractor):
_LOGIN_FORM = 'login_form'
_NETRC_MACHINE = 'crunchyroll'
def _call_rpc_api(self, method, video_id, note=None, data=None):
data = data or {}
data['req'] = 'RpcApi' + method
data = compat_urllib_parse_urlencode(data).encode('utf-8')
return self._download_xml(
'http://www.crunchyroll.com/xml/',
video_id, note, fatal=False, data=data, headers={
'Content-Type': 'application/x-www-form-urlencoded',
})
def _login(self):
(username, password) = self._get_login_info()
if username is None:
return
self._download_webpage(
'https://www.crunchyroll.com/?a=formhandler',
None, 'Logging in', 'Wrong login info',
data=urlencode_postdata({
'formname': 'RpcApiUser_Login',
'next_url': 'https://www.crunchyroll.com/acct/membership',
'name': username,
'password': password,
}))
'''
login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page')
@ -86,6 +107,7 @@ class CrunchyrollBaseIE(InfoExtractor):
raise ExtractorError('Unable to login: %s' % error, expected=True)
raise ExtractorError('Unable to log in')
'''
def _real_initialize(self):
self._login()
@ -365,15 +387,19 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
def _get_subtitles(self, video_id, webpage):
subtitles = {}
for sub_id, sub_name in re.findall(r'\bssid=([0-9]+)"[^>]+?\btitle="([^"]+)', webpage):
sub_page = self._download_webpage(
'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
video_id, note='Downloading subtitles for ' + sub_name)
id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
if not id or not iv or not data:
sub_doc = self._call_rpc_api(
'Subtitle_GetXml', video_id,
'Downloading subtitles for ' + sub_name, data={
'subtitle_script_id': sub_id,
})
if not sub_doc:
continue
subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
sid = sub_doc.get('id')
iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
data = xpath_text(sub_doc, 'data', 'subtitle data')
if not sid or not iv or not data:
continue
subtitle = self._decrypt_subtitles(data, iv, sid).decode('utf-8')
lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
if not lang_code:
continue
@ -444,15 +470,29 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
for fmt in available_fmts:
stream_quality, stream_format = self._FORMAT_IDS[fmt]
video_format = fmt + 'p'
streamdata_req = sanitized_Request(
'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s'
% (video_id, stream_format, stream_quality),
compat_urllib_parse_urlencode({'current_page': url}).encode('utf-8'))
streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
streamdata = self._download_xml(
streamdata_req, video_id,
note='Downloading media info for %s' % video_format)
stream_infos = []
streamdata = self._call_rpc_api(
'VideoPlayer_GetStandardConfig', video_id,
'Downloading media info for %s' % video_format, data={
'media_id': video_id,
'video_format': stream_format,
'video_quality': stream_quality,
'current_page': url,
})
if streamdata:
stream_info = streamdata.find('./{default}preload/stream_info')
if stream_info:
stream_infos.append(stream_info)
stream_info = self._call_rpc_api(
'VideoEncode_GetStreamInfo', video_id,
'Downloading stream info for %s' % video_format, data={
'media_id': video_id,
'video_format': stream_format,
'video_encode_quality': stream_quality,
})
if stream_info:
stream_infos.append(stream_info)
for stream_info in stream_infos:
video_encode_id = xpath_text(stream_info, './video_encode_id')
if video_encode_id in video_encode_ids:
continue
@ -473,7 +513,6 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
metadata = stream_info.find('./metadata')
format_info = {
'format': video_format,
'format_id': video_format,
'height': int_or_none(xpath_text(metadata, './height')),
'width': int_or_none(xpath_text(metadata, './width')),
}
@ -486,23 +525,24 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
if self._is_valid_url(direct_video_url, video_id, video_format):
format_info.update({
'format_id': 'http-' + video_format,
'url': direct_video_url,
})
formats.append(format_info)
continue
format_info.update({
'format_id': 'rtmp-' + video_format,
'url': video_url,
'play_path': video_file,
'ext': 'flv',
})
formats.append(format_info)
self._sort_formats(formats)
self._sort_formats(formats, ('height', 'width', 'tbr', 'fps'))
metadata = self._download_xml(
'http://www.crunchyroll.com/xml', video_id,
note='Downloading media info', query={
'req': 'RpcApiVideoPlayer_GetMediaMetadata',
metadata = self._call_rpc_api(
'VideoPlayer_GetMediaMetadata', video_id,
note='Downloading media info', data={
'media_id': video_id,
})

View File

@ -13,33 +13,30 @@ from ..aes import (
from ..utils import (
bytes_to_intlist,
bytes_to_long,
clean_html,
extract_attributes,
ExtractorError,
intlist_to_bytes,
get_element_by_id,
js_to_json,
int_or_none,
long_to_bytes,
pkcs1pad,
remove_end,
)
class DaisukiIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?daisuki\.net/[^/]+/[^/]+/[^/]+/watch\.[^.]+\.(?P<id>\d+)\.html'
class DaisukiMottoIE(InfoExtractor):
_VALID_URL = r'https?://motto\.daisuki\.net/framewatch/embed/[^/]+/(?P<id>[0-9a-zA-Z]{3})'
_TEST = {
'url': 'http://www.daisuki.net/tw/en/anime/watch.TheIdolMasterCG.11213.html',
'url': 'http://motto.daisuki.net/framewatch/embed/embedDRAGONBALLSUPERUniverseSurvivalsaga/V2e/760/428',
'info_dict': {
'id': '11213',
'id': 'V2e',
'ext': 'mp4',
'title': '#01 Who is in the pumpkin carriage? - THE IDOLM@STER CINDERELLA GIRLS',
'title': '#117 SHOWDOWN OF LOVE! ANDROIDS VS UNIVERSE 2!!',
'subtitles': {
'mul': [{
'ext': 'ttml',
}],
},
'creator': 'BANDAI NAMCO Entertainment',
},
'params': {
'skip_download': True, # AES-encrypted HLS stream
@ -73,7 +70,9 @@ class DaisukiIE(InfoExtractor):
n, e = self._RSA_KEY
encrypted_aeskey = long_to_bytes(pow(bytes_to_long(padded_aeskey), e, n))
init_data = self._download_json('http://www.daisuki.net/bin/bgn/init', video_id, query={
init_data = self._download_json(
'http://motto.daisuki.net/fastAPI/bgn/init/',
video_id, query={
's': flashvars.get('s', ''),
'c': flashvars.get('ss3_prm', ''),
'e': url,
@ -98,14 +97,11 @@ class DaisukiIE(InfoExtractor):
aes_key, iv)).decode('utf-8').rstrip('\0'),
video_id)
title = rtn['title_str']
formats = self._extract_m3u8_formats(
rtn['play_url'], video_id, ext='mp4', entry_protocol='m3u8_native')
title = remove_end(self._og_search_title(webpage), ' - DAISUKI')
creator = self._html_search_regex(
r'Creator\s*:\s*([^<]+)', webpage, 'creator', fatal=False)
subtitles = {}
caption_url = rtn.get('caption_url')
if caption_url:
@ -120,21 +116,18 @@ class DaisukiIE(InfoExtractor):
'title': title,
'formats': formats,
'subtitles': subtitles,
'creator': creator,
}
class DaisukiPlaylistIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)daisuki\.net/[^/]+/[^/]+/[^/]+/detail\.(?P<id>[a-zA-Z0-9]+)\.html'
class DaisukiMottoPlaylistIE(InfoExtractor):
_VALID_URL = r'https?://motto\.daisuki\.net/(?P<id>information)/'
_TEST = {
'url': 'http://www.daisuki.net/tw/en/anime/detail.TheIdolMasterCG.html',
'url': 'http://motto.daisuki.net/information/',
'info_dict': {
'id': 'TheIdolMasterCG',
'title': 'THE IDOLM@STER CINDERELLA GIRLS',
'description': 'md5:0f2c028a9339f7a2c7fbf839edc5c5d8',
'title': 'DRAGON BALL SUPER',
},
'playlist_count': 26,
'playlist_mincount': 117,
}
def _real_extract(self, url):
@ -142,18 +135,19 @@ class DaisukiPlaylistIE(InfoExtractor):
webpage = self._download_webpage(url, playlist_id)
episode_pattern = r'''(?sx)
<img[^>]+delay="[^"]+/(\d+)/movie\.jpg".+?
<p[^>]+class=".*?\bepisodeNumber\b.*?">(?:<a[^>]+>)?([^<]+)'''
entries = [{
entries = []
for li in re.findall(r'(<li[^>]+?data-product_id="[a-zA-Z0-9]{3}"[^>]+>)', webpage):
attr = extract_attributes(li)
ad_id = attr.get('data-ad_id')
product_id = attr.get('data-product_id')
if ad_id and product_id:
episode_id = attr.get('data-chapter')
entries.append({
'_type': 'url_transparent',
'url': url.replace('detail', 'watch').replace('.html', '.' + movie_id + '.html'),
'url': 'http://motto.daisuki.net/framewatch/embed/%s/%s/760/428' % (ad_id, product_id),
'episode_id': episode_id,
'episode_number': int_or_none(episode_id),
} for movie_id, episode_id in re.findall(episode_pattern, webpage)]
'ie_key': 'DaisukiMotto',
})
playlist_title = remove_end(
self._og_search_title(webpage, fatal=False), ' - Anime - DAISUKI')
playlist_description = clean_html(get_element_by_id('synopsisTxt', webpage))
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
return self.playlist_result(entries, playlist_title='DRAGON BALL SUPER')

View File

@ -2,53 +2,85 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import unified_strdate
from ..compat import compat_str
from ..utils import (
float_or_none,
unified_strdate,
)
class DctpTvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dctp\.tv/(#/)?filme/(?P<id>.+?)/$'
_VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
'md5': '174dd4a8a6225cf5655952f969cfbe24',
'info_dict': {
'id': '95eaa4f33dad413aa17b4ee613cccc6c',
'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
'ext': 'mp4',
'ext': 'flv',
'title': 'Videoinstallation für eine Kaufhausfassade',
'description': 'Kurzfilm',
'upload_date': '20110407',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 71.24,
},
'params': {
# rtmp download
'skip_download': True,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
display_id = self._match_id(url)
object_id = self._html_search_meta('DC.identifier', webpage)
webpage = self._download_webpage(url, display_id)
servers_json = self._download_json(
'http://www.dctp.tv/elastic_streaming_client/get_streaming_server/',
video_id, note='Downloading server list')
server = servers_json[0]['server']
m3u8_path = self._search_regex(
r'\'([^\'"]+/playlist\.m3u8)"', webpage, 'm3u8 path')
formats = self._extract_m3u8_formats(
'http://%s%s' % (server, m3u8_path), video_id, ext='mp4',
entry_protocol='m3u8_native')
video_id = self._html_search_meta(
'DC.identifier', webpage, 'video id',
default=None) or self._search_regex(
r'id=["\']uuid[^>]+>([^<]+)<', webpage, 'video id')
title = self._og_search_title(webpage)
servers = self._download_json(
'http://www.dctp.tv/streaming_servers/', display_id,
note='Downloading server list', fatal=False)
if servers:
endpoint = next(
server['endpoint']
for server in servers
if isinstance(server.get('endpoint'), compat_str) and
'cloudfront' in server['endpoint'])
else:
endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'
app = self._search_regex(
r'^rtmpe?://[^/]+/(?P<app>.*)$', endpoint, 'app')
formats = [{
'url': endpoint,
'app': app,
'play_path': 'mp4:%s_dctp_0500_4x3.m4v' % video_id,
'page_url': url,
'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-109.swf',
'ext': 'flv',
}]
description = self._html_search_meta('DC.description', webpage)
upload_date = unified_strdate(
self._html_search_meta('DC.date.created', webpage))
thumbnail = self._og_search_thumbnail(webpage)
duration = float_or_none(self._search_regex(
r'id=["\']duration_in_ms[^+]>(\d+)', webpage, 'duration',
default=None), scale=1000)
return {
'id': object_id,
'id': video_id,
'title': title,
'formats': formats,
'display_id': video_id,
'display_id': display_id,
'description': description,
'upload_date': upload_date,
'thumbnail': thumbnail,
'duration': duration,
}

View File

@ -54,12 +54,12 @@ class DramaFeverBaseIE(AMPIE):
request = sanitized_Request(
self._LOGIN_URL, urlencode_postdata(login_form))
response = self._download_webpage(
request, None, 'Logging in as %s' % username)
request, None, 'Logging in')
if all(logout_pattern not in response
for logout_pattern in ['href="/accounts/logout/"', '>Log out<']):
error = self._html_search_regex(
r'(?s)class="hidden-xs prompt"[^>]*>(.+?)<',
r'(?s)<h\d[^>]+\bclass="hidden-xs prompt"[^>]*>(.+?)</h\d',
response, 'error message', default=None)
if error:
raise ExtractorError('Unable to login: %s' % error, expected=True)

View File

@ -10,7 +10,7 @@ from ..utils import (
class DrTuberIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?drtuber\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?'
_VALID_URL = r'https?://(?:(?:www|m)\.)?drtuber\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?'
_TESTS = [{
'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
'md5': '93e680cf2536ad0dfb7e74d94a89facd',
@ -28,6 +28,9 @@ class DrTuberIE(InfoExtractor):
}, {
'url': 'http://www.drtuber.com/embed/489939',
'only_matching': True,
}, {
'url': 'http://m.drtuber.com/video/3893529/lingerie-blowjob-from-beautiful-teen',
'only_matching': True,
}]
@staticmethod

View File

@ -2,7 +2,9 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
determine_ext,
int_or_none,
try_get,
unified_timestamp,
@ -17,7 +19,7 @@ class EggheadCourseIE(InfoExtractor):
'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
'playlist_count': 29,
'info_dict': {
'id': 'professor-frisby-introduces-composable-functional-javascript',
'id': '72',
'title': 'Professor Frisby Introduces Composable Functional JavaScript',
'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
},
@ -26,14 +28,28 @@ class EggheadCourseIE(InfoExtractor):
def _real_extract(self, url):
playlist_id = self._match_id(url)
course = self._download_json(
'https://egghead.io/api/v1/series/%s' % playlist_id, playlist_id)
lessons = self._download_json(
'https://egghead.io/api/v1/series/%s/lessons' % playlist_id,
playlist_id, 'Downloading course lessons JSON')
entries = [
self.url_result(
'wistia:%s' % lesson['wistia_id'], ie='Wistia',
video_id=lesson['wistia_id'], video_title=lesson.get('title'))
for lesson in course['lessons'] if lesson.get('wistia_id')]
entries = []
for lesson in lessons:
lesson_url = lesson.get('http_url')
if not lesson_url or not isinstance(lesson_url, compat_str):
continue
lesson_id = lesson.get('id')
if lesson_id:
lesson_id = compat_str(lesson_id)
entries.append(self.url_result(
lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id))
course = self._download_json(
'https://egghead.io/api/v1/series/%s' % playlist_id,
playlist_id, 'Downloading course JSON', fatal=False) or {}
playlist_id = course.get('id')
if playlist_id:
playlist_id = compat_str(playlist_id)
return self.playlist_result(
entries, playlist_id, course.get('title'),
@ -43,11 +59,12 @@ class EggheadCourseIE(InfoExtractor):
class EggheadLessonIE(InfoExtractor):
IE_DESC = 'egghead.io lesson'
IE_NAME = 'egghead:lesson'
_VALID_URL = r'https://egghead\.io/lessons/(?P<id>[^/?#&]+)'
_TEST = {
_VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
'info_dict': {
'id': 'fv5yotjxcg',
'id': '1196',
'display_id': 'javascript-linear-data-flow-with-container-style-types-box',
'ext': 'mp4',
'title': 'Create linear data flow with container style types (Box)',
'description': 'md5:9aa2cdb6f9878ed4c39ec09e85a8150e',
@ -60,25 +77,51 @@ class EggheadLessonIE(InfoExtractor):
},
'params': {
'skip_download': True,
'format': 'bestvideo',
},
}
}, {
'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',
'only_matching': True,
}]
def _real_extract(self, url):
lesson_id = self._match_id(url)
display_id = self._match_id(url)
lesson = self._download_json(
'https://egghead.io/api/v1/lessons/%s' % lesson_id, lesson_id)
'https://egghead.io/api/v1/lessons/%s' % display_id, display_id)
lesson_id = compat_str(lesson['id'])
title = lesson['title']
formats = []
for _, format_url in lesson['media_urls'].items():
if not format_url or not isinstance(format_url, compat_str):
continue
ext = determine_ext(format_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url, lesson_id, 'mp4', entry_protocol='m3u8',
m3u8_id='hls', fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
format_url, lesson_id, mpd_id='dash', fatal=False))
else:
formats.append({
'url': format_url,
})
self._sort_formats(formats)
return {
'_type': 'url_transparent',
'ie_key': 'Wistia',
'url': 'wistia:%s' % lesson['wistia_id'],
'id': lesson['wistia_id'],
'title': lesson.get('title'),
'id': lesson_id,
'display_id': display_id,
'title': title,
'description': lesson.get('summary'),
'thumbnail': lesson.get('thumb_nail'),
'timestamp': unified_timestamp(lesson.get('published_at')),
'duration': int_or_none(lesson.get('duration')),
'view_count': int_or_none(lesson.get('plays_count')),
'tags': try_get(lesson, lambda x: x['tag_list'], list),
'series': try_get(
lesson, lambda x: x['series']['title'], compat_str),
'formats': formats,
}

View File

@ -127,7 +127,10 @@ from .bloomberg import BloombergIE
from .bokecc import BokeCCIE
from .bostonglobe import BostonGlobeIE
from .bpb import BpbIE
from .br import BRIE
from .br import (
BRIE,
BRMediathekIE,
)
from .bravotv import BravoTVIE
from .breakcom import BreakIE
from .brightcove import (
@ -150,6 +153,7 @@ from .canalc2 import Canalc2IE
from .canvas import (
CanvasIE,
CanvasEenIE,
VrtNUIE,
)
from .carambatv import (
CarambaTVIE,
@ -245,8 +249,8 @@ from .dailymotion import (
DailymotionCloudIE,
)
from .daisuki import (
DaisukiIE,
DaisukiPlaylistIE,
DaisukiMottoIE,
DaisukiMottoPlaylistIE,
)
from .daum import (
DaumIE,
@ -343,11 +347,9 @@ from .filmon import (
FilmOnIE,
FilmOnChannelIE,
)
from .firstpost import FirstpostIE
from .firsttv import FirstTVIE
from .fivemin import FiveMinIE
from .fivetv import FiveTVIE
from .fktv import FKTVIE
from .flickr import FlickrIE
from .flipagram import FlipagramIE
from .folketinget import FolketingetIE
@ -374,7 +376,7 @@ from .francetv import (
FranceTVIE,
FranceTVEmbedIE,
FranceTVInfoIE,
GenerationQuoiIE,
GenerationWhatIE,
CultureboxIE,
)
from .freesound import FreesoundIE
@ -390,7 +392,6 @@ from .gameone import (
GameOneIE,
GameOnePlaylistIE,
)
from .gamersyde import GamersydeIE
from .gamespot import GameSpotIE
from .gamestar import GameStarIE
from .gaskrank import GaskrankIE
@ -431,7 +432,10 @@ from .hitbox import HitboxIE, HitboxLiveIE
from .hitrecord import HitRecordIE
from .hornbunny import HornBunnyIE
from .hotnewhiphop import HotNewHipHopIE
from .hotstar import HotStarIE
from .hotstar import (
HotStarIE,
HotStarPlaylistIE,
)
from .howcast import HowcastIE
from .howstuffworks import HowStuffWorksIE
from .hrti import (
@ -568,6 +572,7 @@ from .mangomolo import (
MangomoloLiveIE,
)
from .manyvids import ManyVidsIE
from .massengeschmacktv import MassengeschmackTVIE
from .matchtv import MatchTVIE
from .mdr import MDRIE
from .mediaset import MediasetIE
@ -623,7 +628,6 @@ from .mwave import MwaveIE, MwaveMeetGreetIE
from .myspace import MySpaceIE, MySpaceAlbumIE
from .myspass import MySpassIE
from .myvi import MyviIE
from .myvideo import MyVideoIE
from .myvidster import MyVidsterIE
from .nationalgeographic import (
NationalGeographicVideoIE,
@ -786,6 +790,7 @@ from .patreon import PatreonIE
from .pbs import PBSIE
from .pearvideo import PearVideoIE
from .people import PeopleIE
from .performgroup import PerformGroupIE
from .periscope import (
PeriscopeIE,
PeriscopeUserIE,
@ -1110,10 +1115,6 @@ from .tvplayer import TVPlayerIE
from .tweakers import TweakersIE
from .twentyfourvideo import TwentyFourVideoIE
from .twentymin import TwentyMinutenIE
from .twentytwotracks import (
TwentyTwoTracksIE,
TwentyTwoTracksGenreIE
)
from .twitch import (
TwitchVideoIE,
TwitchChapterIE,
@ -1335,6 +1336,11 @@ from .youku import (
YoukuIE,
YoukuShowIE,
)
from .younow import (
YouNowLiveIE,
YouNowChannelIE,
YouNowMomentIE,
)
from .youporn import YouPornIE
from .yourupload import YourUploadIE
from .youtube import (

View File

@ -1,7 +1,10 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_etree_fromstring
from ..utils import (
xpath_element,
xpath_text,
@ -43,10 +46,15 @@ class FazIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
description = self._og_search_description(webpage)
config_xml_url = self._search_regex(
r'videoXMLURL\s*=\s*"([^"]+)', webpage, 'config xml url')
config = self._download_xml(
config_xml_url, video_id, 'Downloading config xml')
media = self._html_search_regex(
r"data-videojs-media='([^']+)",
webpage, 'media')
if media == 'extern':
perform_url = self._search_regex(
r"<iframe[^>]+?src='((?:http:)?//player\.performgroup\.com/eplayer/eplayer\.html#/?[0-9a-f]{26}\.[0-9a-z]{26})",
webpage, 'perform url')
return self.url_result(perform_url)
config = compat_etree_fromstring(media)
encodings = xpath_element(config, 'ENCODINGS', 'encodings', True)
formats = []
@ -55,12 +63,24 @@ class FazIE(InfoExtractor):
if encoding is not None:
encoding_url = xpath_text(encoding, 'FILENAME')
if encoding_url:
formats.append({
tbr = xpath_text(encoding, 'AVERAGEBITRATE', 1000)
if tbr:
tbr = int_or_none(tbr.replace(',', '.'))
f = {
'url': encoding_url,
'format_id': code.lower(),
'quality': pref,
'tbr': int_or_none(xpath_text(encoding, 'AVERAGEBITRATE')),
'tbr': tbr,
'vcodec': xpath_text(encoding, 'CODEC'),
}
mobj = re.search(r'(\d+)x(\d+)_(\d+)\.mp4', encoding_url)
if mobj:
f.update({
'width': int(mobj.group(1)),
'height': int(mobj.group(2)),
'tbr': tbr or int(mobj.group(3)),
})
formats.append(f)
self._sort_formats(formats)
return {

View File

@ -2,7 +2,10 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
int_or_none,
float_or_none,
)
class FczenitIE(InfoExtractor):
@ -14,6 +17,8 @@ class FczenitIE(InfoExtractor):
'id': '41044',
'ext': 'mp4',
'title': 'Так пишется история: казанский разгром ЦСКА на «Зенит-ТВ»',
'timestamp': 1462283735,
'upload_date': '20160503',
},
}
@ -21,28 +26,31 @@ class FczenitIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_title = self._html_search_regex(
r'<[^>]+class=\"photoalbum__title\">([^<]+)', webpage, 'title')
msi_id = self._search_regex(
r"(?s)config\s*=\s*{.+?video_id\s*:\s*'([^']+)'", webpage, 'msi id')
video_items = self._parse_json(self._search_regex(
r'arrPath\s*=\s*JSON\.parse\(\'(.+)\'\)', webpage, 'video items'),
video_id)
def merge_dicts(*dicts):
ret = {}
for a_dict in dicts:
ret.update(a_dict)
return ret
msi_data = self._download_json(
'http://player.fc-zenit.ru/msi/video', msi_id, query={
'video': msi_id,
})['data']
title = msi_data['name']
formats = [{
'url': compat_urlparse.urljoin(url, video_url),
'tbr': int(tbr),
} for tbr, video_url in merge_dicts(*video_items).items()]
'format_id': q.get('label'),
'url': q['url'],
'height': int_or_none(q.get('label')),
} for q in msi_data['qualities'] if q.get('url')]
self._sort_formats(formats)
tags = [tag['label'] for tag in msi_data.get('tags', []) if tag.get('label')]
return {
'id': video_id,
'title': video_title,
'title': title,
'thumbnail': msi_data.get('preview'),
'formats': formats,
'duration': float_or_none(msi_data.get('duration')),
'timestamp': int_or_none(msi_data.get('date')),
'tags': tags,
}

View File

@ -1,50 +0,0 @@
from __future__ import unicode_literals
from .common import InfoExtractor
class FirstpostIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html'
_TEST = {
'url': 'http://www.firstpost.com/india/india-to-launch-indigenous-aircraft-carrier-monday-1025403.html',
'md5': 'ee9114957692f01fb1263ed87039112a',
'info_dict': {
'id': '1025403',
'ext': 'mp4',
'title': 'India to launch indigenous aircraft carrier INS Vikrant today',
'description': 'md5:feef3041cb09724e0bdc02843348f5f4',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
page = self._download_webpage(url, video_id)
title = self._html_search_meta('twitter:title', page, 'title', fatal=True)
description = self._html_search_meta('twitter:description', page, 'title')
data = self._download_xml(
'http://www.firstpost.com/getvideoxml-%s.xml' % video_id, video_id,
'Downloading video XML')
item = data.find('./playlist/item')
thumbnail = item.find('./image').text
formats = [
{
'url': details.find('./file').text,
'format_id': details.find('./label').text.strip(),
'width': int(details.find('./width').text.strip()),
'height': int(details.find('./height').text.strip()),
} for details in item.findall('./source/file_details') if details.find('./file').text
]
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'formats': formats,
}

View File

@ -1,51 +0,0 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
clean_html,
determine_ext,
js_to_json,
)
class FKTVIE(InfoExtractor):
IE_NAME = 'fernsehkritik.tv'
_VALID_URL = r'https?://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?'
_TEST = {
'url': 'http://fernsehkritik.tv/folge-1',
'md5': '21f0b0c99bce7d5b524eb1b17b1c6d79',
'info_dict': {
'id': '1',
'ext': 'mp4',
'title': 'Folge 1 vom 10. April 2007',
'thumbnail': r're:^https?://.*\.jpg$',
},
}
def _real_extract(self, url):
episode = self._match_id(url)
webpage = self._download_webpage(
'http://fernsehkritik.tv/folge-%s/play' % episode, episode)
title = clean_html(self._html_search_regex(
'<h3>([^<]+)</h3>', webpage, 'title'))
thumbnail = self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False)
sources = self._parse_json(self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'), episode, js_to_json)
formats = []
for source in sources:
furl = source.get('src')
if furl:
formats.append({
'url': furl,
'format_id': determine_ext(furl),
})
self._sort_formats(formats)
return {
'id': episode,
'title': title,
'formats': formats,
'thumbnail': thumbnail,
}

View File

@ -2,7 +2,6 @@
from __future__ import unicode_literals
from .anvato import AnvatoIE
from ..utils import js_to_json
class FOX9IE(AnvatoIE):
@ -34,9 +33,9 @@ class FOX9IE(AnvatoIE):
video_id = self._parse_json(
self._search_regex(
r'AnvatoPlaylist\s*\(\s*(\[.+?\])\s*\)\s*;',
r"this\.videosJson\s*=\s*'(\[.+?\])';",
webpage, 'anvato playlist'),
video_id, transform_source=js_to_json)[0]['video']
video_id)[0]['video']
return self._get_anvato_videos(
'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b',

View File

@ -3,7 +3,6 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..compat import compat_urlparse
@ -308,31 +307,32 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
return self._extract_video(video_id, catalogue)
class GenerationQuoiIE(InfoExtractor):
IE_NAME = 'france2.fr:generation-quoi'
_VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<id>[^/?#]+)'
class GenerationWhatIE(InfoExtractor):
IE_NAME = 'france2.fr:generation-what'
_VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P<id>[^/?#]+)'
_TEST = {
'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous',
_TESTS = [{
'url': 'http://generation-what.francetv.fr/portrait/video/present-arms',
'info_dict': {
'id': 'k7FJX8VBcvvLmX4wA5Q',
'id': 'wtvKYUG45iw',
'ext': 'mp4',
'title': 'Génération Quoi - Garde à Vous',
'uploader': 'Génération Quoi',
'title': 'Generation What - Garde à vous - FRA',
'uploader': 'Generation What',
'uploader_id': 'UCHH9p1eetWCgt4kXBYCb3_w',
'upload_date': '20160411',
},
'params': {
# It uses Dailymotion
'skip_download': True,
},
}
}, {
'url': 'http://generation-what.francetv.fr/europe/video/present-arms',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % display_id)
info_json = self._download_webpage(info_url, display_id)
info = json.loads(info_json)
return self.url_result('http://www.dailymotion.com/video/%s' % info['id'],
ie='Dailymotion')
webpage = self._download_webpage(url, display_id)
youtube_id = self._search_regex(
r"window\.videoURL\s*=\s*'([0-9A-Za-z_-]{11})';",
webpage, 'youtube id')
return self.url_result(youtube_id, 'Youtube', youtube_id)
class CultureboxIE(FranceTVBaseInfoExtractor):
@ -363,6 +363,6 @@ class CultureboxIE(FranceTVBaseInfoExtractor):
raise ExtractorError('Video %s is not available' % name, expected=True)
video_id, catalogue = self._search_regex(
r'"http://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id').split('@')
r'"https?://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id').split('@')
return self._extract_video(video_id, catalogue)

View File

@ -1,37 +1,34 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
class FreespeechIE(InfoExtractor):
IE_NAME = 'freespeech.org'
_VALID_URL = r'https?://(?:www\.)?freespeech\.org/video/(?P<title>.+)'
_VALID_URL = r'https?://(?:www\.)?freespeech\.org/stories/(?P<id>.+)'
_TEST = {
'add_ie': ['Youtube'],
'url': 'https://www.freespeech.org/video/obama-romney-campaign-colorado-ahead-debate-0',
'url': 'http://www.freespeech.org/stories/fcc-announces-net-neutrality-rollback-whats-stake/',
'info_dict': {
'id': 'poKsVCZ64uU',
'ext': 'webm',
'title': 'Obama, Romney Campaign in Colorado Ahead of Debate',
'description': 'Obama, Romney Campaign in Colorado Ahead of Debate',
'uploader': 'freespeechtv',
'id': 'waRk6IPqyWM',
'ext': 'mp4',
'title': 'What\'s At Stake - Net Neutrality Special',
'description': 'Presented by MNN and FSTV',
'upload_date': '20170728',
'uploader_id': 'freespeechtv',
'upload_date': '20121002',
'uploader': 'freespeechtv',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
title = mobj.group('title')
webpage = self._download_webpage(url, title)
info_json = self._search_regex(r'jQuery\.extend\(Drupal\.settings, ({.*?})\);', webpage, 'info')
info = json.loads(info_json)
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
youtube_url = self._search_regex(
r'data-video-url="([^"]+)"',
webpage, 'youtube url')
return {
'_type': 'url',
'url': info['jw_player']['basic_video_node_player']['file'],
'url': youtube_url,
'ie_key': 'Youtube',
}

View File

@ -57,7 +57,7 @@ class FunimationIE(InfoExtractor):
try:
data = self._download_json(
'https://prod-api-funimationnow.dadcdigital.com/api/auth/login/',
None, 'Logging in as %s' % username, data=urlencode_postdata({
None, 'Logging in', data=urlencode_postdata({
'username': username,
'password': password,
}))

View File

@ -3,27 +3,31 @@ from __future__ import unicode_literals
from .adobepass import AdobePassIE
from ..utils import (
update_url_query,
extract_attributes,
int_or_none,
parse_age_limit,
smuggle_url,
update_url_query,
)
class FXNetworksIE(AdobePassIE):
_VALID_URL = r'https?://(?:www\.)?(?:fxnetworks|simpsonsworld)\.com/video/(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.fxnetworks.com/video/719841347694',
'md5': '1447d4722e42ebca19e5232ab93abb22',
'url': 'http://www.fxnetworks.com/video/1032565827847',
'md5': '8d99b97b4aa7a202f55b6ed47ea7e703',
'info_dict': {
'id': '719841347694',
'id': 'dRzwHC_MMqIv',
'ext': 'mp4',
'title': 'Vanpage',
'description': 'F*ck settling down. You\'re the Worst returns for an all new season August 31st on FXX.',
'title': 'First Look: Better Things - Season 2',
'description': 'Because real life is like a fart. Watch this FIRST LOOK to see what inspired the new season of Better Things.',
'age_limit': 14,
'uploader': 'NEWA-FNG-FX',
'upload_date': '20160706',
'timestamp': 1467844741,
'upload_date': '20170825',
'timestamp': 1503686274,
'episode_number': 0,
'season_number': 2,
'series': 'Better Things',
},
'add_ie': ['ThePlatform'],
}, {
@ -64,6 +68,9 @@ class FXNetworksIE(AdobePassIE):
'id': video_id,
'title': title,
'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
'series': video_data.get('data-show-title'),
'episode_number': int_or_none(video_data.get('data-episode')),
'season_number': int_or_none(video_data.get('data-season')),
'thumbnail': video_data.get('data-large-thumb'),
'age_limit': parse_age_limit(rating),
'ie_key': 'ThePlatform',

View File

@ -1,70 +0,0 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
js_to_json,
parse_duration,
remove_start,
)
class GamersydeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?gamersyde\.com/hqstream_(?P<display_id>[\da-z_]+)-(?P<id>\d+)_[a-z]{2}\.html'
_TEST = {
'url': 'http://www.gamersyde.com/hqstream_bloodborne_birth_of_a_hero-34371_en.html',
'md5': 'f38d400d32f19724570040d5ce3a505f',
'info_dict': {
'id': '34371',
'ext': 'mp4',
'duration': 372,
'title': 'Bloodborne - Birth of a hero',
'thumbnail': r're:^https?://.*\.jpg$',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id)
playlist = self._parse_json(
self._search_regex(
r'(?s)playlist: \[({.+?})\]\s*}\);', webpage, 'files'),
display_id, transform_source=js_to_json)
formats = []
for source in playlist['sources']:
video_url = source.get('file')
if not video_url:
continue
format_id = source.get('label')
f = {
'url': video_url,
'format_id': format_id,
}
m = re.search(r'^(?P<height>\d+)[pP](?P<fps>\d+)fps', format_id)
if m:
f.update({
'height': int(m.group('height')),
'fps': int(m.group('fps')),
})
formats.append(f)
self._sort_formats(formats)
title = remove_start(playlist['title'], '%s - ' % video_id)
thumbnail = playlist.get('image')
duration = parse_duration(self._search_regex(
r'Length:</label>([^<]+)<', webpage, 'duration', fatal=False))
return {
'id': video_id,
'display_id': display_id,
'title': title,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
}

View File

@ -14,7 +14,7 @@ from ..utils import (
class GameSpotIE(OnceIE):
_VALID_URL = r'https?://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?'
_VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article)s/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
'md5': 'b2a30deaa8654fcccd43713a6b6a4825',
@ -35,6 +35,12 @@ class GameSpotIE(OnceIE):
'params': {
'skip_download': True, # m3u8 downloads
},
}, {
'url': 'https://www.gamespot.com/videos/embed/6439218/',
'only_matching': True,
}, {
'url': 'https://www.gamespot.com/articles/the-last-of-us-2-receives-new-ps4-trailer/1100-6454469/',
'only_matching': True,
}]
def _real_extract(self, url):
@ -52,7 +58,7 @@ class GameSpotIE(OnceIE):
manifest_url = f4m_url
formats.extend(self._extract_f4m_formats(
f4m_url + '?hdcore=3.7.0', page_id, f4m_id='hds', fatal=False))
m3u8_url = streams.get('m3u8_stream')
m3u8_url = dict_get(streams, ('m3u8_stream', 'adaptive_stream'))
if m3u8_url:
manifest_url = m3u8_url
m3u8_formats = self._extract_m3u8_formats(
@ -60,7 +66,7 @@ class GameSpotIE(OnceIE):
m3u8_id='hls', fatal=False)
formats.extend(m3u8_formats)
progressive_url = dict_get(
streams, ('progressive_hd', 'progressive_high', 'progressive_low'))
streams, ('progressive_hd', 'progressive_high', 'progressive_low', 'other_lr'))
if progressive_url and manifest_url:
qualities_basename = self._search_regex(
r'/([^/]+)\.csmil/',
@ -105,7 +111,8 @@ class GameSpotIE(OnceIE):
onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri')
if onceux_url:
formats.extend(self._extract_once_formats(re.sub(
r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url)))
r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url),
http_formats_preference=-1))
if not formats:
for quality in ['sd', 'hd']:

View File

@ -102,6 +102,7 @@ from .joj import JojIE
from .megaphone import MegaphoneIE
from .vzaar import VzaarIE
from .channel9 import Channel9IE
from .vshare import VShareIE
class GenericIE(InfoExtractor):
@ -1098,9 +1099,9 @@ class GenericIE(InfoExtractor):
},
# jwplayer rtmp
{
'url': 'http://www.suffolk.edu/sjc/',
'url': 'http://www.suffolk.edu/sjc/live.php',
'info_dict': {
'id': 'sjclive',
'id': 'live',
'ext': 'flv',
'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
'uploader': 'www.suffolk.edu',
@ -1108,7 +1109,7 @@ class GenericIE(InfoExtractor):
'params': {
'skip_download': True,
},
'skip': 'does not contain a video anymore',
'skip': 'Only has video a few mornings per month, see http://www.suffolk.edu/sjc/',
},
# Complex jwplayer
{
@ -1135,6 +1136,19 @@ class GenericIE(InfoExtractor):
'skip_download': True,
}
},
{
# JWPlatform iframe
'url': 'https://www.mediaite.com/tv/dem-senator-claims-gary-cohn-faked-a-bad-connection-during-trump-call-to-get-him-off-the-phone/',
'md5': 'ca00a040364b5b439230e7ebfd02c4e9',
'info_dict': {
'id': 'O0c5JcKT',
'ext': 'mp4',
'upload_date': '20171122',
'timestamp': 1511366290,
'title': 'Dem Senator Claims Gary Cohn Faked a Bad Connection During Trump Call to Get Him Off the Phone',
},
'add_ie': [JWPlatformIE.ie_key()],
},
{
# Video.js embed, multiple formats
'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
@ -1921,6 +1935,16 @@ class GenericIE(InfoExtractor):
'title': 'Rescue Kit 14 Free Edition - Getting started',
},
'playlist_count': 4,
},
{
# vshare embed
'url': 'https://youtube-dl-demo.neocities.org/vshare.html',
'md5': '17b39f55b5497ae8b59f5fbce8e35886',
'info_dict': {
'id': '0f64ce6',
'title': 'vl14062007715967',
'ext': 'mp4',
}
}
# {
# # TODO: find another test
@ -2879,6 +2903,11 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches(
channel9_urls, video_id, video_title, ie=Channel9IE.ie_key())
vshare_urls = VShareIE._extract_urls(webpage)
if vshare_urls:
return self.playlist_from_matches(
vshare_urls, video_id, video_title, ie=VShareIE.ie_key())
def merge_dicts(dict1, dict2):
merged = {}
for k, v in dict1.items():

View File

@ -0,0 +1,22 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
urlencode_postdata,
)
class GigyaBaseIE(InfoExtractor):
def _gigya_login(self, auth_data):
auth_info = self._download_json(
'https://accounts.eu1.gigya.com/accounts.login', None,
note='Logging in', errnote='Unable to log in',
data=urlencode_postdata(auth_data))
error_message = auth_info.get('errorDetails') or auth_info.get('errorMessage')
if error_message:
raise ExtractorError(
'Unable to login: %s' % error_message, expected=True)
return auth_info

View File

@ -1,22 +1,47 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
determine_ext,
ExtractorError,
int_or_none,
)
class HotStarIE(InfoExtractor):
class HotStarBaseIE(InfoExtractor):
_GEO_COUNTRIES = ['IN']
def _download_json(self, *args, **kwargs):
response = super(HotStarBaseIE, self)._download_json(*args, **kwargs)
if response['resultCode'] != 'OK':
if kwargs.get('fatal'):
raise ExtractorError(
response['errorDescription'], expected=True)
return None
return response['resultObj']
def _download_content_info(self, content_id):
return self._download_json(
'https://account.hotstar.com/AVS/besc', content_id, query={
'action': 'GetAggregatedContentDetails',
'appVersion': '5.0.40',
'channel': 'PCTV',
'contentId': content_id,
})['contentInfo'][0]
class HotStarIE(HotStarBaseIE):
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
_TESTS = [{
'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273',
'info_dict': {
'id': '1000076273',
'ext': 'mp4',
'title': 'On Air With AIB - English',
'title': 'On Air With AIB',
'description': 'md5:c957d8868e9bc793ccb813691cc4c434',
'timestamp': 1447227000,
'upload_date': '20151111',
@ -34,23 +59,11 @@ class HotStarIE(InfoExtractor):
'only_matching': True,
}]
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True, query=None):
json_data = super(HotStarIE, self)._download_json(
url_or_request, video_id, note, fatal=fatal, query=query)
if json_data['resultCode'] != 'OK':
if fatal:
raise ExtractorError(json_data['errorDescription'])
return None
return json_data['resultObj']
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
'http://account.hotstar.com/AVS/besc', video_id, query={
'action': 'GetAggregatedContentDetails',
'channel': 'PCTV',
'contentId': video_id,
})['contentInfo'][0]
video_data = self._download_content_info(video_id)
title = video_data['episodeTitle']
if video_data.get('encrypted') == 'Y':
@ -99,3 +112,51 @@ class HotStarIE(InfoExtractor):
'episode_number': int_or_none(video_data.get('episodeNumber')),
'series': video_data.get('contentTitle'),
}
class HotStarPlaylistIE(HotStarBaseIE):
IE_NAME = 'hotstar:playlist'
_VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com/tv/[^/]+/(?P<content_id>\d+))/(?P<type>[^/]+)/(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.hotstar.com/tv/pratidaan/14982/episodes/14812/9993',
'info_dict': {
'id': '14812',
},
'playlist_mincount': 75,
}, {
'url': 'http://www.hotstar.com/tv/pratidaan/14982/popular-clips/9998/9998',
'only_matching': True,
}]
_ITEM_TYPES = {
'episodes': 'EPISODE',
'popular-clips': 'CLIPS',
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
base_url = mobj.group('url')
content_id = mobj.group('content_id')
playlist_type = mobj.group('type')
content_info = self._download_content_info(content_id)
playlist_id = compat_str(content_info['categoryId'])
collection = self._download_json(
'https://search.hotstar.com/AVS/besc', playlist_id, query={
'action': 'SearchContents',
'appVersion': '5.0.40',
'channel': 'PCTV',
'moreFilters': 'series:%s;' % playlist_id,
'query': '*',
'searchOrder': 'last_broadcast_date desc,year desc,title asc',
'type': self._ITEM_TYPES.get(playlist_type, 'EPISODE'),
})
entries = [
self.url_result(
'%s/_/%s' % (base_url, video['contentId']),
ie=HotStarIE.ie_key(), video_id=video['contentId'])
for video in collection['response']['docs']
if video.get('contentId')]
return self.playlist_result(entries, playlist_id)

View File

@ -1,5 +1,6 @@
from __future__ import unicode_literals
import itertools
import re
from .common import InfoExtractor
@ -7,7 +8,6 @@ from ..compat import compat_str
from ..utils import (
get_element_by_attribute,
int_or_none,
limit_length,
lowercase_escape,
try_get,
)
@ -130,13 +130,21 @@ class InstagramIE(InfoExtractor):
video_url = media.get('video_url')
height = int_or_none(media.get('dimensions', {}).get('height'))
width = int_or_none(media.get('dimensions', {}).get('width'))
description = media.get('caption')
description = try_get(
media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
compat_str) or media.get('caption')
thumbnail = media.get('display_src')
timestamp = int_or_none(media.get('date'))
timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
uploader = media.get('owner', {}).get('full_name')
uploader_id = media.get('owner', {}).get('username')
like_count = int_or_none(media.get('likes', {}).get('count'))
comment_count = int_or_none(media.get('comments', {}).get('count'))
def get_count(key, kind):
return int_or_none(try_get(
media, (lambda x: x['edge_media_%s' % key]['count'],
lambda x: x['%ss' % kind]['count'])))
like_count = get_count('preview_like', 'like')
comment_count = get_count('to_comment', 'comment')
comments = [{
'author': comment.get('user', {}).get('username'),
'author_id': comment.get('user', {}).get('id'),
@ -212,7 +220,7 @@ class InstagramIE(InfoExtractor):
class InstagramUserIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
_VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:$|[?#])'
IE_DESC = 'Instagram user profile'
IE_NAME = 'instagram:user'
_TEST = {
@ -221,82 +229,79 @@ class InstagramUserIE(InfoExtractor):
'id': 'porsche',
'title': 'porsche',
},
'playlist_mincount': 2,
'playlist': [{
'info_dict': {
'id': '614605558512799803_462752227',
'ext': 'mp4',
'title': '#Porsche Intelligent Performance.',
'thumbnail': r're:^https?://.*\.jpg',
'uploader': 'Porsche',
'uploader_id': 'porsche',
'timestamp': 1387486713,
'upload_date': '20131219',
},
}],
'playlist_count': 5,
'params': {
'extract_flat': True,
'skip_download': True,
'playlistend': 5,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
uploader_id = mobj.group('username')
def _entries(self, uploader_id):
query = {
'__a': 1,
}
entries = []
page_count = 0
media_url = 'http://instagram.com/%s/media' % uploader_id
while True:
def get_count(kind):
return int_or_none(try_get(
node, lambda x: x['%ss' % kind]['count']))
for page_num in itertools.count(1):
page = self._download_json(
media_url, uploader_id,
note='Downloading page %d ' % (page_count + 1),
)
page_count += 1
'https://instagram.com/%s/' % uploader_id, uploader_id,
note='Downloading page %d' % page_num,
fatal=False, query=query)
if not page:
break
for it in page['items']:
if it.get('type') != 'video':
nodes = try_get(page, lambda x: x['user']['media']['nodes'], list)
if not nodes:
break
max_id = None
for node in nodes:
node_id = node.get('id')
if node_id:
max_id = node_id
if node.get('__typename') != 'GraphVideo' and node.get('is_video') is not True:
continue
video_id = node.get('code')
if not video_id:
continue
like_count = int_or_none(it.get('likes', {}).get('count'))
user = it.get('user', {})
formats = [{
'format_id': k,
'height': v.get('height'),
'width': v.get('width'),
'url': v['url'],
} for k, v in it['videos'].items()]
self._sort_formats(formats)
info = self.url_result(
'https://instagram.com/p/%s/' % video_id,
ie=InstagramIE.ie_key(), video_id=video_id)
thumbnails_el = it.get('images', {})
thumbnail = thumbnails_el.get('thumbnail', {}).get('url')
description = try_get(
node, [lambda x: x['caption'], lambda x: x['text']['id']],
compat_str)
thumbnail = node.get('thumbnail_src') or node.get('display_src')
timestamp = int_or_none(node.get('date'))
# In some cases caption is null, which corresponds to None
# in python. As a result, it.get('caption', {}) gives None
title = (it.get('caption') or {}).get('text', it['id'])
comment_count = get_count('comment')
like_count = get_count('like')
view_count = int_or_none(node.get('video_views'))
entries.append({
'id': it['id'],
'title': limit_length(title, 80),
'formats': formats,
info.update({
'description': description,
'thumbnail': thumbnail,
'webpage_url': it.get('link'),
'uploader': user.get('full_name'),
'uploader_id': user.get('username'),
'timestamp': timestamp,
'comment_count': comment_count,
'like_count': like_count,
'timestamp': int_or_none(it.get('created_time')),
'view_count': view_count,
})
if not page['items']:
break
max_id = page['items'][-1]['id'].split('_')[0]
media_url = (
'http://instagram.com/%s/media?max_id=%s' % (
uploader_id, max_id))
yield info
return {
'_type': 'playlist',
'entries': entries,
'id': uploader_id,
'title': uploader_id,
}
if not max_id:
break
query['max_id'] = max_id
def _real_extract(self, url):
uploader_id = self._match_id(url)
return self.playlist_result(
self._entries(uploader_id), uploader_id, uploader_id)

View File

@ -4,6 +4,7 @@ from __future__ import unicode_literals
import uuid
import xml.etree.ElementTree as etree
import json
import re
from .common import InfoExtractor
from ..compat import (
@ -142,9 +143,9 @@ class ITVIE(InfoExtractor):
f['url'] = rtmp_url
formats.append(f)
ios_playlist_url = params.get('data-video-playlist')
ios_playlist_url = params.get('data-video-playlist') or params.get('data-video-id')
hmac = params.get('data-video-hmac')
if ios_playlist_url and hmac:
if ios_playlist_url and hmac and re.match(r'https?://', ios_playlist_url):
headers = self.geo_verification_headers()
headers.update({
'Accept': 'application/vnd.itv.vod.playlist.v2+json',
@ -159,12 +160,12 @@ class ITVIE(InfoExtractor):
'token': ''
},
'device': {
'manufacturer': 'Apple',
'model': 'iPad',
'manufacturer': 'Safari',
'model': '5',
'os': {
'name': 'iPhone OS',
'version': '9.3',
'type': 'ios'
'name': 'Windows NT',
'version': '6.1',
'type': 'desktop'
}
},
'client': {
@ -173,10 +174,10 @@ class ITVIE(InfoExtractor):
},
'variantAvailability': {
'featureset': {
'min': ['hls', 'aes'],
'max': ['hls', 'aes']
'min': ['hls', 'aes', 'outband-webvtt'],
'max': ['hls', 'aes', 'outband-webvtt']
},
'platformTag': 'mobile'
'platformTag': 'dotcom'
}
}).encode(), headers=headers, fatal=False)
if ios_playlist:

View File

@ -24,7 +24,7 @@ class JWPlatformIE(InfoExtractor):
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<script[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8})',
r'<(?:script|iframe)[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8})',
webpage)
if mobj:
return mobj.group('url')

View File

@ -114,7 +114,7 @@ class LivestreamIE(InfoExtractor):
smil_url = video_data.get('smil_url')
if smil_url:
formats.extend(self._extract_smil_formats(smil_url, video_id))
formats.extend(self._extract_smil_formats(smil_url, video_id, fatal=False))
m3u8_url = video_data.get('m3u8_url')
if m3u8_url:

View File

@ -0,0 +1,77 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
clean_html,
determine_ext,
int_or_none,
js_to_json,
mimetype2ext,
parse_filesize,
)
class MassengeschmackTVIE(InfoExtractor):
IE_NAME = 'massengeschmack.tv'
_VALID_URL = r'https?://(?:www\.)?massengeschmack\.tv/play/(?P<id>[^?&#]+)'
_TEST = {
'url': 'https://massengeschmack.tv/play/fktv202',
'md5': 'a9e054db9c2b5a08f0a0527cc201e8d3',
'info_dict': {
'id': 'fktv202',
'ext': 'mp4',
'title': 'Fernsehkritik-TV - Folge 202',
},
}
def _real_extract(self, url):
episode = self._match_id(url)
webpage = self._download_webpage(url, episode)
title = clean_html(self._html_search_regex(
'<h3>([^<]+)</h3>', webpage, 'title'))
thumbnail = self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False)
sources = self._parse_json(self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'), episode, js_to_json)
formats = []
for source in sources:
furl = source.get('src')
if not furl:
continue
furl = self._proto_relative_url(furl)
ext = determine_ext(furl) or mimetype2ext(source.get('type'))
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
furl, episode, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
else:
formats.append({
'url': furl,
'format_id': determine_ext(furl),
})
for (durl, format_id, width, height, filesize) in re.findall(r'''(?x)
<a[^>]+?href="(?P<url>(?:https:)?//[^"]+)".*?
<strong>(?P<format_id>.+?)</strong>.*?
<small>(?:(?P<width>\d+)x(?P<height>\d+))?\s+?\((?P<filesize>[\d,]+\s*[GM]iB)\)</small>
''', webpage):
formats.append({
'url': durl,
'format_id': format_id,
'width': int_or_none(width),
'height': int_or_none(height),
'filesize': parse_filesize(filesize),
'vcodec': 'none' if format_id.startswith('Audio') else None,
})
self._sort_formats(formats, ('width', 'height', 'filesize', 'tbr'))
return {
'id': episode,
'title': title,
'formats': formats,
'thumbnail': thumbnail,
}

View File

@ -2,19 +2,18 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .gigya import GigyaBaseIE
from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
parse_duration,
try_get,
unified_timestamp,
urlencode_postdata,
)
class MedialaanIE(InfoExtractor):
class MedialaanIE(GigyaBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:www\.|nieuws\.)?
@ -119,15 +118,7 @@ class MedialaanIE(InfoExtractor):
'password': password,
}
auth_info = self._download_json(
'https://accounts.eu1.gigya.com/accounts.login', None,
note='Logging in', errnote='Unable to log in',
data=urlencode_postdata(auth_data))
error_message = auth_info.get('errorDetails') or auth_info.get('errorMessage')
if error_message:
raise ExtractorError(
'Unable to login: %s' % error_message, expected=True)
auth_info = self._gigya_login(auth_data)
self._uid = auth_info['UID']
self._uid_signature = auth_info['UIDSignature']

View File

@ -40,21 +40,29 @@ class MnetIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
# TODO: extract rtmp formats
# no stype -> rtmp url
# stype=H -> m3u8 url
# stype=M -> mpd url
info = self._download_json(
'http://content.api.mnet.com/player/vodConfig?id=%s&ctype=CLIP' % video_id,
video_id, 'Downloading vod config JSON')['data']['info']
'http://content.api.mnet.com/player/vodConfig',
video_id, 'Downloading vod config JSON', query={
'id': video_id,
'ctype': 'CLIP',
'stype': 'H',
})['data']['info']
title = info['title']
rtmp_info = self._download_json(
info['cdn'], video_id, 'Downloading vod cdn JSON')
formats = [{
'url': rtmp_info['serverurl'] + rtmp_info['fileurl'],
'ext': 'flv',
'page_url': url,
'player_url': 'http://flvfile.mnet.com/service/player/201602/cjem_player_tv.swf?v=201602191318',
}]
cdn_data = self._download_json(
info['cdn'], video_id, 'Downloading vod cdn JSON')['data'][0]
m3u8_url = cdn_data['url']
token = cdn_data.get('token')
if token and token != '-':
m3u8_url += '?' + token
formats = self._extract_wowza_formats(
m3u8_url, video_id, skip_protocols=['rtmp', 'rtsp', 'f4m'])
self._sort_formats(formats)
description = info.get('ment')
duration = parse_duration(info.get('time'))

View File

@ -115,10 +115,17 @@ class MTVServicesInfoExtractor(InfoExtractor):
if transcript.get('kind') != 'captions':
continue
lang = transcript.get('srclang')
subtitles[lang] = [{
'url': compat_str(typographic.get('src')),
'ext': typographic.get('format')
} for typographic in transcript.findall('./typographic')]
for typographic in transcript.findall('./typographic'):
sub_src = typographic.get('src')
if not sub_src:
continue
ext = typographic.get('format')
if ext == 'cea-608':
ext = 'scc'
subtitles.setdefault(lang, []).append({
'url': compat_str(sub_src),
'ext': ext
})
return subtitles
def _get_video_info(self, itemdoc, use_hls=True):

View File

@ -1,177 +0,0 @@
from __future__ import unicode_literals
import binascii
import base64
import hashlib
import re
import json
from .common import InfoExtractor
from ..compat import (
compat_ord,
compat_urllib_parse_unquote,
compat_urllib_parse_urlencode,
)
from ..utils import (
ExtractorError,
sanitized_Request,
)
class MyVideoIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/(?P<id>[0-9]+)/[^?/]+.*'
IE_NAME = 'myvideo'
_TEST = {
'url': 'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
'md5': '2d2753e8130479ba2cb7e0a37002053e',
'info_dict': {
'id': '8229274',
'ext': 'flv',
'title': 'bowling-fail-or-win',
}
}
# Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git
# Released into the Public Domain by Tristan Fischer on 2013-05-19
# https://github.com/rg3/youtube-dl/pull/842
def __rc4crypt(self, data, key):
x = 0
box = list(range(256))
for i in list(range(256)):
x = (x + box[i] + compat_ord(key[i % len(key)])) % 256
box[i], box[x] = box[x], box[i]
x = 0
y = 0
out = ''
for char in data:
x = (x + 1) % 256
y = (y + box[x]) % 256
box[x], box[y] = box[y], box[x]
out += chr(compat_ord(char) ^ box[(box[x] + box[y]) % 256])
return out
def __md5(self, s):
return hashlib.md5(s).hexdigest().encode()
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
GK = (
b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
b'TW1FMU5tVTBNR05pWkRaa05XRXhNVFJoWVRVd1ptSXhaVEV3'
b'TnpsbA0KTVRkbU1tSTRNdz09'
)
# Get video webpage
webpage_url = 'http://www.myvideo.de/watch/%s' % video_id
webpage = self._download_webpage(webpage_url, video_id)
mobj = re.search('source src=\'(.+?)[.]([^.]+)\'', webpage)
if mobj is not None:
self.report_extraction(video_id)
video_url = mobj.group(1) + '.flv'
video_title = self._html_search_regex('<title>([^<]+)</title>',
webpage, 'title')
return {
'id': video_id,
'url': video_url,
'title': video_title,
}
mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
if mobj is not None:
request = sanitized_Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
response = self._download_webpage(request, video_id,
'Downloading video info')
info = json.loads(base64.b64decode(response).decode('utf-8'))
return {
'id': video_id,
'title': info['title'],
'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
'play_path': info['filename'],
'ext': 'flv',
'thumbnail': info['thumbnail'][0]['url'],
}
# try encxml
mobj = re.search('var flashvars={(.+?)}', webpage)
if mobj is None:
raise ExtractorError('Unable to extract video')
params = {}
encxml = ''
sec = mobj.group(1)
for (a, b) in re.findall('(.+?):\'(.+?)\',?', sec):
if not a == '_encxml':
params[a] = b
else:
encxml = compat_urllib_parse_unquote(b)
if not params.get('domain'):
params['domain'] = 'www.myvideo.de'
xmldata_url = '%s?%s' % (encxml, compat_urllib_parse_urlencode(params))
if 'flash_playertype=MTV' in xmldata_url:
self._downloader.report_warning('avoiding MTV player')
xmldata_url = (
'http://www.myvideo.de/dynamic/get_player_video_xml.php'
'?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
) % video_id
# get enc data
enc_data = self._download_webpage(xmldata_url, video_id).split('=')[1]
enc_data_b = binascii.unhexlify(enc_data)
sk = self.__md5(
base64.b64decode(base64.b64decode(GK)) +
self.__md5(
str(video_id).encode('utf-8')
)
)
dec_data = self.__rc4crypt(enc_data_b, sk)
# extracting infos
self.report_extraction(video_id)
video_url = None
mobj = re.search('connectionurl=\'(.*?)\'', dec_data)
if mobj:
video_url = compat_urllib_parse_unquote(mobj.group(1))
if 'myvideo2flash' in video_url:
self.report_warning(
'Rewriting URL to use unencrypted rtmp:// ...',
video_id)
video_url = video_url.replace('rtmpe://', 'rtmp://')
if not video_url:
# extract non rtmp videos
mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
if mobj is None:
raise ExtractorError('unable to extract url')
video_url = compat_urllib_parse_unquote(mobj.group(1)) + compat_urllib_parse_unquote(mobj.group(2))
video_file = self._search_regex('source=\'(.*?)\'', dec_data, 'video file')
video_file = compat_urllib_parse_unquote(video_file)
if not video_file.endswith('f4m'):
ppath, prefix = video_file.split('.')
video_playpath = '%s:%s' % (prefix, ppath)
else:
video_playpath = ''
video_swfobj = self._search_regex(r'swfobject\.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
video_swfobj = compat_urllib_parse_unquote(video_swfobj)
video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
webpage, 'title')
return {
'id': video_id,
'url': video_url,
'tc_url': video_url,
'title': video_title,
'ext': 'flv',
'play_path': video_playpath,
'player_url': video_swfobj,
}

View File

@ -15,7 +15,7 @@ from ..utils import (
class NBCIE(AdobePassIE):
_VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+))'
_VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?P<id>n?\d+))'
_TESTS = [
{
@ -67,7 +67,11 @@ class NBCIE(AdobePassIE):
'skip_download': True,
},
'skip': 'Only works from US',
}
},
{
'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310',
'only_matching': True,
},
]
def _real_extract(self, url):

View File

@ -1,45 +1,106 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_unquote_plus
)
from ..utils import (
int_or_none,
parse_duration,
remove_end,
unified_strdate,
urljoin
)
class NDTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ndtv\.com/video/(?:[^/]+/)+[^/?^&]+-(?P<id>\d+)'
_VALID_URL = r'https?://(?:[^/]+\.)?ndtv\.com/(?:[^/]+/)*videos?/?(?:[^/]+/)*[^/?^&]+-(?P<id>\d+)'
_TEST = {
'url': 'http://www.ndtv.com/video/news/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal-300710',
'md5': '39f992dbe5fb531c395d8bbedb1e5e88',
_TESTS = [
{
'url': 'https://khabar.ndtv.com/video/show/prime-time/prime-time-ill-system-and-poor-education-468818',
'md5': '78efcf3880ef3fd9b83d405ca94a38eb',
'info_dict': {
'id': '300710',
'id': '468818',
'ext': 'mp4',
'title': "NDTV exclusive: Don't need character certificate from Rahul Gandhi, says Arvind Kejriwal",
'description': 'md5:ab2d4b4a6056c5cb4caa6d729deabf02',
'upload_date': '20131208',
'duration': 1327,
'title': "प्राइम टाइम: सिस्टम बीमार, स्कूल बदहाल",
'description': 'md5:f410512f1b49672e5695dea16ef2731d',
'upload_date': '20170928',
'duration': 2218,
'thumbnail': r're:https?://.*\.jpg',
},
}
},
{
# __filename is url
'url': 'http://movies.ndtv.com/videos/cracker-free-diwali-wishes-from-karan-johar-kriti-sanon-other-stars-470304',
'md5': 'f1d709352305b44443515ac56b45aa46',
'info_dict': {
'id': '470304',
'ext': 'mp4',
'title': "Cracker-Free Diwali Wishes From Karan Johar, Kriti Sanon & Other Stars",
'description': 'md5:f115bba1adf2f6433fa7c1ade5feb465',
'upload_date': '20171019',
'duration': 137,
'thumbnail': r're:https?://.*\.jpg',
}
},
{
'url': 'https://www.ndtv.com/video/news/news/delhi-s-air-quality-status-report-after-diwali-is-very-poor-470372',
'only_matching': True
},
{
'url': 'https://auto.ndtv.com/videos/the-cnb-daily-october-13-2017-469935',
'only_matching': True
},
{
'url': 'https://sports.ndtv.com/cricket/videos/2nd-t20i-rock-thrown-at-australia-cricket-team-bus-after-win-over-india-469764',
'only_matching': True
},
{
'url': 'http://gadgets.ndtv.com/videos/uncharted-the-lost-legacy-review-465568',
'only_matching': True
},
{
'url': 'http://profit.ndtv.com/videos/news/video-indian-economy-on-very-solid-track-international-monetary-fund-chief-470040',
'only_matching': True
},
{
'url': 'http://food.ndtv.com/video-basil-seeds-coconut-porridge-419083',
'only_matching': True
},
{
'url': 'https://doctor.ndtv.com/videos/top-health-stories-of-the-week-467396',
'only_matching': True
},
{
'url': 'https://swirlster.ndtv.com/video/how-to-make-friends-at-work-469324',
'only_matching': True
}
]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = remove_end(self._og_search_title(webpage), ' - NDTV')
# '__title' does not contain extra words such as sub-site name, "Video" etc.
title = compat_urllib_parse_unquote_plus(
self._search_regex(r"__title\s*=\s*'([^']+)'", webpage, 'title', default=None) or
self._og_search_title(webpage))
filename = self._search_regex(
r"__filename='([^']+)'", webpage, 'video filename')
video_url = 'http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' % filename
r"(?:__)?filename\s*[:=]\s*'([^']+)'", webpage, 'video filename')
# in "movies" sub-site pages, filename is URL
video_url = urljoin('https://ndtvod.bc-ssl.cdn.bitgravity.com/23372/ndtv/', filename.lstrip('/'))
duration = int_or_none(self._search_regex(
r"__duration='([^']+)'", webpage, 'duration', fatal=False))
# "doctor" sub-site has MM:SS format
duration = parse_duration(self._search_regex(
r"(?:__)?duration\s*[:=]\s*'([^']+)'", webpage, 'duration', fatal=False))
# "sports", "doctor", "swirlster" sub-sites don't have 'publish-date'
upload_date = unified_strdate(self._html_search_meta(
'publish-date', webpage, 'upload date', fatal=False))
'publish-date', webpage, 'upload date', default=None) or self._html_search_meta(
'uploadDate', webpage, 'upload date', default=None) or self._search_regex(
r'datePublished"\s*:\s*"([^"]+)"', webpage, 'upload date', fatal=False))
description = remove_end(self._og_search_description(webpage), ' (Read more)')

View File

@ -28,7 +28,7 @@ class NexxIE(InfoExtractor):
_TESTS = [{
# movie
'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907',
'md5': '16746bfc28c42049492385c989b26c4a',
'md5': '828cea195be04e66057b846288295ba1',
'info_dict': {
'id': '128907',
'ext': 'mp4',
@ -42,9 +42,6 @@ class NexxIE(InfoExtractor):
'timestamp': 1384264416,
'upload_date': '20131112',
},
'params': {
'format': 'bestvideo',
},
}, {
# episode
'url': 'https://api.nexx.cloud/v3/741/videos/byid/247858',
@ -62,7 +59,6 @@ class NexxIE(InfoExtractor):
'season_number': 2,
},
'params': {
'format': 'bestvideo',
'skip_download': True,
},
}, {
@ -193,35 +189,67 @@ class NexxIE(InfoExtractor):
stream_data = video['streamdata']
language = general.get('language_raw') or ''
# TODO: reverse more cdns and formats
# TODO: reverse more cdns
cdn = stream_data['cdnType']
assert cdn == 'azure'
azure_locator = stream_data['azureLocator']
AZURE_URL = 'http://nx-p%02d.akamaized.net/'
AZURE_URL = 'http://nx%s%02d.akamaized.net/'
for secure in ('s', ''):
cdn_shield = stream_data.get('cdnShieldHTTP%s' % secure.upper())
def get_cdn_shield_base(shield_type='', prefix='-p'):
for secure in ('', 's'):
cdn_shield = stream_data.get('cdnShield%sHTTP%s' % (shield_type, secure.upper()))
if cdn_shield:
azure_base = 'http%s://%s' % (secure, cdn_shield)
break
return 'http%s://%s' % (secure, cdn_shield)
else:
azure_base = AZURE_URL % int(stream_data['azureAccount'].replace('nexxplayplus', ''))
return AZURE_URL % (prefix, int(stream_data['azureAccount'].replace('nexxplayplus', '')))
azure_stream_base = get_cdn_shield_base()
is_ml = ',' in language
azure_m3u8_url = '%s%s/%s_src%s.ism/Manifest(format=m3u8-aapl)' % (
azure_base, azure_locator, video_id, ('_manifest' if is_ml else ''))
azure_manifest_url = '%s%s/%s_src%s.ism/Manifest' % (
azure_stream_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + '%s'
protection_token = try_get(
video, lambda x: x['protectiondata']['token'], compat_str)
if protection_token:
azure_m3u8_url += '?hdnts=%s' % protection_token
azure_manifest_url += '?hdnts=%s' % protection_token
formats = self._extract_m3u8_formats(
azure_m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='%s-hls' % cdn)
azure_manifest_url % '(format=m3u8-aapl)',
video_id, 'mp4', 'm3u8_native',
m3u8_id='%s-hls' % cdn, fatal=False)
formats.extend(self._extract_mpd_formats(
azure_manifest_url % '(format=mpd-time-csf)',
video_id, mpd_id='%s-dash' % cdn, fatal=False))
formats.extend(self._extract_ism_formats(
azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False))
azure_progressive_base = get_cdn_shield_base('Prog', '-d')
azure_file_distribution = stream_data.get('azureFileDistribution')
if azure_file_distribution:
fds = azure_file_distribution.split(',')
if fds:
for fd in fds:
ss = fd.split(':')
if len(ss) == 2:
tbr = int_or_none(ss[0])
if tbr:
f = {
'url': '%s%s/%s_src_%s_%d.mp4' % (
azure_progressive_base, azure_locator, video_id, ss[1], tbr),
'format_id': '%s-http-%d' % (cdn, tbr),
'tbr': tbr,
}
width_height = ss[1].split('x')
if len(width_height) == 2:
f.update({
'width': int_or_none(width_height[0]),
'height': int_or_none(width_height[1]),
})
formats.append(f)
self._sort_formats(formats)
return {

View File

@ -75,7 +75,7 @@ class NickIE(MTVServicesInfoExtractor):
class NickDeIE(MTVServicesInfoExtractor):
IE_NAME = 'nick.de'
_VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.(?:de|com\.pl)|nickelodeon\.(?:nl|at))/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.(?:de|com\.pl|ch)|nickelodeon\.(?:nl|be|at|dk|no|se))/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse',
'only_matching': True,
@ -91,6 +91,21 @@ class NickDeIE(MTVServicesInfoExtractor):
}, {
'url': 'http://www.nick.com.pl/seriale/474-spongebob-kanciastoporty/wideo/17412-teatr-to-jest-to-rodeo-oszolom',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.no/program/2626-bulderhuset/videoer/90947-femteklasse-veronica-vs-vanzilla',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.dk/serier/2626-hojs-hus/videoer/761-tissepause',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.se/serier/2626-lugn-i-stormen/videos/998-',
'only_matching': True,
}, {
'url': 'http://www.nick.ch/shows/2304-adventure-time-abenteuerzeit-mit-finn-und-jake',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.be/afspeellijst/4530-top-videos/videos/episode/73917-inval-broodschapper-lariekoek-arie',
'only_matching': True,
}]
def _extract_mrss_url(self, webpage, host):
@ -132,13 +147,28 @@ class NickNightIE(NickDeIE):
class NickRuIE(MTVServicesInfoExtractor):
IE_NAME = 'nickelodeonru'
_VALID_URL = r'https?://(?:www\.)nickelodeon\.ru/(?:playlist|shows|videos)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:www\.)nickelodeon\.(?:ru|fr|es|pt|ro|hu)/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.nickelodeon.ru/shows/henrydanger/videos/episodes/3-sezon-15-seriya-licenziya-na-polyot/pmomfb#playlist/7airc6',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.ru/videos/smotri-na-nickelodeon-v-iyule/g9hvh7',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.fr/programmes/bob-l-eponge/videos/le-marathon-de-booh-kini-bottom-mardi-31-octobre/nfn7z0',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.es/videos/nickelodeon-consejos-tortitas/f7w7xy',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.pt/series/spongebob-squarepants/videos/a-bolha-de-tinta-gigante/xutq1b',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.ro/emisiuni/shimmer-si-shine/video/nahal-din-bomboane/uw5u2k',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.hu/musorok/spongyabob-kockanadrag/videok/episodes/buborekfujas-az-elszakadt-nadrag/q57iob#playlist/k6te4y',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@ -70,7 +70,7 @@ class NocoIE(InfoExtractor):
return
login = self._download_json(
self._LOGIN_URL, None, 'Logging in as %s' % username,
self._LOGIN_URL, None, 'Logging in',
data=urlencode_postdata({
'a': 'login',
'cookie': '1',

View File

@ -14,6 +14,7 @@ from ..utils import (
int_or_none,
qualities,
unescapeHTML,
urlencode_postdata,
)
@ -56,7 +57,7 @@ class OdnoklassnikiIE(InfoExtractor):
'url': 'http://ok.ru/video/64211978996595-1',
'md5': '2f206894ffb5dbfcce2c5a14b909eea5',
'info_dict': {
'id': '64211978996595-1',
'id': 'V_VztHT5BzY',
'ext': 'mp4',
'title': 'Космическая среда от 26 августа 2015',
'description': 'md5:848eb8b85e5e3471a3a803dae1343ed0',
@ -127,9 +128,14 @@ class OdnoklassnikiIE(InfoExtractor):
if metadata:
metadata = self._parse_json(metadata, video_id)
else:
data = {}
st_location = flashvars.get('location')
if st_location:
data['st.location'] = st_location
metadata = self._download_json(
compat_urllib_parse_unquote(flashvars['metadataUrl']),
video_id, 'Downloading metadata JSON')
video_id, 'Downloading metadata JSON',
data=urlencode_postdata(data))
movie = metadata['movie']

View File

@ -11,7 +11,7 @@ class OnceIE(InfoExtractor):
ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8'
PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4'
def _extract_once_formats(self, url):
def _extract_once_formats(self, url, http_formats_preference=None):
domain_id, application_id, media_item_id = re.match(
OnceIE._VALID_URL, url).groups()
formats = self._extract_m3u8_formats(
@ -35,6 +35,7 @@ class OnceIE(InfoExtractor):
'format_id': adaptive_format['format_id'].replace(
'hls', 'http'),
'protocol': 'http',
'preference': http_formats_preference,
})
progressive_formats.append(progressive_format)
self._check_formats(progressive_formats, media_item_id)

View File

@ -140,7 +140,7 @@ class PhantomJSwrapper(object):
for name in self._TMP_FILE_NAMES:
try:
os.remove(self._TMP_FILES[name].name)
except:
except (IOError, OSError):
pass
def _save_cookies(self, url):
@ -242,7 +242,7 @@ class PhantomJSwrapper(object):
class OpenloadIE(InfoExtractor):
_VALID_URL = r'https?://(?:openload\.(?:co|io)|oload\.tv)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
_VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.tv)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
_TESTS = [{
'url': 'https://openload.co/f/kUEfGclsU9o',
@ -286,6 +286,9 @@ class OpenloadIE(InfoExtractor):
}, {
'url': 'https://oload.tv/embed/KnG-kKZdcfY/',
'only_matching': True,
}, {
'url': 'http://www.openload.link/f/KnG-kKZdcfY',
'only_matching': True,
}]
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'

View File

@ -49,13 +49,13 @@ class ORFTVthekIE(InfoExtractor):
'params': {
'skip_download': True, # rtsp downloads
},
'_skip': 'Blocked outside of Austria / Germany',
'skip': 'Blocked outside of Austria / Germany',
}, {
'url': 'http://tvthek.orf.at/topic/Fluechtlingskrise/10463081/Heimat-Fremde-Heimat/13879132/Senioren-betreuen-Migrantenkinder/13879141',
'skip_download': True,
'only_matching': True,
}, {
'url': 'http://tvthek.orf.at/profile/Universum/35429',
'skip_download': True,
'only_matching': True,
}]
def _real_extract(self, url):

View File

@ -33,7 +33,7 @@ class PandaTVIE(InfoExtractor):
video_id = self._match_id(url)
config = self._download_json(
'https://www.panda.tv/api_room?roomid=%s' % video_id, video_id)
'https://www.panda.tv/api_room_v2?roomid=%s' % video_id, video_id)
error_code = config.get('errno', 0)
if error_code is not 0:
@ -66,6 +66,11 @@ class PandaTVIE(InfoExtractor):
plflag1 = '4'
live_panda = 'live_panda' if plflag0 < 1 else ''
plflag_auth = self._parse_json(video_info['plflag_list'], video_id)
sign = plflag_auth['auth']['sign']
ts = plflag_auth['auth']['time']
rid = plflag_auth['auth']['rid']
quality_key = qualities(['OD', 'HD', 'SD'])
suffix = ['_small', '_mid', '']
formats = []
@ -77,8 +82,8 @@ class PandaTVIE(InfoExtractor):
continue
for pref, (ext, pl) in enumerate((('m3u8', '-hls'), ('flv', ''))):
formats.append({
'url': 'https://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s'
% (pl, plflag1, room_key, live_panda, suffix[quality], ext),
'url': 'https://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s?sign=%s&ts=%s&rid=%s'
% (pl, plflag1, room_key, live_panda, suffix[quality], ext, sign, ts, rid),
'format_id': '%s-%s' % (k, ext),
'quality': quality,
'source_preference': pref,

View File

@ -67,7 +67,7 @@ class PatreonIE(InfoExtractor):
'https://www.patreon.com/processLogin',
compat_urllib_parse_urlencode(login_form).encode('utf-8')
)
login_page = self._download_webpage(request, None, note='Logging in as %s' % username)
login_page = self._download_webpage(request, None, note='Logging in')
if re.search(r'onLoginFailed', login_page):
raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)

View File

@ -421,6 +421,7 @@ class PBSIE(InfoExtractor):
r'class="coveplayerid">([^<]+)<', # coveplayer
r'<section[^>]+data-coveid="(\d+)"', # coveplayer from http://www.pbs.org/wgbh/frontline/film/real-csi/
r'<input type="hidden" id="pbs_video_id_[0-9]+" value="([0-9]+)"/>', # jwplayer
r"(?s)window\.PBS\.playerConfig\s*=\s*{.*?id\s*:\s*'([0-9]+)',",
]
media_id = self._search_regex(

View File

@ -0,0 +1,83 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import int_or_none
class PerformGroupIE(InfoExtractor):
_VALID_URL = r'https?://player\.performgroup\.com/eplayer(?:/eplayer\.html|\.js)#/?(?P<id>[0-9a-f]{26})\.(?P<auth_token>[0-9a-z]{26})'
_TESTS = [{
# http://www.faz.net/aktuell/sport/fussball/wm-2018-playoffs-schweiz-besiegt-nordirland-1-0-15286104.html
'url': 'http://player.performgroup.com/eplayer/eplayer.html#d478c41c5d192f56b9aa859de8.1w4crrej5w14e1ed4s1ce4ykab',
'md5': '259cb03d142e2e52471e8837ecacb29f',
'info_dict': {
'id': 'xgrwobuzumes1lwjxtcdpwgxd',
'ext': 'mp4',
'title': 'Liga MX: Keine Einsicht nach Horrorfoul',
'description': 'md5:7cd3b459c82725b021e046ab10bf1c5b',
'timestamp': 1511533477,
'upload_date': '20171124',
}
}]
def _call_api(self, service, auth_token, content_id, referer_url):
return self._download_json(
'http://ep3.performfeeds.com/ep%s/%s/%s/' % (service, auth_token, content_id),
content_id, headers={
'Referer': referer_url,
'Origin': 'http://player.performgroup.com',
}, query={
'_fmt': 'json',
})
def _real_extract(self, url):
player_id, auth_token = re.search(self._VALID_URL, url).groups()
bootstrap = self._call_api('bootstrap', auth_token, player_id, url)
video = bootstrap['config']['dataSource']['sourceItems'][0]['videos'][0]
video_id = video['uuid']
vod = self._call_api('vod', auth_token, video_id, url)
media = vod['videos']['video'][0]['media']
formats = []
hls_url = media.get('hls', {}).get('url')
if hls_url:
formats.extend(self._extract_m3u8_formats(hls_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
hds_url = media.get('hds', {}).get('url')
if hds_url:
formats.extend(self._extract_f4m_formats(hds_url + '?hdcore', video_id, f4m_id='hds', fatal=False))
for c in media.get('content', []):
c_url = c.get('url')
if not c_url:
continue
tbr = int_or_none(c.get('bitrate'), 1000)
format_id = 'http'
if tbr:
format_id += '-%d' % tbr
formats.append({
'format_id': format_id,
'url': c_url,
'tbr': tbr,
'width': int_or_none(c.get('width')),
'height': int_or_none(c.get('height')),
'filesize': int_or_none(c.get('fileSize')),
'vcodec': c.get('type'),
'fps': int_or_none(c.get('videoFrameRate')),
'vbr': int_or_none(c.get('videoRate'), 1000),
'abr': int_or_none(c.get('audioRate'), 1000),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': video['title'],
'description': video.get('description'),
'thumbnail': video.get('poster'),
'duration': int_or_none(video.get('duration')),
'timestamp': int_or_none(video.get('publishedTime'), 1000),
'formats': formats,
}

View File

@ -116,7 +116,7 @@ class PluralsightIE(PluralsightBaseIE):
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
response = self._download_webpage(
post_url, None, 'Logging in as %s' % username,
post_url, None, 'Logging in',
data=urlencode_postdata(login_form),
headers={'Content-Type': 'application/x-www-form-urlencoded'})

View File

@ -68,7 +68,7 @@ class RoosterTeethIE(InfoExtractor):
login_request = self._download_webpage(
self._LOGIN_URL, None,
note='Logging in as %s' % username,
note='Logging in',
data=urlencode_postdata(login_form),
headers={
'Referer': self._LOGIN_URL,

View File

@ -21,7 +21,7 @@ class RozhlasIE(InfoExtractor):
}
}, {
'url': 'http://prehravac.rozhlas.cz/audio/3421320/embed',
'skip_download': True,
'only_matching': True,
}]
def _real_extract(self, url):

View File

@ -61,7 +61,7 @@ class SafariBaseIE(InfoExtractor):
request = sanitized_Request(
self._LOGIN_URL, urlencode_postdata(login_form), headers=headers)
login_page = self._download_webpage(
request, None, 'Logging in as %s' % username)
request, None, 'Logging in')
if not is_logged(login_page):
raise ExtractorError(

View File

@ -2,7 +2,12 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import strip_or_none
from ..utils import (
extract_attributes,
smuggle_url,
strip_or_none,
urljoin,
)
class SkySportsIE(InfoExtractor):
@ -22,12 +27,22 @@ class SkySportsIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_data = extract_attributes(self._search_regex(
r'(<div.+?class="sdc-article-video__media-ooyala"[^>]+>)', webpage, 'video data'))
video_url = 'ooyala:%s' % video_data['data-video-id']
if video_data.get('data-token-required') == 'true':
token_fetch_options = self._parse_json(video_data.get('data-token-fetch-options', '{}'), video_id, fatal=False) or {}
token_fetch_url = token_fetch_options.get('url')
if token_fetch_url:
embed_token = self._download_webpage(urljoin(url, token_fetch_url), video_id, fatal=False)
if embed_token:
video_url = smuggle_url(video_url, {'embed_token': embed_token.strip('"')})
return {
'_type': 'url_transparent',
'id': video_id,
'url': 'ooyala:%s' % self._search_regex(
r'data-video-id="([^"]+)"', webpage, 'ooyala id'),
'url': video_url,
'title': self._og_search_title(webpage),
'description': strip_or_none(self._og_search_description(webpage)),
'ie_key': 'Ooyala',

View File

@ -8,36 +8,49 @@ from .common import InfoExtractor
class SoundgasmIE(InfoExtractor):
IE_NAME = 'soundgasm'
_VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)'
_VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_-]+)/(?P<display_id>[0-9a-zA-Z_-]+)'
_TEST = {
'url': 'http://soundgasm.net/u/ytdl/Piano-sample',
'md5': '010082a2c802c5275bb00030743e75ad',
'info_dict': {
'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9',
'ext': 'm4a',
'title': 'ytdl_Piano-sample',
'description': 'Royalty Free Sample Music'
'title': 'Piano sample',
'description': 'Royalty Free Sample Music',
'uploader': 'ytdl',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('title')
audio_title = mobj.group('user') + '_' + mobj.group('title')
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id)
audio_url = self._html_search_regex(
r'(?s)m4a\:\s"([^"]+)"', webpage, 'audio URL')
audio_id = re.split(r'\/|\.', audio_url)[-2]
r'(?s)m4a\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
'audio URL', group='url')
title = self._search_regex(
r'<div[^>]+\bclass=["\']jp-title[^>]+>([^<]+)',
webpage, 'title', default=display_id)
description = self._html_search_regex(
r'(?s)<li>Description:\s(.*?)<\/li>', webpage, 'description',
fatal=False)
(r'(?s)<div[^>]+\bclass=["\']jp-description[^>]+>(.+?)</div>',
r'(?s)<li>Description:\s(.*?)<\/li>'),
webpage, 'description', fatal=False)
audio_id = self._search_regex(
r'/([^/]+)\.m4a', audio_url, 'audio id', default=display_id)
return {
'id': audio_id,
'display_id': display_id,
'url': audio_url,
'title': audio_title,
'description': description
'vcodec': 'none',
'title': title,
'description': description,
'uploader': mobj.group('user'),
}

View File

@ -3,10 +3,11 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import ExtractorError
class SpankBangIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video'
_VALID_URL = r'https?://(?:(?:www|m|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video'
_TESTS = [{
'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
'md5': '1cc433e1d6aa14bc376535b8679302f7',
@ -14,7 +15,7 @@ class SpankBangIE(InfoExtractor):
'id': '3vvn',
'ext': 'mp4',
'title': 'fantasy solo',
'description': 'Watch fantasy solo free HD porn video - 05 minutes - dillion harper masturbates on a bed free adult movies.',
'description': 'Watch fantasy solo free HD porn video - 05 minutes - Babe,Masturbation,Solo,Toy - dillion harper masturbates on a bed free adult movies sexy clips.',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'silly2587',
'age_limit': 18,
@ -27,12 +28,20 @@ class SpankBangIE(InfoExtractor):
# no uploader
'url': 'http://spankbang.com/lklg/video/sex+with+anyone+wedding+edition+2',
'only_matching': True,
}, {
# mobile page
'url': 'http://m.spankbang.com/1o2de/video/can+t+remember+her+name',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
if re.search(r'<[^>]+\bid=["\']video_removed', webpage):
raise ExtractorError(
'Video %s is not available' % video_id, expected=True)
stream_key = self._html_search_regex(
r'''var\s+stream_key\s*=\s*['"](.+?)['"]''',
webpage, 'stream key')

View File

@ -21,6 +21,8 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
r'flashvars\.config\s*=\s*escape\("([^"]+)"',
r'<input[^>]+name="config\d?" value="([^"]+)"',
]
_HOST = 'tna'
_VKEY_SUFFIX = ''
_TITLE_REGEX = r'<input[^>]+name="title" value="([^"]+)"'
_DESCRIPTION_REGEX = r'<input[^>]+name="description" value="([^"]+)"'
_UPLOADER_REGEX = r'<input[^>]+name="username" value="([^"]+)"'
@ -72,7 +74,13 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
display_id = mobj.group('display_id') if 'display_id' in mobj.groupdict() else video_id
for display_id_key in ('display_id', 'display_id_2'):
if display_id_key in mobj.groupdict():
display_id = mobj.group(display_id_key)
if display_id:
break
else:
display_id = video_id
webpage = self._download_webpage(url, display_id)
@ -81,8 +89,8 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
if not cfg_url:
inputs = self._hidden_inputs(webpage)
cfg_url = ('https://cdn-fck.tnaflix.com/tnaflix/%s.fid?key=%s&VID=%s&premium=1&vip=1&alpha'
% (inputs['vkey'], inputs['nkey'], video_id))
cfg_url = ('https://cdn-fck.%sflix.com/%sflix/%s%s.fid?key=%s&VID=%s&premium=1&vip=1&alpha'
% (self._HOST, self._HOST, inputs['vkey'], self._VKEY_SUFFIX, inputs['nkey'], video_id))
cfg_xml = self._download_xml(
cfg_url, display_id, 'Downloading metadata',
@ -91,7 +99,8 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
formats = []
def extract_video_url(vl):
return re.sub(r'speed=\d+', 'speed=', unescapeHTML(vl.text))
# Any URL modification now results in HTTP Error 403: Forbidden
return unescapeHTML(vl.text)
video_link = cfg_xml.find('./videoLink')
if video_link is not None:
@ -192,18 +201,21 @@ class TNAFlixNetworkEmbedIE(TNAFlixNetworkBaseIE):
webpage)]
class TNAFlixIE(TNAFlixNetworkBaseIE):
class TNAEMPFlixBaseIE(TNAFlixNetworkBaseIE):
_DESCRIPTION_REGEX = r'(?s)>Description:</[^>]+>(.+?)<'
_UPLOADER_REGEX = r'<span>by\s*<a[^>]+\bhref=["\']/profile/[^>]+>([^<]+)<'
_CATEGORIES_REGEX = r'(?s)<span[^>]*>Categories:</span>(.+?)</div>'
class TNAFlixIE(TNAEMPFlixBaseIE):
_VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)'
_TITLE_REGEX = r'<title>(.+?) - (?:TNAFlix Porn Videos|TNAFlix\.com)</title>'
_DESCRIPTION_REGEX = r'(?s)>Description:</[^>]+>(.+?)<'
_UPLOADER_REGEX = r'<i>\s*Verified Member\s*</i>\s*<h\d+>(.+?)<'
_CATEGORIES_REGEX = r'(?s)<span[^>]*>Categories:</span>(.+?)</div>'
_TESTS = [{
# anonymous uploader, no categories
'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
'md5': 'ecf3498417d09216374fc5907f9c6ec0',
'md5': '7e569419fe6d69543d01e6be22f5f7c4',
'info_dict': {
'id': '553878',
'display_id': 'Carmella-Decesare-striptease',
@ -228,7 +240,7 @@ class TNAFlixIE(TNAFlixNetworkBaseIE):
'duration': 164,
'age_limit': 18,
'uploader': 'bobwhite39',
'categories': ['Amateur Porn', 'Squirting Videos', 'Teen Girls 18+'],
'categories': list,
}
}, {
'url': 'https://www.tnaflix.com/amateur-porn/bunzHD-Ms.Donk/video358632',
@ -236,14 +248,15 @@ class TNAFlixIE(TNAFlixNetworkBaseIE):
}]
class EMPFlixIE(TNAFlixNetworkBaseIE):
_VALID_URL = r'https?://(?:www\.)?empflix\.com/videos/(?P<display_id>.+?)-(?P<id>[0-9]+)\.html'
class EMPFlixIE(TNAEMPFlixBaseIE):
_VALID_URL = r'https?://(?:www\.)?empflix\.com/(?:videos/(?P<display_id>.+?)-|[^/]+/(?P<display_id_2>[^/]+)/video)(?P<id>[0-9]+)'
_UPLOADER_REGEX = r'<span[^>]+class="infoTitle"[^>]*>Uploaded By:</span>(.+?)</li>'
_HOST = 'emp'
_VKEY_SUFFIX = '-1'
_TESTS = [{
'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
'md5': 'b1bc15b6412d33902d6e5952035fcabc',
'md5': 'bc30d48b91a7179448a0bda465114676',
'info_dict': {
'id': '33051',
'display_id': 'Amateur-Finger-Fuck',
@ -259,6 +272,9 @@ class EMPFlixIE(TNAFlixNetworkBaseIE):
}, {
'url': 'http://www.empflix.com/videos/[AROMA][ARMD-718]-Aoi-Yoshino-Sawa-25826.html',
'only_matching': True,
}, {
'url': 'https://www.empflix.com/amateur-porn/Amateur-Finger-Fuck/video33051',
'only_matching': True,
}]

View File

@ -32,6 +32,8 @@ class TVAIE(InfoExtractor):
video_data = self._download_json(
'https://videos.tva.ca/proxy/item/_' + video_id, video_id, headers={
'Accept': 'application/json',
}, query={
'appId': '5955fc5f23eec60006c951f1',
})
def get_attribute(key):

View File

@ -1,86 +0,0 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import int_or_none
# 22Tracks regularly replace the audio tracks that can be streamed on their
# site. The tracks usually expire after 1 months, so we can't add tests.
class TwentyTwoTracksIE(InfoExtractor):
_VALID_URL = r'https?://22tracks\.com/(?P<city>[a-z]+)/(?P<genre>[\da-z]+)/(?P<id>\d+)'
IE_NAME = '22tracks:track'
_API_BASE = 'http://22tracks.com/api'
def _extract_info(self, city, genre_name, track_id=None):
item_id = track_id if track_id else genre_name
cities = self._download_json(
'%s/cities' % self._API_BASE, item_id,
'Downloading cities info',
'Unable to download cities info')
city_id = [x['id'] for x in cities if x['slug'] == city][0]
genres = self._download_json(
'%s/genres/%s' % (self._API_BASE, city_id), item_id,
'Downloading %s genres info' % city,
'Unable to download %s genres info' % city)
genre = [x for x in genres if x['slug'] == genre_name][0]
genre_id = genre['id']
tracks = self._download_json(
'%s/tracks/%s' % (self._API_BASE, genre_id), item_id,
'Downloading %s genre tracks info' % genre_name,
'Unable to download track info')
return [x for x in tracks if x['id'] == item_id][0] if track_id else [genre['title'], tracks]
def _get_track_url(self, filename, track_id):
token = self._download_json(
'http://22tracks.com/token.php?desktop=true&u=/128/%s' % filename,
track_id, 'Downloading token', 'Unable to download token')
return 'http://audio.22tracks.com%s?st=%s&e=%d' % (token['filename'], token['st'], token['e'])
def _extract_track_info(self, track_info, track_id):
download_url = self._get_track_url(track_info['filename'], track_id)
title = '%s - %s' % (track_info['artist'].strip(), track_info['title'].strip())
return {
'id': track_id,
'url': download_url,
'ext': 'mp3',
'title': title,
'duration': int_or_none(track_info.get('duration')),
'timestamp': int_or_none(track_info.get('published_at') or track_info.get('created'))
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
city = mobj.group('city')
genre = mobj.group('genre')
track_id = mobj.group('id')
track_info = self._extract_info(city, genre, track_id)
return self._extract_track_info(track_info, track_id)
class TwentyTwoTracksGenreIE(TwentyTwoTracksIE):
_VALID_URL = r'https?://22tracks\.com/(?P<city>[a-z]+)/(?P<genre>[\da-z]+)/?$'
IE_NAME = '22tracks:genre'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
city = mobj.group('city')
genre = mobj.group('genre')
genre_title, tracks = self._extract_info(city, genre)
entries = [
self._extract_track_info(track_info, track_info['id'])
for track_info in tracks]
return self.playlist_result(entries, genre, genre_title)

View File

@ -101,7 +101,7 @@ class TwitchBaseIE(InfoExtractor):
fail(clean_html(login_page))
redirect_page, handle = login_step(
login_page, handle, 'Logging in as %s' % username, {
login_page, handle, 'Logging in', {
'username': username,
'password': password,
})
@ -609,7 +609,7 @@ class TwitchClipsIE(InfoExtractor):
r'(?s)clipInfo\s*=\s*({.+?});', webpage, 'clip info'),
video_id, transform_source=js_to_json)
title = clip.get('channel_title') or self._og_search_title(webpage)
title = clip.get('title') or clip.get('channel_title') or self._og_search_title(webpage)
formats = [{
'url': option['source'],

View File

@ -164,7 +164,7 @@ class UdemyIE(InfoExtractor):
})
response = self._download_webpage(
self._LOGIN_URL, None, 'Logging in as %s' % username,
self._LOGIN_URL, None, 'Logging in',
data=urlencode_postdata(login_form),
headers={
'Referer': self._ORIGIN_URL,

View File

@ -28,10 +28,10 @@ class VidziIE(InfoExtractor):
},
}, {
'url': 'http://vidzi.tv/embed-4z2yb0rzphe9-600x338.html',
'skip_download': True,
'only_matching': True,
}, {
'url': 'http://vidzi.cc/cghql9yq6emu.html',
'skip_download': True,
'only_matching': True,
}]
def _real_extract(self, url):

View File

@ -99,7 +99,7 @@ class VikiBaseIE(InfoExtractor):
login = self._call_api(
'sessions.json', None,
'Logging in as %s' % username, post_data=login_form)
'Logging in', post_data=login_form)
self._token = login.get('token')
if not self._token:

View File

@ -412,7 +412,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
urls = []
# Look for embedded (iframe) Vimeo player
for mobj in re.finditer(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1',
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/\d+.*?)\1',
webpage):
urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url))
PLAIN_EMBED_RE = (

View File

@ -67,7 +67,7 @@ class VKBaseIE(InfoExtractor):
login_page = self._download_webpage(
'https://login.vk.com/?act=login', None,
note='Logging in as %s' % username,
note='Logging in',
data=urlencode_postdata(login_form))
if re.search(r'onLoginFailed', login_page):

View File

@ -1,14 +1,21 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_chr
from ..utils import (
decode_packed_codes,
ExtractorError,
)
class VShareIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://vshare.io/d/0f64ce6',
'md5': '16d7b8fef58846db47419199ff1ab3e7',
'md5': '17b39f55b5497ae8b59f5fbce8e35886',
'info_dict': {
'id': '0f64ce6',
'title': 'vl14062007715967',
@ -19,20 +26,49 @@ class VShareIE(InfoExtractor):
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
return re.findall(
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)',
webpage)
def _extract_packed(self, webpage):
packed = self._search_regex(
r'(eval\(function.+)', webpage, 'packed code')
unpacked = decode_packed_codes(packed)
digits = self._search_regex(r'\[((?:\d+,?)+)\]', unpacked, 'digits')
digits = [int(digit) for digit in digits.split(',')]
key_digit = self._search_regex(
r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit')
chars = [compat_chr(d - int(key_digit)) for d in digits]
return ''.join(chars)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'https://vshare.io/d/%s' % video_id, video_id)
'https://vshare.io/v/%s/width-650/height-430/1' % video_id,
video_id)
title = self._html_search_regex(
r'(?s)<div id="root-container">(.+?)<br/>', webpage, 'title')
video_url = self._search_regex(
r'<a[^>]+href=(["\'])(?P<url>(?:https?:)?//.+?)\1[^>]*>[Cc]lick\s+here',
webpage, 'video url', group='url')
r'<title>([^<]+)</title>', webpage, 'title')
title = title.split(' - ')[0]
return {
error = self._html_search_regex(
r'(?s)<div[^>]+\bclass=["\']xxx-error[^>]+>(.+?)</div', webpage,
'error', default=None)
if error:
raise ExtractorError(error, expected=True)
info = self._parse_html5_media_entries(
url, '<video>%s</video>' % self._extract_packed(webpage),
video_id)[0]
self._sort_formats(info['formats'])
info.update({
'id': video_id,
'title': title,
'url': video_url,
}
})
return info

View File

@ -22,6 +22,9 @@ class VVVVIDIE(InfoExtractor):
'ext': 'mp4',
'title': 'Ping Pong',
},
'params': {
'skip_download': True,
},
}, {
# video_type == 'video/rcs'
'url': 'https://www.vvvvid.it/#!show/376/death-note-live-action/377/482493/episodio-01',
@ -31,6 +34,9 @@ class VVVVIDIE(InfoExtractor):
'ext': 'mp4',
'title': 'Episodio 01',
},
'params': {
'skip_download': True,
},
}]
_conn_id = None
@ -116,8 +122,20 @@ class VVVVIDIE(InfoExtractor):
embed_code = ds(embed_code)
video_type = video_data.get('video_type')
if video_type in ('video/rcs', 'video/kenc'):
formats.extend(self._extract_akamai_formats(
embed_code, video_id))
embed_code = re.sub(r'https?://([^/]+)/z/', r'https://\1/i/', embed_code).replace('/manifest.f4m', '/master.m3u8')
if video_type == 'video/kenc':
kenc = self._download_json(
'https://www.vvvvid.it/kenc', video_id, query={
'action': 'kt',
'conn_id': self._conn_id,
'url': embed_code,
}, fatal=False) or {}
kenc_message = kenc.get('message')
if kenc_message:
embed_code += '?' + ds(kenc_message)
formats.extend(self._extract_m3u8_formats(
embed_code, video_id, 'mp4',
m3u8_id='hls', fatal=False))
else:
formats.extend(self._extract_wowza_formats(
'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id))

View File

@ -13,7 +13,7 @@ class WSJIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?:
https?://video-api\.wsj\.com/api-video/player/iframe\.html\?.*?\bguid=|
https?://(?:www\.)?(?:wsj|barrons)\.com/video/[^/]+/|
https?://(?:www\.)?(?:wsj|barrons)\.com/video/(?:[^/]+/)+|
wsj:
)
(?P<id>[a-fA-F0-9-]{36})
@ -38,6 +38,9 @@ class WSJIE(InfoExtractor):
}, {
'url': 'http://www.barrons.com/video/capitalism-deserves-more-respect-from-millennials/F301217E-6F46-43AE-B8D2-B7180D642EE9.html',
'only_matching': True,
}, {
'url': 'https://www.wsj.com/video/series/a-brief-history-of/the-modern-cell-carrier-how-we-got-here/980E2187-401D-48A1-B82B-1486CEE06CB9',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@ -6,10 +6,12 @@ from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
clean_html,
determine_ext,
dict_get,
ExtractorError,
int_or_none,
parse_duration,
try_get,
unified_strdate,
)
@ -32,6 +34,7 @@ class XHamsterIE(InfoExtractor):
'display_id': 'femaleagent_shy_beauty_takes_the_bait',
'ext': 'mp4',
'title': 'FemaleAgent Shy beauty takes the bait',
'timestamp': 1350194821,
'upload_date': '20121014',
'uploader': 'Ruseful2011',
'duration': 893,
@ -45,6 +48,7 @@ class XHamsterIE(InfoExtractor):
'display_id': 'britney_spears_sexy_booty',
'ext': 'mp4',
'title': 'Britney Spears Sexy Booty',
'timestamp': 1379123460,
'upload_date': '20130914',
'uploader': 'jojo747400',
'duration': 200,
@ -61,6 +65,7 @@ class XHamsterIE(InfoExtractor):
'id': '5667973',
'ext': 'mp4',
'title': '....',
'timestamp': 1454948101,
'upload_date': '20160208',
'uploader': 'parejafree',
'duration': 72,
@ -96,6 +101,83 @@ class XHamsterIE(InfoExtractor):
if error:
raise ExtractorError(error, expected=True)
age_limit = self._rta_search(webpage)
def get_height(s):
return int_or_none(self._search_regex(
r'^(\d+)[pP]', s, 'height', default=None))
initials = self._parse_json(
self._search_regex(
r'window\.initials\s*=\s*({.+?})\s*;\s*\n', webpage, 'initials',
default='{}'),
video_id, fatal=False)
if initials:
video = initials['videoModel']
title = video['title']
formats = []
for format_id, formats_dict in video['sources'].items():
if not isinstance(formats_dict, dict):
continue
for quality, format_item in formats_dict.items():
if format_id == 'download':
# Download link takes some time to be generated,
# skipping for now
continue
if not isinstance(format_item, dict):
continue
format_url = format_item.get('link')
filesize = int_or_none(
format_item.get('size'), invscale=1000000)
else:
format_url = format_item
filesize = None
if not isinstance(format_url, compat_str):
continue
formats.append({
'format_id': '%s-%s' % (format_id, quality),
'url': format_url,
'ext': determine_ext(format_url, 'mp4'),
'height': get_height(quality),
'filesize': filesize,
})
self._sort_formats(formats)
categories_list = video.get('categories')
if isinstance(categories_list, list):
categories = []
for c in categories_list:
if not isinstance(c, dict):
continue
c_name = c.get('name')
if isinstance(c_name, compat_str):
categories.append(c_name)
else:
categories = None
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': video.get('description'),
'timestamp': int_or_none(video.get('created')),
'uploader': try_get(
video, lambda x: x['author']['name'], compat_str),
'thumbnail': video.get('thumbURL'),
'duration': int_or_none(video.get('duration')),
'view_count': int_or_none(video.get('views')),
'like_count': int_or_none(try_get(
video, lambda x: x['rating']['likes'], int)),
'dislike_count': int_or_none(try_get(
video, lambda x: x['rating']['dislikes'], int)),
'comment_count': int_or_none(video.get('views')),
'age_limit': age_limit,
'categories': categories,
'formats': formats,
}
# Old layout fallback
title = self._html_search_regex(
[r'<h1[^>]*>([^<]+)</h1>',
r'<meta[^>]+itemprop=".*?caption.*?"[^>]+content="(.+?)"',
@ -119,8 +201,7 @@ class XHamsterIE(InfoExtractor):
formats.append({
'format_id': format_id,
'url': format_url,
'height': int_or_none(self._search_regex(
r'^(\d+)[pP]', format_id, 'height', default=None))
'height': get_height(format_id),
})
video_url = self._search_regex(
@ -167,8 +248,6 @@ class XHamsterIE(InfoExtractor):
mobj = re.search(r'</label>Comments \((?P<commentcount>\d+)\)</div>', webpage)
comment_count = mobj.group('commentcount') if mobj else 0
age_limit = self._rta_search(webpage)
categories_html = self._search_regex(
r'(?s)<table.+?(<span>Categories:.+?)</table>', webpage,
'categories', default=None)

View File

@ -40,9 +40,12 @@ class XiamiBaseIE(InfoExtractor):
'subtitles': subtitles,
}
def _extract_tracks(self, item_id, typ=None):
def _extract_tracks(self, item_id, referer, typ=None):
playlist = self._download_json(
'%s/%s%s' % (self._API_BASE_URL, item_id, '/type/%s' % typ if typ else ''), item_id)
'%s/%s%s' % (self._API_BASE_URL, item_id, '/type/%s' % typ if typ else ''),
item_id, headers={
'Referer': referer,
})
return [
self._extract_track(track, item_id)
for track in playlist['data']['trackList']]
@ -135,13 +138,13 @@ class XiamiSongIE(XiamiBaseIE):
}]
def _real_extract(self, url):
return self._extract_tracks(self._match_id(url))[0]
return self._extract_tracks(self._match_id(url), url)[0]
class XiamiPlaylistBaseIE(XiamiBaseIE):
def _real_extract(self, url):
item_id = self._match_id(url)
return self.playlist_result(self._extract_tracks(item_id, self._TYPE), item_id)
return self.playlist_result(self._extract_tracks(item_id, url, self._TYPE), item_id)
class XiamiAlbumIE(XiamiPlaylistBaseIE):

View File

@ -154,7 +154,7 @@ class YoukuIE(InfoExtractor):
# request basic data
basic_data_params = {
'vid': video_id,
'ccode': '0402' if 'tudou.com' in url else '0401',
'ccode': '0501',
'client_ip': '192.168.1.1',
'utid': cna,
'client_ts': time.time() / 1000,
@ -240,7 +240,7 @@ class YoukuShowIE(InfoExtractor):
}, {
# Ongoing playlist. The initial page is the last one
'url': 'http://list.youku.com/show/id_za7c275ecd7b411e1a19e.html',
'only_matchine': True,
'only_matching': True,
}]
def _extract_entries(self, playlist_data_url, show_id, note, query):

View File

@ -0,0 +1,202 @@
# coding: utf-8
from __future__ import unicode_literals
import itertools
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
try_get,
)
CDN_API_BASE = 'https://cdn.younow.com/php/api'
MOMENT_URL_FORMAT = '%s/moment/fetch/id=%%s' % CDN_API_BASE
class YouNowLiveIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'https://www.younow.com/AmandaPadeezy',
'info_dict': {
'id': 'AmandaPadeezy',
'ext': 'mp4',
'is_live': True,
'title': 'March 26, 2017',
'thumbnail': r're:^https?://.*\.jpg$',
'tags': ['girls'],
'categories': ['girls'],
'uploader': 'AmandaPadeezy',
'uploader_id': '6716501',
'uploader_url': 'https://www.younow.com/AmandaPadeezy',
'creator': 'AmandaPadeezy',
},
'skip': True,
}
@classmethod
def suitable(cls, url):
return (False
if YouNowChannelIE.suitable(url) or YouNowMomentIE.suitable(url)
else super(YouNowLiveIE, cls).suitable(url))
def _real_extract(self, url):
username = self._match_id(url)
data = self._download_json(
'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s'
% username, username)
if data.get('errorCode') != 0:
raise ExtractorError(data['errorMsg'], expected=True)
uploader = try_get(
data, lambda x: x['user']['profileUrlString'],
compat_str) or username
return {
'id': uploader,
'is_live': True,
'title': self._live_title(uploader),
'thumbnail': data.get('awsUrl'),
'tags': data.get('tags'),
'categories': data.get('tags'),
'uploader': uploader,
'uploader_id': data.get('userId'),
'uploader_url': 'https://www.younow.com/%s' % username,
'creator': uploader,
'view_count': int_or_none(data.get('viewers')),
'like_count': int_or_none(data.get('likes')),
'formats': [{
'url': '%s/broadcast/videoPath/hls=1/broadcastId=%s/channelId=%s'
% (CDN_API_BASE, data['broadcastId'], data['userId']),
'ext': 'mp4',
'protocol': 'm3u8',
}],
}
def _extract_moment(item, fatal=True):
moment_id = item.get('momentId')
if not moment_id:
if not fatal:
return
raise ExtractorError('Unable to extract moment id')
moment_id = compat_str(moment_id)
title = item.get('text')
if not title:
title = 'YouNow %s' % (
item.get('momentType') or item.get('titleType') or 'moment')
uploader = try_get(item, lambda x: x['owner']['name'], compat_str)
uploader_id = try_get(item, lambda x: x['owner']['userId'])
uploader_url = 'https://www.younow.com/%s' % uploader if uploader else None
entry = {
'extractor_key': 'YouNowMoment',
'id': moment_id,
'title': title,
'view_count': int_or_none(item.get('views')),
'like_count': int_or_none(item.get('likes')),
'timestamp': int_or_none(item.get('created')),
'creator': uploader,
'uploader': uploader,
'uploader_id': uploader_id,
'uploader_url': uploader_url,
'formats': [{
'url': 'https://hls.younow.com/momentsplaylists/live/%s/%s.m3u8'
% (moment_id, moment_id),
'ext': 'mp4',
'protocol': 'm3u8_native',
}],
}
return entry
class YouNowChannelIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/]+)/channel'
_TEST = {
'url': 'https://www.younow.com/its_Kateee_/channel',
'info_dict': {
'id': '14629760',
'title': 'its_Kateee_ moments'
},
'playlist_mincount': 8,
}
def _entries(self, username, channel_id):
created_before = 0
for page_num in itertools.count(1):
if created_before is None:
break
info = self._download_json(
'%s/moment/profile/channelId=%s/createdBefore=%d/records=20'
% (CDN_API_BASE, channel_id, created_before), username,
note='Downloading moments page %d' % page_num)
items = info.get('items')
if not items or not isinstance(items, list):
break
for item in items:
if not isinstance(item, dict):
continue
item_type = item.get('type')
if item_type == 'moment':
entry = _extract_moment(item, fatal=False)
if entry:
yield entry
elif item_type == 'collection':
moments = item.get('momentsIds')
if isinstance(moments, list):
for moment_id in moments:
m = self._download_json(
MOMENT_URL_FORMAT % moment_id, username,
note='Downloading %s moment JSON' % moment_id,
fatal=False)
if m and isinstance(m, dict) and m.get('item'):
entry = _extract_moment(m['item'])
if entry:
yield entry
created_before = int_or_none(item.get('created'))
def _real_extract(self, url):
username = self._match_id(url)
channel_id = compat_str(self._download_json(
'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s'
% username, username, note='Downloading user information')['userId'])
return self.playlist_result(
self._entries(username, channel_id), channel_id,
'%s moments' % username)
class YouNowMomentIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?younow\.com/[^/]+/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'https://www.younow.com/GABO.../20712117/36319236/3b316doc/m',
'md5': 'a30c70eadb9fb39a1aa3c8c0d22a0807',
'info_dict': {
'id': '20712117',
'ext': 'mp4',
'title': 'YouNow capture',
'view_count': int,
'like_count': int,
'timestamp': 1490432040,
'upload_date': '20170325',
'uploader': 'GABO...',
'uploader_id': 35917228,
},
}
@classmethod
def suitable(cls, url):
return (False
if YouNowChannelIE.suitable(url)
else super(YouNowMomentIE, cls).suitable(url))
def _real_extract(self, url):
video_id = self._match_id(url)
item = self._download_json(MOMENT_URL_FORMAT % video_id, video_id)
return _extract_moment(item['item'])

View File

@ -1391,7 +1391,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
)
(["\'])
(?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
(?:embed|v|p)/.+?)
(?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
\1''', webpage)]
# lazyYT YouTube embed

View File

@ -1835,10 +1835,20 @@ def parse_duration(s):
days, hours, mins, secs, ms = m.groups()
else:
m = re.match(
r'''(?ix)(?:P?T)?
r'''(?ix)(?:P?
(?:
[0-9]+\s*y(?:ears?)?\s*
)?
(?:
[0-9]+\s*m(?:onths?)?\s*
)?
(?:
[0-9]+\s*w(?:eeks?)?\s*
)?
(?:
(?P<days>[0-9]+)\s*d(?:ays?)?\s*
)?
T)?
(?:
(?P<hours>[0-9]+)\s*h(?:ours?)?\s*
)?
@ -2362,7 +2372,7 @@ def parse_codecs(codecs_str):
vcodec, acodec = None, None
for full_codec in splited_codecs:
codec = full_codec.split('.')[0]
if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v'):
if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1'):
if not vcodec:
vcodec = full_codec
elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2017.10.20'
__version__ = '2017.12.02'