Compare commits

...

712 Commits

Author SHA1 Message Date
Sergey M․
6ab35f5e16 release 2018.02.26 2018-02-26 04:23:38 +07:00
Sergey M․
32ae31847f [ChangeLog] Actualize 2018-02-26 04:19:04 +07:00
Sergey M․
abe8766c35 [udemy] Use custom User-Agent (closes #15571) 2018-02-26 04:12:53 +07:00
Sergey M․
eaa3172672 release 2018.02.25 2018-02-25 20:38:10 +07:00
Sergey M․
797c9284d6 [ChangeLog] Actualize 2018-02-25 20:35:52 +07:00
Sergey M․
8c73ef37b6 [vidlii] Add extractor (closes #14472, closes #14512, closes #14779) 2018-02-25 20:28:40 +07:00
Andrew Udvare
b5cbe3d652 [postprocessor/embedthumbnail] Skip embedding when there aren't any thumbnails 2018-02-25 19:33:13 +07:00
Sergey M․
ece12e6348 [streamango] Skip dead test 2018-02-25 18:36:25 +07:00
Sergey M․
ff274e3c16 [streamango] Capture and output error messages 2018-02-25 18:34:52 +07:00
Sergey M․
c106237d56 [streamango] Fix formats extraction, improve and simplify (closes #14256) 2018-02-25 18:27:23 +07:00
gfabiano
6e72ea4775 [streamango] Fix extraction (closes #14160) 2018-02-25 18:26:48 +07:00
Sergey M․
d6a0350253 [ard] Remove dead tests 2018-02-25 17:41:12 +07:00
Wandang
ad29ef043e [ard] Add alive tests 2018-02-25 17:38:07 +07:00
Sergey M․
f01df14c4f [telequebec:emission] Extend _VALID_URL 2018-02-25 17:05:39 +07:00
Sergey M․
9306b0c8d9 [telequebec] Add support for emissions and refactor (closes #14649, closes #14655) 2018-02-25 16:54:12 +07:00
Sergey M․
f4b7427279 [extractor/common] Improve jwplayer subtitles extraction (closes #15695) 2018-02-25 00:59:29 +07:00
Sergey M․
300148b48a [telequebec:live] Add extractor (closes #15688) 2018-02-24 06:17:29 +07:00
Wandang
2d17c63140 [abcnews] Update tests 2018-02-24 05:17:21 +07:00
Sergey M․
f2908d072e [mailru:music] Add extractor (closes #15618) 2018-02-24 04:52:55 +07:00
Remita Amine
5e7841932c [aenetworks] switch to akamai hls formats(closes #15612) 2018-02-23 08:23:55 +01:00
Sergey M․
870f3bfc63 [ytsearch] Fix flat title extraction (closes #11260, closes #15681) 2018-02-23 03:43:42 +07:00
Sergey M․
3d977fe4d2 release 2018.02.22 2018-02-22 23:50:35 +07:00
Sergey M․
f075838728 [ChangeLog] Actualize 2018-02-22 23:48:58 +07:00
Sergey M․
2acc11d771 [vidio] Fix HLS URL extraction (closes #15675) 2018-02-22 22:50:39 +07:00
Sergey M․
0704306e1d [nexx] Add support for arc.nexx.cloud URLs 2018-02-22 22:31:28 +07:00
Sergey M․
9dc7ea320d [nexx] Don't capture domain id and add support for domainless shortcuts 2018-02-22 22:27:19 +07:00
Remita Amine
e231afb14f [nexx] switch to ark api(closes #15652) 2018-02-22 10:41:47 +01:00
Wandang
12acb9a6fb [zdf] Update tests 2018-02-21 21:57:34 +07:00
Wandang
18ebd1a843 [redtube] Fix duration extraction and update test 2018-02-21 21:55:28 +07:00
Wandang
8315ee6c4c [reddit] Update test 2018-02-21 04:12:56 +07:00
Wandang
b9d1a79426 [9gag] Update test 2018-02-20 22:28:54 +07:00
Wandang
09f934b009 [vk] Update test 2018-02-20 22:21:10 +07:00
Wandang
73af6e22fd [vimeo] Update test 2018-02-20 22:20:15 +07:00
Wandang
77e499f95e [xhamster] Update test 2018-02-20 22:18:50 +07:00
Sergey M․
befa4708fd [utils] Fixup some common URL's typos in sanitize_url (closes #15649) 2018-02-19 22:50:23 +07:00
Sergey M․
90830004c8 [sonyliv] Respect referrer (closes #15648) 2018-02-19 22:29:08 +07:00
Sergey M․
18d7aa6efa [brightcove:new] Use referrer for formats' HTTP headers 2018-02-19 22:28:27 +07:00
Remita Amine
b12cf31bb1 [cbc] add new extractor for olympics.cbc.ca(closes #15535) 2018-02-19 09:02:23 +01:00
Sergey M․
7d2b4aa047 Respect --prefer-insecure while updating (closes #15497) 2018-02-18 16:43:54 +07:00
VietTPham
38662dfec7 [fusion] Add support for fusion.tv 2018-02-17 20:54:52 +07:00
Sergey M․
ee706f1009 [npo] Improve quality metadata extraction 2018-02-17 20:32:34 +07:00
Sergey M․
c4e7496421 [npo] Relax _VALID_URL (closes #14987, closes #14994) 2018-02-17 20:32:26 +07:00
Sergey M․
b8adcec4ea [npo] Capture and output error message 2018-02-17 20:32:20 +07:00
Sergey M․
073cca3df8 [downloader/common] Add whitespace 2018-02-17 19:11:46 +07:00
Parmjit Virk
f66df20ccd [pornhub] Add support for channels (closes #15613) 2018-02-17 01:17:06 +07:00
Sergey M․
ea69624992 [youtube] Handle shared URLs with generic extractor (closes #14303) 2018-02-15 22:33:11 +07:00
Sergey M․
49702e3669 [francetv] Fix typo 2018-02-12 00:25:42 +07:00
Sergey M․
59b5e7b280 release 2018.02.11 2018-02-11 22:31:52 +07:00
Sergey M․
b9683400cf [ChangeLog] Actualize 2018-02-11 22:16:42 +07:00
Sergey M․
760f81212f [francetv] Add support for live streams (closes #13689) 2018-02-11 21:58:04 +07:00
Sergey M․
79080573b5 [francetv] Add support for zouzous.fr and ludo.fr (closes #10454, closes #13087, closes #13103, closes #15012) 2018-02-11 21:40:38 +07:00
Sergey M․
99892e9908 [francetv] Separate main extractor and rework others to delegate to it 2018-02-11 21:14:05 +07:00
Sergey M․
8faa338ff3 [francetv] Improve manifest URL signing (closes #15536) 2018-02-11 20:06:37 +07:00
François Revol
818df33fda [francetv] Sign m3u8 manifest URLs (closes #15565)
they seem to be required now, else we get 403 errors.
2018-02-11 20:06:37 +07:00
Jan Schär
a072a12e24 [veoh] Add support for embed URLs 2018-02-11 06:51:10 +07:00
Petr Novák
e67734dda9 [dvtv] Skip download on failing test 2018-02-11 04:56:03 +07:00
Sergey M․
9e36fedd00 [afreecatv] Fix extraction (closes #15556) 2018-02-10 22:36:26 +07:00
Sergey M․
d2b200eef9 [periscope] Use accessVideoPublic endpoint (closes #15554) 2018-02-10 17:23:37 +07:00
Sergey M․
a03a3c80fe [YoutubeDL] Add support for filesize_approx in format selector (closes #15550) 2018-02-10 16:42:45 +07:00
Sergey M․
9d5871fdff [discovery] Fix auth request (closes #15542) 2018-02-10 00:55:11 +07:00
Sergey M․
ff873b5777 [6play] Extract subtitles (closes #15541) 2018-02-10 00:04:08 +07:00
Sergey M․
042968ff31 Credit @mweinelt for #15124 2018-02-09 23:10:36 +07:00
Sergey M․
430f2ca544 Credit @iamleot for internazionale (#14973) 2018-02-09 23:10:30 +07:00
Sergey M․
cbfbf07cdc Credit @che0 for seznamzpravy (#14616) and dvtv (#15442) 2018-02-09 23:10:24 +07:00
Sergey M․
9e167e1ee3 [newgrounds] Fix metadata extraction (closes #15531) 2018-02-09 21:17:02 +07:00
Remita Amine
5828489072 [nbc] add support for NBC Olympics Streams(closes #10295) 2018-02-09 02:05:28 +01:00
Sergey M․
8c5fafe29f [dvtv] Simplify (closes #15442) 2018-02-09 02:19:29 +07:00
Petr Novak
6f4ad0db34 [dvtv] Fix live streams extraction
(also fixed test broken by changed og tags on the site)
2018-02-09 02:19:05 +07:00
Sergey M․
c2b3bd0451 release 2018.02.08 2018-02-08 23:39:40 +07:00
Sergey M․
728cee5385 [ChangeLog] Actualize 2018-02-08 23:37:49 +07:00
Sergey M․
246a75b4ff [extractors] Import for myvi:embed 2018-02-08 23:05:13 +07:00
Sergey M․
4fac463d70 [pokemon] PEP 8 2018-02-08 23:04:20 +07:00
Sergey M․
382b8182ce [gameinformer] PEP 8 2018-02-08 23:03:57 +07:00
Sergey M․
ce53320b11 [myvi] Extend _VALID_URL 2018-02-08 23:02:05 +07:00
Sergey M․
51b0557d1e [myvi:embed] Add extractor (closes #15521) 2018-02-08 22:57:33 +07:00
Sergey M․
5a5860825d [prosiebensat1] Extend _VALID_URL (closes #15520) 2018-02-08 22:28:56 +07:00
Sergey M․
237d07f114 [pokemon] Relax _VALID_URL and extend title extraction (closes #15518) 2018-02-08 03:58:35 +07:00
Sergey M․
9f4ec3de25 [gameinformer] Use geo verification headers 2018-02-07 21:52:15 +07:00
Vrihub
96a0bbdd0d [la7] Fix extraction (closes #15501) 2018-02-07 21:48:16 +07:00
Peter Pitzulo
c8064d4fab [gameinformer] Fix brightcove id extraction 2018-02-07 21:40:48 +07:00
Sergey M․
fde677fed4 [afreecatv] Pass referrer to video info request (closes #15507) 2018-02-06 22:21:10 +07:00
Sergey M․
0e0508c8a2 [telebruxelles] Relax _VALID_URL and add support for live streams 2018-02-05 23:56:00 +07:00
Sergey M․
bcf150e435 [telebruxelles] Fix extraction (closes #15504) 2018-02-05 23:56:00 +07:00
Sergey M․
240f26229d [extractor/common] Respect secure schemes in _extract_wowza_formats 2018-02-05 23:56:00 +07:00
Sergey M․
b9b150def7 release 2018.02.04 2018-02-04 08:03:00 +07:00
Sergey M․
d20225f33b [ChangeLog] Actualize 2018-02-04 08:01:09 +07:00
Sergey M․
5399ab3f0c [brightcove] Pass embed page URL as referrer (closes #15486) 2018-02-04 07:55:04 +07:00
Sergey M․
b91a7a4e5e [downloader/http] Randomize HTTP chunk size 2018-02-04 07:33:18 +07:00
Sergey M․
e4a60912b8 [youtube] Enforce using chunked HTTP downloading for DASH formats 2018-02-04 07:17:26 +07:00
Sergey M․
00c97e3e7a [downloader/http] Add ability to pass downloader options via info dict 2018-02-04 07:16:22 +07:00
Sergey M․
cf7259bc93 [downloader/http] Fix 302 infinite loops by not reusing requests 2018-02-04 06:58:34 +07:00
Sergey M․
b54d4a5ce8 Document http_chunk_size 2018-02-04 02:53:50 +07:00
Sergey M․
db157d2a2a release 2018.02.03 2018-02-03 23:36:03 +07:00
Sergey M․
6fcc053947 [test_downloader_http] Use try_rm 2018-02-03 23:24:27 +07:00
Sergey M․
a3e8146ea8 [ChangeLog] Actualize 2018-02-03 23:21:18 +07:00
Sergey M․
f19eae429a [test_http] Use 127.0.0.1 instead of localhost 2018-02-03 23:09:25 +07:00
Sergey M․
ba515388b8 Introduce --http-chunk-size 2018-02-03 23:08:58 +07:00
Remita Amine
e2e18694db [redbulltv] fix extraction(closes #15481) 2018-02-03 15:43:33 +01:00
Chih-Hsuan Yen
4989d351b4 flake8: Ignore E741 'ambiguous variable name'
Most of violating codes are reverse-engineered JavaScripts. IMO it's
better to keep original (obfuscated) names.

[skip ci]
2018-02-03 21:02:30 +08:00
Sergey M․
1367c798e3 [redtube] Fix metadata extraction (closes #15472) 2018-02-02 22:32:53 +07:00
Sergey M․
9a340af37e [compat] Mute some F821 under python 3 2018-02-02 03:18:22 +07:00
Sergey M․
3c3bceb41d [pladform] Respect platform id and extract HLS formats (closes #15468) 2018-02-02 03:07:30 +07:00
Sergey M․
64a12edb48 [rtlnl] Remove progressive formats (closes #15459) 2018-02-01 21:30:17 +07:00
Remita Amine
4bf18702e6 [6play] do no modify asset urls with a token(#15248) 2018-02-01 11:22:05 +01:00
Remita Amine
ecc218ab14 [nationalgeographic] Relax _VALID_URL 2018-01-31 09:19:11 +01:00
Sergey M․
d6b152915c [dplay] Relax _VALID_URL (closes #15458) 2018-01-31 01:29:00 +07:00
Sergey M․
69a934e9ad [ISSUE_TEMPLATE_tmpl.md] Add entry on checking URLs availability 2018-01-30 23:31:41 +07:00
M.Yasoob Ullah Khalid ☺
5fa2a6a561 [soundcloud] Remove unused _IPHONE_CLIENT_ID 2018-01-30 22:51:43 +07:00
Remita Amine
d2a422f548 [cbsinteractive] fix data extraction(closes #15451) 2018-01-29 15:33:24 +01:00
Sergey M․
b9d52fb2ca [seznamzpravy] Remove debug output 2018-01-29 04:40:07 +07:00
Remita Amine
466000fc6b [amcnetworks] add support for SundanceTV(closes #9260) 2018-01-28 11:30:20 +01:00
Sergey M․
65220c3bd6 Add support for IronPython 2018-01-28 05:48:18 +07:00
Sergey M․
c989bdbef8 [downloader/ism] Fix Python 3.2 support 2018-01-28 05:20:21 +07:00
Sergey M․
eee1692ff3 release 2018.01.27 2018-01-27 23:44:28 +07:00
Sergey M․
07e56e6df7 [ChangeLog] Actualize 2018-01-27 23:41:25 +07:00
Sergey M․
3c3a07ee0b [seznamzpravy] Improve and simplify (closes #14616) 2018-01-27 23:36:44 +07:00
Petr Novák
27940ca09c [seznamzpravy] Add extractor (closes #14102) 2018-01-27 23:34:31 +07:00
Sergey M․
3931b84597 [extractor/common] Improve _json_ld for articles 2018-01-27 23:24:38 +07:00
Sergey M․
a0ee342b50 [dplay] Bypass geo restriction 2018-01-26 23:56:31 +07:00
Sergey M․
864a4576b7 [dplay] Add support for disco-api videos (closes #15396) 2018-01-26 23:49:47 +07:00
Yen Chi Hsuan
bbb7c3f7e9 [youtube] Extract precise error messages (closes #15284) 2018-01-25 22:30:33 +08:00
Sergey M․
9d6458a206 [teachertube] Capture and output error message 2018-01-24 22:46:04 +07:00
Sergey M․
837b061710 [teachertube] Fix and relax thumbnail extraction (closes #15403) 2018-01-24 22:41:25 +07:00
Remita Amine
967ebbdb6c [prosiebensat1] add another clip ID regexp(fixes #15378) 2018-01-23 19:22:44 +01:00
Remita Amine
dc400ed6a2 [tbs] update tokenizer url(fixes #15395) 2018-01-23 19:06:46 +01:00
Sergey M․
cf2820710d Switch codebase to use compat_b64decode 2018-01-23 22:23:12 +07:00
Sergey M․
5d7d805ca9 [mixcloud] Use compat_b64decode (closes #15394) 2018-01-23 21:53:45 +07:00
Sergey M․
f206126df0 [compat] Add compat_b64decode 2018-01-23 21:53:01 +07:00
Sergey M․
021bd012bb [thesixtyone] Remove extractor (closes #15341) 2018-01-22 22:30:28 +07:00
Sergey M․
6e5eacb770 release 2018.01.21 2018-01-21 21:26:05 +07:00
Sergey M․
d7da6db4e1 [ChangeLog] Actualize 2018-01-21 21:23:24 +07:00
Sergey M․
721a0c3c7b [prosiebensat1] Relax clip id 2018-01-21 21:22:38 +07:00
Sergey M․
e0ab56571e [southparkdk] Add support for southparkstudios.nu 2018-01-21 18:42:34 +07:00
Sergey M․
99d6e696fc [southpark] Add tests for collections (closes #14803) 2018-01-21 18:41:46 +07:00
catlover999
6289e07883 [southpark] Add support for collections 2018-01-21 18:41:10 +07:00
squibbysquibby
655c410063 [test_download] Fix download tests for lazy extractors (closes #13554, closes #13757) 2018-01-21 18:15:11 +07:00
helb
b2a027fc6f [franceinter] Fix upload date extraction (closes #14996) 2018-01-21 17:50:53 +07:00
Sergey M․
0d9c48de4f [extractor/common] Improve DASH formats extraction for jwplayer (#9242, #15187) 2018-01-21 17:42:48 +07:00
Sergey M․
df58ecbeba [rtvs] Add extractor (closes #9242, closes #15187) 2018-01-21 17:40:23 +07:00
Sergey M․
ac458e90a3 [restudy] Extend _VALID_URL (#15347) 2018-01-21 01:22:16 +07:00
Sergey M․
7df18fcc65 [restudy] Fix extraction (closes #15347) 2018-01-20 23:19:02 +07:00
Sergey M․
c707b1d828 [test_utils] Add tests for malformed JSON handling in js_to_json 2018-01-20 23:00:09 +07:00
Mike Fährmann
c384d537f8 [util] Improve scientific notation handling in js_to_json (closes #14789) 2018-01-20 22:54:21 +07:00
Sergey M․
e7f3529f68 [youtube:live] Improve live detection (closes #15365) 2018-01-20 17:57:20 +07:00
Sergey M․
7d5406216a [springboardplatform] Add extractor 2018-01-20 00:34:10 +07:00
Philipp Hagemeister
2a3683c378 prosiebensat1: add another clip ID regexp 2018-01-19 18:26:47 +01:00
Sergey M․
154e4fdace [ringtv] Remove extractor (closes #15345) 2018-01-19 22:49:58 +07:00
Sergey M․
e2fc6df169 release 2018.01.18 2018-01-18 23:41:44 +07:00
Sergey M․
68da3d033c [ChangeLog] Actualize 2018-01-18 23:39:15 +07:00
Varun
67408fe0e9 [soundcloud] Update client id (closes #15306) 2018-01-18 22:30:43 +07:00
Sergey M․
cad9caf76b [kamcord] Remove extractor (closes #15322) 2018-01-18 22:26:43 +07:00
Sergey M․
4471affc34 [spiegel] Add support for nexx videos (closes #15285) 2018-01-17 22:03:56 +07:00
Sergey M․
1370dba59f [twitch] Fix authentication and error capture (closes #14090, closes #15264) 2018-01-16 22:34:16 +07:00
Sergey M․
1d1d60f6dd [vk] Detect more errors due to copyright complaints (#15259) 2018-01-16 00:51:50 +07:00
Reto Kromer
a86922c470 [README.md] Clarify macOS name 2018-01-14 00:58:38 +07:00
Sergey M․
e11ccd76c6 release 2018.01.14 2018-01-14 00:13:56 +07:00
Sergey M․
dd896a6a07 [ChangeLog] Actualize 2018-01-14 00:10:04 +07:00
Sergey M․
391dd6f094 [youtube] Fix live streams extraction (closes #15202) 2018-01-14 00:03:22 +07:00
Sergey M․
0ce39bc542 [wdr] Fix test 2018-01-13 23:33:52 +07:00
Sergey M․
1915662d4f [wdr] Bypass geo restriction 2018-01-13 23:30:56 +07:00
Sergey M․
54e8f62e01 [wdr] Rework extractors (closes #14598) 2018-01-13 23:30:25 +07:00
Sebastian Leske
2d8bb80c60 [wdr:elefant] Add extractor 2018-01-13 23:29:36 +07:00
Sergey M․
df16e645f6 [gamestar] Fix issues (closes #15179) 2018-01-13 19:38:58 +07:00
Hendrik v. Raven
d4aedca3bd [gamestar] Add support for gamepro.de (closes #3384) 2018-01-13 19:36:59 +07:00
Sergey M․
47e2a9bc53 [viafree] Skip rtmp formats (closes #15232) 2018-01-13 18:47:47 +07:00
Chih-Hsuan Yen
e565a6386e Credit @scil for ximalaya extractor (#14687)
[ci skip]
2018-01-12 15:36:01 +08:00
Sergey M․
609850acfb [pandoratv] Add support for mobile URLs (closes #12441) 2018-01-11 23:10:18 +07:00
Sergey M․
64287560e4 [pandoratv] Add support for new URL format (closes #15131) 2018-01-11 23:06:56 +07:00
Chih-Hsuan Yen
37941fe204 [ChangeLog] Update after #14687
[ci skip]
2018-01-11 20:36:06 +08:00
scil
a90641fe87 [ximalaya_extractor] Add new extractor ximalaya (#14687)
* [ximalaya_extractor] Add new extractor

* format change according by flake8

* changes accoring to review by @yan12125 at github pull #14687

* change %d to %s in a temp str

* seond changes accoring to review by @yan12125 at github pull #1468

* improve TESTS about contains

* changes accoring to third review by @yan12125 at github pull #1468

* forth changes accoring to forth review by @yan12125 at github pull #1468
2018-01-11 20:35:09 +08:00
Sergey M․
1b79daffd9 [digg] Improve extraction 2018-01-10 22:19:51 +07:00
Sergey M․
e654829b4c [digg] Add extractor (closes #15214) 2018-01-10 21:24:22 +07:00
Sergey M․
2b4e1ace4a [limelight] Tolerate empty pc formats (closes #15150, closes #15151, closes #15207) 2018-01-10 05:39:57 +07:00
Sergey M․
310ea4661d [ndr:embed:base] Make separate formats extraction non fatal (closes #15203) 2018-01-09 22:04:50 +07:00
Chih-Hsuan Yen
5b23845125 Credit @sprhawk for the Weibo extractor (#15079) 2018-01-09 19:35:39 +08:00
Yen Chi Hsuan
0f71de0761 [ChangeLog] Update after #15079 2018-01-09 18:13:49 +08:00
Yen Chi Hsuan
4df1098c3f Merge branch 'sprhawk-weibo' 2018-01-09 18:13:11 +08:00
Yen Chi Hsuan
5eca00a2e3 [weibo] Misc improvements 2018-01-09 18:12:55 +08:00
Yen Chi Hsuan
1dd38dc0f4 Merge branch 'weibo' of https://github.com/sprhawk/youtube-dl into sprhawk-weibo 2018-01-09 17:31:52 +08:00
Sergey M․
8005dc68cb [ok] Add support for live streams 2018-01-08 21:53:03 +07:00
Remita Amine
a39e15c516 [canalplus] fix extraction(closes #15072) 2018-01-07 22:15:44 +01:00
Chih-Hsuan Yen
7643916a37 [ChangeLog] update after #15188
[ci skip]
2018-01-08 01:32:13 +08:00
Luca Steeb
3a513f29ad fix bilibili extraction (closes #15171) 2018-01-08 01:30:04 +08:00
Sergey M․
950b5f2969 release 2018.01.07 2018-01-07 23:52:16 +07:00
Sergey M․
8faa9576bb [ChangeLog] Actualize 2018-01-07 23:48:56 +07:00
Sergey M․
b0ead0e09a [jwplatform] Add support for multiple embeds (closes #15192) 2018-01-07 21:49:23 +07:00
Sergey M․
0a5b1295b7 [motherless:group] Relax entry extraction and add a fallback scenario 2018-01-07 00:31:53 +07:00
Sergey M․
a133eb7764 [motherless:group] Capture leading slash of video path 2018-01-07 00:02:41 +07:00
Sergey M․
f12628f934 [mitele] Fix extraction (closes #15186) 2018-01-06 23:58:00 +07:00
Martin Weinelt
45283afdec [motherless] Add support for groups 2018-01-06 23:33:40 +07:00
Sergey M․
b7c74c0403 [lynda] Relax _VALID_URL (closes #15185) 2018-01-06 23:12:30 +07:00
Parmjit Virk
0b0870f9d0 [soundcloud] Fallback to avatar picture for thumbnail (closes #12878) 2018-01-05 08:25:42 +07:00
Chih-Hsuan Yen
c2f18e1c49 [ChangeLog] Update after #15137
[skip ci]
2018-01-04 22:28:00 +08:00
JianxinLi
da35331c6c [youku] Fix list extraction.(close #15135) (#15137)
* [youku] Fix list extraction.(close #15135)

Change-Id: I2e9c920143f4f16012252625943a8f18b8ff40eb

* [youku] Remove KeyError try-except

Change-Id: Ic46327905cbef1356b7b12d5eb3db5d9746ca338
2018-01-04 22:25:28 +08:00
Yen Chi Hsuan
de329f64ab [openload] Fix extraction (closes #15166) 2018-01-04 13:26:08 +08:00
Sergey M․
75ba0efb52 [lynda] Skip invalid subtitles (closes #15159) 2018-01-03 16:41:28 +07:00
Luc Ritchie
f0c6c2bce2 [twitch] Pass video id to url_result when extracting playlist 2018-01-03 16:22:55 +07:00
Jaime Marquínez Ferrándiz
9650c3e91d [rtve.es:alacarta] Fix extraction of some new URLs 2018-01-02 21:12:39 +01:00
Mattias Wadman
b5e531f31a [acast] Fix extraction 2018-01-02 23:32:17 +07:00
Sergey M․
7a6c204fcb [travis] Add Jython build 2018-01-02 21:13:41 +07:00
Philipp Hagemeister
d7cd9a9e84 [utils] Fix youtube-dl under PyPy3 on Windows 2018-01-01 22:48:27 +07:00
Sergey M․
54009c246e [travis] Add PyPy builds 2018-01-01 21:54:28 +07:00
Sergey M․
b300cda476 [YoutubeDL] Output python implementation in debug header 2018-01-01 21:52:24 +07:00
sprhawk
6648fd8ad6 changed to use .get to get field from json object 2018-01-01 18:33:14 +08:00
Sergey M․
04cf1a191a release 2017.12.31 2017-12-31 04:30:49 +07:00
Sergey M․
c95c08a856 [ChangeLog] Actualize 2017-12-31 04:28:01 +07:00
Ondřej Caletka
126f225bcf [extractor/common] Add container meta field for formats extracted in _parse_mpd_formats 2017-12-31 04:04:09 +07:00
Windom
4f5cf31977 [slutload] Add support for mobile URLs 2017-12-31 01:41:07 +07:00
Sergey M․
77341dae14 [abc:iview] Improve extraction and bypass geo restriction (closes #14782) 2017-12-31 01:27:28 +07:00
d2au
2e65e7db9e [abc:iview] Fix extraction (closes #14711)
ABC dropped unmetering, so change to metered hls urls which
require auth.
2017-12-31 01:27:22 +07:00
Sergey M․
538d4f8681 [downloader/hls] Use HTTP headers for key request 2017-12-31 01:15:35 +07:00
Yen Chi Hsuan
620ee8712e [openload] Fix extraction (closes #15118) 2017-12-30 15:03:13 +08:00
Sergey M․
2ca7ed41fe [mediasite] Improve extraction and code style, add support for DASH (closes #11185, closes #14343, refs #5428) 2017-12-30 08:04:43 +07:00
felix
8056c8542d [mediasite] Add extractor, subsume sandia and collegerama extractors 2017-12-30 07:23:41 +07:00
felix
2501d41ef4 [common] use AACL as the default fourcc when AudioTag is 255 2017-12-30 07:22:07 +07:00
Remita Amine
d97cb84b31 [ufctv] Add new extractor(closes #14520) 2017-12-30 00:30:41 +01:00
50csent
2c8e11b4af [pluralsight] Fix missing first line of subtitles (closes #11118) 2017-12-30 05:56:47 +07:00
Sergey M․
d2c5b5a951 [openload] Fallback on f-page extraction (closes #14665, closes #14879) 2017-12-30 05:53:56 +07:00
Sergey M․
580f3c79d5 [vimeo] Improve password protected videos extraction (closes #15114) 2017-12-30 03:54:14 +07:00
sprhawk
48058d82dc replace unused _download_webpage_handle with _download_webpage 2017-12-30 01:14:21 +08:00
sprhawk
6a41a12d29 replace split with strip_jsonp 2017-12-30 01:11:30 +08:00
sprhawk
5c97ec5ff5 replace urlencode.encode with urlencode_postdata 2017-12-30 01:08:56 +08:00
Sergey M․
9d6ac71c27 [extractor/common] Fix extraction of DASH formats with the same representation id (closes #15111) 2017-12-29 23:14:56 +07:00
Remita Amine
84f085d4bd [aws] fix canonical/signed headers generation in python 2(closes #15102) 2017-12-29 00:13:40 +01:00
Sergey M․
a491fd0c6f release 2017.12.28 2017-12-28 23:12:56 +07:00
Sergey M․
99277daaac [ChangeLog] Actualize 2017-12-28 23:10:42 +07:00
Sergey M․
640788f6f4 [internazionale] Improve extraction (closes #14973) 2017-12-27 23:27:48 +07:00
Leonardo Taccari
1ae0f0a21d [internazionale] Add extractor 2017-12-27 23:27:43 +07:00
Ondřej Caletka
616bb95b28 [playtvak] Relax video regex and make description optional 2017-12-27 22:57:26 +07:00
Remita Amine
be069839b4 [filmweb] improve extraction 2017-12-26 19:41:08 +01:00
sprhawk
c33de004e1 Merge branch 'master' of github.com:rg3/youtube-dl into weibo 2017-12-26 22:27:26 +08:00
sprhawk
42a1012c77 fix according to "https://github.com/rg3/youtube-dl/pull/15079#discussion_r158688607" 2017-12-26 22:26:01 +08:00
Déstin Reed
a14001a5a1 [Filmweb] Add extractor 2017-12-26 15:19:37 +01:00
Remita Amine
db145ee54a [espn] Add new extractor for http://fivethirtyeight.com(closes #6864) 2017-12-26 14:20:21 +01:00
Remita Amine
45d20488f1 [umg:de] Add new extractor(closes #11582)(closes #11584) 2017-12-26 12:32:04 +01:00
sprhawk
2593651224 fix compat_urllib_request for python2.7 2017-12-26 16:46:01 +08:00
sprhawk
951043724f re-format code to pass flake8 2017-12-26 16:38:51 +08:00
sprhawk
d2be5bb5af change to use compat urllib 2017-12-26 16:28:47 +08:00
sprhawk
447a5a710d added weibo mobile site support 2017-12-26 16:24:56 +08:00
Remita Amine
0f897e0929 [espn] add support for espnfc and extract more formats(closes #8053) 2017-12-25 23:29:09 +01:00
Yen Chi Hsuan
173558ce96 [ChangeLog] Update after #15065 2017-12-25 22:06:18 +08:00
JianxinLi
d3ca283235 [youku] Add test case.
Some playlist has no data-id value.

Change-Id: I97455f2907f08bda03b538cdc13ec827e2f8ce26
2017-12-25 22:02:47 +08:00
JianxinLi
d99a1000c7 [youku] Fix list extraction.(close #15065)
Change-Id: I578fdc5b69509bdcd8d3191e3917afe47c234ff6
2017-12-25 22:02:47 +08:00
Yen Chi Hsuan
a75419586b [openload] Remove a confusing exception
If phantomjs is not installed, there's an error besides the missing
phantomjs exception:

Exception ignored in: <bound method PhantomJSwrapper.__del__ of <youtube_dl.extractor.openload.PhantomJSwrapper object at 0x7f8ad5e78278>>
Traceback (most recent call last):
  File "/home/yen/Projects/youtube-dl/youtube_dl/extractor/openload.py", line 142, in __del__
    os.remove(self._TMP_FILES[name].name)
AttributeError: 'PhantomJSwrapper' object has no attribute '_TMP_FILES'
2017-12-24 20:47:42 +08:00
Sergey M․
273c23d960 [openload] Add support for oload.stream (closes #15070) 2017-12-24 13:53:27 +07:00
Yen Chi Hsuan
b954e72c87 [ChangeLog] typo 2017-12-23 23:42:02 +08:00
Yen Chi Hsuan
116561697d [ChangeLog] Update after #14903 2017-12-23 23:41:24 +08:00
JianxinLi
0e25a1a278 [youku] Update ccode
Change-Id: Id397e814e81ff560506d68563b7409eebbe5943d
2017-12-23 23:34:42 +08:00
Sergey M․
307a7588b0 release 2017.12.23 2017-12-23 21:24:18 +07:00
Sergey M․
c2f2f8b120 [kaltura] Fix typo 2017-12-23 21:22:41 +07:00
Sergey M․
f5a6321107 [ChangeLog] Actualize 2017-12-23 21:17:53 +07:00
Sergey M․
69d69da98a [kaltura] Add another embed pattern for entry_id
For cases when player configuration map is setup via indexing operator, e.g. kalturaPlayerConfiguration_1_lre6rg3i_10[entry_id] = 1_lre6rg3i (see https://www.heise.de/video/artikel/odcast-c-t-uplink-20-1-Apple-CarPlay-vs-Android-Auto-Galileo-3D-Sound-erklaert-3919694.html)
2017-12-23 21:17:53 +07:00
Sergey M․
5c5e60cff8 [voot] Fix video identification 2017-12-23 21:17:53 +07:00
Sergey M․
2132edaa03 [extractor/common] Move X-Forwarded-For setup code into _request_webpage 2017-12-23 21:17:53 +07:00
Remita Amine
4b7dd1705a [7plus] Add new extractor(closes #15043) 2017-12-23 13:22:20 +01:00
Sergey M․
9e3682d555 [MANIFEST.in] Include all test data in PyPI package 2017-12-22 23:53:27 +07:00
Sergey M․
3e191da6d9 [Makefile] Add AUTHORS to youtube-dl.tar.gz 2017-12-22 23:46:08 +07:00
Sergey M․
963d237d26 Add LICENSE, AUTHORS and ChangeLog to PyPI package (closes #15054) 2017-12-22 23:38:16 +07:00
Sergey M․
d2d766bc6d [animeondemand] Fix typo 2017-12-20 23:18:14 +07:00
Sergey M․
17c3aced5d [animeondemand] Relax login error regex 2017-12-19 22:53:04 +07:00
Remita Amine
78466fcab5 [shahid] add support for show pages(closes #7401) 2017-12-19 02:00:38 +01:00
Sergey M․
3961c6cb9d [YoutubeDL] Add support for playlist_uploader and playlist_uploader_id in output template (closes #11427, #15018) 2017-12-19 03:53:44 +07:00
Sergey M․
07aeced68e [youtube] Extract uploader, uploader_id and uploader_url for playlists (#11427, #15018) 2017-12-19 03:51:28 +07:00
Sergey M․
c10c93238e [extractor/common] Introduce uploader, uploader_id and uploader_url meta fields for playlists (#11427, #15018) 2017-12-19 03:51:03 +07:00
Sergey M․
4a109f81bc [afreecatv] Improve format extraction (closes #15019) 2017-12-19 00:38:39 +07:00
Sergey M․
99081da90c [downloader/fragment] Encode filename of fragment being removed (closes #15020) 2017-12-18 03:31:53 +07:00
Remita Amine
7e81010987 [cspan] add support for audio only pages and catch page errors(closes #14995) 2017-12-17 19:15:59 +01:00
Sergey M․
549bb416f5 [mailru] Fix issues and improve (closes #14904) 2017-12-17 18:38:27 +07:00
Hongjie Dong
25475dfab3 [mailru] Add support for embed URLs 2017-12-17 18:37:03 +07:00
Remita Amine
3dfa9ec213 [crunchyroll] Future-proof XML element checks(closes #15013) 2017-12-17 09:15:44 +01:00
Sergey M․
06dbcd7be4 [cbslocal] Fix timestamp extraction (closes #14999, closes #15000) 2017-12-16 21:57:30 +07:00
Sergey M․
b555ae9bf1 [utils] Add another date format pattern (#14999) 2017-12-16 21:56:16 +07:00
Remita Amine
c402e7f3a0 [discoverygo] correct ttml subtitle extension 2017-12-16 12:55:44 +01:00
Sergey M․
498a8a4ca5 [vk] Make view count optional (closes #14979) 2017-12-15 22:53:56 +07:00
Remita Amine
d05ba4b89e [disney] skip Apple FairPlay formats(#14982) 2017-12-15 09:28:07 +01:00
Remita Amine
23f511f5c7 [voot] sort formats 2017-12-15 09:05:59 +01:00
Remita Amine
1c4804ef9b [voot] fix format extraction(closes #14758) 2017-12-14 23:05:43 +01:00
Sergey M․
8ff2b16435 release 2017.12.14 2017-12-14 05:19:21 +07:00
Sergey M․
c6a5a811a1 [ChangeLog] Actualize 2017-12-14 05:15:51 +07:00
Sergey M․
3fae11ac00 [itv] Improve extraction, extract more subtitles and duration (closes #14944) 2017-12-14 04:49:07 +07:00
Sergey M․
7974e289a1 [postprocessor/xattr] Clarify NO_SPACE message (#14970) 2017-12-14 01:05:02 +07:00
Remita Amine
6bf9c28b0a [byutv] add support for geo restricted videos 2017-12-13 17:51:56 +01:00
Sergey M․
bec49996c6 [downloader/http] Return actual download result (closes #14971) 2017-12-13 23:49:05 +07:00
Sergey M․
c8be7d5f74 [byutv] Fix extraction (closes #14966, closes #14967) 2017-12-13 23:14:30 +07:00
Remita Amine
15960255fe [tbs] fix typo 2017-12-12 18:16:45 +01:00
Remita Amine
6b2d8c9182 [bbc.co.uk] fix extraction for 320k m3u8 streams
broken since 197224b7a4
2017-12-12 18:04:05 +01:00
Remita Amine
e6b8803d59 [toutv] add support special video urls(closes #14179) 2017-12-12 11:11:44 +01:00
Remita Amine
cb0c2310fb [discovery] fix free videos extraction(#14157)(#14954) 2017-12-12 10:33:03 +01:00
Remita Amine
23b6e23002 [tvnow] fix extraction(closes #7831) 2017-12-11 21:36:12 +01:00
Remita Amine
127e98d31d [nickelodeon:br] correct extractor name 2017-12-11 17:21:31 +01:00
Remita Amine
e4f201bc1b [extractors] add import for NickBrIE 2017-12-11 17:19:16 +01:00
Remita Amine
08d77a95c9 [nickelodeon:br] add support for Nickelodeon(Brazil) websites(closes #14893) 2017-12-11 17:18:05 +01:00
Remita Amine
5868079e99 [nick.com] improve extraction(fixes #14876) 2017-12-11 14:37:03 +01:00
Remita Amine
b6f78d76c1 [tbs] fix extraction(fixes #13658) 2017-12-11 13:39:25 +01:00
sprhawk
0c69958844 add other properties; remove print verbose 2017-12-11 16:02:14 +08:00
sprhawk
3281af3464 a working version 2017-12-11 15:56:54 +08:00
sprhawk
29ac31afaf simply get the correct webpage, but not parsed to extract information 2017-12-11 12:26:19 +08:00
Sergey M․
1fa0dce2c0 release 2017.12.10 2017-12-10 23:18:53 +07:00
Sergey M․
fa1dd6d2cd [ChangeLog] Actualize 2017-12-10 23:15:24 +07:00
Sergey M․
c38970ca10 [culturebox] Improve video id extraction (closes #14947) 2017-12-10 22:46:21 +07:00
Remita Amine
51f2863357 [twitter] improve extraction(closes #14197) 2017-12-10 14:11:09 +01:00
Sergey M․
913b61eeee [udemy] Extract more HLS formats 2017-12-09 20:02:54 +07:00
Sergey M․
6f1ec339a0 [udemy] Improve course id extraction (closes #14938) 2017-12-09 20:02:49 +07:00
Sergey M․
a3de5e6c0e [stretchinternet] Fix issues and improve (closes #14576) 2017-12-09 17:59:08 +07:00
Andrew Bottom
f4cc03d60b [stretchinternet] Add extractor 2017-12-09 17:58:49 +07:00
Sergey M․
2a57b62b80 [ellentube] Fix issues, improve and simplify (closes #14570) 2017-12-09 02:16:54 +07:00
Alex Seiler
e2707a832c [ellentube] Fix extraction (closes #14407) 2017-12-09 02:16:48 +07:00
Sergey M․
1115271ac6 [raiplay:playlist] Fix issues and improve (closes #14563) 2017-12-09 00:48:04 +07:00
Timendum
d21d0ba6c1 [raiplay:playlist] Add extractor 2017-12-09 00:47:40 +07:00
Sergey M․
a670b1ba26 [README.md] Add is_live, start_time and end_time to output template section (closes #14926) 2017-12-07 22:16:41 +07:00
Remita Amine
1bd4fc96e6 [sonyliv] extract higher quality formats and bypass geo restriction(closes #14922) 2017-12-07 08:46:30 +01:00
Remita Amine
684ae10236 [fox] add support for adobe pass auth and extract subtitles(close #14489)(closes #14205) 2017-12-06 22:56:14 +01:00
Remita Amine
3c4fbfeca2 [dailymotion] remove dailymotion cloud extractor(closes #6794)
https://web.archive.org/web/20160312110217/https://www.dmcloud.net/
2017-12-06 10:56:48 +01:00
Windom
b271e33526 [xhamster] Add support for mobile URLs and fix thumbnail extraction 2017-12-06 00:08:31 +07:00
Sergey M․
d3f8b76b69 [extractor/generic] Fix typo (closes #14902)
Don't pass video_id as mpd_id
2017-12-05 23:11:15 +07:00
Sergey M․
91328f26b0 [ard] Skip invalid stream URLs (closes #14906) 2017-12-05 23:01:57 +07:00
Sergey M․
61d18c8a4b [porncom] Fix metadata extraction (closes #14911) 2017-12-05 22:42:02 +07:00
Sergey M․
c94427dd60 [pluralsight] Detect agreement request (#14913) 2017-12-05 22:34:56 +07:00
Remita Amine
d4f05d4731 [utils] add sami mimetype to mimetype2ext 2017-12-03 00:04:43 +01:00
Remita Amine
d7df308981 [toutv] fix login(closes 14614) 2017-12-02 20:22:40 +01:00
Sergey M․
0d56eddc59 release 2017.12.02 2017-12-02 21:34:34 +07:00
Sergey M․
e25ee72657 [ChangeLog] Actualize 2017-12-02 21:29:06 +07:00
Sergey M․
78593e294c Add references for #14844 2017-12-02 21:22:43 +07:00
Sergey M․
593f2f7989 [downloader/fragment] Commit part file after each fragment
In order to obtain correct resume_len on next iteration
2017-12-02 21:21:11 +07:00
Sergey M․
603fc4e0ea [extractor/common] Add durations for DASH fragments with bare SegmentURLs 2017-12-02 21:21:01 +07:00
Petr Novak
41bf647e89 [extractor/common] Add support for DASH manifests with SegmentLists with bare SegmentURLs 2017-12-02 21:16:36 +07:00
Sergey M․
fea92aa65d [xhamster] Fix extraction (closes #14884) 2017-12-02 19:04:59 +07:00
zcanfly
0981585bef [youku] Update ccode (closes #14872) 2017-12-02 18:16:22 +07:00
Remita Amine
f5ac68d88f [mnet] fix format extraction(fixes #14883) 2017-11-30 23:45:33 +01:00
Remita Amine
1663b32946 [xiami] add Referer header to api request 2017-11-29 20:36:55 +01:00
Remita Amine
5ea765fb72 [mtv] correct scc extention in extracted subtitles(closes #13730) 2017-11-29 17:50:38 +01:00
Remita Amine
fb61b57d0f [vvvvid] fix extraction for kenc videos(fixes #13406) 2017-11-29 16:09:45 +01:00
Remita Amine
07cf18b9c5 [br] add support for BR Mediathek videos(fixes #14560)(fixes #14788) 2017-11-29 14:21:38 +01:00
Remita Amine
5f699251e9 [daisuki] add support for motto.daisuki.com(fixes #14681) 2017-11-28 10:57:22 +01:00
Remita Amine
a3474aa59e [Odnoklassniki] fix api metadata request(fixes #14862) 2017-11-28 09:04:51 +01:00
Remita Amine
115afb77ec [itv] update hls formats extraction 2017-11-27 21:59:27 +01:00
Remita Amine
53f024e7c5 [pbs] add another media id regex 2017-11-27 16:55:27 +01:00
Remita Amine
ffe6979ef9 [utils] add hvc1 codec code to parse_codecs 2017-11-27 16:55:27 +01:00
Yen Chi Hsuan
dafb4c6647 [Makefile] Include setup.cfg in the tarball (closes #14857) 2017-11-27 22:49:35 +08:00
Yen Chi Hsuan
82a62de192 [Makefile,devscripts/run_tests.sh] Actually exclude network tests
Closes #14858
2017-11-27 21:32:06 +08:00
Yen Chi Hsuan
f58a506044 [test_InfoExtractor] Fix flake8 2017-11-27 21:30:47 +08:00
Sergey M․
5ddeb7702a release 2017.11.26 2017-11-26 21:49:22 +07:00
Sergey M․
6c07f0b288 [ChangeLog] Actualize 2017-11-26 21:37:27 +07:00
Sergey M․
e94d1adc36 Add testdata to youtube-dl.tar.gz (closes #14854) 2017-11-26 21:10:32 +07:00
Sergey M․
d08dcd2dbd [test_YoutubeDL] Fix typo (closes #14856) 2017-11-26 21:06:14 +07:00
Sergey M․
7512aa986f Fix some only matching tests (closes #14855) 2017-11-26 20:53:10 +07:00
Remita Amine
93f3f10cdc [fczenit] fix extraction 2017-11-25 19:28:26 +01:00
Remita Amine
87dac57cf6 [firstpost] remove extractor 2017-11-25 18:50:15 +01:00
Remita Amine
b485d5d6bf [nexx] make http format ids more consistent 2017-11-25 18:36:31 +01:00
Remita Amine
a238a868ba [freespeech] fix extraction 2017-11-25 18:25:00 +01:00
Remita Amine
c0f647a179 [nexx] extract more formats 2017-11-25 18:13:26 +01:00
Sergey M․
6ff27b8d5a [openload] Don't use bare except when removing temp files 2017-11-26 00:05:28 +07:00
jahudka
9ef909f2b2 [openload] Add support for openload.link 2017-11-26 00:04:13 +07:00
Sergey M․
8cfbcfab9a [tnaflix] Extract common parts of tnaflix and empflix 2017-11-25 23:42:20 +07:00
Sergey M․
b7785cf156 [empflix] Relax _VALID_URL 2017-11-25 23:42:20 +07:00
Sergey M․
9105523818 [empflix] Fix extractrion 2017-11-25 23:42:20 +07:00
Sergey M․
dbb25af657 [tnaflix] Don't modify download URLs (closes #14811) 2017-11-25 23:42:20 +07:00
Remita Amine
fe4bfe36e1 [gamersyde] remove extractor 2017-11-25 15:58:28 +01:00
Remita Amine
6f5c598a28 [france2.fr:generation-what] fix extraction 2017-11-25 15:49:49 +01:00
Remita Amine
cd9ff4ec5b [massengeschmacktv] Add support for Massengeschmack TV(replaces Fernseh Kritik TV) 2017-11-24 20:00:01 +01:00
Remita Amine
c6c6a64aa5 [fox9] fix extraction 2017-11-24 19:00:56 +01:00
Remita Amine
e0a8686f48 [faz] fix extraction and add support for Perform Group embeds(fixes #14714) 2017-11-24 18:42:41 +01:00
Yen Chi Hsuan
6049176471 [ChangeLog] Update after #14828
[skip ci]
2017-11-24 21:40:51 +08:00
John Hawkinson
805f5bf759 [Generic] ie_key in JWPlatform test 72 2017-11-24 21:39:55 +08:00
John Hawkinson
32ad4f3faf [JWPlatform] Use non-capturing group in RE
Per @yan12125.
2017-11-24 21:39:55 +08:00
John Hawkinson
6899b1d9e8 [Generic] Update test 69 (suffolk/sjc)
suffolk.edu/sjc => suffolk.edu/sjc/live.php

Unfortunately it only transmits video a few mornings per month, so
leaving the 'skip' is probably appropriate. Updating the 'skip- to
include the calendar information though.
2017-11-24 21:39:55 +08:00
John Hawkinson
939be9adfe [JWPlatform] Support iframes
Support content.jwplatform... src attributes inside <iframe> tags in
addition to <script> tags. Just a regexp change.

Add a test (currently Generic_72).
2017-11-24 21:39:55 +08:00
enigmaquip
2688664762 [culturebox] Fix extraction (closes #14827) 2017-11-23 06:39:11 +07:00
Yen Chi Hsuan
8f63941104 [youku] Fix extraction; update ccode (closes #14815) 2017-11-22 22:49:48 +08:00
Remita Amine
a9efdf3d4a [livestream] make smil extraction non fatal(fixes #14792) 2017-11-19 12:59:31 +01:00
Sergey M․
f610dbb05f [extractor/common] Use final URL when dumping request (closes #14769) 2017-11-18 19:04:56 +07:00
Windom
38db52adf3 [drtuber] Add support for mobile URLs 2017-11-17 01:50:07 +07:00
Windom
3192d4bc7a [spankbang] Add support for mobile URLs and fix test 2017-11-17 01:05:04 +07:00
Sergey M․
9cbd4dda10 [instagram] Fix description, timestamp and counters extraction (closes #14755) 2017-11-15 22:14:54 +07:00
Sergey M․
08e45b39e7 release 2017.11.15 2017-11-15 00:15:42 +07:00
Sergey M․
fae0eb42ec [ChangeLog] Actualize 2017-11-15 00:02:54 +07:00
Remita Amine
ea2295842f [common] skip Apple FairPlay m3u8 manifests(closes #14741) 2017-11-14 17:41:30 +01:00
Sergey M․
a2b6aba8de [vshare] Improve extraction, fix formats sorting and carry long lines 2017-11-14 22:50:15 +07:00
Sergey M․
ff31f2d5c3 [vshare] Capture and output error message 2017-11-14 22:39:54 +07:00
Timendum
0987f2ddb2 [vshare] Fix extraction (closes #14473) 2017-11-14 22:34:45 +07:00
Sergey M․
5871ebac47 [YoutubeDL] Fix playlist range optimization for --playlist-items (closes #14740) 2017-11-14 01:43:20 +07:00
Remita Amine
05dee6c520 [crunchyroll] extract old rtmp formats 2017-11-13 19:15:49 +01:00
Remita Amine
27adc9ec65 [tva] fix extraction(closes #14736) 2017-11-13 11:24:15 +01:00
Remita Amine
388beb86e0 [gamespot] add test for #14652 2017-11-13 10:30:12 +01:00
Remita Amine
d4e31b72b9 [gamespot] lower the preference of http formats(#14652) 2017-11-13 10:24:35 +01:00
Sergey M․
5fc12b9549 [instagram:user] Fix extraction (closes #14699) 2017-11-12 18:36:18 +07:00
Bob Poekert
af85ce29c6 [ccma] Fix typo 2017-11-12 13:25:21 +07:00
Sergey M․
e4d9586562 Remove sensitive data from logging in messages 2017-11-11 20:52:12 +07:00
Remita Amine
79d1f8ed68 [gamespot] add support for article URLS(closes #14652) 2017-11-11 13:03:16 +01:00
Remita Amine
a5203935d6 [gamespot] skip Brightcove Once http formats(#14652) 2017-11-11 13:03:16 +01:00
gkoelln
59d2e6d04f [cartoonnetwork] Update tokenizer_src (closes #14666) 2017-11-11 04:59:48 +07:00
Yen Chi Hsuan
a9543e37c8 [wsj] Recognize another URL pattern (closes #14704) 2017-11-11 00:29:08 +08:00
Sergey M․
61fb07e156 [pandatv] Modernize (closes #14693) 2017-11-09 23:30:25 +07:00
hcwhan
4222346fb2 [pandatv] Update API URL and sign format URLs 2017-11-09 23:26:46 +07:00
Remita Amine
cc6a960e13 use older login method(closes #11572) 2017-11-08 20:30:05 +01:00
Sergey M․
f34b841b51 release 2017.11.06 2017-11-06 22:39:24 +07:00
Sergey M․
e0998333fa [ChangeLog] Actualize 2017-11-06 22:36:46 +07:00
Sergey M․
909191de91 [hotstar:playlist] Fix issues and improve (closes #12465) 2017-11-05 19:15:40 +07:00
Alpesh Valia
477c97f86b [hotstar:playlist] Add extractor 2017-11-05 19:15:34 +07:00
Sergey M․
6e71bbf4ab [hotstar] Bypass geo restriction (closes #14672) 2017-11-05 16:12:56 +07:00
Sergey M․
181e381fda [test_InfoExtractor] Add test for #14660 2017-11-04 22:15:58 +07:00
Sergey M․
187ee66c94 [extractor/common] Add protocol for f4m formats 2017-11-04 22:11:39 +07:00
Sergey M․
48107c198b [f4m] Prefer baseURL for relative URLs (closes #14660) 2017-11-04 22:10:55 +07:00
Jimbolino
cd670befc4 [22tracks] Remove extractor (closes #11024) 2017-11-02 23:48:43 +07:00
Remita Amine
44cca168cc [skysport] add support ooyala embed_token protected videos(fixes #14641) 2017-11-02 14:16:15 +01:00
Remita Amine
b0f4331002 [gamespot] extract formats referenced with new data fields(#14652) 2017-11-02 13:30:50 +01:00
Sergey M․
044eeb1455 [extractor/common] Respect URL query in _extract_wowza_formats (closes #14645) 2017-11-01 23:39:26 +07:00
Sergey M․
8fe767e072 [spankbang] Detect unavailable videos (closes #14644) 2017-10-31 23:05:25 +07:00
Sergey M․
6d0630d880 release 2017.10.29 2017-10-29 07:22:53 +07:00
Sergey M․
518d357b46 [ChangeLog] Actualize 2017-10-29 07:21:33 +07:00
Sergey M․
514e8aefd4 [egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00
Sergey M․
9211e3319e [extractor/common] Prefix format id for audio only HLS formats 2017-10-29 07:05:55 +07:00
Sergey M․
056653bbb1 [utils] Add support for zero years and months in parse_duration 2017-10-29 07:04:48 +07:00
enigmaquip
c3206d02e9 [fxnetworks] Extract series metadata 2017-10-29 05:20:18 +07:00
Sergey M․
eb4b5818e2 [younow] Fix issues and improve extraction (closes #9255, closes #9432, closes #12436) 2017-10-29 04:18:43 +07:00
Andrew Udvare
47a8587915 [younow] Add extractor 2017-10-29 04:17:03 +07:00
Sergey M․
8e01f3ca81 [dctptv] Fix extraction (closes #14599) 2017-10-28 22:58:01 +07:00
Sergey M․
f2332f18e6 [youtube] Restrict embed regex (#14600) 2017-10-27 22:26:43 +07:00
Sergey M․
7c1f419341 [vimeo] Restrict iframe embed regex (closes #14600) 2017-10-27 22:21:47 +07:00
Sergey M․
30e6161799 [soundgasm] Improve extraction (closes #14588) 2017-10-26 23:16:16 +07:00
Alex Seiler
dc24a7d4a2 [myvideo] Remove extractor (closes #8557)
Redirects to store.maxdome.de
2017-10-25 23:27:55 +07:00
Logan B
d673ab6562 [nbc] Add support for classic-tv videos 2017-10-25 23:23:27 +07:00
Sergey M․
b8c6ffc518 [vrtnu] Add support for cookies authentication and simplify (#11873) 2017-10-25 23:21:51 +07:00
mrBliss
7913e0fca7 [canvas] Add support for vrt.be/vrtnu (closes #11873) 2017-10-25 23:17:28 +07:00
J.D. Purcell
cdd1ce92c4 [twitch:clips] Fix title extraction 2017-10-23 23:12:50 +07:00
rawcoder
55c727a547 [ndtv] Add support for sub-sites 2017-10-22 08:32:20 +07:00
Sergey M․
36e2d3ca43 [dramafever] Fix login error message extraction 2017-10-22 08:16:30 +07:00
Sergey M․
f7a5038305 [travis] Disable IRC notifications 2017-10-22 02:46:28 +07:00
Sergey M․
9ff6273cae [nickru] Add support for more sites 2017-10-22 01:51:01 +07:00
Sergey M․
f03ee0b372 [nickde] Add support for nickelodeon.be 2017-10-22 01:42:44 +07:00
Sergey M․
cf6bda312b [nickde] Add support for nick.ch 2017-10-22 01:30:35 +07:00
Alex Seiler
3ebbd9991e [nick] Add support for more nickelodeon sites (closes #14553) 2017-10-22 01:26:58 +07:00
Sergey M․
21ce434051 [travis] Enable IRC notifications
Let's see how is it verbose now
2017-10-21 02:14:25 +07:00
Sergey M․
5c0e5bc4df [README.md] Add build status bagde 2017-10-21 02:11:11 +07:00
Sergey M․
9a9de2d7b2 [travis] Allow download tests to fail and fast finish 2017-10-21 01:58:45 +07:00
Alex Seiler
424505df76 [azmedien] Fix test 2017-10-21 01:10:56 +07:00
Sergey M․
fa3f0fd856 release 2017.10.20 2017-10-20 23:40:25 +07:00
Sergey M․
c9dcd4b0c5 [ChangeLog] Actualize 2017-10-20 23:37:55 +07:00
Alex Seiler
fc5c47d13c [parliamentliveuk] Fix extraction (closes #14524) 2017-10-20 23:31:13 +07:00
Sergey M․
a26a3c6d34 [soundcloud] Update client id (closes #14546) 2017-10-20 21:43:34 +07:00
Felix Yan
382fa456ea [ChangeLog] Fix typo 2017-10-19 23:36:32 +07:00
Alex Seiler
e1d168e592 [servus] Add extractor (closes #14362) 2017-10-19 22:17:20 +07:00
Parmjit Virk
ca1c9f26fa [unity] Add extractor (fixes #14528) 2017-10-19 04:46:06 +07:00
Sergey M․
6f3b4a98c9 [downloader/fragment] Report warning instead of error on inconsistent download state 2017-10-17 22:53:34 +07:00
Remita Amine
fa4bc6e712 [youtube] replace youtube redirect urls in description(fixes #14517) 2017-10-17 10:07:37 +00:00
Remita Amine
6b9cbd023f [pbs] restrict direct video url regex(fixes #14519) 2017-10-17 09:23:11 +00:00
Yen Chi Hsuan
c233003afe [megaphone] Fix deprecated escape sequence 2017-10-17 15:39:06 +08:00
Sergey M․
83fcf19e2d [drtv] Respect preference for direct http formats (#14509) 2017-10-16 05:48:45 +07:00
Sergey M․
acc4ea6237 [eporner] Add support for embed URLs (closes #14507) 2017-10-16 05:11:25 +07:00
Sergey M․
8cc1840ccb [arte] Capture and output error message 2017-10-15 22:12:34 +07:00
Sergey M․
a9ee4f6e49 [downloader/hls] Fix total fragments count when ad fragments exist 2017-10-15 11:03:54 +07:00
Pawit Pornkitprasan
aaab8c5e71 [niconico] Improve uploader metadata extraction robustness (closes #14135) 2017-10-15 10:40:57 +07:00
Sergey M․
7e721e35da release 2017.10.15.1 2017-10-15 06:16:41 +07:00
Sergey M․
bd7e1406b3 [ChangeLog] Actualize 2017-10-15 06:15:37 +07:00
Sergey M․
74c42d9ec3 [downloader/hls] Ignore anvato ad fragments (closes #14496) 2017-10-15 06:13:48 +07:00
Sergey M․
5efaf43c93 [downloader/fragment] Output ad fragment count 2017-10-15 06:13:07 +07:00
Sergey M․
4827270526 [scrippsnetworks:watch] Bypass geo restriction 2017-10-15 06:11:35 +07:00
Sergey M․
ee093a0ea0 [anvato] Add ability to bypass geo restriction 2017-10-15 06:11:02 +07:00
Sergey M․
9bb2c7673e [redditr] Fix extraction for URLs with query (closes #14495) 2017-10-15 03:38:34 +07:00
Sergey M․
715534083d release 2017.10.15 2017-10-15 02:26:58 +07:00
Sergey M․
ee88c1cbc6 [ChangeLog] Actualize 2017-10-15 02:26:10 +07:00
Sergey M․
57eb45b111 [scrippsnetworks:watch] Add support for geniuskitchen.com 2017-10-15 02:01:16 +07:00
Sergey M․
b21ab85088 [scrippsnetworks:watch] Fix extraction (closes #14389) 2017-10-15 01:57:43 +07:00
Sergey M․
210a2720bc [anvato] Process master m3u8 manifests
>>> Individual m3u8 manifests are not always present, e.g. anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:4173834
2017-10-15 01:44:57 +07:00
Sergey M․
685e87b61f [youtube] Fix relative URLs in description 2017-10-14 20:26:52 +07:00
Remita Amine
c9bd503e7d [spike] bypass geo restriction 2017-10-13 08:41:57 +00:00
Remita Amine
94a530c6cb [howstuffworks] add support for more domains 2017-10-12 19:03:47 +00:00
Remita Amine
e650659b94 [infoq] fix http format downloading 2017-10-12 17:39:51 +00:00
Remita Amine
2637fadc38 [generic] fix some of the tests 2017-10-12 16:14:43 +00:00
Remita Amine
50d808f5c9 [common] add support for jwplayer youtube embeds 2017-10-12 16:12:47 +00:00
Remita Amine
7a64c33aee [rtlnl] add support for another type of embeds 2017-10-12 16:09:06 +00:00
Remita Amine
b0def2c297 [onionstudios] add support for bulbs-video embeds 2017-10-12 16:05:25 +00:00
Remita Amine
81ce479f4d [udn] fix extraction 2017-10-12 16:04:41 +00:00
Remita Amine
414e709405 [shahid] fix extraction(fixes #14448) 2017-10-12 09:20:39 +00:00
Yen Chi Hsuan
645ed3e7c9 [ChangeLog] Update after #14471
[skip ci]
2017-10-12 12:12:37 +08:00
nyuszika7h
c0bddd6d65 [kaltura] Ignore Widevine encrypted video (.wvm)
There is currently no public method to decrypt this, and there may be
other streams available that can be downloaded.

Example URL, has `.wvm` and `.mp4` formats:
https://www.voot.com/shows/bigg-boss-s11/11/538936/bigg-boss-extra-dose-arshi-s-quirky-demand/541700
2017-10-12 12:09:58 +08:00
Yen Chi Hsuan
1baba7f4a8 [vh1] Adding coding cookie 2017-10-12 12:02:26 +08:00
Remita Amine
344d1a6794 [vh1] fix extraction(fixes #9613) 2017-10-11 20:52:14 +00:00
Sergey M․
76581082f6 release 2017.10.12 2017-10-12 01:06:28 +07:00
Sergey M․
2f0eb0a68a [ChangeLog] Actualize 2017-10-12 01:05:14 +07:00
Remita Amine
7fee3377dc [steam] fix extraction(fixes #14067) 2017-10-11 17:50:08 +00:00
Sergey M․
ff3f1a62f0 [funk] Add extractor (closes #14464) 2017-10-12 00:44:13 +07:00
Sergey M․
694b61545c [nexx] Add support for shortcuts and relax domain id extraction 2017-10-12 00:41:20 +07:00
Sergey M․
af0f74288d [YoutubeDL] Improve _default_format_spec (closes #14461) 2017-10-11 23:48:05 +07:00
Remita Amine
9e38dbb19c [voxmedia] add support for recode.net(fixes #14173) 2017-10-11 15:50:20 +00:00
Remita Amine
782195a9d4 [once] add support for vmap urls 2017-10-11 15:50:20 +00:00
Sergey M․
26bae2d965 [generic] Add support for channel9 embeds (closes #14469) 2017-10-11 21:59:30 +07:00
Remita Amine
5fe75f976f [tva] fix extraction(fixes #14328) 2017-10-11 14:15:52 +00:00
Remita Amine
4fe4bda287 [tubitv] add support for new url format(fixes #14460) 2017-10-11 11:36:05 +00:00
Remita Amine
cdab1df912 [afreecatv] remove AfreecaTVGlobalIE
the website now show this message
> Global AfreecaTV will be merged and integrated on July 20th, 2017.
Every user around the world are now able to interact with one another on
www.afreecatv.com!
2017-10-11 10:04:46 +00:00
Yen Chi Hsuan
dfc80bdd2e [ChangeLog] Update after #14420 2017-10-11 02:03:00 +08:00
Khang Nguyen
04af3aca04 Remove YoutubeSharedVideoIE https://github.com/rg3/youtube-dl/issues/14303 2017-10-11 02:01:18 +08:00
Jakub Wilk
d0f2d64114 [slideslive] Add extractor (closes #2680) 2017-10-10 23:45:10 +07:00
Yen Chi Hsuan
01c742ecd0 [facebook] Support thumbnails (closes #14416) 2017-10-10 23:20:38 +08:00
Silvan Mosberger
9e71f88105 [vvvvid] Fix typo 2017-10-10 03:48:26 +07:00
Sergey M․
ae5af89079 [hrti:playlist] Relax _VALID_URL 2017-10-09 23:52:39 +07:00
Sergey M․
197224b7a4 Fix some regexes 2017-10-09 23:50:53 +07:00
Sergey M․
8992331621 [wdr] Relax media link regex (closes #14447) 2017-10-08 21:36:50 +07:00
Aleksandar Topuzović
b0dde6686c [hrti] Relax _VALID_URL 2017-10-08 05:40:08 +07:00
Sergey M․
a22ccac1f0 [fox] Delegate to uplynk:preplay (#14147) 2017-10-08 01:34:17 +07:00
Sergey M․
8b561bfc9d [youtube] Add support for hooktube.com (closes #14437) 2017-10-07 21:59:04 +07:00
Sergey M․
8e751a185c release 2017.10.07 2017-10-07 05:02:53 +07:00
Sergey M․
3fc8f5b7c2 [ChangeLog] Actualize 2017-10-07 05:01:38 +07:00
Sergey M․
665f42d8c1 [reddit] Sort formats (closes #14430) 2017-10-07 01:40:00 +07:00
Sergey M․
e952847541 [PULL_REQUEST_TEMPLATE.md] Add explicit entry on flake8 2017-10-07 00:58:19 +07:00
remis
b1a7bf44b9 [lnkgo] Relax _VALID_URL 2017-10-06 23:59:09 +07:00
Jalaz Kumar
2e2a8e97d5 [pornflip] Extend _VALID_URL (closes #14405) 2017-10-06 23:56:31 +07:00
Sergey M․
ac93c09ab2 [xtube] Add support for embedded URLs (closes #14417) 2017-10-06 23:53:32 +07:00
Sergey M․
cd6fc19ed7 [YoutubeDL] Ignore duplicates in --playlist-items
E.g. '--playlist-items 2-4,3-4,3' should result in '[2,3,4]', not '[2,3,4,3,4,3]'
2017-10-06 23:50:34 +07:00
Sergey M․
86a15ed64b [test_YoutubeDL] Add test for #14425 2017-10-06 23:41:28 +07:00
Sergey M․
7e85e8729f [YoutubeDL] Fix out of range --playlist-items for iterable playlists and reduce code duplication (closes #14425) 2017-10-06 23:34:46 +07:00
Sergey M․
6be08ce602 [utils] Use in OnDemandPagedList by default
Not using cache results in redundant network I/O due to downloading the same pages while using --playlist-items n-m
2017-10-06 23:13:53 +07:00
Sergey M․
cf5f6ed5be [xvideos] Add support for embed URLs and improve extraction (closes #14409) 2017-10-05 00:27:24 +07:00
Philipp Hagemeister
6b46285e85 [comedycentral] new shortcut :theopposition for "The Opposition" show 2017-10-04 07:45:13 +02:00
Sergey M․
6e736d86e7 [beeg] Fix extraction (closes #14403) 2017-10-04 04:27:42 +07:00
M.K
c110944fa2 [extractor/common] Fix typo in _parse_mpd_formats 2017-10-04 03:50:27 +07:00
Sergey M․
9524dca3ac [README.md] Use revision bound link to YoutubeDL options (closes #14401) 2017-10-04 02:53:20 +07:00
Jakub Wilk
3e4cedf9e8 [tvn24] Relax _VALID_URL 2017-10-03 23:28:13 +07:00
remitamine
bfd484ccff Merge pull request #14392 from snipem/nbc-fix
Fix for JSON meta data download(closes #13651)
2017-10-03 14:49:55 +00:00
Matthias Küch
b7e14f06a4 Fix for JSON meta data download
Added fixes according to #13651 and user @remitamine
2017-10-03 15:17:28 +02:00
Sergey M․
d2ae7e24e5 [postprocessor/ffmpeg] Convert to opus using libopus (closes #14381) 2017-10-02 04:43:25 +07:00
Sergey M․
544ffb7790 [ketnet] Add support for videos without direct sources (closes #14377) 2017-10-02 04:15:12 +07:00
Sergey M․
117589dfa2 [canvas] Generalize mediazone.vrt.be extractor and rework canvas and een 2017-10-02 04:14:36 +07:00
Sergey M․
839728f5bf [afreecatv] Add support for adult videos (closes #14376) 2017-10-02 03:28:25 +07:00
Sergey M․
fcdd37d053 release 2017.10.01 2017-10-01 21:54:11 +07:00
Sergey M․
1dd126180e [ChangeLog] Actualize 2017-10-01 21:45:56 +07:00
Rafal Borczuch
4e599194d6 [tvp] Add support for new URL schema (closes #14368) 2017-10-01 18:59:00 +07:00
Sergey M․
c5b7014a9c [generic] Add support for single format Video.js embeds (closes #14371) 2017-10-01 07:01:42 +07:00
Sergey M․
c8da40d834 [yahoo] Bypass geo restriction for brightcove (#14210) 2017-10-01 04:49:27 +07:00
Sergey M․
b69ca0ccfc [yahoo] Use extracted brightcove account id (closes #14210) 2017-10-01 04:37:42 +07:00
Giuseppe Fabiano
2c53bd51c6 [rtve:alacarta] Fix extraction (closes #14290) 2017-10-01 03:21:17 +07:00
Sergey M․
3836b02ce8 [YoutubeDL] PEP 8 2017-09-30 22:56:40 +07:00
Sergey M․
fa3fdeb41f [yahoo] Fix some tests 2017-09-30 22:54:22 +07:00
Sergey M․
eb9a15be60 [yahoo] Add support for custom brigthcove embeds (closes #14210) 2017-09-30 22:47:03 +07:00
Sergey M․
3600fd591d [YoutubeDL] Document youtube_include_dash_manifest 2017-09-28 00:46:48 +07:00
Sergey M․
63d990d285 [generic] Add support for Video.js embeds 2017-09-28 00:37:30 +07:00
Timendum
b14b2283a0 [gfycat] Add support for /gifs/detail URLs (closes #14322) 2017-09-27 22:48:47 +07:00
Sergey M․
02d01e15f1 [generic] Fix infinite recursion for twitter:player URLs (closes #14339) 2017-09-26 21:47:18 +07:00
Sergey M․
db96252831 [xhamsterembed] Fix extraction (closes #14308) 2017-09-24 19:23:08 +07:00
Sergey M․
8b389f7e3c Credit the author of multiple generic HTML5 embeds fix 2017-09-24 18:21:38 +07:00
Sergey M․
9fc41bcb6b release 2017.09.24 2017-09-24 00:22:50 +07:00
Sergey M․
10cab6613f [ChangeLog] Actualize 2017-09-24 00:21:34 +07:00
Sergey M․
4d182955a2 [kakao] Fix _VALID_URL 2017-09-24 00:19:27 +07:00
Sergey M․
011da618bd [openload] Fix _load_cookies for python 2.6 2017-09-24 00:12:40 +07:00
Sergey M․
4c54b89e03 Hide experimental phantomjs wrapper 2017-09-24 00:08:27 +07:00
Sergey M․
a87d7b4953 Credit @nbppp2 for americastestkitchen (#13996) 2017-09-23 23:27:28 +07:00
Sergey M․
2f3933aa1e Credit @ishitatsuyuki for mixcloud fix (#14132) 2017-09-23 23:26:35 +07:00
Sergey M․
aab20aabfc Credit @jdong92 for voot (#14059) 2017-09-23 23:23:27 +07:00
Sergey M․
16f54d0751 Credit @codeasashu for voot (#11814) 2017-09-23 23:20:20 +07:00
Sergey M․
07d1344c85 Credit @coreynicholson for vlive:playlist (#13613) 2017-09-23 23:16:27 +07:00
Sergey M․
47b5dfb047 Credit @luboss for joj (#13268) 2017-09-23 23:14:41 +07:00
Sergey M․
e3440d824a [24video] Fix timestamp extraction and make non fatal (#14295) 2017-09-23 07:46:53 +07:00
Sergey M․
136507b39a [24video] Add support for 24video.adult (closes #14295) 2017-09-23 07:41:22 +07:00
Sergey M․
7f4921b38d [heise] PEP 8 2017-09-23 07:28:29 +07:00
Sergey M․
f70ddd4aeb [kakao] Improve (closes #14007) 2017-09-23 07:28:24 +07:00
Namnamseo
1c22d7a7f3 [kakao] Add extractor (closes #12298) 2017-09-23 07:28:19 +07:00
Giuseppe Fabiano
5c1452e8f1 [twitter] Add support for user_id-less URLs (closes #14270) 2017-09-23 06:38:09 +07:00
Sergey M․
4bb58fa118 [americastestkitchen] Improve (closes #13996) 2017-09-23 06:29:20 +07:00
Dan Weber
13de91c9e9 [americastestkitchen] Add extractor (closes #10764) 2017-09-23 06:29:07 +07:00
kayb94
9ce1ac4046 [generic] Fix support for multiple HTML5 videos on one page (closes #14080) 2017-09-23 05:49:48 +07:00
Sergey M․
095774e591 [mixcloud] Improve and simplify (closes #14132) 2017-09-23 05:37:03 +07:00
Tatsuyuki Ishi
2384f5a64e [mixcloud] Fix extraction (closes #14088) 2017-09-23 05:36:57 +07:00
Yen Chi Hsuan
8c2895305d [options] Accept lrc as a subtitle conversion target format (closes #14292) 2017-09-23 02:30:03 +08:00
Sergey M․
8c6919e433 [lynda] Add support for educourse.ga (closes #14286) 2017-09-21 23:00:35 +07:00
Giuseppe Fabiano
f6ff52b473 [beeg] Fix extraction (closes #14275) 2017-09-21 04:05:33 +07:00
Parmjit Virk
12ea5c79fb [nbcsports:vplayer] Correct theplatform URL (closes #13873) 2017-09-21 02:53:06 +07:00
capital-G
3b65a6fbf3 [twitter] Fix duration extraction 2017-09-20 03:58:06 +07:00
Sergey M․
dc76eef092 [tvplay] Bypass geo restriction 2017-09-20 00:00:04 +07:00
Kareem Moussa
8a1a60d173 [devscripts/check-porn] Fix gettestcases import 2017-09-19 22:51:20 +07:00
kayb94
4d8c4b46d5 [heise] Add support for YouTube embeds 2017-09-17 22:46:52 +07:00
Sergey M․
9c2a17f2ce [popcorntv] Add extractor (closes #5914, closes #14211) 2017-09-17 22:19:57 +07:00
Yen Chi Hsuan
4ed2d7b7d1 Fix flake8 issues after #14225 2017-09-17 13:53:04 +08:00
Vijay Singh
8251af63a1 [viki] Update app data (closes #14181) 2017-09-16 22:45:23 +07:00
Windom
790d379e4d [morningstar] Relax _VALID_URL 2017-09-16 22:39:46 +07:00
Yen Chi Hsuan
3869028ffb [utils] Use bytes-like objects in dfxp2srt
This fixes handling of non-UTF8 TTML subtitles

Closes #14191
2017-09-16 12:18:38 +08:00
Yen Chi Hsuan
68d43a61b5 Ignore TTML subtitles 2017-09-16 12:14:48 +08:00
Yen Chi Hsuan
a88d461dff Merge pull request #14225 from Tithen-Firion/openload-phantomjs-method
Openload phantomjs method
2017-09-16 02:28:28 +08:00
Sergey M․
a4245acef8 [noovo] Fix extraction (closes #14214) 2017-09-15 23:12:19 +07:00
Sergey M․
6be44a50ed [dailymotion:playlist] Relax _VALID_URL (closes #14219) 2017-09-15 22:25:38 +07:00
Sergey M․
b763e1d68c [twitch] Add support for go.twitch.tv URLs (closes #14215) 2017-09-15 22:18:38 +07:00
Sergey M․
cbf85239bb [vgtv] Relax _VALID_URL (closes #14223) 2017-09-15 22:13:30 +07:00
Sergey M․
159d304a9f release 2017.09.15 2017-09-15 21:48:06 +07:00
Sergey M․
86e55e317c [ChangeLog] Actualize 2017-09-15 21:45:18 +07:00
Sergey M․
c46680fb2a [condenast] Fix extraction (closes #14196, closes #14207) 2017-09-15 02:01:17 +07:00
Philipp Hagemeister
fad9fc537d [tv4] fix a test URL 2017-09-14 20:47:23 +02:00
Philipp Hagemeister
0732a90579 [orf] Add new extractor for f4m stories 2017-09-14 20:37:46 +02:00
Sergey M․
319fc70676 [tv4] Relax _VALID_URL (closes #14206) 2017-09-14 23:50:19 +07:00
Sergey M․
e7c3e33456 [downloader/fragment] Restart inconsistent incomplete fragment downloads (#13731) 2017-09-14 23:19:53 +07:00
Yen Chi Hsuan
757984af90 Merge pull request #12909 from remitamine/raw-sub
[YoutubeDL] write raw subtitle files
2017-09-13 17:36:40 +08:00
Sergey M․
2f483758bc [animeondemand] Improve and modernize 2017-09-11 04:32:35 +07:00
Sergey M․
018cc61549 [animeondemand] Bypass geo restriction 2017-09-11 04:23:42 +07:00
Sergey M․
2709d9fa28 [animeondemand] Add support for flash videos (closes #9944) 2017-09-11 04:23:42 +07:00
Sergey M․
7dacceae75 release 2017.09.11 2017-09-11 03:30:33 +07:00
Sergey M․
43df248f10 [ChangeLog] Actualize 2017-09-11 03:27:43 +07:00
Sergey M․
f12a6e88b2 [rutube:playlist] Fix suitable (closes #14166) 2017-09-11 03:23:00 +07:00
Sergey M․
806498cf2f release 2017.09.10 2017-09-10 22:16:55 +07:00
Sergey M․
b98339b54b [ChangeLog] Actualize 2017-09-10 22:15:55 +07:00
Sergey M․
bf6ec2fea9 [fox] Fix extraction (#14147) 2017-09-10 22:08:32 +07:00
Sergey M․
c3dd44e085 [rutube] Use bool_or_none 2017-09-10 19:09:27 +07:00
Sergey M․
c7e327c4d4 [utils] Introduce bool_or_none 2017-09-10 19:08:39 +07:00
Sergey M․
48b813748d [rutube] Rework and generalize playlist extractors (closes #13565) 2017-09-10 18:40:33 +07:00
luceatnobis
debed8d759 [rutube:playlist] Add extractor (closes #13534) 2017-09-10 18:40:33 +07:00
kayb94
51aee72d16 [README.md] Clarify how to run extractor specific test cases 2017-09-08 22:13:17 +07:00
Olivier Bilodeau
931edb2ada [radiocanada] Add fallback for title extraction 2017-09-08 21:53:24 +07:00
Sergey M․
5113b69124 [abcnews,chilloutsoze,cracked,vice,vk] Use dedicated YouTube embeds extraction routines 2017-09-06 00:50:25 +07:00
Sergey M․
66c9fa36c1 [youtube] Separate methods for embeds extraction 2017-09-06 00:48:37 +07:00
Sergey M․
c5c9bf0c12 [YoutubeDL] Ensure dir existence for each requested format (closes #14116) 2017-09-05 23:31:34 +07:00
Sergey M․
880fa66f4f [redtube] Fix formats extraction (closes #14122) 2017-09-05 22:45:49 +07:00
Sergey M․
6348671c4a [arte] Relax unavailability check (closes #14112) 2017-09-04 23:08:40 +07:00
Sergey M․
efc57145c1 [manyvids] Improve (closes #14059) 2017-09-03 17:32:23 +07:00
John D
e9b865267a [manyvids] Add support for preview videos (closes #14053) 2017-09-03 17:31:53 +07:00
Sergey M․
bc35f07537 [vidme:user] Make tests only matching (closes #14054) 2017-09-03 17:03:51 +07:00
theychx
0b4a8eb3ac [vidme:user] Relax _VALID_URLs 2017-09-03 17:03:45 +07:00
Sergey M․
c1c1585b31 [bpb] Improve (closes #14086) 2017-09-03 16:43:33 +07:00
Timendum
0cbb841ba9 [bpb] Fix extraction (closes #14043) 2017-09-03 16:39:12 +07:00
Sergey M
d7c7100e3d [soundcloud] Simplify and add test (closes #14093) 2017-09-03 16:29:58 +07:00
Tatsuyuki Ishi
73602bcd0c [soundcloud] Fix download URL with private tracks 2017-09-03 16:28:34 +07:00
Sergey M․
23b2df82c7 [aliexpress:live] Fix issues (closes #13698, closes #13707) 2017-09-03 16:05:31 +07:00
dubber0
503115540d [aliexpress:live] Add extractor 2017-09-03 16:05:00 +07:00
Sergey M․
64f0e30b93 [viidea] Capture and output lecture error message (#14099) 2017-09-02 15:44:49 +07:00
Sergey M․
a3431e1224 [radiocanada] Skip unsupported platforms (closes #14100) 2017-09-02 15:33:54 +07:00
Sergey M․
a2022b0c40 release 2017.09.02 2017-09-02 01:08:32 +07:00
Sergey M․
8681ed7fc8 [ChangeLog] Actualize 2017-09-02 01:04:22 +07:00
Sergey M․
8d81f3e36d [youtube] Force old layout for each webpage (closes #14083) 2017-09-02 00:58:19 +07:00
Sergey M․
7998520933 [youtube] Fix upload date extraction (closes #14065) 2017-08-31 00:47:58 +07:00
Sergey M․
5b4bfbfc3b [charlierose] Add support for episodes (closes #14062) 2017-08-30 23:50:33 +07:00
Sergey M․
53647dfd0a [bbccouk] Add support for w-prefixed ids (closes #14056) 2017-08-30 05:27:56 +07:00
Yen Chi Hsuan
22f65a9efc Merge pull request #14048 from ryandesign/patch-1
Fix build failures with old cp and zip
2017-08-28 11:22:27 +08:00
Ryan Schmidt
c75c384fb6 Fix build failures with old cp and zip 2017-08-27 18:07:09 -05:00
Sergey M․
1b41da488d [googledrive] Extend _VALID_URL (closes #9785) 2017-08-28 00:50:41 +07:00
Sergey M․
fea82c1780 [googledrive] Add support for source format (closes #14046) 2017-08-28 00:39:22 +07:00
Sergey M․
3902cdd0e3 [pornhd] Fix extraction (closes #14005) 2017-08-27 22:37:26 +07:00
Sergey M․
2cfa7cbdd0 release 2017.08.27.1 2017-08-27 06:09:29 +07:00
Sergey M․
cc0412ef91 [ChangeLog] Actualize 2017-08-27 06:06:49 +07:00
Sergey M․
1c9c8de29e [youtube] Fix extraction with --youtube-skip-dash-manifest enabled (closes #14037) 2017-08-27 06:06:39 +07:00
Sergey M․
f031b76065 release 2017.08.27 2017-08-27 04:28:04 +07:00
Sergey M․
62c06c593d [ChangeLog] Actualize 2017-08-27 04:27:19 +07:00
Sergey M․
ff17be3ac9 [extractor/generic] Extract from LD-JSON last of all
Previous sources may contain several formats, e.g. http://tamasha.com/v/PgGZ
2017-08-27 03:31:40 +07:00
Sergey M․
1ed4549942 [extractor/common] Extract format id from label attribute of source tag for HTML5 videos (#14034) 2017-08-27 03:27:05 +07:00
Sergey M․
dd121cc1ca [extractor/common] Extract height from res attribute of source tag for HTML5 videos (closes #14034) 2017-08-27 03:12:56 +07:00
Sergey M․
a3c3a1e128 [http] Rework HTTP downloader
* Simplify code and split into separate routines to facilitate maintaining
* Make retry mechanism work on errors during actual download not only during connection establishment phase
* Retry on ECONNRESET and ETIMEDOUT during reading data from network
* Retry on content too short and various timeout errors
* Show error description on retry
* Closes #506, closes #809, closes #2849, closes #4240, closes #6023, closes #8625, closes #9483
2017-08-27 02:22:30 +07:00
Sergey M․
085d9dd9be [rai] Fix audio formats extraction (closes #14024) 2017-08-26 22:02:49 +07:00
Vijay Singh
151978f38a [mixcloud] Fix extraction (closes #14020) 2017-08-26 19:32:57 +07:00
Sergey M․
c7121fa7b8 [youtube] Fix controversy videos extraction (closes #14027, closes #14029) 2017-08-26 15:38:38 +07:00
Vijay Singh
745968bc72 [mixcloud] Fix extraction (closes #14015) 2017-08-24 22:28:44 +07:00
Sergey M․
df235dbba8 release 2017.08.23 2017-08-23 23:23:13 +07:00
Sergey M․
c4bdc68113 [ChangeLog] Actualize 2017-08-23 23:21:19 +07:00
Sergey M․
5bae33485c [toutv] PEP 8 2017-08-23 22:50:00 +07:00
Sergey M․
0830f3e048 [cbc:watch] Bypass geo-restriction (closes #13993) 2017-08-23 22:45:45 +07:00
Sergey M․
8d7a24aff6 [toutv] Relax DRM check (closes #13994) 2017-08-23 22:28:09 +07:00
Sergey M․
37d9af306a [googledrive] Simplify and carry long lines (#13638) 2017-08-23 00:33:53 +07:00
Sergey M․
e01c3d2ef7 [extractor/common] Introduce _parse_xml 2017-08-23 00:32:41 +07:00
Parmjit Virk
05915e379a [googledrive] Add support for subtitles (fixes #13619) 2017-08-22 23:48:59 +07:00
Yen Chi Hsuan
7b67b60773 Merge pull request #13669 from bmwiedemann/master
[build] Override timestamps in zip file
2017-08-22 21:51:20 +08:00
Sergey M․
8d9c2a681a [pornhub] Relax uploader regex (closes #13906, closes #13975) 2017-08-21 23:06:27 +07:00
Alan Yee
903d4d1625 [README.md] Switch to HTTPS URLs 2017-08-20 23:35:39 +07:00
Luca Steeb
8239c6791a [bandcamp:album] Extract track titles 2017-08-20 23:32:33 +07:00
Sergey M․
b359e977b9 [extractor/common] Make HLS and DASH extraction non fatal in _parse_html5_media_entries (closes #13970) 2017-08-20 14:16:58 +07:00
Bernhard M. Wiedemann
305d99f0bd [build] Override timestamps in zip file
to make build reproducible.
See https://reproducible-builds.org/ for why this is good

Copying files to not interfere with freshness detection.
2017-08-19 21:43:48 +02:00
Sergey M․
d3d45e0a45 [bbccouk] Add support for events URLs (closes #13893) 2017-08-19 23:54:15 +07:00
Yen Chi Hsuan
381ad4f309 [liveleak] Support multi-video pages (closes #6542) 2017-08-19 22:48:00 +08:00
Yen Chi Hsuan
e2481b9b6e [ChangeLog] Fix 2017-08-19 22:28:58 +08:00
Yen Chi Hsuan
09747ba766 [liveleak] Support another liveleak embedding pattern (closes #13336) 2017-08-19 22:28:13 +08:00
Yen Chi Hsuan
f8f18f332f [cda] Fix extraction (closes #13935) 2017-08-19 21:44:47 +08:00
Yen Chi Hsuan
95f3f7c20a [utils] Fix unescapeHTML for misformed string like "&a&quot;" (#13935) 2017-08-19 21:40:53 +08:00
Sergey M․
f5469da9e6 [laola1tv] Add support for tv.ittf.com (closes #13965) 2017-08-19 19:48:20 +07:00
Sergey M․
d14d9d8903 [mixcloud] Fix extraction (closes #13958) 2017-08-18 23:31:42 +07:00
Sergey M․
ea004d34f8 release 2017.08.18 2017-08-18 01:05:27 +07:00
Sergey M․
2738965d98 [ChangeLog] Actualize 2017-08-18 01:03:20 +07:00
Sergey M․
4a91910365 [qqmusic:toplist] PEP 8 2017-08-18 01:00:07 +07:00
Sergey M․
c0892b2b46 [arte] Detect unavailable videos (closes #13945) 2017-08-18 00:58:23 +07:00
Sergey M․
a5ac0c4755 [YoutubeDL] Sanitize byte string format URLs (#13951) 2017-08-17 23:59:12 +07:00
Sergey M․
5551d7714d [generic] Convert redirect URLs to unicode strings (closes #13951) 2017-08-17 23:58:01 +07:00
Sergey M․
5f5c7b92dd [udemy] Fix paid course detection (#13943) 2017-08-17 23:14:46 +07:00
Sergey M․
93d0583e34 [pluralsight] Use RPC API for course extraction (closes #13937) 2017-08-17 22:45:40 +07:00
Yen Chi Hsuan
5d28169747 Credit Genki Sky for clippit (bfabd17b33) 2017-08-17 21:21:17 +08:00
Yen Chi Hsuan
7ddab7742c [ChangeLog] Add an entry for Genki Sky's patch 2017-08-17 16:56:37 +08:00
Genki Sky
bfabd17b33 Add new extractor 2017-08-17 16:56:06 +08:00
Yen Chi Hsuan
12f5304556 [ChangeLog] Add entry for #13805 2017-08-17 16:40:56 +08:00
Yen Chi Hsuan
25a6e769a1 [qqmusic] Fix tests and cleanup 2017-08-17 16:39:57 +08:00
Yen Chi Hsuan
d22b67f356 Merge pull request #13805 from gam2046/master
Fix QQ Music url changed
2017-08-17 16:11:35 +08:00
Sergey M․
a1aa659662 [periscope] Renew HLS extraction (closes #13917) 2017-08-16 23:03:42 +07:00
Sergey M․
4850478543 [extractor/common] Add support for float durations in _parse_mpd_formats (closes #13919) 2017-08-15 23:58:00 +07:00
forDream
134d85a7bd [qqmusic] review 2017-08-15 13:14:35 +08:00
forDream
5c037c0d1f [qqmusic]support QQMusicSingerIE 2017-08-15 13:14:35 +08:00
forDream
5d1bd3b907 [qqmusic]update valid url 2017-08-15 13:14:34 +08:00
forDream
19ada898dc fix QQ Music Url changed 2017-08-15 13:14:34 +08:00
Sergey M․
da20951a57 [mixcloud] Extract decrypt key 2017-08-14 22:39:05 +07:00
Sergey M․
16393d6535 release 2017.08.13 2017-08-13 08:58:30 +07:00
Sergey M․
4f049e4aa8 [ChangeLog] Actualize 2017-08-13 08:00:15 +07:00
Sergey M․
475bcb225f [pornhub:playlistbase] Skip videos from drop-down menu for all playlists (closes #12819, closes #13902) 2017-08-13 07:53:02 +07:00
Sergey M․
b3c6515365 [fourtube] Add support for other sites (closes #6022, closes #7859, closes #13901) 2017-08-13 07:23:29 +07:00
Sergey M․
eb02940cc7 [generic] Add test for #13895 2017-08-13 01:11:27 +07:00
Sergey M․
4ef9152428 [limelight] Improve embeds detection (closes #13895) 2017-08-13 00:58:39 +07:00
Sergey M․
0c43a481b9 [reddit] Add extractors (closes #13847) 2017-08-12 23:43:51 +07:00
Sergey M․
868f79db41 [extractor/common] Fix _media_formats 2017-08-12 19:24:26 +07:00
Sergey M․
70851a95c3 [aparat] Extract all formats (closes #13887) 2017-08-12 17:18:23 +07:00
Sergey M․
e74e3b63e3 [YoutubeDL] Make sure format id is not empty 2017-08-12 17:14:11 +07:00
Sergey M․
ac8491fcca [extractor/common] Make _family_friendly_search optional 2017-08-12 17:11:35 +07:00
Sergey M․
82889d4ae5 [extractor/common] Respect source's type attribute for HTML5 media (closes #13892) 2017-08-12 16:48:11 +07:00
Sergey M․
92a5c41532 [mixcloud] Fix play info decryption (closes #13885) 2017-08-12 16:30:50 +07:00
Sergey M․
1663bd6e1c [generic] Replace vzaar embed test 2017-08-11 22:02:00 +07:00
tetra-eder
41918eaa5c [generic] Add support for vzaar embeds 2017-08-11 22:00:39 +07:00
Tithen-Firion
feee8d32e4 [phantomjs] add exe version to debug info 2017-08-03 14:17:25 +02:00
Tithen-Firion
c89267d31a Merge branch 'master' into openload-phantomjs-method 2017-05-04 11:00:06 +02:00
Remita Amine
5ff1bc0cc1 [YoutubeDL] write raw subtitle files 2017-04-29 20:03:03 +01:00
Tithen-Firion
7552f96352 [openload] Add required version 2017-04-29 12:41:57 +02:00
Tithen-Firion
98f9d87381 [phantomjs] Add required version checking 2017-04-29 12:41:42 +02:00
Tithen-Firion
fcace2d1ad [openload] raise not found before executing js 2017-04-29 10:30:45 +02:00
Tithen-Firion
40e41780f1 [phantomjs] add cookie support 2017-04-25 15:12:54 +02:00
Tithen-Firion
da57ebaf84 [openload] separate PhantomJS code from extractor 2017-04-25 01:06:14 +02:00
Tithen-Firion
47e0cef46e [openload] rewrite extractor 2017-04-16 00:34:34 +02:00
317 changed files with 12554 additions and 4957 deletions

View File

@@ -6,12 +6,13 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.09**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.02.26*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.02.26**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
- [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones
- [ ] Checked that provided video/audio/playlist URLs (if any) are alive and playable in a browser
### What is the purpose of your *issue*?
- [ ] Bug report (encountered problems with youtube-dl)
@@ -35,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2017.08.09
[debug] youtube-dl version 2018.02.26
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@@ -12,6 +12,7 @@
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
- [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones
- [ ] Checked that provided video/audio/playlist URLs (if any) are alive and playable in a browser
### What is the purpose of your *issue*?
- [ ] Bug report (encountered problems with youtube-dl)

View File

@@ -9,6 +9,7 @@
### Before submitting a *pull request* make sure you have:
- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/rg3/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/rg3/youtube-dl#youtube-dl-coding-conventions) sections
- [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests
- [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8)
### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options:
- [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/)

1
.gitignore vendored
View File

@@ -22,6 +22,7 @@ cover/
updates_key.pem
*.egg-info
*.srt
*.ttml
*.sbv
*.vtt
*.flv

View File

@@ -7,16 +7,21 @@ python:
- "3.4"
- "3.5"
- "3.6"
- "pypy"
- "pypy3"
sudo: false
env:
- YTDL_TEST_SET=core
- YTDL_TEST_SET=download
matrix:
include:
- env: JYTHON=true; YTDL_TEST_SET=core
- env: JYTHON=true; YTDL_TEST_SET=download
fast_finish: true
allow_failures:
- env: YTDL_TEST_SET=download
- env: JYTHON=true; YTDL_TEST_SET=core
- env: JYTHON=true; YTDL_TEST_SET=download
before_install:
- if [ "$JYTHON" == "true" ]; then ./devscripts/install_jython.sh; export PATH="$HOME/jython/bin:$PATH"; fi
script: ./devscripts/run_tests.sh
notifications:
email:
- filippo.valsorda@gmail.com
- yasoob.khld@gmail.com
# irc:
# channels:
# - "irc.freenode.org#youtube-dl"
# skip_join: true

13
AUTHORS
View File

@@ -223,3 +223,16 @@ Jan Kundrát
Giuseppe Fabiano
Örn Guðjónsson
Parmjit Virk
Genki Sky
Ľuboš Katrinec
Corey Nicholson
Ashutosh Chaudhary
John Dong
Tatsuyuki Ishi
Daniel Weber
Kay Bouché
Yang Hongbo
Lei Wang
Petr Novák
Leonardo Taccari
Martin Weinelt

View File

@@ -3,7 +3,7 @@
$ youtube-dl -v <your command line>
[debug] System config: []
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2015.12.06
[debug] Git HEAD: 135392e
@@ -34,7 +34,7 @@ For bug reports, this means that your report should contain the *complete* outpu
If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `http://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `http://www.youtube.com/`) is *not* an example URL.
**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.
### Are you using the latest version?
@@ -70,7 +70,7 @@ It may sound strange, but some bug reports we receive are completely unrelated t
# DEVELOPER INSTRUCTIONS
Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
Most users do not need to build youtube-dl and can [download the builds](https://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
To run youtube-dl as a developer, you don't need to build anything either. Simply execute
@@ -82,6 +82,8 @@ To run the test, simply invoke your favorite test runner, or execute a test file
python test/test_download.py
nosetests
See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
If you want to create a build of youtube-dl yourself, you'll need
* python
@@ -118,7 +120,7 @@ After you have ensured this site is distributing its content legally, you can fo
class YourExtractorIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://yourextractor.com/watch/42',
'url': 'https://yourextractor.com/watch/42',
'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
'info_dict': {
'id': '42',
@@ -149,10 +151,10 @@ After you have ensured this site is distributing its content legally, you can fo
}
```
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
$ git add youtube_dl/extractor/extractors.py
$ git add youtube_dl/extractor/yourextractor.py

746
ChangeLog
View File

@@ -1,3 +1,745 @@
version 2018.02.26
Extractors
* [udemy] Use custom User-Agent (#15571)
version 2018.02.25
Core
* [postprocessor/embedthumbnail] Skip embedding when there aren't any
thumbnails (#12573)
* [extractor/common] Improve jwplayer subtitles extraction (#15695)
Extractors
+ [vidlii] Add support for vidlii.com (#14472, #14512, #14779)
+ [streamango] Capture and output error messages
* [streamango] Fix extraction (#14160, #14256)
+ [telequebec] Add support for emissions (#14649, #14655)
+ [telequebec:live] Add support for live streams (#15688)
+ [mailru:music] Add support for mail.ru/music (#15618)
* [aenetworks] Switch to akamai HLS formats (#15612)
* [ytsearch] Fix flat title extraction (#11260, #15681)
version 2018.02.22
Core
+ [utils] Fixup some common URL typos in sanitize_url (#15649)
* Respect --prefer-insecure while updating (#15497)
Extractors
* [vidio] Fix HLS URL extraction (#15675)
+ [nexx] Add support for arc.nexx.cloud URLs
* [nexx] Switch to arc API (#15652)
* [redtube] Fix duration extraction (#15659)
+ [sonyliv] Respect referrer (#15648)
+ [brightcove:new] Use referrer for formats' HTTP headers
+ [cbc] Add support for olympics.cbc.ca (#15535)
+ [fusion] Add support for fusion.tv (#15628)
* [npo] Improve quality metadata extraction
* [npo] Relax URL regular expression (#14987, #14994)
+ [npo] Capture and output error message
+ [pornhub] Add support for channels (#15613)
* [youtube] Handle shared URLs with generic extractor (#14303)
version 2018.02.11
Core
+ [YoutubeDL] Add support for filesize_approx in format selector (#15550)
Extractors
+ [francetv] Add support for live streams (#13689)
+ [francetv] Add support for zouzous.fr and ludo.fr (#10454, #13087, #13103,
#15012)
* [francetv] Separate main extractor and rework others to delegate to it
* [francetv] Improve manifest URL signing (#15536)
+ [francetv] Sign m3u8 manifest URLs (#15565)
+ [veoh] Add support for embed URLs (#15561)
* [afreecatv] Fix extraction (#15556)
* [periscope] Use accessVideoPublic endpoint (#15554)
* [discovery] Fix auth request (#15542)
+ [6play] Extract subtitles (#15541)
* [newgrounds] Fix metadata extraction (#15531)
+ [nbc] Add support for stream.nbcolympics.com (#10295)
* [dvtv] Fix live streams extraction (#15442)
version 2018.02.08
Extractors
+ [myvi] Extend URL regular expression
+ [myvi:embed] Add support for myvi.tv embeds (#15521)
+ [prosiebensat1] Extend URL regular expression (#15520)
* [pokemon] Relax URL regular expression and extend title extraction (#15518)
+ [gameinformer] Use geo verification headers
* [la7] Fix extraction (#15501, #15502)
* [gameinformer] Fix brightcove id extraction (#15416)
+ [afreecatv] Pass referrer to video info request (#15507)
+ [telebruxelles] Add support for live streams
* [telebruxelles] Relax URL regular expression
* [telebruxelles] Fix extraction (#15504)
* [extractor/common] Respect secure schemes in _extract_wowza_formats
version 2018.02.04
Core
* [downloader/http] Randomize HTTP chunk size
+ [downloader/http] Add ability to pass downloader options via info dict
* [downloader/http] Fix 302 infinite loops by not reusing requests
+ Document http_chunk_size
Extractors
+ [brightcove] Pass embed page URL as referrer (#15486)
+ [youtube] Enforce using chunked HTTP downloading for DASH formats
version 2018.02.03
Core
+ Introduce --http-chunk-size for chunk-based HTTP downloading
+ Add support for IronPython
* [downloader/ism] Fix Python 3.2 support
Extractors
* [redbulltv] Fix extraction (#15481)
* [redtube] Fix metadata extraction (#15472)
* [pladform] Respect platform id and extract HLS formats (#15468)
- [rtlnl] Remove progressive formats (#15459)
* [6play] Do no modify asset URLs with a token (#15248)
* [nationalgeographic] Relax URL regular expression
* [dplay] Relax URL regular expression (#15458)
* [cbsinteractive] Fix data extraction (#15451)
+ [amcnetworks] Add support for sundancetv.com (#9260)
version 2018.01.27
Core
* [extractor/common] Improve _json_ld for articles
* Switch codebase to use compat_b64decode
+ [compat] Add compat_b64decode
Extractors
+ [seznamzpravy] Add support for seznam.cz and seznamzpravy.cz (#14102, #14616)
* [dplay] Bypass geo restriction
+ [dplay] Add support for disco-api videos (#15396)
* [youtube] Extract precise error messages (#15284)
* [teachertube] Capture and output error message
* [teachertube] Fix and relax thumbnail extraction (#15403)
+ [prosiebensat1] Add another clip id regular expression (#15378)
* [tbs] Update tokenizer url (#15395)
* [mixcloud] Use compat_b64decode (#15394)
- [thesixtyone] Remove extractor (#15341)
version 2018.01.21
Core
* [extractor/common] Improve jwplayer DASH formats extraction (#9242, #15187)
* [utils] Improve scientific notation handling in js_to_json (#14789)
Extractors
+ [southparkdk] Add support for southparkstudios.nu
+ [southpark] Add support for collections (#14803)
* [franceinter] Fix upload date extraction (#14996)
+ [rtvs] Add support for rtvs.sk (#9242, #15187)
* [restudy] Fix extraction and extend URL regular expression (#15347)
* [youtube:live] Improve live detection (#15365)
+ [springboardplatform] Add support for springboardplatform.com
* [prosiebensat1] Add another clip id regular expression (#15290)
- [ringtv] Remove extractor (#15345)
version 2018.01.18
Extractors
* [soundcloud] Update client id (#15306)
- [kamcord] Remove extractor (#15322)
+ [spiegel] Add support for nexx videos (#15285)
* [twitch] Fix authentication and error capture (#14090, #15264)
* [vk] Detect more errors due to copyright complaints (#15259)
version 2018.01.14
Extractors
* [youtube] Fix live streams extraction (#15202)
* [wdr] Bypass geo restriction
* [wdr] Rework extractors (#14598)
+ [wdr] Add support for wdrmaus.de/elefantenseite (#14598)
+ [gamestar] Add support for gamepro.de (#3384)
* [viafree] Skip rtmp formats (#15232)
+ [pandoratv] Add support for mobile URLs (#12441)
+ [pandoratv] Add support for new URL format (#15131)
+ [ximalaya] Add support for ximalaya.com (#14687)
+ [digg] Add support for digg.com (#15214)
* [limelight] Tolerate empty pc formats (#15150, #15151, #15207)
* [ndr:embed:base] Make separate formats extraction non fatal (#15203)
+ [weibo] Add extractor (#15079)
+ [ok] Add support for live streams
* [canalplus] Fix extraction (#15072)
* [bilibili] Fix extraction (#15188)
version 2018.01.07
Core
* [utils] Fix youtube-dl under PyPy3 on Windows
* [YoutubeDL] Output python implementation in debug header
Extractors
+ [jwplatform] Add support for multiple embeds (#15192)
* [mitele] Fix extraction (#15186)
+ [motherless] Add support for groups (#15124)
* [lynda] Relax URL regular expression (#15185)
* [soundcloud] Fallback to avatar picture for thumbnail (#12878)
* [youku] Fix list extraction (#15135)
* [openload] Fix extraction (#15166)
* [lynda] Skip invalid subtitles (#15159)
* [twitch] Pass video id to url_result when extracting playlist (#15139)
* [rtve.es:alacarta] Fix extraction of some new URLs
* [acast] Fix extraction (#15147)
version 2017.12.31
Core
+ [extractor/common] Add container meta field for formats extracted
in _parse_mpd_formats (#13616)
+ [downloader/hls] Use HTTP headers for key request
* [common] Use AACL as the default fourcc when AudioTag is 255
* [extractor/common] Fix extraction of DASH formats with the same
representation id (#15111)
Extractors
+ [slutload] Add support for mobile URLs (#14806)
* [abc:iview] Bypass geo restriction
* [abc:iview] Fix extraction (#14711, #14782, #14838, #14917, #14963, #14985,
#15035, #15057, #15061, #15071, #15095, #15106)
* [openload] Fix extraction (#15118)
- [sandia] Remove extractor
- [collegerama] Remove extractor
+ [mediasite] Add support for sites based on Mediasite Video Platform (#5428,
#11185, #14343)
+ [ufctv] Add support for ufc.tv (#14520)
* [pluralsight] Fix missing first line of subtitles (#11118)
* [openload] Fallback on f-page extraction (#14665, #14879)
* [vimeo] Improve password protected videos extraction (#15114)
* [aws] Fix canonical/signed headers generation on python 2 (#15102)
version 2017.12.28
Extractors
+ [internazionale] Add support for internazionale.it (#14973)
* [playtvak] Relax video regular expression and make description optional
(#15037)
+ [filmweb] Add support for filmweb.no (#8773, #10368)
+ [23video] Add support for 23video.com
+ [espn] Add support for fivethirtyeight.com (#6864)
+ [umg:de] Add support for universal-music.de (#11582, #11584)
+ [espn] Add support for espnfc and extract more formats (#8053)
* [youku] Update ccode (#14880)
+ [openload] Add support for oload.stream (#15070)
* [youku] Fix list extraction (#15065)
version 2017.12.23
Core
* [extractor/common] Move X-Forwarded-For setup code into _request_webpage
+ [YoutubeDL] Add support for playlist_uploader and playlist_uploader_id in
output template (#11427, #15018)
+ [extractor/common] Introduce uploader, uploader_id and uploader_url
meta fields for playlists (#11427, #15018)
* [downloader/fragment] Encode filename of fragment being removed (#15020)
+ [utils] Add another date format pattern (#14999)
Extractors
+ [kaltura] Add another embed pattern for entry_id
+ [7plus] Add support for 7plus.com.au (#15043)
* [animeondemand] Relax login error regular expression
+ [shahid] Add support for show pages (#7401)
+ [youtube] Extract uploader, uploader_id and uploader_url for playlists
(#11427, #15018)
* [afreecatv] Improve format extraction (#15019)
+ [cspan] Add support for audio only pages and catch page errors (#14995)
+ [mailru] Add support for embed URLs (#14904)
* [crunchyroll] Future-proof XML element checks (#15013)
* [cbslocal] Fix timestamp extraction (#14999, #15000)
* [discoverygo] Correct TTML subtitle extension
* [vk] Make view count optional (#14979)
* [disney] Skip Apple FairPlay formats (#14982)
* [voot] Fix format extraction (#14758)
version 2017.12.14
Core
* [postprocessor/xattr] Clarify NO_SPACE message (#14970)
* [downloader/http] Return actual download result from real_download (#14971)
Extractors
+ [itv] Extract more subtitles and duration
* [itv] Improve extraction (#14944)
+ [byutv] Add support for geo restricted videos
* [byutv] Fix extraction (#14966, #14967)
+ [bbccouk] Fix extraction for 320k HLS streams
+ [toutv] Add support for special video URLs (#14179)
* [discovery] Fix free videos extraction (#14157, #14954)
* [tvnow] Fix extraction (#7831)
+ [nickelodeon:br] Add support for nickelodeon brazil websites (#14893)
* [nick] Improve extraction (#14876)
* [tbs] Fix extraction (#13658)
version 2017.12.10
Core
+ [utils] Add sami mimetype to mimetype2ext
Extractors
* [culturebox] Improve video id extraction (#14947)
* [twitter] Improve extraction (#14197)
+ [udemy] Extract more HLS formats
* [udemy] Improve course id extraction (#14938)
+ [stretchinternet] Add support for portal.stretchinternet.com (#14576)
* [ellentube] Fix extraction (#14407, #14570)
+ [raiplay:playlist] Add support for playlists (#14563)
* [sonyliv] Bypass geo restriction
* [sonyliv] Extract higher quality formats (#14922)
* [fox] Extract subtitles
+ [fox] Add support for Adobe Pass authentication (#14205, #14489)
- [dailymotion:cloud] Remove extractor (#6794)
* [xhamster] Fix thumbnail extraction (#14780)
+ [xhamster] Add support for mobile URLs (#14780)
* [generic] Don't pass video id as mpd id while extracting DASH (#14902)
* [ard] Skip invalid stream URLs (#14906)
* [porncom] Fix metadata extraction (#14911)
* [pluralsight] Detect agreement request (#14913)
* [toutv] Fix login (#14614)
version 2017.12.02
Core
+ [downloader/fragment] Commit part file after each fragment
+ [extractor/common] Add durations for DASH fragments with bare SegmentURLs
+ [extractor/common] Add support for DASH manifests with SegmentLists with
bare SegmentURLs (#14844)
+ [utils] Add hvc1 codec code to parse_codecs
Extractors
* [xhamster] Fix extraction (#14884)
* [youku] Update ccode (#14872)
* [mnet] Fix format extraction (#14883)
+ [xiami] Add Referer header to API request
* [mtv] Correct scc extention in extracted subtitles (#13730)
* [vvvvid] Fix extraction for kenc videos (#13406)
+ [br] Add support for BR Mediathek videos (#14560, #14788)
+ [daisuki] Add support for motto.daisuki.com (#14681)
* [odnoklassniki] Fix API metadata request (#14862)
* [itv] Fix HLS formats extraction
+ [pbs] Add another media id regular expression
version 2017.11.26
Core
* [extractor/common] Use final URL when dumping request (#14769)
Extractors
* [fczenit] Fix extraction
- [firstpost] Remove extractor
* [freespeech] Fix extraction
* [nexx] Extract more formats
+ [openload] Add support for openload.link (#14763)
* [empflix] Relax URL regular expression
* [empflix] Fix extractrion
* [tnaflix] Don't modify download URLs (#14811)
- [gamersyde] Remove extractor
* [francetv:generationwhat] Fix extraction
+ [massengeschmacktv] Add support for Massengeschmack TV
* [fox9] Fix extraction
* [faz] Fix extraction and add support for Perform Group embeds (#14714)
+ [performgroup] Add support for performgroup.com
+ [jwplatform] Add support for iframes (#14828)
* [culturebox] Fix extraction (#14827)
* [youku] Fix extraction; update ccode (#14815)
* [livestream] Make SMIL extraction non fatal (#14792)
+ [drtuber] Add support for mobile URLs (#14772)
+ [spankbang] Add support for mobile URLs (#14771)
* [instagram] Fix description, timestamp and counters extraction (#14755)
version 2017.11.15
Core
* [common] Skip Apple FairPlay m3u8 manifests (#14741)
* [YoutubeDL] Fix playlist range optimization for --playlist-items (#14740)
Extractors
* [vshare] Capture and output error message
* [vshare] Fix extraction (#14473)
* [crunchyroll] Extract old RTMP formats
* [tva] Fix extraction (#14736)
* [gamespot] Lower preference of HTTP formats (#14652)
* [instagram:user] Fix extraction (#14699)
* [ccma] Fix typo (#14730)
- Remove sensitive data from logging in messages
* [instagram:user] Fix extraction (#14699)
+ [gamespot] Add support for article URLs (#14652)
* [gamespot] Skip Brightcove Once HTTP formats (#14652)
* [cartoonnetwork] Update tokenizer_src (#14666)
+ [wsj] Recognize another URL pattern (#14704)
* [pandatv] Update API URL and sign format URLs (#14693)
* [crunchyroll] Use old login method (#11572)
version 2017.11.06
Core
+ [extractor/common] Add protocol for f4m formats
* [f4m] Prefer baseURL for relative URLs (#14660)
* [extractor/common] Respect URL query in _extract_wowza_formats (14645)
Extractors
+ [hotstar:playlist] Add support for playlists (#12465)
* [hotstar] Bypass geo restriction (#14672)
- [22tracks] Remove extractor (#11024, #14628)
+ [skysport] Sdd support ooyala videos protected with embed_token (#14641)
* [gamespot] Extract formats referenced with new data fields (#14652)
* [spankbang] Detect unavailable videos (#14644)
version 2017.10.29
Core
* [extractor/common] Prefix format id for audio only HLS formats
+ [utils] Add support for zero years and months in parse_duration
Extractors
* [egghead] Fix extraction (#14388)
+ [fxnetworks] Extract series metadata (#14603)
+ [younow] Add support for younow.com (#9255, #9432, #12436)
* [dctptv] Fix extraction (#14599)
* [youtube] Restrict embed regular expression (#14600)
* [vimeo] Restrict iframe embed regular expression (#14600)
* [soundgasm] Improve extraction (#14588)
- [myvideo] Remove extractor (#8557)
+ [nbc] Add support for classic-tv videos (#14575)
+ [vrtnu] Add support for cookies authentication and simplify (#11873)
+ [canvas] Add support for vrt.be/vrtnu (#11873)
* [twitch:clips] Fix title extraction (#14566)
+ [ndtv] Add support for sub-sites (#14534)
* [dramafever] Fix login error message extraction
+ [nick] Add support for more nickelodeon sites (no, dk, se, ch, fr, es, pt,
ro, hu) (#14553)
version 2017.10.20
Core
* [downloader/fragment] Report warning instead of error on inconsistent
download state
* [downloader/hls] Fix total fragments count when ad fragments exist
Extractors
* [parliamentliveuk] Fix extraction (#14524)
* [soundcloud] Update client id (#14546)
+ [servus] Add support for servus.com (#14362)
+ [unity] Add support for unity3d.com (#14528)
* [youtube] Replace youtube redirect URLs in description (#14517)
* [pbs] Restrict direct video URL regular expression (#14519)
* [drtv] Respect preference for direct HTTP formats (#14509)
+ [eporner] Add support for embed URLs (#14507)
* [arte] Capture and output error message
* [niconico] Improve uploader metadata extraction robustness (#14135)
version 2017.10.15.1
Core
* [downloader/hls] Ignore anvato ad fragments (#14496)
* [downloader/fragment] Output ad fragment count
Extractors
* [scrippsnetworks:watch] Bypass geo restriction
+ [anvato] Add ability to bypass geo restriction
* [redditr] Fix extraction for URLs with query (#14495)
version 2017.10.15
Core
+ [common] Add support for jwplayer youtube embeds
Extractors
* [scrippsnetworks:watch] Fix extraction (#14389)
* [anvato] Process master m3u8 manifests
* [youtube] Fix relative URLs in description
* [spike] Bypass geo restriction
+ [howstuffworks] Add support for more domains
* [infoq] Fix http format downloading
+ [rtlnl] Add support for another type of embeds
+ [onionstudios] Add support for bulbs-video embeds
* [udn] Fix extraction
* [shahid] Fix extraction (#14448)
* [kaltura] Ignore Widevine encrypted video (.wvm) (#14471)
* [vh1] Fix extraction (#9613)
version 2017.10.12
Core
* [YoutubeDL] Improve _default_format_spec (#14461)
Extractors
* [steam] Fix extraction (#14067)
+ [funk] Add support for funk.net (#14464)
+ [nexx] Add support for shortcuts and relax domain id extraction
+ [voxmedia] Add support for recode.net (#14173)
+ [once] Add support for vmap URLs
+ [generic] Add support for channel9 embeds (#14469)
* [tva] Fix extraction (#14328)
+ [tubitv] Add support for new URL format (#14460)
- [afreecatv:global] Remove extractor
- [youtube:shared] Removed extractor (#14420)
+ [slideslive] Add support for slideslive.com (#2680)
+ [facebook] Support thumbnails (#14416)
* [vvvvid] Fix episode number extraction (#14456)
* [hrti:playlist] Relax URL regular expression
* [wdr] Relax media link regular expression (#14447)
* [hrti] Relax URL regular expression (#14443)
* [fox] Delegate extraction to uplynk:preplay (#14147)
+ [youtube] Add support for hooktube.com (#14437)
version 2017.10.07
Core
* [YoutubeDL] Ignore duplicates in --playlist-items
* [YoutubeDL] Fix out of range --playlist-items for iterable playlists and
reduce code duplication (#14425)
+ [utils] Use cache in OnDemandPagedList by default
* [postprocessor/ffmpeg] Convert to opus using libopus (#14381)
Extractors
* [reddit] Sort formats (#14430)
* [lnkgo] Relax URL regular expression (#14423)
* [pornflip] Extend URL regular expression (#14405, #14406)
+ [xtube] Add support for embed URLs (#14417)
+ [xvideos] Add support for embed URLs and improve extraction (#14409)
* [beeg] Fix extraction (#14403)
* [tvn24] Relax URL regular expression (#14395)
* [nbc] Fix extraction (#13651, #13715, #14137, #14198, #14312, #14314, #14378,
#14392, #14414, #14419, #14431)
+ [ketnet] Add support for videos without direct sources (#14377)
* [canvas] Generalize mediazone.vrt.be extractor and rework canvas and een
+ [afreecatv] Add support for adult videos (#14376)
version 2017.10.01
Core
* [YoutubeDL] Document youtube_include_dash_manifest
Extractors
+ [tvp] Add support for new URL schema (#14368)
+ [generic] Add support for single format Video.js embeds (#14371)
* [yahoo] Bypass geo restriction for brightcove (#14210)
* [yahoo] Use extracted brightcove account id (#14210)
* [rtve:alacarta] Fix extraction (#14290)
+ [yahoo] Add support for custom brigthcove embeds (#14210)
+ [generic] Add support for Video.js embeds
+ [gfycat] Add support for /gifs/detail URLs (#14322)
* [generic] Fix infinite recursion for twitter:player URLs (#14339)
* [xhamsterembed] Fix extraction (#14308)
version 2017.09.24
Core
+ [options] Accept lrc as a subtitle conversion target format (#14292)
* [utils] Fix handling raw TTML subtitles (#14191)
Extractors
* [24video] Fix timestamp extraction and make non fatal (#14295)
+ [24video] Add support for 24video.adult (#14295)
+ [kakao] Add support for tv.kakao.com (#12298, #14007)
+ [twitter] Add support for URLs without user id (#14270)
+ [americastestkitchen] Add support for americastestkitchen.com (#10764,
#13996)
* [generic] Fix support for multiple HTML5 videos on one page (#14080)
* [mixcloud] Fix extraction (#14088, #14132)
+ [lynda] Add support for educourse.ga (#14286)
* [beeg] Fix extraction (#14275)
* [nbcsports:vplayer] Correct theplatform URL (#13873)
* [twitter] Fix duration extraction (#14141)
* [tvplay] Bypass geo restriction
+ [heise] Add support for YouTube embeds (#14109)
+ [popcorntv] Add support for popcorntv.it (#5914, #14211)
* [viki] Update app data (#14181)
* [morningstar] Relax URL regular expression (#14222)
* [openload] Fix extraction (#14225, #14257)
* [noovo] Fix extraction (#14214)
* [dailymotion:playlist] Relax URL regular expression (#14219)
+ [twitch] Add support for go.twitch.tv URLs (#14215)
* [vgtv] Relax URL regular expression (#14223)
version 2017.09.15
Core
* [downloader/fragment] Restart inconsistent incomplete fragment downloads
(#13731)
* [YoutubeDL] Download raw subtitles files (#12909, #14191)
Extractors
* [condenast] Fix extraction (#14196, #14207)
+ [orf] Add support for f4m stories
* [tv4] Relax URL regular expression (#14206)
* [animeondemand] Bypass geo restriction
+ [animeondemand] Add support for flash videos (#9944)
version 2017.09.11
Extractors
* [rutube:playlist] Fix suitable (#14166)
version 2017.09.10
Core
+ [utils] Introduce bool_or_none
* [YoutubeDL] Ensure dir existence for each requested format (#14116)
Extractors
* [fox] Fix extraction (#14147)
* [rutube] Use bool_or_none
* [rutube] Rework and generalize playlist extractors (#13565)
+ [rutube:playlist] Add support for playlists (#13534, #13565)
+ [radiocanada] Add fallback for title extraction (#14145)
* [vk] Use dedicated YouTube embeds extraction routine
* [vice] Use dedicated YouTube embeds extraction routine
* [cracked] Use dedicated YouTube embeds extraction routine
* [chilloutzone] Use dedicated YouTube embeds extraction routine
* [abcnews] Use dedicated YouTube embeds extraction routine
* [youtube] Separate methods for embeds extraction
* [redtube] Fix formats extraction (#14122)
* [arte] Relax unavailability check (#14112)
+ [manyvids] Add support for preview videos from manyvids.com (#14053, #14059)
* [vidme:user] Relax URL regular expression (#14054)
* [bpb] Fix extraction (#14043, #14086)
* [soundcloud] Fix download URL with private tracks (#14093)
* [aliexpress:live] Add support for live.aliexpress.com (#13698, #13707)
* [viidea] Capture and output lecture error message (#14099)
* [radiocanada] Skip unsupported platforms (#14100)
version 2017.09.02
Extractors
* [youtube] Force old layout for each webpage (#14068, #14072, #14074, #14076,
#14077, #14079, #14082, #14083, #14094, #14095, #14096)
* [youtube] Fix upload date extraction (#14065)
+ [charlierose] Add support for episodes (#14062)
+ [bbccouk] Add support for w-prefixed ids (#14056)
* [googledrive] Extend URL regular expression (#9785)
+ [googledrive] Add support for source format (#14046)
* [pornhd] Fix extraction (#14005)
version 2017.08.27.1
Extractors
* [youtube] Fix extraction with --youtube-skip-dash-manifest enabled (#14037)
version 2017.08.27
Core
+ [extractor/common] Extract height and format id for HTML5 videos (#14034)
* [downloader/http] Rework HTTP downloader (#506, #809, #2849, #4240, #6023,
#8625, #9483)
* Simplify code and split into separate routines to facilitate maintaining
* Make retry mechanism work on errors during actual download not only
during connection establishment phase
* Retry on ECONNRESET and ETIMEDOUT during reading data from network
* Retry on content too short
* Show error description on retry
Extractors
* [generic] Lower preference for extraction from LD-JSON
* [rai] Fix audio formats extraction (#14024)
* [youtube] Fix controversy videos extraction (#14027, #14029)
* [mixcloud] Fix extraction (#14015, #14020)
version 2017.08.23
Core
+ [extractor/common] Introduce _parse_xml
* [extractor/common] Make HLS and DASH extraction in_parse_html5_media_entries
non fatal (#13970)
* [utils] Fix unescapeHTML for misformed string like "&a&quot;" (#13935)
Extractors
* [cbc:watch] Bypass geo restriction (#13993)
* [toutv] Relax DRM check (#13994)
+ [googledrive] Add support for subtitles (#13619, #13638)
* [pornhub] Relax uploader regular expression (#13906, #13975)
* [bandcamp:album] Extract track titles (#13962)
+ [bbccouk] Add support for events URLs (#13893)
+ [liveleak] Support multi-video pages (#6542)
+ [liveleak] Support another liveleak embedding pattern (#13336)
* [cda] Fix extraction (#13935)
+ [laola1tv] Add support for tv.ittf.com (#13965)
* [mixcloud] Fix extraction (#13958, #13974, #13980, #14003)
version 2017.08.18
Core
* [YoutubeDL] Sanitize byte string format URLs (#13951)
+ [extractor/common] Add support for float durations in _parse_mpd_formats
(#13919)
Extractors
* [arte] Detect unavailable videos (#13945)
* [generic] Convert redirect URLs to unicode strings (#13951)
* [udemy] Fix paid course detection (#13943)
* [pluralsight] Use RPC API for course extraction (#13937)
+ [clippit] Add support for clippituser.tv
+ [qqmusic] Support new URL schemes (#13805)
* [periscope] Renew HLS extraction (#13917)
* [mixcloud] Extract decrypt key
version 2017.08.13
Core
* [YoutubeDL] Make sure format id is not empty
* [extractor/common] Make _family_friendly_search optional
* [extractor/common] Respect source's type attribute for HTML5 media (#13892)
Extractors
* [pornhub:playlistbase] Skip videos from drop-down menu (#12819, #13902)
+ [fourtube] Add support pornerbros.com (#6022)
+ [fourtube] Add support porntube.com (#7859, #13901)
+ [fourtube] Add support fux.com
* [limelight] Improve embeds detection (#13895)
+ [reddit] Add support for v.redd.it and reddit.com (#13847)
* [aparat] Extract all formats (#13887)
* [mixcloud] Fix play info decryption (#13885)
+ [generic] Add support for vzaar embeds (#13876)
version 2017.08.09
Core
@@ -81,7 +823,7 @@ Extractors
* [youku:show] Fix playlist extraction (#13248)
+ [dispeak] Recognize sevt subdomain (#13276)
* [adn] Improve error reporting (#13663)
* [crunchyroll] Relax series and season regex (#13659)
* [crunchyroll] Relax series and season regular expression (#13659)
+ [spiegel:article] Add support for nexx iframe embeds (#13029)
+ [nexx:embed] Add support for iframe embeds
* [nexx] Improve JS embed extraction
@@ -554,7 +1296,7 @@ version 2017.04.14
Core
+ [downloader/hls] Add basic support for EXT-X-BYTERANGE tag (#10955)
+ [adobepass] Improve Comcast and Verison login code (#10803)
+ [adobepass] Improve Comcast and Verizon login code (#10803)
+ [adobepass] Add support for Verizon (#10803)
Extractors

View File

@@ -1,7 +1,9 @@
include README.md
include test/*.py
include test/*.json
include LICENSE
include AUTHORS
include ChangeLog
include youtube-dl.bash-completion
include youtube-dl.fish
include youtube-dl.1
recursive-include docs Makefile conf.py *.rst
recursive-include test *

View File

@@ -36,8 +36,17 @@ test:
ot: offlinetest
# Keep this list in sync with devscripts/run_tests.sh
offlinetest: codetest
$(PYTHON) -m nose --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py --exclude test_socks.py
$(PYTHON) -m nose --verbose test \
--exclude test_age_restriction.py \
--exclude test_download.py \
--exclude test_iqiyi_sdk_interpreter.py \
--exclude test_socks.py \
--exclude test_subtitles.py \
--exclude test_write_annotations.py \
--exclude test_youtube_lists.py \
--exclude test_youtube_signature.py
tar: youtube-dl.tar.gz
@@ -46,8 +55,15 @@ tar: youtube-dl.tar.gz
pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 youtube-dl.fish
youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
zip --quiet youtube-dl youtube_dl/*.py youtube_dl/*/*.py
zip --quiet --junk-paths youtube-dl youtube_dl/__main__.py
mkdir -p zip
for d in youtube_dl youtube_dl/downloader youtube_dl/extractor youtube_dl/postprocessor ; do \
mkdir -p zip/$$d ;\
cp -pPR $$d/*.py zip/$$d/ ;\
done
touch -t 200001010101 zip/youtube_dl/*.py zip/youtube_dl/*/*.py
mv zip/youtube_dl/__main__.py zip/
cd zip ; zip -q ../youtube-dl youtube_dl/*.py youtube_dl/*/*.py __main__.py
rm -rf zip
echo '#!$(PYTHON)' > youtube-dl
cat youtube-dl.zip >> youtube-dl
rm youtube-dl.zip
@@ -94,7 +110,7 @@ _EXTRACTOR_FILES = $(shell find youtube_dl/extractor -iname '*.py' -and -not -in
youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
$(PYTHON) devscripts/make_lazy_extractors.py $@
youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish ChangeLog
youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish ChangeLog AUTHORS
@tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
--exclude '*.DS_Store' \
--exclude '*.kate-swp' \
@@ -103,11 +119,10 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
--exclude '*~' \
--exclude '__pycache__' \
--exclude '.git' \
--exclude 'testdata' \
--exclude 'docs/_build' \
-- \
bin devscripts test youtube_dl docs \
ChangeLog LICENSE README.md README.txt \
ChangeLog AUTHORS LICENSE README.md README.txt \
Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion \
youtube-dl.zsh youtube-dl.fish setup.py \
youtube-dl.zsh youtube-dl.fish setup.py setup.cfg \
youtube-dl

View File

@@ -1,3 +1,5 @@
[![Build Status](https://travis-ci.org/rg3/youtube-dl.svg?branch=master)](https://travis-ci.org/rg3/youtube-dl)
youtube-dl - download videos from youtube.com or other video platforms
- [INSTALLATION](#installation)
@@ -25,7 +27,7 @@ If you do not have curl, you can alternatively use a recent wget:
sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
sudo chmod a+rx /usr/local/bin/youtube-dl
Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`).
Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](https://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`).
You can also use pip:
@@ -33,7 +35,7 @@ You can also use pip:
This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information.
OS X users can install youtube-dl with [Homebrew](http://brew.sh/):
OS X users can install youtube-dl with [Homebrew](https://brew.sh/):
brew install youtube-dl
@@ -44,7 +46,7 @@ Or with [MacPorts](https://www.macports.org/):
Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://rg3.github.io/youtube-dl/download.html).
# DESCRIPTION
**youtube-dl** is a command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on your Unix box, on Windows or on Mac OS X. It is released to the public domain, which means you can modify it, redistribute it or use it however you like.
**youtube-dl** is a command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on your Unix box, on Windows or on macOS. It is released to the public domain, which means you can modify it, redistribute it or use it however you like.
youtube-dl [OPTIONS] URL [URL...]
@@ -196,6 +198,11 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
size. By default, the buffer size is
automatically resized from an initial value
of SIZE.
--http-chunk-size SIZE Size of a chunk for chunk-based HTTP
downloading (e.g. 10485760 or 10M) (default
is disabled). May be useful for bypassing
bandwidth throttling imposed by a webserver
(experimental)
--playlist-reverse Download playlist videos in reverse order
--playlist-random Download playlist videos in random order
--xattr-set-filesize Set file xattribute ytdl.filesize with
@@ -303,8 +310,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
--encoding ENCODING Force the specified encoding (experimental)
--no-check-certificate Suppress HTTPS certificate validation
--prefer-insecure Use an unencrypted connection to retrieve
information about the video. (Currently
supported only for YouTube)
information whenever possible
--user-agent UA Specify a custom user agent
--referer URL Specify a custom referer, use if the video
access is restricted to one domain
@@ -427,7 +433,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
syntax. Example: --exec 'adb push {}
/sdcard/Music/ && rm {}'
--convert-subs FORMAT Convert the subtitles to other format
(currently supported: srt|ass|vtt)
(currently supported: srt|ass|vtt|lrc)
# CONFIGURATION
@@ -458,7 +464,7 @@ You can also use `--config-location` if you want to use custom configuration fil
### Authentication with `.netrc` file
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you:
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you:
```
touch $HOME/.netrc
chmod a-rwx,u+rw $HOME/.netrc
@@ -485,7 +491,7 @@ The `-o` option allows users to indicate a template for the output file names.
**tl;dr:** [navigate me to examples](#output-template-examples).
The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operations. Allowed names along with sequence type are:
The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operations. Allowed names along with sequence type are:
- `id` (string): Video identifier
- `title` (string): Video title
@@ -509,6 +515,9 @@ The basic usage is not to set any template arguments when downloading a single f
- `average_rating` (numeric): Average rating give by users, the scale used depends on the webpage
- `comment_count` (numeric): Number of comments on the video
- `age_limit` (numeric): Age restriction for the video (years)
- `is_live` (boolean): Whether this video is a live stream or a fixed-length video
- `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL
- `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL
- `format` (string): A human-readable description of the format
- `format_id` (string): Format code specified by `--format`
- `format_note` (string): Additional info about the format
@@ -534,6 +543,8 @@ The basic usage is not to set any template arguments when downloading a single f
- `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist
- `playlist_id` (string): Playlist identifier
- `playlist_title` (string): Playlist title
- `playlist_uploader` (string): Full name of the playlist uploader
- `playlist_uploader_id` (string): Nickname or id of the playlist uploader
Available for the video that belongs to some logical chapter or section:
@@ -603,7 +614,7 @@ $ youtube-dl -o '%(uploader)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)
$ youtube-dl -u user -p password -o '~/MyVideos/%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s' https://www.udemy.com/java-tutorial/
# Download entire series season keeping each series and each season in separate directory under C:/MyVideos
$ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" http://videomore.ru/kino_v_detalayah/5_sezon/367617
$ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" https://videomore.ru/kino_v_detalayah/5_sezon/367617
# Stream the video being downloaded to stdout
$ youtube-dl -o - BaW_jenozKc
@@ -716,17 +727,17 @@ $ youtube-dl --dateafter 20000101 --datebefore 20091231
### How do I update youtube-dl?
If you've followed [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`).
If you've followed [our manual installation instructions](https://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`).
If you have used pip, a simple `sudo pip install -U youtube-dl` is sufficient to update.
If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to http://yt-dl.org/ to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. You can (and should) complain to your distribution in their bugtracker or support forum.
If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to https://yt-dl.org to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. You can (and should) complain to your distribution in their bugtracker or support forum.
As a last resort, you can also uninstall the version installed by your package manager and follow our manual installation instructions. For that, remove the distribution's package, with a line like
sudo apt-get remove -y youtube-dl
Afterwards, simply follow [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html):
Afterwards, simply follow [our manual installation instructions](https://rg3.github.io/youtube-dl/download.html):
```
sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl
@@ -766,11 +777,11 @@ Apparently YouTube requires you to pass a CAPTCHA test if you download too much.
youtube-dl works fine on its own on most sites. However, if you want to convert video/audio, you'll need [avconv](https://libav.org/) or [ffmpeg](https://www.ffmpeg.org/). On some sites - most notably YouTube - videos can be retrieved in a higher quality format without sound. youtube-dl will detect whether avconv/ffmpeg is present and automatically pick the best option.
Videos or video formats streamed via RTMP protocol can only be downloaded when [rtmpdump](https://rtmpdump.mplayerhq.hu/) is installed. Downloading MMS and RTSP videos requires either [mplayer](http://mplayerhq.hu/) or [mpv](https://mpv.io/) to be installed.
Videos or video formats streamed via RTMP protocol can only be downloaded when [rtmpdump](https://rtmpdump.mplayerhq.hu/) is installed. Downloading MMS and RTSP videos requires either [mplayer](https://mplayerhq.hu/) or [mpv](https://mpv.io/) to be installed.
### I have downloaded a video but how can I play it?
Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org/) or [mplayer](http://www.mplayerhq.hu/).
Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](https://www.videolan.org/) or [mplayer](https://www.mplayerhq.hu/).
### I extracted a video URL with `-g`, but it does not play on another machine / in my web browser.
@@ -845,10 +856,10 @@ Use the `-o` to specify an [output template](#output-template), for example `-o
### How do I download a video starting with a `-`?
Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the options with `--`:
Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the options with `--`:
youtube-dl -- -wNyEUrxzFU
youtube-dl "http://www.youtube.com/watch?v=-wNyEUrxzFU"
youtube-dl "https://www.youtube.com/watch?v=-wNyEUrxzFU"
### How do I pass cookies to youtube-dl?
@@ -856,15 +867,15 @@ Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox).
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, Mac OS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
Passing cookies to youtube-dl is a good way to workaround login when a particular extractor does not implement it explicitly. Another use case is working around [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare).
### How do I stream directly to media player?
You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe former to latter. For example, streaming to [vlc](http://www.videolan.org/) can be achieved with:
You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe former to latter. For example, streaming to [vlc](https://www.videolan.org/) can be achieved with:
youtube-dl -o - "http://www.youtube.com/watch?v=BaW_jenozKcj" | vlc -
youtube-dl -o - "https://www.youtube.com/watch?v=BaW_jenozKcj" | vlc -
### How do I download only new videos from a playlist?
@@ -884,7 +895,7 @@ When youtube-dl detects an HLS video, it can download it either with the built-i
When youtube-dl knows that one particular downloader works better for a given website, that downloader will be picked. Otherwise, youtube-dl will pick the best downloader for general compatibility, which at the moment happens to be ffmpeg. This choice may change in future versions of youtube-dl, with improvements of the built-in downloader and/or ffmpeg.
In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](http://rg3.github.io/youtube-dl/supportedsites.html) cannot mandate one specific downloader.
In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](https://rg3.github.io/youtube-dl/supportedsites.html) cannot mandate one specific downloader.
If you put either `--hls-prefer-native` or `--hls-prefer-ffmpeg` into your configuration, a different subset of videos will fail to download correctly. Instead, it is much better to [file an issue](https://yt-dl.org/bug) or a pull request which details why the native or the ffmpeg HLS downloader is a better choice for your use case.
@@ -910,7 +921,7 @@ Feel free to bump the issue from time to time by writing a small comment ("Issue
### How can I detect whether a given URL is supported by youtube-dl?
For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from https://example.com/video/1234567 to https://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
@@ -924,7 +935,7 @@ youtube-dl is an open-source project manned by too few volunteers, so we'd rathe
# DEVELOPER INSTRUCTIONS
Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
Most users do not need to build youtube-dl and can [download the builds](https://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
To run youtube-dl as a developer, you don't need to build anything either. Simply execute
@@ -936,6 +947,8 @@ To run the test, simply invoke your favorite test runner, or execute a test file
python test/test_download.py
nosetests
See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
If you want to create a build of youtube-dl yourself, you'll need
* python
@@ -972,7 +985,7 @@ After you have ensured this site is distributing its content legally, you can fo
class YourExtractorIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://yourextractor.com/watch/42',
'url': 'https://yourextractor.com/watch/42',
'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
'info_dict': {
'id': '42',
@@ -1003,10 +1016,10 @@ After you have ensured this site is distributing its content legally, you can fo
}
```
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
$ git add youtube_dl/extractor/extractors.py
$ git add youtube_dl/extractor/yourextractor.py
@@ -1162,10 +1175,10 @@ import youtube_dl
ydl_opts = {}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```
Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L129-L279). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
@@ -1201,19 +1214,19 @@ ydl_opts = {
'progress_hooks': [my_hook],
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```
# BUGS
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](http://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
**Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
```
$ youtube-dl -v <your command line>
[debug] System config: []
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2015.12.06
[debug] Git HEAD: 135392e
@@ -1244,7 +1257,7 @@ For bug reports, this means that your report should contain the *complete* outpu
If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `http://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `http://www.youtube.com/`) is *not* an example URL.
**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.
### Are you using the latest version?

View File

@@ -14,7 +14,7 @@ import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import get_testcases
from test.helper import gettestcases
from youtube_dl.utils import compat_urllib_parse_urlparse
from youtube_dl.utils import compat_urllib_request
@@ -24,7 +24,7 @@ if len(sys.argv) > 1:
else:
METHOD = 'EURISTIC'
for test in get_testcases():
for test in gettestcases():
if METHOD == 'EURISTIC':
try:
webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()

5
devscripts/install_jython.sh Executable file
View File

@@ -0,0 +1,5 @@
#!/bin/bash
wget http://central.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar
java -jar jython-installer-2.7.1.jar -s -d "$HOME/jython"
$HOME/jython/bin/jython -m pip install nose

View File

@@ -1,6 +1,7 @@
#!/bin/bash
DOWNLOAD_TESTS="age_restriction|download|subtitles|write_annotations|iqiyi_sdk_interpreter|youtube_lists"
# Keep this list in sync with the `offlinetest` target in Makefile
DOWNLOAD_TESTS="age_restriction|download|iqiyi_sdk_interpreter|socks|subtitles|write_annotations|youtube_lists|youtube_signature"
test_set=""
multiprocess_args=""

View File

@@ -3,8 +3,7 @@
- **1up.com**
- **20min**
- **220.ro**
- **22tracks:genre**
- **22tracks:track**
- **23video**
- **24video**
- **3qsdn**: 3Q SDN
- **3sat**
@@ -12,6 +11,7 @@
- **56.com**
- **5min**
- **6play**
- **7plus**
- **8tracks**
- **91porn**
- **9c9media**
@@ -36,12 +36,13 @@
- **AdultSwim**
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
- **afreecatv**: afreecatv.com
- **afreecatv:global**: afreecatv.com
- **AirMozilla**
- **AliExpressLive**
- **AlJazeera**
- **Allocine**
- **AlphaPorno**
- **AMCNetworks**
- **AmericasTestKitchen**
- **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **AnimeOnDemand**
- **anitube.se**
@@ -113,26 +114,28 @@
- **BokeCC**
- **BostonGlobe**
- **Bpb**: Bundeszentrale für politische Bildung
- **BR**: Bayerischer Rundfunk Mediathek
- **BR**: Bayerischer Rundfunk
- **BravoTV**
- **Break**
- **brightcove:legacy**
- **brightcove:new**
- **BRMediathek**: Bayerischer Rundfunk Mediathek
- **bt:article**: Bergens Tidende Articles
- **bt:vestlendingen**: Bergens Tidende - Vestlendingen
- **BuzzFeed**
- **BYUtv**
- **BYUtvEvent**
- **Camdemy**
- **CamdemyFolder**
- **CamWithHer**
- **canalc2.tv**
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
- **Canvas**: canvas.be and een.be
- **Canalplus**: mycanal.fr and piwiplus.fr
- **Canvas**
- **CanvasEen**: canvas.be and een.be
- **CarambaTV**
- **CarambaTVPage**
- **CartoonNetwork**
- **cbc.ca**
- **cbc.ca:olympics**
- **cbc.ca:player**
- **cbc.ca:watch**
- **cbc.ca:watch:video**
@@ -156,6 +159,7 @@
- **Cinchcast**
- **CJSW**
- **cliphunter**
- **Clippit**
- **ClipRs**
- **Clipsyndicate**
- **CloserToTruth**
@@ -168,7 +172,6 @@
- **CNN**
- **CNNArticle**
- **CNNBlogs**
- **CollegeRama**
- **ComCarCoff**
- **ComedyCentral**
- **ComedyCentralFullEpisodes**
@@ -187,7 +190,7 @@
- **CSpan**: C-SPAN
- **CtsNews**: 華視新聞
- **CTVNews**
- **culturebox.francetvinfo.fr**
- **Culturebox**
- **CultureUnplugged**
- **curiositystream**
- **curiositystream:collection**
@@ -196,9 +199,8 @@
- **dailymotion**
- **dailymotion:playlist**
- **dailymotion:user**
- **DailymotionCloud**
- **Daisuki**
- **DaisukiPlaylist**
- **DaisukiMotto**
- **DaisukiMottoPlaylist**
- **daum.net**
- **daum.net:clip**
- **daum.net:playlist**
@@ -209,6 +211,7 @@
- **defense.gouv.fr**
- **democracynow**
- **DHM**: Filmarchiv - Deutsches Historisches Museum
- **Digg**
- **DigitallySpeaking**
- **Digiteka**
- **Discovery**
@@ -241,8 +244,9 @@
- **eHow**
- **Einthusan**
- **eitb.tv**
- **EllenTV**
- **EllenTV:clips**
- **EllenTube**
- **EllenTubePlaylist**
- **EllenTubeVideo**
- **ElPais**: El País
- **Embedly**
- **EMPFlix**
@@ -265,10 +269,10 @@
- **fc2**
- **fc2:embed**
- **Fczenit**
- **fernsehkritik.tv**
- **filmon**
- **filmon:channel**
- **Firstpost**
- **Filmweb**
- **FiveThirtyEight**
- **FiveTV**
- **Flickr**
- **Flipagram**
@@ -282,23 +286,26 @@
- **foxnews:article**
- **foxnews:insider**
- **FoxSports**
- **france2.fr:generation-quoi**
- **france2.fr:generation-what**
- **FranceCulture**
- **FranceInter**
- **FranceTV**
- **FranceTVEmbed**
- **francetvinfo.fr**
- **FranceTVJeunesse**
- **FranceTVSite**
- **Freesound**
- **freespeech.org**
- **FreshLive**
- **Funimation**
- **Funk**
- **FunnyOrDie**
- **Fusion**
- **Fux**
- **FXNetworks**
- **GameInformer**
- **GameOne**
- **gameone:playlist**
- **Gamersyde**
- **GameSpot**
- **GameStar**
- **Gaskrank**
@@ -337,6 +344,7 @@
- **HornBunny**
- **HotNewHipHop**
- **HotStar**
- **hotstar:playlist**
- **Howcast**
- **HowStuffWorks**
- **HRTi**
@@ -357,10 +365,12 @@
- **InfoQ**
- **Instagram**
- **instagram:user**: Instagram user profile
- **Internazionale**
- **InternetVideoArchive**
- **IPrima**
- **iqiyi**: 爱奇艺
- **Ir90Tv**
- **ITTF**
- **ITV**
- **ivi**: ivi.ru
- **ivi:compilation**: ivi.ru compilations
@@ -374,8 +384,8 @@
- **Jove**
- **jpopsuki.tv**
- **JWPlatform**
- **Kakao**
- **Kaltura**
- **Kamcord**
- **KanalPlay**: Kanal 5/9/11 Play
- **Kankan**
- **Karaoketv**
@@ -417,6 +427,7 @@
- **limelight:channel_list**
- **LiTV**
- **LiveLeak**
- **LiveLeakEmbed**
- **livestream**
- **livestream:original**
- **LnkGo**
@@ -429,15 +440,20 @@
- **m6**
- **macgamestore**: MacGameStore trailers
- **mailru**: Видео@Mail.Ru
- **mailru:music**: Музыка@Mail.Ru
- **mailru:music:search**: Музыка@Mail.Ru
- **MakersChannel**
- **MakerTV**
- **mangomolo:live**
- **mangomolo:video**
- **ManyVids**
- **massengeschmack.tv**
- **MatchTV**
- **MDR**: MDR.DE and KiKA
- **media.ccc.de**
- **Medialaan**
- **Mediaset**
- **Mediasite**
- **Medici**
- **megaphone.fm**: megaphone.fm embedded players
- **Meipai**: 美拍
@@ -467,6 +483,7 @@
- **Moniker**: allmyvideos.net and vidspot.net
- **Morningstar**: morningstar.com
- **Motherless**
- **MotherlessGroup**
- **Motorsport**: motorsport.com
- **MovieClips**
- **MovieFap**
@@ -489,8 +506,8 @@
- **MySpace:album**
- **MySpass**
- **Myvi**
- **myvideo** (Currently broken)
- **MyVidster**
- **MyviEmbed**
- **n-tv.de**
- **natgeo**
- **natgeo:episodeguide**
@@ -499,7 +516,8 @@
- **NBA**
- **NBC**
- **NBCNews**
- **NBCOlympics**
- **nbcolympics**
- **nbcolympics:stream**
- **NBCSports**
- **NBCSportsVPlayer**
- **ndr**: NDR.de - Norddeutscher Rundfunk
@@ -532,6 +550,7 @@
- **nhl.com:videocenter:category**: NHL videocenter category
- **nick.com**
- **nick.de**
- **nickelodeon:br**
- **nickelodeonru**
- **nicknight**
- **niconico**: ニコニコ動画
@@ -550,8 +569,6 @@
- **nowness**
- **nowness:playlist**
- **nowness:series**
- **NowTV** (Currently broken)
- **NowTVList**
- **nowvideo**: NowVideo
- **Noz**
- **npo**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
@@ -587,6 +604,7 @@
- **Openload**
- **OraTV**
- **orf:fm4**: radio FM4
- **orf:fm4:story**: fm4.orf.at stories
- **orf:iptv**: iptv.ORF.at
- **orf:oe1**: Radio Österreich 1
- **orf:tvthek**: ORF TVthek
@@ -600,6 +618,7 @@
- **pcmag**
- **PearVideo**
- **People**
- **PerformGroup**
- **periscope**: Periscope
- **periscope:user**: Periscope user videos
- **PhilharmonieDeParis**: Philharmonie de Paris
@@ -620,7 +639,9 @@
- **Pokemon**
- **PolskieRadio**
- **PolskieRadioCategory**
- **PopcornTV**
- **PornCom**
- **PornerBros**
- **PornFlip**
- **PornHd**
- **PornHub**: PornHub and Thumbzilla
@@ -629,6 +650,7 @@
- **Pornotube**
- **PornoVoisines**
- **PornoXO**
- **PornTube**
- **PressTV**
- **PrimeShareTV**
- **PromptFile**
@@ -651,9 +673,12 @@
- **Rai**
- **RaiPlay**
- **RaiPlayLive**
- **RaiPlayPlaylist**
- **RBMARadio**
- **RDS**: RDS.ca
- **RedBullTV**
- **Reddit**
- **RedditR**
- **RedTube**
- **RegioTV**
- **RENTV**
@@ -664,7 +689,6 @@
- **revision**
- **revision3:embed**
- **RICE**
- **RingTV**
- **RMCDecouverte**
- **RockstarGames**
- **RoosterTeeth**
@@ -685,6 +709,7 @@
- **rtve.es:live**: RTVE.es live streams
- **rtve.es:television**
- **RTVNH**
- **RTVS**
- **Rudo**
- **RUHD**
- **RulePorn**
@@ -693,13 +718,13 @@
- **rutube:embed**: Rutube embedded videos
- **rutube:movie**: Rutube movies
- **rutube:person**: Rutube person videos
- **rutube:playlist**: Rutube playlists
- **RUTV**: RUTV.RU
- **Ruutu**
- **Ruv**
- **safari**: safaribooksonline.com online video
- **safari:api**
- **safari:course**: safaribooksonline.com online courses
- **Sandia**: Sandia National Laboratories
- **Sapo**: SAPO Vídeos
- **savefrom.net**
- **SBS**: sbs.com.au
@@ -712,8 +737,12 @@
- **SenateISVP**
- **SendtoNews**
- **ServingSys**
- **Servus**
- **Sexu**
- **SeznamZpravy**
- **SeznamZpravyArticle**
- **Shahid**
- **ShahidShow**
- **Shared**: shared.sx
- **ShowRoomLive**
- **Sina**
@@ -722,6 +751,7 @@
- **skynewsarabia:video**
- **SkySports**
- **Slideshare**
- **SlidesLive**
- **Slutload**
- **smotri**: Smotri.com
- **smotri:broadcast**: Smotri.com broadcasts
@@ -752,7 +782,7 @@
- **Sport5**
- **SportBoxEmbed**
- **SportDeutschland**
- **Sportschau**
- **SpringboardPlatform**
- **Sprout**
- **sr:mediathek**: Saarländischer Rundfunk
- **SRGSSR**
@@ -765,6 +795,7 @@
- **streamcloud.eu**
- **StreamCZ**
- **StreetVoice**
- **StretchInternet**
- **SunPorno**
- **SVT**
- **SVTPlay**: SVT Play and Öppet arkiv
@@ -776,7 +807,7 @@
- **tagesschau:player**
- **Tass**
- **TastyTrade**
- **TBS** (Currently broken)
- **TBS**
- **TDSLifeway**
- **teachertube**: teachertube.com videos
- **teachertube:user:collection**: teachertube.com user and collection videos
@@ -791,6 +822,8 @@
- **Telegraaf**
- **TeleMB**
- **TeleQuebec**
- **TeleQuebecEmission**
- **TeleQuebecLive**
- **TeleTask**
- **Telewebion**
- **TF1**
@@ -800,7 +833,6 @@
- **ThePlatform**
- **ThePlatformFeed**
- **TheScene**
- **TheSixtyOne**
- **TheStar**
- **TheSun**
- **TheWeatherChannel**
@@ -847,6 +879,8 @@
- **tvland.com**
- **TVN24**
- **TVNoe**
- **TVNow**
- **TVNowList**
- **tvp**: Telewizja Polska
- **tvp:embed**: Telewizja Polska
- **tvp:series**
@@ -868,8 +902,11 @@
- **udemy**
- **udemy:course**
- **UDNEmbed**: 聯合影音
- **UFCTV**
- **UKTVPlay**
- **umg:de**: Universal Music Deutschland
- **Unistra**
- **Unity**
- **uol.com.br**
- **uplynk**
- **uplynk:preplay**
@@ -913,6 +950,7 @@
- **VideoPress**
- **videoweed**: VideoWeed
- **Vidio**
- **VidLii**
- **vidme**
- **vidme:user**
- **vidme:user:likes**
@@ -953,10 +991,12 @@
- **VoiceRepublic**
- **Voot**
- **VoxMedia**
- **VoxMediaVolume**
- **Vporn**
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **Vrak**
- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
- **VrtNU**: VrtNU.be
- **vrv**
- **vrv:series**
- **VShare**
@@ -973,10 +1013,14 @@
- **WatchIndianPorn**: Watch Indian Porn
- **WDR**
- **wdr:mobile**
- **WDRElefant**
- **WDRPage**
- **Webcaster**
- **WebcasterFeed**
- **WebOfStories**
- **WebOfStoriesPlaylist**
- **Weibo**
- **WeiboMobile**
- **WeiqiTV**: WQTV
- **wholecloud**: WholeCloud
- **Wimp**
@@ -996,6 +1040,8 @@
- **xiami:artist**: 虾米音乐 - 歌手
- **xiami:collection**: 虾米音乐 - 精选集
- **xiami:song**: 虾米音乐
- **ximalaya**: 喜马拉雅FM
- **ximalaya:album**: 喜马拉雅FM 专辑
- **XMinus**
- **XNXX**
- **Xstream**
@@ -1015,6 +1061,9 @@
- **YouJizz**
- **youku**: 优酷
- **youku:show**
- **YouNowChannel**
- **YouNowLive**
- **YouNowMoment**
- **YouPorn**
- **YourUpload**
- **youtube**: YouTube.com
@@ -1028,7 +1077,6 @@
- **youtube:search**: YouTube.com searches
- **youtube:search:date**: YouTube.com searches, newest videos first
- **youtube:search_url**: YouTube.com search URLs
- **youtube:shared**
- **youtube:show**: YouTube.com (multi-season) shows
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)

View File

@@ -3,4 +3,4 @@ universal = True
[flake8]
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git
ignore = E402,E501,E731
ignore = E402,E501,E731,E741

View File

@@ -109,6 +109,7 @@ setup(
author_email='ytdl@yt-dl.org',
maintainer='Sergey M.',
maintainer_email='dstftw@gmail.com',
license='Unlicense',
packages=[
'youtube_dl',
'youtube_dl.extractor', 'youtube_dl.downloader',

View File

@@ -10,6 +10,7 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, expect_dict, expect_value
from youtube_dl.compat import compat_etree_fromstring
from youtube_dl.extractor.common import InfoExtractor
from youtube_dl.extractor import YoutubeIE, get_info_extractor
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
@@ -488,6 +489,211 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
def test_parse_mpd_formats(self):
_TEST_CASES = [
(
# https://github.com/rg3/youtube-dl/issues/13919
# Also tests duplicate representation ids, see
# https://github.com/rg3/youtube-dl/issues/15111
'float_duration',
'http://unknown/manifest.mpd',
[{
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'm4a',
'format_id': '318597',
'format_note': 'DASH audio',
'protocol': 'http_dash_segments',
'acodec': 'mp4a.40.2',
'vcodec': 'none',
'tbr': 61.587,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': '318597',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'none',
'vcodec': 'avc1.42001f',
'tbr': 318.597,
'width': 340,
'height': 192,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': '638590',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'none',
'vcodec': 'avc1.42001f',
'tbr': 638.59,
'width': 512,
'height': 288,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': '1022565',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'none',
'vcodec': 'avc1.4d001f',
'tbr': 1022.565,
'width': 688,
'height': 384,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': '2046506',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'none',
'vcodec': 'avc1.4d001f',
'tbr': 2046.506,
'width': 1024,
'height': 576,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': '3998017',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'none',
'vcodec': 'avc1.640029',
'tbr': 3998.017,
'width': 1280,
'height': 720,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': '5997485',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'none',
'vcodec': 'avc1.640032',
'tbr': 5997.485,
'width': 1920,
'height': 1080,
}]
), (
# https://github.com/rg3/youtube-dl/pull/14844
'urls_only',
'http://unknown/manifest.mpd',
[{
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': 'h264_aac_144p_m4s',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'mp4a.40.2',
'vcodec': 'avc3.42c01e',
'tbr': 200,
'width': 256,
'height': 144,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': 'h264_aac_240p_m4s',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'mp4a.40.2',
'vcodec': 'avc3.42c01e',
'tbr': 400,
'width': 424,
'height': 240,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': 'h264_aac_360p_m4s',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'mp4a.40.2',
'vcodec': 'avc3.42c01e',
'tbr': 800,
'width': 640,
'height': 360,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': 'h264_aac_480p_m4s',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'mp4a.40.2',
'vcodec': 'avc3.42c01e',
'tbr': 1200,
'width': 856,
'height': 480,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': 'h264_aac_576p_m4s',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'mp4a.40.2',
'vcodec': 'avc3.42c01e',
'tbr': 1600,
'width': 1024,
'height': 576,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': 'h264_aac_720p_m4s',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'mp4a.40.2',
'vcodec': 'avc3.42c01e',
'tbr': 2400,
'width': 1280,
'height': 720,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
'format_id': 'h264_aac_1080p_m4s',
'format_note': 'DASH video',
'protocol': 'http_dash_segments',
'acodec': 'mp4a.40.2',
'vcodec': 'avc3.42c01e',
'tbr': 4400,
'width': 1920,
'height': 1080,
}]
)
]
for mpd_file, mpd_url, expected_formats in _TEST_CASES:
with io.open('./test/testdata/mpd/%s.mpd' % mpd_file,
mode='r', encoding='utf-8') as f:
formats = self.ie._parse_mpd_formats(
compat_etree_fromstring(f.read().encode('utf-8')),
mpd_url=mpd_url)
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
def test_parse_f4m_formats(self):
_TEST_CASES = [
(
# https://github.com/rg3/youtube-dl/issues/14660
'custom_base_url',
'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m',
[{
'manifest_url': 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m',
'ext': 'flv',
'format_id': '2148',
'protocol': 'f4m',
'tbr': 2148,
'width': 1280,
'height': 720,
}]
),
]
for f4m_file, f4m_url, expected_formats in _TEST_CASES:
with io.open('./test/testdata/f4m/%s.f4m' % f4m_file,
mode='r', encoding='utf-8') as f:
formats = self.ie._parse_f4m_formats(
compat_etree_fromstring(f.read().encode('utf-8')),
f4m_url, None)
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
if __name__ == '__main__':
unittest.main()

View File

@@ -466,12 +466,18 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL({'simulate': True})
self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best')
ydl = YDL({})
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
ydl = YDL({'simulate': True})
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'bestvideo+bestaudio/best')
ydl = YDL({'outtmpl': '-'})
self.assertEqual(ydl._default_format_spec({}), 'best')
self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
ydl = YDL({})
self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo+bestaudio/best')
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best')
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
class TestYoutubeDL(unittest.TestCase):
@@ -770,6 +776,12 @@ class TestYoutubeDL(unittest.TestCase):
result = get_ids({'playlist_items': '10'})
self.assertEqual(result, [])
result = get_ids({'playlist_items': '3-10'})
self.assertEqual(result, [3, 4])
result = get_ids({'playlist_items': '2-4,3-4,3'})
self.assertEqual(result, [2, 3, 4])
def test_urlopen_no_file_protocol(self):
# see https://github.com/rg3/youtube-dl/issues/8227
ydl = YDL()

View File

@@ -92,8 +92,8 @@ class TestDownload(unittest.TestCase):
def generator(test_case, tname):
def test_template(self):
ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])]
ie = youtube_dl.extractor.get_info_extractor(test_case['name'])()
other_ies = [get_info_extractor(ie_key)() for ie_key in test_case.get('add_ie', [])]
is_playlist = any(k.startswith('playlist') for k in test_case)
test_cases = test_case.get(
'playlist', [] if is_playlist else [test_case])

View File

@@ -0,0 +1,125 @@
#!/usr/bin/env python
# coding: utf-8
from __future__ import unicode_literals
# Allow direct execution
import os
import re
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import try_rm
from youtube_dl import YoutubeDL
from youtube_dl.compat import compat_http_server
from youtube_dl.downloader.http import HttpFD
from youtube_dl.utils import encodeFilename
import ssl
import threading
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
def http_server_port(httpd):
if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
# In Jython SSLSocket is not a subclass of socket.socket
sock = httpd.socket.sock
else:
sock = httpd.socket
return sock.getsockname()[1]
TEST_SIZE = 10 * 1024
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
def log_message(self, format, *args):
pass
def send_content_range(self, total=None):
range_header = self.headers.get('Range')
start = end = None
if range_header:
mobj = re.search(r'^bytes=(\d+)-(\d+)', range_header)
if mobj:
start = int(mobj.group(1))
end = int(mobj.group(2))
valid_range = start is not None and end is not None
if valid_range:
content_range = 'bytes %d-%d' % (start, end)
if total:
content_range += '/%d' % total
self.send_header('Content-Range', content_range)
return (end - start + 1) if valid_range else total
def serve(self, range=True, content_length=True):
self.send_response(200)
self.send_header('Content-Type', 'video/mp4')
size = TEST_SIZE
if range:
size = self.send_content_range(TEST_SIZE)
if content_length:
self.send_header('Content-Length', size)
self.end_headers()
self.wfile.write(b'#' * size)
def do_GET(self):
if self.path == '/regular':
self.serve()
elif self.path == '/no-content-length':
self.serve(content_length=False)
elif self.path == '/no-range':
self.serve(range=False)
elif self.path == '/no-range-no-content-length':
self.serve(range=False, content_length=False)
else:
assert False
class FakeLogger(object):
def debug(self, msg):
pass
def warning(self, msg):
pass
def error(self, msg):
pass
class TestHttpFD(unittest.TestCase):
def setUp(self):
self.httpd = compat_http_server.HTTPServer(
('127.0.0.1', 0), HTTPTestRequestHandler)
self.port = http_server_port(self.httpd)
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
self.server_thread.daemon = True
self.server_thread.start()
def download(self, params, ep):
params['logger'] = FakeLogger()
ydl = YoutubeDL(params)
downloader = HttpFD(ydl, params)
filename = 'testfile.mp4'
try_rm(encodeFilename(filename))
self.assertTrue(downloader.real_download(filename, {
'url': 'http://127.0.0.1:%d/%s' % (self.port, ep),
}))
self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE)
try_rm(encodeFilename(filename))
def download_all(self, params):
for ep in ('regular', 'no-content-length', 'no-range', 'no-range-no-content-length'):
self.download(params, ep)
def test_regular(self):
self.download_all({})
def test_chunked(self):
self.download_all({
'http_chunk_size': 1000,
})
if __name__ == '__main__':
unittest.main()

View File

@@ -47,7 +47,7 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
self.end_headers()
return
new_url = 'http://localhost:%d/中文.html' % http_server_port(self.server)
new_url = 'http://127.0.0.1:%d/中文.html' % http_server_port(self.server)
self.send_response(302)
self.send_header(b'Location', new_url.encode('utf-8'))
self.end_headers()
@@ -74,7 +74,7 @@ class FakeLogger(object):
class TestHTTP(unittest.TestCase):
def setUp(self):
self.httpd = compat_http_server.HTTPServer(
('localhost', 0), HTTPTestRequestHandler)
('127.0.0.1', 0), HTTPTestRequestHandler)
self.port = http_server_port(self.httpd)
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
self.server_thread.daemon = True
@@ -86,15 +86,15 @@ class TestHTTP(unittest.TestCase):
return
ydl = YoutubeDL({'logger': FakeLogger()})
r = ydl.extract_info('http://localhost:%d/302' % self.port)
self.assertEqual(r['entries'][0]['url'], 'http://localhost:%d/vid.mp4' % self.port)
r = ydl.extract_info('http://127.0.0.1:%d/302' % self.port)
self.assertEqual(r['entries'][0]['url'], 'http://127.0.0.1:%d/vid.mp4' % self.port)
class TestHTTPS(unittest.TestCase):
def setUp(self):
certfn = os.path.join(TEST_DIR, 'testcert.pem')
self.httpd = compat_http_server.HTTPServer(
('localhost', 0), HTTPTestRequestHandler)
('127.0.0.1', 0), HTTPTestRequestHandler)
self.httpd.socket = ssl.wrap_socket(
self.httpd.socket, certfile=certfn, server_side=True)
self.port = http_server_port(self.httpd)
@@ -107,11 +107,11 @@ class TestHTTPS(unittest.TestCase):
ydl = YoutubeDL({'logger': FakeLogger()})
self.assertRaises(
Exception,
ydl.extract_info, 'https://localhost:%d/video.html' % self.port)
ydl.extract_info, 'https://127.0.0.1:%d/video.html' % self.port)
ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True})
r = ydl.extract_info('https://localhost:%d/video.html' % self.port)
self.assertEqual(r['entries'][0]['url'], 'https://localhost:%d/vid.mp4' % self.port)
r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port)
self.assertEqual(r['entries'][0]['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port)
def _build_proxy_handler(name):
@@ -132,23 +132,23 @@ def _build_proxy_handler(name):
class TestProxy(unittest.TestCase):
def setUp(self):
self.proxy = compat_http_server.HTTPServer(
('localhost', 0), _build_proxy_handler('normal'))
('127.0.0.1', 0), _build_proxy_handler('normal'))
self.port = http_server_port(self.proxy)
self.proxy_thread = threading.Thread(target=self.proxy.serve_forever)
self.proxy_thread.daemon = True
self.proxy_thread.start()
self.geo_proxy = compat_http_server.HTTPServer(
('localhost', 0), _build_proxy_handler('geo'))
('127.0.0.1', 0), _build_proxy_handler('geo'))
self.geo_port = http_server_port(self.geo_proxy)
self.geo_proxy_thread = threading.Thread(target=self.geo_proxy.serve_forever)
self.geo_proxy_thread.daemon = True
self.geo_proxy_thread.start()
def test_proxy(self):
geo_proxy = 'localhost:{0}'.format(self.geo_port)
geo_proxy = '127.0.0.1:{0}'.format(self.geo_port)
ydl = YoutubeDL({
'proxy': 'localhost:{0}'.format(self.port),
'proxy': '127.0.0.1:{0}'.format(self.port),
'geo_verification_proxy': geo_proxy,
})
url = 'http://foo.com/bar'
@@ -162,7 +162,7 @@ class TestProxy(unittest.TestCase):
def test_proxy_with_idn(self):
ydl = YoutubeDL({
'proxy': 'localhost:{0}'.format(self.port),
'proxy': '127.0.0.1:{0}'.format(self.port),
})
url = 'http://中文.tw/'
response = ydl.urlopen(url).read().decode('utf-8')

View File

@@ -57,6 +57,7 @@ from youtube_dl.utils import (
read_batch_urls,
sanitize_filename,
sanitize_path,
sanitize_url,
expand_path,
prepend_extension,
replace_extension,
@@ -219,6 +220,12 @@ class TestUtil(unittest.TestCase):
self.assertEqual(sanitize_path('./abc'), 'abc')
self.assertEqual(sanitize_path('./../abc'), '..\\abc')
def test_sanitize_url(self):
self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar')
self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar')
self.assertEqual(sanitize_url('rmtps://foo.bar'), 'rtmps://foo.bar')
self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar')
def test_expand_path(self):
def env(var):
return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)
@@ -279,6 +286,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unescapeHTML('&#47;'), '/')
self.assertEqual(unescapeHTML('&eacute;'), 'é')
self.assertEqual(unescapeHTML('&#2013266066;'), '&#2013266066;')
self.assertEqual(unescapeHTML('&a&quot;'), '&a"')
# HTML5 entities
self.assertEqual(unescapeHTML('&period;&apos;'), '.\'')
@@ -342,6 +350,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361)
self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140)
def test_determine_ext(self):
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
@@ -539,6 +548,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(parse_duration('87 Min.'), 5220)
self.assertEqual(parse_duration('PT1H0.040S'), 3600.04)
self.assertEqual(parse_duration('PT00H03M30SZ'), 210)
self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88)
def test_fix_xml_ampersands(self):
self.assertEqual(
@@ -811,6 +821,9 @@ class TestUtil(unittest.TestCase):
inp = '''{"duration": "00:01:07"}'''
self.assertEqual(js_to_json(inp), '''{"duration": "00:01:07"}''')
inp = '''{segments: [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}'''
self.assertEqual(js_to_json(inp), '''{"segments": [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}''')
def test_js_to_json_edgecases(self):
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
@@ -882,6 +895,13 @@ class TestUtil(unittest.TestCase):
on = js_to_json('{/*comment\n*/42/*comment\n*/:/*comment\n*/42/*comment\n*/}')
self.assertEqual(json.loads(on), {'42': 42})
on = js_to_json('{42:4.2e1}')
self.assertEqual(json.loads(on), {'42': 42.0})
def test_js_to_json_malformed(self):
self.assertEqual(js_to_json('42a1'), '42"a1"')
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
def test_extract_attributes(self):
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
@@ -1063,7 +1083,7 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
<p begin="3" dur="-1">Ignored, three</p>
</div>
</body>
</tt>'''
</tt>'''.encode('utf-8')
srt_data = '''1
00:00:00,000 --> 00:00:01,000
The following line contains Chinese characters and special symbols
@@ -1088,7 +1108,7 @@ Line
<p begin="0" end="1">The first line</p>
</div>
</body>
</tt>'''
</tt>'''.encode('utf-8')
srt_data = '''1
00:00:00,000 --> 00:00:01,000
The first line
@@ -1114,7 +1134,7 @@ The first line
<p style="s1" tts:textDecoration="underline" begin="00:00:09.56" id="p2" end="00:00:12.36"><span style="s2" tts:color="lime">inner<br /> </span>style</p>
</div>
</body>
</tt>'''
</tt>'''.encode('utf-8')
srt_data = '''1
00:00:02,080 --> 00:00:05,839
<font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font>
@@ -1137,6 +1157,26 @@ part 3</font></u>
'''
self.assertEqual(dfxp2srt(dfxp_data_with_style), srt_data)
dfxp_data_non_utf8 = '''<?xml version="1.0" encoding="UTF-16"?>
<tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
<body>
<div xml:lang="en">
<p begin="0" end="1">Line 1</p>
<p begin="1" end="2">第二行</p>
</div>
</body>
</tt>'''.encode('utf-16')
srt_data = '''1
00:00:00,000 --> 00:00:01,000
Line 1
2
00:00:01,000 --> 00:00:02,000
第二行
'''
self.assertEqual(dfxp2srt(dfxp_data_non_utf8), srt_data)
def test_cli_option(self):
self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128'])
self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), [])

10
test/testdata/f4m/custom_base_url.f4m vendored Normal file
View File

@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<manifest xmlns="http://ns.adobe.com/f4m/1.0">
<streamType>recorded</streamType>
<baseURL>http://vod.livestream.com/events/0000000000673980/</baseURL>
<duration>269.293</duration>
<bootstrapInfo profile="named" id="bootstrap_1">AAAAm2Fic3QAAAAAAAAAAQAAAAPoAAAAAAAEG+0AAAAAAAAAAAAAAAAAAQAAABlhc3J0AAAAAAAAAAABAAAAAQAAAC4BAAAAVmFmcnQAAAAAAAAD6AAAAAAEAAAAAQAAAAAAAAAAAAAXcAAAAC0AAAAAAAQHQAAAE5UAAAAuAAAAAAAEGtUAAAEYAAAAAAAAAAAAAAAAAAAAAAA=</bootstrapInfo>
<media url="b90f532f-b0f6-4f4e-8289-706d490b2fd8_2292" bootstrapInfoId="bootstrap_1" bitrate="2148" width="1280" height="720" videoCodec="avc1.4d401f" audioCodec="mp4a.40.2">
<metadata>AgAKb25NZXRhRGF0YQgAAAAIAAhkdXJhdGlvbgBAcNSwIMSbpgAFd2lkdGgAQJQAAAAAAAAABmhlaWdodABAhoAAAAAAAAAJZnJhbWVyYXRlAEA4/7DoLwW3AA12aWRlb2RhdGFyYXRlAECe1DLgjcobAAx2aWRlb2NvZGVjaWQAQBwAAAAAAAAADWF1ZGlvZGF0YXJhdGUAQGSimlvaPKQADGF1ZGlvY29kZWNpZABAJAAAAAAAAAAACQ==</metadata>
</media>
</manifest>

18
test/testdata/mpd/float_duration.mpd vendored Normal file
View File

@@ -0,0 +1,18 @@
<?xml version="1.0" encoding="UTF-8"?>
<MPD xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="urn:mpeg:dash:schema:mpd:2011" type="static" minBufferTime="PT2S" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" mediaPresentationDuration="PT6014S">
<Period bitstreamSwitching="true">
<AdaptationSet mimeType="audio/mp4" codecs="mp4a.40.2" startWithSAP="1" segmentAlignment="true">
<SegmentTemplate timescale="1000000" presentationTimeOffset="0" initialization="ai_$RepresentationID$.mp4d" media="a_$RepresentationID$_$Number$.mp4d" duration="2000000.0" startNumber="0"></SegmentTemplate>
<Representation id="318597" bandwidth="61587"></Representation>
</AdaptationSet>
<AdaptationSet mimeType="video/mp4" startWithSAP="1" segmentAlignment="true">
<SegmentTemplate timescale="1000000" presentationTimeOffset="0" initialization="vi_$RepresentationID$.mp4d" media="v_$RepresentationID$_$Number$.mp4d" duration="2000000.0" startNumber="0"></SegmentTemplate>
<Representation id="318597" codecs="avc1.42001f" width="340" height="192" bandwidth="318597"></Representation>
<Representation id="638590" codecs="avc1.42001f" width="512" height="288" bandwidth="638590"></Representation>
<Representation id="1022565" codecs="avc1.4d001f" width="688" height="384" bandwidth="1022565"></Representation>
<Representation id="2046506" codecs="avc1.4d001f" width="1024" height="576" bandwidth="2046506"></Representation>
<Representation id="3998017" codecs="avc1.640029" width="1280" height="720" bandwidth="3998017"></Representation>
<Representation id="5997485" codecs="avc1.640032" width="1920" height="1080" bandwidth="5997485"></Representation>
</AdaptationSet>
</Period>
</MPD>

218
test/testdata/mpd/urls_only.mpd vendored Normal file
View File

@@ -0,0 +1,218 @@
<?xml version="1.0" ?>
<MPD maxSegmentDuration="PT0H0M10.000S" mediaPresentationDuration="PT0H4M1.728S" minBufferTime="PT1.500S" profiles="urn:mpeg:dash:profile:isoff-main:2011" type="static" xmlns="urn:mpeg:dash:schema:mpd:2011">
<Period duration="PT0H4M1.728S">
<AdaptationSet bitstreamSwitching="true" lang="und" maxHeight="1080" maxWidth="1920" par="16:9" segmentAlignment="true">
<ContentComponent contentType="video" id="1"/>
<Representation audioSamplingRate="44100" bandwidth="200000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="144" id="h264_aac_144p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="256">
<SegmentList duration="10000" timescale="1000">
<Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/init/432f65a0.mp4"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/0/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/1/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/2/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/3/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/4/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/5/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/6/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/7/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/8/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/9/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/10/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/11/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/12/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/13/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/14/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/15/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/16/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/17/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/18/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/19/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/20/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/21/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/22/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/23/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/24/432f65a0.m4s"/>
</SegmentList>
</Representation>
<Representation audioSamplingRate="44100" bandwidth="400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="240" id="h264_aac_240p_m4s" mimeType="video/mp4" sar="160:159" startWithSAP="1" width="424">
<SegmentList duration="10000" timescale="1000">
<Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/init/432f65a0.mp4"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/0/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/1/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/2/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/3/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/4/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/5/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/6/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/7/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/8/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/9/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/10/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/11/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/12/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/13/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/14/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/15/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/16/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/17/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/18/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/19/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/20/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/21/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/22/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/23/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/24/432f65a0.m4s"/>
</SegmentList>
</Representation>
<Representation audioSamplingRate="44100" bandwidth="800000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="360" id="h264_aac_360p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="640">
<SegmentList duration="10000" timescale="1000">
<Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/init/432f65a0.mp4"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/0/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/1/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/2/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/3/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/4/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/5/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/6/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/7/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/8/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/9/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/10/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/11/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/12/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/13/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/14/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/15/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/16/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/17/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/18/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/19/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/20/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/21/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/22/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/23/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/24/432f65a0.m4s"/>
</SegmentList>
</Representation>
<Representation audioSamplingRate="44100" bandwidth="1200000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="480" id="h264_aac_480p_m4s" mimeType="video/mp4" sar="320:321" startWithSAP="1" width="856">
<SegmentList duration="10000" timescale="1000">
<Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/init/432f65a0.mp4"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/0/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/1/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/2/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/3/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/4/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/5/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/6/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/7/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/8/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/9/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/10/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/11/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/12/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/13/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/14/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/15/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/16/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/17/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/18/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/19/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/20/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/21/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/22/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/23/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/24/432f65a0.m4s"/>
</SegmentList>
</Representation>
<Representation audioSamplingRate="44100" bandwidth="1600000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="576" id="h264_aac_576p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1024">
<SegmentList duration="10000" timescale="1000">
<Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/init/432f65a0.mp4"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/0/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/1/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/2/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/3/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/4/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/5/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/6/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/7/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/8/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/9/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/10/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/11/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/12/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/13/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/14/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/15/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/16/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/17/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/18/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/19/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/20/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/21/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/22/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/23/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/24/432f65a0.m4s"/>
</SegmentList>
</Representation>
<Representation audioSamplingRate="44100" bandwidth="2400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="720" id="h264_aac_720p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1280">
<SegmentList duration="10000" timescale="1000">
<Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/init/432f65a0.mp4"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/0/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/1/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/2/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/3/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/4/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/5/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/6/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/7/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/8/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/9/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/10/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/11/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/12/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/13/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/14/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/15/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/16/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/17/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/18/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/19/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/20/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/21/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/22/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/23/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/24/432f65a0.m4s"/>
</SegmentList>
</Representation>
<Representation audioSamplingRate="44100" bandwidth="4400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="1080" id="h264_aac_1080p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1920">
<SegmentList duration="10000" timescale="1000">
<Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/init/432f65a0.mp4"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/0/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/1/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/2/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/3/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/4/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/5/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/6/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/7/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/8/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/9/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/10/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/11/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/12/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/13/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/14/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/15/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/16/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/17/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/18/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/19/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/20/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/21/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/22/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/23/432f65a0.m4s"/>
<SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/24/432f65a0.m4s"/>
</SegmentList>
</Representation>
</AdaptationSet>
</Period>
</MPD>

View File

@@ -65,6 +65,7 @@ from .utils import (
locked_file,
make_HTTPS_handler,
MaxDownloadsReached,
orderedSet,
PagedList,
parse_filesize,
PerRequestProxyHandler,
@@ -92,6 +93,7 @@ from .utils import (
)
from .cache import Cache
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
from .extractor.openload import PhantomJSwrapper
from .downloader import get_suitable_downloader
from .downloader.rtmp import rtmpdump_version
from .postprocessor import (
@@ -296,13 +298,20 @@ class YoutubeDL(object):
the downloader (see youtube_dl/downloader/common.py):
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
noresizebuffer, retries, continuedl, noprogress, consoletitle,
xattr_set_filesize, external_downloader_args, hls_use_mpegts.
xattr_set_filesize, external_downloader_args, hls_use_mpegts,
http_chunk_size.
The following options are used by the post processors:
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
otherwise prefer avconv.
postprocessor_args: A list of additional command-line arguments for the
postprocessor.
The following options are used by the Youtube extractor:
youtube_include_dash_manifest: If True (default), DASH manifests and related
data will be downloaded and processed by extractor.
You can reduce network I/O by disabling it if you don't
care about DASH.
"""
_NUMERIC_FIELDS = set((
@@ -901,15 +910,25 @@ class YoutubeDL(object):
yield int(item)
else:
yield int(string_segment)
playlistitems = iter_playlistitems(playlistitems_str)
playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
ie_entries = ie_result['entries']
def make_playlistitems_entries(list_ie_entries):
num_entries = len(list_ie_entries)
return [
list_ie_entries[i - 1] for i in playlistitems
if -num_entries <= i - 1 < num_entries]
def report_download(num_entries):
self.to_screen(
'[%s] playlist %s: Downloading %d videos' %
(ie_result['extractor'], playlist, num_entries))
if isinstance(ie_entries, list):
n_all_entries = len(ie_entries)
if playlistitems:
entries = [
ie_entries[i - 1] for i in playlistitems
if -n_all_entries <= i - 1 < n_all_entries]
entries = make_playlistitems_entries(ie_entries)
else:
entries = ie_entries[playliststart:playlistend]
n_entries = len(entries)
@@ -927,20 +946,16 @@ class YoutubeDL(object):
entries = ie_entries.getslice(
playliststart, playlistend)
n_entries = len(entries)
self.to_screen(
'[%s] playlist %s: Downloading %d videos' %
(ie_result['extractor'], playlist, n_entries))
report_download(n_entries)
else: # iterable
if playlistitems:
entry_list = list(ie_entries)
entries = [entry_list[i - 1] for i in playlistitems]
entries = make_playlistitems_entries(list(itertools.islice(
ie_entries, 0, max(playlistitems))))
else:
entries = list(itertools.islice(
ie_entries, playliststart, playlistend))
n_entries = len(entries)
self.to_screen(
'[%s] playlist %s: Downloading %d videos' %
(ie_result['extractor'], playlist, n_entries))
report_download(n_entries)
if self.params.get('playlistreverse', False):
entries = entries[::-1]
@@ -961,6 +976,8 @@ class YoutubeDL(object):
'playlist': playlist,
'playlist_id': ie_result.get('id'),
'playlist_title': ie_result.get('title'),
'playlist_uploader': ie_result.get('uploader'),
'playlist_uploader_id': ie_result.get('uploader_id'),
'playlist_index': i + playliststart,
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
@@ -1016,7 +1033,7 @@ class YoutubeDL(object):
'!=': operator.ne,
}
operator_rex = re.compile(r'''(?x)\s*
(?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
(?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
$
@@ -1065,22 +1082,27 @@ class YoutubeDL(object):
return _filter
def _default_format_spec(self, info_dict, download=True):
req_format_list = []
def can_have_partial_formats():
if self.params.get('simulate', False):
return True
if not download:
return True
if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
return False
if info_dict.get('is_live'):
return False
def can_merge():
merger = FFmpegMergerPP(self)
return merger.available and merger.can_merge()
if can_have_partial_formats():
req_format_list.append('bestvideo+bestaudio')
req_format_list.append('best')
def prefer_best():
if self.params.get('simulate', False):
return False
if not download:
return False
if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
return True
if info_dict.get('is_live'):
return True
if not can_merge():
return True
return False
req_format_list = ['bestvideo+bestaudio', 'best']
if prefer_best():
req_format_list.reverse()
return '/'.join(req_format_list)
def build_format_selector(self, format_spec):
@@ -1483,12 +1505,14 @@ class YoutubeDL(object):
def is_wellformed(f):
url = f.get('url')
valid_url = url and isinstance(url, compat_str)
if not valid_url:
if not url:
self.report_warning(
'"url" field is missing or empty - skipping format, '
'there is an error in extractor')
return valid_url
return False
if isinstance(url, bytes):
sanitize_string_field(f, 'url')
return True
# Filter out malformed formats for better extraction robustness
formats = list(filter(is_wellformed, formats))
@@ -1500,7 +1524,7 @@ class YoutubeDL(object):
sanitize_string_field(format, 'format_id')
sanitize_numeric_fields(format)
format['url'] = sanitize_url(format['url'])
if format.get('format_id') is None:
if not format.get('format_id'):
format['format_id'] = compat_str(i)
else:
# Sanitize format_id from characters used in format selector expression
@@ -1708,12 +1732,17 @@ class YoutubeDL(object):
if filename is None:
return
try:
dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
if dn and not os.path.exists(dn):
os.makedirs(dn)
except (OSError, IOError) as err:
self.report_error('unable to create directory ' + error_to_compat_str(err))
def ensure_dir_exists(path):
try:
dn = os.path.dirname(path)
if dn and not os.path.exists(dn):
os.makedirs(dn)
return True
except (OSError, IOError) as err:
self.report_error('unable to create directory ' + error_to_compat_str(err))
return False
if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
return
if self.params.get('writedescription', False):
@@ -1756,29 +1785,30 @@ class YoutubeDL(object):
ie = self.get_info_extractor(info_dict['extractor_key'])
for sub_lang, sub_info in subtitles.items():
sub_format = sub_info['ext']
if sub_info.get('data') is not None:
sub_data = sub_info['data']
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
else:
try:
sub_data = ie._download_webpage(
sub_info['url'], info_dict['id'], note=False)
except ExtractorError as err:
self.report_warning('Unable to download subtitle for "%s": %s' %
(sub_lang, error_to_compat_str(err.cause)))
continue
try:
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
if sub_info.get('data') is not None:
try:
# Use newline='' to prevent conversion of newline characters
# See https://github.com/rg3/youtube-dl/issues/10268
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
subfile.write(sub_info['data'])
except (OSError, IOError):
self.report_error('Cannot write subtitles file ' + sub_filename)
return
else:
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
# Use newline='' to prevent conversion of newline characters
# See https://github.com/rg3/youtube-dl/issues/10268
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
subfile.write(sub_data)
except (OSError, IOError):
self.report_error('Cannot write subtitles file ' + sub_filename)
return
try:
sub_data = ie._request_webpage(
sub_info['url'], info_dict['id'], note=False).read()
with io.open(encodeFilename(sub_filename), 'wb') as subfile:
subfile.write(sub_data)
except (ExtractorError, IOError, OSError, ValueError) as err:
self.report_warning('Unable to download subtitle for "%s": %s' %
(sub_lang, error_to_compat_str(err)))
continue
if self.params.get('writeinfojson', False):
infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
@@ -1851,8 +1881,11 @@ class YoutubeDL(object):
for f in requested_formats:
new_info = dict(info_dict)
new_info.update(f)
fname = self.prepare_filename(new_info)
fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
fname = prepend_extension(
self.prepare_filename(new_info),
'f%s' % f['format_id'], new_info['ext'])
if not ensure_dir_exists(fname):
return
downloaded.append(fname)
partial_success = dl(fname, new_info)
success = success and partial_success
@@ -2201,11 +2234,20 @@ class YoutubeDL(object):
sys.exc_clear()
except Exception:
pass
self._write_string('[debug] Python version %s - %s\n' % (
platform.python_version(), platform_name()))
def python_implementation():
impl_name = platform.python_implementation()
if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
return impl_name
self._write_string('[debug] Python version %s (%s) - %s\n' % (
platform.python_version(), python_implementation(),
platform_name()))
exe_versions = FFmpegPostProcessor.get_versions(self)
exe_versions['rtmpdump'] = rtmpdump_version()
exe_versions['phantomjs'] = PhantomJSwrapper._version()
exe_str = ', '.join(
'%s %s' % (exe, v)
for exe, v in sorted(exe_versions.items())

View File

@@ -191,6 +191,11 @@ def _real_main(argv=None):
if numeric_buffersize is None:
parser.error('invalid buffer size specified')
opts.buffersize = numeric_buffersize
if opts.http_chunk_size is not None:
numeric_chunksize = FileDownloader.parse_bytes(opts.http_chunk_size)
if not numeric_chunksize:
parser.error('invalid http chunk size specified')
opts.http_chunk_size = numeric_chunksize
if opts.playliststart <= 0:
raise ValueError('Playlist start must be positive')
if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
@@ -206,7 +211,7 @@ def _real_main(argv=None):
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']:
parser.error('invalid video recode format specified')
if opts.convertsubtitles is not None:
if opts.convertsubtitles not in ['srt', 'vtt', 'ass']:
if opts.convertsubtitles not in ['srt', 'vtt', 'ass', 'lrc']:
parser.error('invalid subtitle format specified')
if opts.date is not None:
@@ -346,6 +351,7 @@ def _real_main(argv=None):
'keep_fragments': opts.keep_fragments,
'buffersize': opts.buffersize,
'noresizebuffer': opts.noresizebuffer,
'http_chunk_size': opts.http_chunk_size,
'continuedl': opts.continue_dl,
'noprogress': opts.noprogress,
'progress_with_newline': opts.progress_with_newline,
@@ -432,7 +438,7 @@ def _real_main(argv=None):
with YoutubeDL(ydl_opts) as ydl:
# Update version
if opts.update_self:
update_self(ydl.to_screen, opts.verbose, ydl._opener)
update_self(ydl.to_screen, opts.verbose, ydl._opener, opts.prefer_insecure)
# Remove cache dir
if opts.rm_cachedir:

View File

@@ -1,8 +1,8 @@
from __future__ import unicode_literals
import base64
from math import ceil
from .compat import compat_b64decode
from .utils import bytes_to_intlist, intlist_to_bytes
BLOCK_SIZE_BYTES = 16
@@ -180,7 +180,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
"""
NONCE_LENGTH_BYTES = 8
data = bytes_to_intlist(base64.b64decode(data.encode('utf-8')))
data = bytes_to_intlist(compat_b64decode(data))
password = bytes_to_intlist(password.encode('utf-8'))
key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))

View File

@@ -1,13 +1,17 @@
# coding: utf-8
from __future__ import unicode_literals
import base64
import binascii
import collections
import ctypes
import email
import getpass
import io
import itertools
import optparse
import os
import platform
import re
import shlex
import shutil
@@ -15,7 +19,6 @@ import socket
import struct
import subprocess
import sys
import itertools
import xml.etree.ElementTree
@@ -2894,19 +2897,72 @@ except TypeError:
if isinstance(spec, compat_str):
spec = spec.encode('ascii')
return struct.unpack(spec, *args)
class compat_Struct(struct.Struct):
def __init__(self, fmt):
if isinstance(fmt, compat_str):
fmt = fmt.encode('ascii')
super(compat_Struct, self).__init__(fmt)
else:
compat_struct_pack = struct.pack
compat_struct_unpack = struct.unpack
if platform.python_implementation() == 'IronPython' and sys.version_info < (2, 7, 8):
class compat_Struct(struct.Struct):
def unpack(self, string):
if not isinstance(string, buffer): # noqa: F821
string = buffer(string) # noqa: F821
return super(compat_Struct, self).unpack(string)
else:
compat_Struct = struct.Struct
try:
from future_builtins import zip as compat_zip
except ImportError: # not 2.6+ or is 3.x
try:
from itertools import izip as compat_zip # < 2.5 or 3.x
except ImportError:
compat_zip = zip
if sys.version_info < (3, 3):
def compat_b64decode(s, *args, **kwargs):
if isinstance(s, compat_str):
s = s.encode('ascii')
return base64.b64decode(s, *args, **kwargs)
else:
compat_b64decode = base64.b64decode
if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
# PyPy2 prior to version 5.4.0 expects byte strings as Windows function
# names, see the original PyPy issue [1] and the youtube-dl one [2].
# 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name
# 2. https://github.com/rg3/youtube-dl/pull/4392
def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
real = ctypes.WINFUNCTYPE(*args, **kwargs)
def resf(tpl, *args, **kwargs):
funcname, dll = tpl
return real((str(funcname), dll), *args, **kwargs)
return resf
else:
def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
return ctypes.WINFUNCTYPE(*args, **kwargs)
__all__ = [
'compat_HTMLParseError',
'compat_HTMLParser',
'compat_HTTPError',
'compat_Struct',
'compat_b64decode',
'compat_basestring',
'compat_chr',
'compat_cookiejar',
'compat_cookies',
'compat_ctypes_WINFUNCTYPE',
'compat_etree_fromstring',
'compat_etree_register_namespace',
'compat_expanduser',
@@ -2948,5 +3004,6 @@ __all__ = [
'compat_urlretrieve',
'compat_xml_parse_error',
'compat_xpath',
'compat_zip',
'workaround_optparse_bug9161',
]

View File

@@ -49,6 +49,9 @@ class FileDownloader(object):
external_downloader_args: A list of additional command-line arguments for the
external downloader.
hls_use_mpegts: Use the mpegts container for HLS videos.
http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be
useful for bypassing bandwidth throttling imposed by
a webserver (experimental)
Subclasses of this one must re-define the real_download method.
"""
@@ -304,11 +307,11 @@ class FileDownloader(object):
"""Report attempt to resume at given byte."""
self.to_screen('[download] Resuming download at byte %s' % resume_len)
def report_retry(self, count, retries):
def report_retry(self, err, count, retries):
"""Report retry in case of HTTP error 5xx"""
self.to_screen(
'[download] Got server HTTP error. Retrying (attempt %d of %s)...'
% (count, self.format_retries(retries)))
'[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...'
% (error_to_compat_str(err), count, self.format_retries(retries)))
def report_file_already_downloaded(self, file_name):
"""Report file has already been fully downloaded."""

View File

@@ -1,12 +1,12 @@
from __future__ import division, unicode_literals
import base64
import io
import itertools
import time
from .fragment import FragmentFD
from ..compat import (
compat_b64decode,
compat_etree_fromstring,
compat_urlparse,
compat_urllib_error,
@@ -243,8 +243,17 @@ def remove_encrypted_media(media):
media))
def _add_ns(prop):
return '{http://ns.adobe.com/f4m/1.0}%s' % prop
def _add_ns(prop, ver=1):
return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop)
def get_base_url(manifest):
base_url = xpath_text(
manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)],
'base URL', default=None)
if base_url:
base_url = base_url.strip()
return base_url
class F4mFD(FragmentFD):
@@ -303,7 +312,7 @@ class F4mFD(FragmentFD):
boot_info = self._get_bootstrap_from_url(bootstrap_url)
else:
bootstrap_url = None
bootstrap = base64.b64decode(node.text.encode('ascii'))
bootstrap = compat_b64decode(node.text)
boot_info = read_bootstrap_info(bootstrap)
return boot_info, bootstrap_url
@@ -330,17 +339,17 @@ class F4mFD(FragmentFD):
rate, media = list(filter(
lambda f: int(f[0]) == requested_bitrate, formats))[0]
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
# Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
man_base_url = get_base_url(doc) or man_url
base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url'])
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
# From Adobe F4M 3.0 spec:
# The <baseURL> element SHALL be the base URL for all relative
# (HTTP-based) URLs in the manifest. If <baseURL> is not present, said
# URLs should be relative to the location of the containing document.
boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, man_url)
boot_info, bootstrap_url = self._parse_bootstrap_node(
bootstrap_node, man_base_url)
live = boot_info['live']
metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None:
metadata = base64.b64decode(metadata_node.text.encode('ascii'))
metadata = compat_b64decode(metadata_node.text)
else:
metadata = None

View File

@@ -107,19 +107,26 @@ class FragmentFD(FileDownloader):
def _append_fragment(self, ctx, frag_content):
try:
ctx['dest_stream'].write(frag_content)
ctx['dest_stream'].flush()
finally:
if self.__do_ytdl_file(ctx):
self._write_ytdl_file(ctx)
if not self.params.get('keep_fragments', False):
os.remove(ctx['fragment_filename_sanitized'])
os.remove(encodeFilename(ctx['fragment_filename_sanitized']))
del ctx['fragment_filename_sanitized']
def _prepare_frag_download(self, ctx):
if 'live' not in ctx:
ctx['live'] = False
if not ctx['live']:
total_frags_str = '%d' % ctx['total_frags']
ad_frags = ctx.get('ad_frags', 0)
if ad_frags:
total_frags_str += ' (not including %d ad)' % ad_frags
else:
total_frags_str = 'unknown (live)'
self.to_screen(
'[%s] Total fragments: %s'
% (self.FD_NAME, ctx['total_frags'] if not ctx['live'] else 'unknown (live)'))
'[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
self.report_destination(ctx['filename'])
dl = HttpQuietDownloader(
self.ydl,
@@ -151,10 +158,15 @@ class FragmentFD(FileDownloader):
if self.__do_ytdl_file(ctx):
if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
self._read_ytdl_file(ctx)
if ctx['fragment_index'] > 0 and resume_len == 0:
self.report_warning(
'Inconsistent state of incomplete fragment download. '
'Restarting from the beginning...')
ctx['fragment_index'] = resume_len = 0
self._write_ytdl_file(ctx)
else:
self._write_ytdl_file(ctx)
if ctx['fragment_index'] > 0:
assert resume_len > 0
assert ctx['fragment_index'] == 0
dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode)

View File

@@ -75,15 +75,30 @@ class HlsFD(FragmentFD):
fd.add_progress_hook(ph)
return fd.real_download(filename, info_dict)
total_frags = 0
def anvato_ad(s):
return s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
media_frags = 0
ad_frags = 0
ad_frag_next = False
for line in s.splitlines():
line = line.strip()
if line and not line.startswith('#'):
total_frags += 1
if not line:
continue
if line.startswith('#'):
if anvato_ad(line):
ad_frags += 1
ad_frag_next = True
continue
if ad_frag_next:
ad_frag_next = False
continue
media_frags += 1
ctx = {
'filename': filename,
'total_frags': total_frags,
'total_frags': media_frags,
'ad_frags': ad_frags,
}
self._prepare_and_start_frag_download(ctx)
@@ -101,10 +116,14 @@ class HlsFD(FragmentFD):
decrypt_info = {'METHOD': 'NONE'}
byte_range = {}
frag_index = 0
ad_frag_next = False
for line in s.splitlines():
line = line.strip()
if line:
if not line.startswith('#'):
if ad_frag_next:
ad_frag_next = False
continue
frag_index += 1
if frag_index <= ctx['fragment_index']:
continue
@@ -144,7 +163,8 @@ class HlsFD(FragmentFD):
return False
if decrypt_info['METHOD'] == 'AES-128':
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(decrypt_info['URI']).read()
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
self._prepare_url(info_dict, decrypt_info['URI'])).read()
frag_content = AES.new(
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
self._append_fragment(ctx, frag_content)
@@ -175,6 +195,8 @@ class HlsFD(FragmentFD):
'start': sub_range_start,
'end': sub_range_start + int(splitted_byte_range[0]),
}
elif anvato_ad(line):
ad_frag_next = True
self._finish_frag_download(ctx)

View File

@@ -4,13 +4,18 @@ import errno
import os
import socket
import time
import random
import re
from .common import FileDownloader
from ..compat import compat_urllib_error
from ..compat import (
compat_str,
compat_urllib_error,
)
from ..utils import (
ContentTooShortError,
encodeFilename,
int_or_none,
sanitize_open,
sanitized_Request,
write_xattr,
@@ -22,79 +27,133 @@ from ..utils import (
class HttpFD(FileDownloader):
def real_download(self, filename, info_dict):
url = info_dict['url']
tmpfilename = self.temp_name(filename)
stream = None
class DownloadContext(dict):
__getattr__ = dict.get
__setattr__ = dict.__setitem__
__delattr__ = dict.__delitem__
ctx = DownloadContext()
ctx.filename = filename
ctx.tmpfilename = self.temp_name(filename)
ctx.stream = None
# Do not include the Accept-Encoding header
headers = {'Youtubedl-no-compression': 'True'}
add_headers = info_dict.get('http_headers')
if add_headers:
headers.update(add_headers)
basic_request = sanitized_Request(url, None, headers)
request = sanitized_Request(url, None, headers)
is_test = self.params.get('test', False)
chunk_size = self._TEST_FILE_SIZE if is_test else (
info_dict.get('downloader_options', {}).get('http_chunk_size') or
self.params.get('http_chunk_size') or 0)
if is_test:
request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1))
ctx.open_mode = 'wb'
ctx.resume_len = 0
ctx.data_len = None
ctx.block_size = self.params.get('buffersize', 1024)
ctx.start_time = time.time()
ctx.chunk_size = None
# Establish possible resume length
if os.path.isfile(encodeFilename(tmpfilename)):
resume_len = os.path.getsize(encodeFilename(tmpfilename))
else:
resume_len = 0
if self.params.get('continuedl', True):
# Establish possible resume length
if os.path.isfile(encodeFilename(ctx.tmpfilename)):
ctx.resume_len = os.path.getsize(
encodeFilename(ctx.tmpfilename))
open_mode = 'wb'
if resume_len != 0:
if self.params.get('continuedl', True):
self.report_resuming_byte(resume_len)
request.add_header('Range', 'bytes=%d-' % resume_len)
open_mode = 'ab'
else:
resume_len = 0
ctx.is_resume = ctx.resume_len > 0
count = 0
retries = self.params.get('retries', 0)
while count <= retries:
class SucceedDownload(Exception):
pass
class RetryDownload(Exception):
def __init__(self, source_error):
self.source_error = source_error
class NextFragment(Exception):
pass
def set_range(req, start, end):
range_header = 'bytes=%d-' % start
if end:
range_header += compat_str(end)
req.add_header('Range', range_header)
def establish_connection():
ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size)
if not is_test and chunk_size else chunk_size)
if ctx.resume_len > 0:
range_start = ctx.resume_len
if ctx.is_resume:
self.report_resuming_byte(ctx.resume_len)
ctx.open_mode = 'ab'
elif ctx.chunk_size > 0:
range_start = 0
else:
range_start = None
ctx.is_resume = False
range_end = range_start + ctx.chunk_size - 1 if ctx.chunk_size else None
if range_end and ctx.data_len is not None and range_end >= ctx.data_len:
range_end = ctx.data_len - 1
has_range = range_start is not None
ctx.has_range = has_range
request = sanitized_Request(url, None, headers)
if has_range:
set_range(request, range_start, range_end)
# Establish connection
try:
data = self.ydl.urlopen(request)
ctx.data = self.ydl.urlopen(request)
# When trying to resume, Content-Range HTTP header of response has to be checked
# to match the value of requested Range HTTP header. This is due to a webservers
# that don't support resuming and serve a whole file with no Content-Range
# set in response despite of requested Range (see
# https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
if resume_len > 0:
content_range = data.headers.get('Content-Range')
if has_range:
content_range = ctx.data.headers.get('Content-Range')
if content_range:
content_range_m = re.search(r'bytes (\d+)-', content_range)
content_range_m = re.search(r'bytes (\d+)-(\d+)?(?:/(\d+))?', content_range)
# Content-Range is present and matches requested Range, resume is possible
if content_range_m and resume_len == int(content_range_m.group(1)):
break
if content_range_m:
if range_start == int(content_range_m.group(1)):
content_range_end = int_or_none(content_range_m.group(2))
content_len = int_or_none(content_range_m.group(3))
accept_content_len = (
# Non-chunked download
not ctx.chunk_size or
# Chunked download and requested piece or
# its part is promised to be served
content_range_end == range_end or
content_len < range_end)
if accept_content_len:
ctx.data_len = content_len
return
# Content-Range is either not present or invalid. Assuming remote webserver is
# trying to send the whole file, resume is not possible, so wiping the local file
# and performing entire redownload
self.report_unable_to_resume()
resume_len = 0
open_mode = 'wb'
break
ctx.resume_len = 0
ctx.open_mode = 'wb'
ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None))
return
except (compat_urllib_error.HTTPError, ) as err:
if (err.code < 500 or err.code >= 600) and err.code != 416:
# Unexpected HTTP error
raise
elif err.code == 416:
if err.code == 416:
# Unable to resume (requested range not satisfiable)
try:
# Open the connection again without the range header
data = self.ydl.urlopen(basic_request)
content_length = data.info()['Content-Length']
ctx.data = self.ydl.urlopen(
sanitized_Request(url, None, headers))
content_length = ctx.data.info()['Content-Length']
except (compat_urllib_error.HTTPError, ) as err:
if err.code < 500 or err.code >= 600:
raise
else:
# Examine the reported length
if (content_length is not None and
(resume_len - 100 < int(content_length) < resume_len + 100)):
(ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)):
# The file had already been fully downloaded.
# Explanation to the above condition: in issue #175 it was revealed that
# YouTube sometimes adds or removes a few bytes from the end of the file,
@@ -102,152 +161,193 @@ class HttpFD(FileDownloader):
# I decided to implement a suggested change and consider the file
# completely downloaded if the file size differs less than 100 bytes from
# the one in the hard drive.
self.report_file_already_downloaded(filename)
self.try_rename(tmpfilename, filename)
self.report_file_already_downloaded(ctx.filename)
self.try_rename(ctx.tmpfilename, ctx.filename)
self._hook_progress({
'filename': filename,
'filename': ctx.filename,
'status': 'finished',
'downloaded_bytes': resume_len,
'total_bytes': resume_len,
'downloaded_bytes': ctx.resume_len,
'total_bytes': ctx.resume_len,
})
return True
raise SucceedDownload()
else:
# The length does not match, we start the download over
self.report_unable_to_resume()
resume_len = 0
open_mode = 'wb'
break
except socket.error as e:
if e.errno != errno.ECONNRESET:
ctx.resume_len = 0
ctx.open_mode = 'wb'
return
elif err.code < 500 or err.code >= 600:
# Unexpected HTTP error
raise
raise RetryDownload(err)
except socket.error as err:
if err.errno != errno.ECONNRESET:
# Connection reset is no problem, just retry
raise
raise RetryDownload(err)
# Retry
count += 1
if count <= retries:
self.report_retry(count, retries)
def download():
data_len = ctx.data.info().get('Content-length', None)
if count > retries:
self.report_error('giving up after %s retries' % retries)
return False
# Range HTTP header may be ignored/unsupported by a webserver
# (e.g. extractor/scivee.py, extractor/bambuser.py).
# However, for a test we still would like to download just a piece of a file.
# To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
# block size when downloading a file.
if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
data_len = self._TEST_FILE_SIZE
data_len = data.info().get('Content-length', None)
# Range HTTP header may be ignored/unsupported by a webserver
# (e.g. extractor/scivee.py, extractor/bambuser.py).
# However, for a test we still would like to download just a piece of a file.
# To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
# block size when downloading a file.
if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
data_len = self._TEST_FILE_SIZE
if data_len is not None:
data_len = int(data_len) + resume_len
min_data_len = self.params.get('min_filesize')
max_data_len = self.params.get('max_filesize')
if min_data_len is not None and data_len < min_data_len:
self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
return False
if max_data_len is not None and data_len > max_data_len:
self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
return False
byte_counter = 0 + resume_len
block_size = self.params.get('buffersize', 1024)
start = time.time()
# measure time over whole while-loop, so slow_down() and best_block_size() work together properly
now = None # needed for slow_down() in the first loop run
before = start # start measuring
while True:
# Download and write
data_block = data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
byte_counter += len(data_block)
# exit loop when download is finished
if len(data_block) == 0:
break
# Open destination file just in time
if stream is None:
try:
(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
assert stream is not None
filename = self.undo_temp_name(tmpfilename)
self.report_destination(filename)
except (OSError, IOError) as err:
self.report_error('unable to open for writing: %s' % str(err))
if data_len is not None:
data_len = int(data_len) + ctx.resume_len
min_data_len = self.params.get('min_filesize')
max_data_len = self.params.get('max_filesize')
if min_data_len is not None and data_len < min_data_len:
self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
return False
if max_data_len is not None and data_len > max_data_len:
self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
return False
if self.params.get('xattr_set_filesize', False) and data_len is not None:
byte_counter = 0 + ctx.resume_len
block_size = ctx.block_size
start = time.time()
# measure time over whole while-loop, so slow_down() and best_block_size() work together properly
now = None # needed for slow_down() in the first loop run
before = start # start measuring
def retry(e):
if ctx.tmpfilename != '-':
ctx.stream.close()
ctx.stream = None
ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
raise RetryDownload(e)
while True:
try:
# Download and write
data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
# socket.timeout is a subclass of socket.error but may not have
# errno set
except socket.timeout as e:
retry(e)
except socket.error as e:
if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT):
raise
retry(e)
byte_counter += len(data_block)
# exit loop when download is finished
if len(data_block) == 0:
break
# Open destination file just in time
if ctx.stream is None:
try:
write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
except (XAttrUnavailableError, XAttrMetadataError) as err:
self.report_error('unable to set filesize xattr: %s' % str(err))
ctx.stream, ctx.tmpfilename = sanitize_open(
ctx.tmpfilename, ctx.open_mode)
assert ctx.stream is not None
ctx.filename = self.undo_temp_name(ctx.tmpfilename)
self.report_destination(ctx.filename)
except (OSError, IOError) as err:
self.report_error('unable to open for writing: %s' % str(err))
return False
try:
stream.write(data_block)
except (IOError, OSError) as err:
if self.params.get('xattr_set_filesize', False) and data_len is not None:
try:
write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
except (XAttrUnavailableError, XAttrMetadataError) as err:
self.report_error('unable to set filesize xattr: %s' % str(err))
try:
ctx.stream.write(data_block)
except (IOError, OSError) as err:
self.to_stderr('\n')
self.report_error('unable to write data: %s' % str(err))
return False
# Apply rate limit
self.slow_down(start, now, byte_counter - ctx.resume_len)
# end measuring of one loop run
now = time.time()
after = now
# Adjust block size
if not self.params.get('noresizebuffer', False):
block_size = self.best_block_size(after - before, len(data_block))
before = after
# Progress message
speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
if ctx.data_len is None:
eta = None
else:
eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len)
self._hook_progress({
'status': 'downloading',
'downloaded_bytes': byte_counter,
'total_bytes': ctx.data_len,
'tmpfilename': ctx.tmpfilename,
'filename': ctx.filename,
'eta': eta,
'speed': speed,
'elapsed': now - ctx.start_time,
})
if is_test and byte_counter == data_len:
break
if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:
ctx.resume_len = byte_counter
# ctx.block_size = block_size
raise NextFragment()
if ctx.stream is None:
self.to_stderr('\n')
self.report_error('unable to write data: %s' % str(err))
self.report_error('Did not get any data blocks')
return False
if ctx.tmpfilename != '-':
ctx.stream.close()
# Apply rate limit
self.slow_down(start, now, byte_counter - resume_len)
if data_len is not None and byte_counter != data_len:
err = ContentTooShortError(byte_counter, int(data_len))
if count <= retries:
retry(err)
raise err
# end measuring of one loop run
now = time.time()
after = now
self.try_rename(ctx.tmpfilename, ctx.filename)
# Adjust block size
if not self.params.get('noresizebuffer', False):
block_size = self.best_block_size(after - before, len(data_block))
before = after
# Progress message
speed = self.calc_speed(start, now, byte_counter - resume_len)
if data_len is None:
eta = None
else:
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
# Update file modification time
if self.params.get('updatetime', True):
info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None))
self._hook_progress({
'status': 'downloading',
'downloaded_bytes': byte_counter,
'total_bytes': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'eta': eta,
'speed': speed,
'elapsed': now - start,
'total_bytes': byte_counter,
'filename': ctx.filename,
'status': 'finished',
'elapsed': time.time() - ctx.start_time,
})
if is_test and byte_counter == data_len:
break
return True
if stream is None:
self.to_stderr('\n')
self.report_error('Did not get any data blocks')
return False
if tmpfilename != '-':
stream.close()
while count <= retries:
try:
establish_connection()
return download()
except RetryDownload as e:
count += 1
if count <= retries:
self.report_retry(e.source_error, count, retries)
continue
except NextFragment:
continue
except SucceedDownload:
return True
if data_len is not None and byte_counter != data_len:
raise ContentTooShortError(byte_counter, int(data_len))
self.try_rename(tmpfilename, filename)
# Update file modification time
if self.params.get('updatetime', True):
info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
self._hook_progress({
'downloaded_bytes': byte_counter,
'total_bytes': byte_counter,
'filename': filename,
'status': 'finished',
'elapsed': time.time() - start,
})
return True
self.report_error('giving up after %s retries' % retries)
return False

View File

@@ -1,25 +1,27 @@
from __future__ import unicode_literals
import time
import struct
import binascii
import io
from .fragment import FragmentFD
from ..compat import compat_urllib_error
from ..compat import (
compat_Struct,
compat_urllib_error,
)
u8 = struct.Struct(b'>B')
u88 = struct.Struct(b'>Bx')
u16 = struct.Struct(b'>H')
u1616 = struct.Struct(b'>Hxx')
u32 = struct.Struct(b'>I')
u64 = struct.Struct(b'>Q')
u8 = compat_Struct('>B')
u88 = compat_Struct('>Bx')
u16 = compat_Struct('>H')
u1616 = compat_Struct('>Hxx')
u32 = compat_Struct('>I')
u64 = compat_Struct('>Q')
s88 = struct.Struct(b'>bx')
s16 = struct.Struct(b'>h')
s1616 = struct.Struct(b'>hxx')
s32 = struct.Struct(b'>i')
s88 = compat_Struct('>bx')
s16 = compat_Struct('>h')
s1616 = compat_Struct('>hxx')
s32 = compat_Struct('>i')
unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000)
@@ -139,7 +141,7 @@ def write_piff_header(stream, params):
sample_entry_payload += u16.pack(0x18) # depth
sample_entry_payload += s16.pack(-1) # pre defined
codec_private_data = binascii.unhexlify(params['codec_private_data'])
codec_private_data = binascii.unhexlify(params['codec_private_data'].encode('utf-8'))
if fourcc in ('H264', 'AVC1'):
sps, pps = codec_private_data.split(u32.pack(1))[1:]
avcc_payload = u8.pack(1) # configuration version

View File

@@ -1,6 +1,9 @@
from __future__ import unicode_literals
import hashlib
import hmac
import re
import time
from .common import InfoExtractor
from ..compat import compat_str
@@ -10,6 +13,7 @@ from ..utils import (
int_or_none,
parse_iso8601,
try_get,
update_url_query,
)
@@ -101,21 +105,24 @@ class ABCIE(InfoExtractor):
class ABCIViewIE(InfoExtractor):
IE_NAME = 'abc.net.au:iview'
_VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P<id>[^/?#]+)'
_GEO_COUNTRIES = ['AU']
# ABC iview programs are normally available for 14 days only.
_TESTS = [{
'url': 'http://iview.abc.net.au/programs/diaries-of-a-broken-mind/ZX9735A001S00',
'url': 'http://iview.abc.net.au/programs/call-the-midwife/ZW0898A003S00',
'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
'info_dict': {
'id': 'ZX9735A001S00',
'id': 'ZW0898A003S00',
'ext': 'mp4',
'title': 'Diaries Of A Broken Mind',
'description': 'md5:7de3903874b7a1be279fe6b68718fc9e',
'upload_date': '20161010',
'uploader_id': 'abc2',
'timestamp': 1476064920,
'title': 'Series 5 Ep 3',
'description': 'md5:e0ef7d4f92055b86c4f33611f180ed79',
'upload_date': '20171228',
'uploader_id': 'abc1',
'timestamp': 1514499187,
},
'params': {
'skip_download': True,
},
'skip': 'Video gone',
}]
def _real_extract(self, url):
@@ -126,20 +133,30 @@ class ABCIViewIE(InfoExtractor):
title = video_params.get('title') or video_params['seriesTitle']
stream = next(s for s in video_params['playlist'] if s.get('type') == 'program')
format_urls = [
try_get(stream, lambda x: x['hds-unmetered'], compat_str)]
house_number = video_params.get('episodeHouseNumber')
path = '/auth/hls/sign?ts={0}&hn={1}&d=android-mobile'.format(
int(time.time()), house_number)
sig = hmac.new(
'android.content.res.Resources'.encode('utf-8'),
path.encode('utf-8'), hashlib.sha256).hexdigest()
token = self._download_webpage(
'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)
# May have higher quality video
sd_url = try_get(
stream, lambda x: x['streams']['hds']['sd'], compat_str)
if sd_url:
format_urls.append(sd_url.replace('metered', 'um'))
def tokenize_url(url, token):
return update_url_query(url, {
'hdnea': token,
})
formats = []
for format_url in format_urls:
if format_url:
formats.extend(
self._extract_akamai_formats(format_url, video_id))
for sd in ('sd', 'sd-low'):
sd_url = try_get(
stream, lambda x: x['streams']['hls'][sd], compat_str)
if not sd_url:
continue
formats = self._extract_m3u8_formats(
tokenize_url(sd_url, token), video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
if formats:
break
self._sort_formats(formats)
subtitles = {}

View File

@@ -7,6 +7,7 @@ import time
from .amp import AMPIE
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..compat import compat_urlparse
@@ -65,7 +66,7 @@ class AbcNewsIE(InfoExtractor):
_TESTS = [{
'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
'info_dict': {
'id': '10498713',
'id': '10505354',
'ext': 'flv',
'display_id': 'dramatic-video-rare-death-job-america',
'title': 'Occupational Hazards',
@@ -78,7 +79,7 @@ class AbcNewsIE(InfoExtractor):
}, {
'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
'info_dict': {
'id': '39125818',
'id': '38897857',
'ext': 'mp4',
'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
'title': 'Justin Timberlake Drops Hints For Secret Single',
@@ -108,9 +109,7 @@ class AbcNewsIE(InfoExtractor):
r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
full_video_url = compat_urlparse.urljoin(url, video_url)
youtube_url = self._html_search_regex(
r'<iframe[^>]+src="(https://www\.youtube\.com/embed/[^"]+)"',
webpage, 'YouTube URL', default=None)
youtube_url = YoutubeIE._extract_url(webpage)
timestamp = None
date_str = self._html_search_regex(
@@ -140,7 +139,7 @@ class AbcNewsIE(InfoExtractor):
}
if youtube_url:
entries = [entry, self.url_result(youtube_url, 'Youtube')]
entries = [entry, self.url_result(youtube_url, ie=YoutubeIE.ie_key())]
return self.playlist_result(entries)
return entry

View File

@@ -8,7 +8,7 @@ from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
parse_iso8601,
unified_timestamp,
OnDemandPagedList,
)
@@ -32,7 +32,7 @@ class ACastIE(InfoExtractor):
}, {
# test with multiple blings
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
'md5': '55c0097badd7095f494c99a172f86501',
'md5': 'e87d5b8516cd04c0d81b6ee1caca28d0',
'info_dict': {
'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
'ext': 'mp3',
@@ -40,23 +40,24 @@ class ACastIE(InfoExtractor):
'timestamp': 1477346700,
'upload_date': '20161024',
'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4',
'duration': 2797,
'duration': 2766,
}
}]
def _real_extract(self, url):
channel, display_id = re.match(self._VALID_URL, url).groups()
cast_data = self._download_json(
'https://embed.acast.com/api/acasts/%s/%s' % (channel, display_id), display_id)
'https://play-api.acast.com/splash/%s/%s' % (channel, display_id), display_id)
e = cast_data['result']['episode']
return {
'id': compat_str(cast_data['id']),
'id': compat_str(e['id']),
'display_id': display_id,
'url': [b['audio'] for b in cast_data['blings'] if b['type'] == 'BlingAudio'][0],
'title': cast_data['name'],
'description': cast_data.get('description'),
'thumbnail': cast_data.get('image'),
'timestamp': parse_iso8601(cast_data.get('publishingDate')),
'duration': int_or_none(cast_data.get('duration')),
'url': e['mediaUrl'],
'title': e['name'],
'description': e.get('description'),
'thumbnail': e.get('image'),
'timestamp': unified_timestamp(e.get('publishingDate')),
'duration': int_or_none(e.get('duration')),
}

View File

@@ -1,13 +1,15 @@
# coding: utf-8
from __future__ import unicode_literals
import base64
import json
import os
from .common import InfoExtractor
from ..aes import aes_cbc_decrypt
from ..compat import compat_ord
from ..compat import (
compat_b64decode,
compat_ord,
)
from ..utils import (
bytes_to_intlist,
ExtractorError,
@@ -48,9 +50,9 @@ class ADNIE(InfoExtractor):
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
bytes_to_intlist(base64.b64decode(enc_subtitles[24:])),
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
bytes_to_intlist(b'\x1b\xe0\x29\x61\x38\x94\x24\x00\x12\xbd\xc5\x80\xac\xce\xbe\xb0'),
bytes_to_intlist(base64.b64decode(enc_subtitles[:24]))
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
))
subtitles_json = self._parse_json(
dec_subtitles[:-compat_ord(dec_subtitles[-1])].decode(),

View File

@@ -122,7 +122,8 @@ class AENetworksIE(AENetworksBaseIE):
query = {
'mbr': 'true',
'assetTypes': 'high_video_s3'
'assetTypes': 'high_video_ak',
'switch': 'hls_high_ak',
}
video_id = self._html_search_meta('aetn:VideoID', webpage)
media_url = self._search_regex(
@@ -131,7 +132,7 @@ class AENetworksIE(AENetworksBaseIE):
r'data-media-url=(["\'])(?P<url>(?:(?!\1).)+?)\1'],
webpage, 'video url', group='url')
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
info = self._parse_theplatform_metadata(theplatform_metadata)
if theplatform_metadata.get('AETN$isBehindWall'):
requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain]

View File

@@ -138,6 +138,23 @@ class AfreecaTVIE(InfoExtractor):
'params': {
'skip_download': True,
},
}, {
# adult video
'url': 'http://vod.afreecatv.com/PLAYER/STATION/26542731',
'info_dict': {
'id': '20171001_F1AE1711_196617479_1',
'ext': 'mp4',
'title': '[생]서아 초심 찾기 방송 (part 1)',
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'BJ서아',
'uploader_id': 'bjdyrksu',
'upload_date': '20171001',
'duration': 3600,
'age_limit': 18,
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
'only_matching': True,
@@ -158,9 +175,30 @@ class AfreecaTVIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
station_id = self._search_regex(
r'nStationNo\s*=\s*(\d+)', webpage, 'station')
bbs_id = self._search_regex(
r'nBbsNo\s*=\s*(\d+)', webpage, 'bbs')
video_id = self._search_regex(
r'nTitleNo\s*=\s*(\d+)', webpage, 'title', default=video_id)
video_xml = self._download_xml(
'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
video_id, query={'nTitleNo': video_id})
video_id, headers={
'Referer': 'http://vod.afreecatv.com/embed.php',
}, query={
'nTitleNo': video_id,
'nStationNo': station_id,
'nBbsNo': bbs_id,
'partialView': 'SKIP_ADULT',
})
flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
if flag and flag != 'SUCCEED':
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, flag), expected=True)
video_element = video_xml.findall(compat_xpath('./track/video'))[1]
if video_element is None or video_element.text is None:
@@ -203,10 +241,19 @@ class AfreecaTVIE(InfoExtractor):
r'^(\d{8})_', key, 'upload date', default=None)
file_duration = int_or_none(file_element.get('duration'))
format_id = key if key else '%s_%s' % (video_id, file_num)
formats = self._extract_m3u8_formats(
file_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls',
note='Downloading part %d m3u8 information' % file_num)
if determine_ext(file_url) == 'm3u8':
formats = self._extract_m3u8_formats(
file_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls',
note='Downloading part %d m3u8 information' % file_num)
else:
formats = [{
'url': file_url,
'format_id': 'http',
}]
if not formats:
continue
self._sort_formats(formats)
file_info = common_entry.copy()
file_info.update({
'id': format_id,
@@ -246,107 +293,3 @@ class AfreecaTVIE(InfoExtractor):
})
return info
class AfreecaTVGlobalIE(AfreecaTVIE):
IE_NAME = 'afreecatv:global'
_VALID_URL = r'https?://(?:www\.)?afreeca\.tv/(?P<channel_id>\d+)(?:/v/(?P<video_id>\d+))?'
_TESTS = [{
'url': 'http://afreeca.tv/36853014/v/58301',
'info_dict': {
'id': '58301',
'title': 'tryhard top100',
'uploader_id': '36853014',
'uploader': 'makgi Hearthstone Live!',
},
'playlist_count': 3,
}]
def _real_extract(self, url):
channel_id, video_id = re.match(self._VALID_URL, url).groups()
video_type = 'video' if video_id else 'live'
query = {
'pt': 'view',
'bid': channel_id,
}
if video_id:
query['vno'] = video_id
video_data = self._download_json(
'http://api.afreeca.tv/%s/view_%s.php' % (video_type, video_type),
video_id or channel_id, query=query)['channel']
if video_data.get('result') != 1:
raise ExtractorError('%s said: %s' % (self.IE_NAME, video_data['remsg']))
title = video_data['title']
info = {
'thumbnail': video_data.get('thumb'),
'view_count': int_or_none(video_data.get('vcnt')),
'age_limit': int_or_none(video_data.get('grade')),
'uploader_id': channel_id,
'uploader': video_data.get('cname'),
}
if video_id:
entries = []
for i, f in enumerate(video_data.get('flist', [])):
video_key = self.parse_video_key(f.get('key', ''))
f_url = f.get('file')
if not video_key or not f_url:
continue
entries.append({
'id': '%s_%s' % (video_id, video_key.get('part', i + 1)),
'title': title,
'upload_date': video_key.get('upload_date'),
'duration': int_or_none(f.get('length')),
'url': f_url,
'protocol': 'm3u8_native',
'ext': 'mp4',
})
info.update({
'id': video_id,
'title': title,
'duration': int_or_none(video_data.get('length')),
})
if len(entries) > 1:
info['_type'] = 'multi_video'
info['entries'] = entries
elif len(entries) == 1:
i = entries[0].copy()
i.update(info)
info = i
else:
formats = []
for s in video_data.get('strm', []):
s_url = s.get('purl')
if not s_url:
continue
stype = s.get('stype')
if stype == 'HLS':
formats.extend(self._extract_m3u8_formats(
s_url, channel_id, 'mp4', m3u8_id=stype, fatal=False))
elif stype == 'RTMP':
format_id = [stype]
label = s.get('label')
if label:
format_id.append(label)
formats.append({
'format_id': '-'.join(format_id),
'url': s_url,
'tbr': int_or_none(s.get('bps')),
'height': int_or_none(s.get('brt')),
'ext': 'flv',
'rtmp_live': True,
})
self._sort_formats(formats)
info.update({
'id': channel_id,
'title': self._live_title(title),
'is_live': True,
'formats': formats,
})
return info

View File

@@ -0,0 +1,53 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
float_or_none,
try_get,
)
class AliExpressLiveIE(InfoExtractor):
_VALID_URL = r'https?://live\.aliexpress\.com/live/(?P<id>\d+)'
_TEST = {
'url': 'https://live.aliexpress.com/live/2800002704436634',
'md5': 'e729e25d47c5e557f2630eaf99b740a5',
'info_dict': {
'id': '2800002704436634',
'ext': 'mp4',
'title': 'CASIMA7.22',
'thumbnail': r're:http://.*\.jpg',
'uploader': 'CASIMA Official Store',
'timestamp': 1500717600,
'upload_date': '20170722',
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
data = self._parse_json(
self._search_regex(
r'(?s)runParams\s*=\s*({.+?})\s*;?\s*var',
webpage, 'runParams'),
video_id)
title = data['title']
formats = self._extract_m3u8_formats(
data['replyStreamUrl'], video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls')
return {
'id': video_id,
'title': title,
'thumbnail': data.get('coverUrl'),
'uploader': try_get(
data, lambda x: x['followBar']['name'], compat_str),
'timestamp': float_or_none(data.get('startTimeLong'), scale=1000),
'formats': formats,
}

View File

@@ -11,7 +11,7 @@ from ..utils import (
class AMCNetworksIE(ThePlatformIE):
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P<id>[^/?#]+)'
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
'md5': '',
@@ -51,6 +51,9 @@ class AMCNetworksIE(ThePlatformIE):
}, {
'url': 'http://www.wetv.com/shows/la-hair/videos/season-05/episode-09-episode-9-2/episode-9-sneak-peek-3',
'only_matching': True,
}, {
'url': 'https://www.sundancetv.com/shows/riviera/full-episodes/season-1/episode-01-episode-1',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@@ -0,0 +1,85 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
clean_html,
int_or_none,
try_get,
unified_strdate,
)
class AmericasTestKitchenIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.americastestkitchen.com/episode/548-summer-dinner-party',
'md5': 'b861c3e365ac38ad319cfd509c30577f',
'info_dict': {
'id': '1_5g5zua6e',
'title': 'Summer Dinner Party',
'ext': 'mp4',
'description': 'md5:858d986e73a4826979b6a5d9f8f6a1ec',
'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1497285541,
'upload_date': '20170612',
'uploader_id': 'roger.metcalf@americastestkitchen.com',
'release_date': '20170617',
'series': "America's Test Kitchen",
'season_number': 17,
'episode': 'Summer Dinner Party',
'episode_number': 24,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
partner_id = self._search_regex(
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
webpage, 'kaltura partner id')
video_data = self._parse_json(
self._search_regex(
r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
webpage, 'initial context'),
video_id)
ep_data = try_get(
video_data,
(lambda x: x['episodeDetail']['content']['data'],
lambda x: x['videoDetail']['content']['data']), dict)
ep_meta = ep_data.get('full_video', {})
external_id = ep_data.get('external_id') or ep_meta['external_id']
title = ep_data.get('title') or ep_meta.get('title')
description = clean_html(ep_meta.get('episode_description') or ep_data.get(
'description') or ep_meta.get('description'))
thumbnail = try_get(ep_meta, lambda x: x['photo']['image_url'])
release_date = unified_strdate(ep_data.get('aired_at'))
season_number = int_or_none(ep_meta.get('season_number'))
episode = ep_meta.get('title')
episode_number = int_or_none(ep_meta.get('episode_number'))
return {
'_type': 'url_transparent',
'url': 'kaltura:%s:%s' % (partner_id, external_id),
'ie_key': 'Kaltura',
'title': title,
'description': description,
'thumbnail': thumbnail,
'release_date': release_date,
'series': "America's Test Kitchen",
'season_number': season_number,
'episode': episode,
'episode_number': episode_number,
}

View File

@@ -3,16 +3,13 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urlparse,
compat_str,
)
from ..compat import compat_str
from ..utils import (
determine_ext,
extract_attributes,
ExtractorError,
sanitized_Request,
urlencode_postdata,
urljoin,
)
@@ -21,6 +18,8 @@ class AnimeOnDemandIE(InfoExtractor):
_LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
_APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
_NETRC_MACHINE = 'animeondemand'
# German-speaking countries of Europe
_GEO_COUNTRIES = ['AT', 'CH', 'DE', 'LI', 'LU']
_TESTS = [{
# jap, OmU
'url': 'https://www.anime-on-demand.de/anime/161',
@@ -46,6 +45,10 @@ class AnimeOnDemandIE(InfoExtractor):
# Full length film, non-series, ger/jap, Dub/OmU, account required
'url': 'https://www.anime-on-demand.de/anime/185',
'only_matching': True,
}, {
# Flash videos
'url': 'https://www.anime-on-demand.de/anime/12',
'only_matching': True,
}]
def _login(self):
@@ -72,19 +75,18 @@ class AnimeOnDemandIE(InfoExtractor):
'post url', default=self._LOGIN_URL, group='url')
if not post_url.startswith('http'):
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
request = sanitized_Request(
post_url, urlencode_postdata(login_form))
request.add_header('Referer', self._LOGIN_URL)
post_url = urljoin(self._LOGIN_URL, post_url)
response = self._download_webpage(
request, None, 'Logging in as %s' % username)
post_url, None, 'Logging in',
data=urlencode_postdata(login_form), headers={
'Referer': self._LOGIN_URL,
})
if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')):
error = self._search_regex(
r'<p class="alert alert-danger">(.+?)</p>',
response, 'error', default=None)
r'<p[^>]+\bclass=(["\'])(?:(?!\1).)*\balert\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</p>',
response, 'error', default=None, group='error')
if error:
raise ExtractorError('Unable to login: %s' % error, expected=True)
raise ExtractorError('Unable to log in')
@@ -120,10 +122,11 @@ class AnimeOnDemandIE(InfoExtractor):
formats = []
for input_ in re.findall(
r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', html):
r'<input[^>]+class=["\'].*?streamstarter[^>]+>', html):
attributes = extract_attributes(input_)
title = attributes.get('data-dialog-header')
playlist_urls = []
for playlist_key in ('data-playlist', 'data-otherplaylist'):
for playlist_key in ('data-playlist', 'data-otherplaylist', 'data-stream'):
playlist_url = attributes.get(playlist_key)
if isinstance(playlist_url, compat_str) and re.match(
r'/?[\da-zA-Z]+', playlist_url):
@@ -147,19 +150,38 @@ class AnimeOnDemandIE(InfoExtractor):
format_id_list.append(compat_str(num))
format_id = '-'.join(format_id_list)
format_note = ', '.join(filter(None, (kind, lang_note)))
request = sanitized_Request(
compat_urlparse.urljoin(url, playlist_url),
item_id_list = []
if format_id:
item_id_list.append(format_id)
item_id_list.append('videomaterial')
playlist = self._download_json(
urljoin(url, playlist_url), video_id,
'Downloading %s JSON' % ' '.join(item_id_list),
headers={
'X-Requested-With': 'XMLHttpRequest',
'X-CSRF-Token': csrf_token,
'Referer': url,
'Accept': 'application/json, text/javascript, */*; q=0.01',
})
playlist = self._download_json(
request, video_id, 'Downloading %s playlist JSON' % format_id,
fatal=False)
}, fatal=False)
if not playlist:
continue
stream_url = playlist.get('streamurl')
if stream_url:
rtmp = re.search(
r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',
stream_url)
if rtmp:
formats.append({
'url': rtmp.group('url'),
'app': rtmp.group('app'),
'play_path': rtmp.group('playpath'),
'page_url': url,
'player_url': 'https://www.anime-on-demand.de/assets/jwplayer.flash-55abfb34080700304d49125ce9ffb4a6.swf',
'rtmp_real_time': True,
'format_id': 'rtmp',
'ext': 'flv',
})
continue
start_video = playlist.get('startvideo', 0)
playlist = playlist.get('playlist')
if not playlist or not isinstance(playlist, list):
@@ -222,7 +244,7 @@ class AnimeOnDemandIE(InfoExtractor):
f.update({
'id': '%s-%s' % (f['id'], m.group('kind').lower()),
'title': m.group('title'),
'url': compat_urlparse.urljoin(url, m.group('href')),
'url': urljoin(url, m.group('href')),
})
entries.append(f)

View File

@@ -18,6 +18,7 @@ from ..utils import (
int_or_none,
strip_jsonp,
unescapeHTML,
unsmuggle_url,
)
@@ -197,12 +198,16 @@ class AnvatoIE(InfoExtractor):
'tbr': tbr if tbr != 0 else None,
}
if ext == 'm3u8' or media_format in ('m3u8', 'm3u8-variant'):
if tbr is not None:
a_format.update({
'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
'ext': 'mp4',
})
if media_format == 'm3u8' and tbr is not None:
a_format.update({
'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
'ext': 'mp4',
})
elif media_format == 'm3u8-variant' or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
continue
elif ext == 'mp3' or media_format == 'mp3':
a_format['vcodec'] = 'none'
else:
@@ -271,6 +276,9 @@ class AnvatoIE(InfoExtractor):
anvplayer_data['accessKey'], anvplayer_data['video'])
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
mobj = re.match(self._VALID_URL, url)
access_key, video_id = mobj.group('access_key_or_mcp', 'id')
if access_key not in self._ANVACK_TABLE:

View File

@@ -3,13 +3,13 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
HEADRequest,
int_or_none,
mimetype2ext,
)
class AparatIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
_VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
_TEST = {
'url': 'http://www.aparat.com/v/wP8On',
@@ -29,30 +29,41 @@ class AparatIE(InfoExtractor):
# Note: There is an easier-to-parse configuration at
# http://www.aparat.com/video/video/config/videohash/%video_id
# but the URL in there does not work
embed_url = 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id
webpage = self._download_webpage(embed_url, video_id)
file_list = self._parse_json(self._search_regex(
r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage, 'file list'), video_id)
for i, item in enumerate(file_list[0]):
video_url = item['file']
req = HEADRequest(video_url)
res = self._request_webpage(
req, video_id, note='Testing video URL %d' % i, errnote=False)
if res:
break
else:
raise ExtractorError('No working video URLs found')
webpage = self._download_webpage(
'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
video_id)
title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
file_list = self._parse_json(
self._search_regex(
r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage,
'file list'),
video_id)
formats = []
for item in file_list[0]:
file_url = item.get('file')
if not file_url:
continue
ext = mimetype2ext(item.get('type'))
label = item.get('label')
formats.append({
'url': file_url,
'ext': ext,
'format_id': label or ext,
'height': int_or_none(self._search_regex(
r'(\d+)[pP]', label or '', 'height', default=None)),
})
self._sort_formats(formats)
thumbnail = self._search_regex(
r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
return {
'id': video_id,
'title': title,
'url': video_url,
'ext': 'mp4',
'thumbnail': thumbnail,
'age_limit': self._family_friendly_search(webpage),
'formats': formats,
}

View File

@@ -117,7 +117,7 @@ class AppleTrailersIE(InfoExtractor):
continue
formats.append({
'format_id': '%s-%s' % (version, size),
'url': re.sub(r'_(\d+p.mov)', r'_h\1', src),
'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src),
'width': int_or_none(size_data.get('width')),
'height': int_or_none(size_data.get('height')),
'language': version[:2],
@@ -179,7 +179,7 @@ class AppleTrailersIE(InfoExtractor):
formats = []
for format in settings['metadata']['sizes']:
# The src is a file pointing to the real video file
format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src'])
format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src'])
formats.append({
'url': format_url,
'format': format['type'],

View File

@@ -5,6 +5,7 @@ import re
from .common import InfoExtractor
from .generic import GenericIE
from ..compat import compat_str
from ..utils import (
determine_ext,
ExtractorError,
@@ -23,57 +24,30 @@ class ARDMediathekIE(InfoExtractor):
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
_TESTS = [{
'url': 'http://www.ardmediathek.de/tv/Dokumentation-und-Reportage/Ich-liebe-das-Leben-trotzdem/rbb-Fernsehen/Video?documentId=29582122&bcastId=3822114',
# available till 26.07.2022
'url': 'http://www.ardmediathek.de/tv/S%C3%9CDLICHT/Was-ist-die-Kunst-der-Zukunft-liebe-Ann/BR-Fernsehen/Video?bcastId=34633636&documentId=44726822',
'info_dict': {
'id': '29582122',
'id': '44726822',
'ext': 'mp4',
'title': 'Ich liebe das Leben trotzdem',
'description': 'md5:45e4c225c72b27993314b31a84a5261c',
'duration': 4557,
'title': 'Was ist die Kunst der Zukunft, liebe Anna McCarthy?',
'description': 'md5:4ada28b3e3b5df01647310e41f3a62f5',
'duration': 1740,
},
'params': {
# m3u8 download
'skip_download': True,
},
'skip': 'HTTP Error 404: Not Found',
}, {
'url': 'http://www.ardmediathek.de/tv/Tatort/Tatort-Scheinwelten-H%C3%B6rfassung-Video/Das-Erste/Video?documentId=29522730&bcastId=602916',
'md5': 'f4d98b10759ac06c0072bbcd1f0b9e3e',
'info_dict': {
'id': '29522730',
'ext': 'mp4',
'title': 'Tatort: Scheinwelten - Hörfassung (Video tgl. ab 20 Uhr)',
'description': 'md5:196392e79876d0ac94c94e8cdb2875f1',
'duration': 5252,
},
'skip': 'HTTP Error 404: Not Found',
}
}, {
# audio
'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
'md5': '219d94d8980b4f538c7fcb0865eb7f2c',
'info_dict': {
'id': '28488308',
'ext': 'mp3',
'title': 'Tod eines Fußballers',
'description': 'md5:f6e39f3461f0e1f54bfa48c8875c86ef',
'duration': 3240,
},
'skip': 'HTTP Error 404: Not Found',
'only_matching': True,
}, {
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
'only_matching': True,
}, {
# audio
'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
'md5': '4e8f00631aac0395fee17368ac0e9867',
'info_dict': {
'id': '30796318',
'ext': 'mp3',
'title': 'Vor dem Fest',
'description': 'md5:c0c1c8048514deaed2a73b3a60eecacb',
'duration': 3287,
},
'skip': 'Video is no longer available',
'only_matching': True,
}]
def _extract_media_info(self, media_info_url, webpage, video_id):
@@ -126,6 +100,8 @@ class ARDMediathekIE(InfoExtractor):
quality = stream.get('_quality')
server = stream.get('_server')
for stream_url in stream_urls:
if not isinstance(stream_url, compat_str) or '//' not in stream_url:
continue
ext = determine_ext(stream_url)
if quality != 'auto' and ext in ('f4m', 'm3u8'):
continue
@@ -146,13 +122,11 @@ class ARDMediathekIE(InfoExtractor):
'play_path': stream_url,
'format_id': 'a%s-rtmp-%s' % (num, quality),
}
elif stream_url.startswith('http'):
else:
f = {
'url': stream_url,
'format_id': 'a%s-%s-%s' % (num, ext, quality)
}
else:
continue
m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$', stream_url)
if m:
f.update({
@@ -195,7 +169,7 @@ class ARDMediathekIE(InfoExtractor):
title = self._html_search_regex(
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
r'<meta name="dcterms.title" content="(.*?)"/>',
r'<meta name="dcterms\.title" content="(.*?)"/>',
r'<h4 class="headline">(.*?)</h4>'],
webpage, 'title')
description = self._html_search_meta(
@@ -251,20 +225,23 @@ class ARDMediathekIE(InfoExtractor):
class ARDIE(InfoExtractor):
_VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
_TEST = {
'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
'md5': 'd216c3a86493f9322545e045ddc3eb35',
_TESTS = [{
# available till 14.02.2019
'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html',
'md5': '8e4ec85f31be7c7fc08a26cdbc5a1f49',
'info_dict': {
'display_id': 'die-story-im-ersten-mission-unter-falscher-flagge',
'id': '100',
'display_id': 'das-groko-drama-zerlegen-sich-die-volksparteien-video',
'id': '102',
'ext': 'mp4',
'duration': 2600,
'title': 'Die Story im Ersten: Mission unter falscher Flagge',
'upload_date': '20140804',
'duration': 4435.0,
'title': 'Das GroKo-Drama: Zerlegen sich die Volksparteien?',
'upload_date': '20180214',
'thumbnail': r're:^https?://.*\.jpg$',
},
'skip': 'HTTP Error 404: Not Found',
}
}, {
'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View File

@@ -6,15 +6,18 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_str,
compat_urllib_parse_urlparse,
)
from ..utils import (
ExtractorError,
find_xpath_attr,
unified_strdate,
get_element_by_attribute,
int_or_none,
NO_DEFAULT,
qualities,
try_get,
unified_strdate,
)
# There are different sources of video in arte.tv, the extraction process
@@ -79,6 +82,16 @@ class ArteTVBaseIE(InfoExtractor):
info = self._download_json(json_url, video_id)
player_info = info['videoJsonPlayer']
vsr = try_get(player_info, lambda x: x['VSR'], dict)
if not vsr:
error = None
if try_get(player_info, lambda x: x['custom_msg']['type']) == 'error':
error = try_get(
player_info, lambda x: x['custom_msg']['msg'], compat_str)
if not error:
error = 'Video %s is not available' % player_info.get('VID') or video_id
raise ExtractorError(error, expected=True)
upload_date_str = player_info.get('shootingDate')
if not upload_date_str:
upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
@@ -107,7 +120,7 @@ class ArteTVBaseIE(InfoExtractor):
langcode = LANGS.get(lang, lang)
formats = []
for format_id, format_dict in player_info['VSR'].items():
for format_id, format_dict in vsr.items():
f = dict(format_dict)
versionCode = f.get('versionCode')
l = re.escape(langcode)

View File

@@ -87,7 +87,7 @@ class AtresPlayerIE(InfoExtractor):
self._LOGIN_URL, urlencode_postdata(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
response = self._download_webpage(
request, None, 'Logging in as %s' % username)
request, None, 'Logging in')
error = self._html_search_regex(
r'(?s)<ul[^>]+class="[^"]*\blist_error\b[^"]*">(.+?)</ul>',

View File

@@ -0,0 +1,78 @@
# coding: utf-8
from __future__ import unicode_literals
import datetime
import hashlib
import hmac
from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlencode
class AWSIE(InfoExtractor):
_AWS_ALGORITHM = 'AWS4-HMAC-SHA256'
_AWS_REGION = 'us-east-1'
def _aws_execute_api(self, aws_dict, video_id, query=None):
query = query or {}
amz_date = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
date = amz_date[:8]
headers = {
'Accept': 'application/json',
'Host': self._AWS_PROXY_HOST,
'X-Amz-Date': amz_date,
'X-Api-Key': self._AWS_API_KEY
}
session_token = aws_dict.get('session_token')
if session_token:
headers['X-Amz-Security-Token'] = session_token
def aws_hash(s):
return hashlib.sha256(s.encode('utf-8')).hexdigest()
# Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
canonical_querystring = compat_urllib_parse_urlencode(query)
canonical_headers = ''
for header_name, header_value in sorted(headers.items()):
canonical_headers += '%s:%s\n' % (header_name.lower(), header_value)
signed_headers = ';'.join([header.lower() for header in sorted(headers.keys())])
canonical_request = '\n'.join([
'GET',
aws_dict['uri'],
canonical_querystring,
canonical_headers,
signed_headers,
aws_hash('')
])
# Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
credential_scope_list = [date, self._AWS_REGION, 'execute-api', 'aws4_request']
credential_scope = '/'.join(credential_scope_list)
string_to_sign = '\n'.join([self._AWS_ALGORITHM, amz_date, credential_scope, aws_hash(canonical_request)])
# Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
def aws_hmac(key, msg):
return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)
def aws_hmac_digest(key, msg):
return aws_hmac(key, msg).digest()
def aws_hmac_hexdigest(key, msg):
return aws_hmac(key, msg).hexdigest()
k_signing = ('AWS4' + aws_dict['secret_key']).encode('utf-8')
for value in credential_scope_list:
k_signing = aws_hmac_digest(k_signing, value)
signature = aws_hmac_hexdigest(k_signing, string_to_sign)
# Task 4: http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html
headers['Authorization'] = ', '.join([
'%s Credential=%s/%s' % (self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope),
'SignedHeaders=%s' % signed_headers,
'Signature=%s' % signature,
])
return self._download_json(
'https://%s%s%s' % (self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''),
video_id, headers=headers)

View File

@@ -47,7 +47,7 @@ class AZMedienIE(AZMedienBaseIE):
'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom',
'info_dict': {
'id': '1_2444peh4',
'ext': 'mov',
'ext': 'mp4',
'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom',
'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8',
'uploader_id': 'TeleZ?ri',

View File

@@ -59,7 +59,7 @@ class BambuserIE(InfoExtractor):
self._LOGIN_URL, urlencode_postdata(login_form))
request.add_header('Referer', self._LOGIN_URL)
response = self._download_webpage(
request, None, 'Logging in as %s' % username)
request, None, 'Logging in')
login_error = self._html_search_regex(
r'(?s)<div class="messages error">(.+?)</div>',

View File

@@ -242,7 +242,12 @@ class BandcampAlbumIE(InfoExtractor):
raise ExtractorError('The page doesn\'t contain any tracks')
# Only tracks with duration info have songs
entries = [
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
self.url_result(
compat_urlparse.urljoin(url, t_path),
ie=BandcampIE.ie_key(),
video_title=self._search_regex(
r'<span\b[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)',
elem_content, 'track title', fatal=False))
for elem_content, t_path in track_elements
if self._html_search_meta('duration', elem_content, default=None)]

View File

@@ -29,7 +29,7 @@ from ..compat import (
class BBCCoUkIE(InfoExtractor):
IE_NAME = 'bbc.co.uk'
IE_DESC = 'BBC iPlayer'
_ID_REGEX = r'[pb][\da-z]{7}'
_ID_REGEX = r'[pbw][\da-z]{7}'
_VALID_URL = r'''(?x)
https?://
(?:www\.)?bbc\.co\.uk/
@@ -37,7 +37,8 @@ class BBCCoUkIE(InfoExtractor):
programmes/(?!articles/)|
iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
music/(?:clips|audiovideo/popular)[/#]|
radio/player/
radio/player/|
events/[^/]+/play/[^/]+/
)
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
''' % _ID_REGEX
@@ -232,6 +233,9 @@ class BBCCoUkIE(InfoExtractor):
}, {
'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
'only_matching': True,
}, {
'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
'only_matching': True,
}]
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'

View File

@@ -9,6 +9,7 @@ from ..compat import (
from ..utils import (
int_or_none,
parse_iso8601,
urljoin,
)
@@ -36,9 +37,11 @@ class BeegIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
cpl_url = self._search_regex(
r'<script[^>]+src=(["\'])(?P<url>(?:https?:)?//static\.beeg\.com/cpl/\d+\.js.*?)\1',
r'<script[^>]+src=(["\'])(?P<url>(?:/static|(?:https?:)?//static\.beeg\.com)/cpl/\d+\.js.*?)\1',
webpage, 'cpl', default=None, group='url')
cpl_url = urljoin(url, cpl_url)
beeg_version, beeg_salt = [None] * 2
if cpl_url:
@@ -54,12 +57,16 @@ class BeegIE(InfoExtractor):
r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg salt',
default=None, group='beeg_salt')
beeg_version = beeg_version or '2000'
beeg_version = beeg_version or '2185'
beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H'
video = self._download_json(
'https://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id),
video_id)
for api_path in ('', 'api.'):
video = self._download_json(
'https://%sbeeg.com/api/v6/%s/video/%s'
% (api_path, beeg_version, video_id), video_id,
fatal=api_path == 'api.')
if video:
break
def split(o, e):
def cut(s, x):

View File

@@ -1,11 +1,13 @@
# coding: utf-8
from __future__ import unicode_literals
import base64
import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
from ..compat import (
compat_b64decode,
compat_urllib_parse_unquote,
)
class BigflixIE(InfoExtractor):
@@ -39,8 +41,8 @@ class BigflixIE(InfoExtractor):
webpage, 'title')
def decode_url(quoted_b64_url):
return base64.b64decode(compat_urllib_parse_unquote(
quoted_b64_url).encode('ascii')).decode('utf-8')
return compat_b64decode(compat_urllib_parse_unquote(
quoted_b64_url)).decode('utf-8')
formats = []
for height, encoded_url in re.findall(

View File

@@ -102,6 +102,7 @@ class BiliBiliIE(InfoExtractor):
video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
headers = {
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
'Referer': url
}
headers.update(self.geo_verification_headers())
@@ -116,10 +117,15 @@ class BiliBiliIE(InfoExtractor):
payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid)
sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
headers = {
'Referer': url
}
headers.update(self.geo_verification_headers())
video_info = self._download_json(
'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign),
video_id, note='Downloading video info page',
headers=self.geo_verification_headers())
headers=headers)
if 'durl' not in video_info:
self._report_error(video_info)

View File

@@ -33,13 +33,18 @@ class BpbIE(InfoExtractor):
title = self._html_search_regex(
r'<h2 class="white">(.*?)</h2>', webpage, 'title')
video_info_dicts = re.findall(
r"({\s*src:\s*'http://film\.bpb\.de/[^}]+})", webpage)
r"({\s*src\s*:\s*'https?://film\.bpb\.de/[^}]+})", webpage)
formats = []
for video_info in video_info_dicts:
video_info = self._parse_json(video_info, video_id, transform_source=js_to_json)
quality = video_info['quality']
video_url = video_info['src']
video_info = self._parse_json(
video_info, video_id, transform_source=js_to_json, fatal=False)
if not video_info:
continue
video_url = video_info.get('src')
if not video_url:
continue
quality = 'high' if '_high' in video_url else 'low'
formats.append({
'url': video_url,
'preference': 10 if quality == 'high' else 0,

View File

@@ -1,20 +1,23 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
parse_duration,
parse_iso8601,
xpath_element,
xpath_text,
)
class BRIE(InfoExtractor):
IE_DESC = 'Bayerischer Rundfunk Mediathek'
IE_DESC = 'Bayerischer Rundfunk'
_VALID_URL = r'(?P<base_url>https?://(?:www\.)?br(?:-klassik)?\.de)/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html'
_TESTS = [
@@ -123,10 +126,10 @@ class BRIE(InfoExtractor):
for asset in assets.findall('asset'):
format_url = xpath_text(asset, ['downloadUrl', 'url'])
asset_type = asset.get('type')
if asset_type == 'HDS':
if asset_type.startswith('HDS'):
formats.extend(self._extract_f4m_formats(
format_url + '?hdcore=3.2.0', media_id, f4m_id='hds', fatal=False))
elif asset_type == 'HLS':
elif asset_type.startswith('HLS'):
formats.extend(self._extract_m3u8_formats(
format_url, media_id, 'mp4', 'm3u8_native', m3u8_id='hds', fatal=False))
else:
@@ -169,3 +172,140 @@ class BRIE(InfoExtractor):
} for variant in variants.findall('variant') if xpath_text(variant, 'url')]
thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True)
return thumbnails
class BRMediathekIE(InfoExtractor):
IE_DESC = 'Bayerischer Rundfunk Mediathek'
_VALID_URL = r'https?://(?:www\.)?br\.de/mediathek/video/[^/?&#]*?-(?P<id>av:[0-9a-f]{24})'
_TESTS = [{
'url': 'https://www.br.de/mediathek/video/gesundheit-die-sendung-vom-28112017-av:5a1e6a6e8fce6d001871cc8e',
'md5': 'fdc3d485835966d1622587d08ba632ec',
'info_dict': {
'id': 'av:5a1e6a6e8fce6d001871cc8e',
'ext': 'mp4',
'title': 'Die Sendung vom 28.11.2017',
'description': 'md5:6000cdca5912ab2277e5b7339f201ccc',
'timestamp': 1511942766,
'upload_date': '20171129',
}
}]
def _real_extract(self, url):
clip_id = self._match_id(url)
clip = self._download_json(
'https://proxy-base.master.mango.express/graphql',
clip_id, data=json.dumps({
"query": """{
viewer {
clip(id: "%s") {
title
description
duration
createdAt
ageRestriction
videoFiles {
edges {
node {
publicLocation
fileSize
videoProfile {
width
height
bitrate
encoding
}
}
}
}
captionFiles {
edges {
node {
publicLocation
}
}
}
teaserImages {
edges {
node {
imageFiles {
edges {
node {
publicLocation
width
height
}
}
}
}
}
}
}
}
}""" % clip_id}).encode(), headers={
'Content-Type': 'application/json',
})['data']['viewer']['clip']
title = clip['title']
formats = []
for edge in clip.get('videoFiles', {}).get('edges', []):
node = edge.get('node', {})
n_url = node.get('publicLocation')
if not n_url:
continue
ext = determine_ext(n_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
n_url, clip_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
else:
video_profile = node.get('videoProfile', {})
tbr = int_or_none(video_profile.get('bitrate'))
format_id = 'http'
if tbr:
format_id += '-%d' % tbr
formats.append({
'format_id': format_id,
'url': n_url,
'width': int_or_none(video_profile.get('width')),
'height': int_or_none(video_profile.get('height')),
'tbr': tbr,
'filesize': int_or_none(node.get('fileSize')),
})
self._sort_formats(formats)
subtitles = {}
for edge in clip.get('captionFiles', {}).get('edges', []):
node = edge.get('node', {})
n_url = node.get('publicLocation')
if not n_url:
continue
subtitles.setdefault('de', []).append({
'url': n_url,
})
thumbnails = []
for edge in clip.get('teaserImages', {}).get('edges', []):
for image_edge in edge.get('node', {}).get('imageFiles', {}).get('edges', []):
node = image_edge.get('node', {})
n_url = node.get('publicLocation')
if not n_url:
continue
thumbnails.append({
'url': n_url,
'width': int_or_none(node.get('width')),
'height': int_or_none(node.get('height')),
})
return {
'id': clip_id,
'title': title,
'description': clip.get('description'),
'duration': int_or_none(clip.get('duration')),
'timestamp': parse_iso8601(clip.get('createdAt')),
'age_limit': int_or_none(clip.get('ageRestriction')),
'formats': formats,
'subtitles': subtitles,
'thumbnails': thumbnails,
}

View File

@@ -464,7 +464,7 @@ class BrightcoveNewIE(AdobePassIE):
'timestamp': 1441391203,
'upload_date': '20150904',
'uploader_id': '929656772001',
'formats': 'mincount:22',
'formats': 'mincount:20',
},
}, {
# with rtmp streams
@@ -478,7 +478,7 @@ class BrightcoveNewIE(AdobePassIE):
'timestamp': 1433556729,
'upload_date': '20150606',
'uploader_id': '4036320279001',
'formats': 'mincount:41',
'formats': 'mincount:39',
},
'params': {
# m3u8 download
@@ -564,59 +564,7 @@ class BrightcoveNewIE(AdobePassIE):
return entries
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(
'http://players.brightcove.net/%s/%s_%s/index.min.js'
% (account_id, player_id, embed), video_id)
policy_key = None
catalog = self._search_regex(
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
if catalog:
catalog = self._parse_json(
js_to_json(catalog), video_id, fatal=False)
if catalog:
policy_key = catalog.get('policyKey')
if not policy_key:
policy_key = self._search_regex(
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
webpage, 'policy key', group='pk')
api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
try:
json_data = self._download_json(api_url, video_id, headers={
'Accept': 'application/json;pk=%s' % policy_key
})
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
message = json_data.get('message') or json_data['error_code']
if json_data.get('error_subcode') == 'CLIENT_GEO':
self.raise_geo_restricted(msg=message)
raise ExtractorError(message, expected=True)
raise
errors = json_data.get('errors')
if errors and errors[0].get('error_subcode') == 'TVE_AUTH':
custom_fields = json_data['custom_fields']
tve_token = self._extract_mvpd_auth(
smuggled_data['source_url'], video_id,
custom_fields['bcadobepassrequestorid'],
custom_fields['bcadobepassresourceid'])
json_data = self._download_json(
api_url, video_id, headers={
'Accept': 'application/json;pk=%s' % policy_key
}, query={
'tveToken': tve_token,
})
def _parse_brightcove_metadata(self, json_data, video_id, headers={}):
title = json_data['name'].strip()
formats = []
@@ -682,6 +630,7 @@ class BrightcoveNewIE(AdobePassIE):
})
formats.append(f)
errors = json_data.get('errors')
if not formats and errors:
error = errors[0]
raise ExtractorError(
@@ -689,6 +638,9 @@ class BrightcoveNewIE(AdobePassIE):
self._sort_formats(formats)
for f in formats:
f.setdefault('http_headers', {}).update(headers)
subtitles = {}
for text_track in json_data.get('text_tracks', []):
if text_track.get('src'):
@@ -708,9 +660,72 @@ class BrightcoveNewIE(AdobePassIE):
'thumbnail': json_data.get('thumbnail') or json_data.get('poster'),
'duration': duration,
'timestamp': parse_iso8601(json_data.get('published_at')),
'uploader_id': account_id,
'uploader_id': json_data.get('account_id'),
'formats': formats,
'subtitles': subtitles,
'tags': json_data.get('tags', []),
'is_live': is_live,
}
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(
'http://players.brightcove.net/%s/%s_%s/index.min.js'
% (account_id, player_id, embed), video_id)
policy_key = None
catalog = self._search_regex(
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
if catalog:
catalog = self._parse_json(
js_to_json(catalog), video_id, fatal=False)
if catalog:
policy_key = catalog.get('policyKey')
if not policy_key:
policy_key = self._search_regex(
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
webpage, 'policy key', group='pk')
api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
headers = {
'Accept': 'application/json;pk=%s' % policy_key,
}
referrer = smuggled_data.get('referrer')
if referrer:
headers.update({
'Referer': referrer,
'Origin': re.search(r'https?://[^/]+', referrer).group(0),
})
try:
json_data = self._download_json(api_url, video_id, headers=headers)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
message = json_data.get('message') or json_data['error_code']
if json_data.get('error_subcode') == 'CLIENT_GEO':
self.raise_geo_restricted(msg=message)
raise ExtractorError(message, expected=True)
raise
errors = json_data.get('errors')
if errors and errors[0].get('error_subcode') == 'TVE_AUTH':
custom_fields = json_data['custom_fields']
tve_token = self._extract_mvpd_auth(
smuggled_data['source_url'], video_id,
custom_fields['bcadobepassrequestorid'],
custom_fields['bcadobepassresourceid'])
json_data = self._download_json(
api_url, video_id, headers={
'Accept': 'application/json;pk=%s' % policy_key
}, query={
'tveToken': tve_token,
})
return self._parse_brightcove_metadata(
json_data, video_id, headers=headers)

View File

@@ -3,20 +3,19 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import ExtractorError
class BYUtvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?'
_VALID_URL = r'https?://(?:www\.)?byutv\.org/(?:watch|player)/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?'
_TESTS = [{
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
'info_dict': {
'id': '6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
'id': 'ZvanRocTpW-G5_yZFeltTAMv6jxOU9KH',
'display_id': 'studio-c-season-5-episode-5',
'ext': 'mp4',
'title': 'Season 5 Episode 5',
'description': 'md5:e07269172baff037f8e8bf9956bc9747',
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:1d31dc18ef4f075b28f6a65937d22c65',
'thumbnail': r're:^https?://.*',
'duration': 1486.486,
},
'params': {
@@ -26,6 +25,9 @@ class BYUtvIE(InfoExtractor):
}, {
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
'only_matching': True,
}, {
'url': 'https://www.byutv.org/player/27741493-dc83-40b0-8420-e7ae38a2ae98/byu-football-toledo-vs-byu-93016?listid=4fe0fee5-0d3c-4a29-b725-e4948627f472&listindex=0&q=toledo',
'only_matching': True,
}]
def _real_extract(self, url):
@@ -33,16 +35,16 @@ class BYUtvIE(InfoExtractor):
video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id
webpage = self._download_webpage(url, display_id)
episode_code = self._search_regex(
r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information')
ep = self._parse_json(
episode_code, display_id, transform_source=lambda s:
re.sub(r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', s))
if ep['providerType'] != 'Ooyala':
raise ExtractorError('Unsupported provider %s' % ep['provider'])
ep = self._download_json(
'https://api.byutv.org/api3/catalog/getvideosforcontent', video_id,
query={
'contentid': video_id,
'channel': 'byutv',
'x-byutv-context': 'web$US',
}, headers={
'x-byutv-context': 'web$US',
'x-byutv-platformkey': 'xsaaw9c7y5',
})['ooyalaVOD']
return {
'_type': 'url_transparent',
@@ -50,44 +52,7 @@ class BYUtvIE(InfoExtractor):
'url': 'ooyala:%s' % ep['providerId'],
'id': video_id,
'display_id': display_id,
'title': ep['title'],
'title': ep.get('title'),
'description': ep.get('description'),
'thumbnail': ep.get('imageThumbnail'),
}
class BYUtvEventIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/event/(?P<id>[0-9a-f-]+)'
_TEST = {
'url': 'http://www.byutv.org/watch/event/29941b9b-8bf6-48d2-aebf-7a87add9e34b',
'info_dict': {
'id': '29941b9b-8bf6-48d2-aebf-7a87add9e34b',
'ext': 'mp4',
'title': 'Toledo vs. BYU (9/30/16)',
},
'params': {
'skip_download': True,
},
'add_ie': ['Ooyala'],
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
ooyala_id = self._search_regex(
r'providerId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
webpage, 'ooyala id', group='id')
title = self._search_regex(
r'class=["\']description["\'][^>]*>\s*<h1>([^<]+)</h1>', webpage,
'title').strip()
return {
'_type': 'url_transparent',
'ie_key': 'Ooyala',
'url': 'ooyala:%s' % ooyala_id,
'id': video_id,
'title': title,
}

View File

@@ -4,59 +4,36 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlparse
from ..utils import (
dict_get,
# ExtractorError,
# HEADRequest,
int_or_none,
qualities,
remove_end,
unified_strdate,
)
class CanalplusIE(InfoExtractor):
IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv'
_VALID_URL = r'''(?x)
https?://
(?:
(?:
(?:(?:www|m)\.)?canalplus\.fr|
(?:www\.)?piwiplus\.fr|
(?:www\.)?d8\.tv|
(?:www\.)?c8\.fr|
(?:www\.)?d17\.tv|
(?:(?:football|www)\.)?cstar\.fr|
(?:www\.)?itele\.fr
)/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?|
player\.canalplus\.fr/#/(?P<id>\d+)
)
'''
IE_DESC = 'mycanal.fr and piwiplus.fr'
_VALID_URL = r'https?://(?:www\.)?(?P<site>mycanal|piwiplus)\.fr/(?:[^/]+/)*(?P<display_id>[^?/]+)(?:\.html\?.*\bvid=|/p/)(?P<id>\d+)'
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s?format=json'
_SITE_ID_MAP = {
'canalplus': 'cplus',
'mycanal': 'cplus',
'piwiplus': 'teletoon',
'd8': 'd8',
'c8': 'd8',
'd17': 'd17',
'cstar': 'd17',
'itele': 'itele',
}
# Only works for direct mp4 URLs
_GEO_COUNTRIES = ['FR']
_TESTS = [{
'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814',
'url': 'https://www.mycanal.fr/d17-emissions/lolywood/p/1397061',
'info_dict': {
'id': '1405510',
'display_id': 'pid1830-c-zapping',
'id': '1397061',
'display_id': 'lolywood',
'ext': 'mp4',
'title': 'Zapping - 02/07/2016',
'description': 'Le meilleur de toutes les chaînes, tous les jours',
'upload_date': '20160702',
'title': 'Euro 2016 : Je préfère te prévenir - Lolywood - Episode 34',
'description': 'md5:7d97039d455cb29cdba0d652a0efaa5e',
'upload_date': '20160602',
},
}, {
# geo restricted, bypassed
@@ -70,64 +47,12 @@ class CanalplusIE(InfoExtractor):
'upload_date': '20140724',
},
'expected_warnings': ['HTTP Error 403: Forbidden'],
}, {
# geo restricted, bypassed
'url': 'http://www.c8.fr/c8-divertissement/ms-touche-pas-a-mon-poste/pid6318-videos-integrales.html?vid=1443684',
'md5': 'bb6f9f343296ab7ebd88c97b660ecf8d',
'info_dict': {
'id': '1443684',
'display_id': 'pid6318-videos-integrales',
'ext': 'mp4',
'title': 'Guess my iep ! - TPMP - 07/04/2017',
'description': 'md5:6f005933f6e06760a9236d9b3b5f17fa',
'upload_date': '20170407',
},
'expected_warnings': ['HTTP Error 403: Forbidden'],
}, {
'url': 'http://www.itele.fr/chroniques/invite-michael-darmon/rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
'info_dict': {
'id': '1420176',
'display_id': 'rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
'ext': 'mp4',
'title': 'L\'invité de Michaël Darmon du 14/10/2016 - ',
'description': 'Chaque matin du lundi au vendredi, Michaël Darmon reçoit un invité politique à 8h25.',
'upload_date': '20161014',
},
}, {
'url': 'http://football.cstar.fr/cstar-minisite-foot/pid7566-feminines-videos.html?vid=1416769',
'info_dict': {
'id': '1416769',
'display_id': 'pid7566-feminines-videos',
'ext': 'mp4',
'title': 'France - Albanie : les temps forts de la soirée - 20/09/2016',
'description': 'md5:c3f30f2aaac294c1c969b3294de6904e',
'upload_date': '20160921',
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://m.canalplus.fr/?vid=1398231',
'only_matching': True,
}, {
'url': 'http://www.d17.tv/emissions/pid8303-lolywood.html?vid=1397061',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
site, display_id, video_id = re.match(self._VALID_URL, url).groups()
site_id = self._SITE_ID_MAP[compat_urllib_parse_urlparse(url).netloc.rsplit('.', 2)[-2]]
# Beware, some subclasses do not define an id group
display_id = remove_end(dict_get(mobj.groupdict(), ('display_id', 'id', 'vid')), '.html')
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
[r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)',
r'id=["\']canal_video_player(?P<id>\d+)',
r'data-video=["\'](?P<id>\d+)'],
webpage, 'video id', default=mobj.group('vid'), group='id')
site_id = self._SITE_ID_MAP[site]
info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id)
video_data = self._download_json(info_url, video_id, 'Downloading video JSON')
@@ -161,7 +86,7 @@ class CanalplusIE(InfoExtractor):
format_url + '?hdcore=2.11.3', video_id, f4m_id=format_id, fatal=False))
else:
formats.append({
# the secret extracted ya function in http://player.canalplus.fr/common/js/canalPlayer.js
# the secret extracted from ya function in http://player.canalplus.fr/common/js/canalPlayer.js
'url': format_url + '?secret=pqzerjlsmdkjfoiuerhsdlfknaes',
'format_id': format_id,
'preference': preference(format_id),

View File

@@ -1,26 +1,112 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..utils import float_or_none
from .gigya import GigyaBaseIE
from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
strip_or_none,
float_or_none,
int_or_none,
parse_iso8601,
)
class CanvasIE(InfoExtractor):
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrtvideo)/assets/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'md5': '90139b746a0a9bd7bb631283f6e2a64e',
'info_dict': {
'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'ext': 'flv',
'title': 'Nachtwacht: De Greystook',
'description': 'md5:1db3f5dc4c7109c821261e7512975be7',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1468.03,
},
'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
}, {
'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
site_id, video_id = mobj.group('site_id'), mobj.group('id')
data = self._download_json(
'https://mediazone.vrt.be/api/v1/%s/assets/%s'
% (site_id, video_id), video_id)
title = data['title']
description = data.get('description')
formats = []
for target in data['targetUrls']:
format_url, format_type = target.get('url'), target.get('type')
if not format_url or not format_type:
continue
if format_type == 'HLS':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id=format_type, fatal=False))
elif format_type == 'HDS':
formats.extend(self._extract_f4m_formats(
format_url, video_id, f4m_id=format_type, fatal=False))
elif format_type == 'MPEG_DASH':
formats.extend(self._extract_mpd_formats(
format_url, video_id, mpd_id=format_type, fatal=False))
elif format_type == 'HSS':
formats.extend(self._extract_ism_formats(
format_url, video_id, ism_id='mss', fatal=False))
else:
formats.append({
'format_id': format_type,
'url': format_url,
})
self._sort_formats(formats)
subtitles = {}
subtitle_urls = data.get('subtitleUrls')
if isinstance(subtitle_urls, list):
for subtitle in subtitle_urls:
subtitle_url = subtitle.get('url')
if subtitle_url and subtitle.get('type') == 'CLOSED':
subtitles.setdefault('nl', []).append({'url': subtitle_url})
return {
'id': video_id,
'display_id': video_id,
'title': title,
'description': description,
'formats': formats,
'duration': float_or_none(data.get('duration'), 1000),
'thumbnail': data.get('posterImageUrl'),
'subtitles': subtitles,
}
class CanvasEenIE(InfoExtractor):
IE_DESC = 'canvas.be and een.be'
_VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
'md5': 'ea838375a547ac787d4064d8c7860a6c',
'md5': 'ed66976748d12350b118455979cca293',
'info_dict': {
'id': 'mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
'display_id': 'de-afspraak-veilt-voor-de-warmste-week',
'ext': 'mp4',
'ext': 'flv',
'title': 'De afspraak veilt voor de Warmste Week',
'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 49.02,
}
},
'expected_warnings': ['is not a supported codec'],
}, {
# with subtitles
'url': 'http://www.canvas.be/video/panorama/2016/pieter-0167',
@@ -40,7 +126,8 @@ class CanvasIE(InfoExtractor):
},
'params': {
'skip_download': True,
}
},
'skip': 'Pagina niet gevonden',
}, {
'url': 'https://www.een.be/sorry-voor-alles/herbekijk-sorry-voor-alles',
'info_dict': {
@@ -54,7 +141,8 @@ class CanvasIE(InfoExtractor):
},
'params': {
'skip_download': True,
}
},
'skip': 'Episode no longer available',
}, {
'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend',
'only_matching': True,
@@ -66,55 +154,157 @@ class CanvasIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
title = (self._search_regex(
title = strip_or_none(self._search_regex(
r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>',
webpage, 'title', default=None) or self._og_search_title(
webpage)).strip()
webpage, default=None))
video_id = self._html_search_regex(
r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', group='id')
data = self._download_json(
'https://mediazone.vrt.be/api/v1/%s/assets/%s'
% (site_id, video_id), display_id)
formats = []
for target in data['targetUrls']:
format_url, format_type = target.get('url'), target.get('type')
if not format_url or not format_type:
continue
if format_type == 'HLS':
formats.extend(self._extract_m3u8_formats(
format_url, display_id, entry_protocol='m3u8_native',
ext='mp4', preference=0, fatal=False, m3u8_id=format_type))
elif format_type == 'HDS':
formats.extend(self._extract_f4m_formats(
format_url, display_id, f4m_id=format_type, fatal=False))
elif format_type == 'MPEG_DASH':
formats.extend(self._extract_mpd_formats(
format_url, display_id, mpd_id=format_type, fatal=False))
else:
formats.append({
'format_id': format_type,
'url': format_url,
})
self._sort_formats(formats)
subtitles = {}
subtitle_urls = data.get('subtitleUrls')
if isinstance(subtitle_urls, list):
for subtitle in subtitle_urls:
subtitle_url = subtitle.get('url')
if subtitle_url and subtitle.get('type') == 'CLOSED':
subtitles.setdefault('nl', []).append({'url': subtitle_url})
r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
group='id')
return {
'_type': 'url_transparent',
'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (site_id, video_id),
'ie_key': CanvasIE.ie_key(),
'id': video_id,
'display_id': display_id,
'title': title,
'description': self._og_search_description(webpage),
'formats': formats,
'duration': float_or_none(data.get('duration'), 1000),
'thumbnail': data.get('posterImageUrl'),
'subtitles': subtitles,
}
class VrtNUIE(GigyaBaseIE):
IE_DESC = 'VrtNU.be'
_VALID_URL = r'https?://(?:www\.)?vrt\.be/(?P<site_id>vrtnu)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/',
'info_dict': {
'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
'ext': 'flv',
'title': 'De zwarte weduwe',
'description': 'md5:d90c21dced7db869a85db89a623998d4',
'duration': 1457.04,
'thumbnail': r're:^https?://.*\.jpg$',
'season': '1',
'season_number': 1,
'episode_number': 1,
},
'skip': 'This video is only available for registered users'
}]
_NETRC_MACHINE = 'vrtnu'
_APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
_CONTEXT_ID = 'R3595707040'
def _real_initialize(self):
self._login()
def _login(self):
username, password = self._get_login_info()
if username is None:
return
auth_data = {
'APIKey': self._APIKEY,
'targetEnv': 'jssdk',
'loginID': username,
'password': password,
'authMode': 'cookie',
}
auth_info = self._gigya_login(auth_data)
# Sometimes authentication fails for no good reason, retry
login_attempt = 1
while login_attempt <= 3:
try:
# When requesting a token, no actual token is returned, but the
# necessary cookies are set.
self._request_webpage(
'https://token.vrt.be',
None, note='Requesting a token', errnote='Could not get a token',
headers={
'Content-Type': 'application/json',
'Referer': 'https://www.vrt.be/vrtnu/',
},
data=json.dumps({
'uid': auth_info['UID'],
'uidsig': auth_info['UIDSignature'],
'ts': auth_info['signatureTimestamp'],
'email': auth_info['profile']['email'],
}).encode('utf-8'))
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
login_attempt += 1
self.report_warning('Authentication failed')
self._sleep(1, None, msg_template='Waiting for %(timeout)s seconds before trying again')
else:
raise e
else:
break
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
title = self._html_search_regex(
r'(?ms)<h1 class="content__heading">(.+?)</h1>',
webpage, 'title').strip()
description = self._html_search_regex(
r'(?ms)<div class="content__description">(.+?)</div>',
webpage, 'description', default=None)
season = self._html_search_regex(
[r'''(?xms)<div\ class="tabs__tab\ tabs__tab--active">\s*
<span>seizoen\ (.+?)</span>\s*
</div>''',
r'<option value="seizoen (\d{1,3})" data-href="[^"]+?" selected>'],
webpage, 'season', default=None)
season_number = int_or_none(season)
episode_number = int_or_none(self._html_search_regex(
r'''(?xms)<div\ class="content__episode">\s*
<abbr\ title="aflevering">afl</abbr>\s*<span>(\d+)</span>
</div>''',
webpage, 'episode_number', default=None))
release_date = parse_iso8601(self._html_search_regex(
r'(?ms)<div class="content__broadcastdate">\s*<time\ datetime="(.+?)"',
webpage, 'release_date', default=None))
# If there's a ? or a # in the URL, remove them and everything after
clean_url = url.split('?')[0].split('#')[0].strip('/')
securevideo_url = clean_url + '.mssecurevideo.json'
try:
video = self._download_json(securevideo_url, display_id)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
self.raise_login_required()
raise
# We are dealing with a '../<show>.relevant' URL
redirect_url = video.get('url')
if redirect_url:
return self.url_result(self._proto_relative_url(redirect_url, 'https:'))
# There is only one entry, but with an unknown key, so just get
# the first one
video_id = list(video.values())[0].get('videoid')
return {
'_type': 'url_transparent',
'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
'ie_key': CanvasIE.ie_key(),
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'season': season,
'season_number': season_number,
'episode_number': episode_number,
'release_date': release_date,
}

View File

@@ -31,7 +31,7 @@ class CartoonNetworkIE(TurnerBaseIE):
'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?networkName=CN2&' + query, video_id, {
'secure': {
'media_src': 'http://androidhls-secure.cdn.turner.com/toon/big',
'tokenizer_src': 'http://www.cartoonnetwork.com/cntv/mvpd/processors/services/token_ipadAdobe.do',
'tokenizer_src': 'https://token.vgtf.net/token/token_mobile',
},
}, {
'url': url,

View File

@@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
@@ -13,6 +14,7 @@ from ..utils import (
xpath_element,
xpath_with_ns,
find_xpath_attr,
parse_duration,
parse_iso8601,
parse_age_limit,
int_or_none,
@@ -200,6 +202,7 @@ class CBCWatchBaseIE(InfoExtractor):
'media': 'http://search.yahoo.com/mrss/',
'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/',
}
_GEO_COUNTRIES = ['CA']
def _call_api(self, path, video_id):
url = path if path.startswith('http') else self._API_BASE_URL + path
@@ -287,6 +290,11 @@ class CBCWatchBaseIE(InfoExtractor):
class CBCWatchVideoIE(CBCWatchBaseIE):
IE_NAME = 'cbc.ca:watch:video'
_VALID_URL = r'https?://api-cbc\.cloud\.clearleap\.com/cloffice/client/web/play/?\?.*?\bcontentId=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
_TEST = {
# geo-restricted to Canada, bypassable
'url': 'https://api-cbc.cloud.clearleap.com/cloffice/client/web/play/?contentId=3c84472a-1eea-4dee-9267-2655d5055dcf&categoryId=ebc258f5-ee40-4cca-b66b-ba6bd55b7235',
'only_matching': True,
}
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -323,9 +331,10 @@ class CBCWatchIE(CBCWatchBaseIE):
IE_NAME = 'cbc.ca:watch'
_VALID_URL = r'https?://watch\.cbc\.ca/(?:[^/]+/)+(?P<id>[0-9a-f-]+)'
_TESTS = [{
# geo-restricted to Canada, bypassable
'url': 'http://watch.cbc.ca/doc-zone/season-6/customer-disservice/38e815a-009e3ab12e4',
'info_dict': {
'id': '38e815a-009e3ab12e4',
'id': '9673749a-5e77-484c-8b62-a1092a6b5168',
'ext': 'mp4',
'title': 'Customer (Dis)Service',
'description': 'md5:8bdd6913a0fe03d4b2a17ebe169c7c87',
@@ -337,8 +346,8 @@ class CBCWatchIE(CBCWatchBaseIE):
'skip_download': True,
'format': 'bestvideo',
},
'skip': 'Geo-restricted to Canada',
}, {
# geo-restricted to Canada, bypassable
'url': 'http://watch.cbc.ca/arthur/all/1ed4b385-cd84-49cf-95f0-80f004680057',
'info_dict': {
'id': '1ed4b385-cd84-49cf-95f0-80f004680057',
@@ -346,10 +355,69 @@ class CBCWatchIE(CBCWatchBaseIE):
'description': 'Arthur, the sweetest 8-year-old aardvark, and his pals solve all kinds of problems with humour, kindness and teamwork.',
},
'playlist_mincount': 30,
'skip': 'Geo-restricted to Canada',
}]
def _real_extract(self, url):
video_id = self._match_id(url)
rss = self._call_api('web/browse/' + video_id, video_id)
return self._parse_rss_feed(rss)
class CBCOlympicsIE(InfoExtractor):
IE_NAME = 'cbc.ca:olympics'
_VALID_URL = r'https?://olympics\.cbc\.ca/video/[^/]+/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://olympics.cbc.ca/video/whats-on-tv/olympic-morning-featuring-the-opening-ceremony/',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._hidden_inputs(webpage)['videoId']
video_doc = self._download_xml(
'https://olympics.cbc.ca/videodata/%s.xml' % video_id, video_id)
title = xpath_text(video_doc, 'title', fatal=True)
is_live = xpath_text(video_doc, 'kind') == 'Live'
if is_live:
title = self._live_title(title)
formats = []
for video_source in video_doc.findall('videoSources/videoSource'):
uri = xpath_text(video_source, 'uri')
if not uri:
continue
tokenize = self._download_json(
'https://olympics.cbc.ca/api/api-akamai/tokenize',
video_id, data=json.dumps({
'VideoSource': uri,
}).encode(), headers={
'Content-Type': 'application/json',
'Referer': url,
# d3.VideoPlayer._init in https://olympics.cbc.ca/components/script/base.js
'Cookie': '_dvp=TK:C0ObxjerU', # AKAMAI CDN cookie
}, fatal=False)
if not tokenize:
continue
content_url = tokenize['ContentUrl']
video_source_format = video_source.get('format')
if video_source_format == 'IIS':
formats.extend(self._extract_ism_formats(
content_url, video_id, ism_id=video_source_format, fatal=False))
else:
formats.extend(self._extract_m3u8_formats(
content_url, video_id, 'mp4',
'm3u8' if is_live else 'm3u8_native',
m3u8_id=video_source_format, fatal=False))
self._sort_formats(formats)
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': xpath_text(video_doc, 'description'),
'thumbnail': xpath_text(video_doc, 'thumbnailUrl'),
'duration': parse_duration(xpath_text(video_doc, 'duration')),
'formats': formats,
'is_live': is_live,
}

View File

@@ -75,10 +75,10 @@ class CBSInteractiveIE(CBSIE):
webpage = self._download_webpage(url, display_id)
data_json = self._html_search_regex(
r"data-(?:cnet|zdnet)-video(?:-uvp(?:js)?)?-options='([^']+)'",
r"data(?:-(?:cnet|zdnet))?-video(?:-(?:uvp(?:js)?|player))?-options='([^']+)'",
webpage, 'data json')
data = self._parse_json(data_json, display_id)
vdata = data.get('video') or data['videos'][0]
vdata = data.get('video') or (data.get('videos') or data.get('playlist'))[0]
video_id = vdata['mpxRefId']

View File

@@ -91,12 +91,10 @@ class CBSLocalIE(AnvatoIE):
info_dict = self._extract_anvato_videos(webpage, display_id)
time_str = self._html_search_regex(
r'class="entry-date">([^<]+)<', webpage, 'released date', default=None)
if time_str:
timestamp = unified_timestamp(time_str)
else:
timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage))
timestamp = unified_timestamp(self._html_search_regex(
r'class="(?:entry|post)-date"[^>]*>([^<]+)', webpage,
'released date', default=None)) or parse_iso8601(
self._html_search_meta('uploadDate', webpage))
info_dict.update({
'display_id': display_id,

View File

@@ -93,7 +93,7 @@ class CCMAIE(InfoExtractor):
'description': clean_html(informacio.get('descripcio')),
'duration': duration,
'timestamp': timestamp,
'thumnails': thumbnails,
'thumbnails': thumbnails,
'subtitles': subtitles,
'formats': formats,
}

View File

@@ -124,7 +124,7 @@ class CDAIE(InfoExtractor):
}
def extract_format(page, version):
json_str = self._search_regex(
json_str = self._html_search_regex(
r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
'%s player_json' % version, fatal=False, group='player_data')
if not json_str:

View File

@@ -81,6 +81,12 @@ class Channel9IE(InfoExtractor):
_RSS_URL = 'http://channel9.msdn.com/%s/RSS'
@staticmethod
def _extract_urls(webpage):
return re.findall(
r'<iframe[^>]+src=["\'](https?://channel9\.msdn\.com/(?:[^/]+/)+)player\b',
webpage)
def _extract_list(self, video_id, rss_url=None):
if not rss_url:
rss_url = self._RSS_URL % video_id

View File

@@ -5,7 +5,7 @@ from ..utils import remove_end
class CharlieRoseIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?charlierose\.com/video(?:s|/player)/(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?charlierose\.com/(?:video|episode)(?:s|/player)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://charlierose.com/videos/27996',
'md5': 'fda41d49e67d4ce7c2411fd2c4702e09',
@@ -24,6 +24,9 @@ class CharlieRoseIE(InfoExtractor):
}, {
'url': 'https://charlierose.com/videos/27996',
'only_matching': True,
}, {
'url': 'https://charlierose.com/episodes/30887?autoplay=true',
'only_matching': True,
}]
_PLAYER_BASE = 'https://charlierose.com/video/player/%s'

View File

@@ -1,10 +1,11 @@
from __future__ import unicode_literals
import re
import base64
import json
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..compat import compat_b64decode
from ..utils import (
clean_html,
ExtractorError
@@ -57,7 +58,7 @@ class ChilloutzoneIE(InfoExtractor):
base64_video_info = self._html_search_regex(
r'var cozVidData = "(.+?)";', webpage, 'video data')
decoded_video_info = base64.b64decode(base64_video_info.encode('utf-8')).decode('utf-8')
decoded_video_info = compat_b64decode(base64_video_info).decode('utf-8')
video_info_dict = json.loads(decoded_video_info)
# get video information from dict
@@ -70,11 +71,9 @@ class ChilloutzoneIE(InfoExtractor):
# If nativePlatform is None a fallback mechanism is used (i.e. youtube embed)
if native_platform is None:
youtube_url = self._html_search_regex(
r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
webpage, 'fallback video URL', default=None)
if youtube_url is not None:
return self.url_result(youtube_url, ie='Youtube')
youtube_url = YoutubeIE._extract_url(webpage)
if youtube_url:
return self.url_result(youtube_url, ie=YoutubeIE.ie_key())
# Non Fallback: Decide to use native source (e.g. youtube or vimeo) or
# the own CDN

View File

@@ -1,10 +1,10 @@
# coding: utf-8
from __future__ import unicode_literals
import base64
import re
from .common import InfoExtractor
from ..compat import compat_b64decode
from ..utils import parse_duration
@@ -44,8 +44,7 @@ class ChirbitIE(InfoExtractor):
# Reverse engineered from https://chirb.it/js/chirbit.player.js (look
# for soundURL)
audio_url = base64.b64decode(
data_fd[::-1].encode('ascii')).decode('utf-8')
audio_url = compat_b64decode(data_fd[::-1]).decode('utf-8')
title = self._search_regex(
r'class=["\']chirbit-title["\'][^>]*>([^<]+)', webpage, 'title')

View File

@@ -0,0 +1,74 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
qualities,
)
import re
class ClippitIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?clippituser\.tv/c/(?P<id>[a-z]+)'
_TEST = {
'url': 'https://www.clippituser.tv/c/evmgm',
'md5': '963ae7a59a2ec4572ab8bf2f2d2c5f09',
'info_dict': {
'id': 'evmgm',
'ext': 'mp4',
'title': 'Bye bye Brutus. #BattleBots - Clippit',
'uploader': 'lizllove',
'uploader_url': 'https://www.clippituser.tv/p/lizllove',
'timestamp': 1472183818,
'upload_date': '20160826',
'description': 'BattleBots | ABC',
'thumbnail': r're:^https?://.*\.jpg$',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<title.*>(.+?)</title>', webpage, 'title')
FORMATS = ('sd', 'hd')
quality = qualities(FORMATS)
formats = []
for format_id in FORMATS:
url = self._html_search_regex(r'data-%s-file="(.+?)"' % format_id,
webpage, 'url', fatal=False)
if not url:
continue
match = re.search(r'/(?P<height>\d+)\.mp4', url)
formats.append({
'url': url,
'format_id': format_id,
'quality': quality(format_id),
'height': int(match.group('height')) if match else None,
})
uploader = self._html_search_regex(r'class="username".*>\s+(.+?)\n',
webpage, 'uploader', fatal=False)
uploader_url = ('https://www.clippituser.tv/p/' + uploader
if uploader else None)
timestamp = self._html_search_regex(r'datetime="(.+?)"',
webpage, 'date', fatal=False)
thumbnail = self._html_search_regex(r'data-image="(.+?)"',
webpage, 'thumbnail', fatal=False)
return {
'id': video_id,
'title': title,
'formats': formats,
'uploader': uploader,
'uploader_url': uploader_url,
'timestamp': parse_iso8601(timestamp),
'description': self._og_search_description(webpage),
'thumbnail': thumbnail,
}

View File

@@ -1,93 +0,0 @@
from __future__ import unicode_literals
import json
from .common import InfoExtractor
from ..utils import (
float_or_none,
int_or_none,
sanitized_Request,
)
class CollegeRamaIE(InfoExtractor):
_VALID_URL = r'https?://collegerama\.tudelft\.nl/Mediasite/Play/(?P<id>[\da-f]+)'
_TESTS = [
{
'url': 'https://collegerama.tudelft.nl/Mediasite/Play/585a43626e544bdd97aeb71a0ec907a01d',
'md5': '481fda1c11f67588c0d9d8fbdced4e39',
'info_dict': {
'id': '585a43626e544bdd97aeb71a0ec907a01d',
'ext': 'mp4',
'title': 'Een nieuwe wereld: waarden, bewustzijn en techniek van de mensheid 2.0.',
'description': '',
'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
'duration': 7713.088,
'timestamp': 1413309600,
'upload_date': '20141014',
},
},
{
'url': 'https://collegerama.tudelft.nl/Mediasite/Play/86a9ea9f53e149079fbdb4202b521ed21d?catalog=fd32fd35-6c99-466c-89d4-cd3c431bc8a4',
'md5': 'ef1fdded95bdf19b12c5999949419c92',
'info_dict': {
'id': '86a9ea9f53e149079fbdb4202b521ed21d',
'ext': 'wmv',
'title': '64ste Vakantiecursus: Afvalwater',
'description': 'md5:7fd774865cc69d972f542b157c328305',
'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
'duration': 10853,
'timestamp': 1326446400,
'upload_date': '20120113',
},
},
]
def _real_extract(self, url):
video_id = self._match_id(url)
player_options_request = {
'getPlayerOptionsRequest': {
'ResourceId': video_id,
'QueryString': '',
}
}
request = sanitized_Request(
'http://collegerama.tudelft.nl/Mediasite/PlayerService/PlayerService.svc/json/GetPlayerOptions',
json.dumps(player_options_request))
request.add_header('Content-Type', 'application/json')
player_options = self._download_json(request, video_id)
presentation = player_options['d']['Presentation']
title = presentation['Title']
description = presentation.get('Description')
thumbnail = None
duration = float_or_none(presentation.get('Duration'), 1000)
timestamp = int_or_none(presentation.get('UnixTime'), 1000)
formats = []
for stream in presentation['Streams']:
for video in stream['VideoUrls']:
thumbnail_url = stream.get('ThumbnailUrl')
if thumbnail_url:
thumbnail = 'http://collegerama.tudelft.nl' + thumbnail_url
format_id = video['MediaType']
if format_id == 'SS':
continue
formats.append({
'url': video['Location'],
'format_id': format_id,
})
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'timestamp': timestamp,
'formats': formats,
}

View File

@@ -120,13 +120,16 @@ class ComedyCentralTVIE(MTVServicesInfoExtractor):
class ComedyCentralShortnameIE(InfoExtractor):
_VALID_URL = r'^:(?P<id>tds|thedailyshow)$'
_VALID_URL = r'^:(?P<id>tds|thedailyshow|theopposition)$'
_TESTS = [{
'url': ':tds',
'only_matching': True,
}, {
'url': ':thedailyshow',
'only_matching': True,
}, {
'url': ':theopposition',
'only_matching': True,
}]
def _real_extract(self, url):
@@ -134,5 +137,6 @@ class ComedyCentralShortnameIE(InfoExtractor):
shortcut_map = {
'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
'theopposition': 'http://www.cc.com/shows/the-opposition-with-jordan-klepper/full-episodes',
}
return self.url_result(shortcut_map[video_id])

View File

@@ -27,8 +27,12 @@ from ..compat import (
compat_urllib_parse_urlencode,
compat_urllib_request,
compat_urlparse,
compat_xml_parse_error,
)
from ..downloader.f4m import (
get_base_url,
remove_encrypted_media,
)
from ..downloader.f4m import remove_encrypted_media
from ..utils import (
NO_DEFAULT,
age_restricted,
@@ -170,6 +174,8 @@ class InfoExtractor(object):
width : height ratio as float.
* no_resume The server does not support resuming the
(HTTP or RTMP) download. Boolean.
* downloader_options A dictionary of downloader options as
described in FileDownloader
url: Final video URL.
ext: Video filename extension.
@@ -297,8 +303,9 @@ class InfoExtractor(object):
There must be a key "entries", which is a list, an iterable, or a PagedList
object, each element of which is a valid dictionary by this specification.
Additionally, playlists can have "title", "description" and "id" attributes
with the same semantics as videos (see above).
Additionally, playlists can have "id", "title", "description", "uploader",
"uploader_id", "uploader_url" attributes with the same semantics as videos
(see above).
_type "multi_video" indicates that there are multiple videos that
@@ -490,6 +497,16 @@ class InfoExtractor(object):
self.to_screen('%s' % (note,))
else:
self.to_screen('%s: %s' % (video_id, note))
# Some sites check X-Forwarded-For HTTP header in order to figure out
# the origin of the client behind proxy. This allows bypassing geo
# restriction by faking this header's value to IP that belongs to some
# geo unrestricted country. We will do so once we encounter any
# geo restriction error.
if self._x_forwarded_for_ip:
if 'X-Forwarded-For' not in headers:
headers['X-Forwarded-For'] = self._x_forwarded_for_ip
if isinstance(url_or_request, compat_urllib_request.Request):
url_or_request = update_Request(
url_or_request, data=data, headers=headers, query=query)
@@ -519,15 +536,6 @@ class InfoExtractor(object):
if isinstance(url_or_request, (compat_str, str)):
url_or_request = url_or_request.partition('#')[0]
# Some sites check X-Forwarded-For HTTP header in order to figure out
# the origin of the client behind proxy. This allows bypassing geo
# restriction by faking this header's value to IP that belongs to some
# geo unrestricted country. We will do so once we encounter any
# geo restriction error.
if self._x_forwarded_for_ip:
if 'X-Forwarded-For' not in headers:
headers['X-Forwarded-For'] = self._x_forwarded_for_ip
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query)
if urlh is False:
assert not fatal
@@ -588,19 +596,11 @@ class InfoExtractor(object):
if not encoding:
encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
if self._downloader.params.get('dump_intermediate_pages', False):
try:
url = url_or_request.get_full_url()
except AttributeError:
url = url_or_request
self.to_screen('Dumping request to ' + url)
self.to_screen('Dumping request to ' + urlh.geturl())
dump = base64.b64encode(webpage_bytes).decode('ascii')
self._downloader.to_screen(dump)
if self._downloader.params.get('write_pages', False):
try:
url = url_or_request.get_full_url()
except AttributeError:
url = url_or_request
basen = '%s_%s' % (video_id, url)
basen = '%s_%s' % (video_id, urlh.geturl())
if len(basen) > 240:
h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
basen = basen[:240 - len(h)] + h
@@ -646,15 +646,29 @@ class InfoExtractor(object):
def _download_xml(self, url_or_request, video_id,
note='Downloading XML', errnote='Unable to download XML',
transform_source=None, fatal=True, encoding=None, data=None, headers={}, query={}):
transform_source=None, fatal=True, encoding=None,
data=None, headers={}, query={}):
"""Return the xml as an xml.etree.ElementTree.Element"""
xml_string = self._download_webpage(
url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding, data=data, headers=headers, query=query)
url_or_request, video_id, note, errnote, fatal=fatal,
encoding=encoding, data=data, headers=headers, query=query)
if xml_string is False:
return xml_string
return self._parse_xml(
xml_string, video_id, transform_source=transform_source,
fatal=fatal)
def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
if transform_source:
xml_string = transform_source(xml_string)
return compat_etree_fromstring(xml_string.encode('utf-8'))
try:
return compat_etree_fromstring(xml_string.encode('utf-8'))
except compat_xml_parse_error as ve:
errmsg = '%s: Failed to parse XML ' % video_id
if fatal:
raise ExtractorError(errmsg, cause=ve)
else:
self.report_warning(errmsg + str(ve))
def _download_json(self, url_or_request, video_id,
note='Downloading JSON metadata',
@@ -940,7 +954,8 @@ class InfoExtractor(object):
def _family_friendly_search(self, html):
# See http://schema.org/VideoObject
family_friendly = self._html_search_meta('isFamilyFriendly', html)
family_friendly = self._html_search_meta(
'isFamilyFriendly', html, default=None)
if not family_friendly:
return None
@@ -1014,7 +1029,7 @@ class InfoExtractor(object):
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
info['series'] = unescapeHTML(part_of_series.get('name'))
elif item_type == 'Article':
elif item_type in ('Article', 'NewsArticle'):
info.update({
'timestamp': parse_iso8601(e.get('datePublished')),
'title': unescapeHTML(e.get('headline')),
@@ -1223,11 +1238,8 @@ class InfoExtractor(object):
media_nodes = remove_encrypted_media(media_nodes)
if not media_nodes:
return formats
base_url = xpath_text(
manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'],
'base URL', default=None)
if base_url:
base_url = base_url.strip()
manifest_base_url = get_base_url(manifest)
bootstrap_info = xpath_element(
manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
@@ -1259,7 +1271,7 @@ class InfoExtractor(object):
continue
manifest_url = (
media_url if media_url.startswith('http://') or media_url.startswith('https://')
else ((base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
else ((manifest_base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
# If media_url is itself a f4m manifest do the recursive extraction
# since bitrates in parent manifest (this one) and media_url manifest
# may differ leading to inability to resolve the format by requested
@@ -1294,6 +1306,7 @@ class InfoExtractor(object):
'url': manifest_url,
'manifest_url': manifest_url,
'ext': 'flv' if bootstrap_info is not None else None,
'protocol': 'f4m',
'tbr': tbr,
'width': width,
'height': height,
@@ -1339,6 +1352,9 @@ class InfoExtractor(object):
if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
return []
if re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc): # Apple FairPlay
return []
formats = []
format_url = lambda u: (
@@ -1385,7 +1401,7 @@ class InfoExtractor(object):
media_url = media.get('URI')
if media_url:
format_id = []
for v in (group_id, name):
for v in (m3u8_id, group_id, name):
if v:
format_id.append(v)
f = {
@@ -1785,7 +1801,7 @@ class InfoExtractor(object):
ms_info['timescale'] = int(timescale)
segment_duration = source.get('duration')
if segment_duration:
ms_info['segment_duration'] = int(segment_duration)
ms_info['segment_duration'] = float(segment_duration)
def extract_Initialization(source):
initialization = source.find(_add_ns('Initialization'))
@@ -1866,6 +1882,7 @@ class InfoExtractor(object):
'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
'format_note': 'DASH %s' % content_type,
'filesize': filesize,
'container': mimetype2ext(mime_type) + '_dash',
}
f.update(parse_codecs(representation_attrib.get('codecs')))
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
@@ -1904,7 +1921,7 @@ class InfoExtractor(object):
# can't be used at the same time
if '%(Number' in media_template and 's' not in representation_ms_info:
segment_duration = None
if 'total_number' not in representation_ms_info and 'segment_duration':
if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
representation_ms_info['fragments'] = [{
@@ -1963,6 +1980,22 @@ class InfoExtractor(object):
})
segment_index += 1
representation_ms_info['fragments'] = fragments
elif 'segment_urls' in representation_ms_info:
# Segment URLs with no SegmentTimeline
# Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
# https://github.com/rg3/youtube-dl/pull/14844
fragments = []
segment_duration = float_or_none(
representation_ms_info['segment_duration'],
representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
for segment_url in representation_ms_info['segment_urls']:
fragment = {
location_key(segment_url): segment_url,
}
if segment_duration:
fragment['duration'] = segment_duration
fragments.append(fragment)
representation_ms_info['fragments'] = fragments
# NB: MPD manifest may contain direct URLs to unfragmented media.
# No fragments key is present in this case.
if 'fragments' in representation_ms_info:
@@ -1977,16 +2010,14 @@ class InfoExtractor(object):
f['url'] = initialization_url
f['fragments'].append({location_key(initialization_url): initialization_url})
f['fragments'].extend(representation_ms_info['fragments'])
try:
existing_format = next(
fo for fo in formats
if fo['format_id'] == representation_id)
except StopIteration:
full_info = formats_dict.get(representation_id, {}).copy()
full_info.update(f)
formats.append(full_info)
else:
existing_format.update(f)
# According to [1, 5.3.5.2, Table 7, page 35] @id of Representation
# is not necessarily unique within a Period thus formats with
# the same `format_id` are quite possible. There are numerous examples
# of such manifests (see https://github.com/rg3/youtube-dl/issues/15111,
# https://github.com/rg3/youtube-dl/issues/13919)
full_info = formats_dict.get(representation_id, {}).copy()
full_info.update(f)
formats.append(full_info)
else:
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
return formats
@@ -2026,7 +2057,7 @@ class InfoExtractor(object):
stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
stream_name = stream.get('Name')
for track in stream.findall('QualityLevel'):
fourcc = track.get('FourCC')
fourcc = track.get('FourCC', 'AACL' if track.get('AudioTag') == '255' else None)
# TODO: add support for WVC1 and WMAP
if fourcc not in ('H264', 'AVC1', 'AACL'):
self.report_warning('%s is not a supported codec' % fourcc)
@@ -2114,19 +2145,19 @@ class InfoExtractor(object):
return f
return {}
def _media_formats(src, cur_media_type):
def _media_formats(src, cur_media_type, type_info={}):
full_url = absolute_url(src)
ext = determine_ext(full_url)
ext = type_info.get('ext') or determine_ext(full_url)
if ext == 'm3u8':
is_plain_url = False
formats = self._extract_m3u8_formats(
full_url, video_id, ext='mp4',
entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id,
preference=preference)
preference=preference, fatal=False)
elif ext == 'mpd':
is_plain_url = False
formats = self._extract_mpd_formats(
full_url, video_id, mpd_id=mpd_id)
full_url, video_id, mpd_id=mpd_id, fatal=False)
else:
is_plain_url = True
formats = [{
@@ -2165,9 +2196,15 @@ class InfoExtractor(object):
src = source_attributes.get('src')
if not src:
continue
is_plain_url, formats = _media_formats(src, media_type)
f = parse_content_type(source_attributes.get('type'))
is_plain_url, formats = _media_formats(src, media_type, f)
if is_plain_url:
f = parse_content_type(source_attributes.get('type'))
# res attribute is not standard but seen several times
# in the wild
f.update({
'height': int_or_none(source_attributes.get('res')),
'format_id': source_attributes.get('label'),
})
f.update(formats[0])
media_info['formats'].append(f)
else:
@@ -2211,27 +2248,36 @@ class InfoExtractor(object):
return formats
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
query = compat_urlparse.urlparse(url).query
url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
url_base = self._search_regex(
r'(?:(?:https?|rtmp|rtsp):)?(//[^?]+)', url, 'format url')
http_base_url = '%s:%s' % ('http', url_base)
mobj = re.search(
r'(?:(?:http|rtmp|rtsp)(?P<s>s)?:)?(?P<url>//[^?]+)', url)
url_base = mobj.group('url')
http_base_url = '%s%s:%s' % ('http', mobj.group('s') or '', url_base)
formats = []
def manifest_url(manifest):
m_url = '%s/%s' % (http_base_url, manifest)
if query:
m_url += '?%s' % query
return m_url
if 'm3u8' not in skip_protocols:
formats.extend(self._extract_m3u8_formats(
http_base_url + '/playlist.m3u8', video_id, 'mp4',
manifest_url('playlist.m3u8'), video_id, 'mp4',
m3u8_entry_protocol, m3u8_id='hls', fatal=False))
if 'f4m' not in skip_protocols:
formats.extend(self._extract_f4m_formats(
http_base_url + '/manifest.f4m',
manifest_url('manifest.f4m'),
video_id, f4m_id='hds', fatal=False))
if 'dash' not in skip_protocols:
formats.extend(self._extract_mpd_formats(
http_base_url + '/manifest.mpd',
manifest_url('manifest.mpd'),
video_id, mpd_id='dash', fatal=False))
if re.search(r'(?:/smil:|\.smil)', url_base):
if 'smil' not in skip_protocols:
rtmp_formats = self._extract_smil_formats(
http_base_url + '/jwplayer.smil',
manifest_url('jwplayer.smil'),
video_id, fatal=False)
for rtmp_format in rtmp_formats:
rtsp_format = rtmp_format.copy()
@@ -2300,7 +2346,6 @@ class InfoExtractor(object):
formats = self._parse_jwplayer_formats(
video_data['sources'], video_id=this_video_id, m3u8_id=m3u8_id,
mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
self._sort_formats(formats)
subtitles = {}
tracks = video_data.get('tracks')
@@ -2308,7 +2353,10 @@ class InfoExtractor(object):
for track in tracks:
if not isinstance(track, dict):
continue
if track.get('kind') != 'captions':
track_kind = track.get('kind')
if not track_kind or not isinstance(track_kind, compat_str):
continue
if track_kind.lower() not in ('captions', 'subtitles'):
continue
track_url = urljoin(base_url, track.get('file'))
if not track_url:
@@ -2317,16 +2365,25 @@ class InfoExtractor(object):
'url': self._proto_relative_url(track_url)
})
entries.append({
entry = {
'id': this_video_id,
'title': video_data['title'] if require_title else video_data.get('title'),
'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
'description': video_data.get('description'),
'thumbnail': self._proto_relative_url(video_data.get('image')),
'timestamp': int_or_none(video_data.get('pubdate')),
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
'subtitles': subtitles,
'formats': formats,
})
}
# https://github.com/jwplayer/jwplayer/blob/master/src/js/utils/validator.js#L32
if len(formats) == 1 and re.search(r'^(?:http|//).*(?:youtube\.com|youtu\.be)/.+', formats[0]['url']):
entry.update({
'_type': 'url_transparent',
'url': formats[0]['url'],
})
else:
self._sort_formats(formats)
entry['formats'] = formats
entries.append(entry)
if len(entries) == 1:
return entries[0]
else:
@@ -2353,7 +2410,7 @@ class InfoExtractor(object):
formats.extend(self._extract_m3u8_formats(
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id=m3u8_id, fatal=False))
elif ext == 'mpd':
elif source_type == 'dash' or ext == 'mpd':
formats.extend(self._extract_mpd_formats(
source_url, video_id, mpd_id=mpd_id, fatal=False))
elif ext == 'smil':
@@ -2427,10 +2484,12 @@ class InfoExtractor(object):
self._downloader.report_warning(msg)
return res
def _set_cookie(self, domain, name, value, expire_time=None):
def _set_cookie(self, domain, name, value, expire_time=None, port=None,
path='/', secure=False, discard=False, rest={}, **kwargs):
cookie = compat_cookiejar.Cookie(
0, name, value, None, None, domain, None,
None, '/', True, False, expire_time, '', None, None, None)
0, name, value, port, port is not None, domain, True,
domain.startswith('.'), path, True, secure, expire_time,
discard, None, None, rest)
self._downloader.cookiejar.set_cookie(cookie)
def _get_cookies(self, url):

View File

@@ -116,16 +116,16 @@ class CondeNastIE(InfoExtractor):
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
return self.playlist_result(entries, playlist_title=title)
def _extract_video_params(self, webpage):
query = {}
params = self._search_regex(
r'(?s)var params = {(.+?)}[;,]', webpage, 'player params', default=None)
if params:
query.update({
'videoId': self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id'),
'playerId': self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id'),
'target': self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target'),
})
def _extract_video_params(self, webpage, display_id):
query = self._parse_json(
self._search_regex(
r'(?s)var\s+params\s*=\s*({.+?})[;,]', webpage, 'player params',
default='{}'),
display_id, transform_source=js_to_json, fatal=False)
if query:
query['videoId'] = self._search_regex(
r'(?:data-video-id=|currentVideoId\s*=\s*)["\']([\da-f]+)',
webpage, 'video id', default=None)
else:
params = extract_attributes(self._search_regex(
r'(<[^>]+data-js="video-player"[^>]+>)',
@@ -141,17 +141,27 @@ class CondeNastIE(InfoExtractor):
video_id = params['videoId']
video_info = None
if params.get('playerId'):
info_page = self._download_json(
'http://player.cnevids.com/player/video.js',
video_id, 'Downloading video info', fatal=False, query=params)
if info_page:
video_info = info_page.get('video')
if not video_info:
info_page = self._download_webpage(
'http://player.cnevids.com/player/loader.js',
video_id, 'Downloading loader info', query=params)
else:
# New API path
query = params.copy()
query['embedType'] = 'inline'
info_page = self._download_json(
'http://player.cnevids.com/embed-api.json', video_id,
'Downloading embed info', fatal=False, query=query)
# Old fallbacks
if not info_page:
if params.get('playerId'):
info_page = self._download_json(
'http://player.cnevids.com/player/video.js', video_id,
'Downloading video info', fatal=False, query=params)
if info_page:
video_info = info_page.get('video')
if not video_info:
info_page = self._download_webpage(
'http://player.cnevids.com/player/loader.js',
video_id, 'Downloading loader info', query=params)
if not video_info:
info_page = self._download_webpage(
'https://player.cnevids.com/inline/video/%s.js' % video_id,
video_id, 'Downloading inline info', query={
@@ -215,7 +225,7 @@ class CondeNastIE(InfoExtractor):
if url_type == 'series':
return self._extract_series(url, webpage)
else:
params = self._extract_video_params(webpage)
params = self._extract_video_params(webpage, display_id)
info = self._search_json_ld(
webpage, display_id, fatal=False)
info.update(self._extract_video(params))

View File

@@ -3,6 +3,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import (
parse_iso8601,
str_to_int,
@@ -41,11 +42,9 @@ class CrackedIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
youtube_url = self._search_regex(
r'<iframe[^>]+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"',
webpage, 'youtube url', default=None)
youtube_url = YoutubeIE._extract_url(webpage)
if youtube_url:
return self.url_result(youtube_url, 'Youtube')
return self.url_result(youtube_url, ie=YoutubeIE.ie_key())
video_url = self._html_search_regex(
[r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'],

View File

@@ -3,13 +3,13 @@ from __future__ import unicode_literals
import re
import json
import base64
import zlib
from hashlib import sha1
from math import pow, sqrt, floor
from .common import InfoExtractor
from ..compat import (
compat_b64decode,
compat_etree_fromstring,
compat_urllib_parse_urlencode,
compat_urllib_request,
@@ -38,11 +38,32 @@ class CrunchyrollBaseIE(InfoExtractor):
_LOGIN_FORM = 'login_form'
_NETRC_MACHINE = 'crunchyroll'
def _call_rpc_api(self, method, video_id, note=None, data=None):
data = data or {}
data['req'] = 'RpcApi' + method
data = compat_urllib_parse_urlencode(data).encode('utf-8')
return self._download_xml(
'http://www.crunchyroll.com/xml/',
video_id, note, fatal=False, data=data, headers={
'Content-Type': 'application/x-www-form-urlencoded',
})
def _login(self):
(username, password) = self._get_login_info()
if username is None:
return
self._download_webpage(
'https://www.crunchyroll.com/?a=formhandler',
None, 'Logging in', 'Wrong login info',
data=urlencode_postdata({
'formname': 'RpcApiUser_Login',
'next_url': 'https://www.crunchyroll.com/acct/membership',
'name': username,
'password': password,
}))
'''
login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page')
@@ -86,6 +107,7 @@ class CrunchyrollBaseIE(InfoExtractor):
raise ExtractorError('Unable to login: %s' % error, expected=True)
raise ExtractorError('Unable to log in')
'''
def _real_initialize(self):
self._login()
@@ -250,8 +272,8 @@ class CrunchyrollIE(CrunchyrollBaseIE):
}
def _decrypt_subtitles(self, data, iv, id):
data = bytes_to_intlist(base64.b64decode(data.encode('utf-8')))
iv = bytes_to_intlist(base64.b64decode(iv.encode('utf-8')))
data = bytes_to_intlist(compat_b64decode(data))
iv = bytes_to_intlist(compat_b64decode(iv))
id = int(id)
def obfuscate_key_aux(count, modulo, start):
@@ -365,15 +387,19 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
def _get_subtitles(self, video_id, webpage):
subtitles = {}
for sub_id, sub_name in re.findall(r'\bssid=([0-9]+)"[^>]+?\btitle="([^"]+)', webpage):
sub_page = self._download_webpage(
'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
video_id, note='Downloading subtitles for ' + sub_name)
id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
if not id or not iv or not data:
sub_doc = self._call_rpc_api(
'Subtitle_GetXml', video_id,
'Downloading subtitles for ' + sub_name, data={
'subtitle_script_id': sub_id,
})
if sub_doc is None:
continue
subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
sid = sub_doc.get('id')
iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
data = xpath_text(sub_doc, 'data', 'subtitle data')
if not sid or not iv or not data:
continue
subtitle = self._decrypt_subtitles(data, iv, sid).decode('utf-8')
lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
if not lang_code:
continue
@@ -444,65 +470,79 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
for fmt in available_fmts:
stream_quality, stream_format = self._FORMAT_IDS[fmt]
video_format = fmt + 'p'
streamdata_req = sanitized_Request(
'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s'
% (video_id, stream_format, stream_quality),
compat_urllib_parse_urlencode({'current_page': url}).encode('utf-8'))
streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
streamdata = self._download_xml(
streamdata_req, video_id,
note='Downloading media info for %s' % video_format)
stream_info = streamdata.find('./{default}preload/stream_info')
video_encode_id = xpath_text(stream_info, './video_encode_id')
if video_encode_id in video_encode_ids:
continue
video_encode_ids.append(video_encode_id)
stream_infos = []
streamdata = self._call_rpc_api(
'VideoPlayer_GetStandardConfig', video_id,
'Downloading media info for %s' % video_format, data={
'media_id': video_id,
'video_format': stream_format,
'video_quality': stream_quality,
'current_page': url,
})
if streamdata is not None:
stream_info = streamdata.find('./{default}preload/stream_info')
if stream_info is not None:
stream_infos.append(stream_info)
stream_info = self._call_rpc_api(
'VideoEncode_GetStreamInfo', video_id,
'Downloading stream info for %s' % video_format, data={
'media_id': video_id,
'video_format': stream_format,
'video_encode_quality': stream_quality,
})
if stream_info is not None:
stream_infos.append(stream_info)
for stream_info in stream_infos:
video_encode_id = xpath_text(stream_info, './video_encode_id')
if video_encode_id in video_encode_ids:
continue
video_encode_ids.append(video_encode_id)
video_file = xpath_text(stream_info, './file')
if not video_file:
continue
if video_file.startswith('http'):
formats.extend(self._extract_m3u8_formats(
video_file, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
continue
video_url = xpath_text(stream_info, './host')
if not video_url:
continue
metadata = stream_info.find('./metadata')
format_info = {
'format': video_format,
'format_id': video_format,
'height': int_or_none(xpath_text(metadata, './height')),
'width': int_or_none(xpath_text(metadata, './width')),
}
if '.fplive.net/' in video_url:
video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
parsed_video_url = compat_urlparse.urlparse(video_url)
direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
netloc='v.lvlt.crcdn.net',
path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
if self._is_valid_url(direct_video_url, video_id, video_format):
format_info.update({
'url': direct_video_url,
})
formats.append(format_info)
video_file = xpath_text(stream_info, './file')
if not video_file:
continue
if video_file.startswith('http'):
formats.extend(self._extract_m3u8_formats(
video_file, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
continue
format_info.update({
'url': video_url,
'play_path': video_file,
'ext': 'flv',
})
formats.append(format_info)
self._sort_formats(formats)
video_url = xpath_text(stream_info, './host')
if not video_url:
continue
metadata = stream_info.find('./metadata')
format_info = {
'format': video_format,
'height': int_or_none(xpath_text(metadata, './height')),
'width': int_or_none(xpath_text(metadata, './width')),
}
metadata = self._download_xml(
'http://www.crunchyroll.com/xml', video_id,
note='Downloading media info', query={
'req': 'RpcApiVideoPlayer_GetMediaMetadata',
if '.fplive.net/' in video_url:
video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
parsed_video_url = compat_urlparse.urlparse(video_url)
direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
netloc='v.lvlt.crcdn.net',
path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
if self._is_valid_url(direct_video_url, video_id, video_format):
format_info.update({
'format_id': 'http-' + video_format,
'url': direct_video_url,
})
formats.append(format_info)
continue
format_info.update({
'format_id': 'rtmp-' + video_format,
'url': video_url,
'play_path': video_file,
'ext': 'flv',
})
formats.append(format_info)
self._sort_formats(formats, ('height', 'width', 'tbr', 'fps'))
metadata = self._call_rpc_api(
'VideoPlayer_GetMediaMetadata', video_id,
note='Downloading media info', data={
'media_id': video_id,
})

View File

@@ -4,13 +4,14 @@ import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
unescapeHTML,
find_xpath_attr,
smuggle_url,
determine_ext,
ExtractorError,
extract_attributes,
find_xpath_attr,
get_element_by_class,
int_or_none,
smuggle_url,
unescapeHTML,
)
from .senateisvp import SenateISVPIE
from .ustream import UstreamIE
@@ -68,6 +69,10 @@ class CSpanIE(InfoExtractor):
'uploader': 'HouseCommittee',
'uploader_id': '12987475',
},
}, {
# Audio Only
'url': 'https://www.c-span.org/video/?437336-1/judiciary-antitrust-competition-policy-consumer-rights',
'only_matching': True,
}]
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
@@ -111,7 +116,15 @@ class CSpanIE(InfoExtractor):
title = self._og_search_title(webpage)
surl = smuggle_url(senate_isvp_url, {'force_title': title})
return self.url_result(surl, 'SenateISVP', video_id, title)
video_id = self._search_regex(
r'jwsetup\.clipprog\s*=\s*(\d+);',
webpage, 'jwsetup program id', default=None)
if video_id:
video_type = 'program'
if video_type is None or video_id is None:
error_message = get_element_by_class('VLplayer-error-message', webpage)
if error_message:
raise ExtractorError(error_message)
raise ExtractorError('unable to find video id and type')
def get_text_attr(d, attr):
@@ -138,7 +151,7 @@ class CSpanIE(InfoExtractor):
entries = []
for partnum, f in enumerate(files):
formats = []
for quality in f['qualities']:
for quality in f.get('qualities', []):
formats.append({
'format_id': '%s-%sp' % (get_text_attr(quality, 'bitrate'), get_text_attr(quality, 'height')),
'url': unescapeHTML(get_text_attr(quality, 'file')),

View File

@@ -235,7 +235,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
# vevo embed
vevo_id = self._search_regex(
r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)',
r'<link rel="video_src" href="[^"]*?vevo\.com[^"]*?video=(?P<id>[\w]*)',
webpage, 'vevo embed', default=None)
if vevo_id:
return self.url_result('vevo:%s' % vevo_id, 'Vevo')
@@ -325,7 +325,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
IE_NAME = 'dailymotion:playlist'
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>[^/?#&]+)'
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
_TESTS = [{
@@ -413,52 +413,3 @@ class DailymotionUserIE(DailymotionPlaylistIE):
'title': full_user,
'entries': self._extract_entries(user),
}
class DailymotionCloudIE(DailymotionBaseInfoExtractor):
_VALID_URL_PREFIX = r'https?://api\.dmcloud\.net/(?:player/)?embed/'
_VALID_URL = r'%s[^/]+/(?P<id>[^/?]+)' % _VALID_URL_PREFIX
_VALID_EMBED_URL = r'%s[^/]+/[^\'"]+' % _VALID_URL_PREFIX
_TESTS = [{
# From http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html
# Tested at FranceTvInfo_2
'url': 'http://api.dmcloud.net/embed/4e7343f894a6f677b10006b4/556e03339473995ee145930c?auth=1464865870-0-jyhsm84b-ead4c701fb750cf9367bf4447167a3db&autoplay=1',
'only_matching': True,
}, {
# http://www.francetvinfo.fr/societe/larguez-les-amarres-le-cobaturage-se-developpe_980101.html
'url': 'http://api.dmcloud.net/player/embed/4e7343f894a6f677b10006b4/559545469473996d31429f06?auth=1467430263-0-90tglw2l-a3a4b64ed41efe48d7fccad85b8b8fda&autoplay=1',
'only_matching': True,
}]
@classmethod
def _extract_dmcloud_url(cls, webpage):
mobj = re.search(r'<iframe[^>]+src=[\'"](%s)[\'"]' % cls._VALID_EMBED_URL, webpage)
if mobj:
return mobj.group(1)
mobj = re.search(
r'<input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=[\'"](%s)[\'"]' % cls._VALID_EMBED_URL,
webpage)
if mobj:
return mobj.group(1)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage_no_ff(url, video_id)
title = self._html_search_regex(r'<title>([^>]+)</title>', webpage, 'title')
video_info = self._parse_json(self._search_regex(
r'var\s+info\s*=\s*([^;]+);', webpage, 'video info'), video_id)
# TODO: parse ios_url, which is in fact a manifest
video_url = video_info['mp4_url']
return {
'id': video_id,
'url': video_url,
'title': title,
'thumbnail': video_info.get('thumbnail_url'),
}

View File

@@ -10,36 +10,34 @@ from ..aes import (
aes_cbc_decrypt,
aes_cbc_encrypt,
)
from ..compat import compat_b64decode
from ..utils import (
bytes_to_intlist,
bytes_to_long,
clean_html,
extract_attributes,
ExtractorError,
intlist_to_bytes,
get_element_by_id,
js_to_json,
int_or_none,
long_to_bytes,
pkcs1pad,
remove_end,
)
class DaisukiIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?daisuki\.net/[^/]+/[^/]+/[^/]+/watch\.[^.]+\.(?P<id>\d+)\.html'
class DaisukiMottoIE(InfoExtractor):
_VALID_URL = r'https?://motto\.daisuki\.net/framewatch/embed/[^/]+/(?P<id>[0-9a-zA-Z]{3})'
_TEST = {
'url': 'http://www.daisuki.net/tw/en/anime/watch.TheIdolMasterCG.11213.html',
'url': 'http://motto.daisuki.net/framewatch/embed/embedDRAGONBALLSUPERUniverseSurvivalsaga/V2e/760/428',
'info_dict': {
'id': '11213',
'id': 'V2e',
'ext': 'mp4',
'title': '#01 Who is in the pumpkin carriage? - THE IDOLM@STER CINDERELLA GIRLS',
'title': '#117 SHOWDOWN OF LOVE! ANDROIDS VS UNIVERSE 2!!',
'subtitles': {
'mul': [{
'ext': 'ttml',
}],
},
'creator': 'BANDAI NAMCO Entertainment',
},
'params': {
'skip_download': True, # AES-encrypted HLS stream
@@ -73,15 +71,17 @@ class DaisukiIE(InfoExtractor):
n, e = self._RSA_KEY
encrypted_aeskey = long_to_bytes(pow(bytes_to_long(padded_aeskey), e, n))
init_data = self._download_json('http://www.daisuki.net/bin/bgn/init', video_id, query={
's': flashvars.get('s', ''),
'c': flashvars.get('ss3_prm', ''),
'e': url,
'd': base64.b64encode(intlist_to_bytes(aes_cbc_encrypt(
bytes_to_intlist(json.dumps(data)),
aes_key, iv))).decode('ascii'),
'a': base64.b64encode(encrypted_aeskey).decode('ascii'),
}, note='Downloading JSON metadata' + (' (try #%d)' % (idx + 1) if idx > 0 else ''))
init_data = self._download_json(
'http://motto.daisuki.net/fastAPI/bgn/init/',
video_id, query={
's': flashvars.get('s', ''),
'c': flashvars.get('ss3_prm', ''),
'e': url,
'd': base64.b64encode(intlist_to_bytes(aes_cbc_encrypt(
bytes_to_intlist(json.dumps(data)),
aes_key, iv))).decode('ascii'),
'a': base64.b64encode(encrypted_aeskey).decode('ascii'),
}, note='Downloading JSON metadata' + (' (try #%d)' % (idx + 1) if idx > 0 else ''))
if 'rtn' in init_data:
encrypted_rtn = init_data['rtn']
@@ -94,18 +94,15 @@ class DaisukiIE(InfoExtractor):
rtn = self._parse_json(
intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(
base64.b64decode(encrypted_rtn)),
compat_b64decode(encrypted_rtn)),
aes_key, iv)).decode('utf-8').rstrip('\0'),
video_id)
title = rtn['title_str']
formats = self._extract_m3u8_formats(
rtn['play_url'], video_id, ext='mp4', entry_protocol='m3u8_native')
title = remove_end(self._og_search_title(webpage), ' - DAISUKI')
creator = self._html_search_regex(
r'Creator\s*:\s*([^<]+)', webpage, 'creator', fatal=False)
subtitles = {}
caption_url = rtn.get('caption_url')
if caption_url:
@@ -120,21 +117,18 @@ class DaisukiIE(InfoExtractor):
'title': title,
'formats': formats,
'subtitles': subtitles,
'creator': creator,
}
class DaisukiPlaylistIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)daisuki\.net/[^/]+/[^/]+/[^/]+/detail\.(?P<id>[a-zA-Z0-9]+)\.html'
class DaisukiMottoPlaylistIE(InfoExtractor):
_VALID_URL = r'https?://motto\.daisuki\.net/(?P<id>information)/'
_TEST = {
'url': 'http://www.daisuki.net/tw/en/anime/detail.TheIdolMasterCG.html',
'url': 'http://motto.daisuki.net/information/',
'info_dict': {
'id': 'TheIdolMasterCG',
'title': 'THE IDOLM@STER CINDERELLA GIRLS',
'description': 'md5:0f2c028a9339f7a2c7fbf839edc5c5d8',
'title': 'DRAGON BALL SUPER',
},
'playlist_count': 26,
'playlist_mincount': 117,
}
def _real_extract(self, url):
@@ -142,18 +136,19 @@ class DaisukiPlaylistIE(InfoExtractor):
webpage = self._download_webpage(url, playlist_id)
episode_pattern = r'''(?sx)
<img[^>]+delay="[^"]+/(\d+)/movie\.jpg".+?
<p[^>]+class=".*?\bepisodeNumber\b.*?">(?:<a[^>]+>)?([^<]+)'''
entries = [{
'_type': 'url_transparent',
'url': url.replace('detail', 'watch').replace('.html', '.' + movie_id + '.html'),
'episode_id': episode_id,
'episode_number': int_or_none(episode_id),
} for movie_id, episode_id in re.findall(episode_pattern, webpage)]
entries = []
for li in re.findall(r'(<li[^>]+?data-product_id="[a-zA-Z0-9]{3}"[^>]+>)', webpage):
attr = extract_attributes(li)
ad_id = attr.get('data-ad_id')
product_id = attr.get('data-product_id')
if ad_id and product_id:
episode_id = attr.get('data-chapter')
entries.append({
'_type': 'url_transparent',
'url': 'http://motto.daisuki.net/framewatch/embed/%s/%s/760/428' % (ad_id, product_id),
'episode_id': episode_id,
'episode_number': int_or_none(episode_id),
'ie_key': 'DaisukiMotto',
})
playlist_title = remove_end(
self._og_search_title(webpage, fatal=False), ' - Anime - DAISUKI')
playlist_description = clean_html(get_element_by_id('synopsisTxt', webpage))
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
return self.playlist_result(entries, playlist_title='DRAGON BALL SUPER')

View File

@@ -2,53 +2,85 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import unified_strdate
from ..compat import compat_str
from ..utils import (
float_or_none,
unified_strdate,
)
class DctpTvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dctp\.tv/(#/)?filme/(?P<id>.+?)/$'
_VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
'md5': '174dd4a8a6225cf5655952f969cfbe24',
'info_dict': {
'id': '95eaa4f33dad413aa17b4ee613cccc6c',
'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
'ext': 'mp4',
'ext': 'flv',
'title': 'Videoinstallation für eine Kaufhausfassade',
'description': 'Kurzfilm',
'upload_date': '20110407',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 71.24,
},
'params': {
# rtmp download
'skip_download': True,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
display_id = self._match_id(url)
object_id = self._html_search_meta('DC.identifier', webpage)
webpage = self._download_webpage(url, display_id)
servers_json = self._download_json(
'http://www.dctp.tv/elastic_streaming_client/get_streaming_server/',
video_id, note='Downloading server list')
server = servers_json[0]['server']
m3u8_path = self._search_regex(
r'\'([^\'"]+/playlist\.m3u8)"', webpage, 'm3u8 path')
formats = self._extract_m3u8_formats(
'http://%s%s' % (server, m3u8_path), video_id, ext='mp4',
entry_protocol='m3u8_native')
video_id = self._html_search_meta(
'DC.identifier', webpage, 'video id',
default=None) or self._search_regex(
r'id=["\']uuid[^>]+>([^<]+)<', webpage, 'video id')
title = self._og_search_title(webpage)
servers = self._download_json(
'http://www.dctp.tv/streaming_servers/', display_id,
note='Downloading server list', fatal=False)
if servers:
endpoint = next(
server['endpoint']
for server in servers
if isinstance(server.get('endpoint'), compat_str) and
'cloudfront' in server['endpoint'])
else:
endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'
app = self._search_regex(
r'^rtmpe?://[^/]+/(?P<app>.*)$', endpoint, 'app')
formats = [{
'url': endpoint,
'app': app,
'play_path': 'mp4:%s_dctp_0500_4x3.m4v' % video_id,
'page_url': url,
'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-109.swf',
'ext': 'flv',
}]
description = self._html_search_meta('DC.description', webpage)
upload_date = unified_strdate(
self._html_search_meta('DC.date.created', webpage))
thumbnail = self._og_search_thumbnail(webpage)
duration = float_or_none(self._search_regex(
r'id=["\']duration_in_ms[^+]>(\d+)', webpage, 'duration',
default=None), scale=1000)
return {
'id': object_id,
'id': video_id,
'title': title,
'formats': formats,
'display_id': video_id,
'display_id': display_id,
'description': description,
'upload_date': upload_date,
'thumbnail': thumbnail,
'duration': duration,
}

View File

@@ -19,7 +19,7 @@ class DeezerPlaylistIE(InfoExtractor):
'id': '176747451',
'title': 'Best!',
'uploader': 'Anonymous',
'thumbnail': r're:^https?://cdn-images.deezer.com/images/cover/.*\.jpg$',
'thumbnail': r're:^https?://cdn-images\.deezer\.com/images/cover/.*\.jpg$',
},
'playlist_count': 30,
'skip': 'Only available in .de',

View File

@@ -0,0 +1,56 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import js_to_json
class DiggIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?digg\.com/video/(?P<id>[^/?#&]+)'
_TESTS = [{
# JWPlatform via provider
'url': 'http://digg.com/video/sci-fi-short-jonah-daniel-kaluuya-get-out',
'info_dict': {
'id': 'LcqvmS0b',
'ext': 'mp4',
'title': "'Get Out' Star Daniel Kaluuya Goes On 'Moby Dick'-Like Journey In Sci-Fi Short 'Jonah'",
'description': 'md5:541bb847648b6ee3d6514bc84b82efda',
'upload_date': '20180109',
'timestamp': 1515530551,
},
'params': {
'skip_download': True,
},
}, {
# Youtube via provider
'url': 'http://digg.com/video/dog-boat-seal-play',
'only_matching': True,
}, {
# vimeo as regular embed
'url': 'http://digg.com/video/dream-girl-short-film',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
info = self._parse_json(
self._search_regex(
r'(?s)video_info\s*=\s*({.+?});\n', webpage, 'video info',
default='{}'), display_id, transform_source=js_to_json,
fatal=False)
video_id = info.get('video_id')
if video_id:
provider = info.get('provider_name')
if provider == 'youtube':
return self.url_result(
video_id, ie='Youtube', video_id=video_id)
elif provider == 'jwplayer':
return self.url_result(
'jwplatform:%s' % video_id, ie='JWPlatform',
video_id=video_id)
return self.url_result(url, 'Generic')

View File

@@ -1,15 +1,20 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
parse_duration,
parse_iso8601,
)
import random
import re
import string
from .discoverygo import DiscoveryGoBaseIE
from ..compat import compat_str
from ..utils import (
ExtractorError,
try_get,
)
from ..compat import compat_HTTPError
class DiscoveryIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(?:www\.)?(?:
class DiscoveryIE(DiscoveryGoBaseIE):
_VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site>
discovery|
investigationdiscovery|
discoverylife|
@@ -19,79 +24,64 @@ class DiscoveryIE(InfoExtractor):
sciencechannel|
tlc|
velocity
)\.com/(?:[^/]+/)*(?P<id>[^./?#]+)'''
)\.com(?P<path>/tv-shows/[^/]+/(?:video|full-episode)s/(?P<id>[^./?#]+))'''
_TESTS = [{
'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley',
'info_dict': {
'id': '20769',
'id': '5a2d9b4d6b66d17a5026e1fd',
'ext': 'mp4',
'title': 'Mission Impossible Outtakes',
'description': ('Watch Jamie Hyneman and Adam Savage practice being'
' each other -- to the point of confusing Jamie\'s dog -- and '
'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'
' back.'),
'duration': 156,
'timestamp': 1302032462,
'upload_date': '20110405',
'uploader_id': '103207',
'title': 'Dave Foley',
'description': 'md5:4b39bcafccf9167ca42810eb5f28b01f',
'duration': 608,
},
'params': {
'skip_download': True, # requires ffmpeg
}
}, {
'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mythbusters-the-simpsons',
'info_dict': {
'id': 'mythbusters-the-simpsons',
'title': 'MythBusters: The Simpsons',
},
'playlist_mincount': 10,
}, {
'url': 'http://www.animalplanet.com/longfin-eels-maneaters/',
'info_dict': {
'id': '78326',
'ext': 'mp4',
'title': 'Longfin Eels: Maneaters?',
'description': 'Jeremy Wade tests whether or not New Zealand\'s longfin eels are man-eaters by covering himself in fish guts and getting in the water with them.',
'upload_date': '20140725',
'timestamp': 1406246400,
'duration': 116,
'uploader_id': '103207',
},
'params': {
'skip_download': True, # requires ffmpeg
}
'url': 'https://www.investigationdiscovery.com/tv-shows/final-vision/full-episodes/final-vision',
'only_matching': True,
}]
_GEO_COUNTRIES = ['US']
_GEO_BYPASS = False
def _real_extract(self, url):
display_id = self._match_id(url)
info = self._download_json(url + '?flat=1', display_id)
site, path, display_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, display_id)
video_title = info.get('playlist_title') or info.get('video_title')
react_data = self._parse_json(self._search_regex(
r'window\.__reactTransmitPacket\s*=\s*({.+?});',
webpage, 'react data'), display_id)
content_blocks = react_data['layout'][path]['contentBlocks']
video = next(cb for cb in content_blocks if cb.get('type') == 'video')['content']['items'][0]
video_id = video['id']
entries = []
access_token = self._download_json(
'https://www.%s.com/anonymous' % site, display_id, query={
'authRel': 'authorization',
'client_id': try_get(
react_data, lambda x: x['application']['apiClientId'],
compat_str) or '3020a40c2356a645b4b4',
'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site,
})['access_token']
for idx, video_info in enumerate(info['playlist']):
subtitles = {}
caption_url = video_info.get('captionsUrl')
if caption_url:
subtitles = {
'en': [{
'url': caption_url,
}]
}
try:
stream = self._download_json(
'https://api.discovery.com/v1/streaming/video/' + video_id,
display_id, headers={
'Authorization': 'Bearer ' + access_token,
})
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
e_description = self._parse_json(
e.cause.read().decode(), display_id)['description']
if 'resource not available for country' in e_description:
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
if 'Authorized Networks' in e_description:
raise ExtractorError(
'This video is only available via cable service provider subscription that'
' is not currently supported. You may want to use --cookies.', expected=True)
raise ExtractorError(e_description)
raise
entries.append({
'_type': 'url_transparent',
'url': 'http://players.brightcove.net/103207/default_default/index.html?videoId=ref:%s' % video_info['referenceId'],
'id': compat_str(video_info['id']),
'title': video_info['title'],
'description': video_info.get('description'),
'duration': parse_duration(video_info.get('video_length')),
'webpage_url': video_info.get('href') or video_info.get('url'),
'thumbnail': video_info.get('thumbnailURL'),
'alt_title': video_info.get('secondary_title'),
'timestamp': parse_iso8601(video_info.get('publishedDate')),
'subtitles': subtitles,
})
return self.playlist_result(entries, display_id, video_title)
return self._extract_video_info(video, stream, display_id)

View File

@@ -5,6 +5,7 @@ import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
determine_ext,
extract_attributes,
ExtractorError,
int_or_none,
@@ -27,42 +28,9 @@ class DiscoveryGoBaseIE(InfoExtractor):
velocitychannel
)go\.com/%s(?P<id>[^/?#&]+)'''
class DiscoveryGoIE(DiscoveryGoBaseIE):
_VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+'
_GEO_COUNTRIES = ['US']
_TEST = {
'url': 'https://www.discoverygo.com/bering-sea-gold/reaper-madness/',
'info_dict': {
'id': '58c167d86b66d12f2addeb01',
'ext': 'mp4',
'title': 'Reaper Madness',
'description': 'md5:09f2c625c99afb8946ed4fb7865f6e78',
'duration': 2519,
'series': 'Bering Sea Gold',
'season_number': 8,
'episode_number': 6,
'age_limit': 14,
},
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
container = extract_attributes(
self._search_regex(
r'(<div[^>]+class=["\']video-player-container[^>]+>)',
webpage, 'video container'))
video = self._parse_json(
container.get('data-video') or container.get('data-json'),
display_id)
def _extract_video_info(self, video, stream, display_id):
title = video['name']
stream = video.get('stream')
if not stream:
if video.get('authenticated') is True:
raise ExtractorError(
@@ -106,7 +74,11 @@ class DiscoveryGoIE(DiscoveryGoBaseIE):
not subtitle_url.startswith('http')):
continue
lang = caption.get('fileLang', 'en')
subtitles.setdefault(lang, []).append({'url': subtitle_url})
ext = determine_ext(subtitle_url)
subtitles.setdefault(lang, []).append({
'url': subtitle_url,
'ext': 'ttml' if ext == 'xml' else ext,
})
return {
'id': video_id,
@@ -124,6 +96,43 @@ class DiscoveryGoIE(DiscoveryGoBaseIE):
}
class DiscoveryGoIE(DiscoveryGoBaseIE):
_VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+'
_GEO_COUNTRIES = ['US']
_TEST = {
'url': 'https://www.discoverygo.com/bering-sea-gold/reaper-madness/',
'info_dict': {
'id': '58c167d86b66d12f2addeb01',
'ext': 'mp4',
'title': 'Reaper Madness',
'description': 'md5:09f2c625c99afb8946ed4fb7865f6e78',
'duration': 2519,
'series': 'Bering Sea Gold',
'season_number': 8,
'episode_number': 6,
'age_limit': 14,
},
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
container = extract_attributes(
self._search_regex(
r'(<div[^>]+class=["\']video-player-container[^>]+>)',
webpage, 'video container'))
video = self._parse_json(
container.get('data-video') or container.get('data-json'),
display_id)
stream = video.get('stream')
return self._extract_video_info(video, stream, display_id)
class DiscoveryGoPlaylistIE(DiscoveryGoBaseIE):
_VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % ''
_TEST = {

View File

@@ -10,6 +10,7 @@ from ..utils import (
compat_str,
determine_ext,
ExtractorError,
update_url_query,
)
@@ -108,9 +109,16 @@ class DisneyIE(InfoExtractor):
continue
tbr = int_or_none(flavor.get('bitrate'))
if tbr == 99999:
formats.extend(self._extract_m3u8_formats(
# wrong ks(Kaltura Signature) causes 404 Error
flavor_url = update_url_query(flavor_url, {'ks': ''})
m3u8_formats = self._extract_m3u8_formats(
flavor_url, video_id, 'mp4',
m3u8_id=flavor_format, fatal=False))
m3u8_id=flavor_format, fatal=False)
for f in m3u8_formats:
# Apple FairPlay
if '/fpshls/' in f['url']:
continue
formats.append(f)
continue
format_id = []
if flavor_format:

View File

@@ -12,25 +12,28 @@ from ..compat import (
compat_urlparse,
)
from ..utils import (
determine_ext,
ExtractorError,
float_or_none,
int_or_none,
remove_end,
try_get,
unified_strdate,
unified_timestamp,
update_url_query,
USER_AGENTS,
)
class DPlayIE(InfoExtractor):
_VALID_URL = r'https?://(?P<domain>www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)'
_VALID_URL = r'https?://(?P<domain>www\.(?P<host>dplay\.(?P<country>dk|se|no)))/(?:video(?:er|s)/)?(?P<id>[^/]+/[^/?#]+)'
_TESTS = [{
# non geo restricted, via secure api, unsigned download hls URL
'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/',
'info_dict': {
'id': '3172',
'display_id': 'season-1-svensken-lar-sig-njuta-av-livet',
'display_id': 'nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet',
'ext': 'mp4',
'title': 'Svensken lär sig njuta av livet',
'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8',
@@ -48,7 +51,7 @@ class DPlayIE(InfoExtractor):
'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/',
'info_dict': {
'id': '70816',
'display_id': 'season-6-episode-12',
'display_id': 'mig-og-min-mor/season-6-episode-12',
'ext': 'mp4',
'title': 'Episode 12',
'description': 'md5:9c86e51a93f8a4401fc9641ef9894c90',
@@ -65,6 +68,33 @@ class DPlayIE(InfoExtractor):
# geo restricted, via direct unsigned hls URL
'url': 'http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/',
'only_matching': True,
}, {
# disco-api
'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7',
'info_dict': {
'id': '40206',
'display_id': 'i-kongens-klr/sesong-1-episode-7',
'ext': 'mp4',
'title': 'Episode 7',
'description': 'md5:e3e1411b2b9aebeea36a6ec5d50c60cf',
'duration': 2611.16,
'timestamp': 1516726800,
'upload_date': '20180123',
'series': 'I kongens klær',
'season_number': 1,
'episode_number': 7,
},
'params': {
'format': 'bestvideo',
'skip_download': True,
},
}, {
'url': 'https://www.dplay.dk/videoer/singleliv/season-5-episode-3',
'only_matching': True,
}, {
'url': 'https://www.dplay.se/videos/sofias-anglar/sofias-anglar-1001',
'only_matching': True,
}]
def _real_extract(self, url):
@@ -72,10 +102,81 @@ class DPlayIE(InfoExtractor):
display_id = mobj.group('id')
domain = mobj.group('domain')
self._initialize_geo_bypass([mobj.group('country').upper()])
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
r'data-video-id=["\'](\d+)', webpage, 'video id')
r'data-video-id=["\'](\d+)', webpage, 'video id', default=None)
if not video_id:
host = mobj.group('host')
disco_base = 'https://disco-api.%s' % host
self._download_json(
'%s/token' % disco_base, display_id, 'Downloading token',
query={
'realm': host.replace('.', ''),
})
video = self._download_json(
'%s/content/videos/%s' % (disco_base, display_id), display_id,
headers={
'Referer': url,
'x-disco-client': 'WEB:UNKNOWN:dplay-client:0.0.1',
}, query={
'include': 'show'
})
video_id = video['data']['id']
info = video['data']['attributes']
title = info['name']
formats = []
for format_id, format_dict in self._download_json(
'%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id),
display_id)['data']['attributes']['streaming'].items():
if not isinstance(format_dict, dict):
continue
format_url = format_dict.get('url')
if not format_url:
continue
ext = determine_ext(format_url)
if format_id == 'dash' or ext == 'mpd':
formats.extend(self._extract_mpd_formats(
format_url, display_id, mpd_id='dash', fatal=False))
elif format_id == 'hls' or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url, display_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls',
fatal=False))
else:
formats.append({
'url': format_url,
'format_id': format_id,
})
self._sort_formats(formats)
series = None
try:
included = video.get('included')
if isinstance(included, list):
show = next(e for e in included if e.get('type') == 'show')
series = try_get(
show, lambda x: x['attributes']['name'], compat_str)
except StopIteration:
pass
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': info.get('description'),
'duration': float_or_none(
info.get('videoDuration'), scale=1000),
'timestamp': unified_timestamp(info.get('publishStart')),
'series': series,
'season_number': int_or_none(info.get('seasonNumber')),
'episode_number': int_or_none(info.get('episodeNumber')),
'age_limit': int_or_none(info.get('minimum_age')),
'formats': formats,
}
info = self._download_json(
'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id),

View File

@@ -54,12 +54,12 @@ class DramaFeverBaseIE(AMPIE):
request = sanitized_Request(
self._LOGIN_URL, urlencode_postdata(login_form))
response = self._download_webpage(
request, None, 'Logging in as %s' % username)
request, None, 'Logging in')
if all(logout_pattern not in response
for logout_pattern in ['href="/accounts/logout/"', '>Log out<']):
error = self._html_search_regex(
r'(?s)class="hidden-xs prompt"[^>]*>(.+?)<',
r'(?s)<h\d[^>]+\bclass="hidden-xs prompt"[^>]*>(.+?)</h\d',
response, 'error message', default=None)
if error:
raise ExtractorError('Unable to login: %s' % error, expected=True)

View File

@@ -10,7 +10,7 @@ from ..utils import (
class DrTuberIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?drtuber\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?'
_VALID_URL = r'https?://(?:(?:www|m)\.)?drtuber\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?'
_TESTS = [{
'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
'md5': '93e680cf2536ad0dfb7e74d94a89facd',
@@ -28,6 +28,9 @@ class DrTuberIE(InfoExtractor):
}, {
'url': 'http://www.drtuber.com/embed/489939',
'only_matching': True,
}, {
'url': 'http://m.drtuber.com/video/3893529/lingerie-blowjob-from-beautiful-teen',
'only_matching': True,
}]
@staticmethod

View File

@@ -138,6 +138,7 @@ class DRTVIE(InfoExtractor):
'tbr': int_or_none(bitrate),
'ext': link.get('FileFormat'),
'vcodec': 'none' if kind == 'AudioResource' else None,
'preference': preference,
})
subtitles_list = asset.get('SubtitlesList')
if isinstance(subtitles_list, list):

View File

@@ -1,10 +1,10 @@
# coding: utf-8
from __future__ import unicode_literals
import base64
import re
from .common import InfoExtractor
from ..compat import compat_b64decode
from ..utils import (
qualities,
sanitized_Request,
@@ -42,7 +42,7 @@ class DumpertIE(InfoExtractor):
r'data-files="([^"]+)"', webpage, 'data files')
files = self._parse_json(
base64.b64decode(files_base64.encode('utf-8')).decode('utf-8'),
compat_b64decode(files_base64).decode('utf-8'),
video_id)
quality = qualities(['flv', 'mobile', 'tablet', '720p'])

View File

@@ -32,7 +32,7 @@ class DVTVIE(InfoExtractor):
}, {
'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/',
'info_dict': {
'title': 'DVTV 16. 12. 2014: útok Talibanu, boj o kliniku, uprchlíci',
'title': r're:^DVTV 16\. 12\. 2014: útok Talibanu, boj o kliniku, uprchlíci',
'id': '973eb3bc854e11e498be002590604f2e',
},
'playlist': [{
@@ -91,10 +91,24 @@ class DVTVIE(InfoExtractor):
}, {
'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
'only_matching': True,
}, {
'url': 'https://video.aktualne.cz/dvtv/babis-a-zeman-nesou-vinu-za-to-ze-nemame-jasno-v-tom-kdo-bud/r~026afb54fad711e79704ac1f6b220ee8/',
'md5': '87defe16681b1429c91f7a74809823c6',
'info_dict': {
'id': 'f5ae72f6fad611e794dbac1f6b220ee8',
'ext': 'mp4',
'title': 'Babiš a Zeman nesou vinu za to, že nemáme jasno v tom, kdo bude vládnout, říká Pekarová Adamová',
},
'params': {
'skip_download': True,
},
}]
def _parse_video_metadata(self, js, video_id):
def _parse_video_metadata(self, js, video_id, live_js=None):
data = self._parse_json(js, video_id, transform_source=js_to_json)
if live_js:
data.update(self._parse_json(
live_js, video_id, transform_source=js_to_json))
title = unescapeHTML(data['title'])
@@ -142,13 +156,18 @@ class DVTVIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
# live content
live_item = self._search_regex(
r'(?s)embedData[0-9a-f]{32}\.asset\.liveStarter\s*=\s*(\{.+?\});',
webpage, 'video', default=None)
# single video
item = self._search_regex(
r'(?s)embedData[0-9a-f]{32}\[["\']asset["\']\]\s*=\s*(\{.+?\});',
webpage, 'video', default=None, fatal=False)
webpage, 'video', default=None)
if item:
return self._parse_video_metadata(item, video_id)
return self._parse_video_metadata(item, video_id, live_item)
# playlist
items = re.findall(

View File

@@ -2,7 +2,9 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
determine_ext,
int_or_none,
try_get,
unified_timestamp,
@@ -17,7 +19,7 @@ class EggheadCourseIE(InfoExtractor):
'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
'playlist_count': 29,
'info_dict': {
'id': 'professor-frisby-introduces-composable-functional-javascript',
'id': '72',
'title': 'Professor Frisby Introduces Composable Functional JavaScript',
'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
},
@@ -26,14 +28,28 @@ class EggheadCourseIE(InfoExtractor):
def _real_extract(self, url):
playlist_id = self._match_id(url)
course = self._download_json(
'https://egghead.io/api/v1/series/%s' % playlist_id, playlist_id)
lessons = self._download_json(
'https://egghead.io/api/v1/series/%s/lessons' % playlist_id,
playlist_id, 'Downloading course lessons JSON')
entries = [
self.url_result(
'wistia:%s' % lesson['wistia_id'], ie='Wistia',
video_id=lesson['wistia_id'], video_title=lesson.get('title'))
for lesson in course['lessons'] if lesson.get('wistia_id')]
entries = []
for lesson in lessons:
lesson_url = lesson.get('http_url')
if not lesson_url or not isinstance(lesson_url, compat_str):
continue
lesson_id = lesson.get('id')
if lesson_id:
lesson_id = compat_str(lesson_id)
entries.append(self.url_result(
lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id))
course = self._download_json(
'https://egghead.io/api/v1/series/%s' % playlist_id,
playlist_id, 'Downloading course JSON', fatal=False) or {}
playlist_id = course.get('id')
if playlist_id:
playlist_id = compat_str(playlist_id)
return self.playlist_result(
entries, playlist_id, course.get('title'),
@@ -43,11 +59,12 @@ class EggheadCourseIE(InfoExtractor):
class EggheadLessonIE(InfoExtractor):
IE_DESC = 'egghead.io lesson'
IE_NAME = 'egghead:lesson'
_VALID_URL = r'https://egghead\.io/lessons/(?P<id>[^/?#&]+)'
_TEST = {
_VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
'info_dict': {
'id': 'fv5yotjxcg',
'id': '1196',
'display_id': 'javascript-linear-data-flow-with-container-style-types-box',
'ext': 'mp4',
'title': 'Create linear data flow with container style types (Box)',
'description': 'md5:9aa2cdb6f9878ed4c39ec09e85a8150e',
@@ -60,25 +77,51 @@ class EggheadLessonIE(InfoExtractor):
},
'params': {
'skip_download': True,
'format': 'bestvideo',
},
}
}, {
'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',
'only_matching': True,
}]
def _real_extract(self, url):
lesson_id = self._match_id(url)
display_id = self._match_id(url)
lesson = self._download_json(
'https://egghead.io/api/v1/lessons/%s' % lesson_id, lesson_id)
'https://egghead.io/api/v1/lessons/%s' % display_id, display_id)
lesson_id = compat_str(lesson['id'])
title = lesson['title']
formats = []
for _, format_url in lesson['media_urls'].items():
if not format_url or not isinstance(format_url, compat_str):
continue
ext = determine_ext(format_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url, lesson_id, 'mp4', entry_protocol='m3u8',
m3u8_id='hls', fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
format_url, lesson_id, mpd_id='dash', fatal=False))
else:
formats.append({
'url': format_url,
})
self._sort_formats(formats)
return {
'_type': 'url_transparent',
'ie_key': 'Wistia',
'url': 'wistia:%s' % lesson['wistia_id'],
'id': lesson['wistia_id'],
'title': lesson.get('title'),
'id': lesson_id,
'display_id': display_id,
'title': title,
'description': lesson.get('summary'),
'thumbnail': lesson.get('thumb_nail'),
'timestamp': unified_timestamp(lesson.get('published_at')),
'duration': int_or_none(lesson.get('duration')),
'view_count': int_or_none(lesson.get('plays_count')),
'tags': try_get(lesson, lambda x: x['tag_list'], list),
'series': try_get(
lesson, lambda x: x['series']['title'], compat_str),
'formats': formats,
}

View File

@@ -1,13 +1,13 @@
# coding: utf-8
from __future__ import unicode_literals
import base64
import json
from .common import InfoExtractor
from ..compat import (
compat_urlparse,
compat_b64decode,
compat_str,
compat_urlparse,
)
from ..utils import (
extract_attributes,
@@ -36,9 +36,9 @@ class EinthusanIE(InfoExtractor):
# reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
def _decrypt(self, encrypted_data, video_id):
return self._parse_json(base64.b64decode((
return self._parse_json(compat_b64decode((
encrypted_data[:10] + encrypted_data[-1] + encrypted_data[12:-1]
).encode('ascii')).decode('utf-8'), video_id)
)).decode('utf-8'), video_id)
def _real_extract(self, url):
video_id = self._match_id(url)

Some files were not shown because too many files have changed in this diff Show More