Compare commits

...

611 Commits

Author SHA1 Message Date
48c971e073 release 2015.03.24 2015-03-24 16:39:53 +01:00
f5e2efbbf0 [options] Handle special characters in argv (Fixes #5157) 2015-03-24 16:39:46 +01:00
b0872c19ea [npo] Skip broken URL links (Closes #5266) 2015-03-23 22:15:01 +06:00
9f790b9901 [mlb] Improve _VALID_URL (Closes #5260) 2015-03-23 21:23:57 +06:00
93f787070f [twitch] Only match digits for the video id
Urls can also contain a query (for example a timestamp '?t=foo')
2015-03-22 15:39:35 +01:00
f9544f6e8f [test/aes] Test aes_decrypt_text with 256 bit 2015-03-22 12:09:58 +01:00
336d19044c [libsyn] pep8: add space around operator 2015-03-22 11:03:52 +01:00
7866c9e173 Merge branch 'fstirlitz-the-daily-show-podcast' 2015-03-22 08:24:26 +06:00
1a4123de04 [comedycentral] Remove unused import 2015-03-22 08:23:38 +06:00
cf2e2eb1c0 [comedycentral] Drop thedailyshow podcast extractor
Generic extractor is just fine for Libsyn embeds
2015-03-22 08:23:20 +06:00
2051acdeb2 [extractor/generic] Add test for Libsyn embed 2015-03-22 08:20:27 +06:00
cefdf970cc [extractor/generic] Support Libsyn embeds 2015-03-22 08:18:13 +06:00
a1d0aa7b88 [libsyn] Fix extractor alphabetic order 2015-03-22 08:11:47 +06:00
49aeedb8cb [libsyn] Improve and simplify 2015-03-22 08:11:10 +06:00
ef249a2cd7 Merge branch 'the-daily-show-podcast' of https://github.com/fstirlitz/youtube-dl into fstirlitz-the-daily-show-podcast 2015-03-22 07:44:28 +06:00
a09141548a [nrk:playlist] Relax video id regex and improve _VALID_URL 2015-03-21 20:42:48 +06:00
5379a2d40d [test/utils] Test xpath_text 2015-03-21 14:12:43 +01:00
c9450c7ab1 [nrk:playlist] Restrict _VALID_URL
It would also match /videos/PS... urls
2015-03-21 14:00:37 +01:00
faa1b5c292 [nrk:playlist] Add extractor (Closes #5245) 2015-03-21 18:22:08 +06:00
393d9fc6d2 [nrk] Extract duration 2015-03-21 18:21:19 +06:00
4e6a228689 [nrk] Adapt to new URL format 2015-03-21 18:20:49 +06:00
179d6678b1 Remove the 'stitle' field
A warning has been printed for more than 2 years (since 97cd3afc75)
2015-03-21 12:34:44 +01:00
85698c5086 [crunchyroll] Remove unused class 2015-03-21 12:18:33 +01:00
a7d9ded45d [test] Add tests for aes 2015-03-21 12:07:23 +01:00
531980d89c [test/YoutubeDL] test match_filter 2015-03-20 17:05:28 +01:00
1887ecd4d6 [twitch] Fix login 2015-03-20 21:45:09 +06:00
cd32c2caba Merge branch 'ndac-todoroki-niconico_nm' 2015-03-20 20:53:27 +06:00
1c9a1457fc [niconico] Add nm video test 2015-03-20 20:53:14 +06:00
038b0eb1da Merge branch 'niconico_nm' of https://github.com/ndac-todoroki/youtube-dl into ndac-todoroki-niconico_nm 2015-03-20 20:52:56 +06:00
f20bf146e2 [test/YoutubeDL] split in two classes
The name was misleading
2015-03-20 15:14:25 +01:00
01218f919b [test/http] Add test for proxy support 2015-03-20 14:59:38 +01:00
2684871bc1 [vine] Fix formats extraction (Closes #5239) 2015-03-20 01:50:36 +02:00
ccf3960eec [nytimes] Improve _VALID_URL (Fixes #5238) 2015-03-19 20:55:05 +02:00
eecc0685c9 [videomega] Fix extraction and update test (Fixes #5235) 2015-03-19 19:38:03 +02:00
2ed849eccf Merge branch 'master' of github.com:rg3/youtube-dl 2015-03-19 21:27:38 +06:00
3378d67a18 [generic] Add support for nytimes embeds (Closes #5234) 2015-03-19 21:26:57 +06:00
f3c0c667a6 [nytimes] Modernize 2015-03-19 21:23:52 +06:00
0ae8bbac2d [nytimes] Support embed URL 2015-03-19 21:17:04 +06:00
cbc3cfcab4 release 2015.03.18 2015-03-18 22:02:39 +01:00
b30ef07c6c [ultimedia] Handle youtube embeds 2015-03-19 01:06:39 +06:00
73900846b1 [ultimedia] Capture and output unavailable video message 2015-03-19 00:53:26 +06:00
d1dc7e3991 [ultimedia] Fix alphabetic order 2015-03-18 23:11:48 +06:00
3073a6d5e9 [ultimedia] Add extractor
Sponsored by thankyoumotion.com
2015-03-18 23:08:18 +06:00
aae53774f2 [mixcloud] Try preview server first, then further numbers 2015-03-18 17:08:22 +01:00
7a757b7194 [mixcloud] Fix extraction of some metadata
The second test had some wrong info.
I couldn't find the timestamp, so I have removed it.
2015-03-18 17:08:19 +01:00
fa8ce26904 [mixcloud] Fix extraction like-count 2015-03-18 16:30:29 +01:00
2c2c06e359 [krasview] Fix extraction (Closes #5228) 2015-03-18 20:28:00 +06:00
ee580538fa fix nm video DL issue when logged in 2015-03-18 22:24:17 +09:00
c3c5c31517 fix nm video DL issue when logged in 2015-03-18 22:19:55 +09:00
ed9a25dd61 [generic] Generalize redirect regex 2015-03-18 00:05:40 +06:00
9ef4f12b53 testcases for libsyn and The Daily Show Podcast extractors 2015-03-17 18:54:36 +01:00
84f8101606 [generic] Follow redirects specified by Refresh HTTP header 2015-03-17 23:51:40 +06:00
b1337948eb [grooveshark] Fix extraction 2015-03-17 23:13:43 +06:00
98f02fdde2 Credit @jbuchbinder for primesharetv (#5123) 2015-03-17 22:33:05 +06:00
048fdc2292 Merge branch 'bonfy-douyutv' 2015-03-17 22:27:46 +06:00
2ca1c5aa9f [douyutv] Improve and extract all formats 2015-03-17 22:27:33 +06:00
674fb0fcc5 Merge branch 'douyutv' of https://github.com/bonfy/youtube-dl into bonfy-douyutv 2015-03-17 21:41:25 +06:00
00bfe40e4d Merge branch 'yan12125-sohu_fix' 2015-03-17 21:39:45 +06:00
cd459b1d49 [sohu] Fix test's note info 2015-03-17 21:39:31 +06:00
92a4793b3c [utils] Place sanitize url function near other sanitizing functions 2015-03-17 21:34:22 +06:00
dc03a42537 Merge branch 'sohu_fix' of https://github.com/yan12125/youtube-dl into yan12125-sohu_fix 2015-03-17 21:18:36 +06:00
219da6bb68 [megavideoeu] Remove extractor 2015-03-17 21:13:42 +06:00
0499cd866e [primesharetv] Clean up 2015-03-17 21:06:38 +06:00
13047f4135 [Primesharetv] Handle file not existing properly. 2015-03-17 20:33:32 +06:00
af69cab21d [Primesharetv] Add public domain example video 2015-03-17 20:33:24 +06:00
d41a3fa1b4 [Primesharetv] Add primeshare.tv extractor, still need test data 2015-03-17 20:33:16 +06:00
733be371af Add megavideoz.eu support. 2015-03-17 20:33:03 +06:00
576904bce6 [letv] Clarify download message 2015-03-17 20:01:31 +06:00
cf47794f09 Merge pull request #5116 from yan12125/letv_fix
[Letv] Fix test_Letv and test_Letv_1 failures in python 3
2015-03-17 19:58:34 +06:00
c06a9f8730 [arte+7] Check formats (Closes #5224) 2015-03-17 19:42:50 +06:00
2e90dff2c2 The Daily Show Podcast support 2015-03-16 20:05:02 +01:00
90183a46d8 Credit @eferro for the rtve.es:infantil extractor (#5214) 2015-03-15 22:49:03 +01:00
b68eedba23 [rtve.es:infantil] Minor fixes (closes #5214) 2015-03-15 22:18:41 +01:00
d5b559393b [rtve] Add new extractor for rtve infantil 2015-03-15 22:14:36 +01:00
1de4ac1385 release 2015.03.15 2015-03-15 19:38:50 +01:00
39aa42ffbb [ard] Capture and output time restricted videos (Closes #5213) 2015-03-16 00:21:38 +06:00
ec1b9577ba [cloudy] Fix key extraction (Closes #5211) 2015-03-15 22:42:13 +06:00
3b4444f99a Merge pull request #5208 from admire93/master
Fix mistyped docstring indent
2015-03-15 17:20:50 +06:00
613b2d9dc6 Fix mistyped docstring indent 2015-03-15 20:18:23 +09:00
8f4cc22455 [aftenposten] Adapt to new URL format 2015-03-15 10:08:14 +06:00
7c42327e0e tox.ini: Add python 3.4 2015-03-14 21:41:56 +01:00
873383e9bd tox.ini: Run the same command as 'make offlinetest' by default 2015-03-14 21:41:15 +01:00
8508557e77 [test/YoutubeDL] Use valid urls
It failed on python 3.4 when building the http_headers field
2015-03-14 20:51:42 +01:00
4d1652484f [test/unicode_literals] Don't look into the .git and .tox directories
The .tox directory contains python code that we can't control
2015-03-14 20:25:37 +01:00
88cf6fb368 [metadatafromtitle] Some improvements and cleanup
* Remove the 'songtitle' field, 'title' can be used instead.
* Remove newlines in the help text, for consistency with other options.
* Add 'from __future__ import unicode_literals'.
* Call '__init__' from the parent class.
* Add test for the format_to_regex method
2015-03-14 20:06:33 +01:00
e7db87f700 Add metadata from title parser
(Closes #5125)
2015-03-14 19:46:22 +01:00
2cb434e53e [Sohu] Fix title extraction 2015-03-15 01:05:01 +08:00
cd65491c30 [Sohu] Add a multipart video test case 2015-03-15 00:59:49 +08:00
082b1155a3 [livestream] Extract all videos in events (fixes #5198)
The webpage only contains the most recent ones, but if you scroll down more will appear.
2015-03-14 12:06:01 +01:00
9202b1b787 [eighttracks] Remove unused import 2015-03-14 12:04:49 +01:00
a7e01c438d [8tracks] Modernize 2015-03-14 15:55:21 +06:00
05be67e77d [8tracks] Improve extraction 2015-03-14 15:54:23 +06:00
85741b9986 [8tracks] Use predefined avg duration when duration is negative (Closes #5200) 2015-03-14 15:52:06 +06:00
f247a199fe Merge pull request #5199 from MamayAlexander/yandexmusic
[yandexmusic] Site mirrors
2015-03-14 15:20:48 +06:00
29171bc2d2 [yandexmusic] Site mirrors 2015-03-14 13:56:04 +06:00
7be5a62ed7 [viewster] Improve extraction 2015-03-14 03:18:04 +06:00
3647136f24 [viewster] Add extractor 2015-03-14 02:12:11 +06:00
13598940e3 [kanalplay] Fix test 2015-03-14 01:27:21 +06:00
0eb365868e Merge branch 'djpohly-beatport-pro' 2015-03-13 22:15:00 +06:00
28c6411e49 Credit @djpohly for BeatportPro (#5189) 2015-03-13 22:14:51 +06:00
bba3fc7960 [beatenpro] Fix tests 2015-03-13 22:13:50 +06:00
fcd877013e [beatenpro] Simplify 2015-03-13 22:11:56 +06:00
ba1d4c0488 [beatenpro] Improve display_id 2015-03-13 22:03:58 +06:00
517bcca299 [beatenpro] Simplify and improve 2015-03-13 22:01:15 +06:00
1b53778175 [beatenpro] Use generic format sort 2015-03-13 21:51:49 +06:00
b7a0304d92 Merge branch 'beatport-pro' of https://github.com/djpohly/youtube-dl into djpohly-beatport-pro 2015-03-13 21:47:01 +06:00
545315a985 [nrk] Use generic subtitles timecode formatter 2015-03-13 21:40:34 +06:00
3f4327520c [kanalplay] Extract subtitles 2015-03-13 21:39:29 +06:00
4a34f69ea6 [extractor/common] Add subtitles timecode formatter 2015-03-13 21:38:28 +06:00
fb7e68833c [kanalplay] Add extractor (Closes #5188) 2015-03-13 20:51:44 +06:00
486dd09e0b [YoutubeDL] Check for bytes instead of unicode output templates (#5192)
Also adapt the embedding examples for those poor souls still using 2.x.
2015-03-13 08:40:20 +01:00
054b99a330 [jeuxvideo] Fix extraction (fixes #5190) 2015-03-12 22:33:59 +01:00
65c5e044c7 fix python2 2015-03-12 16:42:55 -04:00
11984c7467 [BeatportPro] Add new extractor
This extractor is for Beatport's 2-minute, low-quality track previews
only.  To obtain an entire track, you obviously have to purchase and
download it normally through the Beatport store!

Possible future improvements:
- Playlists for albums or other track-list pages
- User login to play from My Beatport, Hold Bin, or Cart
2015-03-12 16:03:37 -04:00
3946864c8a [vimeo] Use https for all vimeo.com urls
Unfortunately vimeopro.com doesn't support it yet.
2015-03-12 19:08:16 +01:00
b84037013e [vimeo] Fix login (#3886) 2015-03-12 18:45:00 +01:00
1dbfc62d75 Merge pull request #5186 from leleobhz/master
* Change globo.py flash ver to 17.0.0.132 - Chrome 42.0.2311.22
2015-03-12 23:37:03 +06:00
d7d79106c7 * Change globo.py flash ver to 17.0.0.132 - Chrome 42.0.2311.22 2015-03-12 14:23:42 -03:00
1138491631 [yam] Skip test 2015-03-12 21:59:46 +06:00
71705fa70d [footyroom] Add extractor (Closes #5000) 2015-03-12 21:56:56 +06:00
602814adab Merge pull request #5150 from yan12125/yam_fix
[Yam] Add an error detection and update test cases
2015-03-12 21:01:49 +06:00
3a77719c5a Don't accept '-1' as format, 'all' is clearer 2015-03-11 17:38:35 +01:00
7e195d0e92 [funnyordie] Add subtitles test 2015-03-11 22:00:37 +06:00
e04793401d Merge branch 'pishposhmcgee-master' 2015-03-11 21:56:40 +06:00
a3fbd18824 [funnyordie] Simplify subtitles 2015-03-11 21:56:22 +06:00
c6052b8c14 Merge branch 'master' of https://github.com/pishposhmcgee/youtube-dl into pishposhmcgee-master 2015-03-11 21:45:43 +06:00
c792b5011f [ssa] Add extractor (Closes #5169) 2015-03-11 21:15:36 +06:00
32aaeca775 [npo] Improve smooth stream skipping and set low preference for streams other than hds and hls (Closes #5175) 2015-03-11 20:34:32 +06:00
1593194c63 Update funnyordie.py 2015-03-10 15:35:35 -05:00
614a7e1e23 Added subtitles for FunnyOrDie 2015-03-10 15:22:46 -05:00
2ebfeacabc [utils] Keep dot and dotdot unmodified (Closes #5171) 2015-03-10 00:50:11 +06:00
f5d8f58a17 [yandexmusic:album] Improve _VALID_URL to avoid matching tracks urls 2015-03-09 18:17:22 +01:00
937daef4a7 [niconico] Use '_match_id' 2015-03-09 18:12:41 +01:00
dd77f14c64 [yandexmusic] PEP8: remove blank line at the end of file 2015-03-09 18:07:31 +01:00
c36cbe5a8a Merge branch 'MamayAlexander-YandexMusic' 2015-03-09 21:46:44 +06:00
41b2194f86 Credit @MamayAlexander for yandexmusic (#5168) 2015-03-09 21:46:31 +06:00
d1e2e8f583 [yamusic] Rename to yandexmusic 2015-03-09 21:44:59 +06:00
47fe42e1ab [yamusic] Improve, simplify, fix python3 issues and add tests 2015-03-09 21:43:46 +06:00
4c60393854 [YandexMusic] Add new extractor 2015-03-09 19:06:49 +06:00
f848215dfc release 2015.03.09 2015-03-09 03:02:03 +01:00
dcca581967 Merge remote-tracking branch 'origin/master'
Conflicts:
	youtube_dl/YoutubeDL.py
2015-03-09 03:01:28 +01:00
d475b3384c [README] Better bug reporting instructions
Also address private emails which I get more and more these days.
2015-03-09 03:00:03 +01:00
dd7831fe94 [breakcom] Process only play purpose media formats (Closes #5164) 2015-03-09 04:55:35 +06:00
cc08b11d16 [adultswim] Improve video_info extraction (Fixes #5152)
Look for video_info inside `slugged_video`, if slug is not found among collections.
Also, simplify a bit.
2015-03-08 21:35:04 +02:00
8bba753cca [options] Rename --dump-intermediate-pages to --dump-pages for consistency with --write-pages 2015-03-08 18:37:43 +01:00
43d6280d0a [downloader/f4m] Fix use of base64 in python 3.2 (fixes #5132)
b64decode needs a byte string, but on 3.4 it also accepts strings.
2015-03-08 18:25:11 +01:00
e5a11a2293 [YoutubeDL] Sanitize path before creating non-existent paths (Closes #4324) 2015-03-08 22:09:42 +06:00
f18ef2d144 [utils] Disallow trailing dot in sanitize_path for a path part 2015-03-08 22:08:48 +06:00
1bb5c511a5 [YoutubeDL] Sanitize outtmpl as path 2015-03-08 20:57:30 +06:00
d55de57b67 [utils] Fix sanitize_open 2015-03-08 20:56:28 +06:00
a2aaf4dbc6 [utils] Add sanitize_path 2015-03-08 20:55:22 +06:00
bdf6eee0ae [gazeta] Extend _VALID_URL 2015-03-08 19:17:54 +06:00
8b910bda0c [teamcoco] Fix extraction 2015-03-08 14:28:53 +02:00
24993e3b39 [vidme] Fix view_count extraction and remove comment_count extraction (Fixes #5133)
Comment counts seem to no longer be listed on vid.me
2015-03-08 14:12:10 +02:00
11101076a1 [pladform] Fix format quality sorting 2015-03-08 18:09:47 +06:00
f838875726 [pladform] Add support for embeds 2015-03-08 18:07:10 +06:00
28778d6bae [pladform] Add extractor 2015-03-08 18:03:12 +06:00
1132eae56d [gazeta] Add new extractor (Closes #4222) 2015-03-08 13:54:01 +02:00
d34e79492d [twitch] Fix live streams (Closes #5158) 2015-03-08 16:54:11 +06:00
ab205b9dc8 Revert "[YoutubeDL] Sanitize outtmpl as it may contain forbidden characters"
This reverts commit 7dcad95d4f.

The output template is most definitely allowed to contain forbidden characters; otherwise -o /foo/bar/vid.mp4 wouldn't work.
2015-03-07 22:18:22 +01:00
7dcad95d4f [YoutubeDL] Sanitize outtmpl as it may contain forbidden characters 2015-03-08 01:13:23 +06:00
8a48223a7b [eagleplatform] Remove debug output 2015-03-07 22:35:36 +06:00
d47ae7f620 [eagleplatform] Add support for ClipYou embeds 2015-03-07 22:34:44 +06:00
135c9c42bf [eagleplatform] Add support for embeds 2015-03-07 22:22:57 +06:00
0bf79ac455 [eagleplatform] Add extractor 2015-03-07 22:16:23 +06:00
98998cded6 [youtube:search_url] Fix extraction (Closes #5155) 2015-03-07 18:59:06 +06:00
14137b5781 [orf:iptv] Add extractor (Closes #5140) 2015-03-07 17:31:03 +06:00
a172d96292 [douyutv] Add new extractor 2015-03-07 14:05:56 +08:00
23ba76bc0e [dailymotion] Replace test
It has been removed.
2015-03-06 22:45:05 +01:00
61e00a9775 [vimeo] Use https for player.vimeo.com urls (closes #5147) 2015-03-06 22:39:05 +01:00
d1508cd68d [vimeo:album] Fix password protected videos
Since it only uses https now, don't recognize http urls.
2015-03-06 22:16:26 +01:00
9c85b5376d [vimeo] Fix and use '_verify_video_password' (#5001)
It only supports verifying the password over https now.

Use it instead of manually setting the 'password' cookie because it allows to check if the password is correct.
2015-03-06 19:08:27 +01:00
3c6f245083 [vimeo] Fix upload date extraction 2015-03-06 18:16:56 +01:00
f207019ce5 [extractor/common] Remove 'm3u8' from quality selection URL 2015-03-06 22:53:53 +06:00
bd05aa4e24 [Yam] Add an error detection and update test cases 2015-03-07 00:53:52 +08:00
8dc9d361c2 [extractor/common] Fix format_id when last_media is None and always include m3u8_id if present
The rationale behind `m3u8_id` was to resolve duplicates when processing several m3u8 playlists within the same media that give equal resulting `format_id`'s,
e.g. `youtube-dl http://www.rts.ch/play/tv/passe-moi-les-jumelles/video/la-fee-des-bois-mustang-les-chemins-du-vent?id=3854925 -F`
2015-03-06 22:52:50 +06:00
d0e958c71c [twitch:vod] Prefer source stream (Fixes #5143) 2015-03-06 10:53:49 +01:00
a0bb7c5593 [extractor/common] Improve m3u format IDs (#5143) 2015-03-06 10:49:42 +01:00
7feddd9fc7 [travis] Declare 3.2 (Fixes #5144) 2015-03-06 10:44:24 +01:00
55969016e9 [utils] Add a function to sanitize consecutive slashes in URLs 2015-03-06 12:43:49 +08:00
9609f02e3c [vidme] Modernize 2015-03-05 22:34:56 +01:00
5c7495a194 [sohu] Correct wrong imports 2015-03-06 02:48:27 +08:00
5ee6fc974e [sohu] Fix info extractor and add tests 2015-03-06 02:43:39 +08:00
c2ebea6580 [extremetube] Fix extraction (Closes #5127) 2015-03-05 14:45:38 +02:00
12a129ec6d [playwire] Add extractor 2015-03-05 02:36:53 +06:00
f28fe66970 [downloader/http] Add missing fields for _hook_progress call
It would fail if you run 'youtube-dl --no-part URL' a second time when the file has already been downloaded.

(Reported in Fedora: https://bugzilla.redhat.com/show_bug.cgi?id=1195779)
2015-03-04 12:14:38 +01:00
123397317c [downloader/http] Remove wrong '_hook_progress' call (fixes #5117) 2015-03-03 18:45:56 +01:00
dc570c4951 [lrt] Pass --realtime to rtmpdump 2015-03-03 18:41:34 +02:00
22d3628319 [tvplay] Adapt _VALID_URL and test case to domain name change 2015-03-03 18:39:28 +02:00
50c9949d7a [youporn] Improve JSON regex and preserve the old one 2015-03-03 21:39:04 +06:00
376817c6d4 Merge pull request #5115 from chaos33/youporn-json
fix youporn extractor's json search regex
2015-03-03 21:32:13 +06:00
63fc800057 [Letv] Fix test_Letv and test_Letv_1 failures in python 3 2015-03-03 23:20:55 +08:00
e0d0572b73 fix youporn extractor's json search regex 2015-03-03 22:53:05 +08:00
7fde87c77d release 2015.03.03.1 2015-03-03 13:59:38 +01:00
938c3f65b6 Merge branch 'cn-verification-proxy' 2015-03-03 13:57:29 +01:00
2461f79d2a [utils] Correct per-request proxy handling 2015-03-03 13:56:06 +01:00
499bfcbfd0 Make sure netrc works for all extractors with login support
Fixes #5112
2015-03-03 12:59:17 +01:00
07490f8017 release 2015.03.03 2015-03-03 00:05:05 +01:00
91410c9bfa [letv] Add --cn-verification-proxy (Closes #5077) 2015-03-03 00:03:06 +01:00
a7440261c5 [utils] Strip leading dots
Fixes #2865, closes #5087
2015-03-02 19:07:19 +01:00
76c73715fb [generic] Parse RSS enclosure URLs (Fixes #5091) 2015-03-02 18:21:31 +01:00
c75f0b361a [downloader/external] Add support for custom options (Fixes #4885, closes #5098) 2015-03-02 18:21:31 +01:00
295df4edb9 [soundcloud] Fix glitches (#5101) 2015-03-02 22:47:07 +06:00
562ceab13d [soundcloud] Check direct links validity (Closes #5101) 2015-03-02 22:39:32 +06:00
2f0f6578c3 [extractor/common] Assume non HTTP(S) URLs valid 2015-03-02 22:38:44 +06:00
30cbd4e0d6 [lynda] Completely skip videos we don't have access to, extract base class and modernize (Closes #5093) 2015-03-02 22:12:10 +06:00
549e58069c Merge pull request #5105 from Ftornik/Lynda-subtitle-hotfix-2
[lynda] Check for the empty subtitles
2015-03-02 21:15:26 +06:00
7594be85ff [lynda] Check for the empty subtitle 2015-03-02 11:49:39 +02:00
3630034609 [vk] Fix test (Closes #5100) 2015-03-02 03:30:18 +06:00
4e01501bbf [vk] Fix extraction (Closes #4967, closes #4686) 2015-03-01 21:56:30 +06:00
1aa5172f56 [vk] Catch temporarily unavailable video error message 2015-03-01 21:55:43 +06:00
f7e2ee8fa6 Merge branch 'master' of github.com:rg3/youtube-dl 2015-03-01 12:05:13 +01:00
66dc9a3701 [README] Document HTTP 429 (Closes #5092) 2015-03-01 12:04:39 +01:00
31bd39256b --load-info: Use the fileinput module
It automatically handles the '-' filename as stdin
2015-03-01 11:54:48 +01:00
003c69a84b Use shutil.get_terminal_size for getting the terminal width if it's available (python >= 3.3) 2015-02-28 21:44:57 +01:00
0134901108 release 2015.02.28 2015-02-28 21:24:25 +01:00
eee6293d57 [thechive] remove in favor of Kaltura (#5072) 2015-02-28 20:55:49 +01:00
8237bec4f0 [escapist] Extract duration 2015-02-28 20:52:52 +01:00
29cad7ad13 Merge remote-tracking branch 'origin/master' 2015-02-28 20:51:54 +01:00
0d103de3b0 [twitch] Pass api_token along with every request (Closes #3986) 2015-02-28 22:59:55 +06:00
a0090691d0 Merge branch 'HanYOLO-puls4' 2015-02-28 22:26:35 +06:00
6c87c2eea8 [puls4] Improve and extract more metadata 2015-02-28 22:25:57 +06:00
58c2ec6ab3 Merge branch 'puls4' of https://github.com/HanYOLO/youtube-dl 2015-02-28 21:39:10 +06:00
df5ae3eb16 [oppetarkiv] Merge with svtplay 2015-02-28 21:25:04 +06:00
efda2d7854 Merge branch 'thc202-oppetarkiv' 2015-02-28 21:12:23 +06:00
e143f5dae9 [oppetarkiv] Extract f4m formats and age limit 2015-02-28 21:12:06 +06:00
48218cdb97 Merge branch 'oppetarkiv' of https://github.com/thc202/youtube-dl into thc202-oppetarkiv 2015-02-28 20:41:56 +06:00
e9fade72f3 Add postprocessor for converting subtitles (closes #4954) 2015-02-28 14:43:24 +01:00
0f2c0d335b [YoutubeDL] Use the InfoExtractor._download_webpage method for getting the subtitles
It handles encodings better, for example for 'http://www.npo.nl/nos-journaal/14-02-2015/POW_00942207'
2015-02-28 14:03:27 +01:00
40b077bc7e [oppetarkiv] Add new extractor
Some, if not all, of the videos appear to be geo-blocked (Sweden).
Test might fail (403 Forbidden) if not run through a Swedish connection.
2015-02-27 22:27:30 +00:00
a931092cb3 Merge branch 'puls4' of https://github.com/HanYOLO/youtube-dl into HanYOLO-puls4 2015-02-28 00:22:48 +06:00
bd3749ed69 [kaltura] Extend _VALID_URL (Closes #5081) 2015-02-28 00:19:31 +06:00
4ffbf77886 [odnoklassniki] Add extractor (Closes #5075) 2015-02-28 00:15:03 +06:00
781a7ef60a [lynda] Use 'lstrip' for the subtitles
The newlines at the end are important, they separate each piece of text.
2015-02-27 16:18:18 +01:00
5b2949ee0b Merge pull request #5076 from Ftornik/Lynda-subtitles-hotfix
[lynda] Fixed subtitles broken file
2015-02-27 20:56:54 +06:00
a0d646135a [lynda] Extend _VALID_URL 2015-02-27 20:56:06 +06:00
7862ad88b7 puls4 Add new extractor 2015-02-27 15:41:58 +01:00
f3bff94cf9 [rtve] Extract duration 2015-02-27 12:24:51 +01:00
0eba1e1782 [lynda] Fixed subtitles broken file 2015-02-27 00:51:22 +02:00
e3216b82bf [generic] Support dynamic Kaltura embeds (#5016) (#5073) 2015-02-27 00:34:19 +02:00
da419e2332 [musicvault] Use the Kaltura extractor 2015-02-26 23:47:45 +02:00
0d97ef43be [kaltura] Add new extractor 2015-02-26 23:45:54 +02:00
1a2313a6f2 [TheChiveIE] added support for TheChive.com (Closes #5016) 2015-02-27 02:36:45 +10:30
250a9bdfe2 [mpora] Improve _VALID_URL 2015-02-26 21:16:35 +06:00
6317a3e9da [mpora] Fix extraction 2015-02-26 21:10:49 +06:00
7ab7c9e932 [gamestar] Fix title extraction 2015-02-26 16:22:05 +02:00
e129c5bc0d [laola1tv] Allow live stream downloads 2015-02-26 14:35:48 +02:00
2e241242a3 Adding subtitles 2015-02-26 03:59:35 -06:00
9724e5d336 release 2015.02.26.2 2015-02-26 09:45:11 +01:00
63a562f95e [escapist] Detect IP blocking and use another UA (Fixes #5069) 2015-02-26 09:19:26 +01:00
5c340b0387 release 2015.02.26.1 2015-02-26 01:47:16 +01:00
1c6510f57a [Makefile] clean pyc files in clean target 2015-02-26 01:47:12 +01:00
2a15a98a6a [rtmp] Encode filename before invoking subprocess
This fixes #5066.
Reproducible with
LC_ALL=C youtube-dl "http://www.prosieben.de/tv/germanys-next-topmodel/video/playlist/ganze-folge-episode-2-das-casting-in-muenchen"
2015-02-26 01:44:20 +01:00
72a406e7aa [extractor/common] Pass in video_id (#5057) 2015-02-26 01:35:43 +01:00
feccc3ff37 Merge remote-tracking branch 'aajanki/wdr_live' 2015-02-26 01:34:01 +01:00
265bfa2c79 [letv] Simplify 2015-02-26 01:30:18 +01:00
8faf9b9b41 Merge remote-tracking branch 'yan12125/IE_Letv' 2015-02-26 01:26:55 +01:00
84be7c230c Cred @duncankl for airmozilla 2015-02-26 01:25:54 +01:00
3e675fabe0 [airmozilla] Be more tolerant when nonessential items are missing (#5030) 2015-02-26 01:25:00 +01:00
cd5b4b0bc2 Merge remote-tracking branch 'duncankl/airmozilla' 2015-02-26 01:15:08 +01:00
7ef822021b Merge remote-tracking branch 'mmue/fix-rtlnow' 2015-02-26 01:13:03 +01:00
9a48926a57 [escapist] Add support for advertisements 2015-02-26 00:59:53 +01:00
13cd97f3df release 2015.02.26 2015-02-26 00:42:02 +01:00
183139340b [utils] Bump our user agent 2015-02-26 00:40:12 +01:00
1c69bca258 [escapist] Fix config URL matching 2015-02-26 00:24:54 +01:00
c10ea454dc [telecinco] Recognize more urls (closes #5065) 2015-02-25 23:52:54 +01:00
9504fc21b5 Fix the RTL extractor for new episodes by using a different hostname 2015-02-25 23:27:19 +01:00
13d8fbef30 [generic] Don't set the 'title' if it's not defined in the entry (closes #5061)
Some of them may be an 'url' result, which in general don't have the 'title' field.
2015-02-25 17:56:51 +01:00
b8988b63a6 [wdr] Download a live stream 2015-02-24 21:23:59 +02:00
5eaaeb7c31 [f4m] Tolerate missed fragments on live streams 2015-02-24 21:22:59 +02:00
c4f8c453ae [f4m] Refresh fragment list periodically on live streams 2015-02-24 21:22:59 +02:00
6f4ba54079 [extractor/common] Extract HTTP (possibly f4m) URLs from a .smil file 2015-02-24 21:22:59 +02:00
637570326b [extractor/common] Extract the first of a seq of videos in a .smil file 2015-02-24 21:22:59 +02:00
37f885650c [eporner] Simplify and hardcode age limit 2015-02-25 01:08:54 +06:00
c8c34ccb20 Merge pull request #5056 from logon84/master
Eporner Fix (Closes #5050)
2015-02-25 01:05:35 +06:00
e765ed3a9c [eporner] Fix redirect_code error 2015-02-24 19:41:46 +01:00
677063594e [Letv] Update testcases 2015-02-25 02:10:55 +08:00
59c7cbd482 Update eporner.py
Updated to work. Old version shows an error about being unable to extract "redirect_code"
2015-02-24 18:58:32 +01:00
570311610e [Letv] Add playlist support 2015-02-25 01:26:44 +08:00
41b264e77c [nrktv] Workaround subtitles conversion issues on python 2.6 (Closes #5036) 2015-02-24 23:06:44 +06:00
df4bd0d53f [options] Add --yes-playlist as inverse of --no-playlist (Fixes #5051) 2015-02-24 17:25:02 +01:00
7f09a662a0 [Letv] Add new extractor. Single video only 2015-02-24 23:58:21 +08:00
4f3b21e1c7 release 2015.02.24.2 2015-02-24 16:34:42 +01:00
54233c9080 [escapist] Support JavaScript player (Fixes #5034) 2015-02-24 16:33:07 +01:00
db8e13ef71 release 2015.02.24.1 2015-02-24 11:38:21 +01:00
5a42414b9c [utils] Prevent hyphen at beginning of filename (Fixes #5035) 2015-02-24 11:38:01 +01:00
9c665ab72e [rtve] PEP8 2015-02-24 11:37:27 +01:00
b665ba6aa6 release 2015.02.24 2015-02-24 11:24:26 +01:00
ec5913b5cd [bloomberg] Modernize 2015-02-24 11:08:00 +01:00
25ac63ed71 [rtve] Extract subtitles 2015-02-23 23:04:07 +01:00
99209c2916 [youtube] Extract UL playlists as mixes (Closes #5040) 2015-02-24 01:35:15 +06:00
1fbaa0a521 [laola1tv] Use raw strings for regular expressions
Oops
2015-02-23 20:51:30 +02:00
3037b91e05 [laola1tv] Improve extraction and update test case (#3742) 2015-02-23 20:45:52 +02:00
ffdf972b91 [facebook] Extract all the formats (closes #5037) 2015-02-23 18:54:15 +01:00
459e5fbd5f release 2015.02.23.1 2015-02-23 18:17:39 +01:00
bfc993cc91 Merge branch 'subtitles-rework'
(Closes PR #4964)
2015-02-23 17:13:03 +01:00
4432db35d9 [gdcvault] Restore akamai host for rtmp videos 2015-02-23 21:59:11 +06:00
591ab1dff9 [soundgasm] PEP8 2015-02-23 16:51:21 +01:00
5bca2424bc [gdcvault] Remove dead code 2015-02-23 16:51:09 +01:00
bd61a9e770 release 2015.02.23 2015-02-23 16:47:19 +01:00
3438e7acd2 [soundgasm] Remove unused import 2015-02-23 21:40:50 +06:00
09c200acf2 Credit @skypher for chirbit and soundgasm:profile (#5032) 2015-02-23 21:31:57 +06:00
716889cab1 Merge branch 'skypher-chirbit' 2015-02-23 21:30:46 +06:00
409693984f [soundgasm:profile] Fix _VALID_URL 2015-02-23 21:30:30 +06:00
04e8c11080 [chirbit] Clarify extractors' IE_NAMEs 2015-02-23 21:28:14 +06:00
80af2b73ab [soundgasm] Clarify extractors' IE_NAMEs 2015-02-23 21:27:56 +06:00
3cc57f9645 [soundgasm:profile] Simplify 2015-02-23 21:27:24 +06:00
a65d4e7f14 [chirbit] Simplify and extract profile from RSS (#5032) 2015-02-23 21:15:16 +06:00
b531cfc019 [YoutubeDL] remove compatibility with the old subtitles system 2015-02-23 16:12:35 +01:00
543ec2136b Merge branch 'chirbit' of https://github.com/skypher/youtube-dl into skypher-chirbit 2015-02-23 19:21:25 +06:00
93b5071f73 [soundgasm] add profile IE. 2015-02-23 12:11:19 +01:00
ddc369f073 [chirbit] fix profile downloader regex. 2015-02-23 12:00:43 +01:00
1b40dc92eb [airmozilla] Add new extractor 2015-02-23 16:10:08 +13:00
fcc3e6138b [r7] Add extractor (Closes #4405, closes #5004) 2015-02-23 03:32:53 +06:00
9fe6ef7ab2 [extractor/common] Fix preference for m3u8 quality selection URL 2015-02-23 03:30:10 +06:00
c010af6f19 [escapist] Make regexes more robust (Closes #5028) 2015-02-22 23:11:33 +06:00
35b7982303 [appletrailers] Add test (#5027) 2015-02-22 19:58:39 +06:00
f311cfa231 [appletrailers] Extend _VALID_URL (Closes #5027) 2015-02-22 19:53:32 +06:00
80970e531b [test/subtitles] Update checksum for Viki 2015-02-22 11:52:22 +01:00
b7bb76df05 [test/subtitles] Remove some tests
Test only with 'allsubtitles', the language selection is already tested in test_YoutubeDL.py
2015-02-22 11:51:57 +01:00
98c70d6fc7 [YoutubeDL] only add normal subtitles to the 'requested_subtitles' field if 'writesubtitles' is True 2015-02-22 11:37:27 +01:00
ab84349b16 [test/YoutubeDL] Add test for subtitles
Updated the offlinetest make target to not skip it
2015-02-22 11:29:56 +01:00
03091e372f [ted] Always extract the subtitles
The required info is already in the webpage
2015-02-22 00:27:59 +01:00
4d17184817 [YoutubeDL] don't set the 'requested_subtitles' without writesubtitles or writeautomaticsub 2015-02-22 00:27:49 +01:00
e086e0eb6c release 2015.02.21 2015-02-21 21:25:29 +01:00
314368c822 [teamcoco] Fix extraction
Also, use a single style of quotes
2015-02-21 22:19:39 +02:00
c5181ab410 [gdcvault] Fix rtmp streams (Closes #5024) 2015-02-22 02:10:26 +06:00
ea5152cae1 [zapiks] Extend _VALID_URL (#5014) 2015-02-22 01:42:47 +06:00
255fca5eea [generic] Add support for Zapiks embeds (#5014) 2015-02-22 01:39:26 +06:00
4aeccadf4e [zapiks] Add extractor (#5014) 2015-02-22 01:38:57 +06:00
93540ee10e [rtve] Fix the video url
Changing mvod to mvod1 fixes the url, we don't need to add the query.
2015-02-21 19:31:39 +01:00
8fb3ac3649 PEP8: W503 2015-02-21 14:55:13 +01:00
77b2986b5b [extractor/common] Recognize Indian censorship (#5021) 2015-02-21 14:51:07 +01:00
62b013df0d [vimeo] Encode password before hash calculation 2015-02-21 18:31:10 +06:00
fad6768bd1 [vimeo] Fix password protected videos (Closes #5001) 2015-02-21 18:00:25 +06:00
a78125f925 Merge pull request #5019 from cyberjacob/master
Change example URLs in readme (fixes #5018)
2015-02-20 23:56:56 +01:00
a00a8bcc8a Change example URLs in readme (fixes #5018) 2015-02-20 22:43:51 +00:00
1e9a9e167d release 2015.02.20 2015-02-20 23:23:12 +01:00
3da0db62e6 [escapist] Fix extraction (fixes #5017) 2015-02-20 23:22:47 +01:00
e14ced7918 Merge branch 'master' of github.com:rg3/youtube-dl 2015-02-20 23:20:14 +01:00
ab9d02f53b Merge branch 'minusf-TED_code' 2015-02-21 00:14:51 +06:00
a461a11989 [ted] Improve external video handling and add test 2015-02-21 00:14:38 +06:00
1bd838608f prefer 'code' to 'uri' if present 2015-02-20 18:24:20 +01:00
365577f567 [chirbit] add profile extractor. 2015-02-20 14:48:12 +01:00
50efb383f0 [tv4] Add new extractor (Closes #4839) (Closes #2103) 2015-02-20 14:13:41 +02:00
5da6bd0083 [chirbit] Add new extractor. 2015-02-20 10:49:45 +01:00
5e9a033e6e [imgur] Allow alternative values
Every now and then, imgur.com goes crazy and gives us a generic title and description (otherwise it looks all fine though).
Simply update the test case to allow for that craziness.
2015-02-20 02:52:03 +01:00
fb7cb6823e Remove the SubtitlesInfoExtractor class
No longer needed
2015-02-19 23:24:24 +01:00
dd0a58f5f0 [blinkx] Fix extraction 2015-02-20 01:19:38 +06:00
a21420389e release 2015.02.19.3 2015-02-19 19:28:17 +01:00
6140baf4e1 [nationalgeographic] Add extractor (closes #4960) 2015-02-19 18:17:31 +01:00
8fc642eb5b [pornhub] Fix uploader regex 2015-02-19 22:15:49 +06:00
e66e1a0046 [pornhub] Add support for playlists (Closes #4995) 2015-02-19 22:15:19 +06:00
d5c69f1da4 [5min] Cover joystiq.com URLs (Closes #4962) 2015-02-19 21:47:11 +06:00
f13b1e7d7f [bbccouk] Convert to new subtitles system
I haven't found any video available outside the UK, so I haven't added tests.

I have updated how the srt file is built, because (at least for www.bbc.co.uk/programmes/p02j9b69) the subtitles are inside 'span' elements.
2015-02-19 16:46:41 +01:00
5c8a3f862a [nbc] Use a test video that works outside the US 2015-02-19 15:00:39 +01:00
8807f1277f [theplatform] Convert to new subtitles system 2015-02-19 14:54:50 +01:00
a3b9157f49 [cbssports] Add extractor (closes #4996) 2015-02-19 13:06:53 +01:00
b88ba05356 [imgur] Simplify 2015-02-19 05:53:09 +01:00
b74d505577 Merge remote-tracking branch 'jbboehr/imgur-gifv-improvements' 2015-02-19 05:16:11 +01:00
9e2d7dca87 [imgur] improve error check for non-video URLs 2015-02-18 19:47:54 -08:00
d236b37ac9 [imgur] improve regex #4998 2015-02-18 19:28:19 -08:00
e880c66bd8 [theonion] Modernize 2015-02-19 04:12:40 +01:00
383456aa29 [Makefile] Also delete *.avi files in clean 2015-02-19 04:09:52 +01:00
1a13940c8d [imgur] support regular URL 2015-02-18 18:12:48 -08:00
3d54788495 [webofstories] Fix extraction 2015-02-19 02:12:08 +01:00
71d53ace2f [sockshare] Do not require thumbnail anymore
Thumbnail is not present on the website anymore.
2015-02-19 02:04:30 +01:00
f37e3f99f0 [generic] Correct test case
Video has been reuploaded / edited
2015-02-19 02:00:52 +01:00
bd03ffc16e [netzkino] Skip download in test case
Works fine from Germany, but fails from everywhere else
2015-02-19 01:58:54 +01:00
1ac1af9b47 release 2015.02.19.2 2015-02-19 01:43:28 +01:00
3bf5705316 [imgur] Add new extractor 2015-02-19 01:43:20 +01:00
1c2528c8a3 [cbs] Modernize 2015-02-19 01:22:50 +01:00
7bd15b1a03 release 2015.02.19.1 2015-02-19 01:04:24 +01:00
6b961a85fd [patreon] Add support for embedlies (fixes #4969) 2015-02-19 01:04:19 +01:00
7707004043 [patreon] Modernize 2015-02-19 00:38:05 +01:00
a025d3c5a5 release 2015.02.19 2015-02-19 00:31:23 +01:00
c460bdd56b [sandia] Add new extractor (#4974) 2015-02-19 00:31:01 +01:00
b81a359eb6 [YoutubeDL] Use render_table for format listing 2015-02-19 00:28:58 +01:00
d61aefb24c Merge remote-tracking branch 'origin/master' 2015-02-19 00:01:14 +01:00
d305dd73a3 [utils] Fix js_to_json
Previously, the runtime could be atrocious for longer inputs.
2015-02-18 23:59:51 +01:00
93a16ba238 [vimeo] Raise the ExtractorError with expected=True when no video password is given 2015-02-18 22:00:12 +01:00
4f7cea6c53 [viki] Convert to new subtitles system 2015-02-18 20:37:16 +01:00
afbdd3acc3 [rai] Convert to new subtitles system 2015-02-18 20:14:42 +01:00
85d5866177 [yahoo] Remove md5sum from test case
The md5 sum has changed repeatedly, and we check whether it looks like a video anyways nowadays.
2015-02-18 20:03:04 +01:00
9789d7535d [xtube] Fix test case 2015-02-18 19:58:41 +01:00
d8443cd3f7 [wsj] Correct test case 2015-02-18 19:56:24 +01:00
d47c26e168 [brightcove] Correct keys in playlists 2015-02-18 19:56:10 +01:00
01561da142 [nrk] Convert to new subtitles system 2015-02-18 18:57:01 +01:00
0af25f784b [mtv] Convert to new subtitles system 2015-02-18 18:27:45 +01:00
b9b42f2ea0 [npo] Convert to new subtitles system 2015-02-18 17:57:10 +01:00
311c393838 [lynda] Convert to new subtitles system 2015-02-18 17:46:33 +01:00
18c1c42405 [drtv] Convert to new subtitles system 2015-02-18 17:20:22 +01:00
37dd5d4629 [mit] Don't set the subtitles field
YouTube already provides them in more formats
2015-02-18 17:19:56 +01:00
81975f4693 release 2015.02.18.1 2015-02-18 10:54:56 +01:00
b8b928d5cb [README] Add an FAQ entry for the player change in anticipation of many more bug reports 2015-02-18 10:54:45 +01:00
3eff81fbf7 [jsinterp] Disable comment support
We need a proper lexer to be able to understand YouTube's code, which contains /* inside of strings.
For now it's sufficient to just disable comment support altogether.

Fixes #4976, fixes #4979, fixes #4980, fixes #4981, fixes #4982.
Closes #4977.
2015-02-18 10:47:42 +01:00
785521bf4f [youtube] Remove useless if 2015-02-18 10:42:23 +01:00
6d1a55a521 [youtube] Show entire player URL when -v is given 2015-02-18 10:39:14 +01:00
9cad27008b release 2015.02.18 2015-02-18 00:49:34 +01:00
11e611a7fa Extend various playlist tests 2015-02-18 00:49:10 +01:00
72c1f8de06 [bandcamp:album] Fix extractor results and associated test 2015-02-18 00:48:52 +01:00
6e99868e4c [buzzfeed] Fix playlist test case 2015-02-18 00:41:45 +01:00
4d278fde64 [ign] Amend playlist test 2015-02-18 00:38:55 +01:00
f21e915fb9 [test/helper] Render info_dict with a final comma 2015-02-18 00:38:42 +01:00
6f53c63df6 [test/helper] Only output a newline for forgotten keys if keys are really missing 2015-02-18 00:37:54 +01:00
1def5f359e [livestream] Correct playlist ID and add a test for it 2015-02-18 00:34:45 +01:00
15ec669374 [vk] Amend playlist test 2015-02-18 00:33:41 +01:00
a3fa5da496 [vimeo] Amend playlist tests 2015-02-18 00:33:31 +01:00
30965ac66a [vimeo] Prevent infinite loops if video password verification fails
We're seeing this in the tests¹ right now, which do not terminate.

¹  https://travis-ci.org/jaimeMF/youtube-dl/jobs/51135858
2015-02-18 00:27:58 +01:00
09ab40b7d1 Merge branch 'progress-as-hook2' 2015-02-17 23:41:48 +01:00
edab9dbf4d [YoutubeDL] use the 'render_table' function for listing the subtitles 2015-02-17 22:59:19 +01:00
9868ea4936 [extractor/common] Simplify subtitles handling methods
Initially I was going to use a single method for handling both subtitles and automatic captions, that's why I used the 'list_subtitles' and the 'subtitles' variables.
2015-02-17 22:16:29 +01:00
85920dd01d [bliptv] Convert to new subtitles system 2015-02-17 21:56:25 +01:00
fa15607773 PEP8 fixes 2015-02-17 21:46:20 +01:00
a91a2c1a83 [downloader] Remove various unneeded assignments and imports 2015-02-17 21:44:41 +01:00
16e7711e22 [downloader/http] Remove gruesome import 2015-02-17 21:42:31 +01:00
5cda4eda72 [YoutubeDL] Use a progress hook for progress reporting
Instead of every downloader calling two helper functions, let our progress report be an ordinary progress hook like everyone else's.
Closes #4875.
2015-02-17 21:40:35 +01:00
98f000409f [radio.de] Fix extraction 2015-02-17 21:40:09 +01:00
bd7fe0cf66 [walla] Convert to new subtitles system 2015-02-17 21:23:09 +01:00
48246541da [ceskatelevize] Convert to new subtitles system 2015-02-17 21:17:47 +01:00
4a8d4a53b1 [videolecturesnet] Fix rtmp stream glitches (Closes #4968) 2015-02-18 01:16:49 +06:00
4cd95bcbc3 [twitch:stream] Prefer the 'source' format (fixes #4972) 2015-02-17 18:57:01 +01:00
be24c8697f release 2015.02.17.2 2015-02-17 17:38:31 +01:00
0d93378887 [videolecturesnet] Check http format URLs (Closes #4968) 2015-02-17 22:35:27 +06:00
4069766c52 [extractor/common] Test URLs with GET 2015-02-17 22:35:27 +06:00
7010577720 release 2015.02.17.1 2015-02-17 17:35:08 +01:00
8ac27a68e6 [hls] Switch to available as a property 2015-02-17 17:35:03 +01:00
46312e0b46 release 2015.02.17 2015-02-17 17:29:32 +01:00
f9216ed6ad Merge remote-tracking branch 'origin/master' 2015-02-17 17:28:51 +01:00
65bf37ef83 [ffmpeg] Remove trivial helper method 2015-02-17 17:27:29 +01:00
f740fae2a4 [ffmpeg] Make available a property 2015-02-17 17:26:41 +01:00
fbc503d696 [downloader/hls] Fix detection of ffmpeg/avconv (reported in #4966) 2015-02-17 16:40:42 +01:00
662435f728 [YoutubeDL] Use a Request object for getting the cookies (fixes #4970)
So that we don't have to implement all the methods used by the cookiejar.
2015-02-17 16:29:24 +01:00
163d966707 [downloader/external] curl: Add the '--location' flag
curl doesn't follow redirections by default
2015-02-17 16:21:02 +01:00
85729c51af [downloader] Add --hls-prefer-native to use the native HLS downloader (#4966) 2015-02-17 12:09:12 +01:00
360e1ca5cc [youtube] Convert to new subtitles system
The automatic captions are stored in the 'automatic_captions' field, which is used if no normal subtitles are found for a specific language.
2015-02-16 22:47:39 +01:00
a1f2a06b34 [dailymotion] Convert to new subtitles system 2015-02-16 21:51:08 +01:00
c84dd8a90d [YoutubeDL] store the subtitles to download in the 'requested_subtitles' field
We need to keep the original subtitles information, so that the '--load-info' option can be used to list or select the subtitles again.
We'll also be able to have a separate field for storing the automatic captions info.
2015-02-16 21:51:08 +01:00
65469a7f8b [vimeo] Convert to new subtitles system
Removed some tests, the behaviour should be checked in a test for the YoutubeDL class
2015-02-16 21:51:07 +01:00
6b597516c1 [atresplayer] Convert to new subtitles system 2015-02-16 21:51:07 +01:00
b5857f62e2 [crunchyroll] Convert to new subtitles system 2015-02-16 21:51:07 +01:00
a504ced097 Improve subtitles support
For each language the extractor builds a list with the available formats sorted (like for video formats), then YoutubeDL selects one of them using the '--sub-format' option which now allows giving the format preferences (for example 'ass/srt/best').
For each format the 'url' field can be set so that we only download the contents if needed, or if the contents needs to be processed (like in crunchyroll) the 'data' field can be used.

The reasons for this change are:
* We weren't checking that the format given with '--sub-format' was available, checking it in each extractor would be repetitive.
* It allows to easily support giving a format preference.
* The subtitles were automatically downloaded in the extractor, but I think that if you use for example the '--dump-json' option you want to finish as fast as possible.

Currently only the ted extractor has been updated, but the old system still works.
2015-02-16 21:51:03 +01:00
1db5fbcfe3 release 2015.02.16.1 2015-02-16 15:47:13 +01:00
59b8ab5834 [rtlnl|generic] Add support for rtl.nl embeds (Fixes #4959) 2015-02-16 15:45:45 +01:00
a568180441 release 2015.02.16 2015-02-16 04:51:20 +01:00
85e80f71cd [yam] Allow faults in optional fields (#4943) 2015-02-16 04:50:57 +01:00
bfa6bdcd8b Merge remote-tracking branch 'yan12125/IE_Yam' 2015-02-16 04:44:28 +01:00
03cd72b007 [extractor/common] Move up filesize
filesize and tbr should correlate, so it doesn't make sense to treat them differently.
2015-02-16 04:39:22 +01:00
5bfd430f81 Merge remote-tracking branch 'origin/master' 2015-02-16 04:09:10 +01:00
73fac4e911 [ffmpeg] Add --ffmpeg-location 2015-02-16 04:05:53 +01:00
8fb474fb17 [test/subtitles] Fix some tests
The checksum for the CeskaTelevize subtitles has changed again, so we just test that it has a reasonable length.
2015-02-15 15:01:07 +01:00
f813928e4b [bbccouk] Fix fallback to legacy playlist 2015-02-15 16:32:38 +06:00
b9c7a97318 [history] Add extractor (Closes #4934) 2015-02-15 04:57:52 +06:00
9fb2f1cd6d [theplatform] Add URL sign capability 2015-02-15 04:56:12 +06:00
6ca7732d5e [extractor/common] Fix link to external documentation 2015-02-14 22:20:24 +01:00
b0ab0fac49 Remove unused imports 2015-02-14 22:19:58 +01:00
a294bce82f [streamcz] Fix extraction (Closes #4940) 2015-02-14 17:48:04 +02:00
76d1466b08 [drtuber] Add one more title regex 2015-02-14 18:50:13 +06:00
1888d3f7b3 Merge pull request #4951 from peugeot/beeg
[beeg] fix test
2015-02-14 18:46:49 +06:00
c2787701cc Merge pull request #4950 from peugeot/drtuber
[drtuber] fix extraction
2015-02-14 18:46:43 +06:00
52e1d0ccc4 [beeg] fix test 2015-02-14 13:42:42 +01:00
10e3c4c221 [drtuber] fix extraction 2015-02-14 13:40:35 +01:00
68f2d273bf [sunporno] Keep old video regex just in case 2015-02-14 18:33:52 +06:00
7c86c21662 Merge pull request #4949 from peugeot/sunporno
[sunporno] fix extraction
2015-02-14 18:32:18 +06:00
ae1580d790 [sunporno] fix extraction 2015-02-14 13:29:44 +01:00
3215c50f25 Credit @ryandesign for nbcnews nightly news (#4948) 2015-02-14 17:44:24 +06:00
36f73e8044 Merge branch 'ryandesign-nbc-nightly-news' 2015-02-14 17:42:32 +06:00
a4f3d779db [nbcnews] Simplify 2015-02-14 17:42:12 +06:00
d9aa2b784d Support NBC Nightly News broadcasts 2015-02-14 04:10:23 -06:00
cffcbc02de [postprocessor/ffmpeg] Don't let ffmpeg read from stdin (fixes #4945)
If you run 'while read aurl ; do youtube-dl --extract-audio "${aurl}"; done < path_to_batch_file' (batch_file contains one url per line) each call to youtube-dl consumed some characters and 'read' would assign to 'aurl' an invalid url, something like 'tube.com/watch?v=<id>'.
2015-02-13 22:25:34 +01:00
9347fddbfc [1tv] Cover arbitrary URLs 2015-02-14 02:04:28 +06:00
037e9437e4 [camdemy] Fix _VALID_URL 2015-02-13 20:10:42 +06:00
36e7a4ca2e [test/subtitles] Update checksums 2015-02-13 14:43:50 +01:00
ae6423d704 [bambuser] Fix 'uploader_id' extraction (fixes #4944) 2015-02-13 11:36:33 +01:00
7105440cec [Yam] Add new extractor 2015-02-13 15:14:23 +08:00
c80b9cd280 Merge branch 'robin007bond-nporadio' 2015-02-13 01:37:27 +06:00
171ca612af [npo:radio] Move to extractor to common npo place and add extractor for fragments 2015-02-13 01:36:54 +06:00
c3d64fc1b3 [nporadio] Edit to conform to flake8 standards 2015-02-12 19:28:58 +01:00
7c24ce225d [NPORadio] Added extractor for live radio 2015-02-12 19:19:55 +01:00
08b38d5401 [camdemy] Simplify and make more robust (#4938)
Do not throw errors if view count or upload date extraction fails.
Dispose of re.MULTILINE, which had absolutely no effect without any ^ or $ in sight.
Follow PEP8 naming conventions.
2015-02-12 08:55:06 +01:00
024c53694d Merge remote-tracking branch 'yan12125/IE_camdemy' 2015-02-12 08:44:39 +01:00
7e6011101f [camdemy] Python2 compatibility 2015-02-12 14:23:25 +08:00
c40feaba77 [camdemy] Add support for folders 2015-02-12 14:13:19 +08:00
5277f09dfc release 2015.02.11 2015-02-11 19:02:39 +01:00
2d30521ab9 [youtube] Extract average rating (closes #2362) 2015-02-11 18:39:31 +01:00
050fa43561 flake8: Ignore some error added in pep8 1.6
* E402: we execute code between imports, like modifying 'sys.path' in the tests
* E731: we assign to lambdas in a lot of places, we may want to consider defining functions in a single line instead (what pep8 recommends)
2015-02-11 18:15:15 +01:00
f36f92f4da [aes] style: Put __all__ variable at the end of the file 2015-02-11 18:15:15 +01:00
124f3bc67d [dotsub] Fix extraction and modernize 2015-02-11 22:33:03 +06:00
d304209a85 [test/parameters.json] Set 'fixup' to 'never'
The fixed audio files for Youtube have a size lower than the minimum required.
2015-02-11 17:25:04 +01:00
8367d3f3cb [camdemy] Detection of external sources 2015-02-12 00:11:33 +08:00
c56d7d899d [dctptv] Skip rtmp download 2015-02-11 22:10:33 +06:00
ea5db8469e [canalplus] Add support for itele.fr URLs (Closes #4931) 2015-02-11 16:21:52 +02:00
3811c567e7 [teamcoco] Fix video id extraction 2015-02-11 15:47:19 +02:00
8708d76425 [camdemy] Add new extractor
Single file download done, while folder extraction is planned
2015-02-11 16:39:15 +08:00
054fe3cc40 [ntvru] Adapt to new direct delivery and modernize (Closes #4918) 2015-02-10 21:35:34 +06:00
af0d11f244 release 2015.02.10.5 2015-02-10 15:56:04 +01:00
9650885be9 [escapist] Filter video differently (Fixes #4919) 2015-02-10 15:55:51 +01:00
596ac6e31f [escapist] Modernize 2015-02-10 15:45:36 +01:00
612ee37365 release 2015.02.10.4 2015-02-10 11:28:34 +01:00
442c37b7a9 [YoutubeDL] Do not perform filter matching on partial results (Fixes #4921) 2015-02-10 11:28:28 +01:00
04bbe41330 release 2015.02.10.3 2015-02-10 05:42:47 +01:00
8f84f57183 [ccc] Add new extractor (Fixes #4890) 2015-02-10 05:42:41 +01:00
6a78740211 [test/test_youtube_signature] Use fake YDL 2015-02-10 05:28:59 +01:00
c0e1a415fd [firstpost] Modernize 2015-02-10 05:28:48 +01:00
bf8f082a90 [vimeo:album] Add support for album passwords (Fixes #4917) 2015-02-10 04:53:21 +01:00
2f543a2142 [options] Add alias --dump-header for --print-traffic 2015-02-10 04:52:33 +01:00
7e5db8c930 [options] Add --no-color 2015-02-10 04:22:10 +01:00
f7a211dcc8 [pornhd] Fix extraction (fixes #4915) 2015-02-10 03:41:31 +01:00
845734773d release 2015.02.10.2 2015-02-10 03:32:55 +01:00
347de4931c [YoutubeDL] Add generic video filtering (Fixes #4916)
This functionality is intended to eventually encompass the current format filtering.
2015-02-10 03:32:24 +01:00
8829650513 release 2015.02.10.1 2015-02-10 01:46:09 +01:00
c73fae1e2e [commonmistakes] Detect BOMs at the beginning of URLs
Reported at https://bugzilla.redhat.com/show_bug.cgi?id=1093517 .
2015-02-10 01:40:55 +01:00
834bf069d2 [bandcamp] Correct variable name 2015-02-10 01:37:14 +01:00
c06a9fa34f Use snake_case instead of camelCase 2015-02-10 01:36:38 +01:00
753fad4adc [commonmistakes] Correct logic error 2015-02-10 01:34:01 +01:00
34814eb66e release 2015.02.10 2015-02-10 01:19:52 +01:00
3a5bcd0326 [extractor/common] Wrap extractor errors (Fixes #1194)
For now, we just wrap some common errors. More may follow. We do not want to catch actual programming errors in the extractors, such as 1 // 0.
2015-02-10 01:17:23 +01:00
99c2398bc6 [bandcamp] Use our API to get more stable error messages (#1194) 2015-02-09 19:08:51 +01:00
28f1272870 [svtplay] Correct test case 2015-02-09 16:05:01 +01:00
f18e3a2fc0 release 2015.02.09.3 2015-02-09 15:59:19 +01:00
c4c5dc27cb Merge branch 'master' of github.com:rg3/youtube-dl 2015-02-09 15:59:14 +01:00
2caf182f37 [trilulilu] Add support for videos without category in the URL (Closes #4067)
Also, update the testcase, detect private/country restricted videos and modernize a bit.
2015-02-09 17:00:05 +02:00
43f244b6d5 [YoutubeDL] Do not show worst in --list-formats output
Nobody wants to know what the worst possible format is. And if they do, they can still provide -f worst.
2015-02-09 15:57:42 +01:00
1309b396d0 [svtplay] Add new extractor (Fixes #4914) 2015-02-09 15:56:59 +01:00
ba61796458 [youtube] Don't override format info from the dash manifest (fixes #4911) 2015-02-09 15:04:22 +01:00
3255fe7141 release 2015.02.09.2 2015-02-09 14:46:30 +01:00
e98b8e79ea [generic] Improve SBS detection (Fixes #4899) 2015-02-09 14:46:10 +01:00
196121c51b release 2015.02.09.1 2015-02-09 10:49:10 +01:00
5269028951 [rtlnow] Add test for @mmue's extension (#4908) 2015-02-09 10:47:19 +01:00
f7bc056b5a Merge remote-tracking branch 'mmue/fix-rtlnow' 2015-02-09 10:44:55 +01:00
a0f7198544 [generic] Add support for jwPlayer YouTube videos
This makes nationalarchives.gov.uk work (Fixes #4907, fixes #4876)
2015-02-09 10:43:01 +01:00
dd8930684e release 2015.02.09 2015-02-09 10:28:16 +01:00
bdb186f3b0 fix rtlnow for newer series like "Der Bachelor" season 5 2015-02-08 21:55:39 +01:00
64f9baa084 [options] Mention asr as possible filter 2015-02-09 01:35:16 +06:00
b29231c040 release 2015.02.08 2015-02-08 20:28:38 +01:00
6128bf07a9 [options] Update help on string comparisons 2015-02-09 01:27:27 +06:00
2ec19e9558 [YoutubeDL] Allow filtering by audio sampling rate 2015-02-09 01:09:45 +06:00
9ddb6925bf [YoutubeDL] Allow filtering by string properties (#4906) 2015-02-09 01:07:43 +06:00
12931e1c6e Credit @robin007bond for tweakers (#4881) and gamekings fixes (#4901) 2015-02-08 23:33:29 +06:00
41c23b0da5 [gamekings] Support videos from news pages 2015-02-08 23:12:59 +06:00
2578ab19e4 Merge branch 'robin007bond-gamekings' 2015-02-08 23:03:31 +06:00
d87ec897e9 [gamekings] Improve extraction 2015-02-08 23:03:12 +06:00
3bd4bffb1c Merge branch 'gamekings' of https://github.com/robin007bond/youtube-dl into robin007bond-gamekings 2015-02-08 22:46:43 +06:00
c36b09a502 [Gamekings] Use thumbnail in return statement 2015-02-08 16:46:13 +01:00
641eb10d34 Use _family_friendly_search for determining age_limit 2015-02-08 17:45:38 +02:00
955c5505e7 [Gamekings] Use xpath
XPath is used for extracting the video url and the thumbnail
2015-02-08 16:44:25 +01:00
69319969de [extractor/common] Add new helper method _family_friendly_search 2015-02-08 17:39:00 +02:00
a14292e848 [soulanime] Remove extractor (#4554)
Was supposed to be deleted by 67c2bcd
2015-02-08 16:57:07 +02:00
5d678df64a [Gamekings] Download playlist
Todo: URL and Thumbnail should be extracted with XPath
2015-02-08 15:34:37 +01:00
8ca8cbe2bd [Gamekings] Check string for vimeo, fix test
The test now doesn't fail anymore. It just checks the string for having
"vimeo" in it, instead of using the method for URL-checking, since it
returns an error.

The tests don't fail, and the extractor works fine now.
2015-02-08 14:41:14 +01:00
ba322d8209 [Gamekings] Added test and replaced video_url
Quick and dirty fix for the Gamekings extractor. It gives an error about
the video_url, but it downloads it now instead of giving a 404 error on
newer Gamekings videos
2015-02-08 14:23:37 +01:00
2f38289b79 [Gamekings] Fix order of replacement string
Oops.
2015-02-08 13:49:32 +01:00
f23a3ca699 [Gamekings] Fixed typo in URL replacement 2015-02-08 13:47:27 +01:00
77d2b106cc [Gamekings] Fix 404 when large isn't available
When trying to download some GameKings videos, not all worked. This was
because not all videos had a "/large"-URL available. The extractor
checks now if the /large URL is available, if it isn't, it tries to get
the normal URL.
2015-02-08 13:42:41 +01:00
c0e46412e9 [aparat] Fix extraction (Closes #4897) 2015-02-08 17:30:29 +06:00
0161353d7d [test/test_YoutubeDL] Remove debug print call 2015-02-06 23:58:01 +01:00
2b4ecde2c8 [test/YoutubeDL] Add a simple test for postprocessors
Just checks that the 'keepvideo' option works as intended.
2015-02-06 23:54:25 +01:00
b3a286d69d [YoutubeDL] _calc_cookies: add get_header method to _PseudoRequest (#4861) 2015-02-06 22:23:06 +01:00
467d3c9a0c [ffmpeg] --extract-audio: Use the same options for avconv and ffmpeg
They have been available in ffmpeg since version 0.9, and we require 1.0 or higher.
2015-02-06 22:05:11 +01:00
ad5747bad1 [rtp] Construct regular HTTP download URLs (#4882) 2015-02-06 23:00:54 +02:00
d6eb66ed3c [aftenposten] Add extractor (Closes #4863) 2015-02-07 01:46:54 +06:00
7f2a9f1b49 [tvigle] Add support for cloud URLs (Closes #4887) 2015-02-06 21:15:01 +06:00
1e1896f2de [extractor/common] Correct sort order.
We should look at height and width before ext_preference.
2015-02-06 15:16:45 +01:00
c831973366 release 2015.02.06 2015-02-06 14:38:30 +01:00
1a2548d9e9 [rtp] Pass --realtime to rtmpdump (Fixes #4882)
A workaround for video jumping back in time.
2015-02-06 13:44:46 +02:00
3900eec27c [extractor/common] Fix 2.0 manifest extraction (Closes #4830) 2015-02-06 04:29:29 +06:00
a02d212638 Merge branch 'robin007bond-tweakers' 2015-02-06 03:23:56 +06:00
9c91a8fa70 [tweakers] Switch extraction to xspf playlist, extract all formats and meta (#4881) 2015-02-06 03:23:42 +06:00
41469f335e Merge branch 'tweakers' of https://github.com/robin007bond/youtube-dl into robin007bond-tweakers 2015-02-06 02:59:33 +06:00
67ce4f8820 Use match_id method instead of splitted URL 2015-02-05 21:49:13 +01:00
bc63d56cca Remove unnecessary TODO-comments 2015-02-05 21:40:18 +01:00
c893d70805 Remove player-url in tweakers.py
Player-url only needed for rtmp, not for regular URLs
2015-02-05 21:38:35 +01:00
3ee6e02564 Edit Tweakers extractor
Fixed code conventions (mainly adding two or more spaces before making
an inline comment)
2015-02-05 19:59:36 +01:00
e3aaace400 [tweakers] Add new extractor 2015-02-05 19:55:41 +01:00
300753a069 [YoutubeDL] Fix video+audio format field (Closes #4880) 2015-02-06 00:51:16 +06:00
f13b88c616 [rts] Fix f4m and m3u8 extraction (Closes #4873) 2015-02-05 22:17:50 +06:00
60ca389c64 [extractor/common] Prefix f4m/m3u8 entries with identifier 2015-02-05 22:16:27 +06:00
1b0f3919c1 Merge branch 'Frenzie-npo' 2015-02-05 20:15:13 +06:00
6a348cf7d5 Credit @Frenzie for npo subtitles (#4878) 2015-02-05 20:14:56 +06:00
9e91449c8d [npo] Fix subtitles (Closes #3638) 2015-02-05 20:13:28 +06:00
25e5ebf382 Add NPO.nl subtitles
Implements #3638
2015-02-05 12:51:33 +01:00
7dfc356625 release 2015.02.04 2015-02-04 16:09:35 +01:00
58ba6c0160 [mixcloud] Fix extraction (Closes #4862) 2015-02-04 19:47:55 +06:00
f076b63821 [generic/ooyala] Add support for Ooyala embeds on SBN network websites (Fixes #4859) 2015-02-04 15:33:37 +02:00
12f0454cd6 [README] Add an FAQ entry about anime sites 2015-02-03 14:18:15 +01:00
cd7342755f release 2015.02.03.1 2015-02-03 10:59:27 +01:00
9bb8e0a3f9 [wsj] Add new extractor (Fixes #4854) 2015-02-03 10:58:28 +01:00
1a6373ef39 [sort_formats] Prefer bitrate over video size
720p @ 1000KB/s looks way better than 1080p @ 500KB/s
2015-02-03 10:53:07 +01:00
f6c24009be [YoutubeDL] Calculate thumbnail IDs automatically 2015-02-03 10:52:22 +01:00
d862042301 [aftonbladet] Modernize 2015-02-03 10:18:32 +01:00
23d9ded655 [franceculture] Rewrite for new HTML scheme (Fixes #4853) 2015-02-03 10:17:13 +01:00
4c1a017e69 release 2015.02.03 2015-02-03 00:22:52 +01:00
ee623d9247 [devscripts/release] Regenerate auxiliary documentation on build as well 2015-02-03 00:22:17 +01:00
330537d08a [README] typo 2015-02-03 00:20:57 +01:00
2cf0ecac7b [ffmpeg] --add-metadata: Set comment and purl fields (Fixes #4847) 2015-02-03 00:16:45 +01:00
d200b11c7e [Makefile] Simplify clean/cleanall 2015-02-03 00:14:42 +01:00
d0eca21021 release 2015.02.02.5 2015-02-02 23:47:19 +01:00
c1147c05e1 [brightcove] Fix up more generically invalid XML (Fixes #4849) 2015-02-02 23:47:14 +01:00
55898ad2cf release 2015.02.02.4 2015-02-02 23:39:03 +01:00
a465808592 Merge branch 'master' of github.com:rg3/youtube-dl 2015-02-02 23:38:54 +01:00
5c4862bad4 [normalboots] Remove unused import 2015-02-02 23:38:45 +01:00
995029a142 [nerdist] Add new extractor (Fixes #4851) 2015-02-02 23:38:35 +01:00
a57b562cff [nfl] Add support for articles pages (fixes #4848) 2015-02-02 23:17:00 +01:00
531572578e [normalboots] Modernize 2015-02-02 23:04:39 +01:00
3a4cca687f release 2015.02.02.3 2015-02-02 22:56:15 +01:00
7d3d06a16c [vevo] Restore SMIL support (#3656) 2015-02-02 22:48:12 +01:00
c21b1fbeeb release 2015.02.02.2 2015-02-02 21:58:58 +01:00
f920ce295e [ntvru] Remove unused import 2015-02-02 21:58:17 +01:00
7a7bd19c45 [n-tv.de] Use native m3u8 as best format 2015-02-02 21:57:48 +01:00
8f4b58d70e [ntvde] Add new extractor (Fixes #4850) 2015-02-02 21:48:54 +01:00
3fd45e03bf [ntvru] Rename from NTV to clarify the difference between n-tv.de and ntv.ru 2015-02-02 20:43:02 +01:00
199 changed files with 7886 additions and 2242 deletions

View File

@ -2,6 +2,7 @@ language: python
python:
- "2.6"
- "2.7"
- "3.2"
- "3.3"
- "3.4"
before_install:

View File

@ -108,3 +108,12 @@ Enam Mijbah Noor
David Luhmer
Shaya Goldberg
Paul Hartmann
Frans de Jonge
Robin de Rooij
Ryan Schmidt
Leslie P. Polzer
Duncan Keall
Alexander Mamay
Devin J. Pohly
Eduardo Ferro Aldama
Jeff Buchbinder

View File

@ -1,4 +1,6 @@
Please include the full output of the command when run with `--verbose`. The output (including the first lines) contain important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
**Please include the full output of youtube-dl when run with `-v`**.
The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist):
@ -16,7 +18,9 @@ If your report is shorter than two lines, it is almost certainly missing some of
For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
Site support requests **must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
If your server has multiple IPs or you suspect censorship, adding --call-home may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
### Are you using the latest version?
@ -122,7 +126,7 @@ If you want to add support for a new site, you can follow this quick list (assum
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want.
8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501).
8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8).
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
$ git add youtube_dl/extractor/__init__.py

View File

@ -1,10 +1,8 @@
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
clean:
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json CONTRIBUTING.md.tmp
cleanall: clean
rm -f youtube-dl youtube-dl.exe
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
find -name "*.pyc" -delete
PREFIX ?= /usr/local
BINDIR ?= $(PREFIX)/bin
@ -46,7 +44,7 @@ test:
ot: offlinetest
offlinetest: codetest
nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations --exclude test_youtube_lists
nosetests --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py
tar: youtube-dl.tar.gz

433
README.md
View File

@ -47,189 +47,109 @@ which means you can modify it, redistribute it or use it however you like.
# OPTIONS
-h, --help print this help text and exit
--version print program version and exit
-U, --update update this program to latest version. Make
sure that you have sufficient permissions
(run with sudo if needed)
-i, --ignore-errors continue on download errors, for example to
skip unavailable videos in a playlist
--abort-on-error Abort downloading of further videos (in the
playlist or the command line) if an error
occurs
-U, --update update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)
-i, --ignore-errors continue on download errors, for example to skip unavailable videos in a playlist
--abort-on-error Abort downloading of further videos (in the playlist or the command line) if an error occurs
--dump-user-agent display the current browser identification
--list-extractors List all supported extractors and the URLs
they would handle
--extractor-descriptions Output descriptions of all supported
extractors
--default-search PREFIX Use this prefix for unqualified URLs. For
example "gvsearch2:" downloads two videos
from google videos for youtube-dl "large
apple". Use the value "auto" to let
youtube-dl guess ("auto_warning" to emit a
warning when guessing). "error" just throws
an error. The default value "fixup_error"
repairs broken URLs, but emits an error if
this is not possible instead of searching.
--ignore-config Do not read configuration files. When given
in the global configuration file /etc
/youtube-dl.conf: Do not read the user
configuration in ~/.config/youtube-
dl/config (%APPDATA%/youtube-dl/config.txt
on Windows)
--flat-playlist Do not extract the videos of a playlist,
only list them.
--list-extractors List all supported extractors and the URLs they would handle
--extractor-descriptions Output descriptions of all supported extractors
--default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple".
Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The
default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.
--ignore-config Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: Do not read the user configuration
in ~/.config/youtube-dl/config (%APPDATA%/youtube-dl/config.txt on Windows)
--flat-playlist Do not extract the videos of a playlist, only list them.
--no-color Do not emit color codes in output.
## Network Options:
--proxy URL Use the specified HTTP/HTTPS proxy. Pass in
an empty string (--proxy "") for direct
connection
--proxy URL Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection
--socket-timeout SECONDS Time to wait before giving up, in seconds
--source-address IP Client-side IP address to bind to
(experimental)
-4, --force-ipv4 Make all connections via IPv4
(experimental)
-6, --force-ipv6 Make all connections via IPv6
(experimental)
--source-address IP Client-side IP address to bind to (experimental)
-4, --force-ipv4 Make all connections via IPv4 (experimental)
-6, --force-ipv6 Make all connections via IPv6 (experimental)
--cn-verification-proxy URL Use this proxy to verify the IP address for some Chinese sites. The default proxy specified by --proxy (or none, if the option is
not present) is used for the actual downloading. (experimental)
## Video Selection:
--playlist-start NUMBER playlist video to start at (default is 1)
--playlist-end NUMBER playlist video to end at (default is last)
--playlist-items ITEM_SPEC playlist video items to download. Specify
indices of the videos in the playlist
seperated by commas like: "--playlist-items
1,2,5,8" if you want to download videos
indexed 1, 2, 5, 8 in the playlist. You can
specify range: "--playlist-items
1-3,7,10-13", it will download the videos
at index 1, 2, 3, 7, 10, 11, 12 and 13.
--match-title REGEX download only matching titles (regex or
caseless sub-string)
--reject-title REGEX skip download for matching titles (regex or
caseless sub-string)
--playlist-items ITEM_SPEC playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8"
if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will
download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.
--match-title REGEX download only matching titles (regex or caseless sub-string)
--reject-title REGEX skip download for matching titles (regex or caseless sub-string)
--max-downloads NUMBER Abort after downloading NUMBER files
--min-filesize SIZE Do not download any videos smaller than
SIZE (e.g. 50k or 44.6m)
--max-filesize SIZE Do not download any videos larger than SIZE
(e.g. 50k or 44.6m)
--min-filesize SIZE Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)
--max-filesize SIZE Do not download any videos larger than SIZE (e.g. 50k or 44.6m)
--date DATE download only videos uploaded in this date
--datebefore DATE download only videos uploaded on or before
this date (i.e. inclusive)
--dateafter DATE download only videos uploaded on or after
this date (i.e. inclusive)
--min-views COUNT Do not download any videos with less than
COUNT views
--max-views COUNT Do not download any videos with more than
COUNT views
--no-playlist If the URL refers to a video and a
playlist, download only the video.
--age-limit YEARS download only videos suitable for the given
age
--download-archive FILE Download only videos not listed in the
archive file. Record the IDs of all
downloaded videos in it.
--include-ads Download advertisements as well
(experimental)
--datebefore DATE download only videos uploaded on or before this date (i.e. inclusive)
--dateafter DATE download only videos uploaded on or after this date (i.e. inclusive)
--min-views COUNT Do not download any videos with less than COUNT views
--max-views COUNT Do not download any videos with more than COUNT views
--match-filter FILTER (Experimental) Generic video filter. Specify any key (see help for -o for a list of available keys) to match if the key is present,
!key to check if the key is not present, key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against
a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the
operator. For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike
functionality is not available at the given service), but who also have a description, use --match-filter "like_count > 100 &
dislike_count <? 50 & description" .
--no-playlist If the URL refers to a video and a playlist, download only the video.
--yes-playlist If the URL refers to a video and a playlist, download the playlist.
--age-limit YEARS download only videos suitable for the given age
--download-archive FILE Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.
--include-ads Download advertisements as well (experimental)
## Download Options:
-r, --rate-limit LIMIT maximum download rate in bytes per second
(e.g. 50K or 4.2M)
-R, --retries RETRIES number of retries (default is 10), or
"infinite".
--buffer-size SIZE size of download buffer (e.g. 1024 or 16K)
(default is 1024)
--no-resize-buffer do not automatically adjust the buffer
size. By default, the buffer size is
automatically resized from an initial value
of SIZE.
-r, --rate-limit LIMIT maximum download rate in bytes per second (e.g. 50K or 4.2M)
-R, --retries RETRIES number of retries (default is 10), or "infinite".
--buffer-size SIZE size of download buffer (e.g. 1024 or 16K) (default is 1024)
--no-resize-buffer do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.
--playlist-reverse Download playlist videos in reverse order
--xattr-set-filesize (experimental) set file xattribute
ytdl.filesize with expected filesize
--external-downloader COMMAND (experimental) Use the specified external
downloader. Currently supports
aria2c,curl,wget
--xattr-set-filesize (experimental) set file xattribute ytdl.filesize with expected filesize
--hls-prefer-native (experimental) Use the native HLS downloader instead of ffmpeg.
--external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,curl,wget
--external-downloader-args ARGS Give these arguments to the external downloader.
## Filesystem Options:
-a, --batch-file FILE file containing URLs to download ('-' for
stdin)
-a, --batch-file FILE file containing URLs to download ('-' for stdin)
--id use only video ID in file name
-o, --output TEMPLATE output filename template. Use %(title)s to
get the title, %(uploader)s for the
uploader name, %(uploader_id)s for the
uploader nickname if different,
%(autonumber)s to get an automatically
incremented number, %(ext)s for the
filename extension, %(format)s for the
format description (like "22 - 1280x720" or
"HD"), %(format_id)s for the unique id of
the format (like Youtube's itags: "137"),
%(upload_date)s for the upload date
(YYYYMMDD), %(extractor)s for the provider
(youtube, metacafe, etc), %(id)s for the
video id, %(playlist_title)s,
%(playlist_id)s, or %(playlist)s (=title if
present, ID otherwise) for the playlist the
video is in, %(playlist_index)s for the
position in the playlist. %(height)s and
%(width)s for the width and height of the
video format. %(resolution)s for a textual
description of the resolution of the video
format. %% for a literal percent. Use - to
output to stdout. Can also be used to
download to a different directory, for
example with -o '/my/downloads/%(uploader)s
/%(title)s-%(id)s.%(ext)s' .
--autonumber-size NUMBER Specifies the number of digits in
%(autonumber)s when it is present in output
filename template or --auto-number option
is given
--restrict-filenames Restrict filenames to only ASCII
characters, and avoid "&" and spaces in
filenames
-A, --auto-number [deprecated; use -o
"%(autonumber)s-%(title)s.%(ext)s" ] number
downloaded files starting from 00000
-t, --title [deprecated] use title in file name
(default)
-o, --output TEMPLATE output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader
nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(format)s for
the format description (like "22 - 1280x720" or "HD"), %(format_id)s for the unique id of the format (like Youtube's itags: "137"),
%(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id,
%(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in,
%(playlist_index)s for the position in the playlist. %(height)s and %(width)s for the width and height of the video format.
%(resolution)s for a textual description of the resolution of the video format. %% for a literal percent. Use - to output to stdout.
Can also be used to download to a different directory, for example with -o '/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
--autonumber-size NUMBER Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given
--restrict-filenames Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames
-A, --auto-number [deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] number downloaded files starting from 00000
-t, --title [deprecated] use title in file name (default)
-l, --literal [deprecated] alias of --title
-w, --no-overwrites do not overwrite files
-c, --continue force resume of partially downloaded files.
By default, youtube-dl will resume
downloads if possible.
--no-continue do not resume partially downloaded files
(restart from beginning)
--no-part do not use .part files - write directly
into output file
--no-mtime do not use the Last-modified header to set
the file modification time
--write-description write video description to a .description
file
-c, --continue force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.
--no-continue do not resume partially downloaded files (restart from beginning)
--no-part do not use .part files - write directly into output file
--no-mtime do not use the Last-modified header to set the file modification time
--write-description write video description to a .description file
--write-info-json write video metadata to a .info.json file
--write-annotations write video annotations to a .annotation
file
--load-info FILE json file containing the video information
(created with the "--write-json" option)
--cookies FILE file to read cookies from and dump cookie
jar in
--cache-dir DIR Location in the filesystem where youtube-dl
can store some downloaded information
permanently. By default $XDG_CACHE_HOME
/youtube-dl or ~/.cache/youtube-dl . At the
moment, only YouTube player files (for
videos with obfuscated signatures) are
cached, but that may change.
--write-annotations write video annotations to a .annotation file
--load-info FILE json file containing the video information (created with the "--write-info-json" option)
--cookies FILE file to read cookies from and dump cookie jar in
--cache-dir DIR Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl
or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may
change.
--no-cache-dir Disable filesystem caching
--rm-cache-dir Delete all filesystem cache files
## Thumbnail images:
--write-thumbnail write thumbnail image to disk
--write-all-thumbnails write all thumbnail image formats to disk
--list-thumbnails Simulate and list all available thumbnail
formats
--list-thumbnails Simulate and list all available thumbnail formats
## Verbosity / Simulation Options:
-q, --quiet activates quiet mode
--no-warnings Ignore warnings
-s, --simulate do not download the video and do not write
anything to disk
-s, --simulate do not download the video and do not write anything to disk
--skip-download do not download the video
-g, --get-url simulate, quiet but print URL
-e, --get-title simulate, quiet but print title
@ -239,148 +159,87 @@ which means you can modify it, redistribute it or use it however you like.
--get-duration simulate, quiet but print video length
--get-filename simulate, quiet but print output filename
--get-format simulate, quiet but print output format
-j, --dump-json simulate, quiet but print JSON information.
See --output for a description of available
keys.
-J, --dump-single-json simulate, quiet but print JSON information
for each command-line argument. If the URL
refers to a playlist, dump the whole
playlist information in a single line.
--print-json Be quiet and print the video information as
JSON (video is still being downloaded).
-j, --dump-json simulate, quiet but print JSON information. See --output for a description of available keys.
-J, --dump-single-json simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist
information in a single line.
--print-json Be quiet and print the video information as JSON (video is still being downloaded).
--newline output progress bar as new lines
--no-progress do not print progress bar
--console-title display progress in console titlebar
-v, --verbose print various debugging information
--dump-intermediate-pages print downloaded pages to debug problems
(very verbose)
--write-pages Write downloaded intermediary pages to
files in the current directory to debug
problems
--dump-pages print downloaded pages to debug problems (very verbose)
--write-pages Write downloaded intermediary pages to files in the current directory to debug problems
--print-traffic Display sent and read HTTP traffic
-C, --call-home Contact the youtube-dl server for
debugging.
--no-call-home Do NOT contact the youtube-dl server for
debugging.
-C, --call-home Contact the youtube-dl server for debugging.
--no-call-home Do NOT contact the youtube-dl server for debugging.
## Workarounds:
--encoding ENCODING Force the specified encoding (experimental)
--no-check-certificate Suppress HTTPS certificate validation.
--prefer-insecure Use an unencrypted connection to retrieve
information about the video. (Currently
supported only for YouTube)
--prefer-insecure Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)
--user-agent UA specify a custom user agent
--referer URL specify a custom referer, use if the video
access is restricted to one domain
--add-header FIELD:VALUE specify a custom HTTP header and its value,
separated by a colon ':'. You can use this
option multiple times
--bidi-workaround Work around terminals that lack
bidirectional text support. Requires bidiv
or fribidi executable in PATH
--sleep-interval SECONDS Number of seconds to sleep before each
download.
--referer URL specify a custom referer, use if the video access is restricted to one domain
--add-header FIELD:VALUE specify a custom HTTP header and its value, separated by a colon ':'. You can use this option multiple times
--bidi-workaround Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH
--sleep-interval SECONDS Number of seconds to sleep before each download.
## Video Format Options:
-f, --format FORMAT video format code, specify the order of
preference using slashes, as in -f 22/17/18
. Instead of format codes, you can select
by extension for the extensions aac, m4a,
mp3, mp4, ogg, wav, webm. You can also use
the special names "best", "bestvideo",
"bestaudio", "worst". You can filter the
video results by putting a condition in
brackets, as in -f "best[height=720]" (or
-f "[filesize>10M]"). This works for
filesize, height, width, tbr, abr, vbr, and
fps and the comparisons <, <=, >, >=, =, !=
. Formats for which the value is not known
are excluded unless you put a question mark
(?) after the operator. You can combine
format filters, so -f "[height <=?
720][tbr>500]" selects up to 720p videos
(or videos where the height is not known)
with a bitrate of at least 500 KBit/s. By
default, youtube-dl will pick the best
quality. Use commas to download multiple
audio formats, such as -f
136/137/mp4/bestvideo,140/m4a/bestaudio.
You can merge the video and audio of two
formats into a single file using -f <video-
format>+<audio-format> (requires ffmpeg or
avconv), for example -f
-f, --format FORMAT video format code, specify the order of preference using slashes, as in -f 22/17/18 . Instead of format codes, you can select by
extension for the extensions aac, m4a, mp3, mp4, ogg, wav, webm. You can also use the special names "best", "bestvideo", "bestaudio",
"worst". You can filter the video results by putting a condition in brackets, as in -f "best[height=720]" (or -f "[filesize>10M]").
This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec,
vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a
question mark (?) after the operator. You can combine format filters, so -f "[height <=? 720][tbr>500]" selects up to 720p videos
(or videos where the height is not known) with a bitrate of at least 500 KBit/s. By default, youtube-dl will pick the best quality.
Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio
of two formats into a single file using -f <video-format>+<audio-format> (requires ffmpeg or avconv), for example -f
bestvideo+bestaudio.
--all-formats download all available video formats
--prefer-free-formats prefer free video formats unless a specific
one is requested
--prefer-free-formats prefer free video formats unless a specific one is requested
--max-quality FORMAT highest quality format to download
-F, --list-formats list all available formats
--youtube-skip-dash-manifest Do not download the DASH manifest on
YouTube videos
--merge-output-format FORMAT If a merge is required (e.g.
bestvideo+bestaudio), output to given
container format. One of mkv, mp4, ogg,
webm, flv.Ignored if no merge is required
--youtube-skip-dash-manifest Do not download the DASH manifest on YouTube videos
--merge-output-format FORMAT If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv. Ignored if no
merge is required
## Subtitle Options:
--write-sub write subtitle file
--write-auto-sub write automatic subtitle file (youtube
only)
--all-subs downloads all the available subtitles of
the video
--write-auto-sub write automatic subtitle file (youtube only)
--all-subs downloads all the available subtitles of the video
--list-subs lists all available subtitles for the video
--sub-format FORMAT subtitle format (default=srt) ([sbv/vtt]
youtube only)
--sub-lang LANGS languages of the subtitles to download
(optional) separated by commas, use IETF
language tags like 'en,pt'
--sub-format FORMAT subtitle format, accepts formats preference, for example: "ass/srt/best"
--sub-lang LANGS languages of the subtitles to download (optional) separated by commas, use IETF language tags like 'en,pt'
## Authentication Options:
-u, --username USERNAME login with this account ID
-p, --password PASSWORD account password. If this option is left
out, youtube-dl will ask interactively.
-p, --password PASSWORD account password. If this option is left out, youtube-dl will ask interactively.
-2, --twofactor TWOFACTOR two-factor auth code
-n, --netrc use .netrc authentication data
--video-password PASSWORD video password (vimeo, smotri)
## Post-processing Options:
-x, --extract-audio convert video files to audio-only files
(requires ffmpeg or avconv and ffprobe or
avprobe)
--audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a",
"opus", or "wav"; "best" by default
--audio-quality QUALITY ffmpeg/avconv audio quality specification,
insert a value between 0 (better) and 9
(worse) for VBR or a specific bitrate like
128K (default 5)
--recode-video FORMAT Encode the video to another format if
necessary (currently supported:
mp4|flv|ogg|webm|mkv)
-k, --keep-video keeps the video file on disk after the
post-processing; the video is erased by
default
--no-post-overwrites do not overwrite post-processed files; the
post-processed files are overwritten by
default
--embed-subs embed subtitles in the video (only for mp4
videos)
-x, --extract-audio convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)
--audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default
--audio-quality QUALITY ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K
(default 5)
--recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)
-k, --keep-video keeps the video file on disk after the post-processing; the video is erased by default
--no-post-overwrites do not overwrite post-processed files; the post-processed files are overwritten by default
--embed-subs embed subtitles in the video (only for mp4 videos)
--embed-thumbnail embed thumbnail in the audio as cover art
--add-metadata write metadata to the video file
--xattrs write metadata to the video file's xattrs
(using dublin core and xdg standards)
--fixup POLICY Automatically correct known faults of the
file. One of never (do nothing), warn (only
emit a warning), detect_or_warn(the
default; fix file if we can, warn
otherwise)
--prefer-avconv Prefer avconv over ffmpeg for running the
postprocessors (default)
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
postprocessors
--exec CMD Execute a command on the file after
downloading, similar to find's -exec
syntax. Example: --exec 'adb push {}
/sdcard/Music/ && rm {}'
--metadata-from-title FORMAT parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed
parameters replace existing values. Additional templates: %(album)s, %(artist)s. Example: --metadata-from-title "%(artist)s -
%(title)s" matches a title like "Coldplay - Paradise"
--xattrs write metadata to the video file's xattrs (using dublin core and xdg standards)
--fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn (the default;
fix file if we can, warn otherwise)
--prefer-avconv Prefer avconv over ffmpeg for running the postprocessors (default)
--prefer-ffmpeg Prefer ffmpeg over avconv for running the postprocessors
--ffmpeg-location PATH Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory.
--exec CMD Execute a command on the file after downloading, similar to find's -exec syntax. Example: --exec 'adb push {} /sdcard/Music/ && rm
{}'
--convert-subtitles FORMAT Convert the subtitles to other format (currently supported: srt|ass|vtt)
# CONFIGURATION
@ -490,11 +349,19 @@ If you want to play the video on a machine that is not running youtube-dl, you c
### ERROR: no fmt_url_map or conn information found in video info
youtube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`.
YouTube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
### ERROR: unable to download video ###
youtube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`.
YouTube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
### ExtractorError: Could not find JS function u'OF'
In February 2015, the new YouTube player contained a character sequence in a string that was misinterpreted by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
### HTTP Error 429: Too Many Requests or 402: Payment Required
These two error codes indicate that the service is blocking your IP address because of overuse. Contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--source-address` options](#network-options) to select another IP address.
### SyntaxError: Non-ASCII character ###
@ -532,9 +399,29 @@ Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the opt
youtube-dl -- -wNyEUrxzFU
youtube-dl "http://www.youtube.com/watch?v=-wNyEUrxzFU"
### Can you add support for this anime video site, or site which shows current movies for free?
As a matter of policy (as well as legality), youtube-dl does not include support for services that specialize in infringing copyright. As a rule of thumb, if you cannot easily find a video that the service is quite obviously allowed to distribute (i.e. that has been uploaded by the creator, the creator's distributor, or is published under a free license), the service is probably unfit for inclusion to youtube-dl.
A note on the service that they don't host the infringing content, but just link to those who do, is evidence that the service should **not** be included into youtube-dl. The same goes for any DMCA note when the whole front page of the service is filled with videos they are not allowed to distribute. A "fair use" note is equally unconvincing if the service shows copyright-protected videos in full without authorization.
Support requests for services that **do** purchase the rights to distribute their content are perfectly fine though. If in doubt, you can simply include a source that mentions the legitimate purchase of content.
### How can I speed up work on my issue?
(Also known as: Help, my important issue is not being solved!) The youtube-dl core developer team is quite small. While we do our best to solve as many issues as possible, sometimes that can take quite a while. To speed up your issue, here's what you can do:
First of all, please do report the issue [at our issue tracker](https://yt-dl.org/bugs). That allows us to coordinate all efforts by users and developers, and serves as a unified point. Unfortunately, the youtube-dl project has grown too large to use personal email as an effective communication channel.
Please read the [bug reporting instructions](#bugs) below. A lot of bugs lack all the necessary information. If you can, offer proxy, VPN, or shell access to the youtube-dl developers. If you are able to, test the issue from multiple computers in multiple countries to exclude local censorship or misconfiguration issues.
If nobody is interested in solving your issue, you are welcome to take matters into your own hands and submit a pull request (or coerce/pay somebody else to do so).
Feel free to bump the issue from time to time by writing a small comment ("Issue is still present in youtube-dl version ...from France, but fixed from Belgium"), but please not more than once a month. Please do not declare your issue as `important` or `urgent`.
### How can I detect whether a given URL is supported by youtube-dl?
For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports a URL of a service in that list as unsupported. In that case, simply report a bug.
It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
@ -631,6 +518,7 @@ youtube-dl makes the best effort to be a good command-line program, and thus sho
From a Python program, you can embed youtube-dl in a more powerful fashion, like this:
```python
from __future__ import unicode_literals
import youtube_dl
ydl_opts = {}
@ -643,6 +531,7 @@ Most likely, you'll want to use various options. For a list of what can be done,
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
```python
from __future__ import unicode_literals
import youtube_dl
@ -700,7 +589,9 @@ If your report is shorter than two lines, it is almost certainly missing some of
For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
Site support requests **must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
If your server has multiple IPs or you suspect censorship, adding --call-home may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
### Are you using the latest version?
@ -728,7 +619,7 @@ In particular, every site support request issue should only pertain to services
### Is anyone going to need the feature?
Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
### Is your question about youtube-dl?

View File

@ -45,12 +45,12 @@ for test in get_testcases():
RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST)
if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict']
or test['info_dict']['age_limit'] != 18):
if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] or
test['info_dict']['age_limit'] != 18):
print('\nPotential missing age_limit check: {0}'.format(test['name']))
elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict']
and test['info_dict']['age_limit'] == 18):
elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] and
test['info_dict']['age_limit'] == 18):
print('\nPotential false negative: {0}'.format(test['name']))
else:

View File

@ -0,0 +1,42 @@
from __future__ import unicode_literals
import codecs
import subprocess
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.utils import intlist_to_bytes
from youtube_dl.aes import aes_encrypt, key_expansion
secret_msg = b'Secret message goes here'
def hex_str(int_list):
    """Return the hex encoding of the bytes built from *int_list*.

    The list is first converted with ``intlist_to_bytes`` and the result is
    run through the ``'hex'`` codec.
    """
    raw = intlist_to_bytes(int_list)
    return codecs.encode(raw, 'hex')
def openssl_encode(algo, key, iv):
    """Encrypt the module-level ``secret_msg`` with the ``openssl`` CLI.

    algo    -- cipher name; it is prefixed with '-' to form the openssl option
    key, iv -- lists of ints, hex-encoded with hex_str() for -K and -iv

    Returns the data openssl wrote to its standard output.
    """
    key_hex = hex_str(key)
    iv_hex = hex_str(iv)
    argv = ['openssl', 'enc', '-e', '-' + algo, '-K', key_hex, '-iv', iv_hex]
    proc = subprocess.Popen(argv, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    stdout_data, _ = proc.communicate(secret_msg)
    return stdout_data
# One 16-int list is used as both the key and the IV.
key = [0x20, 0x15] + [0] * 14
iv = key

# Reference ciphertext for aes_cbc_decrypt.
r = openssl_encode('aes-128-cbc', key, iv)
print('aes_cbc_decrypt')
print(repr(r))

# Reference ciphertext for aes_decrypt_text with a 16-int password.
password = key
expanded = key_expansion(password)
new_key = aes_encrypt(password, expanded)
r = openssl_encode('aes-128-ctr', new_key, iv)
print('aes_decrypt_text 16')
print(repr(r))

# Reference ciphertext for aes_decrypt_text with a 32-int password: the
# encrypted block is repeated 32 // 16 times to form the longer key.
password = key + [0] * 16
expanded = key_expansion(password)
new_key = aes_encrypt(password, expanded) * (32 // 16)
r = openssl_encode('aes-256-ctr', new_key, iv)
print('aes_decrypt_text 32')
print(repr(r))

View File

@ -35,7 +35,7 @@ if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $us
if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi
/bin/echo -e "\n### First of all, testing..."
make cleanall
make clean
if $skip_tests ; then
echo 'SKIPPING TESTS'
else
@ -45,9 +45,9 @@ fi
/bin/echo -e "\n### Changing version in version.py..."
sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
/bin/echo -e "\n### Committing README.md and youtube_dl/version.py..."
make README.md
git add README.md youtube_dl/version.py
/bin/echo -e "\n### Committing documentation and youtube_dl/version.py..."
make README.md CONTRIBUTING.md supportedsites
git add README.md CONTRIBUTING.md docs/supportedsites.md youtube_dl/version.py
git commit -m "release $version"
/bin/echo -e "\n### Now tagging, signing and pushing..."

View File

@ -1,4 +1,5 @@
# Supported sites
- **1tv**: Первый канал
- **1up.com**
- **220.ro**
- **24video**
@ -9,16 +10,22 @@
- **8tracks**
- **9gag**
- **abc.net.au**
- **Abc7News**
- **AcademicEarth:Course**
- **AddAnime**
- **AdobeTV**
- **AdultSwim**
- **Aftenposten**
- **Aftonbladet**
- **AirMozilla**
- **AlJazeera**
- **Allocine**
- **AlphaPorno**
- **anitube.se**
- **AnySex**
- **Aparat**
- **AppleDailyAnimationNews**
- **AppleDailyRealtimeNews**
- **AppleTrailers**
- **archive.org**: archive.org videos
- **ARD**
@ -30,14 +37,17 @@
- **arte.tv:ddc**
- **arte.tv:embed**
- **arte.tv:future**
- **AtresPlayer**
- **ATTTechChannel**
- **audiomack**
- **AUEngine**
- **audiomack:album**
- **Azubu**
- **bambuser**
- **bambuser:channel**
- **Bandcamp**
- **Bandcamp:album**
- **bbc.co.uk**: BBC iPlayer
- **BeatportPro**
- **Beeg**
- **BehindKink**
- **Bet**
@ -53,14 +63,19 @@
- **Brightcove**
- **BuzzFeed**
- **BYUtv**
- **Camdemy**
- **CamdemyFolder**
- **Canal13cl**
- **canalc2.tv**
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
- **CBS**
- **CBSNews**: CBS News
- **CBSSports**
- **CeskaTelevize**
- **channel9**: Channel 9
- **Chilloutzone**
- **chirbit**
- **chirbit:profile**
- **Cinchcast**
- **Cinemassacre**
- **clipfish**
@ -71,8 +86,10 @@
- **cmt.com**
- **CNET**
- **CNN**
- **CNNArticle**
- **CNNBlogs**
- **CollegeHumor**
- **CollegeRama**
- **ComCarCoff**
- **ComedyCentral**
- **ComedyCentralShows**: The Daily Show / The Colbert Report
@ -82,32 +99,40 @@
- **Crunchyroll**
- **crunchyroll:playlist**
- **CSpan**: C-SPAN
- **CtsNews**
- **culturebox.francetvinfo.fr**
- **dailymotion**
- **dailymotion:playlist**
- **dailymotion:user**
- **daum.net**
- **DBTV**
- **DctpTv**
- **DeezerPlaylist**
- **defense.gouv.fr**
- **Discovery**
- **divxstage**: DivxStage
- **Dotsub**
- **DouyuTV**
- **DRBonanza**
- **Dropbox**
- **DrTuber**
- **DRTV**
- **Dump**
- **dvtv**: http://video.aktualne.cz/
- **EaglePlatform**
- **EbaumsWorld**
- **EchoMsk**
- **eHow**
- **Einthusan**
- **eitb.tv**
- **EllenTV**
- **EllenTV:clips**
- **ElPais**: El País
- **Embedly**
- **EMPFlix**
- **Engadget**
- **Eporner**
- **EroProfile**
- **Escapist**
- **EveryonesMixtape**
- **exfm**: ex.fm
@ -120,9 +145,9 @@
- **fernsehkritik.tv:postecke**
- **Firedrive**
- **Firstpost**
- **firsttv**: Видеоархив - Первый канал
- **Flickr**
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
- **FootyRoom**
- **Foxgay**
- **FoxNews**
- **france2.fr:generation-quoi**
@ -140,9 +165,11 @@
- **GameSpot**
- **GameStar**
- **Gametrailers**
- **Gazeta**
- **GDCVault**
- **generic**: Generic downloader that works on some sites
- **GiantBomb**
- **Giga**
- **Glide**: Glide mobile video messages (glide.me)
- **Globo**
- **GodTube**
@ -153,9 +180,15 @@
- **Grooveshark**
- **Groupon**
- **Hark**
- **HearThisAt**
- **Heise**
- **HellPorno**
- **Helsinki**: helsinki.fi
- **HentaiStigma**
- **HistoricFilms**
- **History**
- **hitbox**
- **hitbox:live**
- **HornBunny**
- **HostingBulk**
- **HotNewHipHop**
@ -167,6 +200,7 @@
- **ign.com**
- **imdb**: Internet Movie Database trailers
- **imdb:list**: Internet Movie Database lists
- **Imgur**
- **Ina**
- **InfoQ**
- **Instagram**
@ -181,7 +215,10 @@
- **Jove**
- **jpopsuki.tv**
- **Jukebox**
- **Kaltura**
- **KanalPlay**: Kanal 5/9/11 Play
- **Kankan**
- **Karaoketv**
- **keek**
- **KeezMovies**
- **KhanAcademy**
@ -191,10 +228,15 @@
- **Ku6**
- **la7.tv**
- **Laola1Tv**
- **Letv**
- **LetvPlaylist**
- **LetvTv**
- **Libsyn**
- **lifenews**: LIFE | NEWS
- **LiveLeak**
- **livestream**
- **livestream:original**
- **LnkGo**
- **lrt.lt**
- **lynda**: lynda.com videos
- **lynda:course**: lynda.com online courses
@ -203,6 +245,7 @@
- **mailru**: Видео@Mail.Ru
- **Malemotion**
- **MDR**
- **media.ccc.de**
- **metacafe**
- **Metacritic**
- **Mgoon**
@ -235,6 +278,8 @@
- **MySpass**
- **myvideo**
- **MyVidster**
- **n-tv.de**
- **NationalGeographic**
- **Naver**
- **NBA**
- **NBC**
@ -242,11 +287,16 @@
- **ndr**: NDR.de - Mediathek
- **NDTV**
- **NerdCubedFeed**
- **Nerdist**
- **Netzkino**
- **Newgrounds**
- **Newstube**
- **NextMedia**
- **NextMediaActionNews**
- **nfb**: National Film Board of Canada
- **nfl.com**
- **nhl.com**
- **nhl.com:news**: NHL news
- **nhl.com:videocenter**: NHL videocenter category
- **niconico**: ニコニコ動画
- **NiconicoPlaylist**
@ -257,39 +307,53 @@
- **Nowness**
- **nowvideo**: NowVideo
- **npo.nl**
- **npo.nl:live**
- **npo.nl:radio**
- **npo.nl:radio:fragment**
- **NRK**
- **NRKPlaylist**
- **NRKTV**
- **NTV**
- **ntv.ru**
- **Nuvid**
- **NYTimes**
- **ocw.mit.edu**
- **Odnoklassniki**
- **OktoberfestTV**
- **on.aol.com**
- **Ooyala**
- **OpenFilm**
- **orf:fm4**: radio FM4
- **orf:iptv**: iptv.ORF.at
- **orf:oe1**: Radio Österreich 1
- **orf:tvthek**: ORF TVthek
- **ORFFM4**: radio FM4
- **parliamentlive.tv**: UK parliament videos
- **Patreon**
- **PBS**
- **Phoenix**
- **Photobucket**
- **Pladform**
- **PlanetaPlay**
- **play.fm**
- **played.to**
- **Playvid**
- **Playwire**
- **plus.google**: Google Plus
- **pluzz.francetv.fr**
- **podomatic**
- **PornHd**
- **PornHub**
- **PornHubPlaylist**
- **Pornotube**
- **PornoXO**
- **PrimeShareTV**
- **PromptFile**
- **prosiebensat1**: ProSiebenSat.1 Digital
- **Puls4**
- **Pyvideo**
- **QuickVid**
- **R7**
- **radio.de**
- **radiobremen**
- **radiofrance**
- **Rai**
- **RBMARadio**
@ -300,18 +364,23 @@
- **RottenTomatoes**
- **Roxwel**
- **RTBF**
- **Rte**
- **rtl.nl**: rtl.nl and rtlxl.nl
- **RTL2**
- **RTLnow**
- **rtlxl.nl**
- **RTP**
- **RTS**: RTS.ch
- **rtve.es:alacarta**: RTVE a la carta
- **rtve.es:infantil**: RTVE infantil
- **rtve.es:live**: RTVE.es live streams
- **RUHD**
- **rutube**: Rutube videos
- **rutube:channel**: Rutube channels
- **rutube:embed**: Rutube embedded videos
- **rutube:movie**: Rutube movies
- **rutube:person**: Rutube person videos
- **RUTV**: RUTV.RU
- **Sandia**: Sandia National Laboratories
- **Sapo**: SAPO Vídeos
- **savefrom.net**
- **SBS**: sbs.com.au
@ -339,7 +408,8 @@
- **soundcloud:playlist**
- **soundcloud:set**
- **soundcloud:user**
- **Soundgasm**
- **soundgasm**
- **soundgasm:profile**
- **southpark.cc.com**
- **southpark.de**
- **Space**
@ -351,12 +421,15 @@
- **Sport5**
- **SportBox**
- **SportDeutschland**
- **SRMediathek**: Süddeutscher Rundfunk
- **SRMediathek**: Saarländischer Rundfunk
- **SSA**
- **stanfordoc**: Stanford Open ClassRoom
- **Steam**
- **streamcloud.eu**
- **StreamCZ**
- **StreetVoice**
- **SunPorno**
- **SVTPlay**: SVT Play and Öppet arkiv
- **SWRMediathek**
- **Syfy**
- **SztvHu**
@ -375,7 +448,9 @@
- **TeleBruxelles**
- **telecinco.es**
- **TeleMB**
- **TeleTask**
- **TenPlay**
- **TestTube**
- **TF1**
- **TheOnion**
- **ThePlatform**
@ -401,13 +476,23 @@
- **Turbo**
- **Tutv**
- **tv.dfb.de**
- **TV4**: tv4.se and tv4play.se
- **tvigle**: Интернет-телевидение Tvigle.ru
- **tvp.pl**
- **tvp.pl:Series**
- **TVPlay**: TV3Play and related services
- **Twitch**
- **Tweakers**
- **twitch:bookmarks**
- **twitch:chapter**
- **twitch:past_broadcasts**
- **twitch:profile**
- **twitch:stream**
- **twitch:video**
- **twitch:vod**
- **Ubu**
- **udemy**
- **udemy:course**
- **Ultimedia**
- **Unistra**
- **Urort**: NRK P3 Urørt
- **ustream**
@ -433,6 +518,9 @@
- **videoweed**: VideoWeed
- **Vidme**
- **Vidzi**
- **vier**
- **vier:videos**
- **Viewster**
- **viki**
- **vimeo**
- **vimeo:album**
@ -460,11 +548,13 @@
- **WDR**
- **wdr:mobile**
- **WDRMaus**: Sendung mit der Maus
- **WebOfStories**
- **Weibo**
- **Wimp**
- **Wistia**
- **WorldStarHipHop**
- **wrzuta.pl**
- **WSJ**: Wall Street Journal
- **XBef**
- **XboxClips**
- **XHamster**
@ -472,8 +562,14 @@
- **XNXX**
- **XTube**
- **XTubeUser**: XTube user profile
- **Xuite**
- **XVideos**
- **XXXYMovies**
- **Yahoo**: Yahoo screen and movies
- **Yam**
- **yandexmusic:album**: Яндекс.Музыка - Альбом
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
- **yandexmusic:track**: Яндекс.Музыка - Трек
- **YesJapan**
- **Ynet**
- **YouJizz**
@ -491,9 +587,9 @@
- **youtube:search_url**: YouTube.com search URLs
- **youtube:show**: YouTube.com (multi-season) shows
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- **youtube:toplist**: YouTube.com top lists, "yttoplist:{channel}:{list title}" (Example: "yttoplist:music:Top Tracks")
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
- **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
- **Zapiks**
- **ZDF**
- **ZDFChannel**
- **zingmp3:album**: mp3.zing.vn albums

View File

@ -3,4 +3,4 @@ universal = True
[flake8]
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,setup.py,build,.git
ignore = E501
ignore = E402,E501,E731

View File

@ -103,6 +103,26 @@ def expect_info_dict(self, got_dict, expected_dict):
self.assertTrue(
match_rex.match(got),
'field %s (value: %r) should match %r' % (info_field, got, match_str))
elif isinstance(expected, compat_str) and expected.startswith('startswith:'):
got = got_dict.get(info_field)
start_str = expected[len('startswith:'):]
self.assertTrue(
isinstance(got, compat_str),
'Expected a %s object, but got %s for field %s' % (
compat_str.__name__, type(got).__name__, info_field))
self.assertTrue(
got.startswith(start_str),
'field %s (value: %r) should start with %r' % (info_field, got, start_str))
elif isinstance(expected, compat_str) and expected.startswith('contains:'):
got = got_dict.get(info_field)
contains_str = expected[len('contains:'):]
self.assertTrue(
isinstance(got, compat_str),
'Expected a %s object, but got %s for field %s' % (
compat_str.__name__, type(got).__name__, info_field))
self.assertTrue(
contains_str in got,
'field %s (value: %r) should contain %r' % (info_field, got, contains_str))
elif isinstance(expected, type):
got = got_dict.get(info_field)
self.assertTrue(isinstance(got, expected),
@ -153,12 +173,14 @@ def expect_info_dict(self, got_dict, expected_dict):
info_dict_str += ''.join(
' %s: %s,\n' % (_repr(k), _repr(v))
for k, v in test_info_dict.items() if k not in missing_keys)
info_dict_str += '\n'
if info_dict_str:
info_dict_str += '\n'
info_dict_str += ''.join(
' %s: %s,\n' % (_repr(k), _repr(test_info_dict[k]))
for k in missing_keys)
write_string(
'\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr)
'\n\'info_dict\': {\n' + info_dict_str + '},\n', out=sys.stderr)
self.assertFalse(
missing_keys,
'Missing keys in test definition: %s' % (

View File

@ -28,7 +28,7 @@
"retries": 10,
"simulate": false,
"subtitleslang": null,
"subtitlesformat": "srt",
"subtitlesformat": "best",
"test": true,
"updatetime": true,
"usenetrc": false,
@ -39,5 +39,6 @@
"writesubtitles": false,
"allsubtitles": false,
"listssubtitles": false,
"socket_timeout": 20
"socket_timeout": 20,
"fixup": "never"
}

View File

@ -13,6 +13,10 @@ import copy
from test.helper import FakeYDL, assertRegexpMatches
from youtube_dl import YoutubeDL
from youtube_dl.extractor import YoutubeIE
from youtube_dl.postprocessor.common import PostProcessor
from youtube_dl.utils import match_filter_func
TEST_URL = 'http://localhost/sample.mp4'
class YDL(FakeYDL):
@ -45,8 +49,8 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL()
ydl.params['prefer_free_formats'] = True
formats = [
{'ext': 'webm', 'height': 460, 'url': 'x'},
{'ext': 'mp4', 'height': 460, 'url': 'y'},
{'ext': 'webm', 'height': 460, 'url': TEST_URL},
{'ext': 'mp4', 'height': 460, 'url': TEST_URL},
]
info_dict = _make_result(formats)
yie = YoutubeIE(ydl)
@ -59,8 +63,8 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL()
ydl.params['prefer_free_formats'] = True
formats = [
{'ext': 'webm', 'height': 720, 'url': 'a'},
{'ext': 'mp4', 'height': 1080, 'url': 'b'},
{'ext': 'webm', 'height': 720, 'url': TEST_URL},
{'ext': 'mp4', 'height': 1080, 'url': TEST_URL},
]
info_dict['formats'] = formats
yie = YoutubeIE(ydl)
@ -73,9 +77,9 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL()
ydl.params['prefer_free_formats'] = False
formats = [
{'ext': 'webm', 'height': 720, 'url': '_'},
{'ext': 'mp4', 'height': 720, 'url': '_'},
{'ext': 'flv', 'height': 720, 'url': '_'},
{'ext': 'webm', 'height': 720, 'url': TEST_URL},
{'ext': 'mp4', 'height': 720, 'url': TEST_URL},
{'ext': 'flv', 'height': 720, 'url': TEST_URL},
]
info_dict['formats'] = formats
yie = YoutubeIE(ydl)
@ -87,8 +91,8 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL()
ydl.params['prefer_free_formats'] = False
formats = [
{'ext': 'flv', 'height': 720, 'url': '_'},
{'ext': 'webm', 'height': 720, 'url': '_'},
{'ext': 'flv', 'height': 720, 'url': TEST_URL},
{'ext': 'webm', 'height': 720, 'url': TEST_URL},
]
info_dict['formats'] = formats
yie = YoutubeIE(ydl)
@ -132,10 +136,10 @@ class TestFormatSelection(unittest.TestCase):
def test_format_selection(self):
formats = [
{'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': '_'},
{'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': '_'},
{'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': '_'},
{'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': '_'},
{'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
{'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL},
{'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL},
{'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': TEST_URL},
]
info_dict = _make_result(formats)
@ -166,10 +170,10 @@ class TestFormatSelection(unittest.TestCase):
def test_format_selection_audio(self):
formats = [
{'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': '_'},
{'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': '_'},
{'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': '_'},
{'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': '_'},
{'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
{'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL},
{'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': TEST_URL},
{'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': TEST_URL},
]
info_dict = _make_result(formats)
@ -184,8 +188,8 @@ class TestFormatSelection(unittest.TestCase):
self.assertEqual(downloaded['format_id'], 'audio-low')
formats = [
{'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': '_'},
{'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': '_'},
{'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
{'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': TEST_URL},
]
info_dict = _make_result(formats)
@ -227,9 +231,9 @@ class TestFormatSelection(unittest.TestCase):
def test_format_selection_video(self):
formats = [
{'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': '_'},
{'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': '_'},
{'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': '_'},
{'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': TEST_URL},
{'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': TEST_URL},
{'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': TEST_URL},
]
info_dict = _make_result(formats)
@ -336,6 +340,67 @@ class TestFormatSelection(unittest.TestCase):
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'G')
class TestYoutubeDL(unittest.TestCase):
def test_subtitles(self):
    # Checks which tracks process_video_result() places in
    # 'requested_subtitles' for several option combinations.
    def s_formats(lang, autocaption=False):
        # One entry per extension, all pointing at a fake localhost URL.
        return [{
            'ext': ext,
            'url': 'http://localhost/video.%s.%s' % (lang, ext),
            '_auto': autocaption,
        } for ext in ['vtt', 'srt', 'ass']]

    subtitles = dict((lang, s_formats(lang)) for lang in ['en', 'fr', 'es'])
    auto_captions = dict((lang, s_formats(lang, True)) for lang in ['it', 'pt', 'es'])
    info_dict = {
        'id': 'test',
        'title': 'Test',
        'url': 'http://localhost/video.mp4',
        'subtitles': subtitles,
        'automatic_captions': auto_captions,
        'extractor': 'TEST',
    }

    def get_info(params=None):
        # Work on a private copy: a mutable default (or the caller's dict)
        # must not be modified by setdefault() below.
        params = dict(params or {})
        params.setdefault('simulate', True)
        ydl = YDL(params)
        # Silence warnings for the duration of the test.
        ydl.report_warning = lambda *args, **kargs: None
        return ydl.process_video_result(info_dict, download=False)

    # No subtitle options: nothing requested, input dicts passed through.
    result = get_info()
    self.assertFalse(result.get('requested_subtitles'))
    self.assertEqual(result['subtitles'], subtitles)
    self.assertEqual(result['automatic_captions'], auto_captions)

    # writesubtitles only: just 'en', not yet downloaded, in 'ass' format
    # (the last entry produced by s_formats).
    result = get_info({'writesubtitles': True})
    subs = result['requested_subtitles']
    self.assertTrue(subs)
    self.assertEqual(set(subs.keys()), set(['en']))
    self.assertTrue(subs['en'].get('data') is None)
    self.assertEqual(subs['en']['ext'], 'ass')

    # Format preference list: 'foo' is not offered, so 'srt' is expected.
    result = get_info({'writesubtitles': True, 'subtitlesformat': 'foo/srt'})
    subs = result['requested_subtitles']
    self.assertEqual(subs['en']['ext'], 'srt')

    # Requested languages without a regular subtitle track ('it') are dropped.
    result = get_info({'writesubtitles': True, 'subtitleslangs': ['es', 'fr', 'it']})
    subs = result['requested_subtitles']
    self.assertTrue(subs)
    self.assertEqual(set(subs.keys()), set(['es', 'fr']))

    # Both kinds enabled: regular subtitles win for 'es', 'pt' is only
    # available as an automatic caption.
    result = get_info({'writesubtitles': True, 'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
    subs = result['requested_subtitles']
    self.assertTrue(subs)
    self.assertEqual(set(subs.keys()), set(['es', 'pt']))
    self.assertFalse(subs['es']['_auto'])
    self.assertTrue(subs['pt']['_auto'])

    # Automatic captions only: both languages come from the auto tracks.
    result = get_info({'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']})
    subs = result['requested_subtitles']
    self.assertTrue(subs)
    self.assertEqual(set(subs.keys()), set(['es', 'pt']))
    self.assertTrue(subs['es']['_auto'])
    self.assertTrue(subs['pt']['_auto'])
def test_add_extra_info(self):
test_dict = {
'extractor': 'Foo',
@ -370,5 +435,102 @@ class TestFormatSelection(unittest.TestCase):
'vbr': 10,
}), '^\s*10k$')
def test_postprocessors(self):
    # Runs a trivial post-processor and checks which files remain on disk
    # depending on the 'keepvideo' option.
    video_path = 'post-processor-testfile.mp4'
    audio_path = video_path + '.mp3'

    class SimplePP(PostProcessor):
        def run(self, info):
            with open(audio_path, 'wt') as out:
                out.write('EXAMPLE')
            # Bare lookup: raises if the info dict carries no 'filepath'.
            info['filepath']
            return False, info

    def run_pp(params):
        # Create the "video" file, then post-process it with SimplePP.
        with open(video_path, 'wt') as out:
            out.write('EXAMPLE')
        downloader = YoutubeDL(params)
        downloader.add_post_processor(SimplePP())
        downloader.post_process(video_path, {'filepath': video_path})

    # keepvideo=True: both the original and the generated file must exist.
    run_pp({'keepvideo': True})
    self.assertTrue(os.path.exists(video_path), '%s doesn\'t exist' % video_path)
    self.assertTrue(os.path.exists(audio_path), '%s doesn\'t exist' % audio_path)
    os.unlink(video_path)
    os.unlink(audio_path)

    # keepvideo=False: the original must be gone, the generated file kept.
    run_pp({'keepvideo': False})
    self.assertFalse(os.path.exists(video_path), '%s exists' % video_path)
    self.assertTrue(os.path.exists(audio_path), '%s doesn\'t exist' % audio_path)
    os.unlink(audio_path)
def test_match_filter(self):
    # Feeds two fake videos through a YDL subclass and checks which ids
    # survive each 'match_filter' setting.
    class FilterYDL(YDL):
        def __init__(self, *args, **kwargs):
            super(FilterYDL, self).__init__(*args, **kwargs)
            self.params['simulate'] = True

        def process_info(self, info_dict):
            # NOTE(review): super(YDL, ...) deliberately starts the lookup
            # *after* YDL, i.e. YDL.process_info is skipped -- presumably so
            # that only _match_entry below records entries; confirm.
            super(YDL, self).process_info(info_dict)

        def _match_entry(self, info_dict, incomplete):
            verdict = super(FilterYDL, self)._match_entry(info_dict, incomplete)
            # None means the entry was not rejected; record it.
            if verdict is None:
                self.downloaded_info_dicts.append(info_dict)
            return verdict

    first = {
        'id': '1',
        'url': TEST_URL,
        'title': 'one',
        'extractor': 'TEST',
        'duration': 30,
        'filesize': 10 * 1024,
    }
    second = {
        'id': '2',
        'url': TEST_URL,
        'title': 'two',
        'extractor': 'TEST',
        'duration': 10,
        'description': 'foo',
        'filesize': 5 * 1024,
    }
    videos = [first, second]

    def get_videos(filter_=None):
        # Return the ids of the videos accepted under the given filter.
        ydl = FilterYDL({'match_filter': filter_})
        for video in videos:
            ydl.process_ie_result(video, download=True)
        return [accepted['id'] for accepted in ydl.downloaded_info_dicts]

    # No filter: everything passes.
    self.assertEqual(get_videos(), ['1', '2'])

    # Plain callable: returning None accepts, a message rejects.
    def only_first(info):
        return None if info['id'] == '1' else 'Video id is not 1'

    self.assertEqual(get_videos(only_first), ['1'])

    # Filters built from expressions.
    self.assertEqual(get_videos(match_filter_func('duration < 30')), ['2'])
    self.assertEqual(get_videos(match_filter_func('description = foo')), ['2'])
    # '=?' also lets through the video that has no description at all.
    self.assertEqual(get_videos(match_filter_func('description =? foo')), ['1', '2'])
    self.assertEqual(get_videos(match_filter_func('filesize > 5KiB')), ['1'])
if __name__ == '__main__':
unittest.main()

55
test/test_aes.py Normal file
View File

@ -0,0 +1,55 @@
#!/usr/bin/env python
from __future__ import unicode_literals
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_decrypt_text
from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes
import base64
# the encrypted data can be generated with 'devscripts/generate_aes_testdata.py'
class TestAES(unittest.TestCase):
    """Round-trip and known-ciphertext checks for the youtube_dl.aes helpers."""

    def setUp(self):
        # The same 16-int list serves as both key and IV.
        key_and_iv = [0x20, 0x15] + 14 * [0]
        self.key = key_and_iv
        self.iv = key_and_iv
        self.secret_msg = b'Secret message goes here'

    def test_encrypt(self):
        # aes_decrypt must undo aes_encrypt when given the same key.
        plaintext = b'message'
        key = list(range(16))
        ciphertext = aes_encrypt(bytes_to_intlist(plaintext), key)
        recovered = aes_decrypt(ciphertext, key)
        self.assertEqual(intlist_to_bytes(recovered), plaintext)

    def test_cbc_decrypt(self):
        ciphertext = bytes_to_intlist(
            b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd"
        )
        plain_ints = aes_cbc_decrypt(ciphertext, self.key, self.iv)
        plaintext = intlist_to_bytes(plain_ints)
        # Trailing 0x08 bytes are stripped before comparing.
        self.assertEqual(plaintext.rstrip(b'\x08'), self.secret_msg)

    def test_decrypt_text(self):
        # (key size in bytes, ciphertext) pairs; each ciphertext is prefixed
        # with the first 8 IV bytes and base64-encoded before decryption.
        cases = (
            (16, b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae'),
            (32, b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83'),
        )
        for key_size_bytes, ciphertext in cases:
            password = intlist_to_bytes(self.key).decode('utf-8')
            encrypted = base64.b64encode(intlist_to_bytes(self.iv[:8]) + ciphertext)
            decrypted = aes_decrypt_text(encrypted, password, key_size_bytes)
            self.assertEqual(decrypted, self.secret_msg)
if __name__ == '__main__':
unittest.main()

View File

@ -104,11 +104,11 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch(':tds', ['ComedyCentralShows'])
def test_vimeo_matching(self):
self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel'])
self.assertMatch('http://vimeo.com/channels/31259', ['vimeo:channel'])
self.assertMatch('http://vimeo.com/channels/31259/53576664', ['vimeo'])
self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user'])
self.assertMatch('http://vimeo.com/user7108434/videos', ['vimeo:user'])
self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel'])
self.assertMatch('https://vimeo.com/channels/31259', ['vimeo:channel'])
self.assertMatch('https://vimeo.com/channels/31259/53576664', ['vimeo'])
self.assertMatch('https://vimeo.com/user7108434', ['vimeo:user'])
self.assertMatch('https://vimeo.com/user7108434/videos', ['vimeo:user'])
self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review'])
# https://github.com/rg3/youtube-dl/issues/1930

View File

@ -1,4 +1,6 @@
#!/usr/bin/env python
# coding: utf-8
from __future__ import unicode_literals
import unittest
@ -27,5 +29,12 @@ class TestExecution(unittest.TestCase):
def test_main_exec(self):
    # Executing __main__.py directly with --version must exit successfully.
    cmd = [sys.executable, 'youtube_dl/__main__.py', '--version']
    subprocess.check_call(cmd, cwd=rootDir, stdout=_DEV_NULL)
def test_cmdline_umlauts(self):
    # A non-ASCII argument must not cause anything to be written to stderr.
    cmd = [sys.executable, 'youtube_dl/__main__.py', 'ä', '--version']
    proc = subprocess.Popen(
        cmd, cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
    _, stderr = proc.communicate()
    self.assertFalse(stderr)
if __name__ == '__main__':
unittest.main()

View File

@ -8,7 +8,7 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl import YoutubeDL
from youtube_dl.compat import compat_http_server
from youtube_dl.compat import compat_http_server, compat_urllib_request
import ssl
import threading
@ -68,5 +68,52 @@ class TestHTTP(unittest.TestCase):
r = ydl.extract_info('https://localhost:%d/video.html' % self.port)
self.assertEqual(r['url'], 'https://localhost:%d/vid.mp4' % self.port)
def _build_proxy_handler(name):
    """Return a request-handler class that labels its responses with *name*.

    Every GET is answered with status 200 and the plain-text body
    ``"<name>: <request path>"``.
    """
    class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
        proxy_name = name

        def log_message(self, format, *args):
            # Suppress the per-request log output.
            pass

        def do_GET(self):
            self.send_response(200)
            self.send_header('Content-Type', 'text/plain; charset=utf-8')
            self.end_headers()
            body = '{self.proxy_name}: {self.path}'.format(self=self)
            self.wfile.write(body.encode('utf-8'))

    return HTTPTestRequestHandler
class TestProxy(unittest.TestCase):
    """Checks that requests are routed through the configured proxy."""

    @staticmethod
    def _start_proxy(label):
        # Bind a server to a free localhost port and serve it from a daemon
        # thread; returns (server, port, thread).
        server = compat_http_server.HTTPServer(
            ('localhost', 0), _build_proxy_handler(label))
        port = server.socket.getsockname()[1]
        worker = threading.Thread(target=server.serve_forever)
        worker.daemon = True
        worker.start()
        return server, port, worker

    def setUp(self):
        self.proxy, self.port, self.proxy_thread = self._start_proxy('normal')
        self.cn_proxy, self.cn_port, self.cn_proxy_thread = self._start_proxy('cn')

    def test_proxy(self):
        target = 'http://foo.com/bar'
        cn_proxy = 'localhost:{0}'.format(self.cn_port)
        default_proxy = 'localhost:{0}'.format(self.port)
        ydl = YoutubeDL({
            'proxy': default_proxy,
            'cn_verification_proxy': cn_proxy,
        })

        # A plain URL must be answered by the 'normal' proxy.
        body = ydl.urlopen(target).read().decode('utf-8')
        self.assertEqual(body, 'normal: {0}'.format(target))

        # A request carrying the Ytdl-request-proxy header must be answered
        # by the proxy named in that header.
        request = compat_urllib_request.Request(target)
        request.add_header('Ytdl-request-proxy', cn_proxy)
        body = ydl.urlopen(request).read().decode('utf-8')
        self.assertEqual(body, 'cn: {0}'.format(target))
if __name__ == '__main__':
unittest.main()

View File

@ -70,6 +70,8 @@ class TestJSInterpreter(unittest.TestCase):
self.assertEqual(jsi.call_function('f'), -11)
def test_comments(self):
'Skipping: Not yet fully implemented'
return
jsi = JSInterpreter('''
function x() {
var x = /* 1 + */ 2;
@ -80,6 +82,15 @@ class TestJSInterpreter(unittest.TestCase):
''')
self.assertEqual(jsi.call_function('x'), 52)
jsi = JSInterpreter('''
function f() {
var x = "/*";
var y = 1 /* comment */ + 2;
return y;
}
''')
self.assertEqual(jsi.call_function('f'), 3)
def test_precedence(self):
jsi = JSInterpreter('''
function x() {

26
test/test_netrc.py Normal file
View File

@ -0,0 +1,26 @@
# coding: utf-8
from __future__ import unicode_literals
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.extractor import (
gen_extractors,
)
class TestNetRc(unittest.TestCase):
    """Consistency check between login support and .netrc support."""

    def test_netrc_present(self):
        # Every extractor that has a _login attribute must also define
        # _NETRC_MACHINE.
        with_login = (ie for ie in gen_extractors() if hasattr(ie, '_login'))
        for ie in with_login:
            self.assertTrue(
                hasattr(ie, '_NETRC_MACHINE'),
                'Extractor %s supports login, but is missing a _NETRC_MACHINE property' % ie.IE_NAME)
if __name__ == '__main__':
unittest.main()

View File

@ -0,0 +1,17 @@
#!/usr/bin/env python
from __future__ import unicode_literals
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.postprocessor import MetadataFromTitlePP
class TestMetadataFromTitle(unittest.TestCase):
    """Tests for MetadataFromTitlePP."""

    def test_format_to_regex(self):
        """The title format is converted into a regex with one named group per field."""
        pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
        # Raw string: '\ ' and '\-' are not recognised string escapes, so a
        # plain literal only keeps the backslashes by accident and triggers an
        # invalid-escape warning on newer Python versions. The value compared
        # against is unchanged.
        self.assertEqual(pp._titleregex, r'(?P<title>.+)\ \-\ (?P<artist>.+)')

View File

@ -18,6 +18,15 @@ from youtube_dl.extractor import (
VimeoIE,
WallaIE,
CeskaTelevizeIE,
LyndaIE,
NPOIE,
ComedyCentralIE,
NRKTVIE,
RaiIE,
VikiIE,
ThePlatformIE,
RTVEALaCartaIE,
FunnyOrDieIE,
)
@ -27,42 +36,38 @@ class BaseTestSubtitles(unittest.TestCase):
def setUp(self):
self.DL = FakeYDL()
self.ie = self.IE(self.DL)
self.ie = self.IE()
self.DL.add_info_extractor(self.ie)
def getInfoDict(self):
info_dict = self.ie.extract(self.url)
info_dict = self.DL.extract_info(self.url, download=False)
return info_dict
def getSubtitles(self):
info_dict = self.getInfoDict()
return info_dict['subtitles']
subtitles = info_dict['requested_subtitles']
if not subtitles:
return subtitles
for sub_info in subtitles.values():
if sub_info.get('data') is None:
uf = self.DL.urlopen(sub_info['url'])
sub_info['data'] = uf.read().decode('utf-8')
return dict((l, sub_info['data']) for l, sub_info in subtitles.items())
class TestYoutubeSubtitles(BaseTestSubtitles):
url = 'QRS8MkLhQmM'
IE = YoutubeIE
def test_youtube_no_writesubtitles(self):
self.DL.params['writesubtitles'] = False
subtitles = self.getSubtitles()
self.assertEqual(subtitles, None)
def test_youtube_subtitles(self):
self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
def test_youtube_subtitles_lang(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitleslangs'] = ['it']
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
def test_youtube_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 13)
self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
for lang in ['it', 'fr', 'de']:
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
def test_youtube_subtitles_sbv_format(self):
self.DL.params['writesubtitles'] = True
@ -76,12 +81,6 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
def test_youtube_list_subtitles(self):
self.DL.expect_warning('Video doesn\'t have automatic captions')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
def test_youtube_automatic_captions(self):
self.url = '8YoUxe5ncPo'
self.DL.params['writeautomaticsub'] = True
@ -103,55 +102,22 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles), 0)
def test_youtube_multiple_langs(self):
self.url = 'QRS8MkLhQmM'
self.DL.params['writesubtitles'] = True
langs = ['it', 'fr', 'de']
self.DL.params['subtitleslangs'] = langs
subtitles = self.getSubtitles()
for lang in langs:
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
self.assertFalse(subtitles)
class TestDailymotionSubtitles(BaseTestSubtitles):
url = 'http://www.dailymotion.com/video/xczg00'
IE = DailymotionIE
def test_no_writesubtitles(self):
subtitles = self.getSubtitles()
self.assertEqual(subtitles, None)
def test_subtitles(self):
self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
def test_subtitles_lang(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitleslangs'] = ['fr']
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 5)
def test_list_subtitles(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
def test_automatic_captions(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['writeautomaticsub'] = True
self.DL.params['subtitleslang'] = ['en']
subtitles = self.getSubtitles()
self.assertTrue(len(subtitles.keys()) == 0)
self.assertTrue(len(subtitles.keys()) >= 6)
self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
for lang in ['es', 'fr', 'de']:
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
def test_nosubtitles(self):
self.DL.expect_warning('video doesn\'t have subtitles')
@ -159,61 +125,21 @@ class TestDailymotionSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles), 0)
def test_multiple_langs(self):
self.DL.params['writesubtitles'] = True
langs = ['es', 'fr', 'de']
self.DL.params['subtitleslangs'] = langs
subtitles = self.getSubtitles()
for lang in langs:
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
self.assertFalse(subtitles)
class TestTedSubtitles(BaseTestSubtitles):
url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
IE = TEDIE
def test_no_writesubtitles(self):
subtitles = self.getSubtitles()
self.assertEqual(subtitles, None)
def test_subtitles(self):
self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
def test_subtitles_lang(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitleslangs'] = ['fr']
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertTrue(len(subtitles.keys()) >= 28)
def test_list_subtitles(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
def test_automatic_captions(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['writeautomaticsub'] = True
self.DL.params['subtitleslang'] = ['en']
subtitles = self.getSubtitles()
self.assertTrue(len(subtitles.keys()) == 0)
def test_multiple_langs(self):
self.DL.params['writesubtitles'] = True
langs = ['es', 'fr', 'de']
self.DL.params['subtitleslangs'] = langs
subtitles = self.getSubtitles()
for lang in langs:
self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
for lang in ['es', 'fr', 'de']:
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
@ -221,14 +147,7 @@ class TestBlipTVSubtitles(BaseTestSubtitles):
url = 'http://blip.tv/a/a-6603250'
IE = BlipTVIE
def test_list_subtitles(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
def test_allsubtitles(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
@ -240,39 +159,13 @@ class TestVimeoSubtitles(BaseTestSubtitles):
url = 'http://vimeo.com/76979871'
IE = VimeoIE
def test_no_writesubtitles(self):
subtitles = self.getSubtitles()
self.assertEqual(subtitles, None)
def test_subtitles(self):
self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '26399116d23ae3cf2c087cea94bc43b4')
def test_subtitles_lang(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitleslangs'] = ['fr']
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))
def test_list_subtitles(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
def test_automatic_captions(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['writeautomaticsub'] = True
self.DL.params['subtitleslang'] = ['en']
subtitles = self.getSubtitles()
self.assertTrue(len(subtitles.keys()) == 0)
self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
def test_nosubtitles(self):
self.DL.expect_warning('video doesn\'t have subtitles')
@ -280,27 +173,13 @@ class TestVimeoSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles), 0)
def test_multiple_langs(self):
self.DL.params['writesubtitles'] = True
langs = ['es', 'fr', 'de']
self.DL.params['subtitleslangs'] = langs
subtitles = self.getSubtitles()
for lang in langs:
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
self.assertFalse(subtitles)
class TestWallaSubtitles(BaseTestSubtitles):
url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
IE = WallaIE
def test_list_subtitles(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
def test_allsubtitles(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['writesubtitles'] = True
@ -315,26 +194,20 @@ class TestWallaSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles), 0)
self.assertFalse(subtitles)
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
IE = CeskaTelevizeIE
def test_list_subtitles(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
def test_allsubtitles(self):
self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['cs']))
self.assertEqual(md5(subtitles['cs']), '9bf52d9549533c32c427e264bf0847d4')
self.assertTrue(len(subtitles['cs']) > 20000)
def test_nosubtitles(self):
self.DL.expect_warning('video doesn\'t have subtitles')
@ -342,7 +215,122 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles), 0)
self.assertFalse(subtitles)
class TestLyndaSubtitles(BaseTestSubtitles):
    """Subtitle checks for LyndaIE."""
    url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
    IE = LyndaIE

    def test_allsubtitles(self):
        """With all subtitles requested, only 'en' is returned with the expected hash."""
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        found = self.getSubtitles()
        expected_langs = set(['en'])
        self.assertEqual(set(found.keys()), expected_langs)
        self.assertEqual(md5(found['en']), '09bbe67222259bed60deaa26997d73a7')
class TestNPOSubtitles(BaseTestSubtitles):
    """Subtitle checks for NPOIE."""
    url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
    IE = NPOIE

    def test_allsubtitles(self):
        """With all subtitles requested, only 'nl' is returned with the expected hash."""
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        found = self.getSubtitles()
        expected_langs = set(['nl'])
        self.assertEqual(set(found.keys()), expected_langs)
        self.assertEqual(md5(found['nl']), 'fc6435027572b63fb4ab143abd5ad3f4')
class TestMTVSubtitles(BaseTestSubtitles):
    """Subtitle checks for ComedyCentralIE."""
    url = 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother'
    IE = ComedyCentralIE

    def getInfoDict(self):
        """Return the first entry of the info dict produced by the base class."""
        result = super(TestMTVSubtitles, self).getInfoDict()
        return result['entries'][0]

    def test_allsubtitles(self):
        """With all subtitles requested, only 'en' is returned with the expected hash."""
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        found = self.getSubtitles()
        expected_langs = set(['en'])
        self.assertEqual(set(found.keys()), expected_langs)
        self.assertEqual(md5(found['en']), 'b9f6ca22a6acf597ec76f61749765e65')
class TestNRKSubtitles(BaseTestSubtitles):
    """Subtitle checks for NRKTVIE."""
    url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1'
    IE = NRKTVIE

    def test_allsubtitles(self):
        """With all subtitles requested, only 'no' is returned with the expected hash."""
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        found = self.getSubtitles()
        expected_langs = set(['no'])
        self.assertEqual(set(found.keys()), expected_langs)
        self.assertEqual(md5(found['no']), '1d221e6458c95c5494dcd38e6a1f129a')
class TestRaiSubtitles(BaseTestSubtitles):
    """Subtitle checks for RaiIE."""
    url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
    IE = RaiIE

    def test_allsubtitles(self):
        """With all subtitles requested, only 'it' is returned with the expected hash."""
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        found = self.getSubtitles()
        expected_langs = set(['it'])
        self.assertEqual(set(found.keys()), expected_langs)
        self.assertEqual(md5(found['it']), 'b1d90a98755126b61e667567a1f6680a')
class TestVikiSubtitles(BaseTestSubtitles):
    """Subtitle checks for VikiIE."""
    url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
    IE = VikiIE

    def test_allsubtitles(self):
        """With all subtitles requested, only 'en' is returned with the expected hash."""
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        found = self.getSubtitles()
        expected_langs = set(['en'])
        self.assertEqual(set(found.keys()), expected_langs)
        self.assertEqual(md5(found['en']), '53cb083a5914b2d84ef1ab67b880d18a')
class TestThePlatformSubtitles(BaseTestSubtitles):
    """Subtitle checks for ThePlatformIE."""
    # from http://www.3playmedia.com/services-features/tools/integrations/theplatform/
    # (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/)
    url = 'theplatform:JFUjUE1_ehvq'
    IE = ThePlatformIE

    def test_allsubtitles(self):
        """With all subtitles requested, only 'en' is returned with the expected hash."""
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        found = self.getSubtitles()
        expected_langs = set(['en'])
        self.assertEqual(set(found.keys()), expected_langs)
        self.assertEqual(md5(found['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
class TestRtveSubtitles(BaseTestSubtitles):
    """Subtitle checks for RTVEALaCartaIE (currently short-circuited)."""
    url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
    IE = RTVEALaCartaIE

    def test_allsubtitles(self):
        """Print a notice and return; the checks below the return never run."""
        print('Skipping, only available from Spain')
        return
        # Unreachable: kept as the record of what the test would assert.
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        found = self.getSubtitles()
        expected_langs = set(['es'])
        self.assertEqual(set(found.keys()), expected_langs)
        self.assertEqual(md5(found['es']), '69e70cae2d40574fb7316f31d6eb7fca')
class TestFunnyOrDieSubtitles(BaseTestSubtitles):
    """Subtitle checks for FunnyOrDieIE."""
    url = 'http://www.funnyordie.com/videos/224829ff6d/judd-apatow-will-direct-your-vine'
    IE = FunnyOrDieIE

    def test_allsubtitles(self):
        """With all subtitles requested, only 'en' is returned with the expected hash."""
        params = self.DL.params
        params['writesubtitles'] = True
        params['allsubtitles'] = True
        found = self.getSubtitles()
        expected_langs = set(['en'])
        self.assertEqual(set(found.keys()), expected_langs)
        self.assertEqual(md5(found['en']), 'c5593c193eacd353596c11c2d4f9ecc4')
if __name__ == '__main__':

View File

@ -34,8 +34,8 @@ def _make_testfunc(testfile):
def test_func(self):
as_file = os.path.join(TEST_DIR, testfile)
swf_file = os.path.join(TEST_DIR, test_id + '.swf')
if ((not os.path.exists(swf_file))
or os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
if ((not os.path.exists(swf_file)) or
os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
# Recompile
try:
subprocess.check_call([

View File

@ -17,13 +17,22 @@ IGNORED_FILES = [
'buildserver.py',
]
IGNORED_DIRS = [
'.git',
'.tox',
]
from test.helper import assertRegexpMatches
class TestUnicodeLiterals(unittest.TestCase):
def test_all_files(self):
for dirpath, _, filenames in os.walk(rootDir):
for dirpath, dirnames, filenames in os.walk(rootDir):
for ignore_dir in IGNORED_DIRS:
if ignore_dir in dirnames:
# If we remove the directory from dirnames os.walk won't
# recurse into it
dirnames.remove(ignore_dir)
for basename in filenames:
if not basename.endswith('.py'):
continue

View File

@ -24,6 +24,7 @@ from youtube_dl.utils import (
encodeFilename,
escape_rfc3986,
escape_url,
ExtractorError,
find_xpath_attr,
fix_xml_ampersands,
InAdvancePagedList,
@ -38,6 +39,8 @@ from youtube_dl.utils import (
parse_iso8601,
read_batch_urls,
sanitize_filename,
sanitize_path,
sanitize_url_path_consecutive_slashes,
shell_quote,
smuggle_url,
str_to_int,
@ -52,7 +55,9 @@ from youtube_dl.utils import (
urlencode_postdata,
version_tuple,
xpath_with_ns,
xpath_text,
render_table,
match_str,
)
@ -85,6 +90,11 @@ class TestUtil(unittest.TestCase):
sanitize_filename('New World record at 0:12:34'),
'New World record at 0_12_34')
self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf')
self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf')
self.assertEqual(sanitize_filename('.gasdgf'), 'gasdgf')
self.assertEqual(sanitize_filename('.gasdgf', is_id=True), '.gasdgf')
forbidden = '"\0\\/'
for fc in forbidden:
for fbc in forbidden:
@ -125,6 +135,62 @@ class TestUtil(unittest.TestCase):
self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw')
self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI')
def test_sanitize_path(self):
    """Check sanitize_path() against a table of inputs; returns early off win32."""
    if sys.platform != 'win32':
        return

    # (input path, expected result), checked in this order.
    cases = [
        ('abc', 'abc'),
        ('abc/def', 'abc\\def'),
        ('abc\\def', 'abc\\def'),
        ('abc|def', 'abc#def'),
        ('<>:"|?*', '#######'),
        ('C:/abc/def', 'C:\\abc\\def'),
        ('C?:/abc/def', 'C##\\abc\\def'),
        ('\\\\?\\UNC\\ComputerName\\abc', '\\\\?\\UNC\\ComputerName\\abc'),
        ('\\\\?\\UNC/ComputerName/abc', '\\\\?\\UNC\\ComputerName\\abc'),
        ('\\\\?\\C:\\abc', '\\\\?\\C:\\abc'),
        ('\\\\?\\C:/abc', '\\\\?\\C:\\abc'),
        ('\\\\?\\C:\\ab?c\\de:f', '\\\\?\\C:\\ab#c\\de#f'),
        ('\\\\?\\C:\\abc', '\\\\?\\C:\\abc'),
        (
            'youtube/%(uploader)s/%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s',
            'youtube\\%(uploader)s\\%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s',
        ),
        (
            'youtube/TheWreckingYard ./00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part',
            'youtube\\TheWreckingYard #\\00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part',
        ),
        ('abc/def...', 'abc\\def..#'),
        ('abc.../def', 'abc..#\\def'),
        ('abc.../def...', 'abc..#\\def..#'),
        ('../abc', '..\\abc'),
        ('../../abc', '..\\..\\abc'),
        ('./abc', 'abc'),
        ('./../abc', '..\\abc'),
    ]
    for raw_path, expected in cases:
        self.assertEqual(sanitize_path(raw_path), expected)
def test_sanitize_url_path_consecutive_slashes(self):
    """Check sanitize_url_path_consecutive_slashes() against a table of URLs."""
    # (input URL, expected result), checked in this order.
    cases = [
        ('http://hostname/foo//bar/filename.html', 'http://hostname/foo/bar/filename.html'),
        ('http://hostname//foo/bar/filename.html', 'http://hostname/foo/bar/filename.html'),
        ('http://hostname//', 'http://hostname/'),
        ('http://hostname/foo/bar/filename.html', 'http://hostname/foo/bar/filename.html'),
        ('http://hostname/', 'http://hostname/'),
        ('http://hostname/abc//', 'http://hostname/abc/'),
    ]
    for raw_url, expected in cases:
        self.assertEqual(
            sanitize_url_path_consecutive_slashes(raw_url), expected)
def test_ordered_set(self):
self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
self.assertEqual(orderedSet([]), [])
@ -156,6 +222,9 @@ class TestUtil(unittest.TestCase):
self.assertEqual(
unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False),
'20141126')
self.assertEqual(
unified_strdate('2/2/2015 6:47:40 PM', day_first=False),
'20150202')
def test_find_xpath_attr(self):
testxml = '''<root>
@ -183,6 +252,17 @@ class TestUtil(unittest.TestCase):
self.assertEqual(find('media:song/media:author').text, 'The Author')
self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
def test_xpath_text(self):
    """xpath_text() gives the node text, None for no match, or raises with fatal=True."""
    document = xml.etree.ElementTree.fromstring('''<root>
<div>
<p>Foo</p>
</div>
</root>''')

    # Existing node: its text is returned.
    found_text = xpath_text(document, 'div/p')
    self.assertEqual(found_text, 'Foo')

    # No matching node: None by default ...
    missing_text = xpath_text(document, 'div/bar')
    self.assertTrue(missing_text is None)

    # ... and ExtractorError when fatal=True is passed.
    self.assertRaises(
        ExtractorError, xpath_text, document, 'div/bar', fatal=True)
def test_smuggle_url(self):
data = {"ö": "ö", "abc": [3]}
url = 'https://foo.bar/baz?x=y#a'
@ -238,6 +318,9 @@ class TestUtil(unittest.TestCase):
self.assertEqual(parse_duration('5 s'), 5)
self.assertEqual(parse_duration('3 min'), 180)
self.assertEqual(parse_duration('2.5 hours'), 9000)
self.assertEqual(parse_duration('02:03:04'), 7384)
self.assertEqual(parse_duration('01:02:03:04'), 93784)
self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
def test_fix_xml_ampersands(self):
self.assertEqual(
@ -364,6 +447,10 @@ class TestUtil(unittest.TestCase):
"playlist":[{"controls":{"all":null}}]
}''')
inp = '"SAND Number: SAND 2013-7800P\\nPresenter: Tom Russo\\nHabanero Software Training - Xyce Software\\nXyce, Sandia\\u0027s"'
json_code = js_to_json(inp)
self.assertEqual(json.loads(json_code), json.loads(inp))
def test_js_to_json_edgecases(self):
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
@ -371,6 +458,16 @@ class TestUtil(unittest.TestCase):
on = js_to_json('{"abc": true}')
self.assertEqual(json.loads(on), {'abc': True})
# Ignore JavaScript code as well
on = js_to_json('''{
"x": 1,
y: "a",
z: some.code
}''')
d = json.loads(on)
self.assertEqual(d['x'], 1)
self.assertEqual(d['y'], 'a')
def test_clean_html(self):
self.assertEqual(clean_html('a:\nb'), 'a: b')
self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
@ -444,6 +541,37 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
'123 4\n'
'9999 51')
def test_match_str(self):
    """Check match_str() results for a table of filter strings and dicts."""
    self.assertRaises(ValueError, match_str, 'xy>foobar', {})

    combined = 'like_count > 100 & dislike_count <? 50 & description'
    # (expected truthiness, filter string, dict), checked in this order.
    cases = [
        (False, 'xy', {'x': 1200}),
        (True, '!xy', {'x': 1200}),
        (True, 'x', {'x': 1200}),
        (False, '!x', {'x': 1200}),
        (True, 'x', {'x': 0}),
        (False, 'x>0', {'x': 0}),
        (False, 'x>0', {}),
        (True, 'x>?0', {}),
        (True, 'x>1K', {'x': 1200}),
        (False, 'x>2K', {'x': 1200}),
        (True, 'x>=1200 & x < 1300', {'x': 1200}),
        (False, 'x>=1100 & x < 1200', {'x': 1200}),
        (False, 'y=a212', {'y': 'foobar42'}),
        (True, 'y=foobar42', {'y': 'foobar42'}),
        (False, 'y!=foobar42', {'y': 'foobar42'}),
        (True, 'y!=foobar2', {'y': 'foobar42'}),
        (False, combined, {'like_count': 90, 'description': 'foo'}),
        (True, combined, {'like_count': 190, 'description': 'foo'}),
        (False, combined, {'like_count': 190, 'dislike_count': 60, 'description': 'foo'}),
        (False, combined, {'like_count': 190, 'dislike_count': 10}),
    ]
    for expected, filter_str, info in cases:
        outcome = match_str(filter_str, info)
        if expected:
            self.assertTrue(outcome)
        else:
            self.assertFalse(outcome)
if __name__ == '__main__':
unittest.main()

View File

@ -8,11 +8,11 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import io
import re
import string
from test.helper import FakeYDL
from youtube_dl.extractor import YoutubeIE
from youtube_dl.compat import compat_str, compat_urlretrieve
@ -64,6 +64,12 @@ _TESTS = [
'js',
'4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
'82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B'
),
(
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
'js',
'312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
'112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
)
]
@ -88,7 +94,8 @@ def make_tfunc(url, stype, sig_input, expected_sig):
if not os.path.exists(fn):
compat_urlretrieve(url, fn)
ie = YoutubeIE()
ydl = FakeYDL()
ie = YoutubeIE(ydl)
if stype == 'js':
with io.open(fn, encoding='utf-8') as testf:
jscode = testf.read()

View File

@ -1,8 +1,11 @@
[tox]
envlist = py26,py27,py33
envlist = py26,py27,py33,py34
[testenv]
deps =
nose
coverage
commands = nosetests --verbose {posargs:test} # --with-coverage --cover-package=youtube_dl --cover-html
defaultargs = test --exclude test_download.py --exclude test_age_restriction.py
--exclude test_subtitles.py --exclude test_write_annotations.py
--exclude test_youtube_lists.py
commands = nosetests --verbose {posargs:{[testenv]defaultargs}} # --with-coverage --cover-package=youtube_dl --cover-html
# test.test_download:TestDownload.test_NowVideo

View File

@ -4,8 +4,10 @@
from __future__ import absolute_import, unicode_literals
import collections
import contextlib
import datetime
import errno
import fileinput
import io
import itertools
import json
@ -28,6 +30,7 @@ from .compat import (
compat_basestring,
compat_cookiejar,
compat_expanduser,
compat_get_terminal_size,
compat_http_client,
compat_kwargs,
compat_str,
@ -46,18 +49,19 @@ from .utils import (
ExtractorError,
format_bytes,
formatSeconds,
get_term_width,
locked_file,
make_HTTPS_handler,
MaxDownloadsReached,
PagedList,
parse_filesize,
PerRequestProxyHandler,
PostProcessingError,
platform_name,
preferredencoding,
render_table,
SameFileError,
sanitize_filename,
sanitize_path,
std_headers,
subtitles_filename,
takewhile_inclusive,
@ -154,7 +158,7 @@ class YoutubeDL(object):
allsubtitles: Downloads all the subtitles of the video
(requires writesubtitles or writeautomaticsub)
listsubtitles: Lists all available subtitles for the video
subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
subtitlesformat: The format code for subtitles
subtitleslangs: List of languages of the subtitles to download
keepvideo: Keep the video file after post-processing
daterange: A DateRange object, download only if the upload_date is in the range.
@ -181,6 +185,8 @@ class YoutubeDL(object):
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
At the moment, this is only supported by YouTube.
proxy: URL of the proxy server to use
cn_verification_proxy: URL of the proxy to use for IP address verification
on Chinese sites. (Experimental)
socket_timeout: Time to wait for unresponsive hosts, in seconds
bidi_workaround: Work around buggy terminals without bidirectional text
support, using fridibi
@ -199,18 +205,25 @@ class YoutubeDL(object):
postprocessor.
progress_hooks: A list of functions that get called on download
progress, with a dictionary with the entries
* status: One of "downloading" and "finished".
* status: One of "downloading", "error", or "finished".
Check this first and ignore unknown values.
If status is one of "downloading" or "finished", the
If status is one of "downloading", or "finished", the
following properties may also be present:
* filename: The final filename (always present)
* tmpfilename: The filename we're currently writing to
* downloaded_bytes: Bytes on disk
* total_bytes: Size of the whole file, None if unknown
* tmpfilename: The filename we're currently writing to
* total_bytes_estimate: Guess of the eventual file size,
None if unavailable.
* elapsed: The number of seconds since download started.
* eta: The estimated time in seconds, None if unknown
* speed: The download speed in bytes/second, None if
unknown
* fragment_index: The counter of the currently
downloaded video fragment.
* fragment_count: The number of fragments (= individual
files that will be merged)
Progress hooks are guaranteed to be called at least once
(with status "finished") if the download is successful.
@ -225,16 +238,25 @@ class YoutubeDL(object):
call_home: Boolean, true iff we are allowed to contact the
youtube-dl servers for debugging.
sleep_interval: Number of seconds to sleep before each download.
external_downloader: Executable of the external downloader to call.
listformats: Print an overview of available video formats and exit.
list_thumbnails: Print a table of all thumbnails and exit.
match_filter: A function that gets called with the info_dict of
every video.
If it returns a message, the video is ignored.
If it returns None, the video is downloaded.
match_filter_func in utils.py is one example for this.
no_color: Do not emit color codes in output.
The following options determine which downloader is picked:
external_downloader: Executable of the external downloader to call.
None or unset for standard (built-in) downloader.
hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
The following parameters are not used by YoutubeDL itself, they are used by
the FileDownloader:
the downloader (see youtube_dl/downloader/common.py):
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
noresizebuffer, retries, continuedl, noprogress, consoletitle,
xattr_set_filesize.
xattr_set_filesize, external_downloader_args.
The following options are used by the post processors:
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
@ -268,7 +290,7 @@ class YoutubeDL(object):
try:
import pty
master, slave = pty.openpty()
width = get_term_width()
width = compat_get_terminal_size().columns
if width is None:
width_args = []
else:
@ -292,8 +314,8 @@ class YoutubeDL(object):
raise
if (sys.version_info >= (3,) and sys.platform != 'win32' and
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
and not params.get('restrictfilenames', False)):
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
not params.get('restrictfilenames', False)):
# On Python 3, the Unicode filesystem API will throw errors (#1474)
self.report_warning(
'Assuming --restrict-filenames since file system encoding '
@ -301,8 +323,10 @@ class YoutubeDL(object):
'Set the LC_ALL environment variable to fix this.')
self.params['restrictfilenames'] = True
if '%(stitle)s' in self.params.get('outtmpl', ''):
self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
if isinstance(params.get('outtmpl'), bytes):
self.report_warning(
'Parameter outtmpl is bytes, but should be a unicode string. '
'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
self._setup_opener()
@ -485,7 +509,7 @@ class YoutubeDL(object):
else:
if self.params.get('no_warnings'):
return
if self._err_file.isatty() and os.name != 'nt':
if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
_msg_header = '\033[0;33mWARNING:\033[0m'
else:
_msg_header = 'WARNING:'
@ -497,7 +521,7 @@ class YoutubeDL(object):
Do the same as trouble, but prefixes the message with 'ERROR:', colored
in red if stderr is a tty file.
'''
if self._err_file.isatty() and os.name != 'nt':
if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
_msg_header = '\033[0;31mERROR:\033[0m'
else:
_msg_header = 'ERROR:'
@ -541,7 +565,7 @@ class YoutubeDL(object):
if v is not None)
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
tmpl = compat_expanduser(outtmpl)
filename = tmpl % template_dict
# Temporary fix for #4787
@ -554,7 +578,7 @@ class YoutubeDL(object):
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
return None
def _match_entry(self, info_dict):
def _match_entry(self, info_dict, incomplete):
""" Returns None iff the file should be downloaded """
video_title = info_dict.get('title', info_dict.get('id', 'video'))
@ -583,9 +607,17 @@ class YoutubeDL(object):
if max_views is not None and view_count > max_views:
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
return 'Skipping "%s" because it is age restricted' % title
return 'Skipping "%s" because it is age restricted' % video_title
if self.in_download_archive(info_dict):
return '%s has already been recorded in archive' % video_title
if not incomplete:
match_filter = self.params.get('match_filter')
if match_filter is not None:
ret = match_filter(info_dict)
if ret is not None:
return ret
return None
@staticmethod
@ -600,7 +632,7 @@ class YoutubeDL(object):
Returns a list with a dictionary for each video we find.
If 'download', also downloads the videos.
extra_info is a dict containing the extra values to add to each result
'''
'''
if ie_key:
ies = [self.get_info_extractor(ie_key)]
@ -779,7 +811,7 @@ class YoutubeDL(object):
'extractor_key': ie_result['extractor_key'],
}
reason = self._match_entry(entry)
reason = self._match_entry(entry, incomplete=True)
if reason is not None:
self.to_screen('[download] ' + reason)
continue
@ -826,27 +858,44 @@ class YoutubeDL(object):
'!=': operator.ne,
}
operator_rex = re.compile(r'''(?x)\s*\[
(?P<key>width|height|tbr|abr|vbr|filesize|fps)
(?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
\]$
''' % '|'.join(map(re.escape, OPERATORS.keys())))
m = operator_rex.search(format_spec)
if m:
try:
comparison_value = int(m.group('value'))
except ValueError:
comparison_value = parse_filesize(m.group('value'))
if comparison_value is None:
comparison_value = parse_filesize(m.group('value') + 'B')
if comparison_value is None:
raise ValueError(
'Invalid value %r in format specification %r' % (
m.group('value'), format_spec))
op = OPERATORS[m.group('op')]
if not m:
STR_OPERATORS = {
'=': operator.eq,
'!=': operator.ne,
}
str_operator_rex = re.compile(r'''(?x)\s*\[
\s*(?P<key>ext|acodec|vcodec|container|protocol)
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
\s*(?P<value>[a-zA-Z0-9_-]+)
\s*\]$
''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
m = str_operator_rex.search(format_spec)
if m:
comparison_value = m.group('value')
op = STR_OPERATORS[m.group('op')]
if not m:
raise ValueError('Invalid format specification %r' % format_spec)
try:
comparison_value = int(m.group('value'))
except ValueError:
comparison_value = parse_filesize(m.group('value'))
if comparison_value is None:
comparison_value = parse_filesize(m.group('value') + 'B')
if comparison_value is None:
raise ValueError(
'Invalid value %r in format specification %r' % (
m.group('value'), format_spec))
op = OPERATORS[m.group('op')]
def _filter(f):
actual_value = f.get(m.group('key'))
if actual_value is None:
@ -920,27 +969,9 @@ class YoutubeDL(object):
return res
def _calc_cookies(self, info_dict):
class _PseudoRequest(object):
def __init__(self, url):
self.url = url
self.headers = {}
self.unverifiable = False
def add_unredirected_header(self, k, v):
self.headers[k] = v
def get_full_url(self):
return self.url
def is_unverifiable(self):
return self.unverifiable
def has_header(self, h):
return h in self.headers
pr = _PseudoRequest(info_dict['url'])
pr = compat_urllib_request.Request(info_dict['url'])
self.cookiejar.add_cookie_header(pr)
return pr.headers.get('Cookie')
return pr.get_header('Cookie')
def process_video_result(self, info_dict, download=True):
assert info_dict.get('_type', 'video') == 'video'
@ -964,9 +995,11 @@ class YoutubeDL(object):
thumbnails.sort(key=lambda t: (
t.get('preference'), t.get('width'), t.get('height'),
t.get('id'), t.get('url')))
for t in thumbnails:
for i, t in enumerate(thumbnails):
if 'width' in t and 'height' in t:
t['resolution'] = '%dx%d' % (t['width'], t['height'])
if t.get('id') is None:
t['id'] = '%d' % i
if thumbnails and 'thumbnail' not in info_dict:
info_dict['thumbnail'] = thumbnails[-1]['url']
@ -983,6 +1016,15 @@ class YoutubeDL(object):
info_dict['timestamp'])
info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
if self.params.get('listsubtitles', False):
if 'automatic_captions' in info_dict:
self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
return
info_dict['requested_subtitles'] = self.process_subtitles(
info_dict['id'], info_dict.get('subtitles'),
info_dict.get('automatic_captions'))
# These extractors handle format selection themselves
if info_dict['extractor'] in ['Youku']:
if download:
@ -1046,8 +1088,7 @@ class YoutubeDL(object):
if req_format is None:
req_format = 'best'
formats_to_download = []
# The -1 is for supporting YoutubeIE
if req_format in ('-1', 'all'):
if req_format == 'all':
formats_to_download = formats
else:
for rfstr in req_format.split(','):
@ -1074,7 +1115,8 @@ class YoutubeDL(object):
else self.params['merge_output_format'])
selected_format = {
'requested_formats': formats_info,
'format': rf,
'format': '%s+%s' % (formats_info[0].get('format'),
formats_info[1].get('format')),
'format_id': '%s+%s' % (formats_info[0].get('format_id'),
formats_info[1].get('format_id')),
'width': formats_info[0].get('width'),
@ -1110,6 +1152,55 @@ class YoutubeDL(object):
info_dict.update(formats_to_download[-1])
return info_dict
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format.

    video_id is only used in warning messages.  normal_subtitles and
    automatic_captions map a language code to a list of format dicts
    (each carrying at least an 'ext' key); the last entry of a list is
    treated as the best one.

    Returns a dict mapping each selected language to the chosen format
    dict, or None when no subtitles were requested or none are available.
    """
    available_subs = {}
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            # Real subtitles take precedence over automatic captions
            available_subs.setdefault(lang, cap_info)

    # Nothing is collected unless one of the write* options is set, so an
    # empty dict also covers the "not requested" case.
    if not available_subs:
        return None

    if self.params.get('allsubtitles', False):
        requested_langs = available_subs.keys()
    elif self.params.get('subtitleslangs', False):
        requested_langs = self.params.get('subtitleslangs')
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        requested_langs = [list(available_subs.keys())[0]]

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # An empty format list is as unusable as a missing language; skip
        # it instead of failing with IndexError on formats[-1] below.
        if not formats:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                chosen = formats[-1]
                break
            matches = [fmt for fmt in formats if fmt['ext'] == ext]
            if matches:
                chosen = matches[-1]
                break
        else:
            # None of the preferred extensions matched: fall back to the best
            chosen = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        subs[lang] = chosen
    return subs
def process_info(self, info_dict):
"""Process a single resolved IE result."""
@ -1124,13 +1215,10 @@ class YoutubeDL(object):
if len(info_dict['title']) > 200:
info_dict['title'] = info_dict['title'][:197] + '...'
# Keep for backwards compatibility
info_dict['stitle'] = info_dict['title']
if 'format' not in info_dict:
info_dict['format'] = info_dict['ext']
reason = self._match_entry(info_dict)
reason = self._match_entry(info_dict, incomplete=False)
if reason is not None:
self.to_screen('[download] ' + reason)
return
@ -1172,7 +1260,7 @@ class YoutubeDL(object):
return
try:
dn = os.path.dirname(encodeFilename(filename))
dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
if dn and not os.path.exists(dn):
os.makedirs(dn)
except (OSError, IOError) as err:
@ -1212,15 +1300,23 @@ class YoutubeDL(object):
subtitles_are_requested = any([self.params.get('writesubtitles', False),
self.params.get('writeautomaticsub')])
if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
if subtitles_are_requested and info_dict.get('requested_subtitles'):
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
subtitles = info_dict['subtitles']
sub_format = self.params.get('subtitlesformat', 'srt')
for sub_lang in subtitles.keys():
sub = subtitles[sub_lang]
if sub is None:
continue
subtitles = info_dict['requested_subtitles']
ie = self.get_info_extractor(info_dict['extractor_key'])
for sub_lang, sub_info in subtitles.items():
sub_format = sub_info['ext']
if sub_info.get('data') is not None:
sub_data = sub_info['data']
else:
try:
sub_data = ie._download_webpage(
sub_info['url'], info_dict['id'], note=False)
except ExtractorError as err:
self.report_warning('Unable to download subtitle for "%s": %s' %
(sub_lang, compat_str(err.cause)))
continue
try:
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
@ -1228,7 +1324,7 @@ class YoutubeDL(object):
else:
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
subfile.write(sub)
subfile.write(sub_data)
except (OSError, IOError):
self.report_error('Cannot write subtitles file ' + sub_filename)
return
@ -1261,7 +1357,7 @@ class YoutubeDL(object):
downloaded = []
success = True
merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
if not merger._executable:
if not merger.available:
postprocessors = []
self.report_warning('You have requested multiple '
'formats but ffmpeg or avconv are not installed.'
@ -1340,8 +1436,8 @@ class YoutubeDL(object):
"""Download a given list of URLs."""
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
if (len(url_list) > 1 and
'%' not in outtmpl
and self.params.get('max_downloads') != 1):
'%' not in outtmpl and
self.params.get('max_downloads') != 1):
raise SameFileError(outtmpl)
for url in url_list:
@ -1360,8 +1456,11 @@ class YoutubeDL(object):
return self._download_retcode
def download_with_info_file(self, info_filename):
with io.open(info_filename, 'r', encoding='utf-8') as f:
info = json.load(f)
with contextlib.closing(fileinput.FileInput(
[info_filename], mode='r',
openhook=fileinput.hook_encoded('utf-8'))) as f:
# FileInput doesn't have a read method, we can't call json.load
info = json.loads('\n'.join(f))
try:
self.process_ie_result(info, download=True)
except DownloadError:
@ -1508,30 +1607,18 @@ class YoutubeDL(object):
return res
def list_formats(self, info_dict):
def line(format, idlen=20):
return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
format['format_id'],
format['ext'],
self.format_resolution(format),
self._format_note(format),
))
formats = info_dict.get('formats', [info_dict])
idlen = max(len('format code'),
max(len(f['format_id']) for f in formats))
formats_s = [
line(f, idlen) for f in formats
table = [
[f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
for f in formats
if f.get('preference') is None or f['preference'] >= -1000]
if len(formats) > 1:
formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
header_line = line({
'format_id': 'format code', 'ext': 'extension',
'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
header_line = ['format code', 'extension', 'resolution', 'note']
self.to_screen(
'[info] Available formats for %s:\n%s\n%s' %
(info_dict['id'], header_line, '\n'.join(formats_s)))
'[info] Available formats for %s:\n%s' %
(info_dict['id'], render_table(header_line, table)))
def list_thumbnails(self, info_dict):
thumbnails = info_dict.get('thumbnails')
@ -1550,6 +1637,17 @@ class YoutubeDL(object):
['ID', 'width', 'height', 'URL'],
[[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the languages and formats available in subtitles.

    subtitles maps a language code to a list of format dicts; the 'ext'
    values of each list are shown in reverse order.  name is the label
    used in the printed messages.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return

    self.to_screen('Available %s for %s:' % (name, video_id))
    rows = []
    for lang, formats in subtitles.items():
        extensions = [f['ext'] for f in reversed(formats)]
        rows.append([lang, ', '.join(extensions)])
    self.to_screen(render_table(['Language', 'formats'], rows))
def urlopen(self, req):
""" Start an HTTP download """
@ -1611,7 +1709,7 @@ class YoutubeDL(object):
self._write_string('[debug] Python version %s - %s\n' % (
platform.python_version(), platform_name()))
exe_versions = FFmpegPostProcessor.get_versions()
exe_versions = FFmpegPostProcessor.get_versions(self)
exe_versions['rtmpdump'] = rtmpdump_version()
exe_str = ', '.join(
'%s %s' % (exe, v)
@ -1666,13 +1764,14 @@ class YoutubeDL(object):
# Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
if 'http' in proxies and 'https' not in proxies:
proxies['https'] = proxies['http']
proxy_handler = compat_urllib_request.ProxyHandler(proxies)
proxy_handler = PerRequestProxyHandler(proxies)
debuglevel = 1 if self.params.get('debug_printtraffic') else 0
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
opener = compat_urllib_request.build_opener(
https_handler, proxy_handler, cookie_processor, ydlh)
proxy_handler, https_handler, cookie_processor, ydlh)
# Delete the default user-agent header, which would otherwise apply in
# cases where our custom HTTP handler doesn't come into play
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)

View File

@ -9,6 +9,7 @@ import codecs
import io
import os
import random
import shlex
import sys
@ -23,9 +24,10 @@ from .compat import (
)
from .utils import (
DateRange,
DEFAULT_OUTTMPL,
decodeOption,
DEFAULT_OUTTMPL,
DownloadError,
match_filter_func,
MaxDownloadsReached,
preferredencoding,
read_batch_urls,
@ -169,6 +171,9 @@ def _real_main(argv=None):
if opts.recodevideo is not None:
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']:
parser.error('invalid video recode format specified')
if opts.convertsubtitles is not None:
if opts.convertsubtitles not in ['srt', 'vtt', 'ass']:
parser.error('invalid subtitle format specified')
if opts.date is not None:
date = DateRange.day(opts.date)
@ -188,14 +193,14 @@ def _real_main(argv=None):
# In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
if opts.outtmpl is not None:
opts.outtmpl = opts.outtmpl.decode(preferredencoding())
outtmpl = ((opts.outtmpl is not None and opts.outtmpl)
or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s')
or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s')
or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s')
or (opts.usetitle and '%(title)s-%(id)s.%(ext)s')
or (opts.useid and '%(id)s.%(ext)s')
or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s')
or DEFAULT_OUTTMPL)
outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or
(opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or
(opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or
(opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') or
(opts.usetitle and '%(title)s-%(id)s.%(ext)s') or
(opts.useid and '%(id)s.%(ext)s') or
(opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') or
DEFAULT_OUTTMPL)
if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
parser.error('Cannot download a video and extract audio into the same'
' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
@ -208,6 +213,11 @@ def _real_main(argv=None):
# PostProcessors
postprocessors = []
# Add the metadata pp first, the other pps will copy it
if opts.metafromtitle:
postprocessors.append({
'key': 'MetadataFromTitle',
'titleformat': opts.metafromtitle
})
if opts.addmetadata:
postprocessors.append({'key': 'FFmpegMetadata'})
if opts.extractaudio:
@ -222,10 +232,14 @@ def _real_main(argv=None):
'key': 'FFmpegVideoConvertor',
'preferedformat': opts.recodevideo,
})
if opts.convertsubtitles:
postprocessors.append({
'key': 'FFmpegSubtitlesConvertor',
'format': opts.convertsubtitles,
})
if opts.embedsubtitles:
postprocessors.append({
'key': 'FFmpegEmbedSubtitle',
'subtitlesformat': opts.subtitlesformat,
})
if opts.xattrs:
postprocessors.append({'key': 'XAttrMetadata'})
@ -247,6 +261,12 @@ def _real_main(argv=None):
xattr # Confuse flake8
except ImportError:
parser.error('setting filesize xattr requested but python-xattr is not available')
external_downloader_args = None
if opts.external_downloader_args:
external_downloader_args = shlex.split(opts.external_downloader_args)
match_filter = (
None if opts.match_filter is None
else match_filter_func(opts.match_filter))
ydl_opts = {
'usenetrc': opts.usenetrc,
@ -344,6 +364,12 @@ def _real_main(argv=None):
'list_thumbnails': opts.list_thumbnails,
'playlist_items': opts.playlist_items,
'xattr_set_filesize': opts.xattr_set_filesize,
'match_filter': match_filter,
'no_color': opts.no_color,
'ffmpeg_location': opts.ffmpeg_location,
'hls_prefer_native': opts.hls_prefer_native,
'external_downloader_args': external_downloader_args,
'cn_verification_proxy': opts.cn_verification_proxy,
}
with YoutubeDL(ydl_opts) as ydl:

View File

@ -1,7 +1,5 @@
from __future__ import unicode_literals
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
import base64
from math import ceil
@ -329,3 +327,5 @@ def inc(data):
data[i] = data[i] + 1
break
return data
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']

View File

@ -1,9 +1,11 @@
from __future__ import unicode_literals
import collections
import getpass
import optparse
import os
import re
import shutil
import socket
import subprocess
import sys
@ -364,6 +366,33 @@ def workaround_optparse_bug9161():
return real_add_option(self, *bargs, **bkwargs)
optparse.OptionGroup.add_option = _compat_add_option
if hasattr(shutil, 'get_terminal_size'):  # Python >= 3.3
    compat_get_terminal_size = shutil.get_terminal_size
else:
    _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])

    def _terminal_dimension_from_env(name):
        """Return the positive integer held by environment variable name, or None."""
        try:
            value = int(compat_getenv(name, None))
        except (TypeError, ValueError):
            # Unset (None) or not a number
            return None
        return value if value > 0 else None

    def compat_get_terminal_size():
        """Return the terminal size as a (columns, lines) named tuple.

        COLUMNS and LINES from the environment take precedence, as they do
        for shutil.get_terminal_size; `stty size` is only queried for the
        values the environment does not provide.  A dimension that cannot
        be determined is returned as None.
        """
        columns = _terminal_dimension_from_env('COLUMNS')
        lines = _terminal_dimension_from_env('LINES')

        if columns is None or lines is None:
            try:
                sp = subprocess.Popen(
                    ['stty', 'size'],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                out, err = sp.communicate()
                tty_lines, tty_columns = map(int, out.split())
            except Exception:
                # Best effort only: stty may be missing or stdin may not be
                # a terminal.  KeyboardInterrupt/SystemExit still propagate.
                tty_lines = tty_columns = None
            if columns is None:
                columns = tty_columns
            if lines is None:
                lines = tty_lines
        return _terminal_size(columns, lines)
__all__ = [
'compat_HTTPError',
@ -371,6 +400,7 @@ __all__ = [
'compat_chr',
'compat_cookiejar',
'compat_expanduser',
'compat_get_terminal_size',
'compat_getenv',
'compat_getpass',
'compat_html_entities',

View File

@ -34,6 +34,9 @@ def get_suitable_downloader(info_dict, params={}):
if ed.supports(info_dict):
return ed
if protocol == 'm3u8' and params.get('hls_prefer_native'):
return NativeHlsFD
return PROTOCOL_MAP.get(protocol, HttpFD)

View File

@ -1,4 +1,4 @@
from __future__ import unicode_literals
from __future__ import division, unicode_literals
import os
import re
@ -42,6 +42,8 @@ class FileDownloader(object):
max_filesize: Skip files larger than this size
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
(experimental)
external_downloader_args: A list of additional command-line arguments for the
external downloader.
Subclasses of this one must re-define the real_download method.
"""
@ -54,6 +56,7 @@ class FileDownloader(object):
self.ydl = ydl
self._progress_hooks = []
self.params = params
self.add_progress_hook(self.report_progress)
@staticmethod
def format_seconds(seconds):
@ -226,42 +229,64 @@ class FileDownloader(object):
self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
self.to_console_title('youtube-dl ' + msg)
def report_progress(self, s):
    """Print a progress line for the status dictionary s.

    s['status'] selects the behaviour: 'finished' prints the completion
    message, 'downloading' prints a progress line, anything else is
    ignored.  Optional keys read from s are total_bytes,
    total_bytes_estimate, downloaded_bytes, elapsed, eta and speed.
    Formatted helper strings are stored back into s under keys that start
    with an underscore.
    """
    if s['status'] == 'finished':
        if self.params.get('noprogress', False):
            self.to_screen('[download] Download completed')
        else:
            s['_total_bytes_str'] = format_bytes(s['total_bytes'])
            if s.get('elapsed') is not None:
                s['_elapsed_str'] = self.format_seconds(s['elapsed'])
                msg_template = '100%% of %(_total_bytes_str)s in %(_elapsed_str)s'
            else:
                msg_template = '100%% of %(_total_bytes_str)s'
            self._report_progress_status(
                msg_template % s, is_last_line=True)

    if self.params.get('noprogress'):
        return

    if s['status'] != 'downloading':
        return

    if s.get('eta') is not None:
        s['_eta_str'] = self.format_eta(s['eta'])
    else:
        s['_eta_str'] = 'Unknown ETA'

    if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
        s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
    elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
        s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
    else:
        if s.get('downloaded_bytes') == 0:
            s['_percent_str'] = self.format_percent(0)
        else:
            s['_percent_str'] = 'Unknown %'

    if s.get('speed') is not None:
        s['_speed_str'] = self.format_speed(s['speed'])
    else:
        s['_speed_str'] = 'Unknown speed'

    if s.get('total_bytes') is not None:
        s['_total_bytes_str'] = format_bytes(s['total_bytes'])
        msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
    elif s.get('total_bytes_estimate') is not None:
        s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
        msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
    else:
        if s.get('downloaded_bytes') is not None:
            s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
            if s.get('elapsed'):
                s['_elapsed_str'] = self.format_seconds(s['elapsed'])
                msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
            else:
                msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
        else:
            # No stray '%' here: a lone "% a" would be parsed as a
            # conversion specifier and break the formatting below.
            msg_template = '%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s'

    self._report_progress_status(msg_template % s)
def report_resuming_byte(self, resume_len):
"""Report attempt to resume at given byte."""
@ -288,14 +313,14 @@ class FileDownloader(object):
"""
nooverwrites_and_exists = (
self.params.get('nooverwrites', False)
and os.path.exists(encodeFilename(filename))
self.params.get('nooverwrites', False) and
os.path.exists(encodeFilename(filename))
)
continuedl_and_exists = (
self.params.get('continuedl', False)
and os.path.isfile(encodeFilename(filename))
and not self.params.get('nopart', False)
self.params.get('continuedl', False) and
os.path.isfile(encodeFilename(filename)) and
not self.params.get('nopart', False)
)
# Check file already present

View File

@ -51,6 +51,13 @@ class ExternalFD(FileDownloader):
return []
return [command_option, source_address]
def _configuration_args(self, default=[]):
ex_args = self.params.get('external_downloader_args')
if ex_args is None:
return default
assert isinstance(ex_args, list)
return ex_args
def _call_downloader(self, tmpfilename, info_dict):
""" Either overwrite this or implement _make_cmd """
cmd = self._make_cmd(tmpfilename, info_dict)
@ -75,10 +82,11 @@ class ExternalFD(FileDownloader):
class CurlFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-o', tmpfilename]
cmd = [self.exe, '--location', '-o', tmpfilename]
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._source_address('--interface')
cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
return cmd
@ -89,15 +97,16 @@ class WgetFD(ExternalFD):
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._source_address('--bind-address')
cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
return cmd
class Aria2cFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict):
cmd = [
self.exe, '-c',
'--min-split-size', '1M', '--max-connection-per-server', '4']
cmd = [self.exe, '-c']
cmd += self._configuration_args([
'--min-split-size', '1M', '--max-connection-per-server', '4'])
dn = os.path.dirname(tmpfilename)
if dn:
cmd += ['--dir', dn]

View File

@ -1,4 +1,4 @@
from __future__ import unicode_literals
from __future__ import division, unicode_literals
import base64
import io
@ -11,11 +11,11 @@ from .common import FileDownloader
from .http import HttpFD
from ..compat import (
compat_urlparse,
compat_urllib_error,
)
from ..utils import (
struct_pack,
struct_unpack,
format_bytes,
encodeFilename,
sanitize_open,
xpath_text,
@ -122,7 +122,8 @@ class FlvReader(io.BytesIO):
self.read_unsigned_int() # BootstrapinfoVersion
# Profile,Live,Update,Reserved
self.read(1)
flags = self.read_unsigned_char()
live = flags & 0x20 != 0
# time scale
self.read_unsigned_int()
# CurrentMediaTime
@ -161,6 +162,7 @@ class FlvReader(io.BytesIO):
return {
'segments': segments,
'fragments': fragments,
'live': live,
}
def read_bootstrap_info(self):
@ -183,6 +185,10 @@ def build_fragments_list(boot_info):
for segment, fragments_count in segment_run_table['segment_run']:
for _ in range(fragments_count):
res.append((segment, next(fragments_counter)))
if boot_info['live']:
res = res[-2:]
return res
@ -247,22 +253,43 @@ class F4mFD(FileDownloader):
self.report_error('Unsupported DRM')
return media
def _get_bootstrap_from_url(self, bootstrap_url):
    """Fetch bootstrap_url and return its content parsed by read_bootstrap_info."""
    response = self.ydl.urlopen(bootstrap_url)
    return read_bootstrap_info(response.read())
def _update_live_fragments(self, bootstrap_url, latest_fragment):
    """Poll bootstrap_url for fragments newer than latest_fragment.

    The bootstrap info is re-fetched up to 30 times, sleeping 5 seconds
    after every attempt that yields nothing new.  Returns the list of
    fragments whose second element exceeds latest_fragment; if it is still
    empty after all attempts an error is reported.
    """
    fragments_list = []
    for _ in range(30):
        boot_info = self._get_bootstrap_from_url(bootstrap_url)
        fragments_list = [
            frag for frag in build_fragments_list(boot_info)
            if frag[1] > latest_fragment]
        if fragments_list:
            break
        # Retry after a while
        time.sleep(5.0)
    else:
        self.report_error('Failed to update fragments')
    return fragments_list
def _parse_bootstrap_node(self, node, base_url):
    """Return (boot_info, bootstrap_url) for a bootstrap info node.

    When the node has text, it is base64-decoded and parsed, and
    bootstrap_url is None.  Otherwise the node's 'url' attribute is
    resolved against base_url and the bootstrap info is downloaded from
    the resulting URL.
    """
    if node.text is not None:
        # Inline bootstrap data: nothing to download
        raw = base64.b64decode(node.text.encode('ascii'))
        return (read_bootstrap_info(raw), None)

    bootstrap_url = compat_urlparse.urljoin(base_url, node.attrib['url'])
    return (self._get_bootstrap_from_url(bootstrap_url), bootstrap_url)
def real_download(self, filename, info_dict):
man_url = info_dict['url']
requested_bitrate = info_dict.get('tbr')
self.to_screen('[download] Downloading f4m manifest')
manifest = self.ydl.urlopen(man_url).read()
self.report_destination(filename)
http_dl = HttpQuietDownloader(
self.ydl,
{
'continuedl': True,
'quiet': True,
'noprogress': True,
'ratelimit': self.params.get('ratelimit', None),
'test': self.params.get('test', False),
}
)
doc = etree.fromstring(manifest)
formats = [(int(f.attrib.get('bitrate', -1)), f)
@ -277,18 +304,13 @@ class F4mFD(FileDownloader):
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
if bootstrap_node.text is None:
bootstrap_url = compat_urlparse.urljoin(
base_url, bootstrap_node.attrib['url'])
bootstrap = self.ydl.urlopen(bootstrap_url).read()
else:
bootstrap = base64.b64decode(bootstrap_node.text)
boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, base_url)
live = boot_info['live']
metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None:
metadata = base64.b64decode(metadata_node.text)
metadata = base64.b64decode(metadata_node.text.encode('ascii'))
else:
metadata = None
boot_info = read_bootstrap_info(bootstrap)
fragments_list = build_fragments_list(boot_info)
if self.params.get('test', False):
@ -298,64 +320,112 @@ class F4mFD(FileDownloader):
# For some akamai manifests we'll need to add a query to the fragment url
akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
self.report_destination(filename)
http_dl = HttpQuietDownloader(
self.ydl,
{
'continuedl': True,
'quiet': True,
'noprogress': True,
'ratelimit': self.params.get('ratelimit', None),
'test': self.params.get('test', False),
}
)
tmpfilename = self.temp_name(filename)
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
write_flv_header(dest_stream)
write_metadata_tag(dest_stream, metadata)
if not live:
write_metadata_tag(dest_stream, metadata)
# This dict stores the download progress, it's updated by the progress
# hook
state = {
'status': 'downloading',
'downloaded_bytes': 0,
'frag_counter': 0,
'frag_index': 0,
'frag_count': total_frags,
'filename': filename,
'tmpfilename': tmpfilename,
}
start = time.time()
def frag_progress_hook(status):
frag_total_bytes = status.get('total_bytes', 0)
estimated_size = (state['downloaded_bytes'] +
(total_frags - state['frag_counter']) * frag_total_bytes)
if status['status'] == 'finished':
def frag_progress_hook(s):
if s['status'] not in ('downloading', 'finished'):
return
frag_total_bytes = s.get('total_bytes', 0)
if s['status'] == 'finished':
state['downloaded_bytes'] += frag_total_bytes
state['frag_counter'] += 1
progress = self.calc_percent(state['frag_counter'], total_frags)
byte_counter = state['downloaded_bytes']
state['frag_index'] += 1
estimated_size = (
(state['downloaded_bytes'] + frag_total_bytes) /
(state['frag_index'] + 1) * total_frags)
time_now = time.time()
state['total_bytes_estimate'] = estimated_size
state['elapsed'] = time_now - start
if s['status'] == 'finished':
progress = self.calc_percent(state['frag_index'], total_frags)
else:
frag_downloaded_bytes = status['downloaded_bytes']
byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes
frag_downloaded_bytes = s['downloaded_bytes']
frag_progress = self.calc_percent(frag_downloaded_bytes,
frag_total_bytes)
progress = self.calc_percent(state['frag_counter'], total_frags)
progress = self.calc_percent(state['frag_index'], total_frags)
progress += frag_progress / float(total_frags)
eta = self.calc_eta(start, time.time(), estimated_size, byte_counter)
self.report_progress(progress, format_bytes(estimated_size),
status.get('speed'), eta)
state['eta'] = self.calc_eta(
start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes)
state['speed'] = s.get('speed')
self._hook_progress(state)
http_dl.add_progress_hook(frag_progress_hook)
frags_filenames = []
for (seg_i, frag_i) in fragments_list:
while fragments_list:
seg_i, frag_i = fragments_list.pop(0)
name = 'Seg%d-Frag%d' % (seg_i, frag_i)
url = base_url + name
if akamai_pv:
url += '?' + akamai_pv.strip(';')
frag_filename = '%s-%s' % (tmpfilename, name)
success = http_dl.download(frag_filename, {'url': url})
if not success:
return False
with open(frag_filename, 'rb') as down:
down_data = down.read()
reader = FlvReader(down_data)
while True:
_, box_type, box_data = reader.read_box_info()
if box_type == b'mdat':
dest_stream.write(box_data)
break
frags_filenames.append(frag_filename)
try:
success = http_dl.download(frag_filename, {'url': url})
if not success:
return False
with open(frag_filename, 'rb') as down:
down_data = down.read()
reader = FlvReader(down_data)
while True:
_, box_type, box_data = reader.read_box_info()
if box_type == b'mdat':
dest_stream.write(box_data)
break
if live:
os.remove(frag_filename)
else:
frags_filenames.append(frag_filename)
except (compat_urllib_error.HTTPError, ) as err:
if live and (err.code == 404 or err.code == 410):
# We didn't keep up with the live window. Continue
# with the next available fragment.
msg = 'Fragment %d unavailable' % frag_i
self.report_warning(msg)
fragments_list = []
else:
raise
if not fragments_list and live and bootstrap_url:
fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
total_frags += len(fragments_list)
if fragments_list and (fragments_list[0][1] > frag_i + 1):
msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
self.report_warning(msg)
dest_stream.close()
self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)
elapsed = time.time() - start
self.try_rename(tmpfilename, filename)
for frag_file in frags_filenames:
os.remove(frag_file)
@ -366,6 +436,7 @@ class F4mFD(FileDownloader):
'total_bytes': fsize,
'filename': filename,
'status': 'finished',
'elapsed': elapsed,
})
return True

View File

@ -23,15 +23,14 @@ class HlsFD(FileDownloader):
tmpfilename = self.temp_name(filename)
ffpp = FFmpegPostProcessor(downloader=self)
program = ffpp._executable
if program is None:
if not ffpp.available:
self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
return False
ffpp.check_version()
args = [
encodeArgument(opt)
for opt in (program, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
for opt in (ffpp.executable, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
args.append(encodeFilename(tmpfilename, True))
retval = subprocess.call(args)
@ -48,7 +47,7 @@ class HlsFD(FileDownloader):
return True
else:
self.to_stderr('\n')
self.report_error('%s exited with code %d' % (program, retval))
self.report_error('%s exited with code %d' % (ffpp.basename, retval))
return False

View File

@ -1,10 +1,9 @@
from __future__ import unicode_literals
import os
import time
from socket import error as SocketError
import errno
import os
import socket
import time
from .common import FileDownloader
from ..compat import (
@ -15,7 +14,6 @@ from ..utils import (
ContentTooShortError,
encodeFilename,
sanitize_open,
format_bytes,
)
@ -94,6 +92,8 @@ class HttpFD(FileDownloader):
self._hook_progress({
'filename': filename,
'status': 'finished',
'downloaded_bytes': resume_len,
'total_bytes': resume_len,
})
return True
else:
@ -102,7 +102,7 @@ class HttpFD(FileDownloader):
resume_len = 0
open_mode = 'wb'
break
except SocketError as e:
except socket.error as e:
if e.errno != errno.ECONNRESET:
# Connection reset is no problem, just retry
raise
@ -137,7 +137,6 @@ class HttpFD(FileDownloader):
self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
return False
data_len_str = format_bytes(data_len)
byte_counter = 0 + resume_len
block_size = self.params.get('buffersize', 1024)
start = time.time()
@ -196,20 +195,19 @@ class HttpFD(FileDownloader):
# Progress message
speed = self.calc_speed(start, now, byte_counter - resume_len)
if data_len is None:
eta = percent = None
eta = None
else:
percent = self.calc_percent(byte_counter, data_len)
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
self.report_progress(percent, data_len_str, speed, eta)
self._hook_progress({
'status': 'downloading',
'downloaded_bytes': byte_counter,
'total_bytes': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
'eta': eta,
'speed': speed,
'elapsed': now - start,
})
if is_test and byte_counter == data_len:
@ -221,7 +219,7 @@ class HttpFD(FileDownloader):
return False
if tmpfilename != '-':
stream.close()
self.report_finish(data_len_str, (time.time() - start))
if data_len is not None and byte_counter != data_len:
raise ContentTooShortError(byte_counter, int(data_len))
self.try_rename(tmpfilename, filename)
@ -235,6 +233,7 @@ class HttpFD(FileDownloader):
'total_bytes': byte_counter,
'filename': filename,
'status': 'finished',
'elapsed': time.time() - start,
})
return True

View File

@ -11,7 +11,6 @@ from ..compat import compat_str
from ..utils import (
check_executable,
encodeFilename,
format_bytes,
get_exe_version,
)
@ -51,23 +50,23 @@ class RtmpFD(FileDownloader):
if not resume_percent:
resume_percent = percent
resume_downloaded_data_len = downloaded_data_len
eta = self.calc_eta(start, time.time(), 100 - resume_percent, percent - resume_percent)
speed = self.calc_speed(start, time.time(), downloaded_data_len - resume_downloaded_data_len)
time_now = time.time()
eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
data_len = None
if percent > 0:
data_len = int(downloaded_data_len * 100 / percent)
data_len_str = '~' + format_bytes(data_len)
self.report_progress(percent, data_len_str, speed, eta)
cursor_in_new_line = False
self._hook_progress({
'status': 'downloading',
'downloaded_bytes': downloaded_data_len,
'total_bytes': data_len,
'total_bytes_estimate': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
'eta': eta,
'elapsed': time_now - start,
'speed': speed,
})
cursor_in_new_line = False
else:
# no percent for live streams
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
@ -75,15 +74,15 @@ class RtmpFD(FileDownloader):
downloaded_data_len = int(float(mobj.group(1)) * 1024)
time_now = time.time()
speed = self.calc_speed(start, time_now, downloaded_data_len)
self.report_progress_live_stream(downloaded_data_len, speed, time_now - start)
cursor_in_new_line = False
self._hook_progress({
'downloaded_bytes': downloaded_data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
'elapsed': time_now - start,
'speed': speed,
})
cursor_in_new_line = False
elif self.params.get('verbose', False):
if not cursor_in_new_line:
self.to_screen('')
@ -120,7 +119,9 @@ class RtmpFD(FileDownloader):
# Download using rtmpdump. rtmpdump returns exit code 2 when
# the connection was interrupted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK.
basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
basic_args = [
'rtmpdump', '--verbose', '-r', url,
'-o', encodeFilename(tmpfilename, True)]
if player_url is not None:
basic_args += ['--swfVfy', player_url]
if page_url is not None:

View File

@ -6,7 +6,9 @@ from .academicearth import AcademicEarthCourseIE
from .addanime import AddAnimeIE
from .adobetv import AdobeTVIE
from .adultswim import AdultSwimIE
from .aftenposten import AftenpostenIE
from .aftonbladet import AftonbladetIE
from .airmozilla import AirMozillaIE
from .aljazeera import AlJazeeraIE
from .alphaporno import AlphaPornoIE
from .anitube import AnitubeIE
@ -35,6 +37,7 @@ from .bandcamp import BandcampIE, BandcampAlbumIE
from .bbccouk import BBCCoUkIE
from .beeg import BeegIE
from .behindkink import BehindKinkIE
from .beatportpro import BeatportProIE
from .bet import BetIE
from .bild import BildIE
from .bilibili import BiliBiliIE
@ -48,14 +51,24 @@ from .brightcove import BrightcoveIE
from .buzzfeed import BuzzFeedIE
from .byutv import BYUtvIE
from .c56 import C56IE
from .camdemy import (
CamdemyIE,
CamdemyFolderIE
)
from .canal13cl import Canal13clIE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .cbs import CBSIE
from .cbsnews import CBSNewsIE
from .cbssports import CBSSportsIE
from .ccc import CCCIE
from .ceskatelevize import CeskaTelevizeIE
from .channel9 import Channel9IE
from .chilloutzone import ChilloutzoneIE
from .chirbit import (
ChirbitIE,
ChirbitProfileIE,
)
from .cinchcast import CinchcastIE
from .clipfish import ClipfishIE
from .cliphunter import CliphunterIE
@ -73,7 +86,7 @@ from .collegehumor import CollegeHumorIE
from .collegerama import CollegeRamaIE
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
from .comcarcoff import ComCarCoffIE
from .commonmistakes import CommonMistakesIE
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
from .condenast import CondeNastIE
from .cracked import CrackedIE
from .criterion import CriterionIE
@ -94,6 +107,7 @@ from .dctp import DctpTvIE
from .deezer import DeezerPlaylistIE
from .dfb import DFBIE
from .dotsub import DotsubIE
from .douyutv import DouyuTVIE
from .dreisat import DreiSatIE
from .drbonanza import DRBonanzaIE
from .drtuber import DrTuberIE
@ -104,6 +118,7 @@ from .defense import DefenseGouvFrIE
from .discovery import DiscoveryIE
from .divxstage import DivxStageIE
from .dropbox import DropboxIE
from .eagleplatform import EaglePlatformIE
from .ebaumsworld import EbaumsWorldIE
from .echomsk import EchoMskIE
from .ehow import EHowIE
@ -115,6 +130,7 @@ from .ellentv import (
EllenTVClipsIE,
)
from .elpais import ElPaisIE
from .embedly import EmbedlyIE
from .empflix import EMPFlixIE
from .engadget import EngadgetIE
from .eporner import EpornerIE
@ -137,6 +153,7 @@ from .fktv import (
)
from .flickr import FlickrIE
from .folketinget import FolketingetIE
from .footyroom import FootyRoomIE
from .fourtube import FourTubeIE
from .foxgay import FoxgayIE
from .foxnews import FoxNewsIE
@ -161,6 +178,7 @@ from .gameone import (
from .gamespot import GameSpotIE
from .gamestar import GameStarIE
from .gametrailers import GametrailersIE
from .gazeta import GazetaIE
from .gdcvault import GDCVaultIE
from .generic import GenericIE
from .giantbomb import GiantBombIE
@ -183,6 +201,7 @@ from .hellporno import HellPornoIE
from .helsinki import HelsinkiIE
from .hentaistigma import HentaiStigmaIE
from .historicfilms import HistoricFilmsIE
from .history import HistoryIE
from .hitbox import HitboxIE, HitboxLiveIE
from .hornbunny import HornBunnyIE
from .hostingbulk import HostingBulkIE
@ -197,6 +216,7 @@ from .imdb import (
ImdbIE,
ImdbListIE
)
from .imgur import ImgurIE
from .ina import InaIE
from .infoq import InfoQIE
from .instagram import InstagramIE, InstagramUserIE
@ -212,6 +232,8 @@ from .jeuxvideo import JeuxVideoIE
from .jove import JoveIE
from .jukebox import JukeboxIE
from .jpopsukitv import JpopsukiIE
from .kaltura import KalturaIE
from .kanalplay import KanalPlayIE
from .kankan import KankanIE
from .karaoketv import KaraoketvIE
from .keezmovies import KeezMoviesIE
@ -223,6 +245,12 @@ from .krasview import KrasViewIE
from .ku6 import Ku6IE
from .la7 import LA7IE
from .laola1tv import Laola1TvIE
from .letv import (
LetvIE,
LetvTvIE,
LetvPlaylistIE
)
from .libsyn import LibsynIE
from .lifenews import LifeNewsIE
from .liveleak import LiveLeakIE
from .livestream import (
@ -275,6 +303,7 @@ from .myspace import MySpaceIE, MySpaceAlbumIE
from .myspass import MySpassIE
from .myvideo import MyVideoIE
from .myvidster import MyVidsterIE
from .nationalgeographic import NationalGeographicIE
from .naver import NaverIE
from .nba import NBAIE
from .nbc import (
@ -285,6 +314,7 @@ from .ndr import NDRIE
from .ndtv import NDTVIE
from .netzkino import NetzkinoIE
from .nerdcubed import NerdCubedFeedIE
from .nerdist import NerdistIE
from .newgrounds import NewgroundsIE
from .newstube import NewstubeIE
from .nextmedia import (
@ -311,15 +341,20 @@ from .nowvideo import NowVideoIE
from .npo import (
NPOIE,
NPOLiveIE,
NPORadioIE,
NPORadioFragmentIE,
TegenlichtVproIE,
)
from .nrk import (
NRKIE,
NRKPlaylistIE,
NRKTVIE,
)
from .ntv import NTVIE
from .ntvde import NTVDeIE
from .ntvru import NTVRuIE
from .nytimes import NYTimesIE
from .nuvid import NuvidIE
from .odnoklassniki import OdnoklassnikiIE
from .oktoberfesttv import OktoberfestTVIE
from .ooyala import OoyalaIE
from .openfilm import OpenFilmIE
@ -327,6 +362,7 @@ from .orf import (
ORFTVthekIE,
ORFOE1IE,
ORFFM4IE,
ORFIPTVIE,
)
from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE
@ -334,18 +370,26 @@ from .pbs import PBSIE
from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE
from .planetaplay import PlanetaPlayIE
from .pladform import PladformIE
from .played import PlayedIE
from .playfm import PlayFMIE
from .playvid import PlayvidIE
from .playwire import PlaywireIE
from .podomatic import PodomaticIE
from .pornhd import PornHdIE
from .pornhub import PornHubIE
from .pornhub import (
PornHubIE,
PornHubPlaylistIE,
)
from .pornotube import PornotubeIE
from .pornoxo import PornoXOIE
from .primesharetv import PrimeShareTVIE
from .promptfile import PromptFileIE
from .prosiebensat1 import ProSiebenSat1IE
from .puls4 import Puls4IE
from .pyvideo import PyvideoIE
from .quickvid import QuickVidIE
from .r7 import R7IE
from .radiode import RadioDeIE
from .radiobremen import RadioBremenIE
from .radiofrance import RadioFranceIE
@ -360,12 +404,12 @@ from .rottentomatoes import RottenTomatoesIE
from .roxwel import RoxwelIE
from .rtbf import RTBFIE
from .rte import RteIE
from .rtlnl import RtlXlIE
from .rtlnl import RtlNlIE
from .rtlnow import RTLnowIE
from .rtl2 import RTL2IE
from .rtp import RTPIE
from .rts import RTSIE
from .rtve import RTVEALaCartaIE, RTVELiveIE
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
from .ruhd import RUHDIE
from .rutube import (
RutubeIE,
@ -375,6 +419,7 @@ from .rutube import (
RutubePersonIE,
)
from .rutv import RUTVIE
from .sandia import SandiaIE
from .sapo import SapoIE
from .savefrom import SaveFromIE
from .sbs import SBSIE
@ -405,7 +450,10 @@ from .soundcloud import (
SoundcloudUserIE,
SoundcloudPlaylistIE
)
from .soundgasm import SoundgasmIE
from .soundgasm import (
SoundgasmIE,
SoundgasmProfileIE
)
from .southpark import (
SouthParkIE,
SouthparkDeIE,
@ -419,12 +467,14 @@ from .sport5 import Sport5IE
from .sportbox import SportBoxIE
from .sportdeutschland import SportDeutschlandIE
from .srmediathek import SRMediathekIE
from .ssa import SSAIE
from .stanfordoc import StanfordOpenClassroomIE
from .steam import SteamIE
from .streamcloud import StreamcloudIE
from .streamcz import StreamCZIE
from .streetvoice import StreetVoiceIE
from .sunporno import SunPornoIE
from .svtplay import SVTPlayIE
from .swrmediathek import SWRMediathekIE
from .syfy import SyfyIE
from .sztvhu import SztvHuIE
@ -470,9 +520,11 @@ from .tumblr import TumblrIE
from .tunein import TuneInIE
from .turbo import TurboIE
from .tutv import TutvIE
from .tv4 import TV4IE
from .tvigle import TvigleIE
from .tvp import TvpIE, TvpSeriesIE
from .tvplay import TVPlayIE
from .tweakers import TweakersIE
from .twentyfourvideo import TwentyFourVideoIE
from .twitch import (
TwitchVideoIE,
@ -488,6 +540,7 @@ from .udemy import (
UdemyIE,
UdemyCourseIE
)
from .ultimedia import UltimediaIE
from .unistra import UnistraIE
from .urort import UrortIE
from .ustream import UstreamIE, UstreamChannelIE
@ -511,6 +564,7 @@ from .videoweed import VideoWeedIE
from .vidme import VidmeIE
from .vidzi import VidziIE
from .vier import VierIE, VierVideosIE
from .viewster import ViewsterIE
from .vimeo import (
VimeoIE,
VimeoAlbumIE,
@ -552,6 +606,7 @@ from .wimp import WimpIE
from .wistia import WistiaIE
from .worldstarhiphop import WorldStarHipHopIE
from .wrzuta import WrzutaIE
from .wsj import WSJIE
from .xbef import XBefIE
from .xboxclips import XboxClipsIE
from .xhamster import XHamsterIE
@ -565,6 +620,12 @@ from .yahoo import (
YahooIE,
YahooSearchIE,
)
from .yam import YamIE
from .yandexmusic import (
YandexMusicTrackIE,
YandexMusicAlbumIE,
YandexMusicPlaylistIE,
)
from .yesjapan import YesJapanIE
from .ynet import YnetIE
from .youjizz import YouJizzIE
@ -588,6 +649,7 @@ from .youtube import (
YoutubeUserIE,
YoutubeWatchLaterIE,
)
from .zapiks import ZapiksIE
from .zdf import ZDFIE, ZDFChannelIE
from .zingmp3 import (
ZingMp3SongIE,

View File

@ -28,7 +28,6 @@ class AdobeTVIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
player = self._parse_json(
@ -44,8 +43,10 @@ class AdobeTVIE(InfoExtractor):
self._html_search_meta('datepublished', webpage, 'upload date'))
duration = parse_duration(
self._html_search_meta('duration', webpage, 'duration')
or self._search_regex(r'Runtime:\s*(\d{2}:\d{2}:\d{2})', webpage, 'duration'))
self._html_search_meta('duration', webpage, 'duration') or
self._search_regex(
r'Runtime:\s*(\d{2}:\d{2}:\d{2})',
webpage, 'duration', fatal=False))
view_count = str_to_int(self._search_regex(
r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>',

View File

@ -2,13 +2,12 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
xpath_text,
float_or_none,
xpath_text,
)
@ -38,6 +37,7 @@ class AdultSwimIE(InfoExtractor):
},
],
'info_dict': {
'id': 'rQxZvXQ4ROaSOqq-or2Mow',
'title': 'Rick and Morty - Pilot',
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
}
@ -55,9 +55,28 @@ class AdultSwimIE(InfoExtractor):
}
],
'info_dict': {
'id': '-t8CamQlQ2aYZ49ItZCFog',
'title': 'American Dad - Putting Francine Out of Business',
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
},
}, {
'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
'playlist': [
{
'md5': '3e346a2ab0087d687a05e1e7f3b3e529',
'info_dict': {
'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0',
'ext': 'flv',
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
},
}
],
'info_dict': {
'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
},
}]
@staticmethod
@ -78,6 +97,7 @@ class AdultSwimIE(InfoExtractor):
for video in collection.get('videos'):
if video.get('slug') == slug:
return collection, video
return None, None
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@ -88,28 +108,30 @@ class AdultSwimIE(InfoExtractor):
webpage = self._download_webpage(url, episode_path)
# Extract the value of `bootstrappedData` from the Javascript in the page.
bootstrappedDataJS = self._search_regex(r'var bootstrappedData = ({.*});', webpage, episode_path)
try:
bootstrappedData = json.loads(bootstrappedDataJS)
except ValueError as ve:
errmsg = '%s: Failed to parse JSON ' % episode_path
raise ExtractorError(errmsg, cause=ve)
bootstrapped_data = self._parse_json(self._search_regex(
r'var bootstrappedData = ({.*});', webpage, 'bootstraped data'), episode_path)
# Downloading videos from a /videos/playlist/ URL needs to be handled differently.
# NOTE: We are only downloading one video (the current one) not the playlist
if is_playlist:
collections = bootstrappedData['playlists']['collections']
collections = bootstrapped_data['playlists']['collections']
collection = self.find_collection_by_linkURL(collections, show_path)
video_info = self.find_video_info(collection, episode_path)
show_title = video_info['showTitle']
segment_ids = [video_info['videoPlaybackID']]
else:
collections = bootstrappedData['show']['collections']
collections = bootstrapped_data['show']['collections']
collection, video_info = self.find_collection_containing_video(collections, episode_path)
show = bootstrappedData['show']
# Video wasn't found in the collections, let's try `slugged_video`.
if video_info is None:
if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
video_info = bootstrapped_data['slugged_video']
else:
raise ExtractorError('Unable to find video info')
show = bootstrapped_data['show']
show_title = show['title']
segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]

View File

@ -0,0 +1,98 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_iso8601,
xpath_with_ns,
xpath_text,
find_xpath_attr,
)
class AftenpostenIE(InfoExtractor):
    """Extractor for Aftenposten web TV videos.

    The numeric video id from the page URL is used to query an Atom feed
    on frontend.xstream.dk; title, description, publication time, media
    URLs and splash images are all read from that feed.
    """

    _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/(?:#!/)?video/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
        'md5': 'fd828cd29774a729bf4d4425fe192972',
        'info_dict': {
            'id': '21039',
            'ext': 'mov',
            'title': 'TRAILER: "Sweatshop" - I can´t take any more',
            'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
            'timestamp': 1416927969,
            'upload_date': '20141125',
        }
    }

    def _real_extract(self, url):
        """Download the feed entry for the video in *url* and build its info dict."""
        video_id = self._match_id(url)

        data = self._download_xml(
            'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=%s' % video_id, video_id)

        # Namespace prefixes used by the XPath expressions below.
        NS_MAP = {
            'atom': 'http://www.w3.org/2005/Atom',
            'xt': 'http://xstream.dk/',
            'media': 'http://search.yahoo.com/mrss/',
        }

        entry = data.find(xpath_with_ns('./atom:entry', NS_MAP))

        title = xpath_text(
            entry, xpath_with_ns('./atom:title', NS_MAP), 'title')
        description = xpath_text(
            entry, xpath_with_ns('./atom:summary', NS_MAP), 'description')
        timestamp = parse_iso8601(xpath_text(
            entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date'))

        # find() returns None when the entry has no <media:group>; treat that
        # as "no media / no thumbnails" instead of failing on None.findall().
        media_group = entry.find(xpath_with_ns('./media:group', NS_MAP))
        if media_group is None:
            media_contents = []
            splashes = []
        else:
            media_contents = media_group.findall(
                xpath_with_ns('./media:content', NS_MAP))
            splashes = media_group.findall(
                xpath_with_ns('./xt:splash', NS_MAP))

        formats = []
        for media_content in media_contents:
            media_url = media_content.get('url')
            if not media_url:
                continue
            tbr = int_or_none(media_content.get('bitrate'))
            # RTMP URLs are split into connection URL, app and play path.
            mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', media_url)
            if mobj:
                formats.append({
                    'url': mobj.group('url'),
                    'play_path': 'mp4:%s' % mobj.group('playpath'),
                    'app': mobj.group('app'),
                    'ext': 'flv',
                    'tbr': tbr,
                    # The bitrate attribute is optional; '%d' % None would
                    # raise TypeError, so fall back to a plain id.
                    'format_id': 'rtmp-%d' % tbr if tbr is not None else 'rtmp',
                })
            else:
                formats.append({
                    'url': media_url,
                    'tbr': tbr,
                })
        self._sort_formats(formats)

        # The "original" link is appended after sorting, so it always ends up
        # last in the list.
        # NOTE(review): presumably deliberate so it is preferred - confirm.
        link = find_xpath_attr(
            entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original')
        if link is not None:
            formats.append({
                'url': link.get('href'),
                'format_id': link.get('rel'),
            })

        thumbnails = [{
            'url': splash.get('url'),
            'width': int_or_none(splash.get('width')),
            'height': int_or_none(splash.get('height')),
        } for splash in splashes]

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'formats': formats,
            'thumbnails': thumbnails,
        }

View File

@ -1,8 +1,6 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@ -21,9 +19,7 @@ class AftonbladetIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.search(self._VALID_URL, url)
video_id = mobj.group('video_id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
# find internal video meta data

View File

@ -0,0 +1,74 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_duration,
parse_iso8601,
)
class AirMozillaIE(InfoExtractor):
    """Extractor for Air Mozilla (air.mozilla.org) recordings.

    The event page embeds a vid.ly player; the vid.ly embed script carries
    a ``jwconfig`` JSON object from which the formats and the thumbnail are
    read. All remaining metadata is scraped from the event page itself.
    """

    _VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
    _TEST = {
        'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
        'md5': '2e3e7486ba5d180e829d453875b9b8bf',
        'info_dict': {
            'id': '6x4q2w',
            'ext': 'mp4',
            'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
            # Raw string: the pattern contains regex escapes (\w, \., \?, \d)
            # that are invalid escape sequences in a normal string literal.
            'thumbnail': r're:https://\w+\.cloudfront\.net/6x4q2w/poster\.jpg\?t=\d+',
            'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
            'timestamp': 1422487800,
            'upload_date': '20150128',
            'location': 'SFO Commons',
            'duration': 3780,
            'view_count': int,
            'categories': ['Main'],
        }
    }

    def _real_extract(self, url):
        """Scrape the event page and the vid.ly embed script for *url*."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # The vid.ly short code doubles as the video id.
        video_id = self._html_search_regex(r'//vid\.ly/(.*?)/embed', webpage, 'id')

        embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id)
        jwconfig = self._search_regex(r'\svar jwconfig = (\{.*?\});\s', embed_script, 'metadata')
        metadata = self._parse_json(jwconfig, video_id)

        formats = [{
            'url': source['file'],
            'ext': source['type'],
            # format_id is optional metadata, so a file URL without a
            # trailing "&format=" parameter must not abort the extraction.
            'format_id': self._search_regex(
                r'&format=(.*)$', source['file'], 'video format', default=None),
            'format': source['label'],
            # Labels are expected to look like "720p".
            'height': int(source['label'].rstrip('p')),
        } for source in metadata['playlist'][0]['sources']]
        self._sort_formats(formats)

        view_count = int_or_none(self._html_search_regex(
            r'Views since archived: ([0-9]+)',
            webpage, 'view count', fatal=False))
        timestamp = parse_iso8601(self._html_search_regex(
            r'<time datetime="(.*?)"', webpage, 'timestamp', fatal=False))
        duration = parse_duration(self._search_regex(
            r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
            webpage, 'duration', fatal=False))

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'formats': formats,
            'url': self._og_search_url(webpage),
            'display_id': display_id,
            'thumbnail': metadata['playlist'][0].get('image'),
            'description': self._og_search_description(webpage),
            'timestamp': timestamp,
            'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None),
            'duration': duration,
            'view_count': view_count,
            'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage),
        }

View File

@ -20,6 +20,7 @@ class AparatIE(InfoExtractor):
'id': 'wP8On',
'ext': 'mp4',
'title': 'تیم گلکسی 11 - زومیت',
'age_limit': 0,
},
# 'skip': 'Extremely unreliable',
}
@ -34,7 +35,8 @@ class AparatIE(InfoExtractor):
video_id + '/vt/frame')
webpage = self._download_webpage(embed_url, video_id)
video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage)
video_urls = [video_url.replace('\\/', '/') for video_url in re.findall(
r'(?:fileList\[[0-9]+\]\s*=|"file"\s*:)\s*"([^"]+)"', webpage)]
for i, video_url in enumerate(video_urls):
req = HEADRequest(video_url)
res = self._request_webpage(
@ -46,7 +48,7 @@ class AparatIE(InfoExtractor):
title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
thumbnail = self._search_regex(
r'\s+image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
return {
'id': video_id,
@ -54,4 +56,5 @@ class AparatIE(InfoExtractor):
'url': video_url,
'ext': 'mp4',
'thumbnail': thumbnail,
'age_limit': self._family_friendly_search(webpage),
}

View File

@ -11,9 +11,12 @@ from ..utils import (
class AppleTrailersIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
_TEST = {
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
_TESTS = [{
"url": "http://trailers.apple.com/trailers/wb/manofsteel/",
'info_dict': {
'id': 'manofsteel',
},
"playlist": [
{
"md5": "d97a8e575432dbcb81b7c3acb741f8a8",
@ -60,7 +63,10 @@ class AppleTrailersIE(InfoExtractor):
},
},
]
}
}, {
'url': 'http://trailers.apple.com/ca/metropole/autrui/',
'only_matching': True,
}]
_JSON_RE = r'iTunes.playURL\((.*?)\);'

View File

@ -50,6 +50,9 @@ class ARDMediathekIE(InfoExtractor):
if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
if 'Diese Sendung ist für Jugendliche unter 12 Jahren nicht geeignet. Der Clip ist deshalb nur von 20 bis 6 Uhr verfügbar.' in webpage:
raise ExtractorError('This program is only suitable for those aged 12 and older. Video %s is therefore only available between 20 pm and 6 am.' % video_id, expected=True)
if re.search(r'[\?&]rss($|[=&])', url):
doc = parse_xml(webpage)
if doc.tag == 'rss':

View File

@ -146,6 +146,7 @@ class ArteTVPlus7IE(InfoExtractor):
formats.append(format)
self._check_formats(formats, video_id)
self._sort_formats(formats)
info_dict['formats'] = formats

View File

@ -3,7 +3,7 @@ from __future__ import unicode_literals
import time
import hmac
from .subtitles import SubtitlesInfoExtractor
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse,
@ -17,8 +17,9 @@ from ..utils import (
)
class AtresPlayerIE(SubtitlesInfoExtractor):
class AtresPlayerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
_NETRC_MACHINE = 'atresplayer'
_TESTS = [
{
'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html',
@ -144,13 +145,12 @@ class AtresPlayerIE(SubtitlesInfoExtractor):
thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')
subtitles = {}
subtitle = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
if subtitle:
subtitles['es'] = subtitle
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, subtitles)
return
subtitle_url = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
if subtitle_url:
subtitles['es'] = [{
'ext': 'srt',
'url': subtitle_url,
}]
return {
'id': video_id,
@ -159,5 +159,5 @@ class AtresPlayerIE(SubtitlesInfoExtractor):
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
'subtitles': self.extract_subtitles(video_id, subtitles),
'subtitles': subtitles,
}

View File

@ -50,7 +50,7 @@ class BambuserIE(InfoExtractor):
'duration': int(info['length']),
'view_count': int(info['views_total']),
'uploader': info['username'],
'uploader_id': info['uid'],
'uploader_id': info['owner']['uid'],
}

View File

@ -72,26 +72,29 @@ class BandcampIE(InfoExtractor):
download_link = m_download.group(1)
video_id = self._search_regex(
r'var TralbumData = {.*?id: (?P<id>\d+),?$',
webpage, 'video id', flags=re.MULTILINE | re.DOTALL)
r'(?ms)var TralbumData = {.*?id: (?P<id>\d+),?$',
webpage, 'video id')
download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page')
# We get the dictionary of the track from some javascript code
info = re.search(r'items: (.*?),$', download_webpage, re.MULTILINE).group(1)
info = json.loads(info)[0]
all_info = self._parse_json(self._search_regex(
r'(?sm)items: (.*?),$', download_webpage, 'items'), video_id)
info = all_info[0]
# We pick mp3-320 for now, until format selection can be easily implemented.
mp3_info = info['downloads']['mp3-320']
# If we try to use this url it says the link has expired
initial_url = mp3_info['url']
re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
m_url = re.match(re_url, initial_url)
m_url = re.match(
r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$',
initial_url)
# We build the url we will use to get the final track url
# This url is built in Bandcamp in the script download_bunde_*.js
request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts'))
final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url')
# If we could correctly generate the .rand field the url would be
# in the "download_url" key
final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1)
final_url = self._search_regex(
r'"retry_url":"(.*?)"', final_url_webpage, 'final video URL')
return {
'id': video_id,
@ -106,7 +109,7 @@ class BandcampIE(InfoExtractor):
class BandcampAlbumIE(InfoExtractor):
IE_NAME = 'Bandcamp:album'
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+)|/?(?:$|[?#]))'
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^?#]+)|/?(?:$|[?#]))'
_TESTS = [{
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
@ -130,31 +133,37 @@ class BandcampAlbumIE(InfoExtractor):
],
'info_dict': {
'title': 'Jazz Format Mixtape vol.1',
'id': 'jazz-format-mixtape-vol-1',
'uploader_id': 'blazo',
},
'params': {
'playlistend': 2
},
'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
'skip': 'Bandcamp imposes download limits.'
}, {
'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
'info_dict': {
'title': 'Hierophany of the Open Grave',
'uploader_id': 'nightbringer',
'id': 'hierophany-of-the-open-grave',
},
'playlist_mincount': 9,
}, {
'url': 'http://dotscale.bandcamp.com',
'info_dict': {
'title': 'Loom',
'id': 'dotscale',
'uploader_id': 'dotscale',
},
'playlist_mincount': 7,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('subdomain')
title = mobj.group('title')
display_id = title or playlist_id
webpage = self._download_webpage(url, display_id)
uploader_id = mobj.group('subdomain')
album_id = mobj.group('album_id')
playlist_id = album_id or uploader_id
webpage = self._download_webpage(url, playlist_id)
tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
if not tracks_paths:
raise ExtractorError('The page doesn\'t contain any tracks')
@ -165,8 +174,8 @@ class BandcampAlbumIE(InfoExtractor):
r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False)
return {
'_type': 'playlist',
'uploader_id': uploader_id,
'id': playlist_id,
'display_id': display_id,
'title': title,
'entries': entries,
}

View File

@ -2,12 +2,12 @@ from __future__ import unicode_literals
import xml.etree.ElementTree
from .subtitles import SubtitlesInfoExtractor
from .common import InfoExtractor
from ..utils import ExtractorError
from ..compat import compat_HTTPError
class BBCCoUkIE(SubtitlesInfoExtractor):
class BBCCoUkIE(InfoExtractor):
IE_NAME = 'bbc.co.uk'
IE_DESC = 'BBC iPlayer'
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
@ -215,17 +215,32 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
formats.extend(conn_formats)
return formats
def _extract_captions(self, media, programme_id):
def _get_subtitles(self, media, programme_id):
subtitles = {}
for connection in self._extract_connections(media):
captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}'))
srt = ''
def _extract_text(p):
if p.text is not None:
stripped_text = p.text.strip()
if stripped_text:
return stripped_text
return ' '.join(span.text.strip() for span in p.findall('{http://www.w3.org/2006/10/ttaf1}span'))
for pos, p in enumerate(ps):
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'),
p.text.strip() if p.text is not None else '')
subtitles[lang] = srt
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'), _extract_text(p))
subtitles[lang] = [
{
'url': connection.get('href'),
'ext': 'ttml',
},
{
'data': srt,
'ext': 'srt',
},
]
return subtitles
def _download_media_selector(self, programme_id):
@ -249,7 +264,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
elif kind == 'video':
formats.extend(self._extract_video(media, programme_id))
elif kind == 'captions':
subtitles = self._extract_captions(media, programme_id)
subtitles = self.extract_subtitles(media, programme_id)
return formats, subtitles
@ -273,7 +288,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
formats, subtitles = self._download_media_selector(programme_id)
return programme_id, title, description, duration, formats, subtitles
except ExtractorError as ee:
if not isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
raise
# fallback to legacy playlist
@ -324,10 +339,6 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
else:
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(programme_id, subtitles)
return
self._sort_formats(formats)
return {

View File

@ -0,0 +1,103 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import int_or_none
class BeatportProIE(InfoExtractor):
    """Extractor for the audio previews of tracks on pro.beatport.com."""

    _VALID_URL = r'https?://pro\.beatport\.com/track/(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://pro.beatport.com/track/synesthesia-original-mix/5379371',
        'md5': 'b3c34d8639a2f6a7f734382358478887',
        'info_dict': {
            'id': '5379371',
            'display_id': 'synesthesia-original-mix',
            'ext': 'mp4',
            'title': 'Froxic - Synesthesia (Original Mix)',
        },
    }, {
        'url': 'https://pro.beatport.com/track/love-and-war-original-mix/3756896',
        'md5': 'e44c3025dfa38c6577fbaeb43da43514',
        'info_dict': {
            'id': '3756896',
            'display_id': 'love-and-war-original-mix',
            'ext': 'mp3',
            'title': 'Wolfgang Gartner - Love & War (Original Mix)',
        },
    }, {
        'url': 'https://pro.beatport.com/track/birds-original-mix/4991738',
        'md5': 'a1fd8e8046de3950fd039304c186c05f',
        'info_dict': {
            'id': '4991738',
            'display_id': 'birds-original-mix',
            'ext': 'mp4',
            'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds (Original Mix)",
        }
    }]

    def _real_extract(self, url):
        """Return the info dict for the track addressed by ``url``.

        The track page embeds a ``window.Playables`` JSON object; the entry
        whose numeric ``id`` equals the id from the URL supplies the title,
        the preview formats and the thumbnails.
        """
        mobj = re.match(self._VALID_URL, url)
        track_id = mobj.group('id')
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, display_id)

        playables = self._parse_json(
            self._search_regex(
                r'window\.Playables\s*=\s*({.+?});', webpage,
                'playables info', flags=re.DOTALL),
            track_id)

        # Raises StopIteration when the page does not list the requested track
        track = next(t for t in playables['tracks'] if t['id'] == int(track_id))

        title = ', '.join((a['name'] for a in track['artists'])) + ' - ' + track['name']
        if track['mix']:
            title += ' (' + track['mix'] + ')'

        formats = []
        for ext, info in track['preview'].items():
            preview_url = info.get('url')
            if not preview_url:
                continue
            fmt = {
                'url': preview_url,
                'ext': ext,
                'format_id': ext,
                'vcodec': 'none',
            }
            if ext == 'mp3':
                fmt['preference'] = 0
                fmt['acodec'] = 'mp3'
                fmt['abr'] = 96
                fmt['asr'] = 44100
            elif ext == 'mp4':
                fmt['preference'] = 1
                fmt['acodec'] = 'aac'
                fmt['abr'] = 96
                fmt['asr'] = 44100
            formats.append(fmt)
        self._sort_formats(formats)

        images = []
        for name, info in track['images'].items():
            image_url = info.get('url')
            # The 'dynamic' entry is skipped, as are entries without a URL
            if name == 'dynamic' or not image_url:
                continue
            image = {
                'id': name,
                'url': image_url,
                'height': int_or_none(info.get('height')),
                'width': int_or_none(info.get('width')),
            }
            images.append(image)

        return {
            # Apply the fallback before converting: compat_str(None) is the
            # truthy string 'None', so converting first would never fall back.
            'id': compat_str(track.get('id') or track_id),
            'display_id': track.get('slug') or display_id,
            'title': title,
            'formats': formats,
            'thumbnails': images,
        }

View File

@ -9,7 +9,7 @@ class BeegIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
_TEST = {
'url': 'http://beeg.com/5416503',
'md5': '634526ae978711f6b748fe0dd6c11f57',
'md5': '1bff67111adb785c51d1b42959ec10e5',
'info_dict': {
'id': '5416503',
'ext': 'mp4',

View File

@ -1,40 +1,35 @@
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import remove_start
from ..utils import (
remove_start,
int_or_none,
)
class BlinkxIE(InfoExtractor):
_VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
_VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
IE_NAME = 'blinkx'
_TEST = {
'url': 'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB',
'md5': '2e9a07364af40163a908edbf10bb2492',
'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ',
'md5': '337cf7a344663ec79bf93a526a2e06c7',
'info_dict': {
'id': '8aQUy7GV',
'id': 'Da0Gw3xc',
'ext': 'mp4',
'title': 'Police Car Rolls Away',
'uploader': 'stupidvideos.com',
'upload_date': '20131215',
'timestamp': 1387068000,
'description': 'A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!',
'duration': 14.886,
'thumbnails': [{
'width': 100,
'height': 76,
'resolution': '100x76',
'url': 'http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg',
}],
'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News',
'uploader': 'IGN News',
'upload_date': '20150217',
'timestamp': 1424215740,
'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.',
'duration': 47.743333,
},
}
def _real_extract(self, rl):
m = re.match(self._VALID_URL, rl)
video_id = m.group('id')
def _real_extract(self, url):
video_id = self._match_id(url)
display_id = video_id[:8]
api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' +
@ -60,18 +55,20 @@ class BlinkxIE(InfoExtractor):
elif m['type'] in ('flv', 'mp4'):
vcodec = remove_start(m['vcodec'], 'ff')
acodec = remove_start(m['acodec'], 'ff')
tbr = (int(m['vbr']) + int(m['abr'])) // 1000
vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000)
abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000)
tbr = vbr + abr if vbr and abr else None
format_id = '%s-%sk-%s' % (vcodec, tbr, m['w'])
formats.append({
'format_id': format_id,
'url': m['link'],
'vcodec': vcodec,
'acodec': acodec,
'abr': int(m['abr']) // 1000,
'vbr': int(m['vbr']) // 1000,
'abr': abr,
'vbr': vbr,
'tbr': tbr,
'width': int(m['w']),
'height': int(m['h']),
'width': int_or_none(m.get('w')),
'height': int_or_none(m.get('h')),
})
self._sort_formats(formats)

View File

@ -3,7 +3,6 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..compat import (
compat_str,
@ -18,7 +17,7 @@ from ..utils import (
)
class BlipTVIE(SubtitlesInfoExtractor):
class BlipTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'
_TESTS = [
@ -143,7 +142,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
categories = [category.text for category in item.findall('category')]
formats = []
subtitles = {}
subtitles_urls = {}
media_group = item.find(media('group'))
for media_content in media_group.findall(media('content')):
@ -161,7 +160,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
}
lang = role.rpartition('-')[-1].strip().lower()
langcode = LANGS.get(lang, lang)
subtitles[langcode] = url
subtitles_urls[langcode] = url
elif media_type.startswith('video/'):
formats.append({
'url': real_url,
@ -175,11 +174,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
})
self._sort_formats(formats)
# subtitles
video_subtitles = self.extract_subtitles(video_id, subtitles)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, subtitles)
return
subtitles = self.extract_subtitles(video_id, subtitles_urls)
return {
'id': video_id,
@ -192,15 +187,22 @@ class BlipTVIE(SubtitlesInfoExtractor):
'thumbnail': thumbnail,
'categories': categories,
'formats': formats,
'subtitles': video_subtitles,
'subtitles': subtitles,
}
def _download_subtitle_url(self, sub_lang, url):
# For some weird reason, blip.tv serves a video instead of subtitles
# when we request with a common UA
req = compat_urllib_request.Request(url)
req.add_header('User-Agent', 'youtube-dl')
return self._download_webpage(req, None, note=False)
def _get_subtitles(self, video_id, subtitles_urls):
subtitles = {}
for lang, url in subtitles_urls.items():
# For some weird reason, blip.tv serves a video instead of subtitles
# when we request with a common UA
req = compat_urllib_request.Request(url)
req.add_header('User-Agent', 'youtube-dl')
subtitles[lang] = [{
# The extension is 'srt' but it's actually an 'ass' file
'ext': 'ass',
'data': self._download_webpage(req, None, note=False),
}]
return subtitles
class BlipTVUserIE(InfoExtractor):

View File

@ -6,7 +6,7 @@ from .common import InfoExtractor
class BloombergIE(InfoExtractor):
_VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'
_VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<id>.+?)\.html'
_TEST = {
'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
@ -20,9 +20,9 @@ class BloombergIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
name = self._match_id(url)
webpage = self._download_webpage(url, name)
f4m_url = self._search_regex(
r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
'f4m url')

View File

@ -41,7 +41,7 @@ class BreakIE(InfoExtractor):
'tbr': media['bitRate'],
'width': media['width'],
'height': media['height'],
} for media in info['media']]
} for media in info['media'] if media.get('mediaPurpose') == 'play']
if not formats:
formats.append({

View File

@ -95,6 +95,7 @@ class BrightcoveIE(InfoExtractor):
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL',
'info_dict': {
'title': 'Sealife',
'id': '3550319591001',
},
'playlist_mincount': 7,
},
@ -108,7 +109,7 @@ class BrightcoveIE(InfoExtractor):
"""
# Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553
object_str = re.sub(r'(<param name="[^"]+" value="[^"]+")>',
object_str = re.sub(r'(<param(?:\s+[a-zA-Z0-9_]+="[^"]*")*)>',
lambda m: m.group(1) + '/>', object_str)
# Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608
object_str = object_str.replace('<--', '<!--')
@ -247,7 +248,7 @@ class BrightcoveIE(InfoExtractor):
playlist_info = json_data['videoList']
videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']]
return self.playlist_result(videos, playlist_id=playlist_info['id'],
return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'],
playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
def _extract_video_info(self, video_info):

View File

@ -33,6 +33,7 @@ class BuzzFeedIE(InfoExtractor):
'skip_download': True, # Got enough YouTube download tests
},
'info_dict': {
'id': 'look-at-this-cute-dog-omg',
'description': 're:Munchkin the Teddy Bear is back ?!',
'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill',
},
@ -42,8 +43,8 @@ class BuzzFeedIE(InfoExtractor):
'ext': 'mp4',
'upload_date': '20141124',
'uploader_id': 'CindysMunchkin',
'description': 're:© 2014 Munchkin the Shih Tzu',
'uploader': 'Munchkin the Shih Tzu',
'description': 're:© 2014 Munchkin the',
'uploader': 're:^Munchkin the',
'title': 're:Munchkin the Teddy Bear gets her exercise',
},
}]

View File

@ -0,0 +1,153 @@
# coding: utf-8
from __future__ import unicode_literals
import datetime
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urlparse,
)
from ..utils import (
parse_iso8601,
str_to_int,
)
class CamdemyIE(InfoExtractor):
    """Extractor for single media pages on camdemy.com."""

    _VALID_URL = r'http://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'
    _TESTS = [{
        # single file
        'url': 'http://www.camdemy.com/media/5181/',
        'md5': '5a5562b6a98b37873119102e052e311b',
        'info_dict': {
            'id': '5181',
            'ext': 'mp4',
            'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': '',
            'creator': 'ss11spring',
            'upload_date': '20130114',
            'timestamp': 1358154556,
            'view_count': int,
        }
    }, {
        # With non-empty description
        'url': 'http://www.camdemy.com/media/13885',
        'md5': '4576a3bb2581f86c61044822adbd1249',
        'info_dict': {
            'id': '13885',
            'ext': 'mp4',
            'title': 'EverCam + Camdemy QuickStart',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:050b62f71ed62928f8a35f1a41e186c9',
            'creator': 'evercam',
            'upload_date': '20140620',
            'timestamp': 1403271569,
        }
    }, {
        # External source
        'url': 'http://www.camdemy.com/media/14842',
        'md5': '50e1c3c3aa233d3d7b7daa2fa10b1cf7',
        'info_dict': {
            'id': '2vsYQzNIsJo',
            'ext': 'mp4',
            'upload_date': '20130211',
            'uploader': 'Hun Kim',
            'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
            'uploader_id': 'hunkimtutorials',
            'title': 'Excel 2013 Tutorial - How to add Password Protection',
        }
    }]

    def _real_extract(self, url):
        """Return the info dict for a media page, or defer to its external source.

        If the page names an external source, extraction is delegated to that
        URL. Otherwise the metadata comes from the oEmbed endpoint, and the
        video file name is read from ``fileList.xml`` in the ``video/`` folder
        next to the thumbnail.
        """
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        src_from = self._html_search_regex(
            r"<div class='srcFrom'>Source: <a title='([^']+)'", page,
            'external source', default=None)
        if src_from:
            return self.url_result(src_from)

        # The page URL is passed as a query parameter, so it has to be
        # escaped; appended raw, any '?' or '&' in it would corrupt the query.
        oembed_query = compat_urllib_parse.urlencode({
            'format': 'json',
            'url': url,
        })
        oembed_obj = self._download_json(
            'http://www.camdemy.com/oembed/?' + oembed_query, video_id)

        thumb_url = oembed_obj['thumbnail_url']
        video_folder = compat_urlparse.urljoin(thumb_url, 'video/')
        file_list_doc = self._download_xml(
            compat_urlparse.urljoin(video_folder, 'fileList.xml'),
            video_id, 'Filelist XML')
        file_name = file_list_doc.find('./video/item/fileName').text
        video_url = compat_urlparse.urljoin(video_folder, file_name)

        # The "Posted" value is parsed with a fixed UTC+8 offset
        timestamp = parse_iso8601(self._html_search_regex(
            r"<div class='title'>Posted\s*:</div>\s*<div class='value'>([^<>]+)<",
            page, 'creation time', fatal=False),
            delimiter=' ', timezone=datetime.timedelta(hours=8))
        view_count = str_to_int(self._html_search_regex(
            r"<div class='title'>Views\s*:</div>\s*<div class='value'>([^<>]+)<",
            page, 'view count', fatal=False))

        return {
            'id': video_id,
            'url': video_url,
            'title': oembed_obj['title'],
            'thumbnail': thumb_url,
            'description': self._html_search_meta('description', page),
            'creator': oembed_obj['author_name'],
            # Duration is optional metadata; do not fail when it is missing
            'duration': oembed_obj.get('duration'),
            'timestamp': timestamp,
            'view_count': view_count,
        }
class CamdemyFolderIE(InfoExtractor):
    """Extractor for camdemy.com folders, returned as playlists of media pages."""

    # Dots are escaped so they only match literal dots; "www." is optional,
    # consistent with the single-media extractor's pattern.
    _VALID_URL = r'http://(?:www\.)?camdemy\.com/folder/(?P<id>\d+)'
    _TESTS = [{
        # links with trailing slash
        'url': 'http://www.camdemy.com/folder/450',
        'info_dict': {
            'id': '450',
            'title': '信號與系統 2012 & 2011 (Signals and Systems)',
        },
        'playlist_mincount': 145
    }, {
        # links without trailing slash
        # and multi-page
        'url': 'http://www.camdemy.com/folder/853',
        'info_dict': {
            'id': '853',
            'title': '科學計算 - 使用 Matlab'
        },
        'playlist_mincount': 20
    }, {
        # with displayMode parameter. For testing the codes to add parameters
        'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg',
        'info_dict': {
            'id': '853',
            'title': '科學計算 - 使用 Matlab'
        },
        'playlist_mincount': 20
    }]

    def _real_extract(self, url):
        """Return a playlist with one entry per media link on the folder page.

        The folder page is requested with ``displayMode=list`` (overriding any
        value already in the URL), and the playlist title is taken from the
        page's ``keywords`` meta tag.
        """
        folder_id = self._match_id(url)

        # Add displayMode=list so that all links are displayed in a single page
        parsed_url = list(compat_urlparse.urlparse(url))
        query = dict(compat_urlparse.parse_qsl(parsed_url[4]))
        query.update({'displayMode': 'list'})
        parsed_url[4] = compat_urllib_parse.urlencode(query)
        final_url = compat_urlparse.urlunparse(parsed_url)

        page = self._download_webpage(final_url, folder_id)
        matches = re.findall(r"href='(/media/\d+/?)'", page)

        entries = [self.url_result('http://www.camdemy.com' + media_path)
                   for media_path in matches]

        folder_title = self._html_search_meta('keywords', page)

        return self.playlist_result(entries, folder_id, folder_title)

View File

@ -15,12 +15,13 @@ from ..utils import (
class CanalplusIE(InfoExtractor):
IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv'
_VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'
_VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv|itele\.fr)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s'
_SITE_ID_MAP = {
'canalplus.fr': 'cplus',
'piwiplus.fr': 'teletoon',
'd8.tv': 'd8',
'itele.fr': 'itele',
}
_TESTS = [{
@ -53,6 +54,16 @@ class CanalplusIE(InfoExtractor):
'upload_date': '20131108',
},
'skip': 'videos get deleted after a while',
}, {
'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559',
'md5': '65aa83ad62fe107ce29e564bb8712580',
'info_dict': {
'id': '1213714',
'ext': 'flv',
'title': 'Aubervilliers : un lycée en colère - Le 11/02/2015 à 06h45',
'description': 'md5:8216206ec53426ea6321321f3b3c16db',
'upload_date': '20150211',
},
}]
def _real_extract(self, url):

View File

@ -1,7 +1,5 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@ -39,8 +37,7 @@ class CBSIE(InfoExtractor):
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
real_id = self._search_regex(
r"video\.settings\.pid\s*=\s*'([^']+)';",

View File

@ -0,0 +1,30 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class CBSSportsIE(InfoExtractor):
    """Resolve cbssports.com video player pages to ThePlatform media ids."""

    _VALID_URL = r'http://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s',
        'info_dict': {
            'id': '_d5_GbO8p1sT',
            'ext': 'flv',
            'title': 'US Open flashbacks: 1990s',
            'description': 'Bill Macatee relives the best moments in US Open history from the 1990s.',
        },
    }

    def _real_extract(self, url):
        """Find the video in its section listing and hand it to ThePlatform."""
        section, video_id = re.match(self._VALID_URL, url).group('section', 'id')

        listing_url = (
            'http://www.cbssports.com/data/video/player/getVideos/%s?as=json'
            % section)
        all_videos = self._download_json(listing_url, video_id)

        # The listing describes every video of the section; take the first
        # entry whose 'pcid' equals the id from the URL.
        candidates = (v for v in all_videos if v['pcid'] == video_id)
        video_info = next(candidates)

        platform_url = 'theplatform:%s' % video_info['pid']
        return self.url_result(platform_url, 'ThePlatform')

View File

@ -0,0 +1,99 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
qualities,
unified_strdate,
)
class CCCIE(InfoExtractor):
    """Extractor for recordings published on media.ccc.de."""

    IE_NAME = 'media.ccc.de'
    _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/[^?#]+/[^?#/]*?_(?P<id>[0-9]{8,})._[^?#/]*\.html'
    _TEST = {
        'url': 'http://media.ccc.de/browse/congress/2013/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor.html#video',
        'md5': '205a365d0d57c0b1e43a12c9ffe8f9be',
        'info_dict': {
            'id': '20131228183',
            'ext': 'mp4',
            'title': 'Introduction to Processor Design',
            'description': 'md5:5ddbf8c734800267f2cee4eab187bc1b',
            'thumbnail': r're:^https?://.*\.jpg$',
            'view_count': int,
            'upload_date': '20131229',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for a talk page.

        Each download listed on the page becomes an HTTP format; when a
        ``.torrent`` link accompanies it, a second, low-preference format is
        added for the torrent file.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Format ids ordered from least to most preferred; the two lists
        # differ only in the order within each quality tier.
        if self._downloader.params.get('prefer_free_formats'):
            preference = qualities(['mp3', 'opus', 'mp4-lq', 'webm-lq', 'h264-sd', 'mp4-sd', 'webm-sd', 'mp4', 'webm', 'mp4-hd', 'h264-hd', 'webm-hd'])
        else:
            preference = qualities(['opus', 'mp3', 'webm-lq', 'mp4-lq', 'webm-sd', 'h264-sd', 'mp4-sd', 'webm', 'mp4', 'webm-hd', 'mp4-hd', 'h264-hd'])

        title = self._html_search_regex(
            r'(?s)<h1>(.*?)</h1>', webpage, 'title')
        description = self._html_search_regex(
            r"(?s)<p class='description'>(.*?)</p>",
            webpage, 'description', fatal=False)
        upload_date = unified_strdate(self._html_search_regex(
            r"(?s)<span class='[^']*fa-calendar-o'></span>(.*?)</li>",
            webpage, 'upload date', fatal=False))
        view_count = int_or_none(self._html_search_regex(
            r"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>",
            webpage, 'view count', fatal=False))

        matches = re.finditer(r'''(?xs)
            <(?:span|div)\s+class='label\s+filetype'>(?P<format>.*?)</(?:span|div)>\s*
            <a\s+href='(?P<http_url>[^']+)'>\s*
            (?:
                .*?
                <a\s+href='(?P<torrent_url>[^']+\.torrent)'
            )?''', webpage)
        formats = []
        for m in matches:
            format_name = m.group('format')
            format_id = self._search_regex(
                r'.*/([a-z0-9_-]+)/[^/]*$',
                m.group('http_url'), 'format id', default=None)

            # format_id is None when the URL has no recognisable directory
            # component; a substring test on None would raise TypeError.
            if format_id is None:
                vcodec = None
            elif 'h264' in format_id:
                vcodec = 'h264'
            elif format_id in ('mp3', 'opus'):
                vcodec = 'none'
            else:
                vcodec = None

            formats.append({
                'format_id': format_id,
                'format': format_name,
                'url': m.group('http_url'),
                'vcodec': vcodec,
                'preference': preference(format_id),
            })

            if m.group('torrent_url'):
                formats.append({
                    'format_id': 'torrent-%s' % (format_name if format_id is None else format_id),
                    'format': '%s (torrent)' % format_name,
                    'proto': 'torrent',
                    'format_note': '(unsupported; will just download the .torrent file)',
                    'vcodec': vcodec,
                    # Torrents always rank below the direct HTTP downloads
                    'preference': -100 + preference(format_id),
                    'url': m.group('torrent_url'),
                })
        self._sort_formats(formats)

        thumbnail = self._html_search_regex(
            r"<video.*?poster='([^']+)'", webpage, 'thumbnail', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'view_count': view_count,
            'upload_date': upload_date,
            'formats': formats,
        }

View File

@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re
from .subtitles import SubtitlesInfoExtractor
from .common import InfoExtractor
from ..compat import (
compat_urllib_request,
compat_urllib_parse,
@ -15,7 +15,7 @@ from ..utils import (
)
class CeskaTelevizeIE(SubtitlesInfoExtractor):
class CeskaTelevizeIE(InfoExtractor):
_VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'
_TESTS = [
@ -107,13 +107,7 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor):
subtitles = {}
subs = item.get('subtitles')
if subs:
subtitles['cs'] = subs[0]['url']
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, subtitles)
return
subtitles = self._fix_subtitles(self.extract_subtitles(video_id, subtitles))
subtitles = self.extract_subtitles(episode_id, subs)
return {
'id': episode_id,
@ -125,11 +119,20 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor):
'subtitles': subtitles,
}
def _get_subtitles(self, episode_id, subs):
original_subtitles = self._download_webpage(
subs[0]['url'], episode_id, 'Downloading subtitles')
srt_subs = self._fix_subtitles(original_subtitles)
return {
'cs': [{
'ext': 'srt',
'data': srt_subs,
}]
}
@staticmethod
def _fix_subtitles(subtitles):
""" Convert millisecond-based subtitles to SRT """
if subtitles is None:
return subtitles # subtitles not requested
def _msectotimecode(msec):
""" Helper utility to convert milliseconds to timecode """
@ -149,7 +152,4 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor):
else:
yield line
fixed_subtitles = {}
for k, v in subtitles.items():
fixed_subtitles[k] = "\r\n".join(_fix_subtitle(v))
return fixed_subtitles
return "\r\n".join(_fix_subtitle(subtitles))

View File

@ -0,0 +1,84 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
parse_duration,
int_or_none,
)
class ChirbitIE(InfoExtractor):
    """Extractor for single audio posts ("chirbits") hosted on chirb.it."""

    IE_NAME = 'chirbit'
    _VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)'
    _TESTS = [{
        'url': 'http://chirb.it/PrIPv5',
        'md5': '9847b0dad6ac3e074568bf2cfb197de8',
        'info_dict': {
            'id': 'PrIPv5',
            'ext': 'mp3',
            'title': 'Фасадстрой',
            'duration': 52,
            'view_count': int,
            'comment_count': int,
        }
    }, {
        'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the canonical chirb.it page for the id and scrape its metadata."""
        audio_id = self._match_id(url)

        # Always load the plain page, whichever URL variant was given
        page_url = 'http://chirb.it/%s' % audio_id
        webpage = self._download_webpage(page_url, audio_id)

        # Mandatory fields first: a miss on either one aborts the extraction
        audio_url = self._search_regex(
            r'"setFile"\s*,\s*"([^"]+)"', webpage, 'audio url')
        title = self._search_regex(
            r'itemprop="name">([^<]+)', webpage, 'title')

        # Optional fields are looked up non-fatally
        raw_duration = self._html_search_meta(
            'duration', webpage, 'duration', fatal=False)
        raw_views = self._search_regex(
            r'itemprop="playCount"\s*>(\d+)', webpage,
            'listen count', fatal=False)
        raw_comments = self._search_regex(
            r'>(\d+) Comments?:', webpage,
            'comment count', fatal=False)

        info = {
            'id': audio_id,
            'url': audio_url,
            'title': title,
        }
        info['duration'] = parse_duration(raw_duration)
        info['view_count'] = int_or_none(raw_views)
        info['comment_count'] = int_or_none(raw_comments)
        return info
class ChirbitProfileIE(InfoExtractor):
    """Extractor for chirbit.com user profiles, returned as playlists."""

    IE_NAME = 'chirbit:profile'
    # The dot in "chirbit.com" is escaped so it only matches a literal dot
    _VALID_URL = r'https?://(?:www\.)?chirbit\.com/(?:rss/)?(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://chirbit.com/ScarletBeauty',
        'info_dict': {
            'id': 'ScarletBeauty',
            'title': 'Chirbits by ScarletBeauty',
        },
        'playlist_mincount': 3,
    }

    def _real_extract(self, url):
        """Build a playlist from the profile's RSS feed.

        Every ``<link>`` under ``channel/item`` becomes an entry handled by
        the 'Chirbit' extractor; the playlist title is the channel title.
        """
        profile_id = self._match_id(url)

        rss = self._download_xml(
            'http://chirbit.com/rss/%s' % profile_id, profile_id)

        entries = [
            self.url_result(audio_url.text, 'Chirbit')
            for audio_url in rss.findall('./channel/item/link')]

        title = rss.find('./channel/title').text

        return self.playlist_result(entries, profile_id, title)

View File

@ -105,6 +105,7 @@ class CloudyIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
file_key = self._search_regex(
r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key')
[r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'],
webpage, 'file_key')
return self._extract_video(video_host, video_id, file_key)

View File

@ -250,6 +250,8 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
})
self._sort_formats(formats)
subtitles = self._extract_subtitles(cdoc, guid)
virtual_id = show_name + ' ' + epTitle + ' part ' + compat_str(part_num + 1)
entries.append({
'id': guid,
@ -260,6 +262,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
'duration': duration,
'thumbnail': thumbnail,
'description': description,
'subtitles': subtitles,
})
return {

View File

@ -27,7 +27,6 @@ from ..utils import (
compiled_regex_type,
ExtractorError,
float_or_none,
HEADRequest,
int_or_none,
RegexNotFoundError,
sanitize_filename,
@ -145,17 +144,25 @@ class InfoExtractor(object):
thumbnail: Full URL to a video thumbnail image.
description: Full video description.
uploader: Full name of the video uploader.
creator: The main artist who created the video.
timestamp: UNIX timestamp of the moment the video became available.
upload_date: Video upload date (YYYYMMDD).
If not explicitly set, calculated from timestamp.
uploader_id: Nickname or id of the video uploader.
location: Physical location where the video was filmed.
subtitles: The subtitle file contents as a dictionary in the format
{language: subtitles}.
subtitles: The available subtitles as a dictionary in the format
{language: subformats}. "subformats" is a list sorted from
lower to higher preference, each element is a dictionary
with the "ext" entry and one of:
* "data": The subtitles file contents
* "url": A url pointing to the subtitles file
automatic_captions: Like 'subtitles', used by the YoutubeIE for
automatically generated captions
duration: Length of the video in seconds, as an integer.
view_count: How many users have watched the video on the platform.
like_count: Number of positive ratings of the video
dislike_count: Number of negative ratings of the video
average_rating: Average rating given by users, the scale used depends on the webpage
comment_count: Number of comments on the video
comments: A list of comments, each with one or more of the following
properties (all but one of text or html optional):
@ -263,8 +270,15 @@ class InfoExtractor(object):
def extract(self, url):
    """Extracts URL information and returns it in list of dicts.

    Initializes the extractor and delegates to ``_real_extract``.

    Raises ExtractorError: errors of that type are re-raised untouched;
    an incomplete HTTP read is reported as an expected network error, and
    KeyError / StopIteration escaping the extractor are wrapped as
    (unexpected) extractor errors with the original exception as cause.
    """
    try:
        self.initialize()
        return self._real_extract(url)
    except ExtractorError:
        # Already in the form callers expect.
        raise
    except compat_http_client.IncompleteRead as e:
        raise ExtractorError('A network error has occurred.', cause=e, expected=True)
    except (KeyError, StopIteration) as e:
        raise ExtractorError('An extractor error has occurred.', cause=e)
def set_downloader(self, downloader):
"""Sets the downloader for this IE."""
@ -383,6 +397,16 @@ class InfoExtractor(object):
if blocked_iframe:
msg += ' Visit %s for more details' % blocked_iframe
raise ExtractorError(msg, expected=True)
if '<title>The URL you requested has been blocked</title>' in content[:512]:
msg = (
'Access to this webpage has been blocked by Indian censorship. '
'Use a VPN or proxy server (with --proxy) to route around it.')
block_msg = self._html_search_regex(
r'</h1><p>(.*?)</p>',
content, 'block message', default=None)
if block_msg:
msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
raise ExtractorError(msg, expected=True)
return content
@ -506,7 +530,7 @@ class InfoExtractor(object):
if mobj:
break
if os.name != 'nt' and sys.stderr.isatty():
if not self._downloader.params.get('no_color') and os.name != 'nt' and sys.stderr.isatty():
_name = '\033[0;34m%s\033[0m' % name
else:
_name = name
@ -655,6 +679,21 @@ class InfoExtractor(object):
}
return RATING_TABLE.get(rating.lower(), None)
def _family_friendly_search(self, html):
    """Translate the ``isFamilyFriendly`` meta flag into an age limit.

    Returns 0 for '1'/'true', 18 for '0'/'false' (case-insensitive) and
    None when the meta tag is absent/empty or carries any other value.
    See http://schema.org/VideoObject
    """
    flag = self._html_search_meta('isFamilyFriendly', html)
    if flag:
        age_limit_by_flag = {
            '1': 0,
            'true': 0,
            '0': 18,
            'false': 18,
        }
        return age_limit_by_flag.get(flag.lower(), None)
    return None
def _twitter_search_player(self, html):
    """Look up the ``twitter:player`` meta tag of *html* and return it."""
    player = self._html_search_meta(
        'twitter:player', html, 'twitter card player')
    return player
@ -704,15 +743,15 @@ class InfoExtractor(object):
preference,
f.get('language_preference') if f.get('language_preference') is not None else -1,
f.get('quality') if f.get('quality') is not None else -1,
f.get('tbr') if f.get('tbr') is not None else -1,
f.get('filesize') if f.get('filesize') is not None else -1,
f.get('vbr') if f.get('vbr') is not None else -1,
f.get('height') if f.get('height') is not None else -1,
f.get('width') if f.get('width') is not None else -1,
ext_preference,
f.get('tbr') if f.get('tbr') is not None else -1,
f.get('vbr') if f.get('vbr') is not None else -1,
f.get('abr') if f.get('abr') is not None else -1,
audio_ext_preference,
f.get('fps') if f.get('fps') is not None else -1,
f.get('filesize') if f.get('filesize') is not None else -1,
f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
f.get('source_preference') if f.get('source_preference') is not None else -1,
f.get('format_id'),
@ -728,10 +767,12 @@ class InfoExtractor(object):
formats)
def _is_valid_url(self, url, video_id, item='video'):
url = self._proto_relative_url(url, scheme='http:')
# For now assume non HTTP(S) URLs always valid
if not (url.startswith('http://') or url.startswith('https://')):
return True
try:
self._request_webpage(
HEADRequest(url), video_id,
'Checking %s URL' % item)
self._request_webpage(url, video_id, 'Checking %s URL' % item)
return True
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError):
@ -764,7 +805,7 @@ class InfoExtractor(object):
self.to_screen(msg)
time.sleep(timeout)
def _extract_f4m_formats(self, manifest_url, video_id):
def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None):
manifest = self._download_xml(
manifest_url, video_id, 'Downloading f4m manifest',
'Unable to download f4m manifest')
@ -777,30 +818,32 @@ class InfoExtractor(object):
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
for i, media_el in enumerate(media_nodes):
if manifest_version == '2.0':
manifest_url = '/'.join(manifest_url.split('/')[:-1]) + '/' + media_el.attrib.get('href')
manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' +
(media_el.attrib.get('href') or media_el.attrib.get('url')))
tbr = int_or_none(media_el.attrib.get('bitrate'))
format_id = 'f4m-%d' % (i if tbr is None else tbr)
formats.append({
'format_id': format_id,
'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])),
'url': manifest_url,
'ext': 'flv',
'tbr': tbr,
'width': int_or_none(media_el.attrib.get('width')),
'height': int_or_none(media_el.attrib.get('height')),
'preference': preference,
})
self._sort_formats(formats)
return formats
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
entry_protocol='m3u8', preference=None):
entry_protocol='m3u8', preference=None,
m3u8_id=None):
formats = [{
'format_id': 'm3u8-meta',
'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
'url': m3u8_url,
'ext': ext,
'protocol': 'm3u8',
'preference': -1,
'preference': preference - 1 if preference else -1,
'resolution': 'multiple',
'format_note': 'Quality selection URL',
}]
@ -815,6 +858,7 @@ class InfoExtractor(object):
note='Downloading m3u8 information',
errnote='Failed to download m3u8 information')
last_info = None
last_media = None
kv_rex = re.compile(
r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')
for line in m3u8_doc.splitlines():
@ -825,6 +869,13 @@ class InfoExtractor(object):
if v.startswith('"'):
v = v[1:-1]
last_info[m.group('key')] = v
elif line.startswith('#EXT-X-MEDIA:'):
last_media = {}
for m in kv_rex.finditer(line):
v = m.group('val')
if v.startswith('"'):
v = v[1:-1]
last_media[m.group('key')] = v
elif line.startswith('#') or not line.strip():
continue
else:
@ -832,9 +883,13 @@ class InfoExtractor(object):
formats.append({'url': format_url(line)})
continue
tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
format_id = []
if m3u8_id:
format_id.append(m3u8_id)
last_media_name = last_media.get('NAME') if last_media else None
format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats)))
f = {
'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)),
'format_id': '-'.join(format_id),
'url': format_url(line.strip()),
'tbr': tbr,
'ext': ext,
@ -854,54 +909,78 @@ class InfoExtractor(object):
width_str, height_str = resolution.split('x')
f['width'] = int(width_str)
f['height'] = int(height_str)
if last_media is not None:
f['m3u8_media'] = last_media
last_media = None
formats.append(f)
last_info = {}
self._sort_formats(formats)
return formats
# TODO: improve extraction
def _extract_smil_formats(self, smil_url, video_id, fatal=True):
    """Download a SMIL document and return the sorted formats it lists.

    smil_url -- URL of the SMIL file
    video_id -- id used for reporting while downloading
    fatal    -- forwarded to the XML download; when the download helper
                returns False (only expected with fatal=False) an empty
                list is returned

    If the document has ``./body/seq/video`` elements only the first one
    is parsed; otherwise every ``./body/switch/video`` element is parsed.
    """
    smil = self._download_xml(
        smil_url, video_id, 'Downloading SMIL file',
        'Unable to download SMIL file', fatal=fatal)
    if smil is False:
        assert not fatal
        return []

    # The <meta> element is optional: without it there is simply no base.
    meta = smil.find('./head/meta')
    base = meta.get('base') if meta is not None else None

    formats = []
    rtmp_count = 0
    seq_videos = smil.findall('./body/seq/video')
    if seq_videos:
        videos = seq_videos[:1]
    else:
        videos = smil.findall('./body/switch/video')
    for video in videos:
        fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
        formats.extend(fmts)

    self._sort_formats(formats)

    return formats
def _parse_smil_video(self, video, video_id, base, rtmp_count):
    """Turn one SMIL <video> element into a list of format dicts.

    video      -- the <video> element
    video_id   -- id forwarded to the m3u8 extraction
    base       -- base URL taken from the SMIL head (may be None)
    rtmp_count -- number of rtmp formats seen so far

    Returns a ``(formats, rtmp_count)`` tuple in every case; ``formats``
    is empty when the element has no ``src`` or its protocol cannot be
    determined / is not one of m3u8, rtmp or http(s).
    """
    src = video.get('src')
    if not src:
        return ([], rtmp_count)

    bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
    width = int_or_none(video.get('width'))
    height = int_or_none(video.get('height'))

    # Without an explicit protocol, guess it from the base URL.
    proto = video.get('proto')
    if not proto and base:
        if base.startswith('rtmp'):
            proto = 'rtmp'
        elif base.startswith('http'):
            proto = 'http'
    ext = video.get('ext')

    if proto == 'm3u8':
        return (self._extract_m3u8_formats(src, video_id, ext), rtmp_count)
    if proto == 'rtmp':
        rtmp_count += 1
        streamer = video.get('streamer') or base
        return ([{
            'url': streamer,
            'play_path': src,
            'ext': 'flv',
            'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
            'tbr': bitrate,
            'width': width,
            'height': height,
        }], rtmp_count)
    # proto may still be None here, so test it before calling startswith.
    if proto and proto.startswith('http'):
        return ([{
            'url': (base or '') + src,
            'ext': ext or 'flv',
            'tbr': bitrate,
            'width': width,
            'height': height,
        }], rtmp_count)

    # Unknown protocol: nothing to add, but keep the return shape so the
    # caller can always unpack the result.
    return ([], rtmp_count)
def _live_title(self, name):
""" Generate the title for a live video """
now = datetime.datetime.now()
@ -965,6 +1044,27 @@ class InfoExtractor(object):
any_restricted = any_restricted or is_restricted
return not any_restricted
def extract_subtitles(self, *args, **kwargs):
    """Return the subtitles only when the downloader params request them.

    ``_get_subtitles`` is called (with the same arguments) if either the
    'writesubtitles' or the 'listsubtitles' param is truthy; otherwise an
    empty dict is returned.
    """
    params = self._downloader.params
    requested = params.get('writesubtitles', False) or params.get('listsubtitles')
    if not requested:
        return {}
    return self._get_subtitles(*args, **kwargs)
def _get_subtitles(self, *args, **kwargs):
    """Placeholder for the subtitle lookup; always raises NotImplementedError."""
    message = "This method must be implemented by subclasses"
    raise NotImplementedError(message)
def extract_automatic_captions(self, *args, **kwargs):
    """Return automatic captions only when the downloader params request them.

    ``_get_automatic_captions`` is called (with the same arguments) if
    either the 'writeautomaticsub' or the 'listsubtitles' param is
    truthy; otherwise an empty dict is returned.
    """
    params = self._downloader.params
    requested = params.get('writeautomaticsub', False) or params.get('listsubtitles')
    if not requested:
        return {}
    return self._get_automatic_captions(*args, **kwargs)
def _get_automatic_captions(self, *args, **kwargs):
    """Placeholder for the caption lookup; always raises NotImplementedError."""
    message = "This method must be implemented by subclasses"
    raise NotImplementedError(message)
def _subtitles_timecode(self, seconds):
    """Format a number of seconds as an ``HH:MM:SS.mmm`` timecode string."""
    hours = seconds / 3600
    minutes = (seconds % 3600) / 60
    whole_seconds = seconds % 60
    milliseconds = (seconds % 1) * 1000
    # %d truncates each (possibly fractional) component to an integer.
    return '%02d:%02d:%02d.%03d' % (hours, minutes, whole_seconds, milliseconds)
class SearchInfoExtractor(InfoExtractor):
"""

View File

@ -24,6 +24,23 @@ class CommonMistakesIE(InfoExtractor):
'That doesn\'t make any sense. '
'Simply remove the parameter in your command or configuration.'
) % url
if self._downloader.params.get('verbose'):
if not self._downloader.params.get('verbose'):
msg += ' Add -v to the command line to see what arguments and configuration youtube-dl got.'
raise ExtractorError(msg, expected=True)
class UnicodeBOMIE(InfoExtractor):
    """Handle URLs that start with a Byte Order Mark.

    The BOM is dropped, a warning is reported and the rest of the URL is
    returned as a URL result.
    """
    IE_DESC = False
    _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'

    _TESTS = [{
        'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The 'id' group of _VALID_URL is everything after the BOM.
        url_without_bom = self._match_id(url)
        warning = (
            'Your URL starts with a Byte Order Mark (BOM). '
            'Removing the BOM and looking for "%s" ...' % url_without_bom)
        self.report_warning(warning)
        return self.url_result(url_without_bom)

View File

@ -9,7 +9,7 @@ import xml.etree.ElementTree
from hashlib import sha1
from math import pow, sqrt, floor
from .subtitles import SubtitlesInfoExtractor
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
@ -23,13 +23,12 @@ from ..utils import (
)
from ..aes import (
aes_cbc_decrypt,
inc,
)
from .common import InfoExtractor
class CrunchyrollIE(SubtitlesInfoExtractor):
class CrunchyrollIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
_NETRC_MACHINE = 'crunchyroll'
_TESTS = [{
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
'info_dict': {
@ -102,13 +101,6 @@ class CrunchyrollIE(SubtitlesInfoExtractor):
key = obfuscate_key(id)
class Counter:
__value = iv
def next_value(self):
temp = self.__value
self.__value = inc(self.__value)
return temp
decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
return zlib.decompress(decrypted_data)
@ -187,6 +179,38 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
return output
def _get_subtitles(self, video_id, webpage):
    """Collect the subtitles referenced by *webpage*.

    For every ``?ssid=<id>" title="<name>`` occurrence the subtitle XML is
    downloaded, decrypted and converted. Scripts lacking an id, iv, data
    or language code are skipped. Returns a dict mapping language code to
    a list with an 'srt' entry followed by an 'ass' entry, each holding
    the converted text under 'data'.
    """
    found = {}
    for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
        sub_page = self._download_webpage(
            'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
            video_id, note='Downloading subtitles for ' + sub_name)
        script_id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
        encoded_iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
        encoded_data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
        if not (script_id and encoded_iv and encoded_data):
            continue
        script_id = int(script_id)
        raw_iv = base64.b64decode(encoded_iv)
        raw_data = base64.b64decode(encoded_data)

        subtitle_xml = self._decrypt_subtitles(raw_data, raw_iv, script_id).decode('utf-8')
        lang_code = self._search_regex(
            r'lang_code=["\']([^"\']+)', subtitle_xml, 'subtitle_lang_code', fatal=False)
        if not lang_code:
            continue
        sub_root = xml.etree.ElementTree.fromstring(subtitle_xml)
        srt_entry = {
            'ext': 'srt',
            'data': self._convert_subtitles_to_srt(sub_root),
        }
        ass_entry = {
            'ext': 'ass',
            'data': self._convert_subtitles_to_ass(sub_root),
        }
        found[lang_code] = [srt_entry, ass_entry]
    return found
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
@ -249,34 +273,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'format_id': video_format,
})
subtitles = {}
sub_format = self._downloader.params.get('subtitlesformat', 'srt')
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
sub_page = self._download_webpage(
'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
video_id, note='Downloading subtitles for ' + sub_name)
id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
if not id or not iv or not data:
continue
id = int(id)
iv = base64.b64decode(iv)
data = base64.b64decode(data)
subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
if not lang_code:
continue
sub_root = xml.etree.ElementTree.fromstring(subtitle)
if sub_format == 'ass':
subtitles[lang_code] = self._convert_subtitles_to_ass(sub_root)
else:
subtitles[lang_code] = self._convert_subtitles_to_srt(sub_root)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, subtitles)
return
subtitles = self.extract_subtitles(video_id, webpage)
return {
'id': video_id,

View File

@ -6,7 +6,6 @@ import json
import itertools
from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..compat import (
compat_str,
@ -31,7 +30,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
return request
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
class DailymotionIE(DailymotionBaseInfoExtractor):
"""Information Extractor for Dailymotion"""
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
@ -47,13 +46,13 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
_TESTS = [
{
'url': 'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
'md5': '392c4b85a60a90dc4792da41ce3144eb',
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
'md5': '2137c41a8e78554bb09225b8eb322406',
'info_dict': {
'id': 'x33vw9',
'id': 'x2iuewm',
'ext': 'mp4',
'uploader': 'Amphora Alex and Van .',
'title': 'Tutoriel de Youtubeur"DL DES VIDEO DE YOUTUBE"',
'uploader': 'IGN',
'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
}
},
# Vevo video
@ -143,9 +142,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
# subtitles
video_subtitles = self.extract_subtitles(video_id, webpage)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, webpage)
return
view_count = str_to_int(self._search_regex(
r'video_views_count[^>]+>\s+([\d\.,]+)',
@ -169,7 +165,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
'view_count': view_count,
}
def _get_available_subtitles(self, video_id, webpage):
def _get_subtitles(self, video_id, webpage):
try:
sub_list = self._download_webpage(
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
@ -179,7 +175,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
return {}
info = json.loads(sub_list)
if (info['total'] > 0):
sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
sub_lang_list = dict((l['language'], [{'url': l['url'], 'ext': 'srt'}]) for l in info['list'])
return sub_lang_list
self._downloader.report_warning('video doesn\'t have subtitles')
return {}
@ -194,6 +190,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
'info_dict': {
'title': 'SPORT',
'id': 'xv4bw_nqtv_sport',
},
'playlist_mincount': 20,
}]

View File

@ -14,6 +14,10 @@ class DctpTvIE(InfoExtractor):
'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
'ext': 'flv',
'title': 'Videoinstallation für eine Kaufhausfassade'
},
'params': {
# rtmp download
'skip_download': True,
}
}

View File

@ -25,8 +25,9 @@ class DefenseGouvFrIE(InfoExtractor):
r"flashvars.pvg_id=\"(\d+)\";",
webpage, 'ID')
json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
+ video_id)
json_url = (
'http://static.videos.gouv.fr/brightcovehub/export/json/%s' %
video_id)
info = self._download_json(json_url, title, 'Downloading JSON config')
video_url = info['renditions'][0]['url']

View File

@ -1,13 +1,14 @@
from __future__ import unicode_literals
import re
import time
from .common import InfoExtractor
from ..utils import (
float_or_none,
int_or_none,
)
class DotsubIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
_VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
_TEST = {
'url': 'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27',
'md5': '0914d4d69605090f623b7ac329fea66e',
@ -15,28 +16,37 @@ class DotsubIE(InfoExtractor):
'id': 'aed3b8b2-1889-4df5-ae63-ad85f5572f27',
'ext': 'flv',
'title': 'Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary',
'description': 'md5:699a0f7f50aeec6042cb3b1db2d0d074',
'thumbnail': 're:^https?://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
'duration': 3169,
'uploader': '4v4l0n42',
'description': 'Pyramids of Waste (2010) also known as "The lightbulb conspiracy" is a documentary about how our economic system based on consumerism and planned obsolescence is breaking our planet down.\r\n\r\nSolutions to this can be found at:\r\nhttp://robotswillstealyourjob.com\r\nhttp://www.federicopistono.org\r\n\r\nhttp://opensourceecology.org\r\nhttp://thezeitgeistmovement.com',
'thumbnail': 'http://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
'timestamp': 1292248482.625,
'upload_date': '20101213',
'view_count': int,
}
}
def _real_extract(self, url):
    """Build the info dict for a dotsub video from its metadata API.

    The metadata JSON is downloaded once. When it carries no 'mediaURI',
    the video URL is taken from the ``"file"`` entry found in the web
    page instead. Only 'title' is mandatory in the metadata; every other
    field is optional and read with ``.get``.
    """
    video_id = self._match_id(url)

    info = self._download_json(
        'https://dotsub.com/api/media/%s/metadata' % video_id, video_id)
    video_url = info.get('mediaURI')

    if not video_url:
        webpage = self._download_webpage(url, video_id)
        video_url = self._search_regex(
            r'"file"\s*:\s*\'([^\']+)', webpage, 'video url')

    return {
        'id': video_id,
        'url': video_url,
        'ext': 'flv',
        'title': info['title'],
        'description': info.get('description'),
        'thumbnail': info.get('screenshotURI'),
        # duration and dateCreated are passed with 1000 as the second
        # argument; presumably a milliseconds-to-seconds scale (confirm
        # against the int_or_none / float_or_none helpers).
        'duration': int_or_none(info.get('duration'), 1000),
        'uploader': info.get('user'),
        'timestamp': float_or_none(info.get('dateCreated'), 1000),
        'view_count': int_or_none(info.get('numberOfViews')),
    }

View File

@ -0,0 +1,77 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import ExtractorError
class DouyuTVIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)'
_TEST = {
'url': 'http://www.douyutv.com/iseven',
'info_dict': {
'id': 'iseven',
'ext': 'flv',
'title': 're:^清晨醒脑T-ara根本停不下来 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'md5:9e525642c25a0a24302869937cf69d17',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': '7师傅',
'uploader_id': '431925',
'is_live': True,
},
'params': {
'skip_download': True,
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
config = self._download_json(
'http://www.douyutv.com/api/client/room/%s' % video_id, video_id)
data = config['data']
error_code = config.get('error', 0)
show_status = data.get('show_status')
if error_code is not 0:
raise ExtractorError(
'Server reported error %i' % error_code, expected=True)
# 1 = live, 2 = offline
if show_status == '2':
raise ExtractorError(
'Live stream is offline', expected=True)
base_url = data['rtmp_url']
live_path = data['rtmp_live']
title = self._live_title(data['room_name'])
description = data.get('show_details')
thumbnail = data.get('room_src')
uploader = data.get('nickname')
uploader_id = data.get('owner_uid')
multi_formats = data.get('rtmp_multi_bitrate')
if not isinstance(multi_formats, dict):
multi_formats = {}
multi_formats['live'] = live_path
formats = [{
'url': '%s/%s' % (base_url, format_path),
'format_id': format_id,
'preference': 1 if format_id == 'live' else 0,
} for format_id, format_path in multi_formats.items()]
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
'uploader_id': uploader_id,
'formats': formats,
'is_live': True,
}

View File

@ -15,7 +15,7 @@ class DrTuberIE(InfoExtractor):
'id': '1740434',
'display_id': 'hot-perky-blonde-naked-golf',
'ext': 'mp4',
'title': 'Hot Perky Blonde Naked Golf',
'title': 'hot perky blonde naked golf',
'like_count': int,
'dislike_count': int,
'comment_count': int,
@ -36,7 +36,8 @@ class DrTuberIE(InfoExtractor):
r'<source src="([^"]+)"', webpage, 'video URL')
title = self._html_search_regex(
r'<title>([^<]+)\s*-\s*Free', webpage, 'title')
[r'class="hd_title" style="[^"]+">([^<]+)</h1>', r'<title>([^<]+) - \d+'],
webpage, 'title')
thumbnail = self._html_search_regex(
r'poster="([^"]+)"',

View File

@ -1,11 +1,10 @@
from __future__ import unicode_literals
from .subtitles import SubtitlesInfoExtractor
from .common import ExtractorError
from .common import InfoExtractor, ExtractorError
from ..utils import parse_iso8601
class DRTVIE(SubtitlesInfoExtractor):
class DRTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
_TEST = {
@ -76,7 +75,7 @@ class DRTVIE(SubtitlesInfoExtractor):
}
for subs in subtitles_list:
lang = subs['Language']
subtitles[LANGS.get(lang, lang)] = subs['Uri']
subtitles[LANGS.get(lang, lang)] = [{'url': subs['Uri'], 'ext': 'vtt'}]
if not formats and restricted_to_denmark:
raise ExtractorError(
@ -84,10 +83,6 @@ class DRTVIE(SubtitlesInfoExtractor):
self._sort_formats(formats)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, subtitles)
return
return {
'id': video_id,
'title': title,
@ -96,5 +91,5 @@ class DRTVIE(SubtitlesInfoExtractor):
'timestamp': timestamp,
'duration': duration,
'formats': formats,
'subtitles': self.extract_subtitles(video_id, subtitles),
'subtitles': subtitles,
}

View File

@ -0,0 +1,98 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
)
class EaglePlatformIE(InfoExtractor):
    """Extractor for videos served through EaglePlatform players.

    Accepts either a player URL on a ``*.media.eagleplatform.com`` host or
    the internal ``eagleplatform:<host>:<id>`` form. Every JSON response
    is checked for an error status before it is used.
    """
    _VALID_URL = r'''(?x)
                    (?:
                        eagleplatform:(?P<custom_host>[^/]+):|
                        https?://(?P<host>.+?\.media\.eagleplatform\.com)/index/player\?.*\brecord_id=
                    )
                    (?P<id>\d+)
                '''
    _TESTS = [{
        # http://lenta.ru/news/2015/03/06/navalny/
        'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
        'md5': '0b7994faa2bd5c0f69a3db6db28d078d',
        'info_dict': {
            'id': '227304',
            'ext': 'mp4',
            'title': 'Навальный вышел на свободу',
            'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 87,
            'view_count': int,
            'age_limit': 0,
        },
    }, {
        # http://muz-tv.ru/play/7129/
        # http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true
        'url': 'eagleplatform:media.clipyou.ru:12820',
        'md5': '6c2ebeab03b739597ce8d86339d5a905',
        'info_dict': {
            'id': '12820',
            'ext': 'mp4',
            'title': "'O Sole Mio",
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 216,
            'view_count': int,
        },
    }]

    def _handle_error(self, response):
        """Raise an expected ExtractorError if *response* has a non-200 status.

        A response without a 'status' key is treated as successful.
        """
        status = int_or_none(response.get('status', 200))
        if status != 200:
            # Do not let a missing 'errors' list hide the failure behind
            # a KeyError.
            errors = response.get('errors') or ['status %s' % status]
            raise ExtractorError(' '.join(errors), expected=True)

    def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', *args, **kwargs):
        """Download JSON like the base class, then validate its status.

        Extra positional and keyword arguments are forwarded to the base
        implementation unchanged.
        """
        response = super(EaglePlatformIE, self)._download_json(
            url_or_request, video_id, note, *args, **kwargs)
        self._handle_error(response)
        return response

    def _real_extract(self, url):
        """Return the info dict for the record addressed by *url*."""
        mobj = re.match(self._VALID_URL, url)
        host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id')

        player_data = self._download_json(
            'http://%s/api/player_data?id=%s' % (host, video_id), video_id)

        media = player_data['data']['playlist']['viewports'][0]['medialist'][0]

        title = media['title']
        description = media.get('description')
        thumbnail = media.get('snapshot')
        duration = int_or_none(media.get('duration'))
        view_count = int_or_none(media.get('views'))

        # No age limit is reported unless the API states a restriction;
        # anything other than 'allow_all' is mapped to 18.
        age_restriction = media.get('age_restriction')
        age_limit = None
        if age_restriction:
            age_limit = 0 if age_restriction == 'allow_all' else 18

        # The 'auto' source is itself a JSON document whose first data
        # entry is the m3u8 URL.
        m3u8_data = self._download_json(
            media['sources']['secure_m3u8']['auto'],
            video_id, 'Downloading m3u8 JSON')

        formats = self._extract_m3u8_formats(
            m3u8_data['data'][0], video_id,
            'mp4', entry_protocol='m3u8_native')
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'age_limit': age_limit,
            'formats': formats,
        }

View File

@ -3,7 +3,6 @@ from __future__ import unicode_literals
import json
import random
import re
from .common import InfoExtractor
from ..compat import (
@ -103,20 +102,23 @@ class EightTracksIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id')
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
json_like = self._search_regex(
r"(?s)PAGE.mix = (.*?);\n", webpage, 'trax information')
data = json.loads(json_like)
data = self._parse_json(
self._search_regex(
r"(?s)PAGE\.mix\s*=\s*({.+?});\n", webpage, 'trax information'),
playlist_id)
session = str(random.randint(0, 1000000000))
mix_id = data['id']
track_count = data['tracks_count']
duration = data['duration']
avg_song_duration = float(duration) / track_count
# duration is sometimes negative, use predefined avg duration
if avg_song_duration <= 0:
avg_song_duration = 300
first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
next_url = first_url
entries = []

View File

@ -0,0 +1,16 @@
# encoding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
class EmbedlyIE(InfoExtractor):
    """Resolve an embedly media widget URL to the URL it embeds.

    The percent-encoded ``url`` query parameter is unquoted and returned
    as a URL result.
    """
    # The optional sub-domain group carries the dot for both alternatives,
    # so "www.embedly.com" matches as well as "cdn.embedly.com".
    _VALID_URL = r'https?://(?:(?:www|cdn)\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?url=(?P<id>[^#&]+)'
    _TESTS = [{
        'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        return self.url_result(compat_urllib_parse_unquote(self._match_id(url)))

View File

@ -35,10 +35,7 @@ class EpornerIE(InfoExtractor):
title = self._html_search_regex(
r'<title>(.*?) - EPORNER', webpage, 'title')
redirect_code = self._html_search_regex(
r'<script type="text/javascript" src="/config5/%s/([a-f\d]+)/">' % video_id,
webpage, 'redirect_code')
redirect_url = 'http://www.eporner.com/config5/%s/%s' % (video_id, redirect_code)
redirect_url = 'http://www.eporner.com/config5/%s' % video_id
player_code = self._download_webpage(
redirect_url, display_id, note='Downloading player config')
@ -69,5 +66,5 @@ class EpornerIE(InfoExtractor):
'duration': duration,
'view_count': view_count,
'formats': formats,
'age_limit': self._rta_search(webpage),
'age_limit': 18,
}

View File

@ -1,18 +1,20 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import (
ExtractorError,
js_to_json,
parse_duration,
)
class EscapistIE(InfoExtractor):
_VALID_URL = r'^https?://?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<id>[0-9]+)-'
_VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
_TEST = {
'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
@ -20,64 +22,107 @@ class EscapistIE(InfoExtractor):
'id': '6618',
'ext': 'mp4',
'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
'uploader': 'the-escapist-presents',
'uploader_id': 'the-escapist-presents',
'uploader': 'The Escapist Presents',
'title': "Breaking Down Baldur's Gate",
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 264,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
showName = mobj.group('showname')
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage_req = compat_urllib_request.Request(url)
webpage_req.add_header('User-Agent', self._USER_AGENT)
webpage = self._download_webpage(webpage_req, video_id)
self.report_extraction(video_id)
webpage = self._download_webpage(url, video_id)
uploader_id = self._html_search_regex(
r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'",
webpage, 'uploader ID', fatal=False)
uploader = self._html_search_regex(
r"<h1\s+class='headline'>(.*?)</a>",
webpage, 'uploader', fatal=False)
description = self._html_search_meta('description', webpage)
duration = parse_duration(self._html_search_meta('duration', webpage))
videoDesc = self._html_search_regex(
r'<meta name="description" content="([^"]*)"',
webpage, 'description', fatal=False)
raw_title = self._html_search_meta('title', webpage, fatal=True)
title = raw_title.partition(' : ')[2]
playerUrl = self._og_search_video_url(webpage, name='player URL')
title = self._html_search_regex(
r'<meta name="title" content="([^"]*)"',
webpage, 'title').split(' : ')[-1]
configUrl = self._search_regex('config=(.*)$', playerUrl, 'config URL')
configUrl = compat_urllib_parse.unquote(configUrl)
config_url = compat_urllib_parse.unquote(self._html_search_regex(
r'''(?x)
(?:
<param\s+name="flashvars".*?\s+value="config=|
flashvars=&quot;config=
)
(https?://[^"&]+)
''',
webpage, 'config URL'))
formats = []
ad_formats = []
def _add_format(name, cfgurl, quality):
def _add_format(name, cfg_url, quality):
cfg_req = compat_urllib_request.Request(cfg_url)
cfg_req.add_header('User-Agent', self._USER_AGENT)
config = self._download_json(
cfgurl, video_id,
cfg_req, video_id,
'Downloading ' + name + ' configuration',
'Unable to download ' + name + ' configuration',
transform_source=lambda s: s.replace("'", '"'))
transform_source=js_to_json)
playlist = config['playlist']
formats.append({
'url': playlist[1]['url'],
'format_id': name,
'quality': quality,
})
for p in playlist:
if p.get('eventCategory') == 'Video':
ar = formats
elif p.get('eventCategory') == 'Video Postroll':
ar = ad_formats
else:
continue
_add_format('normal', configUrl, quality=0)
hq_url = (configUrl +
('&hq=1' if '?' in configUrl else configUrl + '?hq=1'))
ar.append({
'url': p['url'],
'format_id': name,
'quality': quality,
'http_headers': {
'User-Agent': self._USER_AGENT,
},
})
_add_format('normal', config_url, quality=0)
hq_url = (config_url +
('&hq=1' if '?' in config_url else config_url + '?hq=1'))
try:
_add_format('hq', hq_url, quality=1)
except ExtractorError:
pass # That's fine, we'll just use normal quality
self._sort_formats(formats)
return {
if '/escapist/sales-marketing/' in formats[-1]['url']:
raise ExtractorError('This IP address has been blocked by The Escapist', expected=True)
res = {
'id': video_id,
'formats': formats,
'uploader': showName,
'uploader': uploader,
'uploader_id': uploader_id,
'title': title,
'thumbnail': self._og_search_thumbnail(webpage),
'description': videoDesc,
'player_url': playerUrl,
'description': description,
'duration': duration,
}
if self._downloader.params.get('include_ads') and ad_formats:
self._sort_formats(ad_formats)
ad_res = {
'id': '%s-ad' % video_id,
'title': '%s (Postroll)' % title,
'formats': ad_formats,
}
return {
'_type': 'playlist',
'entries': [res, ad_res],
'title': title,
'id': video_id,
}
return res

View File

@ -4,11 +4,11 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_urlparse,
compat_parse_qs,
compat_urllib_request,
compat_urllib_parse,
)
from ..utils import (
qualities,
str_to_int,
)
@ -17,7 +17,7 @@ class ExtremeTubeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<id>[0-9]+))(?:[/?&]|$)'
_TESTS = [{
'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
'md5': '1fb9228f5e3332ec8c057d6ac36f33e0',
'md5': '344d0c6d50e2f16b06e49ca011d8ac69',
'info_dict': {
'id': '652431',
'ext': 'mp4',
@ -49,19 +49,27 @@ class ExtremeTubeIE(InfoExtractor):
r'Views:\s*</strong>\s*<span>([\d,\.]+)</span>',
webpage, 'view count', fatal=False))
video_url = compat_urllib_parse.unquote(self._html_search_regex(
r'video_url=(.+?)&amp;', webpage, 'video_url'))
path = compat_urllib_parse_urlparse(video_url).path
format = path.split('/')[5].split('_')[:2]
format = "-".join(format)
flash_vars = compat_parse_qs(self._search_regex(
r'<param[^>]+?name="flashvars"[^>]+?value="([^"]+)"', webpage, 'flash vars'))
formats = []
quality = qualities(['180p', '240p', '360p', '480p', '720p', '1080p'])
for k, vals in flash_vars.items():
m = re.match(r'quality_(?P<quality>[0-9]+p)$', k)
if m is not None:
formats.append({
'format_id': m.group('quality'),
'quality': quality(m.group('quality')),
'url': vals[0],
})
self._sort_formats(formats)
return {
'id': video_id,
'title': video_title,
'formats': formats,
'uploader': uploader,
'view_count': view_count,
'url': video_url,
'format': format,
'format_id': format,
'age_limit': 18,
}

View File

@ -126,11 +126,17 @@ class FacebookIE(InfoExtractor):
params_raw = compat_urllib_parse.unquote(data['params'])
params = json.loads(params_raw)
video_data = params['video_data'][0]
video_url = video_data.get('hd_src')
if not video_url:
video_url = video_data['sd_src']
if not video_url:
raise ExtractorError('Cannot find video URL')
formats = []
for quality in ['sd', 'hd']:
src = video_data.get('%s_src' % quality)
if src is not None:
formats.append({
'format_id': quality,
'url': src,
})
if not formats:
raise ExtractorError('Cannot find video formats')
video_title = self._html_search_regex(
r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title',
@ -146,7 +152,7 @@ class FacebookIE(InfoExtractor):
return {
'id': video_id,
'title': video_title,
'url': video_url,
'formats': formats,
'duration': int_or_none(video_data.get('video_duration')),
'thumbnail': video_data.get('thumbnail_src'),
}

View File

@ -1,7 +1,5 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@ -20,11 +18,10 @@ class FirstpostIE(InfoExtractor):
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
page = self._download_webpage(url, video_id)
title = self._html_search_meta('twitter:title', page, 'title')
title = self._html_search_meta('twitter:title', page, 'title', fatal=True)
description = self._html_search_meta('twitter:description', page, 'title')
data = self._download_xml(
@ -42,6 +39,7 @@ class FirstpostIE(InfoExtractor):
'height': int(details.find('./height').text.strip()),
} for details in item.findall('./source/file_details') if details.find('./file').text
]
self._sort_formats(formats)
return {
'id': video_id,

View File

@ -1,52 +1,71 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import int_or_none
class FirstTVIE(InfoExtractor):
IE_NAME = 'firsttv'
IE_DESC = 'Видеоархив - Первый канал'
_VALID_URL = r'http://(?:www\.)?1tv\.ru/videoarchive/(?P<id>\d+)'
IE_NAME = '1tv'
IE_DESC = 'Первый канал'
_VALID_URL = r'http://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)'
_TEST = {
_TESTS = [{
'url': 'http://www.1tv.ru/videoarchive/73390',
'md5': '3de6390cf0cca4a5eae1d1d83895e5ad',
'md5': '777f525feeec4806130f4f764bc18a4f',
'info_dict': {
'id': '73390',
'ext': 'mp4',
'title': 'Олимпийские канатные дороги',
'description': 'md5:cc730d2bf4215463e37fff6a1e277b13',
'thumbnail': 'http://img1.1tv.ru/imgsize640x360/PR20140210114657.JPG',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
'duration': 149,
'like_count': int,
'dislike_count': int,
},
'skip': 'Only works from Russia',
}
}, {
'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930',
'md5': 'a1b6b60d530ebcf8daacf4565762bbaf',
'info_dict': {
'id': '35930',
'ext': 'mp4',
'title': 'Наедине со всеми. Людмила Сенчина',
'description': 'md5:89553aed1d641416001fe8d450f06cb9',
'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
'duration': 2694,
},
'skip': 'Only works from Russia',
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id, 'Downloading page')
video_url = self._html_search_regex(
r'''(?s)jwplayer\('flashvideoportal_1'\)\.setup\({.*?'file': '([^']+)'.*?}\);''', webpage, 'video URL')
r'''(?s)(?:jwplayer\('flashvideoportal_1'\)\.setup\({|var\s+playlistObj\s*=).*?'file'\s*:\s*'([^']+)'.*?}\);''',
webpage, 'video URL')
title = self._html_search_regex(
r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', webpage, 'title')
[r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
r"'title'\s*:\s*'([^']+)'"], webpage, 'title')
description = self._html_search_regex(
r'<div class="descr">\s*<div>&nbsp;</div>\s*<p>([^<]*)</p></div>', webpage, 'description', fatal=False)
r'<div class="descr">\s*<div>&nbsp;</div>\s*<p>([^<]*)</p></div>',
webpage, 'description', default=None) or self._html_search_meta(
'description', webpage, 'description')
thumbnail = self._og_search_thumbnail(webpage)
duration = self._og_search_property('video:duration', webpage, 'video duration', fatal=False)
duration = self._og_search_property(
'video:duration', webpage,
'video duration', fatal=False)
like_count = self._html_search_regex(r'title="Понравилось".*?/></label> \[(\d+)\]',
webpage, 'like count', fatal=False)
dislike_count = self._html_search_regex(r'title="Не понравилось".*?/></label> \[(\d+)\]',
webpage, 'dislike count', fatal=False)
like_count = self._html_search_regex(
r'title="Понравилось".*?/></label> \[(\d+)\]',
webpage, 'like count', default=None)
dislike_count = self._html_search_regex(
r'title="Не понравилось".*?/></label> \[(\d+)\]',
webpage, 'dislike count', default=None)
return {
'id': video_id,

View File

@ -14,6 +14,7 @@ class FiveMinIE(InfoExtractor):
IE_NAME = '5min'
_VALID_URL = r'''(?x)
(?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
https?://(?:(?:massively|www)\.)?joystiq\.com/video/|
5min:)
(?P<id>\d+)
'''

View File

@ -0,0 +1,41 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class FootyRoomIE(InfoExtractor):
    # Playlist extractor: a footyroom.com page embeds a JSON array passed to
    # VideoSelector.load(...); each element may carry an HTML 'payload' whose
    # data-config attribute is handed over to the Playwire extractor.
    _VALID_URL = r'http://footyroom\.com/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/',
        'info_dict': {
            'id': 'schalke-04-0-2-real-madrid-2015-02',
            'title': 'Schalke 04 0 2 Real Madrid',
        },
        'playlist_count': 3,
    }

    def _real_extract(self, url):
        # The URL slug captured by _VALID_URL doubles as the playlist id.
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        # Pull the JSON array literal out of the VideoSelector.load([...]); call.
        selector_json = self._search_regex(
            r'VideoSelector\.load\((\[.+?\])\);', webpage, 'video selector')
        videos = self._parse_json(selector_json, playlist_id)

        playlist_title = self._og_search_title(webpage)

        entries = []
        for video in videos:
            payload = video.get('payload')
            if payload:
                # Non-fatal lookup: payloads without a data-config attribute
                # are silently skipped.
                config_url = self._search_regex(
                    r'data-config="([^"]+)"', payload,
                    'playwire url', default=None)
                if config_url:
                    entries.append(self.url_result(config_url, 'Playwire'))

        return self.playlist_result(entries, playlist_id, playlist_title)

View File

@ -1,77 +1,69 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urlparse,
)
from ..utils import (
determine_ext,
int_or_none,
)
class FranceCultureIE(InfoExtractor):
_VALID_URL = r'(?P<baseurl>http://(?:www\.)?franceculture\.fr/)player/reecouter\?play=(?P<id>[0-9]+)'
_VALID_URL = r'https?://(?:www\.)?franceculture\.fr/player/reecouter\?play=(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.franceculture.fr/player/reecouter?play=4795174',
'info_dict': {
'id': '4795174',
'ext': 'mp3',
'title': 'Rendez-vous au pays des geeks',
'alt_title': 'Carnet nomade | 13-14',
'vcodec': 'none',
'uploader': 'Colette Fellous',
'upload_date': '20140301',
'duration': 3601,
'thumbnail': r're:^http://www\.franceculture\.fr/.*/images/player/Carnet-nomade\.jpg$',
'description': 'Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche des « geeks », une enquête menée aux Etats-Unis dans la S ...',
'description': 'startswith:Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche des « geeks », une enquête menée aux Etats',
'timestamp': 1393700400,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
baseurl = mobj.group('baseurl')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
params_code = self._search_regex(
r"<param name='movie' value='/sites/all/modules/rf/rf_player/swf/loader.swf\?([^']+)' />",
webpage, 'parameter code')
params = compat_parse_qs(params_code)
video_url = compat_urlparse.urljoin(baseurl, params['urlAOD'][0])
video_path = self._search_regex(
r'<a id="player".*?href="([^"]+)"', webpage, 'video path')
video_url = compat_urlparse.urljoin(url, video_path)
timestamp = int_or_none(self._search_regex(
r'<a id="player".*?data-date="([0-9]+)"',
webpage, 'upload date', fatal=False))
thumbnail = self._search_regex(
r'<a id="player".*?>\s+<img src="([^"]+)"',
webpage, 'thumbnail', fatal=False)
title = self._html_search_regex(
r'<h1 class="title[^"]+">(.+?)</h1>', webpage, 'title')
r'<span class="title-diffusion">(.*?)</span>', webpage, 'title')
alt_title = self._html_search_regex(
r'<span class="title">(.*?)</span>',
webpage, 'alt_title', fatal=False)
description = self._html_search_regex(
r'<span class="description">(.*?)</span>',
webpage, 'description', fatal=False)
uploader = self._html_search_regex(
r'(?s)<div id="emission".*?<span class="author">(.*?)</span>',
webpage, 'uploader', fatal=False)
thumbnail_part = self._html_search_regex(
r'(?s)<div id="emission".*?<img src="([^"]+)"', webpage,
'thumbnail', fatal=False)
if thumbnail_part is None:
thumbnail = None
else:
thumbnail = compat_urlparse.urljoin(baseurl, thumbnail_part)
description = self._html_search_regex(
r'(?s)<p class="desc">(.*?)</p>', webpage, 'description')
info = json.loads(params['infoData'][0])[0]
duration = info.get('media_length')
upload_date_candidate = info.get('media_section5')
upload_date = (
upload_date_candidate
if (upload_date_candidate is not None and
re.match(r'[0-9]{8}$', upload_date_candidate))
else None)
webpage, 'uploader', default=None)
vcodec = 'none' if determine_ext(video_url.lower()) == 'mp3' else None
return {
'id': video_id,
'url': video_url,
'vcodec': 'none' if video_url.lower().endswith('.mp3') else None,
'duration': duration,
'vcodec': vcodec,
'uploader': uploader,
'upload_date': upload_date,
'timestamp': timestamp,
'title': title,
'alt_title': alt_title,
'thumbnail': thumbnail,
'description': description,
}

View File

@ -50,7 +50,6 @@ class FunnyOrDieIE(InfoExtractor):
bitrates.sort()
formats = []
for bitrate in bitrates:
for link in links:
formats.append({
@ -59,6 +58,13 @@ class FunnyOrDieIE(InfoExtractor):
'vbr': bitrate,
})
subtitles = {}
for src, src_lang in re.findall(r'<track kind="captions" src="([^"]+)" srclang="([^"]+)"', webpage):
subtitles[src_lang] = [{
'ext': src.split('/')[-1],
'url': 'http://www.funnyordie.com%s' % src,
}]
post_json = self._search_regex(
r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
post = json.loads(post_json)
@ -69,4 +75,5 @@ class FunnyOrDieIE(InfoExtractor):
'description': post.get('description'),
'thumbnail': post.get('picture'),
'formats': formats,
'subtitles': subtitles,
}

View File

@ -1,41 +1,67 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
xpath_text,
xpath_with_ns,
)
class GamekingsIE(InfoExtractor):
_VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
_TEST = {
_VALID_URL = r'http://www\.gamekings\.tv/(?:videos|nieuws)/(?P<id>[^/]+)'
_TESTS = [{
'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
# MD5 is flaky, seems to change regularly
# 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3',
'info_dict': {
'id': '20130811',
'id': 'phoenix-wright-ace-attorney-dual-destinies-review',
'ext': 'mp4',
'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review',
'description': 'md5:36fd701e57e8c15ac8682a2374c99731',
}
}
'thumbnail': 're:^https?://.*\.jpg$',
},
}, {
# vimeo video
'url': 'http://www.gamekings.tv/videos/the-legend-of-zelda-majoras-mask/',
'md5': '12bf04dfd238e70058046937657ea68d',
'info_dict': {
'id': 'the-legend-of-zelda-majoras-mask',
'ext': 'mp4',
'title': 'The Legend of Zelda: Majoras Mask',
'description': 'md5:9917825fe0e9f4057601fe1e38860de3',
'thumbnail': 're:^https?://.*\.jpg$',
},
}, {
'url': 'http://www.gamekings.tv/nieuws/gamekings-extra-shelly-en-david-bereiden-zich-voor-op-de-livestream/',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
webpage = self._download_webpage(url, name)
video_url = self._og_search_video_url(webpage)
webpage = self._download_webpage(url, video_id)
video = re.search(r'[0-9]+', video_url)
video_id = video.group(0)
playlist_id = self._search_regex(
r'gogoVideo\(\s*\d+\s*,\s*"([^"]+)', webpage, 'playlist id')
# Todo: add medium format
video_url = video_url.replace(video_id, 'large/' + video_id)
playlist = self._download_xml(
'http://www.gamekings.tv/wp-content/themes/gk2010/rss_playlist.php?id=%s' % playlist_id,
video_id)
NS_MAP = {
'jwplayer': 'http://rss.jwpcdn.com/'
}
item = playlist.find('./channel/item')
thumbnail = xpath_text(item, xpath_with_ns('./jwplayer:image', NS_MAP), 'thumbnail')
video_url = item.find(xpath_with_ns('./jwplayer:source', NS_MAP)).get('file')
return {
'id': video_id,
'ext': 'mp4',
'url': video_url,
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'thumbnail': thumbnail,
}

View File

@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
@ -31,7 +33,7 @@ class GameStarIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
og_title = self._og_search_title(webpage)
title = og_title.replace(' - Video bei GameStar.de', '').strip()
title = re.sub(r'\s*- Video (bei|-) GameStar\.de$', '', og_title)
url = 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id

View File

@ -0,0 +1,38 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class GazetaIE(InfoExtractor):
    # Thin redirecting extractor: fetches the "?p=embed" variant of a
    # gazeta.ru video page, reads the player's data-id and delegates to the
    # EaglePlatform extractor.
    _VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:(?:main|\d{4}/\d{2}/\d{2})/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)'
    _TESTS = [{
        'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml',
        'md5': 'd49c9bdc6e5a7888f27475dc215ee789',
        'info_dict': {
            'id': '205566',
            'ext': 'mp4',
            'title': '«7080 процентов гражданских в Донецке на грани голода»',
            'description': 'md5:38617526050bd17b234728e7f9620a71',
            'thumbnail': 're:^https?://.*\.jpg',
        },
    }, {
        'url': 'http://www.gazeta.ru/lifestyle/video/2015/03/08/master-klass_krasivoi_byt._delaem_vesennii_makiyazh.shtml',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # 'url' is the page address without any trailing query/fragment;
        # 'id' is the page slug, used only for progress messages.
        page_url, display_id = re.match(self._VALID_URL, url).group('url', 'id')

        embed_page = self._download_webpage(
            page_url + '?p=embed', display_id, 'Downloading embed page')

        # The real video id lives in the eagleplayer <div>'s data-id attribute.
        video_id = self._search_regex(
            r'<div[^>]*?class="eagleplayer"[^>]*?data-id="([^"]+)"', embed_page, 'video id')

        target = 'eagleplatform:gazeta.media.eagleplatform.com:%s' % video_id
        return self.url_result(target, 'EaglePlatform')

View File

@ -7,10 +7,12 @@ from ..compat import (
compat_urllib_parse,
compat_urllib_request,
)
from ..utils import remove_end
class GDCVaultIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)'
_NETRC_MACHINE = 'gdcvault'
_TESTS = [
{
'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple',
@ -65,10 +67,12 @@ class GDCVaultIE(InfoExtractor):
def _parse_flv(self, xml_description):
video_formats = []
akami_url = xml_description.find('./metadata/akamaiHost').text
akamai_url = xml_description.find('./metadata/akamaiHost').text
slide_video_path = xml_description.find('./metadata/slideVideo').text
video_formats.append({
'url': 'rtmp://' + akami_url + '/' + slide_video_path,
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
'play_path': remove_end(slide_video_path, '.flv'),
'ext': 'flv',
'format_note': 'slide deck video',
'quality': -2,
'preference': -2,
@ -76,7 +80,9 @@ class GDCVaultIE(InfoExtractor):
})
speaker_video_path = xml_description.find('./metadata/speakerVideo').text
video_formats.append({
'url': 'rtmp://' + akami_url + '/' + speaker_video_path,
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
'play_path': remove_end(speaker_video_path, '.flv'),
'ext': 'flv',
'format_note': 'speaker video',
'quality': -1,
'preference': -1,

View File

@ -26,6 +26,7 @@ from ..utils import (
unsmuggle_url,
UnsupportedError,
url_basename,
xpath_text,
)
from .brightcove import BrightcoveIE
from .ooyala import OoyalaIE
@ -140,6 +141,19 @@ class GenericIE(InfoExtractor):
},
'add_ie': ['Ooyala'],
},
# multiple ooyala embeds on SBN network websites
{
'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
'info_dict': {
'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
},
'playlist_mincount': 3,
'params': {
'skip_download': True,
},
'add_ie': ['Ooyala'],
},
# google redirect
{
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
@ -460,6 +474,7 @@ class GenericIE(InfoExtractor):
{
'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
'info_dict': {
'id': '1986',
'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
},
'playlist_mincount': 2,
@ -511,6 +526,109 @@ class GenericIE(InfoExtractor):
'upload_date': '20150126',
},
'add_ie': ['Viddler'],
},
# Libsyn embed
{
'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
'info_dict': {
'id': '3377616',
'ext': 'mp3',
'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
'description': 'md5:601cb790edd05908957dae8aaa866465',
'upload_date': '20150220',
},
},
# jwplayer YouTube
{
'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
'info_dict': {
'id': 'Mrj4DVp2zeA',
'ext': 'mp4',
'upload_date': '20150212',
'uploader': 'The National Archives UK',
'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
'uploader_id': 'NationalArchives08',
'title': 'Webinar: Using Discovery, The National Archives online catalogue',
},
},
# rtl.nl embed
{
'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
'playlist_mincount': 5,
'info_dict': {
'id': 'aanslagen-kopenhagen',
'title': 'Aanslagen Kopenhagen | RTL Nieuws',
}
},
# Zapiks embed
{
'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
'info_dict': {
'id': '118046',
'ext': 'mp4',
'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
}
},
# Kaltura embed
{
'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
'info_dict': {
'id': '1_eergr3h1',
'ext': 'mp4',
'upload_date': '20150226',
'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
'timestamp': int,
'title': 'John Carlson Postgame 2/25/15',
},
},
# Eagle.Platform embed (generic URL)
{
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
'info_dict': {
'id': '227304',
'ext': 'mp4',
'title': 'Навальный вышел на свободу',
'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 87,
'view_count': int,
'age_limit': 0,
},
},
# ClipYou (Eagle.Platform) embed (custom URL)
{
'url': 'http://muz-tv.ru/play/7129/',
'info_dict': {
'id': '12820',
'ext': 'mp4',
'title': "'O Sole Mio",
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 216,
'view_count': int,
},
},
# Pladform embed
{
'url': 'http://muz-tv.ru/kinozal/view/7400/',
'info_dict': {
'id': '100183293',
'ext': 'mp4',
'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 694,
'age_limit': 0,
},
},
# RSS feed with enclosure
{
'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
'info_dict': {
'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
'ext': 'm4v',
'upload_date': '20150228',
'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
}
}
]
@ -523,11 +641,24 @@ class GenericIE(InfoExtractor):
playlist_desc_el = doc.find('./channel/description')
playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
entries = [{
'_type': 'url',
'url': e.find('link').text,
'title': e.find('title').text,
} for e in doc.findall('./channel/item')]
entries = []
for it in doc.findall('./channel/item'):
next_url = xpath_text(it, 'link', fatal=False)
if not next_url:
enclosure_nodes = it.findall('./enclosure')
for e in enclosure_nodes:
next_url = e.attrib.get('url')
if next_url:
break
if not next_url:
continue
entries.append({
'_type': 'url',
'url': next_url,
'title': it.find('title').text,
})
return {
'_type': 'playlist',
@ -756,6 +887,13 @@ class GenericIE(InfoExtractor):
'entries': entries,
}
# Look for embedded rtl.nl player
matches = re.findall(
r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
webpage)
if matches:
return _playlist_from_matches(matches, ie='RtlNl')
# Look for embedded (iframe) Vimeo player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
@ -763,7 +901,6 @@ class GenericIE(InfoExtractor):
player_url = unescapeHTML(mobj.group('url'))
surl = smuggle_url(player_url, {'Referer': url})
return self.url_result(surl)
# Look for embedded (swf embed) Vimeo player
mobj = re.search(
r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
@ -880,12 +1017,34 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group('url'))
# Look for NYTimes player
mobj = re.search(
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
webpage)
if mobj is not None:
return self.url_result(mobj.group('url'))
# Look for Libsyn player
mobj = re.search(
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'))
# Look for Ooyala videos
mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage))
if mobj is not None:
return OoyalaIE._build_url_result(mobj.group('ec'))
# Look for multiple Ooyala embeds on SBN network websites
mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
if mobj is not None:
embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
if embeds:
return _playlist_from_matches(
embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala')
# Look for Aparat videos
mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
if mobj is not None:
@ -1012,7 +1171,12 @@ class GenericIE(InfoExtractor):
# Look for embedded sbs.com.au player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)sbs\.com\.au/ondemand/video/single/.+?)\1',
r'''(?x)
(?:
<meta\s+property="og:video"\s+content=|
<iframe[^>]+?src=
)
(["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'SBS')
@ -1042,7 +1206,39 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group('url'), 'Livestream')
# Look for Zapiks embed
mobj = re.search(
r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'Zapiks')
# Look for Kaltura embeds
mobj = re.search(
r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage)
if mobj is not None:
return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
# Look for Eagle.Platform embeds
mobj = re.search(
r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'EaglePlatform')
# Look for ClipYou (uses Eagle.Platform) embeds
mobj = re.search(
r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
if mobj is not None:
return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
# Look for Pladform embeds
mobj = re.search(
r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'Pladform')
def check_video(vurl):
if YoutubeIE.suitable(vurl):
return True
vpath = compat_urlparse.urlparse(vurl).path
vext = determine_ext(vpath)
return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
@ -1060,7 +1256,8 @@ class GenericIE(InfoExtractor):
JWPlayerOptions|
jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
)
.*?file\s*:\s*["\'](.*?)["\']''', webpage))
.*?
['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
if not found:
# Broaden the search a little bit
found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
@ -1095,10 +1292,16 @@ class GenericIE(InfoExtractor):
# HTML5 video
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
if not found:
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
found = re.search(
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'?([^\'"]+)',
r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
webpage)
if not found:
# Look also in Refresh HTTP header
refresh_header = head_response.headers.get('Refresh')
if refresh_header:
found = re.search(REDIRECT_REGEX, refresh_header)
if found:
new_url = found.group(1)
self.report_following_redirect(new_url)
@ -1134,7 +1337,9 @@ class GenericIE(InfoExtractor):
return entries[0]
else:
for num, e in enumerate(entries, start=1):
e['title'] = '%s (%d)' % (e['title'], num)
# 'url' results don't have a title
if e.get('title') is not None:
e['title'] = '%s (%d)' % (e['title'], num)
return {
'_type': 'playlist',
'entries': entries,

View File

@ -20,7 +20,7 @@ class GloboIE(InfoExtractor):
_VALID_URL = 'https?://.+?\.globo\.com/(?P<id>.+)'
_API_URL_TEMPLATE = 'http://api.globovideos.com/videos/%s/playlist'
_SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=2.9.9.50&resource_id=%s'
_SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=17.0.0.132&resource_id=%s'
_VIDEOID_REGEXES = [
r'\bdata-video-id="(\d+)"',

View File

@ -34,8 +34,6 @@ class GoshgayIE(InfoExtractor):
duration = parse_duration(self._html_search_regex(
r'<span class="duration">\s*-?\s*(.*?)</span>',
webpage, 'duration', fatal=False))
family_friendly = self._html_search_meta(
'isFamilyFriendly', webpage, default='false')
flashvars = compat_parse_qs(self._html_search_regex(
r'<embed.+?id="flash-player-embed".+?flashvars="([^"]+)"',
@ -49,5 +47,5 @@ class GoshgayIE(InfoExtractor):
'title': title,
'thumbnail': thumbnail,
'duration': duration,
'age_limit': 0 if family_friendly == 'true' else 18,
'age_limit': self._family_friendly_search(webpage),
}

View File

@ -140,9 +140,9 @@ class GroovesharkIE(InfoExtractor):
if webpage is not None:
o = GroovesharkHtmlParser.extract_object_tags(webpage)
return (webpage, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed'])
return webpage, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed']
return (webpage, None)
return webpage, None
def _real_initialize(self):
self.ts = int(time.time() * 1000) # timestamp in millis
@ -154,7 +154,7 @@ class GroovesharkIE(InfoExtractor):
swf_referer = None
if self.do_playerpage_request:
(_, player_objs) = self._get_playerpage(url)
if player_objs is not None:
if player_objs:
swf_referer = self._build_swf_referer(url, player_objs[0])
self.to_screen('SWF Referer: %s' % swf_referer)

View File

@ -0,0 +1,31 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import smuggle_url
class HistoryIE(InfoExtractor):
    """Extractor for video pages on history.com.

    The page markup is searched for the element whose ``data-href`` ends in
    the video id taken from the URL; that element's ``data-release-url`` is
    then returned as a ``url_result`` with signing parameters smuggled in.
    The test case below lists ``ThePlatform`` as the extractor expected to
    handle the resulting URL.
    """

    # The id is the last path component, up to the end of the URL or the
    # start of a query string / fragment.
    _VALID_URL = r'https?://(?:www\.)?history\.com/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'

    _TESTS = [{
        'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
        'md5': '6fe632d033c92aa10b8d4a9be047a7c5',
        'info_dict': {
            'id': 'bLx5Dv5Aka1G',
            'ext': 'mp4',
            'title': "Bet You Didn't Know: Valentine's Day",
            'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
        },
        'add_ie': ['ThePlatform'],
    }]

    def _real_extract(self, url):
        """Resolve a history.com page URL to its release URL.

        :param url: page URL matching ``_VALID_URL``.
        :returns: the value of ``self.url_result(...)`` for the release URL
            with the ``sig`` data attached through ``smuggle_url``.
        """
        # Imported locally so this block does not depend on the module's
        # import section being edited.
        import re

        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # _VALID_URL lets the id contain any character except '/', so it may
        # hold regex metacharacters; escape it so it is matched literally
        # instead of corrupting (or failing to compile) the pattern.
        video_url = self._search_regex(
            r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % re.escape(video_id),
            webpage, 'video url')

        return self.url_result(smuggle_url(
            video_url, {'sig': {'key': 'crazyjava', 'secret': 's3cr3t'}}))

Some files were not shown because too many files have changed in this diff Show More