Compare commits

..

1570 Commits

Author SHA1 Message Date
3d87426c2d release 2013.03.24 2014-03-24 01:42:14 +01:00
ce328530a9 Merge remote-tracking branch 'origin/master' 2014-03-24 01:42:11 +01:00
f70daac108 [RTS] Add extractor (Fixes #2608) 2014-03-24 01:41:14 +01:00
912b38b428 [instagram] Fix info_dict key name 2014-03-24 01:40:09 +01:00
6e25c58ed7 Merge pull request #2567 from jaimeMF/sphinx-docs
Add initial sphinx docs
2014-03-24 00:50:32 +01:00
51fb2e98d2 [radiofrance] Modernize 2014-03-23 17:43:33 +01:00
38d63d846e [extractor/common] Clarify preference key in formats 2014-03-23 17:41:43 +01:00
07cec9776e release 2014.03.23 2014-03-23 16:06:41 +01:00
ea38e55fff [instagram] Add support for user profiles (Fixes #2606) 2014-03-23 16:06:07 +01:00
257cfebfe6 [test] Move expect_info_dict out of test_download 2014-03-23 15:52:21 +01:00
6eefe53329 [utils] Simplify setproctitle 2014-03-23 14:28:22 +01:00
1986025d2b [xbef] (Add extractor) 2014-03-23 14:04:36 +01:00
c9aa111b4f [worldstarhiphop] Modernize 2014-03-23 13:49:15 +01:00
bfcb6e3917 Merge remote-tracking branch 'fiocfun/xtube-user-extractor' 2014-03-23 13:36:14 +01:00
2c1396073e [metacafe] Remove accidently inserted comment string 2014-03-23 05:16:02 +07:00
401983c6a0 [metacafe] More modernize 2014-03-23 05:13:15 +07:00
391dc3ee07 [metacafe] Replace cbs test 2014-03-23 05:08:11 +07:00
be3b8fa30f [metacafe] Modernize 2014-03-23 05:05:31 +07:00
9f5809b3e8 [xtube] user playlist extractor 2014-03-23 00:16:35 +06:00
0320ddc192 [pornhub] Fix uploader extraction and extract counts 2014-03-22 21:30:22 +07:00
56dd55721c Remove unused imports and clarify variable names 2014-03-22 15:17:32 +01:00
231f76b530 [toypics] Separate user and video extraction (#2601) 2014-03-22 15:15:01 +01:00
55442a7812 Merge remote-tracking branch 'fiocfun/toypics-support' 2014-03-22 14:24:44 +01:00
43b81eb98a [youtube] Remove useless resolution fields from format definitions
These can be - and are - calculated automatically by the YoutubeDL core.
2014-03-22 14:22:41 +01:00
bfd718793c Merge remote-tracking branch 'hurda/patch-1' 2014-03-22 14:21:04 +01:00
a9c2896e22 Make missing test definition fields an error
If the result is not testable (for example, because a description changes often), either pass in a type or a regular expression (a string starting with 're:')
2014-03-22 14:20:07 +01:00
278229d195 itag 160 is 144p, not 192p 2014-03-22 12:15:45 +01:00
fa154d1dbe [videolectures.net] Make description optional 2014-03-22 12:10:56 +01:00
7e2ede9891 [generic] Run TED detection before JW Player detection
Otherwise it overwrittes the `mobj` variable.
2014-03-22 10:20:44 +01:00
74af99fc2f toypics.net support 2014-03-22 04:07:44 +06:00
0f2a2ba14b Merge remote-tracking branch 'dstftw/generic-webpage-unescape'
Conflicts:
	youtube_dl/extractor/generic.py
2014-03-21 22:14:24 +01:00
e24b5a8610 [ooyala] Modernize 2014-03-21 21:55:51 +01:00
750f9020ae [generic] Recognize more Ooyala embedded videos (#2569) 2014-03-21 21:51:33 +01:00
f82863851e Add an extractor for on.aol.com 2014-03-21 19:54:44 +01:00
933a5b3792 Add extractor for Engadget and 5min (closes #2465)
engadget.com uses the generic 5min.com service.
2014-03-21 19:13:46 +01:00
aa488e1385 [xtube] Fix formats extraction 2014-03-21 23:58:40 +07:00
d77650525d release 2014.03.21.5 2014-03-21 14:52:57 +01:00
3e50c29984 release 2014.03.21.4 2014-03-21 14:38:55 +01:00
64e7ad6045 [videolectures] (New extractor) 2014-03-21 14:38:41 +01:00
23f4a93bb4 [daum] Modernize 2014-03-21 14:38:41 +01:00
6f13b055f1 [cspan] Fix typo in a comment 2014-03-21 08:01:20 +01:00
1f91bd15c3 release 2014.03.21.3 2014-03-21 02:10:35 +01:00
11a15be4ce [cspan] Add support for newer videos (Fixes #2577) 2014-03-21 02:10:24 +01:00
14e17e18cb release 2014.03.21.2 2014-03-21 01:42:45 +01:00
1b124d1942 [parliamentliveuk] Add extractor 2014-03-21 01:42:28 +01:00
747373d4ae release 2014.03.21.1 2014-03-21 01:00:27 +01:00
18d367c0a5 Remove legacy InfoExtractors file 2014-03-21 01:00:06 +01:00
a1a530b067 [pbs] Add support for video ratings 2014-03-21 00:59:51 +01:00
cb9722cb3f [viki] Modernize 2014-03-21 00:53:18 +01:00
773c0b4bb8 [pbs] Add support for widget URLs (Fixes #2594) 2014-03-21 00:46:32 +01:00
23c322a531 release 2014.03.21 2014-03-21 00:37:23 +01:00
7e8c0af004 Add --prefer-insecure option (Fixes #2364) 2014-03-21 00:37:10 +01:00
d2983ccb25 [ninegag] Modernize and remove unused import 2014-03-21 00:37:10 +01:00
f24e9833dc [youporn] Modernize 2014-03-21 00:37:10 +01:00
bc2bdf5709 [kontrtube] Modernize 2014-03-20 23:05:57 +07:00
627a209f74 release 2014.03.20 2014-03-20 16:35:54 +01:00
1a4895453a [YoutubeDL] Improve error message 2014-03-20 16:33:46 +01:00
aab74fa106 [ted] Simplify embed code (#2587) 2014-03-20 16:33:23 +01:00
2bd9efd4c2 Merge remote-tracking branch 'anovicecodemonkey/TEDIEimprovements' 2014-03-20 16:24:34 +01:00
39a743fb9b [arte] Modernize tests and fix _VALID_REGEX 2014-03-20 09:14:43 +01:00
4966a0b22d [arte] Add extractor for concert.arte.tv (closes #2588) 2014-03-20 09:11:47 +01:00
fc26023120 [TEDIE] Add support for embeded TED video URLs 2014-03-20 01:04:21 +10:30
8d7c0cca13 [generic] Add support for embeded TED videos 2014-03-20 00:56:32 +10:30
f66ede4328 [arte.tv:+7] Fix _VALID_URL 2014-03-19 21:23:55 +07:00
cc88b90ec8 [desvscripts/release] Bump the number of password tries to accomodate stubby-fingered @phihag 2014-03-18 15:02:37 +01:00
b6c5fa9a0b release 2014.03.18.1 2014-03-18 14:42:59 +01:00
dff10eaa77 release 2014.03.18 2014-03-18 14:31:03 +01:00
4e6f9aeca1 Fix typo 2014-03-18 14:28:53 +01:00
e68301af21 Fix getpass on Windows (Fixes #2547) 2014-03-18 14:27:42 +01:00
17286a96f2 [iprima] Fix permission check regex 2014-03-18 19:33:28 +07:00
0892363e6d Merge pull request #2580 from ericpardee/patch-1
Update to comedycentral.py (cc.com)
2014-03-18 08:14:39 +01:00
f102372b5f Update to comedycentral.py (cc.com)
Added cc.com as it's same as comedycentral.com and used, i.e. http://www.cc.com/video-clips/fmyq0m/broad-city-a-beautiful-railroad-style-apartment
2014-03-17 18:01:26 -07:00
ecbe1ad207 [generic] Fix access to removed function in python 3.4
The `Request.get_origin_req_host` method was deprecated in 3.3, use the
 `origin_req_host` property if it's not available, see http://docs.python.org/3.3/library/urllib.request.html#urllib.request.Request.get_origin_req_host.
2014-03-17 21:59:21 +01:00
9d840c43b5 release 2014.03.17 2014-03-17 14:49:02 +01:00
6f50f63382 Merge remote-tracking branch 'origin/wheels' 2014-03-17 14:31:22 +01:00
ff14fc4964 [test] Rename get_testcases to gettestcases
Apparently, newer versions of nosetests are somewhat over-eager in their test discovery.
2014-03-17 14:30:13 +01:00
e125c21531 [vesti] Restore vesti extractor 2014-03-17 02:01:01 +07:00
93d020dd65 [generic] Add support for embedded rutv player 2014-03-17 02:00:31 +07:00
a7515ec265 [rutv] Refactor vgtrk/rutv extractor 2014-03-17 01:59:40 +07:00
b6c1ceccc2 [ted] Add 'http://' to the thumbnail url if it's missing 2014-03-16 11:24:11 +01:00
4056ad8f36 Build and upload universal wheels to pypi 2014-03-16 10:22:41 +01:00
6563837ee1 [udemy] Make sure test case is not inherited 2014-03-16 07:09:10 +01:00
fd5e6f7ef2 [vevo] Mark all test timestamps as approximate 2014-03-16 07:05:48 +01:00
685052fc7b Add initial sphinx docs
With an initial guide for using youtube_dl from python programs.
2014-03-15 19:08:09 +01:00
15fd51b37c [generic] More generic support for embedded vimeo player (#1602) 2014-03-16 00:47:04 +07:00
d95e35d659 [generic] Add nowvideo test hidden behind percent encoding 2014-03-15 04:39:53 +07:00
1439073049 [generic] Add comment for unescaping webpage contents 2014-03-15 04:38:49 +07:00
1f7659dbe9 [generic] Unescape webpage contents 2014-03-15 04:21:17 +07:00
f1cef7a9ff [iprima] Skip test 2014-03-15 01:39:42 +07:00
8264223511 [iprima] Add access permission check 2014-03-15 01:38:44 +07:00
bc6d597828 Add bestvideo and worstvideo to special format names (#2163) 2014-03-14 17:01:47 +01:00
aba77bbfc2 [vevo] Adapt test to constantly changing timestamp 2014-03-13 18:45:14 +01:00
955c451456 Rename upload_timestamp to timestamp 2014-03-13 18:45:14 +01:00
e5de3f6c89 [udemy] Initial support for free courses (#1617) 2014-03-14 00:36:39 +07:00
2a1db721d4 [test_download] Move assertions before debugging output 2014-03-13 17:05:51 +01:00
1e0eb60f1a [videobam] Fix empty title handling 2014-03-13 17:03:43 +01:00
87a29e6f25 [wdr] Add description to tests 2014-03-13 17:01:58 +01:00
c3d36f134f [googlesearch] Fix next page indicator check 2014-03-13 16:52:13 +01:00
84769e708c [ninegag] Fix extraction 2014-03-13 16:40:53 +01:00
9d2ecdbc71 [vevo] Centralize timestamp handling 2014-03-13 15:30:25 +01:00
9b69af5342 Merge remote-tracking branch 'soult/br' 2014-03-13 14:35:34 +01:00
c21215b421 [br] Allow '/' in URL, allow empty author + broadcastDate fields
* Allow URLs that have a 'subdirectory' before the actual program name, e.g.
  'xyz/xyz-episode-1'.
* The author and broadcastDate fields in the XML file may be empty.
* Add test case for the two problems above.
2014-03-13 14:08:34 +01:00
cddcfd90b4 [funnyordie] Correct JSON interpretation 2014-03-13 00:53:19 +01:00
f36aacba0f [collegehumor] Fix one more test 2014-03-13 06:25:12 +07:00
355271fb61 [collegehumor] Extract like count 2014-03-13 06:12:39 +07:00
2a5b502364 [collegehumor] Fix test 2014-03-13 06:09:21 +07:00
98ff9d82d4 release 2014.03.12 2014-03-12 14:50:14 +01:00
b1ff87224c [vimeo] Now VimeoIE doesn't match urls of channels with a numeric id (fixes #2552) 2014-03-12 14:23:06 +01:00
b461641fb9 [wdr] Add support for WDR sites (Closes #1367) 2014-03-12 04:20:47 +07:00
b047de6f6e Add format to unified_strdate 2014-03-12 04:18:43 +07:00
34ca5d9ba0 release 2014.03.11 2014-03-11 16:51:50 +01:00
60cc4dc4b4 [generic/funnyordie] Add support for funnyordie embeds (Fixes #2546) 2014-03-11 16:51:36 +01:00
db95dc13a1 [playvid] Simplify (#2539) 2014-03-10 20:55:47 +01:00
777ac90791 Merge remote-tracking branch 'MikeCol/playvid_extract' 2014-03-10 20:45:45 +01:00
04f9bebbcb Merge remote-tracking branch 'jaimeMF/remove_global_opener' 2014-03-10 20:42:54 +01:00
4ea3137e41 Playvid extractor 2014-03-10 20:16:49 +01:00
a0792b738e Don't install the global url opener
All the code uses now the urlopen method of YoutubeDL
2014-03-10 19:04:51 +01:00
19a41fc613 Don't set the global socket timeout
Use the timeout argument of the `OpenerDirector.open` method instead
2014-03-10 19:03:37 +01:00
3ee52157fb [vgtrk] Rename vesti extractor 2014-03-11 00:58:05 +07:00
c4d197ee2d [vesti] Fix _VALID_URL regex 2014-03-11 00:49:41 +07:00
a33932cfe3 [vevo] Correct test value
The date is now interpreted as UTC for consistency.
2014-03-10 17:56:54 +01:00
bcf89ce62c [generic] Suppress warning about doctypes in RSS parser 2014-03-10 17:31:32 +01:00
e3899d0e00 Merge branch 'master' of github.com:rg3/youtube-dl 2014-03-10 16:42:22 +01:00
dcb00da49c [depositfiles] Remove extractor
This site requires a CAPTCHA to download, supports arbitrary files and not only audio/video, and I can't find a single uncopyrighted video with a quick google search.
Closes #1255
2014-03-10 16:41:08 +01:00
aa51d20d19 [vesti] Skip geo restricted test 2014-03-10 22:31:22 +07:00
ae7ed92057 [youtube] Fix up invalid JSON 2014-03-10 13:35:45 +01:00
e45b31d9bd [vevo] Interpret date as UTC instead of local time 2014-03-10 13:12:57 +01:00
5a25f39653 Correct extractor documentation 2014-03-10 13:09:55 +01:00
963d7ec412 release 2014.03.10 2014-03-10 13:04:20 +01:00
e712d94adf Merge branch 'master' of github.com:rg3/youtube-dl 2014-03-10 13:03:52 +01:00
6a72423955 [generic] Use a different URL for the generic RSS test (Closes #2532) 2014-03-10 13:03:39 +01:00
4126826b10 [photobucket] More unicode literals 2014-03-10 12:59:19 +01:00
b773ead7fd [vesti] Add support for more sites (Closes #2534) 2014-03-10 18:52:00 +07:00
855e2750bc Credit @mharrys for aftonbladet 2014-03-10 10:30:17 +01:00
805ef3c60b Correct automatic resolution determination 2014-03-10 10:29:25 +01:00
fbc2dcb40b [aftonbladet] Modernize 2014-03-10 10:28:56 +01:00
5375d7ad84 Merge remote-tracking branch 'mharrys/aftonbladet' 2014-03-10 10:23:45 +01:00
90f3476180 [photobucket] Modernize and remove the old extraction code 2014-03-09 19:36:46 +01:00
ee95c09333 [pornhub] Use compat_urllib_parse.unquote_plus (#2531) 2014-03-09 19:16:25 +01:00
75d06db9fc Merge branch 'pornhub_unquote_password' of github.com:MikeCol/youtube-dl 2014-03-09 19:15:33 +01:00
439a1fffcb [myvideo] Modernize 2014-03-09 18:58:34 +01:00
9d9d70c462 [facebook] Modernize 2014-03-09 18:42:44 +01:00
b4a186b7be [jukebox] Modernize and add a test 2014-03-09 18:33:17 +01:00
bdebf51c8f [xnxx] Modernize 2014-03-09 18:31:39 +01:00
264b86f9b4 Unquote password 2014-03-09 18:26:18 +01:00
9e55e37a2e Merge remote-tracking branch 'origin/master' 2014-03-09 18:08:16 +01:00
1471956573 Add a basic test suite for the InfoExtractor class 2014-03-09 17:05:29 +01:00
27865b2169 [aftonbladet] add extractor for aftonbladet.se 2014-03-09 16:59:18 +01:00
6d07ce0162 YoutubeDL: If the logger is set call its warning method in report_warning 2014-03-09 15:16:54 +01:00
edb7fc5435 [videodetective] Modernize 2014-03-09 18:39:39 +07:00
31f77343f2 [vube] Update the test's checksum 2014-03-09 12:27:38 +01:00
63ad031583 [soundcloud] Add the description field to the second test 2014-03-09 12:26:58 +01:00
957688cee6 [ustream:channel] Update test's number of entries 2014-03-09 12:03:49 +01:00
806d6c2e8c [gamekings] Modernize and update the test's description field 2014-03-09 11:57:30 +01:00
0ef68e04d9 [mtv] Transform the urls from the mobile version to get the best quality
And don't report a warning, just log a message, it allows to pass the test from Europe.
2014-03-08 22:09:42 +01:00
a496524db2 [collegehumor] Replace youtube test 2014-03-09 03:21:26 +07:00
935c7360cc [spike] Add support for mobile urls 2014-03-08 21:10:21 +01:00
340b046876 [spike] Add support for downloading the mobile version if the normal version is geoblocked 2014-03-08 20:59:11 +01:00
cc1db7f9b7 [mtv] Improve detection of geoblocked videos 2014-03-08 19:46:34 +01:00
a4ff6c4762 [arte] Raise a proper error when no video is found 2014-03-08 16:04:03 +01:00
1060425cbb [vimeo] Add a better error message for embed-only videos (#2527) 2014-03-08 12:25:09 +01:00
e9c092f125 YoutubeDL: Use its urlopen method for downloading the thumbnail. 2014-03-07 16:43:34 +01:00
22ff5d2105 [http] Use the YoutubeDL.urlopen method 2014-03-07 16:41:42 +01:00
136db7881b [lynda] Modernize 2014-03-07 22:11:01 +07:00
dae313e725 release 2014.03.07.1 2014-03-07 15:59:10 +01:00
b74fa8cd2c [facebook] Fix login process
It was broken and didn't work in python 3.
And use `_download_webpage` instead of `compat_urllib_request.urlopen`.
2014-03-07 15:25:33 +01:00
94eae04c94 release 2014.03.07 2014-03-07 06:41:48 +01:00
16ff7ebc77 [lynda] Fix successful login regex and fix formats extraction (Closes #2520) 2014-03-07 06:56:48 +07:00
c361c505b0 release 2014.03.06 2014-03-06 23:57:00 +01:00
d37c07c575 [vesti] Fix extraction and support more link formats (Closes #2517) 2014-03-07 02:27:39 +07:00
9d6105c9f0 Do not resume live streams
No resuming or seeking in live streams is possible (c) man rtmpdump
2014-03-05 22:46:20 +07:00
8dec03ecba Use unicode literals 2014-03-05 22:24:07 +07:00
826547870b Report no connect as error 2014-03-05 22:21:19 +07:00
52d6a9a61d Handle rtmpdump's no connection return value 2014-03-05 22:19:27 +07:00
ad242b5fbc Remove superfluous whitespace 2014-03-05 22:16:50 +07:00
3524175625 Use meaningful return value constants for rtmpdump 2014-03-05 22:12:02 +07:00
7b9965ea93 [ted] Remove unused import and modernize test 2014-03-05 14:27:45 +01:00
0a5bce566f [generic] Add all test attributes for embedly (#2447)
In the future, we may want to not only print something, but throw an error for untested properties.
2014-03-05 14:05:50 +01:00
8012bd2424 [generic] Get a better ID 2014-03-05 14:02:14 +01:00
f55a1f0a88 Merge remote-tracking branch 'rzhxeo/embedly'
Conflicts:
	youtube_dl/extractor/generic.py
2014-03-05 14:01:53 +01:00
bacac173a9 [ted] Style fixes 2014-03-05 13:27:26 +01:00
ca1fee34f2 [ted] Fix playlist extraction and add a test 2014-03-05 13:22:10 +01:00
6dadaa9930 [prosiebensat1] Replace test 2014-03-05 15:10:49 +07:00
553f6e4633 [dailymotion] Convert width and height fields from strings to integers 2014-03-04 22:24:38 +01:00
652bee05f0 [ted] Fix video extraction
The site has been redesigned
2014-03-04 21:47:01 +01:00
d63516e9cd release 2014.03.04.2 2014-03-04 20:56:31 +01:00
e477dcf649 [vesti] Fix width and height 2014-03-04 21:40:35 +07:00
9d3f7781f3 [soundcloud:set] Fix _VALID_URL regex (Closes #2509) 2014-03-04 21:29:14 +07:00
c7095dada3 [tvigle] Add support for another video link format 2014-03-04 19:22:48 +07:00
607dbbad76 [xtube] Fix extraction add more metafields 2014-03-04 16:12:11 +07:00
17b75c0de1 Document width, height, and resolution (#1445) 2014-03-04 03:49:33 +01:00
ab24f4f3be [facebook] Use consistent quotes 2014-03-04 03:49:12 +01:00
e1a52d9e10 release 2014.03.04.1 2014-03-04 03:40:00 +01:00
d0ff838433 [facebook] Correct regexp 2014-03-04 03:39:45 +01:00
b37b94501c [facebook] Fix login detection (#2505) 2014-03-04 03:39:04 +01:00
cb3bb2cfef [facebook] Modernize 2014-03-04 03:36:54 +01:00
e2cc7983e9 release 2014.03.04 2014-03-04 03:32:54 +01:00
c9ae7b9565 [youtube] Add support for search result URLs (Fixes #2495) 2014-03-04 03:32:28 +01:00
86fb4347f7 release 2014.03.03 2014-03-03 13:51:25 +01:00
2fcec131f5 Credit @juancri for canal13cl (#2498) 2014-03-03 12:54:01 +01:00
9f62eaf4ef [canal13cl] Add test and improve extraction (#2498) 2014-03-03 12:53:11 +01:00
f92259c026 Merge remote-tracking branch 'origin/master' 2014-03-03 12:34:34 +01:00
0afef30b23 Add display_id field 2014-03-03 12:06:28 +01:00
dcdfd1c711 Merge remote-tracking branch 'origin/master' 2014-03-03 12:05:59 +01:00
2acc1f8f50 [orf] Fix segments extraction (Closes #2501) 2014-03-03 18:05:46 +07:00
2c39b0c695 [tinypic] Fix import 2014-03-03 17:40:12 +07:00
e77c5b4f63 [4tube] Fix import 2014-03-03 17:39:49 +07:00
409a16cb72 Allowing URLs for 13.cl without the /programas prefix 2014-03-02 23:41:13 -03:00
94d5e90b4f FIX: Typo in the extractor's name 2014-03-02 23:40:35 -03:00
2d73b45805 Adding support for 13.cl 2014-03-02 23:15:12 -03:00
271a2dbfa2 [tvigle] Add age limit 2014-03-02 22:07:18 +07:00
bf4adcac66 [tvigle] Fix like count 2014-03-02 20:56:36 +07:00
fb8b8fdd62 [tvigle] Add support for tvigle.ru 2014-03-02 19:59:34 +07:00
5a0b26252e [ceskatelevize] Simplify 2014-03-01 23:05:33 +07:00
7d78f0cc48 [ceskatelevize] Fix video availability check and add geo unrestricted test 2014-03-01 22:54:37 +07:00
f00fc78674 Merge branch '_ceskatelevize' of https://github.com/pulpe/youtube-dl into pulpe-_ceskatelevize 2014-03-01 22:26:18 +07:00
392017874c [CeskaTelevize] raise ExtractorError if you are outside of CR 2014-03-01 16:17:29 +01:00
c3cb92d1ab [CeskaTelevize] fix python3 support @dstftw 2014-03-01 16:02:51 +01:00
aa5590fa07 skip test 2014-03-01 12:34:01 +01:00
8cfb5bbf92 [CeskaTelevize] Add initial support for ceskatelevize.cz 2014-03-01 11:47:52 +01:00
69bb54ebf9 [mailru] Add support for mail.ru video 2014-03-01 16:34:38 +07:00
ca97a56e4b [vk] Add support for embedded videos (Closes #2473) 2014-02-28 23:51:54 +07:00
fc26f3b4c2 [lifenews] Add support for multiple videos on the same page (#2482) 2014-02-28 22:52:06 +07:00
f604c93c64 [gdcvault] Formatting / Remove unused variables 2014-02-28 15:50:19 +01:00
dc3727b65c Credit @mnem dor GDCVault 2014-02-28 15:14:25 +01:00
aba3231de1 Merge remote-tracking branch 'mnem/gdc-vault' 2014-02-28 12:52:11 +01:00
9193bab91d release 2014.02.28 2014-02-28 12:31:37 +01:00
fbcf3e416d Merge pull request #2463 from rzhxeo/resume
Set resume_len to 0 if download is restarted
2014-02-28 12:30:34 +01:00
c0e5d85631 [vimeo] Improve thumbnail extraction 2014-02-28 18:00:12 +07:00
ca7fa3dcb3 [vimeo] Fix thumbs extraction (Closes #2480) 2014-02-28 17:43:54 +07:00
4ccfba28d9 [collegehumor] Fix test's uploader field 2014-02-27 19:10:30 +01:00
abb82f1ddc [mixcloud] Unquote the track id (#2462) 2014-02-27 18:58:09 +01:00
cda008cff1 release 2014.02.27.1 2014-02-27 16:09:58 +01:00
1877a14049 [lifenews] Switch to non-mobile webpage version (Fixes #2476) 2014-02-27 21:45:34 +07:00
546582ec3e Removing MD5 check for ethereal file. 2014-02-27 14:28:55 +00:00
4534485586 Fix test, remove unused, tidy quotes and brackets 2014-02-27 12:50:48 +00:00
a9ab8855e4 [prosiebensat1] Fix typo 2014-02-27 17:53:09 +07:00
8a44ef6868 [prosiebensat1] Add rtmpe support 2014-02-27 17:52:52 +07:00
0c7214c404 [prosiebensat1] Add support for ProSiebenSat.1 Digital sites (Closes
#2346 #2469)
2014-02-27 17:44:29 +07:00
4cf9654693 Add one more format to unified_strdate 2014-02-27 17:44:05 +07:00
50a138d95c Add support for authenticated videos 2014-02-27 10:32:31 +00:00
1b86cc41cf Add support for embed.ly 2014-02-27 08:14:28 +01:00
91346358b0 release 2014.02.27 2014-02-27 07:22:34 +01:00
f3783d4b77 Merge branch 'master' of github.com:rg3/youtube-dl 2014-02-27 07:22:22 +01:00
89ef304bed [generic] Add support for <meta redirect>
Fixes #413
2014-02-27 07:22:02 +01:00
83cebb8b7a Add support for FLV videos with speaker decks 2014-02-27 00:20:34 +00:00
9e68f9fdf1 Extractor for non-password protected GDC Vault videos 2014-02-26 22:33:33 +00:00
2acea5c03d [mit] Fix MITIE test 2014-02-26 18:09:43 +07:00
978177527e [rtlnow] Remove unused import 2014-02-26 18:02:17 +07:00
2648c436f3 Merge pull request #2464 from rzhxeo/xhamster
[XHamsterIE] Make hd video search more robust
2014-02-26 02:53:54 -08:00
33f1f2c455 [rtlnow] Fix duration extraction 2014-02-26 17:49:49 +07:00
995befe0e9 [rtlnow] Replace n-tvnow.de test 2014-02-26 17:43:56 +07:00
1bb92aff55 [rtlnow] Modernize and add f4m support 2014-02-26 17:36:16 +07:00
b8e1471d3a [XHamsterIE] Make hd video search more robust 2014-02-26 10:01:44 +01:00
60daf7f0bb Set resume_len to 0 if download is restarted 2014-02-26 02:47:27 +01:00
a83a3139d1 [mit] Add import 2014-02-26 00:41:13 +01:00
fdb7ca3b8d release 2014.02.26 2014-02-26 00:32:22 +01:00
0d7caf5cdf Merge remote-tracking branch 'ruuk/master' 2014-02-26 00:31:08 +01:00
a339d7ba91 Credit @amlweems for ocw.mit (#2460) 2014-02-26 00:30:47 +01:00
7216de55d6 [mit] Fix ocw tests 2014-02-26 00:29:45 +01:00
2437fbca64 [tests] Raise an exception if test definition is invalid (Found in #2460) 2014-02-26 00:12:02 +01:00
7d75d06b78 Merge branch 'ocw-mit-edu' of https://github.com/amlweems/youtube-dl 2014-02-26 00:09:42 +01:00
13ef5648c4 Merge branch 'master' of github.com:rg3/youtube-dl 2014-02-26 00:07:45 +01:00
5b2478e2ba [mit] Modernize 2014-02-26 00:06:31 +01:00
8b286571c3 [mixcloud] Fix _VALID_RE (fixes #2462)
Accept any character except `/` for uploader and the name, caused problems with non ASCII characters
2014-02-26 00:04:03 +01:00
f3ac523794 Merge pull request #2461 from niebles/master
Update __init__.py

`io` wasn't imported.
2014-02-26 00:00:57 +01:00
020cf5ebfd [nbc] Add an extractor for the main nbc.com site
Some of the videos are encrypted, the f4m downloader doesn’t support them.
2014-02-25 23:57:54 +01:00
54ab193970 Extract thumbnail with _og_search_thumbnail 2014-02-25 14:41:36 -08:00
8f563f32ab Update __init__.py 2014-02-25 17:31:16 -05:00
151bae3566 Add support for ocw.mit.edu video lectures 2014-02-25 14:44:34 -06:00
76df418cba Add thumbnail for metacafe 2014-02-25 12:04:44 -08:00
d0a72674c6 [crunchyroll] Use enumerate 2014-02-25 20:51:51 +01:00
1d430674c7 [crunchyroll] Handle error message 2014-02-25 20:30:17 +07:00
70cb73922b [crunchyroll] Fix subtitle lang code extraction 2014-02-25 20:29:53 +07:00
344400951c [crunchyroll] Tidy and modernize 2014-02-25 20:29:53 +07:00
ea5a0be811 Skip youtube toptracks test
All the playlists return 500 errors.
2014-02-25 14:11:01 +01:00
3c7fd0bdb2 release 2014.02.25.1 2014-02-25 11:15:55 +01:00
6cadf8c858 [vevo] Add age_limit support 2014-02-25 11:15:34 +01:00
27579b9e4c [vevo] Add suppot for v3 SMIL URLs (Fixes #2409) 2014-02-25 11:06:47 +01:00
4d756a9cc0 [testurl] Fix case when only one IE matches 2014-02-25 10:43:34 +01:00
3e668e05be Merge pull request #2456 from AGSPhoenix/master
[YT] Fix incorrect format code descriptions
2014-02-25 10:24:02 +01:00
60d3a2e0f8 Fix incorrect format codes
Corrects the descriptions for the DASH video format codes 264 and 138
(1440p and 2160p, respectively).
2014-02-24 21:29:37 -05:00
cc3a3b6b47 release 2014.02.25 2014-02-25 01:45:10 +01:00
eda1d49a62 Merge remote-tracking branch 'origin/master' 2014-02-25 01:45:00 +01:00
62e609ab77 Ignore BOM in batch files (Fixes #2450) 2014-02-25 01:43:17 +01:00
2bfe4ead4b [veoh] Allow to download videos with age protection (fixes #2455) 2014-02-24 22:01:34 +01:00
b1c6c32f78 [generic] Add support for nowvideo embedded videos 2014-02-24 23:37:42 +07:00
f6acbdecf4 [podomatic] Use unicode_literals 2014-02-24 17:31:09 +01:00
f1c9dfcc01 [nowvideo] Rewrite based on novamov extractor 2014-02-24 23:30:58 +07:00
ce78943ae1 [novamov] Generalize extractor 2014-02-24 23:30:09 +07:00
d6f0d86649 [novamov] Improve _VALID_URL 2014-02-24 22:01:19 +07:00
5bb67dbfea [cinemassacre] Modernize 2014-02-24 14:44:29 +01:00
47610c4d3e [cinemassacre] Fix extraction
Now we download over http, we don't need rtmpdump.
2014-02-24 14:35:26 +01:00
b732f3581f [academicearth] Remove debug print 2014-02-24 14:20:17 +01:00
9e57ce716f [academicearth] Fix extraction
The courses seems to be no longer available, changed the test to a playlist.
2014-02-24 14:18:12 +01:00
cd7ee7aa44 [nbc] Modernize 2014-02-24 14:00:31 +01:00
3cfe791473 [iprima] Add missing ) 2014-02-24 13:50:53 +01:00
973f2532f5 [iprima] Add support for -WEB URLs (Closes #2449) 2014-02-24 10:12:36 +01:00
bc3be21d59 [iprima] Clean up a little bit 2014-02-24 09:53:48 +01:00
0bf5cf9886 release 2014.02.24 2014-02-24 09:44:22 +01:00
919052d094 [zdf] Fix podcast extraction and use unicode literals (Closes #2446) 2014-02-24 13:47:47 +07:00
a2dafe2887 [youtube] Fix mix video regex
Attributes' order in <li> is arbitrary and changes every time playlist
page is fetched, so we can't rely on `data-index` to be before
`data-video-username`.
2014-02-24 12:52:02 +07:00
92661c994b [normalboots] Modernize and simplify 2014-02-23 18:28:22 +01:00
ffe8fe356a [normalboots] Fix video url extraction 2014-02-23 18:06:51 +01:00
bc2f773b4f [youtube:playlist] Fix mixes extraction (fixes #2444) 2014-02-23 17:17:36 +01:00
f919201ecc [vine] Extract more metadata and support low format 2014-02-23 19:02:31 +07:00
7ff5d5c2e2 Add one more format to unified_strdate 2014-02-23 19:00:51 +07:00
9b77f951c7 [breakcom] Fix error when calling _search_regex
I passed `’webpage’` instead of the variable `webpage`.
2014-02-23 12:28:44 +01:00
a25f2f990a [breakcom] Fix info json extraction 2014-02-23 12:20:58 +01:00
78b373975d [vine] Fix uploader extraction 2014-02-23 12:08:30 +01:00
2fcc873c4c release 2014.02.22.1 2014-02-22 23:17:56 +01:00
23c2baadb3 [videobam] Set age_limit to 18
From [their ToS](http://videobam.com/terms): "User must be eighteen 18[sic] years of age or older to use or access this web site."
2014-02-22 23:15:41 +01:00
521ee82334 Fix imports 2014-02-22 23:03:12 +01:00
1df96e59ce [f4m] Clean up 2014-02-22 23:03:00 +01:00
3e123c1e28 [videobam] Add support for videobam.com (Closes #2411) 2014-02-23 04:50:05 +07:00
f38da66731 Credit @soult for br 2014-02-22 20:19:41 +01:00
06aabfc422 [br] Simplify 2014-02-22 20:17:26 +01:00
1052d2bfec Merge remote-tracking branch 'soult/br' 2014-02-22 17:14:47 +01:00
5e0b652344 release 2014.02.22 2014-02-22 15:07:25 +01:00
0f8f097183 [release.sh] Do not run tests by default
We are at the point that testing takes waay too long for a release cycle, and fails way too often.
Tests through travis are a better indicator than testing just before release.
2014-02-22 15:06:07 +01:00
491ed3dda2 [trutube] Support multiple formats (#2433) 2014-02-22 15:05:30 +01:00
af284c6d1b Merge remote-tracking branch 'JohnyMoSwag/master' 2014-02-22 14:38:42 +01:00
41d3ec5fba [savefrom] Add extractor (Fixes #2434) 2014-02-22 14:36:16 +01:00
0568c352f3 [canalc2] Modernize 2014-02-22 14:27:09 +01:00
2e7b4cb714 [spankwire] Fix uploader id regex 2014-02-22 16:50:08 +07:00
9767726b66 [spankwire] Improve and modernize 2014-02-22 16:45:03 +07:00
9ddfd84e41 added trutubeIE 2014-02-22 00:11:57 -08:00
1cf563d84b release 2014.02.21.1 2014-02-21 18:19:48 +01:00
7928024f57 [BR] Add basic test 2014-02-21 18:00:05 +01:00
3eb38acb43 [BR] Add "BR" extractor
Extractor for videos from the Bayerischer Rundfunk Mediathek[1]. Currently only
supports videos. Audio and podcasts do not work yet with this extractor.

1: http://br.de/mediathek
2014-02-21 17:58:52 +01:00
f7300c5c90 [generic] Fix on python 2.6
`ParseError` is not available, it raises `xml.parsers.expat.ExpatError`.
The webpage needs to be encoded.
2014-02-21 16:59:10 +01:00
3489b7d26c [youtube] Simplify the decryption process for the manifest urls and add a test (closes #2422) 2014-02-21 15:15:58 +01:00
acd2bcc384 Merge branch 'youtube-dash' of github.com:m0vie/youtube-dl 2014-02-21 15:02:47 +01:00
43e77ca455 release 2014.02.21 2014-02-21 12:16:03 +01:00
da36297988 [wimp] Modernize and replace test 2014-02-21 17:57:19 +07:00
dbb94fb044 [youtube] Fix playlist extraction (Closes #2423, #2424, #2425) 2014-02-21 17:19:55 +07:00
d68f0cdb23 [youtube] decrypt signature when downloading dash manifest 2014-02-21 03:24:56 +01:00
eae16eb67b release 2014.02.20 2014-02-20 13:14:21 +01:00
4fc946b546 [generic] Add support for RSS feeds (Fixes #667) 2014-02-20 13:14:09 +01:00
280bc5dad6 [bbccouk] Add friendly contry filter error message (#2184) 2014-02-20 18:50:34 +07:00
f43770d8c9 Merge pull request #2413 from bentley/optypo
Fix minor typo: “to to” → “to”.
2014-02-20 08:02:54 +01:00
98c4b8fa1b Fix minor typo: “to to” → “to”. 2014-02-19 20:02:29 -07:00
ccb079ee67 [xhamster] Fix and improve 2014-02-20 02:37:44 +07:00
2ea237472c Merge pull request #2408 from pulpe/_readme
[README.md] correct the test command
2014-02-19 16:45:14 +01:00
0d4b4865cc [README.md] correct the test command 2014-02-19 16:13:45 +01:00
fe52f9f956 Document prefered config location (#2407) 2014-02-19 11:35:35 +01:00
882907a818 release 2014.02.19.1 2014-02-19 01:27:22 +01:00
572a89cc4e [liveleak] Add support for prochan embeds (Fixes #2406) 2014-02-19 01:27:12 +01:00
c377110539 release 2014.02.19 2014-02-19 01:08:16 +01:00
a9c7198a0b [testurl] Add extractor
This is a pseudo extractor that can be used to quickly look up test URLs, or test without the test harness.
2014-02-19 01:06:16 +01:00
f6f01ea17b [space] modernize 2014-02-19 01:04:24 +01:00
f2d0fc6823 [bbccouk] Replace test
This older episode is from 1994 and hopefully won't get deleted.
2014-02-19 06:46:14 +07:00
f7000f3a1b [youtube] Add support for yourepeat.com URLs (Closes #2397) 2014-02-19 02:00:54 +07:00
c7f0177fa7 [bbccouk] Skip test 2014-02-18 00:26:12 +07:00
09c4d50944 Fix indenting in README 2014-02-17 14:58:39 +01:00
2eb5d315d4 [youtube] Match more truncated URLs (Closes #2402) 2014-02-17 14:56:21 +01:00
ad5976b4d9 [vimeo] Modernize test definition 2014-02-17 11:44:24 +01:00
a0dfcdce5e release 2014.02.17 2014-02-17 11:33:13 +01:00
96d1637082 Credit @Nikerabbit for helsinki 2014-02-17 11:33:01 +01:00
960f317171 [helsinki] Simplify 2014-02-17 11:32:30 +01:00
4412ca751d Merge remote-tracking branch 'Nikerabbit/hki' 2014-02-17 11:26:09 +01:00
cbffec0c95 Credit @patheticpat for 4tube.com (#2398) 2014-02-17 09:08:38 +07:00
0cea52cc18 Credit @pulpe for play.iprima.cz and stream.cz 2014-02-17 09:07:36 +07:00
6d784e87f4 Credit @prutz1311 for normalboots.com (#2279) 2014-02-17 09:03:28 +07:00
ae6cae78f1 [4tube] Minor changes and extract more metadata 2014-02-17 03:51:03 +07:00
0f99566c01 Add one more format in unified_strdate 2014-02-17 03:47:03 +07:00
2db806b4aa Improve parse_duration 2014-02-17 03:46:26 +07:00
3f32c0ba4c Merge branch '4tube' of https://github.com/patheticpat/youtube-dl into patheticpat-4tube 2014-02-17 02:21:45 +07:00
541cb26c0d [smotri] Add entry for netrc authentication 2014-02-17 02:19:55 +07:00
5544e038ab [vk] Add entry for netrc authentication 2014-02-17 02:17:10 +07:00
9032dc28a6 [vk] Add login feature (Closes #2206) 2014-02-17 02:05:15 +07:00
03635e2a71 Add support for 4tube.com. 2014-02-16 18:10:39 +01:00
00cf938aa5 [nfb] Add rtmp app field to format 2014-02-16 06:11:38 +07:00
a5f707c495 Merge branch 'master' of github.com:rg3/youtube-dl 2014-02-15 20:45:12 +01:00
1824b48169 [f4m] Download only the first fragment with the --test option 2014-02-15 17:53:23 +01:00
07ad22b8af [youtube:search] Mark "no results found" error as expected 2014-02-15 16:30:11 +01:00
b53466e168 Fix f4m downloading on Python 2.6 2014-02-15 16:24:43 +01:00
6a7a389679 Merge branch 'master' of github.com:rg3/youtube-dl 2014-02-15 15:34:17 +01:00
4edff78531 Merge remote-tracking branch 'jaimeMF/f4m'
Conflicts:
	youtube_dl/extractor/__init__.py
2014-02-15 15:32:13 +01:00
99043c2ea5 Replace test for dailymotion users 2014-02-15 13:17:31 +01:00
e68abba910 [sohu] Skip test
Only available from China
2014-02-15 13:12:41 +01:00
3165dc4d9f [france2.fr:generation-quoi] Skip test
The videos seem to not be available outside France
2014-02-15 13:04:31 +01:00
66c43a53e4 Add support for video.helsinki.fi archives 2014-02-14 18:14:28 +02:00
463b334616 [ndr] Replace 404 test 2014-02-14 23:12:15 +07:00
b71dbc57c4 [vesti] Fix player regex (Closes #2382) 2014-02-14 22:26:13 +07:00
72ca1d7f45 [vesti] Skip test 2 due to geo restrictions
At least that's how I interpret the error message "Просмотр видео ограничен в вашем регионе."
2014-02-13 22:19:59 +01:00
76e461f395 release 2014.02.13 2014-02-13 19:13:05 +01:00
1074982e6e [vesti] Add support for vesti.ru videos and live streams (Closes #2376) 2014-02-13 23:23:48 +07:00
29b2aaf035 [jadorecettepub] Remove unused import 2014-02-13 16:33:12 +01:00
6f90d098c5 [ecapist] modernize and fix id property 2014-02-13 16:32:42 +01:00
0715161450 Merge pull request #2373 from pulpe/_description_fixes
[collegehumor, chilloutzone] changed description in tests
2014-02-12 06:22:03 -08:00
896583517f [collegehumor, chilloutzone] changed description in tests 2014-02-12 15:11:57 +01:00
713d31fac8 [gametrailers] Fix gametrailers test 2014-02-12 01:50:53 +07:00
96cb10a5f5 [mtv] Improve title extraction 2014-02-12 01:07:30 +07:00
c207c1044e Merge pull request #2372 from pulpe/dropbox_fix
[dropbox] replace not working test
2014-02-11 09:34:49 -08:00
79629ec717 [dropbox] replace not working test 2014-02-11 17:27:36 +01:00
008fda0f08 [ndr] Replace 404 video test 2014-02-11 21:21:05 +07:00
0ae6b01937 [cnn] Add an extractor for blogs (closes #2361) 2014-02-11 14:38:17 +01:00
def630e523 [xtube] Fix uploader extraction 2014-02-11 14:20:41 +01:00
c5ba203e23 [xtube] use unicode_literals 2014-02-11 13:51:37 +01:00
2317e6b2b3 [yahoo] use unicode_literals 2014-02-11 13:51:23 +01:00
cb38928974 [firsttv] Skip test 2014-02-11 10:26:52 +07:00
fa78f13302 [streamcz] Minor changes 2014-02-11 10:19:02 +07:00
18395217c4 Merge branch '_stream' of https://github.com/pulpe/youtube-dl into pulpe-_stream 2014-02-11 09:18:46 +07:00
34bd987811 [freesound] Modernize 2014-02-10 21:03:14 +01:00
af6ba6a1c4 [exfm] Modernize 2014-02-10 21:00:37 +01:00
85409a0c69 [dotsub] Modernize 2014-02-10 20:52:53 +01:00
ebfe352b62 [breakcom] Modernize 2014-02-10 20:48:46 +01:00
fde56d2f17 [howcast] Modernize 2014-02-10 20:45:17 +01:00
3501423dfe [googleplus] Modernize and simplify 2014-02-10 20:36:11 +01:00
0de668af51 [instagram] Modernize 2014-02-10 20:24:12 +01:00
2a584ea90a [firsttv] Fix video URL regex 2014-02-11 00:49:37 +07:00
0f6ed94a15 [firsttv] Add support for 1tv.ru videoarchive 2014-02-11 00:20:41 +07:00
bcb891e82b [lifenews] Minor improvements 2014-02-10 21:07:41 +07:00
ac6e4ca1ed [brightcove] Unescape html entities from the 'og:video' url property (fixes #2360) 2014-02-10 07:50:10 +01:00
2e20bba708 release 2014.02.10 2014-02-10 02:01:11 +01:00
e70dc1d14b [youtube] Correct a minor regex typo 2014-02-10 01:30:47 +01:00
0793a7b3c7 [StreamCZ] Add support for stream.cz 2014-02-09 18:37:12 +01:00
026fcc0495 Fix #2355 (date parsing with dashes) 2014-02-09 18:09:57 +01:00
81c2f20b53 [youtube] Correct invalid JSON (Fixes #2353) 2014-02-09 17:56:10 +01:00
1afe753462 [slideshare] Fix description extraction and modernize
The ‘og:description’  property doesn’t contain the full description
2014-02-09 14:23:19 +01:00
524c2c716a [bloomberg] Fix extraction of ooyala embed code 2014-02-09 14:11:45 +01:00
b542d4bbd7 [kontrtube] Add support for kontrtube.ru (Closes #2354) 2014-02-09 19:53:11 +07:00
cf1eb45153 Add a downloader for f4m manifests 2014-02-09 12:24:54 +01:00
a97bcd80ba Add an extractor for syfy.com
It uses theplatfrom.com, which has been updated to work with f4m manifests
2014-02-08 22:30:00 +01:00
17968e444c [bbc.co.uk] Fix TV episode test 2014-02-09 04:04:21 +07:00
2e3fd9ec2f [bbc.co.uk] Improve overall extractor structure, add subtitles support
(#2184)

Everything from http://www.bbc.co.uk/iplayer/ should be downloadable
now.
2014-02-09 04:00:49 +07:00
d6a283b025 release 2014.02.08.2 2014-02-08 19:20:35 +01:00
9766538124 [jadorecettepub] Add extractor (Fixes #2148) 2014-02-08 19:20:23 +01:00
98dbee8681 [jeuxvideo] Modernize 2014-02-08 18:43:12 +01:00
e421491b3b release 2014.02.08.1 2014-02-08 18:38:05 +01:00
6828d37c41 Merge branch 'master' of github.com:rg3/youtube-dl 2014-02-08 18:37:53 +01:00
bf5f610099 [pbs] Add support for viralplayer links (Fixes #2350) 2014-02-08 18:37:33 +01:00
8b7f73404a [bbc.co.uk] Fix regex 2014-02-08 22:55:43 +07:00
85cacb2f51 [bbc.co.uk] Add one more link format 2014-02-08 22:54:05 +07:00
b3fa3917e2 release 2014.02.08 2014-02-08 16:25:03 +01:00
082c6c867a [bbc.co.uk] Add support for bbc.co.uk radio programmes (Closes #2184) 2014-02-08 21:55:28 +07:00
03fcf1ab57 Merge pull request #2342 from MikeCol/tube8
[Tube8] Extended valid urls schema
2014-02-08 04:00:50 +01:00
3b00dea5eb Extended valid urls schema 2014-02-08 00:09:26 +01:00
8bc6c8e3c0 [chilloutzone] Add additional tests (#2340) 2014-02-07 15:42:31 +01:00
79bc27b53a [channel9] Simplify 2014-02-07 19:41:18 +07:00
84dd703199 [ivi] Simplify 2014-02-07 19:36:50 +07:00
c6fdba23a6 [nfb] Add workaround for python2.6 2014-02-07 19:23:53 +07:00
b19fe521a9 Merge pull request #2340 from Fnordlab/master
[chilloutzone] Fixes refactoring bug
2014-02-07 12:46:56 +01:00
c1e672d121 [chilloutzone] fixes bug with youtube extraction
the id used for extracting the video from youtube is stored in
native_video_id not video_id. This id is only used on chilloutzone.net
2014-02-07 12:29:58 +01:00
f4371f4784 Merge remote-tracking branch 'upstream/master' 2014-02-07 12:20:58 +01:00
d914d9d187 [chilloutzone] Add import 2014-02-07 12:03:19 +01:00
845d14d377 credit @Fnordlab for chilloutzone 2014-02-07 12:00:58 +01:00
4a9540b6d2 [chilloutzone] Simplify (#2338) 2014-02-07 12:00:25 +01:00
9f31be7000 Merge remote-tracking branch 'Fnordlab/chilloutzone' 2014-02-07 11:50:26 +01:00
41fa1b627d release 2014.02.06.3 2014-02-07 01:41:01 +01:00
c0c4e66b29 Merge branch 'chilloutzone' 2014-02-06 21:33:16 +01:00
cd8662de22 [chilloutzone] Bug fix, runs against tests
Fixes a bug with python3.3 and made the extractor run successfully
against tox
2014-02-06 21:31:04 +01:00
3587159614 [nfb] Add encode POST data 2014-02-07 02:13:04 +07:00
d67cc9fa7c [youtube:playlist] Recognize ‘top tracks’ urls (closes #2332)
The list parameter starts with ‘MC’ and can have more characters after it, including dots
2014-02-06 19:46:26 +01:00
bf3a2fe923 [elpais] Fix typo 2014-02-07 00:38:29 +07:00
e9ea0bf123 [ndr] Add support for ndr.de (Closes #2325) 2014-02-07 00:35:26 +07:00
63424b6233 release 2014.02.06.2 2014-02-06 15:45:47 +01:00
0bf35c5cf5 [nfb] Add support for onf.ca URLs 2014-02-06 21:41:31 +07:00
95c29381eb [mooshare] Fix bogus video page URL 2014-02-06 21:26:12 +07:00
94c4abce7f [nfb] Add support for nfb.ca (Closes #2069) 2014-02-06 21:19:13 +07:00
f2dffe55f8 Merge branch 'chilloutzone' 2014-02-06 11:49:38 +01:00
46a073bfac [chilloutzone] Added support for chilloutzone.net
Added support for chilloutzone.net videos including embedded youtube
and vimeo movies. In case you find a not working movie, drop me an
email.
2014-02-06 11:44:44 +01:00
df872ec4e7 release 2014.02.06.1 2014-02-06 11:30:00 +01:00
5de90176d9 [elpais] Add extractor 2014-02-06 11:29:46 +01:00
dcf3eec47a [test_download] Skip over BadStatusLine errors
An error like https://travis-ci.org/rg3/youtube-dl/jobs/18317799#L449 is almost certainly the server's fault.
2014-02-06 04:19:57 +01:00
e9e4f30d26 [pbs] Remove unused import 2014-02-06 04:19:43 +01:00
83cebd73d4 [collegehumor] We only get shortened descriptions now 2014-02-06 04:16:22 +01:00
1df4229bd7 [mtv/gametrailers] Change order of title preference
It looks like the plain title is better again
2014-02-06 04:15:12 +01:00
3c995527e9 release 2014.02.06 2014-02-06 03:30:30 +01:00
7c62b568a2 Merge branch 'master' of github.com:rg3/youtube-dl 2014-02-06 03:30:18 +01:00
ccf9114e84 [googlesearch] Fix start, and skip playlists (Fixes #2329) 2014-02-06 03:29:10 +01:00
d8061908bb [ina] Improve _VALID_URL regex (fixes #2328)
Accept all letters in upper case and don’t require anything after the id
2014-02-05 23:01:24 +01:00
211e17dd43 release 2014.02.05 2014-02-05 21:23:28 +01:00
6cb38a9994 [firstpost] Add extractor (Fixes #2324) 2014-02-05 21:23:21 +01:00
fa7df757a7 [thisav] Simplify and use unicode literals 2014-02-05 19:13:06 +07:00
8c82077619 [toutv] Use unicode literals 2014-02-05 19:02:03 +07:00
e5d1f9e50a [m6] Add support for m6.fr (Closes #2313) 2014-02-05 17:38:17 +07:00
7ee50ae7b5 release 2014.02.04.1 2014-02-04 23:26:55 +01:00
de563c9da0 [ina] Simplify
Download the feed with ‘_download_xml’ to make the extraction easier
2014-02-04 23:15:36 +01:00
50451f2a18 [vbox7] simplify 2014-02-04 23:02:53 +01:00
9bc70948e1 [statigram] Simplify 2014-02-04 22:52:27 +01:00
5dc733f071 [vine] Simplify 2014-02-04 22:02:15 +01:00
bc4850908c [test/youtube_signature] Add a test with the last player
To verify it correctly handles function with “$” in their names.
2014-02-04 21:56:17 +01:00
20650c8654 [youtube] signatures: Recognize javascript functions that contain “$” (fixes #2304) 2014-02-04 21:38:50 +01:00
56dced2670 remove accidentally duplicated test file 2014-02-04 16:35:22 +01:00
eef726c04b release 2014.02.04 2014-02-04 16:33:19 +01:00
acf1555d76 Merge remote-tracking branch 'origin/master' 2014-02-04 16:33:06 +01:00
22e7f1a6ec [pbs] Add support for article pages (Fixes #870) 2014-02-04 16:31:00 +01:00
3c49325658 [lifenews] Fix video URL extraction (Closes #2302) 2014-02-04 21:31:25 +07:00
bb1cd2bea1 [mooshare] Add support for mooshare.biz (Closes #2149) 2014-02-04 20:53:46 +07:00
fdf1f8d4ce [collegehumor] Adapt test to changed video description 2014-02-04 10:37:01 +01:00
117c8c6b97 [bliptv] Remove unused imports 2014-02-04 10:25:19 +01:00
5cef4ff09b [subtittles] Check that the result is not empty 2014-02-04 10:24:17 +01:00
91264ce572 [iprima] Use centralized format sorting 2014-02-04 10:24:00 +01:00
c79ef8e1ae Merge remote-tracking branch 'pulpe/_iprima' 2014-02-04 10:21:42 +01:00
58d915df51 [traileraddict] mark as broken
traileraddict has changed their URL encoding scheme.
I'm working on restoring support, but that may take some time.
2014-02-04 10:13:52 +01:00
7881a64499 [iprima] Add support for play.iprima.cz 2014-02-04 07:45:41 +01:00
90159f5561 release 2014.02.03.1 2014-02-03 15:20:41 +01:00
99877772d0 [generic] Add support for multiple brightcove URLs (Fixes #2283) 2014-02-03 15:19:40 +01:00
b0268cb6ce [vimeo] Remove superfluous whitespace 2014-02-03 20:24:11 +07:00
4edff4cfa8 [vimeo] Add subtitle tests 2014-02-03 20:19:23 +07:00
1eac553e7e [vimeo] Add support for subtitles (Closes #2239) 2014-02-03 20:02:58 +07:00
9d3ac7444d release 2014.02.03 2014-02-03 06:54:37 +01:00
588128d054 Add --ignore-config option (Fixes #633) 2014-02-03 06:54:27 +01:00
8e93b9b9aa Merge remote-tracking branch 'origin/master'
Conflicts:
	youtube_dl/extractor/bliptv.py
2014-02-03 05:19:28 +01:00
b4bcffefa3 [blip.tv] Add support for subtitles (#2274) 2014-02-03 05:18:30 +01:00
2b39af9b4f [BlipTV] Add a test case w/ subtitles (#2274) 2014-02-03 02:41:59 +01:00
23fe495feb Merge pull request #2274 from z00nx/master
[bliptv] Filter out SRT files
2014-02-02 17:31:57 -08:00
b5dbe89bba Merge branch 'master' of https://github.com/rg3/youtube-dl 2014-02-03 01:22:41 +07:00
dbe80ca7ad [tinypic] Add support for tinypic.com videos (Closes #2210) 2014-02-03 01:20:03 +07:00
009a3408f5 [cspan] Fix extraction (fixes #2291)
The webpage urls have changed.
The title and thumbnail are now extracted from an xml.
2014-02-02 18:24:20 +01:00
dst
b58e3c8918 [vube] Use 'id' and 'ext' instead of 'file' 2014-02-02 20:04:44 +07:00
56b6faf91e [traileraddict] Fix extraction 2014-02-02 12:52:47 +01:00
7ac1f877a7 [collegehumor] Fix test
The description simply changed, our code is working fine
2014-02-02 12:43:09 +01:00
d55433bbfd Remove unused imports and simplify 2014-02-02 12:03:36 +01:00
f0ce2bc1c5 Merge remote-tracking branch 'dstftw/vube' 2014-02-02 11:54:23 +01:00
c3bc00b90e [Normalboots] Update test video description 2014-02-02 07:17:48 +01:00
ff6b7b049b Merge pull request #2279 from prutz1311/master
Added support for normalboots.com (#2237)
2014-02-01 22:16:37 -08:00
dst
f46359121f [vube] Make video description optional as it may be missing 2014-02-02 12:03:55 +07:00
dst
37c1525c17 [vube] Remove unnecessary coding cookie 2014-02-02 10:49:38 +07:00
dst
c85e4cf7b4 [vube] Add support for vube.com (Closes #2285) 2014-02-02 08:33:24 +07:00
c66dcda287 Merge pull request #2282 from dstftw/lifenews
[lifenews] Add support for lifenews.ru and fix og content extraction regex
2014-01-31 10:23:46 -08:00
dst
6d845922ab [lifenews] Fix test title 2014-02-01 01:10:15 +07:00
2949cbe036 Update normalboots.py
fixed
2014-01-31 16:51:34 +03:00
c3309a7774 [collegehumor] fix test description 2014-01-31 14:48:49 +01:00
7aed837595 [ro220] Simplify and use unicode_literals 2014-01-31 14:07:58 +01:00
0eb799bae9 [ustream] Simplify and use unicode_literals 2014-01-31 14:05:33 +01:00
4baff4a4ae [spiegel] Simplify and use unicode_literals 2014-01-31 14:00:55 +01:00
45d7bc2f8b [vevo] Simplify and use unicode_literals 2014-01-31 13:56:45 +01:00
c0c2ddddcd Merge pull request #2281 from matthewfranglen/master
Fix #2280: Antigen now links to python script
2014-01-30 19:24:43 -08:00
a96ed91610 Add tutorial for adding a new IE 2014-01-31 04:23:39 +01:00
dst
c1206423c4 Fix extraction of og content in single quotes 2014-01-31 03:57:33 +07:00
dst
659aa21ba1 [lifenews] Add support for lifenews.ru 2014-01-31 03:48:00 +07:00
efd02e858a Fix #2280: Antigen now links to python script 2014-01-30 20:44:16 +00:00
3bf8bc7f37 Update normalboots.py
_TEST added
2014-01-30 23:01:35 +03:00
8ccda826d5 release 2014.01.30.2 2014-01-30 19:33:02 +01:00
b9381e43c2 Fix the extraction of full-episodes urls from southpark.com (fixes #2278)
Added an additional regex to the generic _real_extract method of MTVServicesInfoExtractor
2014-01-30 19:04:33 +01:00
fcdea2666d [collegehumor] Add support for embedded youtube videos (fixes #2277) 2014-01-30 18:33:49 +01:00
c4db377cbb [collegehumor] The video may not contain any file in webm format (#2277)
For example http://www.collegehumor.com/video/5812266
2014-01-30 18:33:49 +01:00
90dc5e8693 Merge pull request #2252 from matthewfranglen/master
Add antigen compatible plugin description
2014-01-30 09:28:10 -08:00
c81a855b0f Added support for normalboots.com 2014-01-30 21:26:50 +04:00
c8d8ec8567 Add requested documentation 2014-01-30 15:09:09 +00:00
4f879a5be0 [bliptv] Filter out SRT files 2014-01-30 20:44:53 +11:00
1a0648b4a9 [malemotion] Disable test case
I am not going to look for an alternative one, but feel free to suggest one.
2014-01-30 06:15:50 +01:00
3c1b4669d0 [francetv] Use unicode_literals 2014-01-30 06:13:57 +01:00
24b3d5e538 [francetvinfo.fr] Support more ID suffixes 2014-01-30 06:12:56 +01:00
ab083b08ab [generic] remove testcase
The video seems to have been removed from the site.
2014-01-30 06:10:57 +01:00
89acb96927 [liveleak] Support old and new URLs 2014-01-30 06:09:06 +01:00
79752e18b1 release 2014.01.30.1 2014-01-30 05:33:31 +01:00
55b41c723c Merge branch 'master' of github.com:rg3/youtube-dl 2014-01-30 05:30:16 +01:00
9f8928d032 [generic] Match JWPlayerOptions
This adds support for The Guardian, among others
Closes #2271, fixes #2267
2014-01-30 05:29:10 +01:00
3effa7ceaa Merge pull request #2273 from dstftw/crunchyroll
[crunchyroll] Add support for mobile URLs and use unicode literals
2014-01-29 20:15:38 -08:00
ed9cc2f1e0 release 2014.01.30 2014-01-30 04:52:54 +01:00
975fa541c2 [liveleak] Support multiple formats (Fixes #2262) 2014-01-30 04:52:50 +01:00
251974e44c Merge pull request #2272 from dstftw/master
Improve some regexes
2014-01-29 14:58:14 -08:00
dst
38a40276ec [crunchyroll] Add support for mobile URLs and use unicode literals 2014-01-30 05:23:44 +07:00
dst
57b6288358 [comedycentral] Improve regexes 2014-01-30 04:33:00 +07:00
dst
c3f51436bf Improve some regexes for embedded players 2014-01-30 04:26:46 +07:00
0c708f11cb [bloomberg] Fix ooyala url extraction
Added a helper method to InfoExtractor for searching the ‘twitter:player’ meta property.
Now the OoyalaIE also recognizes the ‘ec’ parameter in the url as the embed code.
2014-01-29 18:03:32 +01:00
fb2a706d11 [myspass] Simplify and use unicode_literals 2014-01-29 16:59:22 +01:00
0b76600deb [youjizz] Simplify and use unicode_literals 2014-01-29 16:59:21 +01:00
245b612a36 [rbmaradio] Simplify and use unicode_literals 2014-01-29 16:59:10 +01:00
d882161d5a [infoq] Simplify and use unicode_literals 2014-01-29 15:34:35 +01:00
d4a21e0b49 [tutv] Simplify and use unicode_literals 2014-01-29 15:22:41 +01:00
26a78d4bbf [nba] Simplify and use unicode_literals
Remove the commented parts for extracting the upload date
2014-01-29 15:16:18 +01:00
8db69786c2 release 2014.01.29 2014-01-29 11:16:28 +01:00
b11cec4162 [youtube:user] Fix id key (Fixes #1745) 2014-01-29 11:16:12 +01:00
7eeb5bef24 [liveleak] Simplify 2014-01-28 21:57:38 +01:00
9d2032932c Merge remote-tracking branch 'dstftw/ivi' 2014-01-28 21:47:05 +01:00
6490306017 Merge remote-tracking branch 'dstftw/channel9' 2014-01-28 21:46:42 +01:00
dst
ceb2b7d257 [ivi] Fix test and use unicode literals 2014-01-29 02:20:48 +07:00
dst
459a53c2c2 [channel9] Remove unnecessary coding cookie 2014-01-29 02:07:29 +07:00
dst
adc267eebf [channel9] Use unicode literals 2014-01-29 02:00:56 +07:00
dst
ffe8f62d27 [smotri] Simplify login and use unicode literals 2014-01-29 01:52:57 +07:00
ed85007039 [ninegag] Use unicode_literals 2014-01-28 18:55:06 +01:00
5aaca50d60 [keek] Simplify and use unicode_literals 2014-01-28 18:47:31 +01:00
869baf3565 [funnyordie] Simplify and use unicode_literals 2014-01-28 18:41:39 +01:00
e299f6d27f [pornhd] Fix 2014-01-28 03:53:00 +01:00
4a192f817e release 2014.01.28.1 2014-01-28 03:44:19 +01:00
bc1d1a5a71 release 2014.01.28 2014-01-28 03:37:42 +01:00
456895d9cf [tumblr] Test new URL format (#2255) 2014-01-28 03:37:38 +01:00
218c15ab59 Merge remote-tracking branch 'mike/tumblr-url' 2014-01-28 03:35:52 +01:00
17ab4d3b5e [brightcove] Move test to generic 2014-01-28 03:35:32 +01:00
31ef0ff038 Merge remote-tracking branch 'dstftw/rutube-channel' 2014-01-28 03:32:22 +01:00
37e3b90d59 [rutube] Simplify 2014-01-28 03:32:07 +01:00
dst
00ff8f92a5 [rutube] Update test 2014-01-28 09:31:14 +07:00
4857beba3a Merge remote-tracking branch 'dstftw/rutube-channel' 2014-01-28 03:30:21 +01:00
c1e60cc2bf Merge remote-tracking branch 'dstftw/master' 2014-01-28 03:29:10 +01:00
dst
98669ed79c [imdb] Fix playlist test 2014-01-28 09:13:08 +07:00
dst
a3978a6159 [imdb] Fix duplicated entries bug 2014-01-28 09:12:23 +07:00
dst
e3a9f32f52 [rutube] Add support for user videos 2014-01-28 08:47:17 +07:00
dst
87fac3238d [rutube] Add channel test 2014-01-28 08:25:56 +07:00
dst
a2fb2a2134 [rutube] Improve video extractor 2014-01-28 08:19:45 +07:00
9e8ee54553 VALID_URL changed to match different kinds of Tumblr-URLs 2014-01-28 01:41:18 +01:00
117bec936c [brightcove] Parse URL from meta element if available (Fixes #2253) 2014-01-28 01:01:23 +01:00
dst
1547c8cc88 [rutube] Add support for channels and movies 2014-01-28 06:56:09 +07:00
075911d48e [la7] Skip test on travis 2014-01-27 23:47:22 +01:00
b21a918984 release 2014.01.27.2 2014-01-27 19:22:45 +01:00
f9b8549609 [ard] Support multiple formats (Closes #2247) 2014-01-27 18:40:10 +01:00
d1b30713fb Add antigen compatible plugin description 2014-01-27 15:33:16 +00:00
e2ba07024f Merge remote-tracking branch 'origin/master' 2014-01-27 12:45:59 +01:00
9b05bd42e5 [discovery] Extract more info and simplify 2014-01-27 12:41:30 +01:00
b6d3a99678 [cliphunter] Simplify (#2233) 2014-01-27 12:39:39 +01:00
96d7b8873a Merge remote-tracking branch 'sahutd/master' 2014-01-27 12:21:00 +01:00
efc867775e [cliphunter] Simplify 2014-01-27 07:55:30 +01:00
5ab772f09c Merge branch 'cliphunter' of https://github.com/pornophage/youtube-dl 2014-01-27 07:48:51 +01:00
2a89386232 Credit @MikeCol for malemotion IE 2014-01-27 07:43:41 +01:00
4d9be98dbc Malemotion extractor 2014-01-27 07:43:02 +01:00
6737907826 [tumblr] Fix thumbnail extraction
Signed-off-by: Philipp Hagemeister <phihag@phihag.de>
2014-01-27 07:38:55 +01:00
c060b77446 [tumblr] Use unicode_literals 2014-01-27 07:36:18 +01:00
7e8caf30c0 Throw an error if no video formats are found 2014-01-27 07:31:54 +01:00
ca3e054750 release 2014.01.27.1 2014-01-27 07:09:55 +01:00
1da1558f46 [la7] Support more URLs 2014-01-27 07:08:01 +01:00
25c67d257c release 2014.01.27 2014-01-27 07:05:39 +01:00
a17d16d59c [la7] Add support 2014-01-27 07:05:28 +01:00
d16076ff3e [huffpost] Fix extractor 2014-01-27 06:55:35 +01:00
6c57e8a063 [setup.py] Only print a warning if documentation files are missing (Fixes #780) 2014-01-27 06:22:15 +01:00
db1f388878 [huffpost] Add support 2014-01-27 05:47:38 +01:00
0f2999fe2b Merge pull request #2221 from Rudloff/master
Removed websurg extractor
2014-01-26 18:03:26 -08:00
53bfd6b24c Added support for Discovery Issue #2227 2014-01-26 14:05:34 +05:30
5700e7792a [youtube] Encode the data when submitting the form for confirming the age
Needed on python 3
2014-01-25 17:22:41 +01:00
38c2e5b8d5 [youtube] Use https: in more urls 2014-01-25 17:11:55 +01:00
48f9678a32 [test/youtube_lists] Change the list used for testing the Top Lists extractor
The ‘Top tracks’ list is not always present in the channel page
2014-01-25 17:02:32 +01:00
beddbc2ad1 [youtube:toplist] Make the regex for finding the playlist link more flexible
`title={foo}` may not be at the end of the `href` string.
2014-01-25 15:47:03 +01:00
f89197d73e Some pep8 style fixes 2014-01-25 15:33:23 +01:00
944d65c762 [extractor/common] Encode the url when calculating the md5 with —write-pages option
This doesn’t cause any problem in python 2.*, but on python 3 the `md5` function only accepts bytes.
2014-01-25 15:32:56 +01:00
f945612bd0 [rtlnow] Simplify 2014-01-25 14:18:54 +01:00
59188de113 Properly escape ‘.’ in some _VALID_URL properties 2014-01-25 11:48:08 +01:00
352d08e3e5 Add an extractor for freespeech.org (closes #2234) 2014-01-25 11:31:30 +01:00
bacb5e4f44 Minor fixes
Remove empty description
Set correct md5 test
2014-01-25 02:34:08 +01:00
008af8660b Add cliphunter extractor 2014-01-25 01:46:52 +01:00
886fa72324 release 2014.01.23.4 2014-01-24 00:06:55 +01:00
2c5bae429a [youtube] Fix new formats 2014-01-24 00:06:26 +01:00
f265fc1238 release 2014.01.23.3 2014-01-23 23:55:53 +01:00
1394ce65b4 [youtube] Add new formats (Fixes #2221) 2014-01-23 23:54:06 +01:00
67ccb77197 Removed websurg extractor 2014-01-23 23:42:34 +01:00
63ef36e8d8 Add build instructions (Fixes #2218) 2014-01-23 23:28:29 +01:00
0b65e5d40f [youtube] Do not break upon unknown formats 2014-01-23 23:21:42 +01:00
629be17af4 release 2014.01.23.2 2014-01-23 19:05:05 +01:00
fd28827864 Do not count unmatched videos for --max-downloads (Fixes #2211) 2014-01-23 19:04:22 +01:00
8c61d9a9b1 Mention default for -f (Fixes #2215) 2014-01-23 18:50:04 +01:00
975d35dbab [youtube:truncated_url] Also match mail subscription links (#2214) 2014-01-23 16:14:54 +01:00
8b769664c4 [sina] Recognize http://video.sina.com.cn/v/b/{id}-*.html urls (fixes #2212) 2014-01-23 14:03:14 +01:00
76f270a46a [sina] use unicode_literals 2014-01-23 14:00:29 +01:00
9dab1b7f28 release 2014.01.23.1 2014-01-23 10:37:34 +01:00
d3e5bbf437 Correct --max-downloads with --ignore-errors 2014-01-23 10:36:47 +01:00
18a25c5d78 Clarify update output (Fixes #2205)
No, we are not intentionally hiding the version number. Why would we?
2014-01-23 10:24:44 +01:00
924f47f7b6 [rottentomatoes] Use unicode_literals 2014-01-23 04:05:58 +01:00
22ff1c4a93 [xhamster] Futher simplification 2014-01-23 04:04:39 +01:00
35409e1101 [xhamster] Use unicode_literals 2014-01-23 03:52:59 +01:00
65d781128a [xhamster] Add support for hd video
Signed-off-by: Philipp Hagemeister <phihag@phihag.de>
2014-01-23 03:51:09 +01:00
c35b1b07e2 release 2014.01.23 2014-01-23 00:13:00 +01:00
066f6a0630 [nowness] Add support 2014-01-23 00:12:47 +01:00
12ed57418c [gamespot] Fix regexp 2014-01-22 22:31:19 +01:00
8b1be5cd73 Move --youtube-include-dash-manifest into correct option group 2014-01-22 22:17:53 +01:00
780083dbc6 release 2014.01.22.5 2014-01-22 21:57:17 +01:00
4919603f66 [youtube] Make DASH manifest download conditional for now
DASH download fails on many videos (all with encrypted signatures? not sure yet), for example 07FYdnEawAQ, with a 403.
2014-01-22 21:56:38 +01:00
dd26ced164 Add __len__ to PagedLists 2014-01-22 21:43:33 +01:00
bd2d82a5d3 [newgrounds] Simplify 2014-01-22 21:41:28 +01:00
c4cd138b92 release 2014.01.22.4 2014-01-22 21:01:52 +01:00
65697b3bf3 Merge branch 'paged-lists'
Conflicts:
	test/test_utils.py
	youtube_dl/extractor/youtube.py
2014-01-22 20:00:16 +01:00
50317b111d Merge branch 'youtube-dash-manifest'
Conflicts:
	youtube_dl/extractor/youtube.py
2014-01-22 19:58:31 +01:00
d7975ea287 [xvideos] Simplify 2014-01-22 19:02:48 +01:00
714d709a31 [xvideos] Fix thumbnail extraction
Signed-off-by: Philipp Hagemeister <phihag@phihag.de>
2014-01-22 19:01:41 +01:00
11577ec054 [cspan] Disable test
It works fine from all my machines, no matter where, but from travis, we get lots of 403s.
Maybe another project is scraping CSPAN from travis and they're blocking the travis machines?
2014-01-22 15:10:02 +01:00
79bf58f9b5 Document -f worstaudio as well 2014-01-22 14:55:45 +01:00
cd8a562267 release 2014.01.22.3 2014-01-22 14:53:36 +01:00
de3ef3ed58 Default to -f best-audio when only audio is requested 2014-01-22 14:53:23 +01:00
8908741806 Use unicode_literals in test_YoutubeDL 2014-01-22 14:48:02 +01:00
ba7678f9cc Add -f bestaudio (Fixes #2163) 2014-01-22 14:47:29 +01:00
a70c83768e release 2014.01.22.2 2014-01-22 14:33:16 +01:00
04b4d394d9 Add new --default-search option (#2193) 2014-01-22 14:16:43 +01:00
130f12985a [comedycentral] Use the generic _real_extract provided by the base class 2014-01-22 11:44:26 +01:00
4ca5d43cd8 Merge pull request #2195 from dstftw/master
[space] Add support for mobile URLs
2014-01-22 02:39:17 -08:00
4bbf139aa7 [southparkstudios] Use the generic _real_extract provided by the base class 2014-01-22 11:35:17 +01:00
dst
47739636a9 [space] Add support for mobile URLs 2014-01-22 17:25:32 +07:00
407ae733ab [cspan] Make ‘www’ optional and improve the regex for extracting the id (fixes #2194) 2014-01-22 11:06:03 +01:00
c39f7013e1 [gametrailers] Use the generic _real_extract provided by the base class 2014-01-22 10:51:17 +01:00
a4a028323e [comedycentral] Use unicode_literals 2014-01-22 03:50:49 +01:00
780ee4e501 [comedycentral] Adapt testcase
In contrast to other sites, ComedyCentral seems to understand how to sensibly use MTV IE, but the additional text shouldn't hurt.
2014-01-22 03:49:17 +01:00
d7b51547c0 [imdb:list] Switch to loading the webpage
The RSS method seems to be defunct.
2014-01-22 03:41:25 +01:00
43030f36db [d8] typo 2014-01-22 03:10:31 +01:00
48c63f1653 [d8] disable test; video got deleted 2014-01-22 03:09:21 +01:00
90f479b6d5 [novamov] Skip tests 2014-01-22 03:04:10 +01:00
6fd2957163 release 2014.01.22.1 2014-01-22 02:17:00 +01:00
d3a1c71917 [ringtv] Fix and add news extraction 2014-01-22 02:16:40 +01:00
af1588c05f [mtv] Update tests and xpath function for new title extraction 2014-01-22 02:04:51 +01:00
2250865fb0 [Wimp] Use new URL relay method 2014-01-22 02:01:39 +01:00
99f770caa8 [hotnewhiphop] Retrieve media key 2014-01-22 01:55:50 +01:00
00122de6a9 [gametrailers/mtv] Fix pre-3.x compatibility function for find_xpath_attr
Fixes #2189
2014-01-22 01:04:12 +01:00
a70515c0fd [servingsys] Do not run test on travis
Apparantly, even the advertisers do geoblocking now!?
From the US, this isn't outright blocked, but there are no videos returned.
2014-01-22 00:27:18 +01:00
398edd0689 release 2014.01.22 2014-01-22 00:21:41 +01:00
6562df768d Merge branch 'master' of github.com:rg3/youtube-dl
Conflicts:
	youtube_dl/extractor/mtv.py
2014-01-22 00:21:27 +01:00
06769acd71 [gametrailers] Use unicode_literals
Conflicts:
	youtube_dl/extractor/gametrailers.py
2014-01-22 00:18:52 +01:00
32dac6943d [mtv] Use unicode_literals 2014-01-22 00:18:09 +01:00
90834c78fe [mtv] Fix title for gametrailers (Fixes #2188)
We now prefer the title including the category, because that title is what is presented at the actual sites.
2014-01-22 00:17:33 +01:00
47917f24c4 [brightcove] Fix extraction of embedded videos
There was a leading ‘:’ in the regex.
The ‘flashvars’ parameter is not always available.
2014-01-21 22:04:46 +01:00
d614aa40e3 [brightcove] Fix check for url in the result
It may have the ‘formats’ field instead of ‘url’.
2014-01-21 21:53:10 +01:00
bc4ba05fcb [mtv] Add an extractor for mtviggy.com (#2072) 2014-01-21 20:59:31 +01:00
8d9453b9e8 Add an extractor for spike.com (#2072)
Added a generic _real_extract to MTVServicesInfoExtractor
2014-01-21 20:54:47 +01:00
e4f320a4d0 [mtv] Check for geo-blocked videos in the xml document, not in the xml’s string
Allows to use the `_download_xml` method
2014-01-21 19:59:02 +01:00
ef9f2ba7af [mtv] Use unicode_literals 2014-01-21 19:58:21 +01:00
4a3b72771f release 2014.01.21.1 2014-01-21 18:21:53 +01:00
913f32929b [vk] Add support for HQ videos (Fixes #2187) 2014-01-21 18:21:44 +01:00
9834872bf6 [facebook] Add support for embeds
Example URL: http://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html
2014-01-21 18:10:17 +01:00
94a23d2a1e [vk] Use unicode_literals 2014-01-21 17:32:03 +01:00
608bf69880 [vk] avoid built-in names 2014-01-21 17:29:04 +01:00
032b3df5af [redtube] Use unicode_literals 2014-01-21 14:16:44 +01:00
9d11a41fe4 [redtube] Add support for thumbnails
Signed-off-by: Philipp Hagemeister <phihag@phihag.de>
2014-01-21 14:14:55 +01:00
2989501131 release 2014.01.21 2014-01-21 14:07:41 +01:00
7b0817e8e1 [servingsys] Add support
This also adds support for brightcove advertisements.
Fixes #2181
2014-01-21 02:09:51 +01:00
9d4288b2d4 [extractor/common] Clarify when and when not we generate the filename 2014-01-21 01:41:13 +01:00
3486df383b [generic] Improve testcase 2014-01-21 01:40:34 +01:00
b60016e831 Deal with implicitly UTF-16 decoded webpages
These webpages don't specify an encoding and rely on the BOM
2014-01-21 01:39:40 +01:00
5aafe895fc Correct XML ampersand fixup 2014-01-20 22:11:34 +01:00
b853d2e155 release 2014.01.20 2014-01-20 11:44:37 +01:00
b7ab059084 Add infrastructure for paged lists
This commit allows to download pages in playlists as needed instead of all at once.
Before this commit,
    youtube-dl http://www.youtube.com/user/ANNnewsCH/videos --playlist-end 2 --skip-download
took quite some time - now it's almost instantaneous.
As an example, the youtube:user extractor has been converted.
Fixes #2175
2014-01-20 11:36:47 +01:00
c91778f8c0 [youtube] Fall back to header if playlist title is not available
Sometimes (in about 10% of requests), the og:title is missing for a weird reason.
See #2170 for an example
2014-01-20 02:45:51 +01:00
5016f3eac8 [myspace] More robust mediatype check 2014-01-20 02:44:08 +01:00
efb1bb90a0 [myspace] Add support for song urls (fixes #2040) 2014-01-19 11:38:48 +01:00
4cf393bb4b [dropbox] Correct test case (#2171) 2014-01-19 06:16:40 +01:00
ce4e242a6f [dropbox] PEP8 and simplify (#2171) 2014-01-19 06:14:24 +01:00
b27bec212f Merge remote-tracking branch 'sahutd/master' 2014-01-19 06:12:20 +01:00
704519c7e3 Modified dropbox to reflect small changes 2014-01-19 10:24:20 +05:30
6b79f40c3d Added support for Dropbox 2014-01-19 10:20:26 +05:30
dd27fd1739 [youtube] Download DASH manifest
If given, download and parse the DASH manifest file, in order to get ultra-HQ formats.
Fixes #2166
2014-01-19 05:47:20 +01:00
dfa50793d8 Merge pull request #2153 from jaimeMF/ffmpeg-merger-check-install
Don’t try to merge the formats if ffmpeg or avconv are not installed
2014-01-18 20:42:51 -08:00
2a7c35dd46 added dropbox support 2014-01-18 20:50:42 +05:30
f2ffd10bb2 Update __init__.py 2014-01-18 20:48:43 +05:30
8da531359e Added dropbox support. issue #2055 2014-01-18 20:45:53 +05:30
e2b944cf43 Merge branch 'master' of github.com:rg3/youtube-dl 2014-01-17 14:48:15 +01:00
3ec05685f7 [extractor/common] Limit --write-pages filename to 200 chars
This avoids problems with very long URLs.
2014-01-17 14:47:47 +01:00
e103fd46ca FFmpegMergerPP: Print an info message with the destination before running ffmpeg 2014-01-17 14:31:23 +01:00
877bfd69d1 [cnn] Improve test 2014-01-17 05:06:13 +01:00
e0ef49f205 release 2014.01.17.2 2014-01-17 04:22:15 +01:00
f68cd00fe3 [kankan] Skip test 2014-01-17 04:21:54 +01:00
ca70d215cf [kankan] Simplify 2014-01-17 04:21:22 +01:00
d0390a0c92 [mixcloud] Use unicode_literals 2014-01-17 04:06:18 +01:00
dd2535c38a [mixcloud] Fix URL extraction 2014-01-17 04:05:15 +01:00
b78d180170 [mpora] Fix uploader name extraction 2014-01-17 03:59:42 +01:00
26dca1661e [ted] Updated checksums 2014-01-17 03:54:54 +01:00
f853f8594d [ted] Use unicode_literals 2014-01-17 03:52:17 +01:00
8307aa73fb Remove youtube swf signature test
Apparently, swf players are no longer in use. If we find one, we'll readd it.
2014-01-17 03:49:59 +01:00
d0da491e1e [condenast] Allow multiple formats, and sort centralized 2014-01-17 03:36:03 +01:00
6e249060cf [condenast] Use unicode_literals 2014-01-17 03:32:02 +01:00
fbcd7b5f83 [soundcloud] Use unicode_literals and centralized sorting 2014-01-17 03:29:41 +01:00
9ac0a67581 [spankwire] Use centralized format sorting and unicode_literals 2014-01-17 03:26:05 +01:00
befdc8f3b6 [teamcoco] Use centralized sorting 2014-01-17 03:22:02 +01:00
bb198c95e2 [teamcoco] Use unicode_literals 2014-01-17 03:15:09 +01:00
c1195541b7 [gamespot] Use unicode_literals 2014-01-17 03:13:40 +01:00
26844eb57b [franceinter] Remove superfluous whitespace 2014-01-17 03:10:54 +01:00
a7732b672e Credit @sahutd for franceinter (#2152) 2014-01-17 03:09:34 +01:00
677b3ce82f [franceinter] Minor improvements (#2152) 2014-01-17 03:09:07 +01:00
fabfe17d5e [flickr] Use unicode literals 2014-01-17 03:07:01 +01:00
82696d5d5d Merge remote-tracking branch 'sahutd/master' 2014-01-17 03:02:55 +01:00
9eea4fb835 release 2013.01.17.1 2014-01-17 02:57:46 +01:00
484aaeb204 [everyonesmixtape] Add support (Fixes #2161) 2014-01-17 02:56:13 +01:00
8e589a8a47 release 2013.01.17 2014-01-17 02:13:13 +01:00
2f21eb2db6 [generic] Do not fetch XML URLs (Fixes #2162) 2014-01-17 02:13:00 +01:00
c11529618a [redtube] Make ‘http:’ not optional (closes #2160)
If the url doesn’t specify the protocol we can’t directly use it to download the webpage, we would need to build a new url.
Instead, we let the generic extractor add the protocol.
2014-01-16 11:21:33 +01:00
58c3c7ae38 Don’t try to merge the formats if ffmpeg or avconv are not installed 2014-01-15 12:59:15 +01:00
c8650f7ecd Made modification as suggested on https://github.com/rg3/youtube-dl/pull/2151 2014-01-15 16:48:55 +05:30
14e7543a5a franceinter [Issue #2105]
Added franceinterIE import to reflect addition of FranceInter support. Issue #2105
2014-01-15 11:51:12 +05:30
bf6705f584 Added franceinter [Issue #2105] 2014-01-15 11:49:50 +05:30
a9f53ce7ea Add a couple of missing http:// in test URLs 2014-01-14 16:01:31 -05:00
a45ea17042 Implement a different adult sites checking algorithm 2014-01-14 16:01:00 -05:00
4950f30890 Fix --list-formats description (Closes #2142) 2014-01-13 00:03:31 +01:00
7df7f00385 Merge remote-tracking branch 'origin/master' 2014-01-12 12:55:05 +01:00
d2250ea7fd [nowvideo] Recognize nowvideo.sx urls (fixes #2127) 2014-01-12 12:42:06 +01:00
17093b83ca Allow ~ in --download-archive (Fixes #2137) 2014-01-12 01:27:55 +01:00
5d8683a5cd [nowvideo] Add support for .sx version (Fixes #2127) 2014-01-12 01:26:37 +01:00
cede88e5bb Merge pull request #2139 from dstftw/master
Tidy help text
2014-01-11 16:18:38 -08:00
aadc71642a Merge pull request #2138 from dstftw/lynda-membership-support
[lynda] Add support for member accounts and paid videos (Closes #2125)
2014-01-11 16:18:08 -08:00
dst
67d28bff12 Tidy help text 2014-01-12 06:27:00 +07:00
dst
7ee40b5d1c [lynda] Add support for member accounts and paid videos (Closes #2125) 2014-01-12 05:31:56 +07:00
db22af36ec [brightcove] The ‘id’ attribute is not always present in the object tag (fixes #2132)
It looks like the ‘flashId’ parameter is not needed.
2014-01-10 19:39:42 +01:00
f8b5ab8cfa [bandcamp] Make thumbnail and uploader optional
Fixes #2129
2014-01-09 23:04:36 +01:00
298f16f954 [bandcamp] Fix variable name 2014-01-09 20:23:28 +01:00
3d97cbbdaf Fix typo in the readme 2014-01-09 18:40:23 +01:00
ce6b9a2dba [youtube] Add a pseudo format for rtmp videos (#2123) 2014-01-09 02:38:50 +01:00
c3197e3e5c [youtube] Correct subtitle URL (Fixes #2120) 2014-01-09 01:36:21 +01:00
d420d8dd1b release 2014.01.08 2014-01-08 23:42:52 +01:00
3fabeaa1f4 [vimeo] Support protocol-relative URLs 2014-01-08 22:42:52 +01:00
35aa7098cd Merge remote-tracking branch 'origin/prefer-ffmpeg' 2014-01-08 18:32:06 +01:00
9d6192a5b8 [bloomberg] Fix ooyala url extraction 2014-01-08 18:18:45 +01:00
76b1bd672d Add ‘--prefer-avconv’ and ‘--prefer-ffmpeg’ options (#2115)
Affects the ffmpeg post processors, if ‘--prefer-ffmpeg’ is given and both avconv and ffmpeg are installed, it will use ffmpeg. Otherwise it will follow the old behaviour.
2014-01-08 17:53:34 +01:00
469ec9416a [francetv] Add extractor for Culturebox (closes #2117) 2014-01-08 16:16:34 +01:00
70af3439e9 [hls] Fix the program name when reporting the file size 2014-01-08 16:15:20 +01:00
bb3c20965e Merge pull request #2116 from dstftw/novamov
[novamov] Add embedded player support
2014-01-08 01:27:11 -08:00
dst
5f59ee7942 [novamov] Remove superfluous tabs 2014-01-08 08:11:46 +07:00
dst
8f89e68781 [novamov] Add embedded player support 2014-01-08 08:09:13 +07:00
10bff13a66 [novamov] Simplify 2014-01-08 01:18:47 +01:00
166ff8a3c7 Merge remote-tracking branch 'dstftw/novamov' 2014-01-08 01:15:43 +01:00
b4622a328b Use double quotes in error message (#2112)
On Windows, double quotes are required, because single quotes get served to youtube-dl. (Yes, cmd.exe is crazy like that).
On other systems, both double and single quotes are fine, unless the string contains a dollar sign (then you need single quotes).
Since virtually no URLs contain dollar signs, double quotes should do.
2014-01-08 00:05:11 +01:00
dst
cc253000e4 [novamov] Add support for novamov.com (Fixes #2035) 2014-01-07 22:18:10 +07:00
42e4fcf23a [generic] Fix regexes 2014-01-07 11:04:27 +01:00
9c63128668 [metacritic] Use centralized sorting and unicode_literals 2014-01-07 10:27:35 +01:00
9933b57430 [pornhub] Use centralized sorting 2014-01-07 10:25:34 +01:00
84c92dc00f [c56] Add support for multiple formats 2014-01-07 10:19:15 +01:00
42154ad5bc [archiveorg] Use centralized sorting 2014-01-07 10:16:22 +01:00
96f1b0741c release 2014.01.07.5 2014-01-07 10:09:56 +01:00
bac268e243 Clarify --date* documentation (Fixes #2093) 2014-01-07 10:09:37 +01:00
3798eadccd More unicode literals 2014-01-07 10:06:30 +01:00
2537186d43 release 2014.01.07.4 2014-01-07 09:52:29 +01:00
0eecc6a417 [vimeo] Add support for passwords for player. URLs
Fixes #2053
2014-01-07 09:52:00 +01:00
0dc13f4c4a Correctly set IE_NAME field 2014-01-07 09:45:58 +01:00
f577e0ce15 switch more to unicode_literals 2014-01-07 09:45:40 +01:00
bd1b906527 Remove unused import 2014-01-07 09:42:38 +01:00
ecfef3e5bf +unicode_literals 2014-01-07 09:41:13 +01:00
3d3538e422 [khanacademy] Add support (Fixes #2066) 2014-01-07 09:35:34 +01:00
0cdad20c75 release 2014.01.07.3 2014-01-07 08:28:13 +01:00
50144133c5 [release] Check for useless files before release 2014-01-07 08:28:05 +01:00
089cb705e8 release 2014.01.07.2 2014-01-07 08:21:05 +01:00
525e1076ad release 2014.01.07.1 2014-01-07 08:09:08 +01:00
282962bd36 --list-formats: Only add "@" if vbr is given 2014-01-07 08:08:48 +01:00
c93c2ab1c3 [mpora] Add support (Fixes #2096) 2014-01-07 08:07:46 +01:00
7b09a4d847 [lynda] Fix download if subtitles were not requested 2014-01-07 07:17:49 +01:00
73a25b30ea [lynda] Remove superfluous space 2014-01-07 07:14:46 +01:00
ac260dd81e [lynda] Remove useless u"" 2014-01-07 07:14:12 +01:00
48a2034671 [vimeo] Fix playlist URL matching 2014-01-07 07:13:47 +01:00
a9ce0c631e [xattr] Correct on Windows 2014-01-07 06:50:24 +01:00
afc7bc33cb [xattr] Always use UTF-8
On Windows and other systems, other encodings would break when trying to encode non-ASCII characters.
Simply use UTF-8, like every sane system.
2014-01-07 06:49:15 +01:00
168da92b9a [xattr] Rework
In particular, explicitly require NT before trying ADS, and do not try to parse process output that may be localized.
2014-01-07 06:36:34 +01:00
d70ad093af Move check_executable into a helper function 2014-01-07 06:23:41 +01:00
2a2e2770cc [xattr] Always output a warning message on errors 2014-01-07 06:12:28 +01:00
42cc71e80b [xattr] Write bytestrings, not characters 2014-01-07 06:11:21 +01:00
496c19234c Split postprocessor package into multiple modules 2014-01-07 05:59:22 +01:00
4f81667d76 [orf] Remove unused variable name 2014-01-07 05:51:46 +01:00
56327689a2 Move postprocessor into its own package 2014-01-07 05:49:17 +01:00
ad84831537 [xattr] Coding style 2014-01-07 05:45:15 +01:00
5f263296ea Merge remote-tracking branch 'epitron/metadata-pp'
Conflicts:
	youtube_dl/PostProcessor.py
2014-01-07 05:44:44 +01:00
89650ea3a6 release 2014.01.07 2014-01-07 05:34:32 +01:00
79f8295303 Use original Referer URL in Brightcove requests (Fixes #2110) 2014-01-07 05:34:14 +01:00
400e58103d [brightcove] Use unicode_literals 2014-01-07 05:23:20 +01:00
fcee8ee784 [vimeo] Use _search_regex 2014-01-07 05:19:28 +01:00
9148eb002b [vimeo] Use unicode_literals 2014-01-06 23:38:16 +01:00
559e370f44 [vimeo] Proper warning when password is required (Fixes #2053)
In player. URLs, the password warning is different.
2014-01-06 23:35:27 +01:00
cdeb10b5cd release 2014.01.06.1 2014-01-06 19:25:43 +01:00
e6162a90e6 release 2014.01.06 2014-01-06 17:37:24 +01:00
9a6422a81e Merge remote-tracking branch 'origin/master' 2014-01-06 17:37:20 +01:00
fcea44c6d5 [vimeo] Add support for review pages
Since the regexp is already overboarding and review pages have a distinct URL format (with non-trivial stuff after the ID), use a dedicated IE.
Fixes #2106
2014-01-06 17:34:23 +01:00
5d73273f6f [orf] Use new extraction method (Fixes #2057) 2014-01-06 17:15:27 +01:00
c11a0611d9 [veehd] Send requests twice (Fixes #2102) 2014-01-06 12:54:01 +01:00
796495886e [generic] Use unicode_literals instead of duplicating the u' 2014-01-06 01:47:52 +01:00
fa27f667c8 Merge pull request #2104 from dstftw/lynda
[lynda] Add subtitles extraction
2014-01-05 16:44:21 -08:00
fc9713a1d2 [youtube] Support jwplayer with YouTube URLs (Closes #2075) 2014-01-06 01:42:58 +01:00
dst
62bcfa8c57 [lynda] Add subtitles extraction 2014-01-05 23:59:33 +07:00
7f9886379c release 2014.01.05.6 2014-01-05 11:44:20 +01:00
c6e4b225b1 Restore binary files for backwards compatibility
Fixes 9656ee5d1d
New year's resolution: Check which systems of Ubuntu / RHEL still serve the ancient versions.
If it's only RHEL, consider removing these binary files in 2015 or so.
2014-01-05 11:41:44 +01:00
1c0f31f9f7 [bash-completion] Complete filename if --load-info is given 2014-01-05 11:28:01 +01:00
41292a3827 Fix list comprehension for decoding the URLs (fixes #2100)
It wasn’t a comprehension, it was just using the last url from the previous comprehension.
That didn’t raise an error in python 2, but in python 3 the variable was not defined.
2014-01-05 10:58:36 +01:00
20f1be02df release 2014.01.05.5 2014-01-05 05:48:39 +01:00
a339e5cfb5 Remove unused imports 2014-01-05 05:48:30 +01:00
f46f4a995b [veoh] Simplify 2014-01-05 05:48:12 +01:00
4ddba33f78 [veoh] Add support for mobile URLs
Fixes #2052
2014-01-05 05:47:50 +01:00
e3b7aa8428 release 2014.01.05.4 2014-01-05 05:41:30 +01:00
d981cef6b9 [generic] Support gorillavid.in
Previously, we were a little bit over-eager and got a random swf file.
Fixes #2084.
2014-01-05 05:34:08 +01:00
6fa81ee96e release 2014.01.05.3 2014-01-05 05:26:43 +01:00
a1a337ade9 release 2014.01.05.02 2014-01-05 05:25:07 +01:00
c774b3c696 Make sure URLs are always character strings (Fixes #2051) 2014-01-05 05:24:50 +01:00
3e34db3170 More Atom feed improvements (#2081) 2014-01-05 05:16:16 +01:00
317d4edfa8 Improve Atom feed creation (Fixes #2081) 2014-01-05 05:04:46 +01:00
9b12003c35 atom feed generator: Make IDs proper URLs (#2081) 2014-01-05 04:49:43 +01:00
4ea170b8a0 release 2014.01.05.1 2014-01-05 04:44:34 +01:00
49f2bf76a8 Fix make_readme on Python 2 2014-01-05 04:44:29 +01:00
01c62591d1 [setup.py] Do not use unicode literals
See http://bugs.python.org/issue13943 for context
2014-01-05 04:41:50 +01:00
1e91866f77 Make make_readme run in a locale-less environment
Mentioned in #267
2014-01-05 04:39:27 +01:00
9656ee5d1d Document --socket-timeout 2014-01-05 04:36:46 +01:00
a5f1e12a02 release 2014.01.05 2014-01-05 04:30:29 +01:00
ca9e792253 [cspan] Use HTTP download (Fixes #2098) 2014-01-05 04:30:19 +01:00
aff24732b9 Merge remote-tracking branch 'rzhxeo/blip'
Conflicts:
	youtube_dl/extractor/bliptv.py
2014-01-05 03:48:45 +01:00
455fa214b6 Ignore more downloaded files 2014-01-05 03:44:38 +01:00
a9c5e5ca6e Set required properties for format merging 2014-01-05 03:44:08 +01:00
cefcb9fde3 [bliptv] Use centralized format sorting
This also makes youtube-dl use the better "Source" format by default.
2014-01-05 03:21:23 +01:00
bca4e93076 [bliptv] Simplify 2014-01-05 03:18:45 +01:00
67c20aebb7 Merge remote-tracking branch 'rzhxeo/blip2' 2014-01-05 03:16:19 +01:00
448711e39f [pornhd] Add support for ISO-3166 subpages (Fixes #2088) 2014-01-05 03:13:10 +01:00
8bf48f237d Fix/work around Windows encoding issues (Fixes #2095) 2014-01-05 03:07:55 +01:00
7c0578dc86 [collegehumor] Use character strings by default 2014-01-05 03:07:15 +01:00
55033ffb0a [collegehumor] Add support for age_limit 2014-01-05 03:03:15 +01:00
b4a9bf701a [collegehumor] Support multiple formats (Fixes #2092)
Unfortunately, we lose a part of the description in the new JSON format, but that's still better than a non-functioning URL.
2014-01-05 02:50:10 +01:00
a015dce0e2 Merge remote-tracking branch 'jaimeMF/merge-formats' 2014-01-05 02:06:48 +01:00
28ab2e48ae fix typo 2014-01-05 02:04:21 +01:00
6febd1c1df Prepare widespread unicode literal use 2014-01-05 01:52:03 +01:00
6350728be2 Allow merging formats (closes #1612)
Multiple formats can be requested using `-f 137+139`, each one is downloaded and then the two are merged with ffmpeg.
2014-01-04 13:13:51 +01:00
a7c26e7338 [lynda] minor changes 2014-01-03 13:24:29 +01:00
c880557666 Merge remote-tracking branch 'origin/master' 2014-01-03 13:10:00 +01:00
85689a531f [macgamestore] Minor fixes (#2044) 2014-01-03 13:09:39 +01:00
cc14dfb8ec Merge remote-tracking branch 'dstftw/macgamestore' 2014-01-03 13:06:22 +01:00
91d7d0b333 FFmpegMetadataPP; Write temporary file to something.temp.{ext} (fixes #2079)
ffmpeg correctly recognizes the formats of extensions like m4a, but it doesn’t work if it’s passed with the `--format` option.
2014-01-03 12:54:19 +01:00
9887c9b2d6 [jpopsuki] Simplify 2014-01-03 12:51:37 +01:00
d2fee313ec Merge remote-tracking branch 'diffycat/jpopsuki' 2014-01-03 12:20:18 +01:00
fa7f58e433 release 2014.01.03 2014-01-03 12:12:17 +01:00
71cd2a571e [dreisat] Make ‘index.php’ optional in the url (fixes #2080) 2014-01-03 12:02:08 +01:00
7c094bfe2f Reveal a little bit more detail about what we cache (#858) 2014-01-03 10:57:31 +01:00
0f30658329 Clarify --cache-dir (#858) 2014-01-02 23:27:47 +01:00
31c1cf5a9d [soundcloud] recognize more players’ urls (fixes #2078) 2014-01-02 16:18:51 +01:00
e63fc1bed4 Added '--xattrs' option which writes metadata to the file's extended attributes using a youtube-dl postprocessor.
Works on Linux, OSX, and Windows.
2014-01-02 07:47:28 -05:00
efa1739b74 [comedycentral] Recognize ‘video-collections’ urls (#2072) 2014-01-01 21:11:35 +01:00
5ffecde73f [mixcloud] Fix track url transformation (fixes #2068)
‘/previews/’ must be replaced with ‘/c/originals/’ now.
2014-01-01 21:07:55 +01:00
08d13955dd [wistia] Prefer original video format above all others
We could also set up a formula which would weigh filesize/bitrate and vcodec/acodec (say, 1GB h264 < 3 GB MPEG2 < 2 GB h264), but that would get really messy real soon.
2014-01-01 20:23:49 +01:00
531147dd5e [BlipTVIE] Extract all formats 2014-01-01 19:45:45 +01:00
a17c95f5e4 [README] Bug reporting: Add an item for unrelated questions 2014-01-01 19:18:20 +01:00
eadaf08c16 Merge remote-tracking branch 'origin/master' 2014-01-01 15:30:46 +01:00
4a9c9b6fdb [jpopsuki] Add script encoding definition for python2 2014-01-01 18:27:02 +04:00
b969ab48d9 Add support for jpopsuki.tv 2014-01-01 17:59:54 +04:00
8fa8a6299b [youtube] Add itag 264 (closes #2063)
It has a better bitrate than 137 but the same resolution
2014-01-01 13:45:33 +01:00
b2b0870b3a [dreisat] Update test filename and checksum 2014-01-01 13:30:58 +01:00
4fb757d1e0 Merge pull request #2041 from dstftw/imdb-list
[imdb] Add support for IMDb list (#2033)
2014-01-01 12:45:09 +01:00
241bce7aaf Merge pull request #2061 from rzhxeo/var
Correct variable name in YoutubeDL.list_formats
2014-01-01 03:33:34 -08:00
33ec2ae8d9 Merge remote-tracking branch 'origin/master' 2014-01-01 10:43:58 +01:00
c801b2051a Add an extractor for cmt.com (closes #2049)
It just inherits from MTVIE.
Some videos also come from vevo.com
2013-12-31 17:21:44 +01:00
7976fcac55 [http] Fix ‘err’ variable not being assigned in an except block (#2045) 2013-12-31 13:44:57 +01:00
e9f9a10fba Fix initialization of YoutubeDL with params set to None
Set it to an empty dictionary because it’s directly accessed when setting some properties
2013-12-31 13:34:52 +01:00
1cdfc31e1f Correct variable name in YoutubeDL 2013-12-30 06:50:12 +01:00
19dab5e6cc [GenericIE] Outsource embedded blip.tv player video id extraction to BlipTVIE and fix minor errors in RegEx 2013-12-30 06:15:02 +01:00
c0f9969b9e [BlipTVIE] Fix and simplify extraction of embedded videos 2013-12-30 06:14:10 +01:00
a0ddb8a2fa Add new --print-traffic option 2013-12-29 15:28:32 +01:00
c1d1facd06 [generic] Output something before making network requests 2013-12-27 08:38:42 +01:00
b26559878f release 2013.12.26 2013-12-26 21:56:23 +01:00
fd46a318a2 Print out encoding information in -v (#2046) 2013-12-26 21:55:42 +01:00
5d4f3985be Document that format_id field should be present 2013-12-26 21:19:00 +01:00
360babf799 [theplatform] Use centralized sorting 2013-12-26 21:18:18 +01:00
a1b92edbb3 [channel 9] Use centralized format sorting 2013-12-26 21:14:43 +01:00
12c978739a [internetvideoarchive] Use centralized format sorting 2013-12-26 21:08:52 +01:00
4bc60dafeb [blinkx] Use centralized format sorting 2013-12-26 21:05:30 +01:00
bf5b0a1bfb [ivi] Use centralized format sorting 2013-12-26 18:40:16 +01:00
bfe9de8510 [youporn] Add support for multiple formats 2013-12-26 18:37:12 +01:00
5ecd3c6a09 [bandcamp] Add support for multiple formats 2013-12-26 14:08:57 +01:00
608d11f515 [cnn] Add multiple formats, duration, and upload_date 2013-12-26 13:49:44 +01:00
dst
c7f8537dd9 [lynda] Add support for lynda.com (#1966) 2013-12-26 15:48:24 +07:00
723f839911 Remove unused imports 2013-12-25 15:33:19 +01:00
61224dbcdd [zdf] Make width extraction more robust 2013-12-25 15:33:09 +01:00
c3afc93a69 Merge remote-tracking branch 'origin/master' 2013-12-25 15:24:44 +01:00
7b8af56340 [appletrailers] Use centralized format selection 2013-12-25 15:24:41 +01:00
539179f45b [wistia] Use centralized sorting 2013-12-25 15:20:14 +01:00
7217e148fb [yahoo] Use centralized sorting, and add tbr field 2013-12-25 15:18:40 +01:00
d29b5e812b Merge pull request #2042 from dstftw/master
[smotri] Fix typo
2013-12-25 04:34:05 -08:00
dst
1e923b0d29 [macgamestore] Add extractor (#2043) 2013-12-25 16:07:34 +07:00
dst
f7e9d77f34 [smotri] Fix typo 2013-12-25 09:02:35 +07:00
dst
41cc67c542 [imdb] Add playlist test 2013-12-25 08:40:09 +07:00
dst
c645c7658d [imdb] Extractor for lists (#2033) 2013-12-25 08:34:41 +07:00
b874fe2da8 [mdr] Use centralized format selection 2013-12-24 23:34:11 +01:00
c7deaa4c74 [zdf] Use centralized sorting 2013-12-24 23:32:04 +01:00
e6812ac99d [spiegel] Use centralized sorting 2013-12-24 12:40:23 +01:00
719d3927d7 [mit] Add support for multiple formats 2013-12-24 12:38:08 +01:00
55e663a8d7 [dreisat] Use centralized format sorting 2013-12-24 12:35:08 +01:00
2c62dc26c8 [youtube] Simplify format specification 2013-12-24 12:34:09 +01:00
3d4a70b821 Add more tests for format selection 2013-12-24 12:33:33 +01:00
4bcc7bd1f2 Add temporary _sort_formats helper function 2013-12-24 12:31:42 +01:00
f49d89ee04 Add a resolution field and improve general --list-formats output 2013-12-24 11:56:02 +01:00
dabc127362 Remove dead code 2013-12-23 16:03:06 +01:00
c25c991809 [mplayer] Fix error introduced by downloader separation 2013-12-23 16:00:48 +01:00
f45f96f8f8 [myvideo] Use RTMP instead of RTMPT (Fixes #2032) 2013-12-23 15:57:43 +01:00
1538eff6d8 [bliptv] Remove support for direct downloads
This is now handled by the generic IE
2013-12-23 15:49:21 +01:00
00b2685b9c Merge remote-tracking branch 'origin/master' 2013-12-23 13:52:15 +01:00
8e3e03229e [YoutubeDL] fix tests (Closes #2036) 2013-12-23 13:51:56 +01:00
9d8d675e0e [subtitles-tests] Fix youtube test
It returns now a single info_dict
2013-12-23 10:40:28 +01:00
933605d7e8 YoutubeDL: rename _fd_progress_hooks back to _progress_hooks
In the future it may report more things.
2013-12-23 10:37:27 +01:00
b3d9ef88ec YoutubeDL: only set the ‘formats’ field of the info_dict if it was already set before
It caused a circular reference error, when trying to dump it to json (for example with the test video for myvideo.de or any other video without formats)
2013-12-23 10:23:13 +01:00
8958b6916c release 2013.12.23.4 2013-12-23 05:08:35 +01:00
9fc3bef87a Merge remote-tracking branch 'jaimeMF/split-downloaders' 2013-12-23 05:03:32 +01:00
d80044c235 [youtube] Prefer videos with sound 2013-12-23 04:51:42 +01:00
bc2103f3bf release 2013.12.23.3 2013-12-23 04:39:55 +01:00
f82b18efc1 Merge remote-tracking branch 'rzhxeo/youtube' 2013-12-23 04:37:40 +01:00
504c668d3b release 2013.12.23.2 2013-12-23 04:31:45 +01:00
466617f539 [bliptv] Simplify (From #2000) 2013-12-23 04:31:38 +01:00
196938835a Remove debugging code
Introduced by accident in 5d681e960d
2013-12-23 04:30:57 +01:00
a94e129a65 release 2013.12.23.1 2013-12-23 04:20:25 +01:00
5d681e960d Use bidiv instead of fribidi if available (Fixes #1912) 2013-12-23 04:19:50 +01:00
c7b487d96b release 2013.12.23 2013-12-23 03:45:02 +01:00
7dbf5ae587 [smotri] Add support for moderated (?) videos (Fixes #2030) 2013-12-23 03:44:47 +01:00
8d0bdeba18 [smotri] Make optional attributes optional 2013-12-23 03:38:29 +01:00
1b969041d7 [blinkx] Support mobile URLs (Closes #2022) 2013-12-22 07:43:54 +01:00
e302f9ce32 [youtube:user] Speed up --match-title 2013-12-22 03:57:42 +01:00
5a94982abe Remove unused import 2013-12-22 03:52:12 +01:00
7115ca84aa [vimeo/generic] Add support for embedded SWF vimeo videos 2013-12-22 03:34:13 +01:00
04ff34ab89 Show all matching URLs 2013-12-22 03:25:55 +01:00
bbafbe20c2 [vimeo] Better formatting for regexp 2013-12-22 03:21:28 +01:00
c4d55a33fc [brightcove] Test checksum changed 2013-12-20 17:28:50 +01:00
147e4aece0 [vbox7] New video checksum 2013-12-20 17:27:43 +01:00
bd1488ae64 [mdr] Remove test
For context, refer to the http://de.wikipedia.org/wiki/Depublizieren
2013-12-20 17:24:48 +01:00
79fed2a4df [crunchyroll] Fix test (#1721) 2013-12-20 17:20:39 +01:00
304cbe981e Merge remote-tracking branch 'rzhxeo/crunchyroll' 2013-12-20 17:13:26 +01:00
3fefbf50e3 Merge pull request #2005 from dstftw/ivi.ru
Add support for ivi.ru
2013-12-20 08:12:38 -08:00
f65c1d2be0 release 2013.12.20 2013-12-20 17:08:16 +01:00
aa94a6d315 [aparat] Add support (Fixes #2012) 2013-12-20 17:05:39 +01:00
768df74538 [blinkx] Add support for youtube videos 2013-12-19 21:02:25 +01:00
1f9da9049b [generic] Support YouTube swf embed (Fixes #2010) 2013-12-19 20:44:30 +01:00
c0d0b01f0e [generic] Detect ooyala videos (fixes #2013) 2013-12-19 20:32:12 +01:00
7c86a5b864 Merge pull request #2011 from dstftw/master
[imdb] Add support for mobile site URLs
2013-12-19 11:28:34 -08:00
dst
97e302a419 [imdb] Add support for mobile site URLs 2013-12-20 00:21:04 +07:00
71507a11c8 [soundcloud] Support mobile URLs (Fixes #2009) 2013-12-19 16:39:01 +01:00
dst
a51e37af62 [ivi] Simplify 2013-12-19 10:53:38 +07:00
1fb8f09273 Merge pull request #2006 from dstftw/master
[smotri] Fix duration field name
2013-12-18 15:40:40 -08:00
dst
6c6db72ed4 [ivi] Skip tests for travis build 2013-12-19 06:19:41 +07:00
dst
0cc83dc54b [smotri] Fix duration field name 2013-12-19 05:56:48 +07:00
dst
5ce54a8205 [ivi] Neat import 2013-12-19 05:53:34 +07:00
dst
8c21b7c647 [ivi] Add playlist tests 2013-12-19 05:39:22 +07:00
dst
77aa6b329d [ivi] Add support for ivi.ru 2013-12-19 05:28:16 +07:00
62d68c43ed Make prefer_free_formats sorting more robust 2013-12-18 21:25:13 +01:00
bfaae0a768 Filter and sort videos before calling list_formats 2013-12-18 21:24:39 +01:00
e56f22ae20 [YoutubeIE] Sort formats by resolution 2013-12-18 21:22:37 +01:00
dbd1988ed9 [YoutubeIE] Add width and height to format dict 2013-12-18 21:21:25 +01:00
4ea3be0a5c [YoutubeIE] Externalize format selection 2013-12-18 03:30:55 +01:00
3e78514568 [generic] Support application/ogg for direct links
Also remove some debugging code.
2013-12-17 16:26:34 +01:00
e029b8bd43 [utils] Remove duplicated line
This line was added by accident in 42393ce234
2013-12-17 16:12:20 +01:00
f5567e401c Merge pull request #1997 from rg3/simplify-url_basename
Simplify url_basename
2013-12-17 07:08:48 -08:00
9b8aaeed85 Simplify url_basename
Use urlparse from the standard library.
2013-12-17 14:56:29 +01:00
6086d121cb release 2013.12.17.2 2013-12-17 12:35:57 +01:00
7de6e075b4 [radiofrance] remove unused imports 2013-12-17 12:35:16 +01:00
946135aa2a [academicearth] remove unused imports 2013-12-17 12:34:30 +01:00
42393ce234 Add support for direct links to a video (#1973) 2013-12-17 12:33:55 +01:00
d6c7a367e8 [utils] Fix url_basename 2013-12-17 12:32:58 +01:00
cecaaf3f58 [generic] Do not use compatibility result fallback 2013-12-17 12:04:33 +01:00
f09828b4e1 release 2013.12.17.1 2013-12-17 04:13:41 +01:00
29eb517403 Add webpage_url_basename info_dict field (Fixes #1938) 2013-12-17 04:13:36 +01:00
44c471c3b8 release 2013.12.17 2013-12-17 02:51:22 +01:00
46374a56b2 [youtube] Do not warn for videos with allow_rating=0
This fixes #1982
Test video: http://www.youtube.com/watch?v=gi2uH3YxohU
2013-12-17 02:49:56 +01:00
ec98946ef9 [academicearth] Support playlists (Closes #1976) 2013-12-17 02:41:34 +01:00
fa77b742ac [radiofrance] Fill in test details 2013-12-16 23:07:57 +01:00
8b4e274610 [rtlnow] Fix URL calculation (Closes #1989) 2013-12-16 22:28:52 +01:00
d6756d3758 [playlist-test] require a string 2013-12-16 22:25:02 +01:00
11b68f6e1b release 2013.12.16.7 2013-12-16 22:18:58 +01:00
88bb52ee18 Merge branch 'master' of github.com:rg3/youtube-dl 2013-12-16 22:18:37 +01:00
d90df974c3 [academicearth] Add support for courses (#1976) 2013-12-16 22:18:27 +01:00
5c541b2cb7 [mtv] Add support for urls from the mobile site (fixes #1959) 2013-12-16 22:05:28 +01:00
87a28127d2 _search_regex's "isatty" call fails with Py2exe's
_search_regex calls the sys.stderr.isatty() function for unix systems.

Py2exe uses a custom Stderr() stream which doesn't have an `isatty()`
function, leading to a crash.

Fixes easily with checking that it's a unix system first.
2013-12-16 21:50:26 +01:00
ebce53b3d8 [vevo] Add support for videoplayer. URLs (#1957) 2013-12-16 21:48:38 +01:00
83c632dc43 release 2013.12.16.6 2013-12-16 21:46:16 +01:00
ff07a05575 Merge branch 'master' of github.com:rg3/youtube-dl 2013-12-16 21:46:11 +01:00
f25571ffbf Add support for embedded vevo player (Fixes #1957) 2013-12-16 21:45:21 +01:00
f7a6892572 [arte:ddc] Remove test
video seems to expire in 7 days, as arte+7
2013-12-16 21:42:41 +01:00
8fe56478f8 release 2013.12.16.5 2013-12-16 21:34:47 +01:00
0e2a436dce [radiofrance] Add support (Fixes #1942) 2013-12-16 21:34:41 +01:00
24050dd11c release 2013.12.16.4 2013-12-16 21:10:18 +01:00
8c8e3eec79 [facebook] Recognize #! URLs (Fixes #1988) 2013-12-16 21:10:06 +01:00
7ebc9dee69 Merge pull request #1987 from rzhxeo/blip
[GenericIE] Add support for embedded blip.tv
2013-12-16 11:28:34 -08:00
ee3e63e477 [GenericIE] Add support for embedded blip.tv 2013-12-16 20:08:23 +01:00
e9c424c144 Merge pull request #1984 from alimirjamali/patch-1
Incorrect variable is used to check whether thumbnail exists
2013-12-16 09:04:36 -08:00
0a9ce268ba Incorrect variable is used to check whether thumbnail exists
Dear @phihag

I believe in line 848, the correct variable to check is 'thumb_filename' rather than 'infofn'

Kindly advise

Mit freundlichen Gruessen
Ali
2013-12-16 20:14:28 +03:30
4b2da48ea7 release 2013.12.16.3 2013-12-16 14:44:29 +01:00
e64eaaa97d Fix execution under Python 3 2013-12-16 14:44:17 +01:00
780603027f [videopremium] Skip test 2013-12-16 14:42:07 +01:00
00902cd601 release 2013.12.16.2 2013-12-16 14:13:51 +01:00
d67b0b1596 Reorder info_dict documentation 2013-12-16 14:13:40 +01:00
d7dda16888 [blinkx] Add extractor (Fixes #1972) 2013-12-16 13:56:30 +01:00
a19fd00cc4 Simplify --playlist-start / --playlist-end interface 2013-12-16 13:16:20 +01:00
d66152a898 [ndtv] Remove unused imports 2013-12-16 08:16:38 +01:00
8c5f0c9fbc [mdr] Clean up 2013-12-16 08:16:11 +01:00
6888a874a1 release 2013.12.16.1 2013-12-16 05:45:15 +01:00
09dacfa57f [mdr] Simplify 2013-12-16 05:44:34 +01:00
b2ae513586 Merge remote-tracking branch 'mc2avr/master' 2013-12-16 05:14:03 +01:00
e4a0489f6e Merge remote-tracking branch 'dstftw/channel9'
Conflicts:
	youtube_dl/extractor/__init__.py
2013-12-16 05:14:00 +01:00
b83be81d27 Credit @mjorlitzky for pornhd (#1961) 2013-12-16 05:11:19 +01:00
6f5dcd4eee [pornhd] Simplify 2013-12-16 05:10:42 +01:00
1bb2fc98e0 Merge remote-tracking branch 'mjorlitzky/master' 2013-12-16 05:07:58 +01:00
e3946f989e Set process title to youtube-dl
This allows killing all youtube-dl processes with killall youtube-dl, and shows up nicer in some programs.
2013-12-16 05:04:55 +01:00
8863d0de91 release 2013.12.16 2013-12-16 04:45:32 +01:00
7b6fefc9d4 Apply --no-overwrites for --write-* files as well (Fixes #1980) 2013-12-16 04:39:13 +01:00
525ef9227f Add --get-duration (Fixes #859) 2013-12-16 04:15:10 +01:00
c0ba0f4859 Document duration field 2013-12-16 04:09:43 +01:00
b466b7029d [youtube] Make duration an integer or None 2013-12-16 04:09:05 +01:00
fa3ae234e0 [cbs] Add extractor (Fixes #1977) 2013-12-16 03:53:43 +01:00
48462108f3 [theplatform] Fix geographic restriction check 2013-12-16 03:43:45 +01:00
f8b56e95b8 [theplatform] Detect geoblocked content 2013-12-16 03:34:46 +01:00
5fe18bdbde Add --min-views / --max-views (Fixes #1979) 2013-12-16 03:09:49 +01:00
dca02c80bc Fix detection of the extension if the 'extractaudio' is given and improve the error message (#1969)
Using 'foo.mp4' shouldn't raise an error.
If 'foo' is given suggest using 'foo.%(ext)s' for the template
2013-12-15 11:42:38 +01:00
9ee859b683 [dailymotion] Add support for urls from the mobile site (fixes #1953)
It uses the 'touch' subdomain and adds a '#' before 'video'
2013-12-14 14:20:12 +01:00
8e05c870b4 Add support for pornhd.com. 2013-12-13 22:24:32 -05:00
5d574e143f [ign] Update one of test video's title 2013-12-13 17:04:40 +01:00
2a203a6cda Merge pull request #1956 from dstftw/master
Fix typo in month name
2013-12-13 07:41:34 -08:00
dst
dadb8184e4 Fix typo in month name 2013-12-13 22:27:37 +07:00
7a563df90a [daum] Recognize mobile urls (#1952) 2013-12-12 13:05:38 +01:00
24b173fa5c [naver] Recognize mobile urls (fixes #1951) 2013-12-12 13:04:02 +01:00
dst
9b17ba0fa5 [channel9] Fix test description md5 2013-12-12 16:10:17 +07:00
dst
211f555d4c [channel9] Missing import in __init__ 2013-12-12 15:55:31 +07:00
dst
4d2ebb6bd7 [channel9] Cleanup 2013-12-12 15:19:23 +07:00
dst
df53747436 [channel9] Initial implementation (#1885) 2013-12-12 15:13:45 +07:00
3bc2ddccc8 Move FileDownloader to its own module and create a new class for each download process
A suitable downloader can be found using the 'get_suitable_downloader' function.

Each subclass implements 'real_download', for downloading an info dict you call the 'download' method, which first checks if the video has already been downloaded
2013-12-11 16:18:48 +01:00
8ab470f1b2 Now a new FileDownloader is created when downloading a video
The progress hooks can be added using the method "add_downloader_progress_hook"
2013-12-11 16:04:42 +01:00
f2c36ee43e release 2013.12.11.2 2013-12-11 09:22:25 +01:00
00381b4ccb [pornhub] Fix URL regexp 2013-12-11 09:22:08 +01:00
fca1ef19c1 release 2013.12.11.1 2013-12-11 08:54:54 +01:00
357ddadbf5 Fix thumbnail filename determination (Fixes #1945) 2013-12-11 08:54:48 +01:00
08d03235f9 release 2013.12.11 2013-12-11 08:45:51 +01:00
1825836235 Use _download_xml in more extractors 2013-12-10 21:03:53 +01:00
a0088bdf93 [vimeo] Fix unused argument of the _real_extract method 2013-12-10 20:43:16 +01:00
48ad51b243 [vimeo] Fix the extraction for some 'player' or 'pro' videos
The variable the config dict is assigned to can change, now we try to detect it or fallback to a, b or c
2013-12-10 20:28:12 +01:00
5458b4cefb [dailymotion] Fix view count extraction and make it non fatal (fixes #1940) 2013-12-10 19:47:00 +01:00
7c86cd5ab1 [dailymotion] Fix uploader extraction
Now it looks directly in the info dictionary
2013-12-10 19:44:16 +01:00
df1d7da2af add MDRIE 2013-12-10 18:40:50 +01:00
cbfc470228 [mixcloud] Try to get the m4a url if the mp3 url fails to download (fixes #1939) 2013-12-10 13:42:41 +01:00
f67ca84d4a [soundcloud] Fix the extension for 'downloadable' songs
In this case the 'original_format' field must be used.
2013-12-10 13:04:21 +01:00
e2b38da931 [mtv] Fixup incorrectly encoded XML documents 2013-12-10 12:45:22 +01:00
a30a60d8eb release 2013.12.10 2013-12-10 11:54:59 +01:00
5a3ea17c94 [zdf] Correct order of unknown formats (#1936) 2013-12-10 11:52:10 +01:00
475700acfe [soundcloud] Do not mistake original_format for ext (Fixes #1934) 2013-12-10 11:45:13 +01:00
45598aab08 [YoutubeDL] Simplify filename preparation 2013-12-10 11:23:35 +01:00
26e6393134 Set 'NA' as the default value for missing fields in the output template (fixes #1931)
Remove the `except KeyError` clause, it won't get raised anymore
2013-12-09 22:00:42 +01:00
49929a20a7 release 2013.12.09.4 2013-12-09 20:05:27 +01:00
f8bd0194a7 Remove superfluous spaces 2013-12-09 20:05:10 +01:00
77526143e7 [brightcove] Use the original url (usually the player) as the default referer (fixes #1929) 2013-12-09 20:01:43 +01:00
4ff50ef846 [soundcloud] Do not match sets (Fixes #1930) 2013-12-09 19:57:00 +01:00
caefb1de87 [ndtv] Add extractor (Fixes #1924) 2013-12-09 19:44:33 +01:00
1e1f84dac9 release 2013.12.09.3 2013-12-09 18:56:17 +01:00
1d87e3a1c6 [rtlnow] Allow double slashes after domain name (Fixes #1928) 2013-12-09 18:56:05 +01:00
df8ae1e3a2 release 2013.12.09.2 2013-12-09 18:31:31 +01:00
f7d8d4a116 Merge branch 'master' of github.com:rg3/youtube-dl 2013-12-09 18:29:12 +01:00
1c088fa89d Improve --bidi-workaround support 2013-12-09 18:29:07 +01:00
de2dd4c502 [soundcloud] add support for private links (fixes #1927) 2013-12-09 17:08:58 +01:00
395293a889 [--load-info] Always read file as UTF-8
This allows editing the file (and not escaping non-ASCII characters) and reloading it in.
2013-12-09 04:59:51 +01:00
db4da14027 Merge remote-tracking branch 'jaimeMF/load-info' 2013-12-09 04:55:02 +01:00
2101830c0d Remove unused imports 2013-12-09 04:53:23 +01:00
977887469c Lower number of expected entries in top list 2013-12-09 04:50:48 +01:00
ffa8f0df0a Merge remote-tracking branch 'jaimeMF/yt-toplists' 2013-12-09 04:49:32 +01:00
693b8b2d31 Merge remote-tracking branch 'dstftw/smotri.com-broadcast'
Conflicts:
	youtube_dl/FileDownloader.py
	youtube_dl/extractor/smotri.py
2013-12-09 04:42:35 +01:00
a0d96c9843 Add filename to --dump-json output (Fixes #1908) 2013-12-09 04:31:18 +01:00
2a18bc9a4b Add some bug reporting hints 2013-12-09 04:20:14 +01:00
eaa1a7bde3 release 2013.12.09.1 2013-12-09 04:09:06 +01:00
0783b09b92 Add a workaround for terminals without bidi support (Fixes #1912) 2013-12-09 04:08:51 +01:00
ffe62508e4 release 2013.12.09 2013-12-09 03:03:01 +01:00
ac79fa02b8 Restore Python 2.6.<6 compatibility (Fixes #1860) 2013-12-09 03:02:54 +01:00
7cc3570e53 Add fatal=False parameter to _download_* functions.
This allows us to simplify the calls in the youtube extractor even further.
2013-12-09 01:49:03 +01:00
baa7b1978b Remove the calls to 'compat_urllib_request.urlopen' in a few extractors 2013-12-08 22:24:55 +01:00
ac5118bcb9 [arte.tv:ddc] Add fields to the test and skip download (rtmp) 2013-12-08 16:35:29 +01:00
5adb818947 Merge remote-tracking branch 'spjoe/master' (closes PR #1921) 2013-12-08 16:33:34 +01:00
52defb0c9b made ddc.arte.tv test working 2013-12-08 16:22:31 +01:00
56a8ab7d60 added arte.tv extractor support for subdomain ddc - Mit offenen Karten(german) Le Dessous des Cartes(france) 2013-12-08 14:43:15 +01:00
22686b91f0 release 2013.12.08.1 2013-12-08 07:32:25 +01:00
31812a9e0e [youtube:channel] Fix automated channel detection 2013-12-08 07:30:42 +01:00
11bf848191 [wimp] simplify 2013-12-08 07:22:19 +01:00
d4df5ed14c release 2013.12.08 2013-12-08 06:54:52 +01:00
303b479e0a Automatically load SSL certs on Windows 2013-12-08 06:54:39 +01:00
4c52160646 [FileDownloader] Fix progress report on Windows (Fixes #1918) 2013-12-08 06:53:46 +01:00
a213880aaf Simplify status reporting (#1918) 2013-12-08 05:49:35 +01:00
42d3bf844a Merge pull request #1919 from rzhxeo/xhamster
[XHamsterIE] Fix HD video detection
2013-12-07 14:35:17 -08:00
b860967ce4 [XHamsterIE] Fix md5 in second test 2013-12-07 22:17:13 +01:00
8ca6b8fba1 [XHamsterIE] Fix HD video detection 2013-12-07 21:39:32 +01:00
c4d9e6731a [pyvideo] add support for videos that don't come from Youtube 2013-12-07 11:19:59 +01:00
0d9ec5d963 [pyvideo] Cleanup and fix test 2013-12-07 11:00:56 +01:00
870fc4e578 Merge remote-tracking branch 'gekitsuu/master' (closes PR #1913) 2013-12-07 10:50:06 +01:00
f623530d6e removing bad VALID_URL 2013-12-06 21:12:10 -08:00
ca9e02dc00 Adding pyvideo support 2013-12-06 21:11:01 -08:00
fb30ec22fd [vimeo] Add an extractor for groups 2013-12-06 22:01:41 +01:00
5cc14c2fd7 [vimeo] Add an extractor for albums (closes #1911) 2013-12-06 21:48:44 +01:00
d349cd2240 [imdb] Fix extraction
The paths to each format's page may have leading whitespace.
The height and the duration can't be extracted.
2013-12-06 20:26:55 +01:00
0b6a9f639f [vevo] Update test video's duration 2013-12-06 20:14:29 +01:00
715c8e7bdb [youtube:playlist] Recognize mix ids for direct use (fixes #1295) 2013-12-06 19:52:41 +01:00
7d4afc557f [youtube:playlist] Support mix ids longer than 13 (#1295) 2013-12-06 19:48:54 +01:00
563e405411 [dailymotion] Fix view count regex
In some languages they can be in the format '123,456' instead of '123.456'
2013-12-06 13:41:07 +01:00
f53c966a73 [dailymotion] Extract view count (#1895) 2013-12-06 13:36:36 +01:00
336c3a69bd [youtube] Extract like and dislike count (#1895) 2013-12-06 13:22:27 +01:00
4e76179476 [vimeo] Extract views count, likes count and comments count (#1895) 2013-12-06 13:03:08 +01:00
ef4fd84857 [wistia] Add extractor 2013-12-06 09:15:04 +01:00
72135030d1 Merge remote-tracking branch 'origin/master' 2013-12-05 22:30:04 +01:00
3514813d5b [francetv] Add support for urls in the format http://www.france3.fr/emissions/{program}/diffusions/{date} (fixes #1898) 2013-12-05 21:49:30 +01:00
9e60602084 [francetv] Add support for more channels: 3, 4, 5 and Ô (#1898)
Rename the France2IE extractor to FranceTVIE
2013-12-05 21:48:41 +01:00
19e3dfc9f8 [9gag] Like/dislike count (#1895) 2013-12-05 18:29:07 +01:00
a1ef7e85d6 Remove unused imports 2013-12-05 14:31:54 +01:00
ef2fac6f4a Merge branch 'master' of github.com:rg3/youtube-dl 2013-12-05 14:29:14 +01:00
7fc3fa0545 [9gag] Add extractor 2013-12-05 14:29:08 +01:00
673d1273ff [vevo] Support '/watch/{id}' urls 2013-12-05 12:41:58 +01:00
b9a2c53833 [metacafe] Add support for cbs videos (fixes #1838)
They use theplatform.com
2013-12-04 23:43:50 +01:00
e9bf7479d2 Add an extractor for theplatform.com 2013-12-04 23:41:22 +01:00
bfb9f7bc4c [hotnewhiphop] Update test's title 2013-12-04 20:36:26 +01:00
6a656a843a Update description value for the write_info_json test (required after 27dcce1904) 2013-12-04 20:35:00 +01:00
29030c0a4c Merge remote-tracking branch 'dstftw/correct-valid-urls' 2013-12-04 19:56:05 +01:00
dst
c0ade33e16 Correct some extractor _VALID_URL regexes 2013-12-04 20:34:47 +07:00
671c0f151d release 2013.12.04 2013-12-04 14:19:07 +01:00
27dcce1904 [youtube] Resolve URLs in comments 2013-12-04 14:18:49 +01:00
dst
8aff7b9bc4 [smotri] Fix broadcast ticket regex 2013-12-04 12:36:12 +07:00
dst
55f6597c67 [smotri] Add an extractor for live rtmp broadcasts 2013-12-04 08:41:09 +07:00
d494389821 Option '--load-info': if the download fails, try extracting the info with the 'webpage_url' field of the info dict
The video url may have expired.
2013-12-03 20:16:52 +01:00
1dcc4c0cad Add --load-info option (#972)
It just calls the 'YoutubeDL.process_ie_result' with the dictionary from the json file
2013-12-03 20:15:20 +01:00
84db81815a Move common code for extractors based in MTV services to a new base class
Removes the duplication of the thumbnail extraction code (only MTVIE needs to override it)
2013-12-03 14:58:24 +01:00
fb7abb31af Remove the compatibility code used before the new format system was implemented 2013-12-03 14:31:20 +01:00
ce93879a9b [daum] Fix real video ID extraction 2013-12-03 14:16:58 +01:00
938384c587 [redtube] Fix search for title 2013-12-03 14:08:16 +01:00
e9d8e302aa [xhamster] Change test checksum 2013-12-03 14:06:16 +01:00
cb7fb54600 Change the ie_name of YoutubeSearchDateIE
It produced a duplicate entry when listing the extractors with '--list-extractors' and generates noise in the commit log when generating the supported sites webpage (like in 09f355f73b)
2013-12-03 13:55:25 +01:00
cf6758d204 Document disabling proxy (#1882) 2013-12-03 13:33:07 +01:00
731e3dde29 release 2013.12.03 2013-12-03 13:13:09 +01:00
a0eaa341e1 [configuration] Undo code breakage 2013-12-03 13:11:20 +01:00
fb27c2295e Correct configuration file locations 2013-12-03 13:09:48 +01:00
1b753cb334 Add Windows configuration file locations (#1881) 2013-12-03 13:04:02 +01:00
36a826a50d Clarify --download-archive help (#1757) 2013-12-03 11:54:52 +01:00
8796857429 Credit @dstftw for smotri IE 2013-12-02 17:43:22 +01:00
aaebed13a8 [smotri] Simplify 2013-12-02 17:08:17 +01:00
25939ffe56 Merge branch 'smotri.com' of https://github.com/dstftw/youtube-dl 2013-12-02 15:56:35 +01:00
dst
5270d8cb13 Added extractors for smotri.com 2013-12-02 20:10:19 +07:00
0037e02921 release 2013.12.02 2013-12-02 13:37:26 +01:00
6ad14cab59 Add --socket-timeout option 2013-12-02 13:37:05 +01:00
a9be0cc736 Merge branch 'master' of github.com:rg3/youtube-dl 2013-12-02 13:36:20 +01:00
55a10eab48 [vimeo] Add an extractor for users (closes #1871) 2013-12-01 22:36:18 +01:00
e344693b65 Make socket timeout configurable, and bump default to 10 minutes (#1862) 2013-12-01 11:42:02 +01:00
355e4fd07e [generic] Find embedded dailymotion videos (Fixes #1848) 2013-12-01 01:21:33 +01:00
5e09d6abbd [clipfish] Skip test on travis 2013-12-01 01:16:20 +01:00
0a688bc0b2 [youtube] Add support for downloading top lists (fixes #1868)
It needs to know the channel and the title of the list, because the ids change every time you browse the channels and are attached to a 'VISITOR_INFO1_LIVE' cookie.
2013-11-30 14:56:51 +01:00
b138de72f2 Merge branch 'master' of github.com:rg3/youtube-dl 2013-11-30 00:42:56 +01:00
06dcbb71d8 Clarify help of --write-pages (#1853) 2013-11-30 00:42:43 +01:00
c5171c454b [yahoo] Force use of the http protocol for downloading the videos. 2013-11-29 22:06:17 +01:00
323ec6ae56 Clarify --download-archive help 2013-11-29 15:57:43 +01:00
befd88b786 [yahoo] Add an extractor for yahoo news (closes #1849) 2013-11-29 15:25:43 +01:00
a3fb4675fb Do not mutate default arguments
In this case, it looks rather harmless (since the conditions for --restrict-filenames should not change while a process is running), but just to be sure.
This also simplifies the interface for callers, who can just pass in the idiomatic None for "I don't care, whatever is the default".
2013-11-29 15:25:11 +01:00
5f077efcb1 Merge pull request #1850 from nikai3d/master
fix typo in help
2013-11-29 01:48:14 -08:00
9986238ba9 fix typo in help 2013-11-29 09:48:38 +01:00
e1f900d6a4 fix typo in README.md 2013-11-29 09:44:05 +01:00
acf37ca151 [imdb] Fix the resolution values (fixes #1847)
We were using the size of the player, it was the same for all the formats
2013-11-29 07:56:14 +01:00
17769d5a6c release 2013.11.29 2013-11-29 03:34:26 +01:00
677c18092d [podomatic] Add extractor 2013-11-29 03:33:25 +01:00
3862402ff3 Add an extractor for Clipsyndicate (closes #1744) 2013-11-28 14:38:10 +01:00
b03d0d064c [imdb] Fix extraction in python 2.6
Using a regular expression because the html cannot be parsed.
2013-11-28 13:49:00 +01:00
d8d6148628 Add an extractor for Internet Movie Database trailers (closes #1832) 2013-11-28 13:32:49 +01:00
2be54167d0 release 2013.11.28.1 2013-11-28 06:17:56 +01:00
4e0084d92e [youtube/subtitles] Change MD5 of vtt subtitle in test 2013-11-28 06:14:17 +01:00
fc9e1cc697 [clipfish] Use FIFA trailer as testcase (#1842) 2013-11-28 06:10:37 +01:00
f8f60d2793 [clipfish] Fix imports (#1842) 2013-11-28 05:54:46 +01:00
ea07dbb8b1 release 2013.11.28 2013-11-28 05:48:32 +01:00
2a275ab007 [zdf] Use _download_xml 2013-11-28 05:47:50 +01:00
a2e6db365c [zdf] add a pseudo-testcase and fix URL matching 2013-11-28 05:47:20 +01:00
9d93e7da6c Merge branch 'master' of github.com:rg3/youtube-dl 2013-11-28 04:37:02 +01:00
0e44d8381a [youtube:feeds] Use the 'paging' value from the downloaded json information (fixes #1845) 2013-11-28 00:33:27 +01:00
35907e23ec [yahoo] Fix video extraction and use the new format system exclusively 2013-11-27 21:24:55 +01:00
76d1700b28 [youtube:playlist] Fix the extraction of the title for some mixes (#1844)
Like https://www.youtube.com/watch?v=g8jDB5xOiuE&list=RDIh2gxLqR7HM
2013-11-27 20:01:51 +01:00
dcca796ce4 [clipfish] Effect a better error message (#1842) 2013-11-27 18:33:51 +01:00
4b19e38954 [videopremium] support new .me domain 2013-11-27 02:54:51 +01:00
5f09bbff4d [bash-completion] Complete the ':ythistory' keyword 2013-11-27 00:42:59 +01:00
c1f9c59d11 [bash-completion] Complete filenames or directories if the previous option requires it 2013-11-27 00:41:30 +01:00
652cdaa269 [youtube:playlist] Add support for YouTube mixes (fixes #1839) 2013-11-26 21:35:03 +01:00
e26f871228 Use the new '_download_xml' helper in more extractors 2013-11-26 19:17:25 +01:00
6e47b51eef [youtube:playlist] Remove the link with index 0
It's not the first video of the playlist, it appears in the 'Play all' button (see the test course for an example)
2013-11-26 19:09:14 +01:00
4a98cdbf3b YoutubeDL: set the 'params' property before any message/warning/error is sent (fixes #1840)
If it sets the 'restrictfilenames' param, it will first report a warning. It will try to get the logger from the 'params' property, which would be set at that moment to None, raising the error 'AttributeError: 'NoneType' object has no attribute 'get''
2013-11-26 18:54:14 +01:00
c5ed4e8f7e release 2013.11.26 2013-11-26 10:41:35 +01:00
c2e52508cc Include the proxy in the parameters for YoutubeDL (fixes #1831) 2013-11-26 08:03:11 +01:00
d8ec4959c8 Merge pull request #1830 from jaimeMF/download-archive
Use the 'extractor_key' field for the download archive file
2013-11-25 14:14:25 -08:00
d31209a144 Use the 'extractor_key' field for the download archive file
It has the same value as the ie_key.
2013-11-25 22:57:15 +01:00
529a2e2cc3 Fix typo in the documentation of the 'download_archive' param 2013-11-25 22:52:09 +01:00
781a7d0546 release 2013.11.25.3 2013-11-25 22:36:18 +01:00
fb04e40396 [soundcloud] Support for listing of audio-only files 2013-11-25 22:34:56 +01:00
d9b011f201 Fix rtmpdump with non-ASCII filenames on Windows on 2.x
Reported in #1798
2013-11-25 22:31:38 +01:00
b0b9eaa196 Merge pull request #1829 from jaimeMF/ydl-empty-params
Allow to initialize a YoutubeDL object without parameters
2013-11-25 13:19:59 -08:00
8b134b1062 Merge branch 'master' of github.com:rg3/youtube-dl 2013-11-25 22:16:07 +01:00
0c75c3fa7a Do not warn about fixed output template if --max-downloads is 1
Fixes #1828
2013-11-25 22:15:33 +01:00
a3927cf7ee Allow to initialize a YoutubeDL object without parameters
Having to pass the 'outtmpl' parameter feels really strange when you just want to extract the info of a video.
2013-11-25 22:03:39 +01:00
1a62c18f65 [bambuser] Skip the download in the test
It doesn't respect the 'Range' header.
2013-11-25 22:03:20 +01:00
2a15e7063b [soundcloud] Prefer HTTP over RTMP (#1798) 2013-11-25 20:30:41 +01:00
d46cc192d7 Reduce socket timeout 2013-11-25 19:11:01 +01:00
bb2bebdbe1 release 2013.11.25.2 2013-11-25 15:47:14 +01:00
5db07df634 Fix --download-archive (Fixes #1826) 2013-11-25 15:46:54 +01:00
ea36cbac5e Merge remote-tracking branch 'rbrito/swap-dimensions' 2013-11-25 06:19:15 +01:00
d0d2b49ab7 [FileDownloader] use moved format_bytes method 2013-11-25 06:17:41 +01:00
31cb6d8fef Merge remote-tracking branch 'rzhxeo/rtmpdump' 2013-11-25 06:16:18 +01:00
daa0dd2973 release 2013.11.25.1 2013-11-25 06:06:39 +01:00
de79c46c8f [viki] Fix subtitle extraction 2013-11-25 06:06:18 +01:00
94ccb6fa2e [viki] Fix subtitles extraction 2013-11-25 05:58:04 +01:00
07e4035879 [viki] Fix uploader extraction 2013-11-25 05:57:55 +01:00
d0efb9ec9a [tests] Remove global_setup function 2013-11-25 03:47:32 +01:00
ac05067d3d release 2013.11.25 2013-11-25 03:37:49 +01:00
113577e155 [generic] Improve detection
Allow download of http://goo.gl/7X5tOk
Fixes #1818
2013-11-25 03:35:53 +01:00
79d09f47c2 Merge branch 'opener-to-ydl' 2013-11-25 03:30:37 +01:00
c059bdd432 Remove quality_name field and improve zdf extractor 2013-11-25 03:28:55 +01:00
02dbf93f0e [zdf/common] Use API in ZDF extractor.
This also comes with a lot of extra format fields
Fixes #1518
2013-11-25 03:13:22 +01:00
1fb2bcbbf7 [viki] Make uploader field optional (#1813) 2013-11-25 02:02:34 +01:00
16e055849e Update the keywords tests for the rename of the old ComedyCentralIE 2013-11-24 22:13:20 +01:00
66cfab4226 [comedycentral] Add support for comedycentral.com videos (closes #1824)
It's a subclass of MTVIE

The extractor for colbertnation.com and thedailyshow.com is called now ComedyCentralShowsIE
2013-11-24 21:18:35 +01:00
6d88bc37a3 [viki] Skip travis test
Also provide a better error message for geoblocked videos.
2013-11-24 15:28:50 +01:00
b7553b2554 [viki] Clarify output 2013-11-24 15:20:16 +01:00
e03db0a077 Merge branch 'master' into opener-to-ydl 2013-11-24 15:18:44 +01:00
a1ee09e815 Document proxy 2013-11-24 15:03:25 +01:00
267ed0c5d3 [collegehumor] Encode the xml before calling xml.etree.ElementTree.fromstring (fixes #1822)
Uses a new helper method in InfoExtractor: _download_xml
2013-11-24 14:59:19 +01:00
f459d17018 [youtube] Add an extractor for downloading the watch history (closes #1821) 2013-11-24 14:33:50 +01:00
dc65dcbb6d [mixcloud] The description field may be missing (fixes #1819) 2013-11-24 11:28:44 +01:00
d214fdb8fe [brightcove] Don't use 'or' with the xml nodes, use the 'value' attribute instead 2013-11-24 11:02:34 +01:00
138df537ff release 2013.11.24.1 2013-11-24 07:51:56 +01:00
0c7c19d6bc [clipfish] Add extractor (Fixes #1760) 2013-11-24 07:51:44 +01:00
eaaafc59c2 release 2013.11.24 2013-11-24 07:30:34 +01:00
382ed50e0e [viki] Add extractor (fixes #1813) 2013-11-24 07:30:05 +01:00
66ec019240 [youtube] do not use variable name twice 2013-11-24 06:54:26 +01:00
bd49928f7a [niconico] Clarify download 2013-11-24 06:53:50 +01:00
23e6d50d73 [bandcamp] Remove unused variable 2013-11-24 06:52:53 +01:00
2e767313e4 [update] fix error 2013-11-24 06:52:21 +01:00
38b2db6a66 Credit @takuya0301 for niconico 2013-11-24 06:39:49 +01:00
13ebea791f [niconico] Simplify and make work with old Python versions
The website requires SSLv3, otherwise it just times out during SSL negotiation.
2013-11-24 06:39:10 +01:00
4c9c57428f Merge remote-tracking branch 'takuya0301/niconico' 2013-11-24 06:09:11 +01:00
8bf9319e9c Simplify logger code(#1811) 2013-11-24 06:08:11 +01:00
4914120727 Merge remote-tracking branch 'iTaybb/master' 2013-11-24 06:07:12 +01:00
36de0a0e1a [brightcove] Set the 'videoPlayer' value to the 'videoId' if it's missing in the parameters (fixes #1815) 2013-11-23 23:27:15 +01:00
e5c146d586 [streamcloud] skip test on travis 2013-11-23 15:57:42 +01:00
52ad14aeb0 Add support for niconico 2013-11-23 18:19:44 +09:00
43afe28588 Log to an external logger (fixes #1810)
Sadly applications using youtube-dl's python sources can't directly
access its log stream. It's pretty much limited to stdout and stderr
only.

It should log to logging.Logger instance passed to YoutubeDL's params
dictionary.
2013-11-23 10:22:18 +02:00
a87b0615aa release 2013.11.22.2 2013-11-22 23:08:15 +01:00
d7386f6276 [update] Check if version from repository is newer before updating
Closes #1704
2013-11-22 23:05:58 +01:00
081640940e Merge branch 'master' of github.com:rg3/youtube-dl 2013-11-22 22:46:57 +01:00
7012b23c94 Match --download-archive during playlist processing (Fixes #1745) 2013-11-22 22:46:46 +01:00
d3b30148ed [bambuser:channel] Update test 2013-11-22 21:26:31 +01:00
9f79463803 [howcast] update test's checksum 2013-11-22 21:25:12 +01:00
d35dc6d3b5 [bandcamp] move the album test to the album extractor and return a single track instead of a playlist 2013-11-22 21:19:31 +01:00
50123be421 release 2013.11.22.1 2013-11-22 20:23:55 +01:00
3f8ced5144 Merge remote-tracking branch 'jaimeMF/yt-playlists' 2013-11-22 20:11:54 +01:00
00ea0f11eb Print full title in --get-title output (#1806) 2013-11-22 20:00:35 +01:00
dca0872056 Move the opener to the YoutubeDL object.
This is the first step towards being able to just import youtube_dl and start using it.
Apart from removing global state, this would fix problems like #1805.
2013-11-22 19:57:52 +01:00
0b63aed8df [update] do not assign to unused variables 2013-11-22 19:15:36 +01:00
15c3adbb16 Merge branch 'master' of github.com:rg3/youtube-dl 2013-11-22 19:08:33 +01:00
f143a42fe6 [bandcamp] Skip album test 2013-11-22 19:08:25 +01:00
241650c7ff [vimeo] Fix the extraction of vimeo pro and player.vimeo.com videos 2013-11-22 18:20:31 +01:00
bfe7439a20 release 2013.11.22 2013-11-22 17:46:26 +01:00
cffa6aa107 [bandcamp] Support trackinfo-style songs (Fixes #1270) 2013-11-22 17:44:55 +01:00
02e4ebbbad [streamcloud] Add IE (Fixes #1801) 2013-11-22 17:19:22 +01:00
ab009f59ef [toutv] Fix a typo 2013-11-22 17:18:03 +01:00
0980426559 [bandcamp] add support for albums (reported in #1270) 2013-11-22 16:05:14 +01:00
b1c9c66936 Remove unnecessary slash in setup.py (Fixes #1778) 2013-11-21 23:26:28 +01:00
a6a173c2fd utils.shell_quote: Convert the args to unicode strings
The youtube test video failed with `UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 34: ordinal not in range(128)`, the problem was with the filenames being encoded.
2013-11-21 14:09:28 +01:00
2bb683c201 release 2013.11.21 2013-11-21 13:59:33 +01:00
64bb5187f5 [soundcloud] Retrieve the file url using the client_id for the iPhone (fixes #1798)
The desktop's client_id always give the rtmp url, but with the iPhone one it returns the http url if it's available.
2013-11-21 13:16:19 +01:00
9e4f50a8ae [sztv] skip test, site is undergoing mid-term maintenance 2013-11-20 09:59:03 +01:00
0190eecc00 [nhl] Make NHLVideocenter IE_DESC fit with other descriptions 2013-11-20 09:45:29 +01:00
ca872a4c0b [spankwire] Fix description search 2013-11-20 09:23:53 +01:00
f2e87ef4fa [anitube] Skip test (on travis) 2013-11-20 07:46:44 +01:00
0ad97bbc05 [spankwire] fix check for description 2013-11-20 07:45:32 +01:00
c4864091a1 [videopremium] Support new crazy redirect scheme 2013-11-20 07:43:21 +01:00
9a98a466b3 [toutv] really skip test 2013-11-20 07:37:22 +01:00
f99e0f1ed6 Adapt age restriction tests to new .info.json filenames 2013-11-20 07:37:07 +01:00
d323bcb152 release 2013.11.20 2013-11-20 07:25:17 +01:00
da6a795fdb [escapist] Fix title search 2013-11-20 07:23:23 +01:00
c5edcde21f [escapist] upper-case URL 2013-11-20 06:56:59 +01:00
15ff3c831e [escapist] Fix syntax error 2013-11-20 06:55:07 +01:00
100959a6d9 [escapist] Add support for HD format (Closes #1755) 2013-11-20 06:52:08 +01:00
0a120f74b2 Credit @diffycat for anitube 2013-11-20 06:36:00 +01:00
8f05351984 [anitube] Minor fixes (#1776) 2013-11-20 06:35:02 +01:00
4eb92208a3 Adapt test to changed .info.json name 2013-11-20 06:34:48 +01:00
71791f414c Merge remote-tracking branch 'diffycat/master' 2013-11-20 06:28:13 +01:00
f3682997d7 Clean up unused imports and other minor mistakes 2013-11-20 06:27:48 +01:00
cc13cc0251 [teamcoco] Correct error 2013-11-20 06:25:33 +01:00
86bd5f2ca9 Merge remote-tracking branch 'dz0ny/patch-1' 2013-11-20 06:21:05 +01:00
8694c60000 import json for --dump-json 2013-11-20 06:18:24 +01:00
9d1538182f Add an option to dump json information 2013-11-20 06:14:57 +01:00
5904088811 Add support for tou.tv (Fixes #1792) 2013-11-20 06:13:19 +01:00
69545c2aff [d8] inherit from CanalplusIE
it reuses the same extraction process
2013-11-19 20:44:20 +01:00
495da337ae Merge pull request #1758 from migbac/master
Add support for d8.tv
2013-11-19 20:43:14 +01:00
34b3afc7be release 2013.11.19 2013-11-19 12:41:01 +01:00
00373a4c5d Merge pull request #1790 from rg3/console-title
Correctly write and restore the console title on the stack (fixes #1782)
2013-11-18 07:50:10 -08:00
cb7dfeeac4 [youtube] only allow domain name to be upper-case (#1786) 2013-11-18 16:42:35 +01:00
efd6c574a2 Correctly write and restore the console title on the stack (fixes #1782) 2013-11-18 16:35:41 +01:00
4113e6ab56 [auengine] Do not return unnecessary ext 2013-11-18 14:36:01 +01:00
9a942a4671 release 2013.11.18.1 2013-11-18 13:56:53 +01:00
9906d397a0 [auengine] Simplify 2013-11-18 13:56:45 +01:00
ae8f787141 Remove iPhone from user agent. This breaks a lot of extractors
In the future, it might be worth investigating whether we get better content when we claim to be an iPhone.
2013-11-18 13:52:26 +01:00
a81b4d5c8f release 2013.11.18 2013-11-18 13:30:43 +01:00
887c6acdf2 Support multiple embedded YouTube URLs (Fixes #1787) 2013-11-18 13:28:26 +01:00
83aa529330 Support protocol-independent URLs (#1787) 2013-11-18 13:18:17 +01:00
96b31b6533 Add iPhone to UA (#1746) 2013-11-18 13:05:58 +01:00
fccd377198 Support embed-only videos (Fixes #1746) 2013-11-18 13:05:18 +01:00
2b35c9ef74 Merge branch 'master' into rtmpdump
Conflicts:
	youtube_dl/FileDownloader.py

Merge
2013-11-18 00:27:06 +01:00
73c566695f release 2013.11.17 2013-11-17 22:14:13 +01:00
63b7b7224a [MTVIE] Try with RTMP URL if download fails
This fixes youtube-dl http://www.southpark.de/clips/155251/cartman-vs-the-dog-whisperer
2013-11-17 22:11:40 +01:00
ce80c8b8ee Merge pull request #1784 from rzhxeo/southpark
Add support for southpark.de
2013-11-17 12:15:13 -08:00
749febf4d1 Allow --console-title when --quiet is given (Fixes #1783) 2013-11-17 21:12:50 +01:00
bdde425cbe Save and restore console title (Fixes #1782) 2013-11-17 21:10:11 +01:00
746f491f82 Add support for southpark.de 2013-11-17 17:54:47 +01:00
1672647ade [SouthParkStudiosIE] Move from _TEST to _TESTS 2013-11-17 17:43:58 +01:00
90b6bbc38c [SouthParkStudiosIE] Also detect urls without http:// or www 2013-11-17 17:42:24 +01:00
ce02ed60f2 Remove * imports 2013-11-17 16:47:52 +01:00
1e5b9a95fd Move console_title to YoutubeDL 2013-11-17 11:39:52 +01:00
1d699755e0 [youtube] Add view_count (Fixes #1781) 2013-11-17 11:06:16 +01:00
ddf49c6344 [arte] remove two typos 2013-11-17 11:05:49 +01:00
ba3881dffd Add support for anitube.se (#1417) 2013-11-16 18:26:34 +04:00
d1c252048b [redtube] Do not test md5, seems to vary 2013-11-16 10:30:09 +01:00
eab2724138 [gamekings] Do not test md5 sum, precise file changes regularly 2013-11-16 02:32:23 +01:00
21ea3e06c9 [gamekings] remove unnecessary import 2013-11-16 02:31:02 +01:00
52d703d3d1 [tvp] Skip tests 2013-11-16 02:09:30 +01:00
ce152341a1 [bambuser] Do not test for MD5, seems to be flaky 2013-11-16 01:59:28 +01:00
f058e34011 [dailymotion] Fix playlists 2013-11-16 01:56:23 +01:00
b5349e8721 Fix indentation of (best) and (worst) in --list-formats 2013-11-16 01:39:45 +01:00
7150858d49 [spiegel] Implement format selection 2013-11-16 01:33:12 +01:00
91c7271aab Add automatic generation of format note based on bitrate and codecs 2013-11-16 01:08:43 +01:00
aa13b2dffd release 2013.11.15.1 2013-11-15 14:35:00 +01:00
fc2ef392be [ted] Fix playlists (Fixes #1770) 2013-11-15 14:33:51 +01:00
463a908705 [ted] simplify 2013-11-15 14:06:38 +01:00
d24ffe1cfa [rtlnow] Remove the test for nitro
The videos expire.
2013-11-15 12:57:59 +01:00
78fb87b283 Don't accept '>' inside the content attribute in OpenGraph regexes 2013-11-15 12:54:13 +01:00
ab2d524780 Improve the OpenGraph regex
* Do not accept '>' between the property and content attributes.
* Recognize the properties if the content attribute is before the property attribute using two regexes (fixes the extraction of the description for SlideshareIE).
2013-11-15 12:24:54 +01:00
85d61685f1 [tvp] Update the title and the description of the test video 2013-11-15 12:10:22 +01:00
b9643eed7c [youtube:channel] Fix the extraction of autogenerated channels
The ajax pages are empty, now it looks directly in the channel's /videos page
2013-11-15 11:51:45 +01:00
feee2ecfa9 Pass the 'download' argument to 'process_video_result' (fixes #1769) 2013-11-15 11:04:26 +01:00
a25a5cfeec release 2013.11.15 2013-11-15 01:47:15 +01:00
0e145dd541 Merge branch 'master' of github.com:rg3/youtube-dl 2013-11-15 01:46:50 +01:00
9f9be844fc [youtube] Fix protocol-independent URLs (Fixes #1768) 2013-11-15 01:45:39 +01:00
e3b9ab5e18 [soundcloud] Set the correct extension for the tracks (fixes #1766)
Some tracks are not in mp3 format, they can be wav files.
2013-11-14 19:45:39 +01:00
c66d2baa9c [livestream] Add an extractor for the original version of livestream (closes #1764)
The two versions use different systems.
2013-11-14 13:16:32 +01:00
08bc37cdd0 Update test_write_info_json.py 2013-11-13 18:55:49 +01:00
9771cceb2c Fix filename extension leaking to json filename
Makes writeinfojson behave exactly like writethumbnail in the case where the filename contains the media file extension.

Case:

video.mp4 converted to music.mp3 would yield music.mp4.info.json instead of music.mp3.info.json or music.info.json
2013-11-13 18:34:03 +01:00
ca715127a2 Don't assume the 'subtitlesformat' is set in the params dict (fixes #1750) 2013-11-13 17:14:10 +01:00
ea7a7af1d4 [gamekings] Fix the test video checksum 2013-11-13 17:13:06 +01:00
880e1c529d [youtube:playlist] Login into youtube if requested (fixes #1757)
Allows to download private playlists
2013-11-13 16:39:11 +01:00
dcbb45803f [youtube:playlist] Don't use the gdata api (closes #1508)
Parse the playlist pages instead
2013-11-13 16:26:50 +01:00
80b9bbce86 release 2013.11.13 2013-11-13 11:09:04 +01:00
d37936386f Credit @saper for tvp IE (#1730) 2013-11-13 11:08:07 +01:00
c3a3028f9f [tvp] Minor improvements (#1730) 2013-11-13 11:06:53 +01:00
6c5ad80cdc Merge remote-tracking branch 'saper/tvp' 2013-11-13 11:03:49 +01:00
b5bdc2699a Credit @jelly for gamekings extractor (#1759) 2013-11-13 10:52:22 +01:00
384b98cd8f [gamekings] Minor fixes (#1759) 2013-11-13 10:51:00 +01:00
eb9b5bffef Add extractor for gamekings.tv 2013-11-13 10:38:47 +01:00
0bd59f3723 Add support for d8.tv 2013-11-12 23:32:03 +01:00
8b8cbd8f6d [vine] Fix uploader extraction 2013-11-12 20:50:52 +01:00
72b18c5d34 FFmpegMetadataPP: don't enclose the values with " (fixes #1756) 2013-11-12 20:38:13 +01:00
eb0a839866 [common] Simplify og_search_property 2013-11-12 10:36:23 +01:00
1777d5a952 release 2013.11.11 2013-11-11 18:28:17 +01:00
d4b7da84c3 Clarify -c. Do not pass it in if you don't know what you're doing
Suggested in #1743
2013-11-11 14:21:14 +01:00
801dbbdffd Use avconv for downloading with m3u8 manifests if it's available (fixes #1735) 2013-11-10 16:47:03 +01:00
0ed05a1d2d Use the 'rtmp_live' field for the live parameter of rtmpdump 2013-11-10 12:45:17 +01:00
1008bebade Merge remote-tracking branch 'rzhxeo/rtmpdump_live' 2013-11-10 12:38:40 +01:00
ae84f879d7 Merge all the subtitles test into a single file
They reuse a base class
2013-11-10 12:28:21 +01:00
be6dfd1b49 [ted] Return a single info_dict for talks urls
It failed with the --list-subs option
2013-11-10 12:09:12 +01:00
231516b6c9 Merge pull request #1705 from iemejia/master
[ted] support for subtitles
2013-11-10 11:54:18 +01:00
fb53d58dcf Merge pull request #1726 from saper/escaped
Fix AssertionError when og property not found
2013-11-10 02:51:52 -08:00
2a9e9b210b Fix the documentation of '--autonumber-size' (#1743)
it's '--auto-number' not '--autonumber'
2013-11-09 19:21:30 +01:00
897d6cc43a Improve format listing for long format ids
Now arte.tv videos have quite long ids.
2013-11-09 19:07:34 +01:00
f470c6c812 [arte] Improve the format sorting
Also use the bitrate.
Prefer normal version and sourds/mal version over original version with subtitles.
2013-11-09 19:05:19 +01:00
566d4e0425 [arte] Make sure the format_id is unique (closes #1739)
Include the bitrate and use the height instead of the quality field.
2013-11-09 19:01:23 +01:00
81be02d2f9 [cnn] Accept www.cnn.com urls (fixes #1740) 2013-11-09 18:16:32 +01:00
c2b6a482d5 [brightcove] the format function requires to specify the index in python2.6 2013-11-09 18:10:11 +01:00
12c167c881 [soundcloud] Allow to download tracks marked as not 'streamable'
They use the rtmp protocol but if they are marked as 'downloadable' it can use the direct download link.
2013-11-09 18:08:03 +01:00
20aafee7fa [kankan] Fix the video url
It now requires two additional parameters, one is a timestamp we get from the getCdnresource_flv page and the other is a key we have to build.
2013-11-09 16:51:11 +01:00
be07375b66 Don't recode the video with m3u8 downloads (fixes #1741) 2013-11-09 16:40:00 +01:00
c8434e8316 Add support for crunchyroll.com 2013-11-09 11:25:12 +01:00
4894fe8c5b Report download progress of rtmpdump 2013-11-09 11:14:40 +01:00
dd5bcdc4c9 [brightcove] Set the 'Referer' header if the url has the 'linkBaseUrl' parameter (fixes #1553) 2013-11-07 21:06:48 +01:00
6161d17579 release 2013.11.07 2013-11-07 11:06:34 +01:00
4ac5306ae7 Fix the report progress when file_size is unknown (#1731)
The report_progress function will accept eta and percent with None value and will set the message to 'Unknown ETA' or 'Unknown %'.
Otherwise the values must be numbers.
2013-11-07 08:03:35 +01:00
b1a80ec1a9 [xnxx] Accept urls that start with 'www' (fixes #1734) 2013-11-06 23:45:01 +01:00
672fe94dcb release 2013.11.06.1 2013-11-06 22:11:46 +01:00
51040b72ed [brightcove] Support redirected urls from bcove.me (fixes #1732)
'bctid' needs to be changed to '@videoPlayer', and 'bckey' to 'playerKey'.
2013-11-06 22:03:00 +01:00
4f045eef8f [youtube:channel] Fix the extraction
The page doesn't include the 'load more' button anymore, now we directly get the 'c4_browse_ajax' pages.
2013-11-06 21:42:33 +01:00
5d7b253ea0 Add an extractor for eitb.tv (fixes #1608)
The BrightcoveExperience object doesn't contain the video id, the extractor adds it and passes the url to BrightcoveIE.
2013-11-06 20:06:14 +01:00
b0759f0c19 [brightcove] Extract all the available formats 2013-11-06 19:05:41 +01:00
065472936a Add an extractor for space.com (fixes #1718)
It uses Brightcove, but requires some special process for getting a url with the playerKey field in some videos
2013-11-06 17:37:39 +01:00
fc4a0c2aec [brightcove] Change the 'videoId' or 'videoID' field to '@videoPlayer' (fixes #1697)
It seems to be needed when using the htmlFederated page
2013-11-06 17:31:47 +01:00
eeb165e674 [brightcove] Add the extraction of the url from generic 2013-11-06 16:58:03 +01:00
9ee2b5f6f2 tests: don't run the test if any of the extractors listed in the 'add_ie' field is marked as not working 2013-11-06 16:43:26 +01:00
da54be877a release 2013.11.06 2013-11-06 14:02:52 +01:00
50a886b7ab Fix reporting when file size is unknown (Fixes #1731) 2013-11-06 14:02:33 +01:00
76e67c2cb6 Clean up imports 2013-11-06 14:01:43 +01:00
5137ebac0b [tvp] Telewizja Polska: new extractor for tvp.pl, fixes #1719
Thanks-To: mplonski

https://github.com/mplonski/linux/blob/master/tvp-dl.py
2013-11-05 23:47:40 +01:00
a8eeb0597b Fix AssertionError when og property not found
On tvp.pl some webpages contain OpenGraph
metadata and some don't.

If og property is not found, _og_search_description
fails with

WARNING: unable to extract OpenGraph description; please report this issue on http://yt-dl.org/bug
Traceback (most recent call last):
  File "/usr/home/saper/bin/youtube-dl", line 18, in <module>
    youtube_dl.main()
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/__init__.py", line 766, in main
    _real_main(argv)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/__init__.py", line 719, in _real_main
    retcode = ydl.download(all_urls)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/YoutubeDL.py", line 715, in download
    videos = self.extract_info(url)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/YoutubeDL.py", line 348, in extract_info
    ie_result = ie.extract(url)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/extractor/common.py", line 125, in extract
    return self._real_extract(url)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/extractor/tvp.py", line 56, in _real_extract
    info['description'] = self._og_search_description(webpage)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/extractor/common.py", line 331, in _og_search_description
    return self._og_search_property('description', html, fatal=False, **kargs)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/extractor/common.py", line 325, in _og_search_property
    return unescapeHTML(escaped)
  File "/usr/home/saper/sw/youtube-dl/youtube_dl/utils.py", line 494, in unescapeHTML
    assert type(s) == type(u'')
AssertionError

The patch allows me to use:

  try:
    info['description'] = self._og_search_description(webpage)
    info['thumbnail'] = self._og_search_thumbnail(webpage)
  except RegexNotFoundError:
    pass
2013-11-05 23:19:29 +01:00
4ed3e51080 [ted] fixed error in case of no subtitles present
I created a test, but I leave it commented since TED videos get
new subtitles frequently.
2013-11-05 12:00:13 +01:00
7f34001d57 Merge pull request #1724 from rzhxeo/generic_youtube
[GenericIE] Also detect youtube if src url of iframe is embedded in ' instead of "
2013-11-04 23:00:46 -08:00
2dcf7d8f99 [GenericIE] Also detect youtube if src url of iframe is embedded in ' instead of " 2013-11-05 02:08:02 +01:00
19b0668251 [canal2c] Accept more urls (fixes #1723)
The url only needs to have the 'idVideo' field in the query, in any position.
We have to set the 'void=oui' in the webpage url, so that we get the file name.
2013-11-04 22:26:19 +01:00
e7e6b54d8a [teamcoco] Parse the xml file and extract all the formats 2013-11-03 17:48:12 +01:00
2a1a8ffe41 Merge pull request #1693 from alexvh/teamcoco_fix
[teamcoco] Fix video url extraction for some videos
2013-11-03 17:19:51 +01:00
08fb86c49b [youtube] Add description for YoutubeSearchDateIE (#1710) 2013-11-03 15:59:10 +01:00
3633d77c0f Merge remote-tracking branch 'CBGoodBuddy/ytsearchtime' 2013-11-03 15:56:55 +01:00
165e179764 release 2013.11.03 2013-11-03 15:50:36 +01:00
12ebdd1506 [viddler] Support non-digit IDs (Fixes #1714) 2013-11-03 15:49:59 +01:00
1baf9a5938 Merge pull request #1698 from rzhxeo/cinemassacre
[CinemassacreIE] Support more embed urls
2013-11-03 05:17:12 -08:00
a56f9de156 Style fixes for extractors: remove spaces around (,),{ and } 2013-11-03 14:06:47 +01:00
fa5d47af4b Merge pull request #1679 from rzhxeo/mofosex
Add support for http://www.mofosex.com
2013-11-03 05:04:14 -08:00
d607038753 Merge pull request #1677 from rzhxeo/xtube
Add support for http://www.xtube.com
2013-11-03 03:28:02 -08:00
9ac6a01aaf Merge pull request #1676 from rzhxeo/extremetube
Add support for http://www.extremetube.com
2013-11-03 03:25:46 -08:00
be97abc247 Set the 'extractor_key' field in the info_dict
It's the string returned by the class method 'ie_key', which allows to retrieve the extractor with 'get_info_extractor'
2013-11-03 12:14:44 +01:00
9103bbc5cd Add the 'webpage_url' field to info_dict
The url for the video page, it must allow to reproduce the result.
It's automatically set by YoutubeDL if it's missing.
2013-11-03 12:11:13 +01:00
b6c45014ae Set the extra_info inside YoutubeDL.process_ie_result and set only if the keys are missing 2013-11-03 11:57:04 +01:00
a3dd924871 Add YoutubeSearchDateIE extractor to youtube.py & __init__.py, which searches by publication date. 2013-11-02 22:40:48 -04:00
137bbb3e37 [XTubeIE] Add description to TEST 2013-11-02 22:45:48 +01:00
86ad94bb2e [ExtremeTubeIE] Set age_limit to 18 and fix uploader extraction 2013-11-02 22:33:49 +01:00
3e56add7c9 Merge pull request #1678 from rzhxeo/keezmovies
[KeezMoviesIE] Detect URLs with numbers in the SEO part correctly
2013-11-02 14:15:52 -07:00
f52f01b5d2 [brightcove] Don't set the extension
If the video only has the 'FLVFullLengthURL' key, it can still be an mp4 file.
2013-11-02 21:20:46 +01:00
98d7efb537 [exfm] skip tests
The site is down too often.
2013-11-02 20:51:09 +01:00
cf51923545 [youtube] Remove vevo test
The video is no longer available and it seems that vevo videos don't use encrypted signatures anymore.
2013-11-02 20:46:26 +01:00
38fcd4597a Merge remote-tracking branch 'iemejia/master' 2013-11-02 19:56:06 +01:00
165e3bb67a [bambuser] Add an extractor for channels (closes #1702) 2013-11-02 19:50:57 +01:00
38db46794f Merge branch 'ted_subtitles' 2013-11-02 19:50:45 +01:00
a9a3876d55 [ted] Added support for subtitle download 2013-11-02 19:48:39 +01:00
1f343eaabb [subtitles] refactor to support websites with subtitle information in the
webpage.

I added the parameter webpage, so now it's similar to the way automatic
captions are handled. This is an improvement needed for websites like
TED.
2013-11-02 19:29:25 +01:00
72a5b4f702 Add an extractor for bambuser.com (#1702) 2013-11-02 19:01:01 +01:00
0a43ddf320 [CinemassacreIE] Add live parameter to extracted info as a workaround 2013-11-02 18:08:35 +01:00
31366066bd Add support for live parameter to rtmpdump 2013-11-02 18:08:16 +01:00
aa2484e390 release 2013.11.02 2013-11-02 11:21:36 +01:00
8eddf3e91d [youtube] Encode subtitle track name in request (Fixes #1700) 2013-11-02 11:21:05 +01:00
60d142aa8d Add an extractor for vk.com (closes #1635) 2013-11-01 22:34:18 +01:00
66cf3ac342 [metacafe] Fix support for age-restricted videos (fixes #1696)
The 'Content-Type' header must be set for disabling the family filter.
The 'flashversion' cookie  is only needed for AnyClip videos.
Added tests for standard metacafe videos and for age-restricted videos.
Also set the 'age_limit' field.
2013-11-01 11:56:15 +01:00
ab4e151347 [CinemassacreIE] Support more embed urls 2013-11-01 01:24:23 +01:00
ac2547f5ff [teamcoco] Fix video url extraction for some videos
Video url extraction failed for some videos,
e.g. http://teamcoco.com/video/old-time-baseball

The url extracted was also occasionally suboptimal quality,
e.g. http://teamcoco.com/video/louis-ck-interview-george-w-bush
2013-10-31 15:41:14 -04:00
5f1ea943ab [livestream] fix the extraction of events
It now uses a json dictionary from the webpage.
2013-10-31 08:07:26 +01:00
0ef7ad5cd4 Fix the test for dailymotion subtitles
The extractor returns a single info_dict now.
2013-10-31 07:55:03 +01:00
9f1109a564 [dailymotion] Fix support for age-restricted videos (Fixes #1688) 2013-10-31 00:20:49 +01:00
33b1d9595d release 2013.10.30 2013-10-30 01:17:20 +01:00
7193498811 Use index in format string (Fixes vevo test on Python 2.6) 2013-10-30 01:17:00 +01:00
72321ead7b [vevo] Readd support for SMIL (Fixes #1683) 2013-10-30 01:14:17 +01:00
b5d0d817bc Remove superfluous space 2013-10-30 01:09:44 +01:00
94badb2599 Fix output indenting for --list-formats 2013-10-30 01:09:26 +01:00
b9a836515f Update the Vimeo test vector md5
confirmed that this is indeed the first 10241 (we went off by one with
byte range 0-10240) of the full, playing mp4, so they probably
reencoded or something
2013-10-29 16:44:35 -04:00
21c924f406 [arte] Download the 'Originalversion' version if it's the only one available (fixes #1682) 2013-10-29 20:58:49 +01:00
e54fd4b23b [vevo] Add more format details 2013-10-29 15:10:09 +01:00
57dd9a8f2f Nicer --list-formats output 2013-10-29 15:09:45 +01:00
912cbf5d4e [vevo] Fix timestamp handling
( / 1000 is implicit float division )
2013-10-29 14:00:23 +01:00
43d7895ea0 release 2013.10.29 2013-10-29 06:48:39 +01:00
f7ff55aa78 Merge remote-tracking branch 'origin/master' 2013-10-29 06:48:18 +01:00
795f28f871 [youtube] Fix login (Fixes #1681) 2013-10-29 06:45:54 +01:00
f6cc16f5d8 [tests] a HTTP 503 is a transient issue 2013-10-28 19:07:16 -04:00
321a01f971 [mtv] Remove the templates from the mediagen url 2013-10-28 23:37:01 +01:00
646e17a53d Fix YouTubeDL test 2013-10-28 23:18:13 +01:00
dd508b7c4f [tests] don't fail on network errors
This is suboptimal, but at least this way we will need to look at the logs
only to check for network errors that happen too often, instead of
parsing a ton of lines each time to see if there is some true test failing
2013-10-28 18:03:26 -04:00
2563bcc85c Add an extractor for MySpace (closes #1666) 2013-10-28 22:02:17 +01:00
702665c085 tests: build the filename from the info_dict if the 'file' key is missing
It will need to have the 'id' and 'ext' keys to work.
2013-10-28 22:01:37 +01:00
dcc2a706ef Add support for http://www.xtube.com 2013-10-28 19:23:48 +01:00
2bc67c35ac [KeezMoviesIE] Detect URLs with numbers in the SEO part correctly 2013-10-28 18:22:55 +01:00
77ae65877e Add support for http://www.mofosex.com 2013-10-28 18:18:58 +01:00
32a35e4418 Add support for http://www.extremetube.com 2013-10-28 17:35:01 +01:00
369a759acc setup.py: Make sure the setuptools_available variable is set
Otherwise it would crash if it can't import setuptools.
2013-10-28 16:54:48 +01:00
79b3f61228 Merge pull request #1675 from rzhxeo/fix
Check if description and thumbnail are None to prevent crash
2013-10-28 08:35:40 -07:00
216d71d001 Check if description and thumbnail are None to prevent crash 2013-10-28 16:28:35 +01:00
78a3a9f89e Make "requested format not available" expected (#1655) 2013-10-28 11:41:59 +01:00
a7685f3bf4 mixcloud does not do any format selection 2013-10-28 11:41:32 +01:00
f088ea5486 release 2013.10.28 2013-10-28 11:34:21 +01:00
1003d108d5 [vimeo] Support hash in URL (Fixes #1669) 2013-10-28 11:32:22 +01:00
8abeeb9449 Nicer --list-formats output 2013-10-28 11:31:12 +01:00
c1002e96e9 Let extractors omit ext in formats 2013-10-28 11:28:02 +01:00
77d0a82fef [addanime] Use new formats system 2013-10-28 11:24:47 +01:00
ebc14f251c Merge remote-tracking branch 'origin/master' 2013-10-28 10:44:13 +01:00
d41e6efc85 New debug option --write-pages 2013-10-28 10:44:02 +01:00
8ffa13e03e [Instagram] get the non-https link, as they are serving Akamai cert from a instagram.com domain 2013-10-28 02:34:29 -04:00
db477d3a37 Merge pull request #1620 from jaimeMF/console_script
Use the console_scripts entry point if setuptools is available
2013-10-27 23:08:59 -07:00
750e9833b8 Add the missing age_limit tags; added a devscript to do a superficial check for porn sites without the age_limit tag in the test 2013-10-28 01:50:17 -04:00
82f0ac657c Merge pull request #1657 by @rzhxeo
[YouPornIE] Extract all encrypted links and remove doubles at the end
2013-10-28 01:45:52 -04:00
eb6a2277a2 Merge pull request #1659 by @rzhxeo
Add support for http://www.tube8.com
2013-10-28 01:38:28 -04:00
f8778fb0fa Merge pull request #1663 by @rzhxeo
Add support for http://www.spankwire.com
2013-10-28 01:35:11 -04:00
e2f9de207c Merge pull request #1664 by @rzhxeo
Add support for http://www.keezmovies.com
2013-10-28 01:25:46 -04:00
a93cc0d943 Merge pull request #1661 by @rzhxeo
Add support for http://www.pornhub.com
2013-10-28 00:50:39 -04:00
7d8c2e07f2 [Exfm] replace the failing Soundcloud test vector (broken also in browser) 2013-10-28 00:33:43 -04:00
efb4c36b18 Merge pull request #1660 from pyed/master
[addanime] try to download HQ before normal
2013-10-27 21:14:19 -07:00
29526d0d2b Merge pull request #1656 from rzhxeo/xhamster
[XHamsterIE] Extract SD and HD video
2013-10-27 10:12:59 -07:00
198e370f23 [addanime] better regex. 2013-10-27 19:48:02 +03:00
c19f7764a5 [generic] Detect bandcamp pages that use custom domains (closes #1662)
They embed the original url in the 'og:url' property.
2013-10-27 14:40:25 +01:00
bc63d9d329 [rtlnow] Change the test for rtlnitronow 2013-10-27 14:26:19 +01:00
aa929c37d5 [generic] Fix test video's checksum 2013-10-27 14:21:37 +01:00
af4d506eb3 [faz] Use a regex for getting the description
The page cannot be parsed in python2.6 with the html parser.
2013-10-27 14:18:55 +01:00
5da0549581 [KeezMoviesIE] Correct return value for embedded videos 2013-10-27 12:48:09 +01:00
749a4fd2fd [facebook] Don't recommend to report the issue if the video is private. 2013-10-27 12:13:55 +01:00
6f71ef580c [facebook] Report a more meaningful message if the video cannot be accessed (closes #1658) 2013-10-27 12:09:46 +01:00
67874aeffa [facebook] Fix the login process (fixes #1244) 2013-10-27 12:07:58 +01:00
3e6a330d38 [addanime] fix md5sum 2013-10-27 13:51:26 +03:00
aee5e18c8f [addanime] catch 'RegexNotFoundError' 2013-10-27 13:36:43 +03:00
5b11143d05 Add support for http://www.keezmovies.com 2013-10-27 10:10:28 +01:00
7b2212e954 Add support for http://www.spankwire.com 2013-10-27 01:59:26 +02:00
71865091ab [Tube8IE] Fix regex for uploader extraction 2013-10-27 01:08:03 +02:00
125cfd78e8 Add support for http://www.pornhub.com 2013-10-27 01:04:22 +02:00
8cb57d9b91 [Tube8IE] Escape dot in regex 2013-10-27 00:21:27 +02:00
14e10b2b6e [addanime] try to download HQ before normal 2013-10-27 01:19:38 +03:00
6e76104d66 [YouPornIE] Make webpage download more robust 2013-10-26 23:33:32 +02:00
1d45a23b74 Add support for http://www.tube8.com 2013-10-26 23:27:30 +02:00
7df286540f [YouPornIE] Extract all encrypted links and remove doubles at the end 2013-10-26 21:57:10 +02:00
5d0c97541a [XHamsterIE] Extract SD and HD video 2013-10-26 20:38:54 +02:00
49a25557b0 [8tracks] Use track count instead of looking at at_last_track property
This fixes the error:

$ youtube-dl http://8tracks.com/vladmc/counting-stars
[8tracks] counting-stars: Downloading webpage
[8tracks] counting-stars: Downloading song information 1/4
[8tracks] counting-stars: Downloading song information 2/4
[8tracks] counting-stars: Downloading song information 3/4
[8tracks] counting-stars: Downloading song information 4/4
[8tracks] counting-stars: Downloading song information 5/4
Traceback (most recent call last):
  File "/usr/lib/python2.7/runpy.py", line 162, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/usr/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/home/phihag/projects/youtube-dl/youtube_dl/__main__.py", line 18, in <module>
    youtube_dl.main()
  File "/home/phihag/projects/youtube-dl/youtube_dl/__init__.py", line 761, in main
    _real_main(argv)
  File "/home/phihag/projects/youtube-dl/youtube_dl/__init__.py", line 714, in _real_main
    retcode = ydl.download(all_urls)
  File "/home/phihag/projects/youtube-dl/youtube_dl/YoutubeDL.py", line 701, in download
    videos = self.extract_info(url)
  File "/home/phihag/projects/youtube-dl/youtube_dl/YoutubeDL.py", line 342, in extract_info
    ie_result = ie.extract(url)
  File "/home/phihag/projects/youtube-dl/youtube_dl/extractor/common.py", line 121, in extract
    return self._real_extract(url)
  File "/home/phihag/projects/youtube-dl/youtube_dl/extractor/eighttracks.py", line 111, in _real_extract
    'id': track_data['id'],
KeyError: 'id'
2013-10-25 23:46:19 +02:00
b5936c0059 Document the %(format_id)s field for the output template 2013-10-25 17:18:06 +02:00
600cc1a4f0 [youtube] Set the format_id field to the itag of the format (closes #1624) 2013-10-25 17:17:46 +02:00
ea32fbacc8 Fix the extensions of two tests with youtube videos
The best quality is now a mp4 video.
2013-10-25 16:55:37 +02:00
00fe14fc75 [youtube] Also use the 'adaptative_fmts' field from the /get_video_info page (fixes #1649)
The 'adaptative_fmts' field from the video page is not added to the 'url_encoded_fmt_stream_map'
2013-10-25 16:52:58 +02:00
fcc28edb2f [cinemassacre] Simplify
* Remove some rtmp parameters that are not needed.
* Remove the md5 checksums, the video is not downloaded.
* Remove the code used before the current format system.
2013-10-23 20:21:41 +02:00
fac6be2dd5 Merge pull request #1632 from rzhxeo/cinemassacre
[Cinemassacre] Download video that is shown in flash player
2013-10-23 20:15:39 +02:00
1cf64ee468 release 2013.10.23.2 2013-10-23 18:38:09 +02:00
cdec0190c4 [dailymotion] Extract all the available formats (closes #1028) 2013-10-23 17:33:38 +02:00
2450bcb28b [nowvideo] Fix key extraction
Extract it from the embed page
2013-10-23 17:00:33 +02:00
3126050c0f Hide the video password on verbose mode 2013-10-23 16:32:17 +02:00
93b22c7828 [vimeo] fix the extraction for videos protected with password
Added a test video.
2013-10-23 16:31:53 +02:00
d5a9bb4ea9 extractor: youtube: Swap video dimensions to match standard practice.
While working on this, I thought about simplifying things like changing
480x854 to 480p, and that seemed like a good option, until I realized that
people (me included) usually link the concept of some number followed by a p
with the video being 16:9.

So, we would be losing some information and, as we all know,
[explicit is better than implicit][*].

[*]: http://www.python.org/dev/peps/pep-0020/

This closes #1446.

Signed-off-by: Rogério Brito <rbrito@ime.usp.br>
2013-10-19 14:04:44 -03:00
b0505eb611 [CinemassacreIE] Fix information extraction 2013-10-19 16:46:17 +02:00
f44415360e Use the console_scripts entry point if setuptools is available 2013-10-18 13:49:25 +02:00
287 changed files with 19370 additions and 5553 deletions

2
.gitignore vendored
View File

@ -23,6 +23,8 @@ updates_key.pem
*.vtt *.vtt
*.flv *.flv
*.mp4 *.mp4
*.m4a
*.m4v
*.part *.part
test/testdata test/testdata
.tox .tox

View File

@ -3,3 +3,5 @@ include test/*.py
include test/*.json include test/*.json
include youtube-dl.bash-completion include youtube-dl.bash-completion
include youtube-dl.1 include youtube-dl.1
recursive-include docs *
prune docs/_build

474
README.md
View File

@ -14,169 +14,246 @@ your Unix box, on Windows or on Mac OS X. It is released to the public domain,
which means you can modify it, redistribute it or use it however you like. which means you can modify it, redistribute it or use it however you like.
# OPTIONS # OPTIONS
-h, --help print this help text and exit -h, --help print this help text and exit
--version print program version and exit --version print program version and exit
-U, --update update this program to latest version. Make sure -U, --update update this program to latest version. Make
that you have sufficient permissions (run with sure that you have sufficient permissions
sudo if needed) (run with sudo if needed)
-i, --ignore-errors continue on download errors, for example to to -i, --ignore-errors continue on download errors, for example to
skip unavailable videos in a playlist skip unavailable videos in a playlist
--abort-on-error Abort downloading of further videos (in the --abort-on-error Abort downloading of further videos (in the
playlist or the command line) if an error occurs playlist or the command line) if an error
--dump-user-agent display the current browser identification occurs
--user-agent UA specify a custom user agent --dump-user-agent display the current browser identification
--referer REF specify a custom referer, use if the video access --user-agent UA specify a custom user agent
is restricted to one domain --referer REF specify a custom referer, use if the video
--list-extractors List all supported extractors and the URLs they access is restricted to one domain
would handle --list-extractors List all supported extractors and the URLs
--extractor-descriptions Output descriptions of all supported extractors they would handle
--proxy URL Use the specified HTTP/HTTPS proxy --extractor-descriptions Output descriptions of all supported
--no-check-certificate Suppress HTTPS certificate validation. extractors
--cache-dir DIR Location in the filesystem where youtube-dl can --proxy URL Use the specified HTTP/HTTPS proxy. Pass in
store downloaded information permanently. By an empty string (--proxy "") for direct
default $XDG_CACHE_HOME/youtube-dl or ~/.cache connection
/youtube-dl . --no-check-certificate Suppress HTTPS certificate validation.
--no-cache-dir Disable filesystem caching --prefer-insecure Use an unencrypted connection to retrieve
information about the video. (Currently
supported only for YouTube)
--cache-dir DIR Location in the filesystem where youtube-dl
can store some downloaded information
permanently. By default $XDG_CACHE_HOME
/youtube-dl or ~/.cache/youtube-dl . At the
moment, only YouTube player files (for
videos with obfuscated signatures) are
cached, but that may change.
--no-cache-dir Disable filesystem caching
--socket-timeout None Time to wait before giving up, in seconds
--bidi-workaround Work around terminals that lack
bidirectional text support. Requires bidiv
or fribidi executable in PATH
--default-search PREFIX Use this prefix for unqualified URLs. For
example "gvsearch2:" downloads two videos
from google videos for youtube-dl "large
apple". By default (with value "auto")
youtube-dl guesses.
--ignore-config Do not read configuration files. When given
in the global configuration file /etc
/youtube-dl.conf: do not read the user
configuration in ~/.config/youtube-dl.conf
(%APPDATA%/youtube-dl/config.txt on
Windows)
## Video Selection: ## Video Selection:
--playlist-start NUMBER playlist video to start at (default is 1) --playlist-start NUMBER playlist video to start at (default is 1)
--playlist-end NUMBER playlist video to end at (default is last) --playlist-end NUMBER playlist video to end at (default is last)
--match-title REGEX download only matching titles (regex or caseless --match-title REGEX download only matching titles (regex or
sub-string) caseless sub-string)
--reject-title REGEX skip download for matching titles (regex or --reject-title REGEX skip download for matching titles (regex or
caseless sub-string) caseless sub-string)
--max-downloads NUMBER Abort after downloading NUMBER files --max-downloads NUMBER Abort after downloading NUMBER files
--min-filesize SIZE Do not download any videos smaller than SIZE --min-filesize SIZE Do not download any videos smaller than
(e.g. 50k or 44.6m) SIZE (e.g. 50k or 44.6m)
--max-filesize SIZE Do not download any videos larger than SIZE (e.g. --max-filesize SIZE Do not download any videos larger than SIZE
50k or 44.6m) (e.g. 50k or 44.6m)
--date DATE download only videos uploaded in this date --date DATE download only videos uploaded in this date
--datebefore DATE download only videos uploaded before this date --datebefore DATE download only videos uploaded on or before
--dateafter DATE download only videos uploaded after this date this date (i.e. inclusive)
--no-playlist download only the currently playing video --dateafter DATE download only videos uploaded on or after
--age-limit YEARS download only videos suitable for the given age this date (i.e. inclusive)
--download-archive FILE Download only videos not present in the archive --min-views COUNT Do not download any videos with less than
file. Record all downloaded videos in it. COUNT views
--max-views COUNT Do not download any videos with more than
COUNT views
--no-playlist download only the currently playing video
--age-limit YEARS download only videos suitable for the given
age
--download-archive FILE Download only videos not listed in the
archive file. Record the IDs of all
downloaded videos in it.
--include-ads Download advertisements as well
(experimental)
--youtube-include-dash-manifest Try to download the DASH manifest on
YouTube videos (experimental)
## Download Options: ## Download Options:
-r, --rate-limit LIMIT maximum download rate in bytes per second (e.g. -r, --rate-limit LIMIT maximum download rate in bytes per second
50K or 4.2M) (e.g. 50K or 4.2M)
-R, --retries RETRIES number of retries (default is 10) -R, --retries RETRIES number of retries (default is 10)
--buffer-size SIZE size of download buffer (e.g. 1024 or 16K) --buffer-size SIZE size of download buffer (e.g. 1024 or 16K)
(default is 1024) (default is 1024)
--no-resize-buffer do not automatically adjust the buffer size. By --no-resize-buffer do not automatically adjust the buffer
default, the buffer size is automatically resized size. By default, the buffer size is
from an initial value of SIZE. automatically resized from an initial value
of SIZE.
## Filesystem Options: ## Filesystem Options:
-t, --title use title in file name (default) -t, --title use title in file name (default)
--id use only video ID in file name --id use only video ID in file name
-l, --literal [deprecated] alias of --title -l, --literal [deprecated] alias of --title
-A, --auto-number number downloaded files starting from 00000 -A, --auto-number number downloaded files starting from 00000
-o, --output TEMPLATE output filename template. Use %(title)s to get -o, --output TEMPLATE output filename template. Use %(title)s to
the title, %(uploader)s for the uploader name, get the title, %(uploader)s for the
%(uploader_id)s for the uploader nickname if uploader name, %(uploader_id)s for the
different, %(autonumber)s to get an automatically uploader nickname if different,
incremented number, %(ext)s for the filename %(autonumber)s to get an automatically
extension, %(format)s for the format description incremented number, %(ext)s for the
(like "22 - 1280x720" or "HD")%(upload_date)s for filename extension, %(format)s for the
the upload date (YYYYMMDD), %(extractor)s for the format description (like "22 - 1280x720" or
provider (youtube, metacafe, etc), %(id)s for the "HD"), %(format_id)s for the unique id of
video id , %(playlist)s for the playlist the the format (like Youtube's itags: "137"),
video is in, %(playlist_index)s for the position %(upload_date)s for the upload date
in the playlist and %% for a literal percent. Use (YYYYMMDD), %(extractor)s for the provider
- to output to stdout. Can also be used to (youtube, metacafe, etc), %(id)s for the
download to a different directory, for example video id, %(playlist)s for the playlist the
with -o '/my/downloads/%(uploader)s/%(title)s-%(i video is in, %(playlist_index)s for the
d)s.%(ext)s' . position in the playlist and %% for a
--autonumber-size NUMBER Specifies the number of digits in %(autonumber)s literal percent. %(height)s and %(width)s
when it is present in output filename template or for the width and height of the video
--autonumber option is given format. %(resolution)s for a textual
--restrict-filenames Restrict filenames to only ASCII characters, and description of the resolution of the video
avoid "&" and spaces in filenames format. Use - to output to stdout. Can also
-a, --batch-file FILE file containing URLs to download ('-' for stdin) be used to download to a different
-w, --no-overwrites do not overwrite files directory, for example with -o '/my/downloa
-c, --continue resume partially downloaded files ds/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
--no-continue do not resume partially downloaded files (restart --autonumber-size NUMBER Specifies the number of digits in
from beginning) %(autonumber)s when it is present in output
--cookies FILE file to read cookies from and dump cookie jar in filename template or --auto-number option
--no-part do not use .part files is given
--no-mtime do not use the Last-modified header to set the --restrict-filenames Restrict filenames to only ASCII
file modification time characters, and avoid "&" and spaces in
--write-description write video description to a .description file filenames
--write-info-json write video metadata to a .info.json file -a, --batch-file FILE file containing URLs to download ('-' for
--write-annotations write video annotations to a .annotation file stdin)
--write-thumbnail write thumbnail image to disk --load-info FILE json file containing the video information
(created with the "--write-info-json" option)
-w, --no-overwrites do not overwrite files
-c, --continue force resume of partially downloaded files.
By default, youtube-dl will resume
downloads if possible.
--no-continue do not resume partially downloaded files
(restart from beginning)
--cookies FILE file to read cookies from and dump cookie
jar in
--no-part do not use .part files
--no-mtime do not use the Last-modified header to set
the file modification time
--write-description write video description to a .description
file
--write-info-json write video metadata to a .info.json file
--write-annotations write video annotations to a .annotation
file
--write-thumbnail write thumbnail image to disk
## Verbosity / Simulation Options: ## Verbosity / Simulation Options:
-q, --quiet activates quiet mode -q, --quiet activates quiet mode
-s, --simulate do not download the video and do not write -s, --simulate do not download the video and do not write
anything to disk anything to disk
--skip-download do not download the video --skip-download do not download the video
-g, --get-url simulate, quiet but print URL -g, --get-url simulate, quiet but print URL
-e, --get-title simulate, quiet but print title -e, --get-title simulate, quiet but print title
--get-id simulate, quiet but print id --get-id simulate, quiet but print id
--get-thumbnail simulate, quiet but print thumbnail URL --get-thumbnail simulate, quiet but print thumbnail URL
--get-description simulate, quiet but print video description --get-description simulate, quiet but print video description
--get-filename simulate, quiet but print output filename --get-duration simulate, quiet but print video length
--get-format simulate, quiet but print output format --get-filename simulate, quiet but print output filename
--newline output progress bar as new lines --get-format simulate, quiet but print output format
--no-progress do not print progress bar -j, --dump-json simulate, quiet but print JSON information
--console-title display progress in console titlebar --newline output progress bar as new lines
-v, --verbose print various debugging information --no-progress do not print progress bar
--dump-intermediate-pages print downloaded pages to debug problems(very --console-title display progress in console titlebar
verbose) -v, --verbose print various debugging information
--dump-intermediate-pages print downloaded pages to debug problems
(very verbose)
--write-pages Write downloaded intermediary pages to
files in the current directory to debug
problems
--print-traffic Display sent and read HTTP traffic
## Video Format Options: ## Video Format Options:
-f, --format FORMAT video format code, specifiy the order of -f, --format FORMAT video format code, specify the order of
preference using slashes: "-f 22/17/18". "-f mp4" preference using slashes: "-f 22/17/18".
and "-f flv" are also supported "-f mp4" and "-f flv" are also supported.
--all-formats download all available video formats You can also use the special names "best",
--prefer-free-formats prefer free video formats unless a specific one "bestvideo", "bestaudio", "worst",
is requested "worstvideo" and "worstaudio". By default,
--max-quality FORMAT highest quality format to download youtube-dl will pick the best quality.
-F, --list-formats list all available formats (currently youtube --all-formats download all available video formats
only) --prefer-free-formats prefer free video formats unless a specific
one is requested
--max-quality FORMAT highest quality format to download
-F, --list-formats list all available formats
## Subtitle Options: ## Subtitle Options:
--write-sub write subtitle file --write-sub write subtitle file
--write-auto-sub write automatic subtitle file (youtube only) --write-auto-sub write automatic subtitle file (youtube
--all-subs downloads all the available subtitles of the only)
video --all-subs downloads all the available subtitles of
--list-subs lists all available subtitles for the video the video
--sub-format FORMAT subtitle format (default=srt) ([sbv/vtt] youtube --list-subs lists all available subtitles for the video
only) --sub-format FORMAT subtitle format (default=srt) ([sbv/vtt]
--sub-lang LANGS languages of the subtitles to download (optional) youtube only)
separated by commas, use IETF language tags like --sub-lang LANGS languages of the subtitles to download
'en,pt' (optional) separated by commas, use IETF
language tags like 'en,pt'
## Authentication Options: ## Authentication Options:
-u, --username USERNAME account username -u, --username USERNAME account username
-p, --password PASSWORD account password -p, --password PASSWORD account password
-n, --netrc use .netrc authentication data -n, --netrc use .netrc authentication data
--video-password PASSWORD video password (vimeo only) --video-password PASSWORD video password (vimeo, smotri)
## Post-processing Options: ## Post-processing Options:
-x, --extract-audio convert video files to audio-only files (requires -x, --extract-audio convert video files to audio-only files
ffmpeg or avconv and ffprobe or avprobe) (requires ffmpeg or avconv and ffprobe or
--audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", "opus", or avprobe)
"wav"; best by default --audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a",
--audio-quality QUALITY ffmpeg/avconv audio quality specification, insert "opus", or "wav"; best by default
a value between 0 (better) and 9 (worse) for VBR --audio-quality QUALITY ffmpeg/avconv audio quality specification,
or a specific bitrate like 128K (default 5) insert a value between 0 (better) and 9
--recode-video FORMAT Encode the video to another format if necessary (worse) for VBR or a specific bitrate like
(currently supported: mp4|flv|ogg|webm) 128K (default 5)
-k, --keep-video keeps the video file on disk after the post- --recode-video FORMAT Encode the video to another format if
processing; the video is erased by default necessary (currently supported:
--no-post-overwrites do not overwrite post-processed files; the post- mp4|flv|ogg|webm)
processed files are overwritten by default -k, --keep-video keeps the video file on disk after the
--embed-subs embed subtitles in the video (only for mp4 post-processing; the video is erased by
videos) default
--add-metadata add metadata to the files --no-post-overwrites do not overwrite post-processed files; the
post-processed files are overwritten by
default
--embed-subs embed subtitles in the video (only for mp4
videos)
--add-metadata write metadata to the video file
--xattrs write metadata to the video file's xattrs
(using dublin core and xdg standards)
--prefer-avconv Prefer avconv over ffmpeg for running the
postprocessors (default)
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
postprocessors
# CONFIGURATION # CONFIGURATION
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`. You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<Yourname>\youtube-dl.conf`.
# OUTPUT TEMPLATE # OUTPUT TEMPLATE
@ -211,9 +288,14 @@ Videos can be filtered by their upload date using the options `--date`, `--dateb
Examples: Examples:
$ youtube-dl --dateafter now-6months #will only download the videos uploaded in the last 6 months # Download only the videos uploaded in the last 6 months
$ youtube-dl --date 19700101 #will only download the videos uploaded in January 1, 1970 $ youtube-dl --dateafter now-6months
$ youtube-dl --dateafter 20000101 --datebefore 20100101 #will only download the videos uploaded between 2000 and 2010
# Download only the videos uploaded on January 1, 1970
$ youtube-dl --date 19700101
# Download only the videos uploaded in the 200x decade
$ youtube-dl --dateafter 20000101 --datebefore 20091231
# FAQ # FAQ
@ -258,22 +340,92 @@ Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unz
To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29). To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29).
# DEVELOPER INSTRUCTIONS
Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
To run youtube-dl as a developer, you don't need to build anything either. Simply execute
python -m youtube_dl
To run the tests, simply invoke your favorite test runner, or execute a test file directly; any of the following work:
python -m unittest discover
python test/test_download.py
nosetests
If you want to create a build of youtube-dl yourself, you'll need
* python
* make
* pandoc
* zip
* nosetests
### Adding support for a new site
If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor`, and add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py TestDownload.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/).
# BUGS
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues> . Unless you were prompted so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email.
Please include the full output of the command when run with `--verbose`. The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
For discussions, join us in the irc channel #youtube-dl on freenode.
When you submit a request, please re-read it once to avoid a couple of mistakes (you can and should use this as a checklist):
### Is the description of the issue itself sufficient?
We often get issue reports that we cannot really decipher. While in most cases we eventually get the required information after asking back multiple times, this poses an unnecessary drain on our resources. Many contributors, including myself, are also not native speakers, so we may misread some parts.
So please elaborate on what feature you are requesting, or what bug you want to be fixed. Make sure that it's obvious
- What the problem is
- How it could be fixed
- What your proposed solution would look like
If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a committer myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
Site support requests must contain an example URL. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
### Are you using the latest version?
Before reporting any issue, type youtube-dl -U. This should report that you're up-to-date. About 20% of the reports we receive are already fixed, but people are using outdated versions. This goes for feature requests as well.
### Is the issue already documented?
Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or at https://github.com/rg3/youtube-dl/search?type=Issues . If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
### Why are existing options not enough?
Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#synopsis). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
### Is there enough context in your bug report?
People want to solve problems, and often think they do us a favor by breaking down their larger problems (e.g. wanting to skip already downloaded files) to a specific request (e.g. requesting us to look whether the file exists before downloading the info page). However, what often happens is that they break down the problem into two steps: One simple, and one impossible (or extremely complicated one).
We are then presented with a very complicated request when the original problem could be solved far easier, e.g. by recording the downloaded video IDs in a separate file. To avoid this, you must include the greater context where it is non-obvious. In particular, every feature request that does not consist of adding support for a new site should contain a use case scenario that explains in what situation the missing feature would be useful.
### Does the issue involve one problem, and one problem only?
Some of our users seem to think there is a limit of issues they can or should open. There is no limit of issues they can or should open. While it may seem appealing to be able to dump all your issues into one ticket, that means that someone who solves one of your issues cannot mark the issue as closed. Typically, reporting a bunch of issues leads to the ticket lingering since nobody wants to attack that behemoth, until someone mercifully splits the issue into multiple ones.
In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, Whitehouse podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of youtube-dl that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service.
### Is anyone going to need the feature?
Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
### Is your question about youtube-dl?
It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different or even the reporter's own application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.
# COPYRIGHT # COPYRIGHT
youtube-dl is released into the public domain by the copyright holders. youtube-dl is released into the public domain by the copyright holders.
This README file was originally written by Daniel Bolton (<https://github.com/dbbolton>) and is likewise released into the public domain. This README file was originally written by Daniel Bolton (<https://github.com/dbbolton>) and is likewise released into the public domain.
# BUGS
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>
Please include:
* Your exact command line, like `youtube-dl -t "http://www.youtube.com/watch?v=uHlDtZ6Oc3s&feature=channel_video_title"`. A common mistake is not to escape the `&`. Putting URLs in quotes should solve this problem.
* If possible re-run the command with `--verbose`, and include the full output, it is really helpful to us.
* The output of `youtube-dl --version`
* The output of `python --version`
* The name and version of your Operating System ("Ubuntu 11.04 x64" or "Windows 7 x64" is usually enough).
For discussions, join us in the irc channel #youtube-dl on freenode.

View File

@ -1,10 +1,21 @@
__youtube_dl() __youtube_dl()
{ {
local cur prev opts local cur prev opts fileopts diropts keywords
COMPREPLY=() COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}" cur="${COMP_WORDS[COMP_CWORD]}"
prev="${COMP_WORDS[COMP_CWORD-1]}"
opts="{{flags}}" opts="{{flags}}"
keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater" keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
fileopts="-a|--batch-file|--download-archive|--cookies|--load-info"
diropts="--cache-dir"
if [[ ${prev} =~ ${fileopts} ]]; then
COMPREPLY=( $(compgen -f -- ${cur}) )
return 0
elif [[ ${prev} =~ ${diropts} ]]; then
COMPREPLY=( $(compgen -d -- ${cur}) )
return 0
fi
if [[ ${cur} =~ : ]]; then if [[ ${cur} =~ : ]]; then
COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) ) COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )

59
devscripts/check-porn.py Normal file
View File

@ -0,0 +1,59 @@
#!/usr/bin/env python

"""
This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check
if we are not 'age_limit' tagging some porn site

A second approach implemented relies on a list of porn domains, to activate it
pass the list filename as the only argument
"""

# Allow direct execution
import io
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from test.helper import get_testcases
from youtube_dl.utils import compat_urllib_parse_urlparse
from youtube_dl.utils import compat_urllib_request

if len(sys.argv) > 1:
    METHOD = 'LIST'
    # io.open decodes on both Python 2 and 3 and the with-block closes the
    # file.  The content is re-padded with newlines so that the first and
    # last entries of the list can match the '\n'-delimited lookups below.
    with io.open(sys.argv[1], encoding='utf8') as list_file:
        LIST = '\n' + list_file.read().strip() + '\n'
else:
    METHOD = 'HEURISTIC'

for test in get_testcases():
    if METHOD == 'HEURISTIC':
        try:
            webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
        except Exception:
            # Any download problem is reported and skipped; Ctrl-C and
            # SystemExit are deliberately not swallowed.
            print('\nFail: {0}'.format(test['name']))
            continue

        webpage = webpage.decode('utf8', 'replace')

        RESULT = 'porn' in webpage.lower()

    elif METHOD == 'LIST':
        domain = compat_urllib_parse_urlparse(test['url']).netloc
        if not domain:
            print('\nFail: {0}'.format(test['name']))
            continue
        # Reduce the host to its last two labels (e.g. "example.com")
        domain = '.'.join(domain.split('.')[-2:])

        # Match either a list line equal to the domain or one ending in
        # ".<domain>"
        RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST)

    # True when the test case already declares age_limit == 18
    tagged_adult = test.get('info_dict', {}).get('age_limit') == 18

    if RESULT and not tagged_adult:
        print('\nPotential missing age_limit check: {0}'.format(test['name']))

    elif not RESULT and tagged_adult:
        print('\nPotential false negative: {0}'.format(test['name']))

    else:
        sys.stdout.write('.')
    sys.stdout.flush()

# Terminate the progress-dot line (a bare print() would emit "()" on Python 2)
sys.stdout.write('\n')

View File

@ -1,56 +1,76 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import datetime import datetime
import io
import json
import textwrap import textwrap
import json
atom_template=textwrap.dedent("""\ atom_template = textwrap.dedent("""\
<?xml version='1.0' encoding='utf-8'?> <?xml version="1.0" encoding="utf-8"?>
<atom:feed xmlns:atom="http://www.w3.org/2005/Atom"> <feed xmlns="http://www.w3.org/2005/Atom">
<atom:title>youtube-dl releases</atom:title> <link rel="self" href="http://rg3.github.io/youtube-dl/update/releases.atom" />
<atom:id>youtube-dl-updates-feed</atom:id> <title>youtube-dl releases</title>
<atom:updated>@TIMESTAMP@</atom:updated> <id>https://yt-dl.org/feed/youtube-dl-updates-feed</id>
@ENTRIES@ <updated>@TIMESTAMP@</updated>
</atom:feed>""") @ENTRIES@
</feed>""")
entry_template=textwrap.dedent(""" entry_template = textwrap.dedent("""
<atom:entry> <entry>
<atom:id>youtube-dl-@VERSION@</atom:id> <id>https://yt-dl.org/feed/youtube-dl-updates-feed/youtube-dl-@VERSION@</id>
<atom:title>New version @VERSION@</atom:title> <title>New version @VERSION@</title>
<atom:link href="http://rg3.github.io/youtube-dl" /> <link href="http://rg3.github.io/youtube-dl" />
<atom:content type="xhtml"> <content type="xhtml">
<div xmlns="http://www.w3.org/1999/xhtml"> <div xmlns="http://www.w3.org/1999/xhtml">
Downloads available at <a href="https://yt-dl.org/downloads/@VERSION@/">https://yt-dl.org/downloads/@VERSION@/</a> Downloads available at <a href="https://yt-dl.org/downloads/@VERSION@/">https://yt-dl.org/downloads/@VERSION@/</a>
</div> </div>
</atom:content> </content>
<atom:author> <author>
<atom:name>The youtube-dl maintainers</atom:name> <name>The youtube-dl maintainers</name>
</atom:author> </author>
<atom:updated>@TIMESTAMP@</atom:updated> <updated>@TIMESTAMP@</updated>
</atom:entry> </entry>
""") """)
now = datetime.datetime.now() now = datetime.datetime.now()
now_iso = now.isoformat() now_iso = now.isoformat() + 'Z'
atom_template = atom_template.replace('@TIMESTAMP@',now_iso) atom_template = atom_template.replace('@TIMESTAMP@', now_iso)
entries=[]
versions_info = json.load(open('update/versions.json')) versions_info = json.load(open('update/versions.json'))
versions = list(versions_info['versions'].keys()) versions = list(versions_info['versions'].keys())
versions.sort() versions.sort()
entries = []
for v in versions: for v in versions:
entry = entry_template.replace('@TIMESTAMP@',v.replace('.','-')) fields = v.split('.')
entry = entry.replace('@VERSION@',v) year, month, day = map(int, fields[:3])
entries.append(entry) faked = 0
patchlevel = 0
while True:
try:
datetime.date(year, month, day)
except ValueError:
day -= 1
faked += 1
assert day > 0
continue
break
if len(fields) >= 4:
try:
patchlevel = int(fields[3])
except ValueError:
patchlevel = 1
timestamp = '%04d-%02d-%02dT00:%02d:%02dZ' % (year, month, day, faked, patchlevel)
entry = entry_template.replace('@TIMESTAMP@', timestamp)
entry = entry.replace('@VERSION@', v)
entries.append(entry)
entries_str = textwrap.indent(''.join(entries), '\t') entries_str = textwrap.indent(''.join(entries), '\t')
atom_template = atom_template.replace('@ENTRIES@', entries_str) atom_template = atom_template.replace('@ENTRIES@', entries_str)
with open('update/releases.atom','w',encoding='utf-8') as atom_file: with io.open('update/releases.atom', 'w', encoding='utf-8') as atom_file:
atom_file.write(atom_template) atom_file.write(atom_template)

View File

@ -1,20 +1,24 @@
import io
import sys import sys
import re import re
README_FILE = 'README.md' README_FILE = 'README.md'
helptext = sys.stdin.read() helptext = sys.stdin.read()
with open(README_FILE) as f: if isinstance(helptext, bytes):
helptext = helptext.decode('utf-8')
with io.open(README_FILE, encoding='utf-8') as f:
oldreadme = f.read() oldreadme = f.read()
header = oldreadme[:oldreadme.index('# OPTIONS')] header = oldreadme[:oldreadme.index('# OPTIONS')]
footer = oldreadme[oldreadme.index('# CONFIGURATION'):] footer = oldreadme[oldreadme.index('# CONFIGURATION'):]
options = helptext[helptext.index(' General Options:')+19:] options = helptext[helptext.index(' General Options:') + 19:]
options = re.sub(r'^ (\w.+)$', r'## \1', options, flags=re.M) options = re.sub(r'^ (\w.+)$', r'## \1', options, flags=re.M)
options = '# OPTIONS\n' + options + '\n' options = '# OPTIONS\n' + options + '\n'
with open(README_FILE, 'w') as f: with io.open(README_FILE, 'w', encoding='utf-8') as f:
f.write(header) f.write(header)
f.write(options) f.write(options)
f.write(footer) f.write(footer)

View File

@ -14,9 +14,9 @@
set -e set -e
skip_tests=false skip_tests=true
if [ "$1" = '--skip-test' ]; then if [ "$1" = '--run-tests' ]; then
skip_tests=true skip_tests=false
shift shift
fi fi
@ -24,6 +24,8 @@ if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.0
version="$1" version="$1"
if [ ! -z "`git tag | grep "$version"`" ]; then echo 'ERROR: version already present'; exit 1; fi if [ ! -z "`git tag | grep "$version"`" ]; then echo 'ERROR: version already present'; exit 1; fi
if [ ! -z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: the working directory is not clean; commit or stash changes'; exit 1; fi if [ ! -z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: the working directory is not clean; commit or stash changes'; exit 1; fi
useless_files=$(find youtube_dl -type f -not -name '*.py')
if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $useless_files"; exit 1; fi
if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi
/bin/echo -e "\n### First of all, testing..." /bin/echo -e "\n### First of all, testing..."
@ -68,7 +70,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
git checkout HEAD -- youtube-dl youtube-dl.exe git checkout HEAD -- youtube-dl youtube-dl.exe
/bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..." /bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..."
for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
scp -r "build/$version" ytdl@yt-dl.org:html/tmp/ scp -r "build/$version" ytdl@yt-dl.org:html/tmp/
ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/" ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/"
ssh ytdl@yt-dl.org "sh html/update_latest.sh $version" ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"
@ -95,7 +97,7 @@ rm -rf build
make pypi-files make pypi-files
echo "Uploading to PyPi ..." echo "Uploading to PyPi ..."
python setup.py sdist upload python setup.py sdist bdist_wheel upload
make clean make clean
/bin/echo -e "\n### DONE!" /bin/echo -e "\n### DONE!"

1
docs/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
_build/

177
docs/Makefile Normal file
View File

@ -0,0 +1,177 @@
# Makefile for Sphinx documentation
#
# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
PAPER =
BUILDDIR = _build
# User-friendly check for sphinx-build
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
endif
# Internal variables.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# Every target in this file is a command, not a file to be built
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf latexpdfja text man texinfo info changes linkcheck doctest gettext
help:
@echo "Please use \`make <target>' where <target> is one of"
@echo " html to make standalone HTML files"
@echo " dirhtml to make HTML files named index.html in directories"
@echo " singlehtml to make a single large HTML file"
@echo " pickle to make pickle files"
@echo " json to make JSON files"
@echo " htmlhelp to make HTML files and a HTML help project"
@echo " qthelp to make HTML files and a qthelp project"
@echo " devhelp to make HTML files and a Devhelp project"
@echo " epub to make an epub"
@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
@echo " latexpdf to make LaTeX files and run them through pdflatex"
@echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
@echo " text to make text files"
@echo " man to make manual pages"
@echo " texinfo to make Texinfo files"
@echo " info to make Texinfo files and run them through makeinfo"
@echo " gettext to make PO message catalogs"
@echo " changes to make an overview of all changed/added/deprecated items"
@echo " xml to make Docutils-native XML files"
@echo " pseudoxml to make pseudoxml-XML files for display purposes"
@echo " linkcheck to check all external links for integrity"
@echo " doctest to run all doctests embedded in the documentation (if enabled)"
clean:
rm -rf $(BUILDDIR)/*
html:
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
dirhtml:
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
singlehtml:
$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
@echo
@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
pickle:
$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
@echo
@echo "Build finished; now you can process the pickle files."
json:
$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
@echo
@echo "Build finished; now you can process the JSON files."
htmlhelp:
$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
@echo
@echo "Build finished; now you can run HTML Help Workshop with the" \
".hhp project file in $(BUILDDIR)/htmlhelp."
qthelp:
$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
@echo
@echo "Build finished; now you can run "qcollectiongenerator" with the" \
".qhcp project file in $(BUILDDIR)/qthelp, like this:"
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/youtube-dl.qhcp"
@echo "To view the help file:"
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/youtube-dl.qhc"
devhelp:
$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
@echo
@echo "Build finished."
@echo "To view the help file:"
@echo "# mkdir -p $$HOME/.local/share/devhelp/youtube-dl"
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/youtube-dl"
@echo "# devhelp"
epub:
$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
@echo
@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
latex:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo
@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
@echo "Run \`make' in that directory to run these through (pdf)latex" \
"(use \`make latexpdf' here to do that automatically)."
latexpdf:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo "Running LaTeX files through pdflatex..."
$(MAKE) -C $(BUILDDIR)/latex all-pdf
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
latexpdfja:
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
@echo "Running LaTeX files through platex and dvipdfmx..."
$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
text:
$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
@echo
@echo "Build finished. The text files are in $(BUILDDIR)/text."
man:
$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
@echo
@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
texinfo:
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
@echo
@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
@echo "Run \`make' in that directory to run these through makeinfo" \
"(use \`make info' here to do that automatically)."
# Build the Texinfo sources, then run the "info" target of the Makefile
# in $(BUILDDIR)/texinfo. The sub-make is invoked through $(MAKE) (as
# the latexpdf targets do) so that flags such as -n/-j and the make binary
# in use are passed down to it.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
gettext:
$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
@echo
@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
changes:
$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
@echo
@echo "The overview file is in $(BUILDDIR)/changes."
linkcheck:
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
@echo
@echo "Link check complete; look for any errors in the above output " \
"or in $(BUILDDIR)/linkcheck/output.txt."
doctest:
$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
@echo "Testing of doctests in the sources finished, look at the " \
"results in $(BUILDDIR)/doctest/output.txt."
xml:
$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
@echo
@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
pseudoxml:
$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
@echo
@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."

71
docs/conf.py Normal file
View File

@ -0,0 +1,71 @@
# -*- coding: utf-8 -*-
#
# youtube-dl documentation build configuration file, created by
# sphinx-quickstart on Fri Mar 14 21:05:43 2014.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import sys
import os
# Put the parent of this file's directory first on sys.path so that the
# ``import youtube_dl`` further down can be resolved from there.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# -- General configuration ------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix of source filenames.
source_suffix = '.rst'
# The master toctree document.
master_doc = 'index'
# General information about the project.
project = u'youtube-dl'
copyright = u'2014, Ricardo Garcia Gonzalez'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# |version| is read from the package's own __version__ string rather than
# being hard-coded here.
import youtube_dl
version = youtube_dl.__version__
# |release| reuses the same value; no separate short/full version is kept.
release = version
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build']
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
# -- Options for HTML output ----------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = 'default'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# Output file base name for HTML help builder.
htmlhelp_basename = 'youtube-dldoc'

23
docs/index.rst Normal file
View File

@ -0,0 +1,23 @@
Welcome to youtube-dl's documentation!
======================================
*youtube-dl* is a command-line program to download videos from YouTube.com and many other sites.
It can also be used in Python code.
Developer guide
---------------
This section contains information for using *youtube-dl* from Python programs.
.. toctree::
:maxdepth: 2
module_guide
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

67
docs/module_guide.rst Normal file
View File

@ -0,0 +1,67 @@
Using the ``youtube_dl`` module
===============================
When using the ``youtube_dl`` module, you start by creating an instance of :class:`YoutubeDL` and adding all the available extractors:
.. code-block:: python
>>> from youtube_dl import YoutubeDL
>>> ydl = YoutubeDL()
>>> ydl.add_default_info_extractors()
Extracting video information
----------------------------
Use the :meth:`YoutubeDL.extract_info` method to get the video information; it returns a dictionary:
.. code-block:: python
>>> info = ydl.extract_info('http://www.youtube.com/watch?v=BaW_jenozKc', download=False)
[youtube] Setting language
[youtube] BaW_jenozKc: Downloading webpage
[youtube] BaW_jenozKc: Downloading video info webpage
[youtube] BaW_jenozKc: Extracting video information
>>> info['title']
'youtube-dl test video "\'/\\ä↭𝕐'
>>> info['height'], info['width']
(720, 1280)
If you want to download or play the video, you can get its URL:
.. code-block:: python
>>> info['url']
'https://...'
Extracting playlist information
-------------------------------
The playlist information is extracted in a similar way, but the dictionary is a bit different:
.. code-block:: python
>>> playlist = ydl.extract_info('http://www.ted.com/playlists/13/open_source_open_world', download=False)
[TED] open_source_open_world: Downloading playlist webpage
...
>>> playlist['title']
'Open-source, open world'
You can access the videos in the playlist with the ``entries`` field:
.. code-block:: python
>>> for video in playlist['entries']:
... print('Video #%d: %s' % (video['playlist_index'], video['title']))
Video #1: How Arduino is open-sourcing imagination
Video #2: The year open data went worldwide
Video #3: Massive-scale online collaboration
Video #4: The art of asking
Video #5: How cognitive surplus will change the world
Video #6: The birth of Wikipedia
Video #7: Coding a better government
Video #8: The era of open innovation
Video #9: The currency of the new economy is trust

2
setup.cfg Normal file
View File

@ -0,0 +1,2 @@
[wheel]
universal = True

View File

@ -3,13 +3,17 @@
from __future__ import print_function from __future__ import print_function
import os.path
import pkg_resources import pkg_resources
import warnings
import sys import sys
try: try:
from setuptools import setup from setuptools import setup
setuptools_available = True
except ImportError: except ImportError:
from distutils.core import setup from distutils.core import setup
setuptools_available = False
try: try:
# This will create an exe that needs Microsoft Visual C++ 2008 # This will create an exe that needs Microsoft Visual C++ 2008
@ -42,14 +46,29 @@ py2exe_params = {
if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe': if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
params = py2exe_params params = py2exe_params
else: else:
files_spec = [
('etc/bash_completion.d', ['youtube-dl.bash-completion']),
('share/doc/youtube_dl', ['README.txt']),
('share/man/man1', ['youtube-dl.1'])
]
root = os.path.dirname(os.path.abspath(__file__))
data_files = []
for dirname, files in files_spec:
resfiles = []
for fn in files:
if not os.path.exists(fn):
warnings.warn('Skipping file %s since it is not present. Type make to build all automatically generated files.' % fn)
else:
resfiles.append(fn)
data_files.append((dirname, resfiles))
params = { params = {
'scripts': ['bin/youtube-dl'], 'data_files': data_files,
'data_files': [ # Installing system-wide would require sudo...
('etc/bash_completion.d', ['youtube-dl.bash-completion']),
('share/doc/youtube_dl', ['README.txt']),
('share/man/man1/', ['youtube-dl.1'])
]
} }
if setuptools_available:
params['entry_points'] = {'console_scripts': ['youtube-dl = youtube_dl:main']}
else:
params['scripts'] = ['bin/youtube-dl']
# Get the version from youtube_dl/version.py without importing the package # Get the version from youtube_dl/version.py without importing the package
exec(compile(open('youtube_dl/version.py').read(), exec(compile(open('youtube_dl/version.py').read(),
@ -66,7 +85,10 @@ setup(
author_email='ytdl@yt-dl.org', author_email='ytdl@yt-dl.org',
maintainer='Philipp Hagemeister', maintainer='Philipp Hagemeister',
maintainer_email='phihag@phihag.de', maintainer_email='phihag@phihag.de',
packages=['youtube_dl', 'youtube_dl.extractor'], packages=[
'youtube_dl',
'youtube_dl.extractor', 'youtube_dl.downloader',
'youtube_dl.postprocessor'],
# Provokes warning on most systems (why?!) # Provokes warning on most systems (why?!)
# test_suite = 'nose.collector', # test_suite = 'nose.collector',

View File

@ -5,13 +5,14 @@ import json
import os.path import os.path
import re import re
import types import types
import sys
import youtube_dl.extractor import youtube_dl.extractor
from youtube_dl import YoutubeDL from youtube_dl import YoutubeDL
from youtube_dl.utils import (
compat_str,
def global_setup(): preferredencoding,
youtube_dl._setup_opener(timeout=10) )
def get_params(override=None): def get_params(override=None):
@ -33,6 +34,21 @@ def try_rm(filename):
raise raise
def report_warning(message):
    '''
    Write `message` to stderr as a single line prefixed with 'WARNING:'.

    The prefix is wrapped in ANSI color escapes when stderr is a tty and
    os.name is not 'nt'.
    '''
    colorize = sys.stderr.isatty() and os.name != 'nt'
    prefix = u'\033[0;33mWARNING:\033[0m' if colorize else u'WARNING:'
    line = u'%s %s\n' % (prefix, message)
    # Encode before writing when the stream reports a binary mode or when
    # running on Python 2.
    wants_bytes = 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3
    sys.stderr.write(line.encode(preferredencoding()) if wants_bytes else line)
class FakeYDL(YoutubeDL): class FakeYDL(YoutubeDL):
def __init__(self, override=None): def __init__(self, override=None):
# Different instances of the downloader can't share the same dictionary # Different instances of the downloader can't share the same dictionary
@ -58,7 +74,7 @@ class FakeYDL(YoutubeDL):
old_report_warning(message) old_report_warning(message)
self.report_warning = types.MethodType(report_warning, self) self.report_warning = types.MethodType(report_warning, self)
def get_testcases(): def gettestcases():
for ie in youtube_dl.extractor.gen_extractors(): for ie in youtube_dl.extractor.gen_extractors():
t = getattr(ie, '_TEST', None) t = getattr(ie, '_TEST', None)
if t: if t:
@ -70,3 +86,45 @@ def get_testcases():
md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
def expect_info_dict(self, expected_dict, got_dict):
    '''
    Assert that an extracted info dict satisfies a test-case definition.

    self must provide the unittest assertion methods used below
    (assertTrue, assertEqual, assertFalse).

    Each entry of expected_dict is checked against got_dict according to
    the form of the expected value:
      * a string starting with 're:': the field must be a string that the
        remainder of the expectation matches with re.match (anchored at
        the start of the value);
      * a type: the field must be an instance of it;
      * a string starting with 'md5:': compared with 'md5:' followed by
        md5() of the field;
      * anything else: compared for equality.

    Afterwards the mandatory fields are checked for truthiness, and the
    test definition is required to mention every checkable field that
    got_dict actually carries.
    '''
    for info_field, expected in expected_dict.items():
        if isinstance(expected, compat_str) and expected.startswith('re:'):
            got = got_dict.get(info_field)
            match_str = expected[len('re:'):]
            match_rex = re.compile(match_str)

            self.assertTrue(
                isinstance(got, compat_str) and match_rex.match(got),
                u'field %s (value: %r) should match %r' % (info_field, got, match_str))
        elif isinstance(expected, type):
            got = got_dict.get(info_field)
            self.assertTrue(isinstance(got, expected),
                u'Expected type %r, but got value %r of type %r' % (expected, got, type(got)))
        else:
            got = got_dict.get(info_field)
            # Only hash a value that is actually present: hashing a missing
            # field (None) would raise AttributeError inside md5() instead
            # of producing the assertion failure below that names the field.
            if (got is not None and isinstance(expected, compat_str)
                    and expected.startswith('md5:')):
                got = 'md5:' + md5(got)
            self.assertEqual(expected, got,
                u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))

    # Check for the presence of mandatory fields
    for key in ('id', 'url', 'title', 'ext'):
        self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
    # Check for mandatory fields that are automatically set by YoutubeDL
    for key in ['webpage_url', 'extractor', 'extractor_key']:
        self.assertTrue(got_dict.get(key), u'Missing field: %s' % key)

    # Are checkable fields missing from the test case definition?
    # Long string values (250 characters or more) are replaced by their md5
    # form so the suggested "info_dict" printed below stays compact.
    test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
        for key, value in got_dict.items()
        if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
    missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
    if missing_keys:
        # Print a ready-to-paste definition before failing
        sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')
        self.assertFalse(
            missing_keys,
            'Missing keys in test definition: %s' % (
                ', '.join(sorted(missing_keys))))

View File

@ -39,5 +39,6 @@
"writeinfojson": true, "writeinfojson": true,
"writesubtitles": false, "writesubtitles": false,
"allsubtitles": false, "allsubtitles": false,
"listssubtitles": false "listssubtitles": false,
"socket_timeout": 20
} }

View File

@ -0,0 +1,44 @@
#!/usr/bin/env python
from __future__ import unicode_literals
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL
from youtube_dl.extractor.common import InfoExtractor
from youtube_dl.extractor import YoutubeIE, get_info_extractor
# Minimal InfoExtractor subclass with no overrides; the tests below
# instantiate it to call the helper methods inherited from InfoExtractor.
class TestIE(InfoExtractor):
    pass
class TestInfoExtractor(unittest.TestCase):
    # Exercises InfoExtractor helper methods through a bare TestIE instance.

    def setUp(self):
        fake_downloader = FakeYDL()
        self.ie = TestIE(fake_downloader)

    def test_ie_key(self):
        # Looking an extractor up by its own key must give back the class.
        key = YoutubeIE.ie_key()
        self.assertEqual(get_info_extractor(key), YoutubeIE)

    def test_html_search_regex(self):
        page = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>'

        def run_search(pattern, *extra):
            return self.ie._html_search_regex(pattern, page, *extra)

        # The captured group contains markup; the expected result is the
        # text with the tags removed.
        self.assertEqual(run_search(r'<p id="foo">(.+?)</p>', 'foo'), 'Watch this video')

    def test_opengraph(self):
        extractor = self.ie
        # Mixes name=/property= attributes, both quote styles and both
        # attribute orders.
        page = '''
<meta name="og:title" content='Foo'/>
<meta content="Some video's description " name="og:description"/>
<meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&amp;key2=val2'/>
'''
        self.assertEqual(extractor._og_search_title(page), 'Foo')
        self.assertEqual(extractor._og_search_description(page), 'Some video\'s description ')
        self.assertEqual(extractor._og_search_thumbnail(page), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
if __name__ == '__main__':
unittest.main()

View File

@ -1,5 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import unicode_literals
# Allow direct execution # Allow direct execution
import os import os
import sys import sys
@ -7,6 +9,8 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL from test.helper import FakeYDL
from youtube_dl import YoutubeDL
from youtube_dl.extractor import YoutubeIE
class YDL(FakeYDL): class YDL(FakeYDL):
@ -28,105 +32,235 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL() ydl = YDL()
ydl.params['prefer_free_formats'] = True ydl.params['prefer_free_formats'] = True
formats = [ formats = [
{u'ext': u'webm', u'height': 460}, {'ext': 'webm', 'height': 460},
{u'ext': u'mp4', u'height': 460}, {'ext': 'mp4', 'height': 460},
] ]
info_dict = {u'formats': formats, u'extractor': u'test'} info_dict = {'formats': formats, 'extractor': 'test'}
yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats'])
ydl.process_ie_result(info_dict) ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0] downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded[u'ext'], u'webm') self.assertEqual(downloaded['ext'], 'webm')
# Different resolution => download best quality (mp4) # Different resolution => download best quality (mp4)
ydl = YDL() ydl = YDL()
ydl.params['prefer_free_formats'] = True ydl.params['prefer_free_formats'] = True
formats = [ formats = [
{u'ext': u'webm', u'height': 720}, {'ext': 'webm', 'height': 720},
{u'ext': u'mp4', u'height': 1080}, {'ext': 'mp4', 'height': 1080},
] ]
info_dict[u'formats'] = formats info_dict['formats'] = formats
yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats'])
ydl.process_ie_result(info_dict) ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0] downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded[u'ext'], u'mp4') self.assertEqual(downloaded['ext'], 'mp4')
# No prefer_free_formats => keep original formats order # No prefer_free_formats => prefer mp4 and flv for greater compatibility
ydl = YDL() ydl = YDL()
ydl.params['prefer_free_formats'] = False ydl.params['prefer_free_formats'] = False
formats = [ formats = [
{u'ext': u'webm', u'height': 720}, {'ext': 'webm', 'height': 720},
{u'ext': u'flv', u'height': 720}, {'ext': 'mp4', 'height': 720},
{'ext': 'flv', 'height': 720},
] ]
info_dict[u'formats'] = formats info_dict['formats'] = formats
yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats'])
ydl.process_ie_result(info_dict) ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0] downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded[u'ext'], u'flv') self.assertEqual(downloaded['ext'], 'mp4')
ydl = YDL()
ydl.params['prefer_free_formats'] = False
formats = [
{'ext': 'flv', 'height': 720},
{'ext': 'webm', 'height': 720},
]
info_dict['formats'] = formats
yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats'])
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['ext'], 'flv')
def test_format_limit(self): def test_format_limit(self):
formats = [ formats = [
{u'format_id': u'meh'}, {'format_id': 'meh', 'url': 'http://example.com/meh', 'preference': 1},
{u'format_id': u'good'}, {'format_id': 'good', 'url': 'http://example.com/good', 'preference': 2},
{u'format_id': u'great'}, {'format_id': 'great', 'url': 'http://example.com/great', 'preference': 3},
{u'format_id': u'excellent'}, {'format_id': 'excellent', 'url': 'http://example.com/exc', 'preference': 4},
] ]
info_dict = { info_dict = {
u'formats': formats, u'extractor': u'test', 'id': 'testvid'} 'formats': formats, 'extractor': 'test', 'id': 'testvid'}
ydl = YDL() ydl = YDL()
ydl.process_ie_result(info_dict) ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0] downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded[u'format_id'], u'excellent') self.assertEqual(downloaded['format_id'], 'excellent')
ydl = YDL({'format_limit': 'good'}) ydl = YDL({'format_limit': 'good'})
assert ydl.params['format_limit'] == 'good' assert ydl.params['format_limit'] == 'good'
ydl.process_ie_result(info_dict) ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0] downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded[u'format_id'], u'good') self.assertEqual(downloaded['format_id'], 'good')
ydl = YDL({'format_limit': 'great', 'format': 'all'}) ydl = YDL({'format_limit': 'great', 'format': 'all'})
ydl.process_ie_result(info_dict) ydl.process_ie_result(info_dict.copy())
self.assertEqual(ydl.downloaded_info_dicts[0][u'format_id'], u'meh') self.assertEqual(ydl.downloaded_info_dicts[0]['format_id'], 'meh')
self.assertEqual(ydl.downloaded_info_dicts[1][u'format_id'], u'good') self.assertEqual(ydl.downloaded_info_dicts[1]['format_id'], 'good')
self.assertEqual(ydl.downloaded_info_dicts[2][u'format_id'], u'great') self.assertEqual(ydl.downloaded_info_dicts[2]['format_id'], 'great')
self.assertTrue('3' in ydl.msgs[0]) self.assertTrue('3' in ydl.msgs[0])
ydl = YDL() ydl = YDL()
ydl.params['format_limit'] = 'excellent' ydl.params['format_limit'] = 'excellent'
ydl.process_ie_result(info_dict) ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0] downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded[u'format_id'], u'excellent') self.assertEqual(downloaded['format_id'], 'excellent')
def test_format_selection(self): def test_format_selection(self):
formats = [ formats = [
{u'format_id': u'35', u'ext': u'mp4'}, {'format_id': '35', 'ext': 'mp4', 'preference': 1},
{u'format_id': u'45', u'ext': u'webm'}, {'format_id': '45', 'ext': 'webm', 'preference': 2},
{u'format_id': u'47', u'ext': u'webm'}, {'format_id': '47', 'ext': 'webm', 'preference': 3},
{u'format_id': u'2', u'ext': u'flv'}, {'format_id': '2', 'ext': 'flv', 'preference': 4},
] ]
info_dict = {u'formats': formats, u'extractor': u'test'} info_dict = {'formats': formats, 'extractor': 'test'}
ydl = YDL({'format': u'20/47'}) ydl = YDL({'format': '20/47'})
ydl.process_ie_result(info_dict) ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0] downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], u'47') self.assertEqual(downloaded['format_id'], '47')
ydl = YDL({'format': u'20/71/worst'}) ydl = YDL({'format': '20/71/worst'})
ydl.process_ie_result(info_dict) ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0] downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], u'35') self.assertEqual(downloaded['format_id'], '35')
ydl = YDL() ydl = YDL()
ydl.process_ie_result(info_dict) ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0] downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], u'2') self.assertEqual(downloaded['format_id'], '2')
ydl = YDL({'format': u'webm/mp4'}) ydl = YDL({'format': 'webm/mp4'})
ydl.process_ie_result(info_dict) ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0] downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], u'47') self.assertEqual(downloaded['format_id'], '47')
ydl = YDL({'format': u'3gp/40/mp4'}) ydl = YDL({'format': '3gp/40/mp4'})
ydl.process_ie_result(info_dict) ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0] downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], u'35') self.assertEqual(downloaded['format_id'], '35')
def test_format_selection_audio(self):
formats = [
{'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none'},
{'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none'},
{'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none'},
{'format_id': 'vid', 'ext': 'mp4', 'preference': 4},
]
info_dict = {'formats': formats, 'extractor': 'test'}
ydl = YDL({'format': 'bestaudio'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'audio-high')
ydl = YDL({'format': 'worstaudio'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'audio-low')
formats = [
{'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1},
{'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2},
]
info_dict = {'formats': formats, 'extractor': 'test'}
ydl = YDL({'format': 'bestaudio/worstaudio/best'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'vid-high')
def test_format_selection_video(self):
formats = [
{'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none'},
{'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none'},
{'format_id': 'vid', 'ext': 'mp4', 'preference': 3},
]
info_dict = {'formats': formats, 'extractor': 'test'}
ydl = YDL({'format': 'bestvideo'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'dash-video-high')
ydl = YDL({'format': 'worstvideo'})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'dash-video-low')
def test_youtube_format_selection(self):
order = [
'38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '36', '17', '13',
# Apple HTTP Live Streaming
'96', '95', '94', '93', '92', '132', '151',
# 3D
'85', '84', '102', '83', '101', '82', '100',
# Dash video
'138', '137', '248', '136', '247', '135', '246',
'245', '244', '134', '243', '133', '242', '160',
# Dash audio
'141', '172', '140', '139', '171',
]
for f1id, f2id in zip(order, order[1:]):
f1 = YoutubeIE._formats[f1id].copy()
f1['format_id'] = f1id
f2 = YoutubeIE._formats[f2id].copy()
f2['format_id'] = f2id
info_dict = {'formats': [f1, f2], 'extractor': 'youtube'}
ydl = YDL()
yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats'])
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], f1id)
info_dict = {'formats': [f2, f1], 'extractor': 'youtube'}
ydl = YDL()
yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats'])
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], f1id)
def test_add_extra_info(self):
test_dict = {
'extractor': 'Foo',
}
extra_info = {
'extractor': 'Bar',
'playlist': 'funny videos',
}
YDL.add_extra_info(test_dict, extra_info)
self.assertEqual(test_dict['extractor'], 'Foo')
self.assertEqual(test_dict['playlist'], 'funny videos')
def test_prepare_filename(self):
info = {
'id': '1234',
'ext': 'mp4',
'width': None,
}
def fname(templ):
ydl = YoutubeDL({'outtmpl': templ})
return ydl.prepare_filename(info)
self.assertEqual(fname('%(id)s.%(ext)s'), '1234.mp4')
self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4')
# Replace missing fields with 'NA'
self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4')
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -6,8 +6,7 @@ import sys
import unittest import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import global_setup, try_rm from test.helper import try_rm
global_setup()
from youtube_dl import YoutubeDL from youtube_dl import YoutubeDL
@ -24,7 +23,7 @@ def _download_restricted(url, filename, age):
} }
ydl = YoutubeDL(params) ydl = YoutubeDL(params)
ydl.add_default_info_extractors() ydl.add_default_info_extractors()
json_filename = filename + '.info.json' json_filename = os.path.splitext(filename)[0] + '.info.json'
try_rm(json_filename) try_rm(json_filename)
ydl.download([url]) ydl.download([url])
res = os.path.exists(json_filename) res = os.path.exists(json_filename)

View File

@ -1,5 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import unicode_literals
# Allow direct execution # Allow direct execution
import os import os
import sys import sys
@ -7,11 +9,13 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import get_testcases from test.helper import gettestcases
from youtube_dl.extractor import ( from youtube_dl.extractor import (
FacebookIE,
gen_extractors, gen_extractors,
JustinTVIE, JustinTVIE,
PBSIE,
YoutubeIE, YoutubeIE,
) )
@ -28,18 +32,20 @@ class TestAllURLsMatching(unittest.TestCase):
def test_youtube_playlist_matching(self): def test_youtube_playlist_matching(self):
assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist']) assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
assertPlaylist(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
assertPlaylist(u'UUBABnxM4Ar9ten8Mdjj1j0Q') #585 assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') #585
assertPlaylist(u'PL63F0C78739B09958') assertPlaylist('PL63F0C78739B09958')
assertPlaylist(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
assertPlaylist(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
assertPlaylist(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
assertPlaylist(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668 assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
self.assertFalse('youtube:playlist' in self.matching_ies(u'PLtS2H6bU1M')) self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
# Top tracks
assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101')
def test_youtube_matching(self): def test_youtube_matching(self):
self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M')) self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668 self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube']) self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube']) self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube']) self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
@ -62,6 +68,13 @@ class TestAllURLsMatching(unittest.TestCase):
def test_youtube_show_matching(self): def test_youtube_show_matching(self):
self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show']) self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show'])
def test_youtube_truncated(self):
self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url'])
def test_youtube_search_matching(self):
self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
def test_justin_tv_channelid_matching(self): def test_justin_tv_channelid_matching(self):
self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv")) self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv")) self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
@ -79,7 +92,7 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361")) self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))
def test_youtube_extract(self): def test_youtube_extract(self):
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE()._extract_id(url), id) assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc') assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
@ -87,12 +100,15 @@ class TestAllURLsMatching(unittest.TestCase):
assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc') assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
assertExtractId('BaW_jenozKc', 'BaW_jenozKc') assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
def test_facebook_matching(self):
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
def test_no_duplicates(self): def test_no_duplicates(self):
ies = gen_extractors() ies = gen_extractors()
for tc in get_testcases(): for tc in gettestcases():
url = tc['url'] url = tc['url']
for ie in ies: for ie in ies:
if type(ie).__name__ in ['GenericIE', tc['name'] + 'IE']: if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url)) self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url))
else: else:
self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url)) self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))
@ -100,11 +116,32 @@ class TestAllURLsMatching(unittest.TestCase):
def test_keywords(self): def test_keywords(self):
self.assertMatch(':ytsubs', ['youtube:subscriptions']) self.assertMatch(':ytsubs', ['youtube:subscriptions'])
self.assertMatch(':ytsubscriptions', ['youtube:subscriptions']) self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
self.assertMatch(':thedailyshow', ['ComedyCentral']) self.assertMatch(':ythistory', ['youtube:history'])
self.assertMatch(':tds', ['ComedyCentral']) self.assertMatch(':thedailyshow', ['ComedyCentralShows'])
self.assertMatch(':colbertreport', ['ComedyCentral']) self.assertMatch(':tds', ['ComedyCentralShows'])
self.assertMatch(':cr', ['ComedyCentral']) self.assertMatch(':colbertreport', ['ComedyCentralShows'])
self.assertMatch(':cr', ['ComedyCentralShows'])
def test_vimeo_matching(self):
self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel'])
self.assertMatch('http://vimeo.com/channels/31259', ['vimeo:channel'])
self.assertMatch('http://vimeo.com/channels/31259/53576664', ['vimeo'])
self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user'])
self.assertMatch('http://vimeo.com/user7108434/videos', ['vimeo:user'])
self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review'])
# https://github.com/rg3/youtube-dl/issues/1930
def test_soundcloud_not_matching_sets(self):
self.assertMatch('http://soundcloud.com/floex/sets/gone-ep', ['soundcloud:set'])
def test_tumblr(self):
self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', ['Tumblr'])
self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr'])
def test_pbs(self):
# https://github.com/rg3/youtube-dl/issues/2350
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -1,70 +0,0 @@
#!/usr/bin/env python
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, global_setup, md5
global_setup()
from youtube_dl.extractor import DailymotionIE
class TestDailymotionSubtitles(unittest.TestCase):
    """Subtitle extraction checks for DailymotionIE.

    Each test builds a fresh FakeYDL in setUp, tweaks its ``params`` and then
    runs the extractor on ``self.url``.
    """

    def setUp(self):
        self.DL = FakeYDL()
        self.url = 'http://www.dailymotion.com/video/xczg00'

    def getInfoDict(self):
        # Build the extractor per call so it is bound to this test's FakeYDL.
        IE = DailymotionIE(self.DL)
        info_dict = IE.extract(self.url)
        return info_dict

    def getSubtitles(self):
        info_dict = self.getInfoDict()
        # The extraction result is indexed as a sequence here; the first
        # entry is the one whose 'subtitles' value is checked.
        return info_dict[0]['subtitles']

    def test_no_writesubtitles(self):
        subtitles = self.getSubtitles()
        self.assertEqual(subtitles, None)

    def test_subtitles(self):
        self.DL.params['writesubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')

    def test_subtitles_lang(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslangs'] = ['fr']
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles.keys()), 5)

    def test_list_subtitles(self):
        self.DL.expect_warning(u'Automatic Captions not supported by this server')
        self.DL.params['listsubtitles'] = True
        info_dict = self.getInfoDict()
        self.assertEqual(info_dict, None)

    def test_automatic_captions(self):
        self.DL.expect_warning(u'Automatic Captions not supported by this server')
        self.DL.params['writeautomaticsub'] = True
        # Use the plural key, the same one test_subtitles_lang and
        # test_multiple_langs set; the singular spelling was a typo.
        self.DL.params['subtitleslangs'] = ['en']
        subtitles = self.getSubtitles()
        # assertEqual reports the actual count on failure, as in test_nosubtitles.
        self.assertEqual(len(subtitles), 0)

    def test_nosubtitles(self):
        self.DL.expect_warning(u'video doesn\'t have subtitles')
        self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles), 0)

    def test_multiple_langs(self):
        self.DL.params['writesubtitles'] = True
        langs = ['es', 'fr', 'de']
        self.DL.params['subtitleslangs'] = langs
        subtitles = self.getSubtitles()
        for lang in langs:
            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
if __name__ == '__main__':
unittest.main()

View File

@ -6,8 +6,14 @@ import sys
import unittest import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import get_params, get_testcases, global_setup, try_rm, md5 from test.helper import (
global_setup() get_params,
gettestcases,
expect_info_dict,
md5,
try_rm,
report_warning,
)
import hashlib import hashlib
@ -17,12 +23,15 @@ import socket
import youtube_dl.YoutubeDL import youtube_dl.YoutubeDL
from youtube_dl.utils import ( from youtube_dl.utils import (
compat_http_client,
compat_str, compat_str,
compat_urllib_error, compat_urllib_error,
compat_HTTPError,
DownloadError, DownloadError,
ExtractorError, ExtractorError,
UnavailableVideoError, UnavailableVideoError,
) )
from youtube_dl.extractor import get_info_extractor
RETRIES = 3 RETRIES = 3
@ -42,7 +51,7 @@ def _file_md5(fn):
with open(fn, 'rb') as f: with open(fn, 'rb') as f:
return hashlib.md5(f.read()).hexdigest() return hashlib.md5(f.read()).hexdigest()
defs = get_testcases() defs = gettestcases()
class TestDownload(unittest.TestCase): class TestDownload(unittest.TestCase):
@ -55,17 +64,23 @@ def generator(test_case):
def test_template(self): def test_template(self):
ie = youtube_dl.extractor.get_info_extractor(test_case['name']) ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])]
def print_skipping(reason): def print_skipping(reason):
print('Skipping %s: %s' % (test_case['name'], reason)) print('Skipping %s: %s' % (test_case['name'], reason))
if not ie._WORKING: if not ie.working():
print_skipping('IE marked as not _WORKING') print_skipping('IE marked as not _WORKING')
return return
if 'playlist' not in test_case and not test_case['file']: if 'playlist' not in test_case:
print_skipping('No output file specified') info_dict = test_case.get('info_dict', {})
return if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')
if 'skip' in test_case: if 'skip' in test_case:
print_skipping(test_case['skip']) print_skipping(test_case['skip'])
return return
for other_ie in other_ies:
if not other_ie.working():
print_skipping(u'test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
return
params = get_params(test_case.get('params', {})) params = get_params(test_case.get('params', {}))
@ -75,61 +90,55 @@ def generator(test_case):
def _hook(status): def _hook(status):
if status['status'] == 'finished': if status['status'] == 'finished':
finished_hook_called.add(status['filename']) finished_hook_called.add(status['filename'])
ydl.fd.add_progress_hook(_hook) ydl.add_progress_hook(_hook)
def get_tc_filename(tc):
return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
test_cases = test_case.get('playlist', [test_case]) test_cases = test_case.get('playlist', [test_case])
for tc in test_cases: def try_rm_tcs_files():
try_rm(tc['file']) for tc in test_cases:
try_rm(tc['file'] + '.part') tc_filename = get_tc_filename(tc)
try_rm(tc['file'] + '.info.json') try_rm(tc_filename)
try_rm(tc_filename + '.part')
try_rm(os.path.splitext(tc_filename)[0] + '.info.json')
try_rm_tcs_files()
try: try:
for retry in range(1, RETRIES + 1): try_num = 1
while True:
try: try:
ydl.download([test_case['url']]) ydl.download([test_case['url']])
except (DownloadError, ExtractorError) as err: except (DownloadError, ExtractorError) as err:
if retry == RETRIES: raise
# Check if the exception is not a network related one # Check if the exception is not a network related one
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError): if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
raise raise
print('Retrying: {0} failed tries\n\n##########\n\n'.format(retry)) if try_num == RETRIES:
report_warning(u'Failed due to network errors, skipping...')
return
print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num))
try_num += 1
else: else:
break break
for tc in test_cases: for tc in test_cases:
tc_filename = get_tc_filename(tc)
if not test_case.get('params', {}).get('skip_download', False): if not test_case.get('params', {}).get('skip_download', False):
self.assertTrue(os.path.exists(tc['file']), msg='Missing file ' + tc['file']) self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
self.assertTrue(tc['file'] in finished_hook_called) self.assertTrue(tc_filename in finished_hook_called)
self.assertTrue(os.path.exists(tc['file'] + '.info.json')) info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
self.assertTrue(os.path.exists(info_json_fn))
if 'md5' in tc: if 'md5' in tc:
md5_for_file = _file_md5(tc['file']) md5_for_file = _file_md5(tc_filename)
self.assertEqual(md5_for_file, tc['md5']) self.assertEqual(md5_for_file, tc['md5'])
with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof: with io.open(info_json_fn, encoding='utf-8') as infof:
info_dict = json.load(infof) info_dict = json.load(infof)
for (info_field, expected) in tc.get('info_dict', {}).items():
if isinstance(expected, compat_str) and expected.startswith('md5:'):
got = 'md5:' + md5(info_dict.get(info_field))
else:
got = info_dict.get(info_field)
self.assertEqual(expected, got,
u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
# If checkable fields are missing from the test case, print the info_dict expect_info_dict(self, tc.get('info_dict', {}), info_dict)
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
for key, value in info_dict.items()
if value and key in ('title', 'description', 'uploader', 'upload_date', 'uploader_id', 'location'))
if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()):
sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=2) + u'\n')
# Check for the presence of mandatory fields
for key in ('id', 'url', 'title', 'ext'):
self.assertTrue(key in info_dict.keys() and info_dict[key])
finally: finally:
for tc in test_cases: try_rm_tcs_files()
try_rm(tc['file'])
try_rm(tc['file'] + '.part')
try_rm(tc['file'] + '.info.json')
return test_template return test_template

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
# encoding: utf-8 # encoding: utf-8
from __future__ import unicode_literals
# Allow direct execution # Allow direct execution
import os import os
@ -8,18 +9,39 @@ import sys
import unittest import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, global_setup from test.helper import (
global_setup() expect_info_dict,
FakeYDL,
)
from youtube_dl.extractor import ( from youtube_dl.extractor import (
AcademicEarthCourseIE,
DailymotionPlaylistIE, DailymotionPlaylistIE,
DailymotionUserIE, DailymotionUserIE,
VimeoChannelIE, VimeoChannelIE,
VimeoUserIE,
VimeoAlbumIE,
VimeoGroupsIE,
UstreamChannelIE, UstreamChannelIE,
SoundcloudSetIE,
SoundcloudUserIE, SoundcloudUserIE,
LivestreamIE, LivestreamIE,
NHLVideocenterIE, NHLVideocenterIE,
BambuserChannelIE,
BandcampAlbumIE,
SmotriCommunityIE,
SmotriUserIE,
IviCompilationIE,
ImdbListIE,
KhanAcademyIE,
EveryonesMixtapeIE,
RutubeChannelIE,
GoogleSearchIE,
GenericIE,
TEDIE,
ToypicsUserIE,
XTubeUserIE,
InstagramUserIE,
) )
@ -33,39 +55,71 @@ class TestPlaylists(unittest.TestCase):
ie = DailymotionPlaylistIE(dl) ie = DailymotionPlaylistIE(dl)
result = ie.extract('http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q') result = ie.extract('http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'SPORT') self.assertEqual(result['title'], 'SPORT')
self.assertTrue(len(result['entries']) > 20) self.assertTrue(len(result['entries']) > 20)
def test_dailymotion_user(self): def test_dailymotion_user(self):
dl = FakeYDL() dl = FakeYDL()
ie = DailymotionUserIE(dl) ie = DailymotionUserIE(dl)
result = ie.extract('http://www.dailymotion.com/user/generation-quoi/') result = ie.extract('https://www.dailymotion.com/user/nqtv')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'Génération Quoi') self.assertEqual(result['title'], 'Rémi Gaillard')
self.assertTrue(len(result['entries']) >= 26) self.assertTrue(len(result['entries']) >= 100)
def test_vimeo_channel(self): def test_vimeo_channel(self):
dl = FakeYDL() dl = FakeYDL()
ie = VimeoChannelIE(dl) ie = VimeoChannelIE(dl)
result = ie.extract('http://vimeo.com/channels/tributes') result = ie.extract('http://vimeo.com/channels/tributes')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'Vimeo Tributes') self.assertEqual(result['title'], 'Vimeo Tributes')
self.assertTrue(len(result['entries']) > 24) self.assertTrue(len(result['entries']) > 24)
def test_vimeo_user(self):
dl = FakeYDL()
ie = VimeoUserIE(dl)
result = ie.extract('http://vimeo.com/nkistudio/videos')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'Nki')
self.assertTrue(len(result['entries']) > 65)
def test_vimeo_album(self):
dl = FakeYDL()
ie = VimeoAlbumIE(dl)
result = ie.extract('http://vimeo.com/album/2632481')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'Staff Favorites: November 2013')
self.assertTrue(len(result['entries']) > 12)
def test_vimeo_groups(self):
dl = FakeYDL()
ie = VimeoGroupsIE(dl)
result = ie.extract('http://vimeo.com/groups/rolexawards')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'Rolex Awards for Enterprise')
self.assertTrue(len(result['entries']) > 72)
def test_ustream_channel(self): def test_ustream_channel(self):
dl = FakeYDL() dl = FakeYDL()
ie = UstreamChannelIE(dl) ie = UstreamChannelIE(dl)
result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty') result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], u'5124905') self.assertEqual(result['id'], '5124905')
self.assertTrue(len(result['entries']) >= 11) self.assertTrue(len(result['entries']) >= 6)
def test_soundcloud_set(self):
dl = FakeYDL()
ie = SoundcloudSetIE(dl)
result = ie.extract('https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'The Royal Concept EP')
self.assertTrue(len(result['entries']) >= 6)
def test_soundcloud_user(self): def test_soundcloud_user(self):
dl = FakeYDL() dl = FakeYDL()
ie = SoundcloudUserIE(dl) ie = SoundcloudUserIE(dl)
result = ie.extract('https://soundcloud.com/the-concept-band') result = ie.extract('https://soundcloud.com/the-concept-band')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], u'9615865') self.assertEqual(result['id'], '9615865')
self.assertTrue(len(result['entries']) >= 12) self.assertTrue(len(result['entries']) >= 12)
def test_livestream_event(self): def test_livestream_event(self):
@ -73,7 +127,7 @@ class TestPlaylists(unittest.TestCase):
ie = LivestreamIE(dl) ie = LivestreamIE(dl)
result = ie.extract('http://new.livestream.com/tedx/cityenglish') result = ie.extract('http://new.livestream.com/tedx/cityenglish')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'TEDCity2.0 (English)') self.assertEqual(result['title'], 'TEDCity2.0 (English)')
self.assertTrue(len(result['entries']) >= 4) self.assertTrue(len(result['entries']) >= 4)
def test_nhl_videocenter(self): def test_nhl_videocenter(self):
@ -81,9 +135,185 @@ class TestPlaylists(unittest.TestCase):
ie = NHLVideocenterIE(dl) ie = NHLVideocenterIE(dl)
result = ie.extract('http://video.canucks.nhl.com/videocenter/console?catid=999') result = ie.extract('http://video.canucks.nhl.com/videocenter/console?catid=999')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], u'999') self.assertEqual(result['id'], '999')
self.assertEqual(result['title'], u'Highlights') self.assertEqual(result['title'], 'Highlights')
self.assertEqual(len(result['entries']), 12) self.assertEqual(len(result['entries']), 12)
def test_bambuser_channel(self):
dl = FakeYDL()
ie = BambuserChannelIE(dl)
result = ie.extract('http://bambuser.com/channel/pixelversity')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'pixelversity')
self.assertTrue(len(result['entries']) >= 60)
def test_bandcamp_album(self):
dl = FakeYDL()
ie = BandcampAlbumIE(dl)
result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'Nightmare Night EP')
self.assertTrue(len(result['entries']) >= 4)
def test_smotri_community(self):
dl = FakeYDL()
ie = SmotriCommunityIE(dl)
result = ie.extract('http://smotri.com/community/video/kommuna')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'kommuna')
self.assertEqual(result['title'], 'КПРФ')
self.assertTrue(len(result['entries']) >= 4)
def test_smotri_user(self):
dl = FakeYDL()
ie = SmotriUserIE(dl)
result = ie.extract('http://smotri.com/user/inspector')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'inspector')
self.assertEqual(result['title'], 'Inspector')
self.assertTrue(len(result['entries']) >= 9)
def test_AcademicEarthCourse(self):
dl = FakeYDL()
ie = AcademicEarthCourseIE(dl)
result = ie.extract('http://academicearth.org/playlists/laws-of-nature/')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'laws-of-nature')
self.assertEqual(result['title'], 'Laws of Nature')
self.assertEqual(result['description'],u'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.')# u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.")
self.assertEqual(len(result['entries']), 4)
def test_ivi_compilation(self):
dl = FakeYDL()
ie = IviCompilationIE(dl)
result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'dezhurnyi_angel')
self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012)')
self.assertTrue(len(result['entries']) >= 36)
def test_ivi_compilation_season(self):
dl = FakeYDL()
ie = IviCompilationIE(dl)
result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel/season2')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'dezhurnyi_angel/season2')
self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012) 2 сезон')
self.assertTrue(len(result['entries']) >= 20)
def test_imdb_list(self):
dl = FakeYDL()
ie = ImdbListIE(dl)
result = ie.extract('http://www.imdb.com/list/JFs9NWw6XI0')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'JFs9NWw6XI0')
self.assertEqual(result['title'], 'March 23, 2012 Releases')
self.assertEqual(len(result['entries']), 7)
def test_khanacademy_topic(self):
dl = FakeYDL()
ie = KhanAcademyIE(dl)
result = ie.extract('https://www.khanacademy.org/math/applied-math/cryptography')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'cryptography')
self.assertEqual(result['title'], 'Journey into cryptography')
self.assertEqual(result['description'], 'How have humans protected their secret messages through history? What has changed today?')
self.assertTrue(len(result['entries']) >= 3)
def test_EveryonesMixtape(self):
dl = FakeYDL()
ie = EveryonesMixtapeIE(dl)
result = ie.extract('http://everyonesmixtape.com/#/mix/m7m0jJAbMQi')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'm7m0jJAbMQi')
self.assertEqual(result['title'], 'Driving')
self.assertEqual(len(result['entries']), 24)
def test_rutube_channel(self):
dl = FakeYDL()
ie = RutubeChannelIE(dl)
result = ie.extract('http://rutube.ru/tags/video/1409')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], '1409')
self.assertTrue(len(result['entries']) >= 34)
def test_multiple_brightcove_videos(self):
# https://github.com/rg3/youtube-dl/issues/2283
dl = FakeYDL()
ie = GenericIE(dl)
result = ie.extract('http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'always-never-nuclear-command-and-control')
self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker')
self.assertEqual(len(result['entries']), 3)
def test_GoogleSearch(self):
dl = FakeYDL()
ie = GoogleSearchIE(dl)
result = ie.extract('gvsearch15:python language')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'python language')
self.assertEqual(result['title'], 'python language')
self.assertEqual(len(result['entries']), 15)
def test_generic_rss_feed(self):
dl = FakeYDL()
ie = GenericIE(dl)
result = ie.extract('http://phihag.de/2014/youtube-dl/rss.xml')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'http://phihag.de/2014/youtube-dl/rss.xml')
self.assertEqual(result['title'], 'Zero Punctuation')
self.assertTrue(len(result['entries']) > 10)
def test_ted_playlist(self):
dl = FakeYDL()
ie = TEDIE(dl)
result = ie.extract('http://www.ted.com/playlists/who_are_the_hackers')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], '10')
self.assertEqual(result['title'], 'Who are the hackers?')
self.assertTrue(len(result['entries']) >= 6)
def test_toypics_user(self):
dl = FakeYDL()
ie = ToypicsUserIE(dl)
result = ie.extract('http://videos.toypics.net/Mikey')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'Mikey')
self.assertTrue(len(result['entries']) >= 17)
def test_xtube_user(self):
dl = FakeYDL()
ie = XTubeUserIE(dl)
result = ie.extract('http://www.xtube.com/community/profile.php?user=greenshowers')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'greenshowers')
self.assertTrue(len(result['entries']) >= 155)
def test_InstagramUser(self):
dl = FakeYDL()
ie = InstagramUserIE(dl)
result = ie.extract('http://instagram.com/porsche')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'porsche')
self.assertTrue(len(result['entries']) >= 2)
test_video = next(
e for e in result['entries']
if e['id'] == '614605558512799803_462752227')
dl.add_default_extra_info(test_video, ie, '(irrelevant URL)')
dl.process_video_result(test_video, download=False)
EXPECTED = {
'id': '614605558512799803_462752227',
'ext': 'mp4',
'title': '#Porsche Intelligent Performance.',
'thumbnail': 're:^https?://.*\.jpg',
'uploader': 'Porsche',
'uploader_id': 'porsche',
'timestamp': 1387486713,
'upload_date': '20131219',
}
expect_info_dict(self, EXPECTED, test_video)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

283
test/test_subtitles.py Normal file
View File

@ -0,0 +1,283 @@
#!/usr/bin/env python
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, md5
from youtube_dl.extractor import (
BlipTVIE,
YoutubeIE,
DailymotionIE,
TEDIE,
VimeoIE,
)
class BaseTestSubtitles(unittest.TestCase):
    """Common fixture for the per-site subtitle test cases.

    Subclasses are expected to override ``url`` and ``IE``; this class
    defines no test methods of its own.
    """

    # Placeholders overridden by each concrete subclass.
    url = None
    IE = None

    def setUp(self):
        # Fresh downloader stub and extractor instance for every test.
        self.DL = FakeYDL()
        self.ie = self.IE(self.DL)

    def getInfoDict(self):
        # self.url is read at call time, so a test may reassign it first.
        return self.ie.extract(self.url)

    def getSubtitles(self):
        return self.getInfoDict()['subtitles']
class TestYoutubeSubtitles(BaseTestSubtitles):
    """Subtitle and automatic-caption checks for YoutubeIE."""

    url = 'QRS8MkLhQmM'
    IE = YoutubeIE

    def test_youtube_no_writesubtitles(self):
        self.DL.params['writesubtitles'] = False
        self.assertEqual(self.getSubtitles(), None)

    def test_youtube_subtitles(self):
        self.DL.params['writesubtitles'] = True
        subs = self.getSubtitles()
        english_digest = md5(subs['en'])
        self.assertEqual(english_digest, '4cd9278a35ba2305f47354ee13472260')

    def test_youtube_subtitles_lang(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslangs'] = ['it']
        subs = self.getSubtitles()
        italian_digest = md5(subs['it'])
        self.assertEqual(italian_digest, '164a51f16f260476a05b50fe4c2f161d')

    def test_youtube_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subs = self.getSubtitles()
        language_count = len(subs.keys())
        self.assertEqual(language_count, 13)

    def test_youtube_subtitles_sbv_format(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = 'sbv'
        subs = self.getSubtitles()
        english_digest = md5(subs['en'])
        self.assertEqual(english_digest, '13aeaa0c245a8bed9a451cb643e3ad8b')

    def test_youtube_subtitles_vtt_format(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = 'vtt'
        subs = self.getSubtitles()
        english_digest = md5(subs['en'])
        self.assertEqual(english_digest, '3cb210999d3e021bd6c7f0ea751eab06')

    def test_youtube_list_subtitles(self):
        self.DL.expect_warning(u'Video doesn\'t have automatic captions')
        self.DL.params['listsubtitles'] = True
        # With 'listsubtitles' set, the test expects extract() to yield None.
        self.assertEqual(self.getInfoDict(), None)

    def test_youtube_automatic_captions(self):
        # This test runs against a different video than the class default.
        self.url = '8YoUxe5ncPo'
        self.DL.params['writeautomaticsub'] = True
        self.DL.params['subtitleslangs'] = ['it']
        captions = self.getSubtitles()
        self.assertTrue(captions['it'] is not None)

    def test_youtube_nosubtitles(self):
        self.DL.expect_warning(u'video doesn\'t have subtitles')
        self.url = 'sAjKT8FhjI8'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(len(subs), 0)

    def test_youtube_multiple_langs(self):
        self.url = 'QRS8MkLhQmM'
        self.DL.params['writesubtitles'] = True
        wanted = ['it', 'fr', 'de']
        self.DL.params['subtitleslangs'] = wanted
        subs = self.getSubtitles()
        for code in wanted:
            present = subs.get(code) is not None
            self.assertTrue(present, u'Subtitles for \'%s\' not extracted' % code)
class TestDailymotionSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for DailymotionIE.

    The tests only adjust ``self.DL.params`` (and, in one case, ``self.url``)
    before calling the inherited getInfoDict/getSubtitles helpers.
    """

    url = 'http://www.dailymotion.com/video/xczg00'
    IE = DailymotionIE

    def test_no_writesubtitles(self):
        subtitles = self.getSubtitles()
        self.assertEqual(subtitles, None)

    def test_subtitles(self):
        self.DL.params['writesubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')

    def test_subtitles_lang(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslangs'] = ['fr']
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles.keys()), 5)

    def test_list_subtitles(self):
        self.DL.expect_warning(u'Automatic Captions not supported by this server')
        self.DL.params['listsubtitles'] = True
        info_dict = self.getInfoDict()
        self.assertEqual(info_dict, None)

    def test_automatic_captions(self):
        self.DL.expect_warning(u'Automatic Captions not supported by this server')
        self.DL.params['writeautomaticsub'] = True
        # Use the plural key, the same one test_subtitles_lang and
        # test_multiple_langs set; the singular spelling was a typo.
        self.DL.params['subtitleslangs'] = ['en']
        subtitles = self.getSubtitles()
        # assertEqual reports the actual count on failure, as in test_nosubtitles.
        self.assertEqual(len(subtitles), 0)

    def test_nosubtitles(self):
        self.DL.expect_warning(u'video doesn\'t have subtitles')
        # Switch to a different video for this test only.
        self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles), 0)

    def test_multiple_langs(self):
        self.DL.params['writesubtitles'] = True
        langs = ['es', 'fr', 'de']
        self.DL.params['subtitleslangs'] = langs
        subtitles = self.getSubtitles()
        for lang in langs:
            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
class TestTedSubtitles(BaseTestSubtitles):
    """Subtitle extraction checks for TEDIE.

    The tests only adjust ``self.DL.params`` before calling the inherited
    getInfoDict/getSubtitles helpers.
    """

    url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
    IE = TEDIE

    def test_no_writesubtitles(self):
        subtitles = self.getSubtitles()
        self.assertEqual(subtitles, None)

    def test_subtitles(self):
        self.DL.params['writesubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')

    def test_subtitles_lang(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslangs'] = ['fr']
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles.keys()), 28)

    def test_list_subtitles(self):
        self.DL.expect_warning(u'Automatic Captions not supported by this server')
        self.DL.params['listsubtitles'] = True
        info_dict = self.getInfoDict()
        self.assertEqual(info_dict, None)

    def test_automatic_captions(self):
        self.DL.expect_warning(u'Automatic Captions not supported by this server')
        self.DL.params['writeautomaticsub'] = True
        # Use the plural key, the same one test_subtitles_lang and
        # test_multiple_langs set; the singular spelling was a typo.
        self.DL.params['subtitleslangs'] = ['en']
        subtitles = self.getSubtitles()
        # assertEqual reports the actual count if the check fails.
        self.assertEqual(len(subtitles), 0)

    def test_multiple_langs(self):
        self.DL.params['writesubtitles'] = True
        langs = ['es', 'fr', 'de']
        self.DL.params['subtitleslangs'] = langs
        subtitles = self.getSubtitles()
        for lang in langs:
            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
class TestBlipTVSubtitles(BaseTestSubtitles):
    """Subtitle tests run against a blip.tv URL through the BlipTVIE extractor."""
    url = 'http://blip.tv/a/a-6603250'
    IE = BlipTVIE

    def test_list_subtitles(self):
        """Listing subtitles returns no info dict and emits the captions warning."""
        self.DL.expect_warning(u'Automatic Captions not supported by this server')
        self.DL.params['listsubtitles'] = True
        info = self.getInfoDict()
        self.assertEqual(info, None)

    def test_allsubtitles(self):
        """Only an English track exists, and its content matches a known checksum."""
        self.DL.expect_warning(u'Automatic Captions not supported by this server')
        self.DL.params['allsubtitles'] = True
        self.DL.params['writesubtitles'] = True
        subs = self.getSubtitles()
        available = set(subs.keys())
        self.assertEqual(available, set(['en']))
        english_digest = md5(subs['en'])
        self.assertEqual(english_digest, '5b75c300af65fe4476dff79478bb93e4')
class TestVimeoSubtitles(BaseTestSubtitles):
    """Subtitle tests run against a Vimeo URL through the VimeoIE extractor."""
    url = 'http://vimeo.com/76979871'
    IE = VimeoIE

    def test_no_writesubtitles(self):
        """Without 'writesubtitles' no subtitles are returned."""
        subtitles = self.getSubtitles()
        self.assertEqual(subtitles, None)

    def test_subtitles(self):
        """With 'writesubtitles' the English track matches a known checksum."""
        self.DL.params['writesubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')

    def test_subtitles_lang(self):
        """An explicitly requested language ('fr') matches a known checksum."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslangs'] = ['fr']
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')

    def test_allsubtitles(self):
        """'allsubtitles' yields exactly the de/en/es/fr tracks."""
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))

    def test_list_subtitles(self):
        """'listsubtitles' produces no info dict and warns about automatic captions."""
        self.DL.expect_warning(u'Automatic Captions not supported by this server')
        self.DL.params['listsubtitles'] = True
        info_dict = self.getInfoDict()
        self.assertEqual(info_dict, None)

    def test_automatic_captions(self):
        """Requesting automatic captions yields an empty result plus a warning."""
        self.DL.expect_warning(u'Automatic Captions not supported by this server')
        self.DL.params['writeautomaticsub'] = True
        # The option key is 'subtitleslangs' (plural), as used by every other
        # test in this class; the singular spelling was silently ignored.
        self.DL.params['subtitleslangs'] = ['en']
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles.keys()), 0)

    def test_nosubtitles(self):
        """A video without subtitles yields an empty result and a warning."""
        self.DL.expect_warning(u'video doesn\'t have subtitles')
        self.url = 'http://vimeo.com/56015672'
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles), 0)

    def test_multiple_langs(self):
        """Several requested languages must all be extracted."""
        self.DL.params['writesubtitles'] = True
        langs = ['es', 'fr', 'de']
        self.DL.params['subtitleslangs'] = langs
        subtitles = self.getSubtitles()
        for lang in langs:
            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
if __name__ == '__main__':
unittest.main()

View File

@ -0,0 +1,47 @@
from __future__ import unicode_literals
import io
import os
import re
import unittest
# Parent of the directory that contains this test file; the walk in
# test_all_files starts here.
rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Basenames of .py files that test_all_files skips.
IGNORED_FILES = [
'setup.py', # http://bugs.python.org/issue13943
]
class TestUnicodeLiterals(unittest.TestCase):
    """Checks that Python sources under rootDir use unicode_literals."""

    def test_all_files(self):
        """Scan every .py file for the unicode_literals import and u'' prefixes.

        The check is currently disabled: the method prints a notice and
        returns before any file is inspected.
        """
        print('Skipping this test (not yet fully implemented)')
        return

        required_import = 'from __future__ import unicode_literals'
        for dirpath, _, filenames in os.walk(rootDir):
            for basename in filenames:
                if not basename.endswith('.py'):
                    continue
                if basename in IGNORED_FILES:
                    continue

                path = os.path.join(dirpath, basename)
                with io.open(path, encoding='utf-8') as source_file:
                    source = source_file.read()

                # Files without any quote character contain no string literals.
                if "'" not in source and '"' not in source:
                    continue

                self.assertTrue(
                    required_import in source,
                    ' %s missing in %s' % (required_import, path))

                prefixed = re.search(r'(?<=\s)u[\'"](?!\)|,|$)', source)
                if prefixed is not None:
                    context = source[prefixed.start() - 10:prefixed.end() + 10]
                    self.assertTrue(
                        prefixed is None,
                        'u present in %s, around %s' % (
                            path, context))
if __name__ == '__main__':
unittest.main()

View File

@ -9,21 +9,33 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Various small unit tests # Various small unit tests
import io
import xml.etree.ElementTree import xml.etree.ElementTree
#from youtube_dl.utils import htmlentity_transform #from youtube_dl.utils import htmlentity_transform
from youtube_dl.utils import ( from youtube_dl.utils import (
timeconvert,
sanitize_filename,
unescapeHTML,
orderedSet,
DateRange, DateRange,
unified_strdate, encodeFilename,
find_xpath_attr, find_xpath_attr,
fix_xml_ampersands,
get_meta_content, get_meta_content,
xpath_with_ns, orderedSet,
PagedList,
parse_duration,
read_batch_urls,
sanitize_filename,
shell_quote,
smuggle_url, smuggle_url,
str_to_int,
struct_unpack,
timeconvert,
unescapeHTML,
unified_strdate,
unsmuggle_url, unsmuggle_url,
url_basename,
urlencode_postdata,
xpath_with_ns,
parse_iso8601,
) )
if sys.version_info < (3, 0): if sys.version_info < (3, 0):
@ -120,6 +132,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unified_strdate('8/7/2009'), '20090708') self.assertEqual(unified_strdate('8/7/2009'), '20090708')
self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214') self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214')
self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011') self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
self.assertEqual(unified_strdate('1968-12-10'), '19681210')
def test_find_xpath_attr(self): def test_find_xpath_attr(self):
testxml = u'''<root> testxml = u'''<root>
@ -170,6 +183,94 @@ class TestUtil(unittest.TestCase):
self.assertEqual(res_url, url) self.assertEqual(res_url, url)
self.assertEqual(res_data, None) self.assertEqual(res_data, None)
def test_shell_quote(self):
    """shell_quote joins the arguments and escapes an embedded single quote."""
    filename = encodeFilename(u'ñ€ß\'.mp4')
    command = ['ffmpeg', '-i', filename]
    expected = u"""ffmpeg -i 'ñ€ß'"'"'.mp4'"""
    self.assertEqual(shell_quote(command), expected)
def test_str_to_int(self):
    """Both ',' and '.' separators are ignored when parsing an integer."""
    for text in ('123,456', '123.456'):
        self.assertEqual(str_to_int(text), 123456)
def test_url_basename(self):
    """url_basename returns the last path component of a URL."""
    cases = [
        (u'http://foo.de/', u''),
        (u'http://foo.de/bar/baz', u'baz'),
        (u'http://foo.de/bar/baz?x=y', u'baz'),
        (u'http://foo.de/bar/baz#x=y', u'baz'),
        (u'http://foo.de/bar/baz/', u'baz'),
        (u'http://media.w3.org/2010/05/sintel/trailer.mp4', u'trailer.mp4'),
    ]
    for url, expected in cases:
        self.assertEqual(url_basename(url), expected)
def test_parse_duration(self):
    """parse_duration converts clock-style and h/m/s-style strings to seconds."""
    cases = [
        (None, None),
        ('1', 1),
        ('1337:12', 80232),
        ('9:12:43', 33163),
        ('12:00', 720),
        ('00:01:01', 61),
        ('x:y', None),
        ('3h11m53s', 11513),
        ('62m45s', 3765),
        ('6m59s', 419),
        ('49s', 49),
        ('0h0m0s', 0),
        ('0m0s', 0),
        ('0s', 0),
    ]
    for text, seconds in cases:
        self.assertEqual(parse_duration(text), seconds)
def test_fix_xml_ampersands(self):
    """Bare ampersands are escaped; predefined and numeric entities are kept."""
    cases = [
        ('"&x=y&z=a', '"&amp;x=y&amp;z=a'),
        ('"&amp;x=y&wrong;&z=a', '"&amp;x=y&amp;wrong;&amp;z=a'),
        ('&amp;&apos;&gt;&lt;&quot;', '&amp;&apos;&gt;&lt;&quot;'),
        ('&#1234;&#x1abC;', '&#1234;&#x1abC;'),
        ('&#&#', '&amp;#&amp;#'),
    ]
    for raw, fixed in cases:
        self.assertEqual(fix_xml_ampersands(raw), fixed)
def test_paged_list(self):
    """PagedList.getslice returns the expected items for various slice bounds."""

    def check(size, pagesize, sliceargs, expected):
        # Pages are generated on demand from the integers 0..size-1.
        def get_page(pagenum):
            first = pagenum * pagesize
            last = min(size, first + pagesize)
            for item in range(first, last):
                yield item

        paged = PagedList(get_page, pagesize)
        self.assertEqual(paged.getslice(*sliceargs), expected)

    cases = [
        ((), [0, 1, 2, 3, 4]),
        ((1,), [1, 2, 3, 4]),
        ((2,), [2, 3, 4]),
        ((4,), [4]),
        ((0, 3), [0, 1, 2]),
        ((1, 4), [1, 2, 3]),
        ((2, 99), [2, 3, 4]),
        ((20, 99), []),
    ]
    for sliceargs, expected in cases:
        check(5, 2, sliceargs, expected)
def test_struct_unpack(self):
    """struct_unpack accepts a unicode format string."""
    unpacked = struct_unpack(u'!B', b'\x00')
    self.assertEqual(unpacked, (0,))
def test_read_batch_urls(self):
# The batch text begins with the characters \xef\xbb\xbf, mixes bare and
# \r-terminated lines, and contains one '#' line and one ';' line; only the
# four entries foo/bar/baz/bam are expected back.
f = io.StringIO(u'''\xef\xbb\xbf foo
bar\r
baz
# More after this line\r
; or after this
bam''')
self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam'])
def test_urlencode_postdata(self):
    """urlencode_postdata must return a bytes object."""
    form = {'username': 'foo@bar.com', 'password': '1234'}
    encoded = urlencode_postdata(form)
    self.assertTrue(isinstance(encoded, bytes))
def test_parse_iso8601(self):
    """Three spellings of the same instant all parse to the same timestamp."""
    same_instant = (
        '2014-03-23T23:04:26+0100',
        '2014-03-23T22:04:26+0000',
        '2014-03-23T22:04:26Z',
    )
    for stamp in same_instant:
        self.assertEqual(parse_iso8601(stamp), 1395612266)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -7,8 +7,7 @@ import sys
import unittest import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import get_params, global_setup, try_rm from test.helper import get_params, try_rm
global_setup()
import io import io

View File

@ -7,8 +7,7 @@ import sys
import unittest import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import get_params, global_setup from test.helper import get_params
global_setup()
import io import io
@ -31,9 +30,10 @@ params = get_params({
TEST_ID = 'BaW_jenozKc' TEST_ID = 'BaW_jenozKc'
INFO_JSON_FILE = TEST_ID + '.mp4.info.json' INFO_JSON_FILE = TEST_ID + '.info.json'
DESCRIPTION_FILE = TEST_ID + '.mp4.description' DESCRIPTION_FILE = TEST_ID + '.mp4.description'
EXPECTED_DESCRIPTION = u'''test chars: "'/\ä↭𝕐 EXPECTED_DESCRIPTION = u'''test chars: "'/\ä↭𝕐
test URL: https://github.com/rg3/youtube-dl/issues/1892
This is a test video for youtube-dl. This is a test video for youtube-dl.

View File

@ -6,8 +6,7 @@ import sys
import unittest import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, global_setup from test.helper import FakeYDL
global_setup()
from youtube_dl.extractor import ( from youtube_dl.extractor import (
@ -16,6 +15,8 @@ from youtube_dl.extractor import (
YoutubeIE, YoutubeIE,
YoutubeChannelIE, YoutubeChannelIE,
YoutubeShowIE, YoutubeShowIE,
YoutubeTopListIE,
YoutubeSearchURLIE,
) )
@ -27,10 +28,10 @@ class TestYoutubeLists(unittest.TestCase):
def test_youtube_playlist(self): def test_youtube_playlist(self):
dl = FakeYDL() dl = FakeYDL()
ie = YoutubePlaylistIE(dl) ie = YoutubePlaylistIE(dl)
result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')[0] result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'ytdl test PL') self.assertEqual(result['title'], 'ytdl test PL')
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']] ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE']) self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE'])
def test_youtube_playlist_noplaylist(self): def test_youtube_playlist_noplaylist(self):
@ -39,18 +40,18 @@ class TestYoutubeLists(unittest.TestCase):
ie = YoutubePlaylistIE(dl) ie = YoutubePlaylistIE(dl)
result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re') result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
self.assertEqual(result['_type'], 'url') self.assertEqual(result['_type'], 'url')
self.assertEqual(YoutubeIE()._extract_id(result['url']), 'FXxLjLQi3Fg') self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg')
def test_issue_673(self): def test_issue_673(self):
dl = FakeYDL() dl = FakeYDL()
ie = YoutubePlaylistIE(dl) ie = YoutubePlaylistIE(dl)
result = ie.extract('PLBB231211A4F62143')[0] result = ie.extract('PLBB231211A4F62143')
self.assertTrue(len(result['entries']) > 25) self.assertTrue(len(result['entries']) > 25)
def test_youtube_playlist_long(self): def test_youtube_playlist_long(self):
dl = FakeYDL() dl = FakeYDL()
ie = YoutubePlaylistIE(dl) ie = YoutubePlaylistIE(dl)
result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')[0] result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertTrue(len(result['entries']) >= 799) self.assertTrue(len(result['entries']) >= 799)
@ -58,15 +59,15 @@ class TestYoutubeLists(unittest.TestCase):
#651 #651
dl = FakeYDL() dl = FakeYDL()
ie = YoutubePlaylistIE(dl) ie = YoutubePlaylistIE(dl)
result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')[0] result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']] ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']]
self.assertFalse('pElCt5oNDuI' in ytie_results) self.assertFalse('pElCt5oNDuI' in ytie_results)
self.assertFalse('KdPEApIVdWM' in ytie_results) self.assertFalse('KdPEApIVdWM' in ytie_results)
def test_youtube_playlist_empty(self): def test_youtube_playlist_empty(self):
dl = FakeYDL() dl = FakeYDL()
ie = YoutubePlaylistIE(dl) ie = YoutubePlaylistIE(dl)
result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')[0] result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(len(result['entries']), 0) self.assertEqual(len(result['entries']), 0)
@ -74,32 +75,32 @@ class TestYoutubeLists(unittest.TestCase):
dl = FakeYDL() dl = FakeYDL()
ie = YoutubePlaylistIE(dl) ie = YoutubePlaylistIE(dl)
# TODO find a > 100 (paginating?) videos course # TODO find a > 100 (paginating?) videos course
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')[0] result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
entries = result['entries'] entries = result['entries']
self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs') self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs')
self.assertEqual(len(entries), 25) self.assertEqual(len(entries), 25)
self.assertEqual(YoutubeIE()._extract_id(entries[-1]['url']), 'rYefUsYuEp0') self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
def test_youtube_channel(self): def test_youtube_channel(self):
dl = FakeYDL() dl = FakeYDL()
ie = YoutubeChannelIE(dl) ie = YoutubeChannelIE(dl)
#test paginated channel #test paginated channel
result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')[0] result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')
self.assertTrue(len(result['entries']) > 90) self.assertTrue(len(result['entries']) > 90)
#test autogenerated channel #test autogenerated channel
result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')[0] result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
self.assertTrue(len(result['entries']) >= 18) self.assertTrue(len(result['entries']) >= 18)
def test_youtube_user(self): def test_youtube_user(self):
dl = FakeYDL() dl = FakeYDL()
ie = YoutubeUserIE(dl) ie = YoutubeUserIE(dl)
result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0] result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
self.assertTrue(len(result['entries']) >= 320) self.assertTrue(len(result['entries']) >= 320)
def test_youtube_safe_search(self): def test_youtube_safe_search(self):
dl = FakeYDL() dl = FakeYDL()
ie = YoutubePlaylistIE(dl) ie = YoutubePlaylistIE(dl)
result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')[0] result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')
self.assertEqual(len(result['entries']), 2) self.assertEqual(len(result['entries']), 2)
def test_youtube_show(self): def test_youtube_show(self):
@ -108,5 +109,39 @@ class TestYoutubeLists(unittest.TestCase):
result = ie.extract('http://www.youtube.com/show/airdisasters') result = ie.extract('http://www.youtube.com/show/airdisasters')
self.assertTrue(len(result) >= 3) self.assertTrue(len(result) >= 3)
def test_youtube_mix(self):
    """A mix URL yields at least 20 entries, starting with the seed video."""
    extractor = YoutubePlaylistIE(FakeYDL())
    mix = extractor.extract('http://www.youtube.com/watch?v=lLJf9qJHR3E&list=RDrjFaenf1T-Y')
    videos = mix['entries']
    self.assertTrue(len(videos) >= 20)
    first = videos[0]
    self.assertEqual(first['id'], 'rjFaenf1T-Y')
def test_youtube_toptracks(self):
    """Top-tracks playlist check; currently disabled by the early return."""
    print('Skipping: The playlist page gives error 500')
    return

    extractor = YoutubePlaylistIE(FakeYDL())
    playlist = extractor.extract('https://www.youtube.com/playlist?list=MCUS')
    videos = playlist['entries']
    self.assertEqual(len(videos), 100)
def test_youtube_toplist(self):
    """The 'yttoplist:music:Trending' pseudo-URL yields at least 5 entries."""
    extractor = YoutubeTopListIE(FakeYDL())
    toplist = extractor.extract('yttoplist:music:Trending')
    videos = toplist['entries']
    self.assertTrue(len(videos) >= 5)
def test_youtube_search_url(self):
    """A results-page URL is extracted as a playlist titled after the query."""
    extractor = YoutubeSearchURLIE(FakeYDL())
    found = extractor.extract('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video')
    videos = found['entries']
    self.assertIsPlaylist(found)
    self.assertEqual(found['title'], 'youtube-dl test video')
    self.assertTrue(len(videos) >= 5)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -6,9 +6,6 @@ import sys
import unittest import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import global_setup
global_setup()
import io import io
import re import re
@ -31,10 +28,10 @@ _TESTS = [
u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@', u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
), ),
( (
u'https://s.ytimg.com/yts/swfbin/watch_as3-vflg5GhxU.swf', u'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js',
u'swf', u'js',
82, 90,
u':/.-,+*)=\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBAzyxw>utsrqponmlkjihgfedcba987654321' u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
), ),
] ]

View File

@ -1,95 +0,0 @@
#!/usr/bin/env python
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, global_setup, md5
global_setup()
from youtube_dl.extractor import YoutubeIE
class TestYoutubeSubtitles(unittest.TestCase):
    """Subtitle and automatic-caption tests for the YoutubeIE extractor."""

    def setUp(self):
        """Give every test a fresh FakeYDL and the default video id."""
        self.url = 'QRS8MkLhQmM'
        self.DL = FakeYDL()

    def getInfoDict(self):
        """Run YoutubeIE on self.url and return whatever extract() returns."""
        return YoutubeIE(self.DL).extract(self.url)

    def getSubtitles(self):
        """Return the 'subtitles' value of the first extracted entry."""
        first_entry = self.getInfoDict()[0]
        return first_entry['subtitles']

    def test_youtube_no_writesubtitles(self):
        """With 'writesubtitles' off, no subtitles are returned."""
        self.DL.params['writesubtitles'] = False
        subs = self.getSubtitles()
        self.assertEqual(subs, None)

    def test_youtube_subtitles(self):
        """The English track matches a known checksum."""
        self.DL.params['writesubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(md5(subs['en']), '4cd9278a35ba2305f47354ee13472260')

    def test_youtube_subtitles_lang(self):
        """An explicitly requested language ('it') matches a known checksum."""
        self.DL.params['subtitleslangs'] = ['it']
        self.DL.params['writesubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(md5(subs['it']), '164a51f16f260476a05b50fe4c2f161d')

    def test_youtube_allsubtitles(self):
        """'allsubtitles' yields 13 languages."""
        self.DL.params['allsubtitles'] = True
        self.DL.params['writesubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(len(subs.keys()), 13)

    def test_youtube_subtitles_sbv_format(self):
        """The 'sbv' subtitle format matches a known checksum."""
        self.DL.params['subtitlesformat'] = 'sbv'
        self.DL.params['writesubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(md5(subs['en']), '13aeaa0c245a8bed9a451cb643e3ad8b')

    def test_youtube_subtitles_vtt_format(self):
        """The 'vtt' subtitle format matches a known checksum."""
        self.DL.params['subtitlesformat'] = 'vtt'
        self.DL.params['writesubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(md5(subs['en']), '356cdc577fde0c6783b9b822e7206ff7')

    def test_youtube_list_subtitles(self):
        """'listsubtitles' produces no info dict and warns about automatic captions."""
        self.DL.expect_warning(u'Video doesn\'t have automatic captions')
        self.DL.params['listsubtitles'] = True
        info = self.getInfoDict()
        self.assertEqual(info, None)

    def test_youtube_automatic_captions(self):
        """Automatic captions in 'it' are returned for a different video."""
        self.url = '8YoUxe5ncPo'
        self.DL.params['subtitleslangs'] = ['it']
        self.DL.params['writeautomaticsub'] = True
        subs = self.getSubtitles()
        self.assertTrue(subs['it'] is not None)

    def test_youtube_nosubtitles(self):
        """A video without subtitles yields an empty result and a warning."""
        self.DL.expect_warning(u'video doesn\'t have subtitles')
        self.url = 'sAjKT8FhjI8'
        self.DL.params['allsubtitles'] = True
        self.DL.params['writesubtitles'] = True
        subs = self.getSubtitles()
        self.assertEqual(len(subs), 0)

    def test_youtube_multiple_langs(self):
        """Several requested languages must all be extracted."""
        self.url = 'QRS8MkLhQmM'
        wanted = ['it', 'fr', 'de']
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslangs'] = wanted
        subs = self.getSubtitles()
        for code in wanted:
            self.assertTrue(subs.get(code) is not None, u'Subtitles for \'%s\' not extracted' % code)
if __name__ == '__main__':
unittest.main()

24
youtube-dl.plugin.zsh Normal file
View File

@ -0,0 +1,24 @@
# This allows the youtube-dl command to be installed in ZSH using antigen.
# Antigen is a bundle manager. It allows you to enhance the functionality of
# your zsh session by installing bundles and themes easily.
# Antigen documentation:
# http://antigen.sharats.me/
# https://github.com/zsh-users/antigen
# Install youtube-dl:
# antigen bundle rg3/youtube-dl
# Bundles installed by antigen are available for use immediately.
# Update youtube-dl (and all other antigen bundles):
# antigen update
# The antigen command will download the git repository to a folder and then
# execute an enabling script (this file). The complete process for loading the
# code is documented here:
# https://github.com/zsh-users/antigen#notes-on-writing-plugins
# This specific script just aliases youtube-dl to the python script that this
# library provides. This requires updating the PYTHONPATH to ensure that the
# full set of code can be located.
# The directory is resolved once, when this file is sourced. It is quoted both
# for dirname and inside the alias text so that an install path containing
# whitespace still expands to a single word.
alias youtube-dl="PYTHONPATH='$(dirname "$0")' '$(dirname "$0")/bin/youtube-dl'"

View File

@ -1,616 +1,12 @@
import math # Legacy file for backwards compatibility, use youtube_dl.downloader instead!
import os from .downloader import FileDownloader as RealFileDownloader
import re from .downloader import get_suitable_downloader
import subprocess
import sys
import time
import traceback
if os.name == 'nt':
import ctypes
from .utils import *
class FileDownloader(object):
"""File Downloader class.
File downloader objects are the ones responsible of downloading the
actual video file and writing it to disk.
File downloaders accept a lot of parameters. In order not to saturate
the object constructor with arguments, it receives a dictionary of
options instead.
Available options:
verbose: Print additional info to stdout.
quiet: Do not print messages to stdout.
ratelimit: Download speed limit, in bytes/sec.
retries: Number of times to retry for HTTP error 5xx
buffersize: Size of download buffer in bytes.
noresizebuffer: Do not automatically resize the download buffer.
continuedl: Try to continue downloads if possible.
noprogress: Do not print the progress bar.
logtostderr: Log messages to stderr instead of stdout.
consoletitle: Display progress in console window's titlebar.
nopart: Do not use temporary .part files.
updatetime: Use the Last-modified header to set output file timestamps.
test: Download only first bytes to test the downloader.
min_filesize: Skip files smaller than this size
max_filesize: Skip files larger than this size
"""
params = None
def __init__(self, ydl, params):
"""Create a FileDownloader object with the given options."""
self.ydl = ydl
self._progress_hooks = []
self.params = params
@staticmethod
def format_bytes(bytes):
if bytes is None:
return 'N/A'
if type(bytes) is str:
bytes = float(bytes)
if bytes == 0.0:
exponent = 0
else:
exponent = int(math.log(bytes, 1024.0))
suffix = ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent]
converted = float(bytes) / float(1024 ** exponent)
return '%.2f%s' % (converted, suffix)
@staticmethod
def format_seconds(seconds):
(mins, secs) = divmod(seconds, 60)
(hours, mins) = divmod(mins, 60)
if hours > 99:
return '--:--:--'
if hours == 0:
return '%02d:%02d' % (mins, secs)
else:
return '%02d:%02d:%02d' % (hours, mins, secs)
@staticmethod
def calc_percent(byte_counter, data_len):
if data_len is None:
return None
return float(byte_counter) / float(data_len) * 100.0
@staticmethod
def format_percent(percent):
if percent is None:
return '---.-%'
return '%6s' % ('%3.1f%%' % percent)
@staticmethod
def calc_eta(start, now, total, current):
if total is None:
return None
dif = now - start
if current == 0 or dif < 0.001: # One millisecond
return None
rate = float(current) / dif
return int((float(total) - float(current)) / rate)
@staticmethod
def format_eta(eta):
if eta is None:
return '--:--'
return FileDownloader.format_seconds(eta)
@staticmethod
def calc_speed(start, now, bytes):
dif = now - start
if bytes == 0 or dif < 0.001: # One millisecond
return None
return float(bytes) / dif
@staticmethod
def format_speed(speed):
if speed is None:
return '%10s' % '---b/s'
return '%10s' % ('%s/s' % FileDownloader.format_bytes(speed))
@staticmethod
def best_block_size(elapsed_time, bytes):
new_min = max(bytes / 2.0, 1.0)
new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
if elapsed_time < 0.001:
return int(new_max)
rate = bytes / elapsed_time
if rate > new_max:
return int(new_max)
if rate < new_min:
return int(new_min)
return int(rate)
@staticmethod
def parse_bytes(bytestr):
"""Parse a string indicating a byte quantity into an integer."""
matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
if matchobj is None:
return None
number = float(matchobj.group(1))
multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
return int(round(number * multiplier))
def to_screen(self, *args, **kargs):
self.ydl.to_screen(*args, **kargs)
def to_stderr(self, message):
self.ydl.to_screen(message)
def to_cons_title(self, message):
"""Set console/terminal window title to message."""
if not self.params.get('consoletitle', False):
return
if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
# c_wchar_p() might not be necessary if `message` is
# already of type unicode()
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
elif 'TERM' in os.environ:
self.to_screen('\033]0;%s\007' % message, skip_eol=True)
def trouble(self, *args, **kargs):
self.ydl.trouble(*args, **kargs)
def report_warning(self, *args, **kargs):
self.ydl.report_warning(*args, **kargs)
def report_error(self, *args, **kargs):
self.ydl.report_error(*args, **kargs)
def slow_down(self, start_time, byte_counter):
"""Sleep if the download speed is over the rate limit."""
rate_limit = self.params.get('ratelimit', None)
if rate_limit is None or byte_counter == 0:
return
now = time.time()
elapsed = now - start_time
if elapsed <= 0.0:
return
speed = float(byte_counter) / elapsed
if speed > rate_limit:
time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
def temp_name(self, filename):
"""Returns a temporary filename for the given filename."""
if self.params.get('nopart', False) or filename == u'-' or \
(os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
return filename
return filename + u'.part'
def undo_temp_name(self, filename):
    """Strip a trailing u'.part' from filename, if present."""
    suffix = u'.part'
    if not filename.endswith(suffix):
        return filename
    return filename[:-len(suffix)]
def try_rename(self, old_filename, new_filename):
    """Rename old_filename to new_filename, reporting an error on failure.

    Nothing is done when both names are equal. IOError and OSError are not
    propagated; they are reported through report_error instead.
    """
    try:
        if old_filename != new_filename:
            source = encodeFilename(old_filename)
            target = encodeFilename(new_filename)
            os.rename(source, target)
    except (IOError, OSError):
        self.report_error(u'unable to rename file')
def try_utime(self, filename, last_modified_hdr):
    """Try to set the last-modified time of the given file.

    last_modified_hdr is converted with timeconvert(). Returns the
    converted timestamp when one was obtained and is non-zero, and None
    otherwise (no header, file missing, unparsable or zero date).
    Failures of os.utime itself are ignored on purpose: setting the
    timestamp is best-effort.
    """
    if last_modified_hdr is None:
        return
    if not os.path.isfile(encodeFilename(filename)):
        return
    filetime = timeconvert(last_modified_hdr)
    if filetime is None:
        return filetime
    # Ignore obviously invalid dates
    if filetime == 0:
        return
    try:
        os.utime(filename, (time.time(), filetime))
    except Exception:
        # Best effort only, but do not swallow KeyboardInterrupt/SystemExit
        # the way a bare "except:" would.
        pass
    return filetime
def report_destination(self, filename):
"""Report destination filename."""
self.to_screen(u'[download] Destination: ' + filename)
def report_progress(self, percent, data_len_str, speed, eta):
"""Report download progress."""
if self.params.get('noprogress', False):
return
clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
eta_str = self.format_eta(eta)
percent_str = self.format_percent(percent)
speed_str = self.format_speed(speed)
if self.params.get('progress_with_newline', False):
self.to_screen(u'[download] %s of %s at %s ETA %s' %
(percent_str, data_len_str, speed_str, eta_str))
else:
self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' %
(clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
def report_resuming_byte(self, resume_len):
"""Report attempt to resume at given byte."""
self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
def report_retry(self, count, retries):
"""Report retry in case of HTTP error 5xx"""
self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
def report_file_already_downloaded(self, file_name):
"""Report file has already been fully downloaded."""
try:
self.to_screen(u'[download] %s has already been downloaded' % file_name)
except (UnicodeEncodeError) as err:
self.to_screen(u'[download] The file has already been downloaded')
def report_unable_to_resume(self):
"""Report it was impossible to resume download."""
self.to_screen(u'[download] Unable to resume')
def report_finish(self, data_len_str, tot_time):
"""Report download finished."""
if self.params.get('noprogress', False):
self.to_screen(u'[download] Download completed')
else:
clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
self.to_screen(u'\r%s[download] 100%% of %s in %s' %
(clear_line, data_len_str, self.format_seconds(tot_time)))
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
    """Download an RTMP stream by running the external rtmpdump program.

    filename is the final destination; data is written to the name given
    by temp_name() and renamed on success. player_url, page_url, play_path
    and tc_url are optional and, when not None, are passed to rtmpdump as
    --swfVfy, --pageUrl, --playpath and --tcUrl respectively.

    Returns True when the download finished (the progress hooks are then
    called with status 'finished'), False when rtmpdump could not be run
    or exited with an error.
    """
    self.report_destination(filename)
    tmpfilename = self.temp_name(filename)
    test = self.params.get('test', False)

    # Check for rtmpdump first
    try:
        # Close the devnull handle again instead of leaking it.
        with open(os.path.devnull, 'w') as devnull:
            subprocess.call(['rtmpdump', '-h'], stdout=devnull, stderr=subprocess.STDOUT)
    except (OSError, IOError):
        self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
        return False
    verbosity_option = '--verbose' if self.params.get('verbose', False) else '--quiet'

    # Download using rtmpdump. rtmpdump returns exit code 2 when
    # the connection was interrupted and resuming appears to be
    # possible. This is part of rtmpdump's normal usage, AFAIK.
    basic_args = ['rtmpdump', verbosity_option, '-r', url, '-o', tmpfilename]
    if player_url is not None:
        basic_args += ['--swfVfy', player_url]
    if page_url is not None:
        basic_args += ['--pageUrl', page_url]
    if play_path is not None:
        basic_args += ['--playpath', play_path]
    if tc_url is not None:
        # Pass the tcUrl that was supplied, not the stream URL.
        basic_args += ['--tcUrl', tc_url]
    if test:
        basic_args += ['--stop', '1']
    args = list(basic_args)
    if self.params.get('continuedl', False):
        args += ['--resume', '--skip', '1']
    if self.params.get('verbose', False):
        try:
            import pipes
            shell_quote = lambda args: ' '.join(map(pipes.quote, args))
        except ImportError:
            shell_quote = repr
        self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
    retval = subprocess.call(args)
    while (retval == 2 or retval == 1) and not test:
        prevsize = os.path.getsize(encodeFilename(tmpfilename))
        self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
        time.sleep(5.0)  # This seems to be needed
        # Resume with -e; after exit code 1 also pass "-k 1".
        resume_args = basic_args + ['-e']
        if retval == 1:
            resume_args += ['-k', '1']
        retval = subprocess.call(resume_args)
        cursize = os.path.getsize(encodeFilename(tmpfilename))
        if prevsize == cursize and retval == 1:
            break
        # Some rtmp streams seem to abort after ~ 99.8%. Don't complain for those
        if prevsize == cursize and retval == 2 and cursize > 1024:
            self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
            retval = 0
            break
    if retval == 0 or (test and retval == 2):
        fsize = os.path.getsize(encodeFilename(tmpfilename))
        self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
        self.try_rename(tmpfilename, filename)
        self._hook_progress({
            'downloaded_bytes': fsize,
            'total_bytes': fsize,
            'filename': filename,
            'status': 'finished',
        })
        return True
    else:
        self.to_stderr(u"\n")
        self.report_error(u'rtmpdump exited with code %d' % retval)
        return False
def _download_with_mplayer(self, filename, url):
    """Download an MMS or RTSP stream with the external mplayer program.

    mplayer is invoked with -dumpstream/-dumpfile pointing at the
    temporary name from self.temp_name(); on exit code 0 the file is
    renamed to filename and the progress hooks receive a 'finished'
    status.

    Returns True on success, False if mplayer could not be run or exited
    with a non-zero code.
    """
    self.report_destination(filename)
    tmpfilename = self.temp_name(filename)

    args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url]
    # Check for mplayer first
    try:
        # Close the devnull handle after the probe instead of leaking it.
        with open(os.path.devnull, 'w') as devnull:
            subprocess.call(['mplayer', '-h'], stdout=devnull, stderr=subprocess.STDOUT)
    except (OSError, IOError):
        self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0])
        return False

    # Download using mplayer.
    retval = subprocess.call(args)
    if retval == 0:
        fsize = os.path.getsize(encodeFilename(tmpfilename))
        self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
        self.try_rename(tmpfilename, filename)
        self._hook_progress({
            'downloaded_bytes': fsize,
            'total_bytes': fsize,
            'filename': filename,
            'status': 'finished',
        })
        return True
    else:
        self.to_stderr(u"\n")
        self.report_error(u'mplayer exited with code %d' % retval)
        return False
def _download_m3u8_with_ffmpeg(self, filename, url):
    """Download an m3u8 manifest URL with the external ffmpeg program.

    ffmpeg is called with url as input and the temporary name from
    self.temp_name() as mp4 output; on exit code 0 the file is renamed
    to filename and the progress hooks receive a 'finished' status.

    Returns True on success, False if ffmpeg could not be run or exited
    with a non-zero code.
    """
    self.report_destination(filename)
    tmpfilename = self.temp_name(filename)

    args = ['ffmpeg', '-y', '-i', url, '-f', 'mp4', tmpfilename]
    # Check for ffmpeg first
    try:
        # Close the devnull handle after the probe instead of leaking it.
        with open(os.path.devnull, 'w') as devnull:
            subprocess.call(['ffmpeg', '-h'], stdout=devnull, stderr=subprocess.STDOUT)
    except (OSError, IOError):
        self.report_error(u'm3u8 download detected but "%s" could not be run' % args[0])
        return False

    retval = subprocess.call(args)
    if retval == 0:
        fsize = os.path.getsize(encodeFilename(tmpfilename))
        self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
        self.try_rename(tmpfilename, filename)
        self._hook_progress({
            'downloaded_bytes': fsize,
            'total_bytes': fsize,
            'filename': filename,
            'status': 'finished',
        })
        return True
    else:
        self.to_stderr(u"\n")
        self.report_error(u'ffmpeg exited with code %d' % retval)
        return False
# This class reproduces the old behaviour of FileDownloader
class FileDownloader(RealFileDownloader):
# Download info_dict['url'] to filename. rtmp, mms/rtsp and m3u8 URLs are
# handed to the external-tool helpers; everything else is fetched over HTTP
# into a temporary file with optional resume (Range header) and a retry loop.
# Returns True on success and False on failure; raises ContentTooShortError
# when the number of bytes received differs from the announced length.
# NOTE(review): the next two lines come from a side-by-side diff rendering and
# carry the text of two revisions on one line each.
def _do_download(self, filename, info_dict): def _do_download(self, filename, info_dict):
url = info_dict['url'] real_fd = get_suitable_downloader(info_dict)(self.ydl, self.params)
# Check file already present
if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
self.report_file_already_downloaded(filename)
self._hook_progress({
'filename': filename,
'status': 'finished',
'total_bytes': os.path.getsize(encodeFilename(filename)),
})
return True
# Attempt to download using rtmpdump
if url.startswith('rtmp'):
return self._download_with_rtmpdump(filename, url,
info_dict.get('player_url', None),
info_dict.get('page_url', None),
info_dict.get('play_path', None),
info_dict.get('tc_url', None))
# Attempt to download using mplayer
if url.startswith('mms') or url.startswith('rtsp'):
return self._download_with_mplayer(filename, url)
# m3u8 manifest are downloaded with ffmpeg
if determine_ext(url) == u'm3u8':
return self._download_m3u8_with_ffmpeg(filename, url)
tmpfilename = self.temp_name(filename)
stream = None
# Do not include the Accept-Encoding header
headers = {'Youtubedl-no-compression': 'True'}
if 'user_agent' in info_dict:
headers['Youtubedl-user-agent'] = info_dict['user_agent']
# basic_request never gets a Range header; it is used below to re-open the
# URL after a 416 response. request may get Range headers added.
basic_request = compat_urllib_request.Request(url, None, headers)
request = compat_urllib_request.Request(url, None, headers)
if self.params.get('test', False):
request.add_header('Range','bytes=0-10240')
# Establish possible resume length
if os.path.isfile(encodeFilename(tmpfilename)):
resume_len = os.path.getsize(encodeFilename(tmpfilename))
else:
resume_len = 0
open_mode = 'wb'
if resume_len != 0:
if self.params.get('continuedl', False):
self.report_resuming_byte(resume_len)
request.add_header('Range','bytes=%d-' % resume_len)
open_mode = 'ab'
else:
resume_len = 0
count = 0
retries = self.params.get('retries', 0)
while count <= retries:
# Establish connection
try:
if count == 0 and 'urlhandle' in info_dict:
data = info_dict['urlhandle']
# NOTE(review): there is no else branch here, so a handle taken from
# info_dict['urlhandle'] above is replaced by this urlopen() result —
# confirm whether reusing the supplied handle was intended.
data = compat_urllib_request.urlopen(request)
break
except (compat_urllib_error.HTTPError, ) as err:
# Only 5xx responses (retried) and 416 (handled below) are dealt with
# here; any other HTTP error is re-raised.
if (err.code < 500 or err.code >= 600) and err.code != 416:
# Unexpected HTTP error
raise
elif err.code == 416:
# Unable to resume (requested range not satisfiable)
try:
# Open the connection again without the range header
data = compat_urllib_request.urlopen(basic_request)
content_length = data.info()['Content-Length']
except (compat_urllib_error.HTTPError, ) as err:
if err.code < 500 or err.code >= 600:
raise
else:
# Examine the reported length
if (content_length is not None and
(resume_len - 100 < int(content_length) < resume_len + 100)):
# The file had already been fully downloaded.
# Explanation to the above condition: in issue #175 it was revealed that
# YouTube sometimes adds or removes a few bytes from the end of the file,
# changing the file size slightly and causing problems for some users. So
# I decided to implement a suggested change and consider the file
# completely downloaded if the file size differs less than 100 bytes from
# the one in the hard drive.
self.report_file_already_downloaded(filename)
self.try_rename(tmpfilename, filename)
self._hook_progress({
'filename': filename,
'status': 'finished',
})
return True
else:
# The length does not match, we start the download over
self.report_unable_to_resume()
open_mode = 'wb'
break
# Retry
count += 1
if count <= retries:
self.report_retry(count, retries)
if count > retries:
self.report_error(u'giving up after %s retries' % retries)
return False
data_len = data.info().get('Content-length', None)
if data_len is not None:
# Content-length covers only the part still to be sent, so the bytes
# already on disk are added to get the total size.
data_len = int(data_len) + resume_len
min_data_len = self.params.get("min_filesize", None)
max_data_len = self.params.get("max_filesize", None)
if min_data_len is not None and data_len < min_data_len:
self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
return False
if max_data_len is not None and data_len > max_data_len:
self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
return False
data_len_str = self.format_bytes(data_len)
byte_counter = 0 + resume_len
block_size = self.params.get('buffersize', 1024)
start = time.time()
while True:
# Download and write
before = time.time()
data_block = data.read(block_size)
after = time.time()
# An empty read ends the transfer loop.
if len(data_block) == 0:
break
byte_counter += len(data_block)
# Open file just in time
if stream is None:
try:
(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
assert stream is not None
filename = self.undo_temp_name(tmpfilename)
self.report_destination(filename)
except (OSError, IOError) as err:
self.report_error(u'unable to open for writing: %s' % str(err))
return False
try:
stream.write(data_block)
except (IOError, OSError) as err:
self.to_stderr(u"\n")
self.report_error(u'unable to write data: %s' % str(err))
return False
if not self.params.get('noresizebuffer', False):
block_size = self.best_block_size(after - before, len(data_block))
# Progress message
speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
if data_len is None:
# NOTE(review): speed_str is never assigned in this function (only
# `speed` is) — this looks like a NameError whenever the response has
# no Content-length; confirm and probably pass `speed` instead.
self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
eta = None
else:
percent = self.calc_percent(byte_counter, data_len)
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
self.report_progress(percent, data_len_str, speed, eta)
self._hook_progress({
'downloaded_bytes': byte_counter,
'total_bytes': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
'eta': eta,
'speed': speed,
})
# Apply rate limit
self.slow_down(start, byte_counter - resume_len)
# stream is only opened once the first data block arrives, so None here
# means nothing was received at all.
if stream is None:
self.to_stderr(u"\n")
self.report_error(u'Did not get any data blocks')
return False
stream.close()
self.report_finish(data_len_str, (time.time() - start))
# The byte count must match the announced total exactly when one is known.
if data_len is not None and byte_counter != data_len:
raise ContentTooShortError(byte_counter, int(data_len))
self.try_rename(tmpfilename, filename)
# Update file modification time
if self.params.get('updatetime', True):
info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
self._hook_progress({
'downloaded_bytes': byte_counter,
'total_bytes': byte_counter,
'filename': filename,
'status': 'finished',
})
return True
def _hook_progress(self, status):
for ph in self._progress_hooks: for ph in self._progress_hooks:
ph(status) real_fd.add_progress_hook(ph)
return real_fd.download(filename, info_dict)
def add_progress_hook(self, ph):
    """Register ph as a callback that is invoked on download progress.

    The callback receives one dictionary that always contains
     * filename: The final filename
     * status: One of "downloading" and "finished"

    and may additionally contain
     * downloaded_bytes: Bytes on disks
     * total_bytes: Total bytes, None if unknown
     * tmpfilename: The filename we're currently writing to
     * eta: The estimated time in seconds, None if unknown
     * speed: The download speed in bytes/second, None if unknown

    A successful download calls every hook at least once with status
    "finished".
    """
    hooks = self._progress_hooks
    hooks.append(ph)

View File

@ -1,4 +0,0 @@
# Legacy file for backwards compatibility, use youtube_dl.extractor instead!
from .extractor.common import InfoExtractor, SearchInfoExtractor
from .extractor import gen_extractors, get_info_extractor

File diff suppressed because it is too large Load Diff

View File

@ -32,65 +32,78 @@ __authors__ = (
'Ismael Mejía', 'Ismael Mejía',
'Steffan \'Ruirize\' James', 'Steffan \'Ruirize\' James',
'Andras Elso', 'Andras Elso',
'Jelle van der Waa',
'Marcin Cieślak',
'Anton Larionov',
'Takuya Tsuchida',
'Sergey M.',
'Michael Orlitzky',
'Chris Gahan',
'Saimadhav Heblikar',
'Mike Col',
'Oleg Prutz',
'pulpe',
'Andreas Schmitz',
'Michael Kaiser',
'Niklas Laxström',
'David Triendl',
'Anthony Weems',
'David Wagner',
'Juan C. Olivares',
'Mattias Harrysson',
) )
__license__ = 'Public Domain' __license__ = 'Public Domain'
import codecs import codecs
import collections import io
import getpass import locale
import optparse import optparse
import os import os
import random import random
import re import re
import shlex import shlex
import socket
import subprocess
import sys import sys
import traceback
import platform
from .utils import ( from .utils import (
compat_cookiejar, compat_getpass,
compat_print, compat_print,
compat_str,
compat_urllib_request,
DateRange, DateRange,
decodeOption, decodeOption,
determine_ext, get_term_width,
DownloadError, DownloadError,
get_cachedir, get_cachedir,
make_HTTPS_handler,
MaxDownloadsReached, MaxDownloadsReached,
platform_name,
preferredencoding, preferredencoding,
read_batch_urls,
SameFileError, SameFileError,
setproctitle,
std_headers, std_headers,
write_string, write_string,
YoutubeDLHandler,
) )
from .update import update_self from .update import update_self
from .version import __version__
from .FileDownloader import ( from .FileDownloader import (
FileDownloader, FileDownloader,
) )
from .extractor import gen_extractors from .extractor import gen_extractors
from .version import __version__
from .YoutubeDL import YoutubeDL from .YoutubeDL import YoutubeDL
from .PostProcessor import ( from .postprocessor import (
FFmpegMetadataPP, FFmpegMetadataPP,
FFmpegVideoConvertor, FFmpegVideoConvertor,
FFmpegExtractAudioPP, FFmpegExtractAudioPP,
FFmpegEmbedSubtitlePP, FFmpegEmbedSubtitlePP,
XAttrMetadataPP,
) )
def parseOpts(overrideArguments=None): def parseOpts(overrideArguments=None):
def _readOptions(filename_bytes): def _readOptions(filename_bytes, default=[]):
try: try:
optionf = open(filename_bytes) optionf = open(filename_bytes)
except IOError: except IOError:
return [] # silently skip if file is not present return default # silently skip if file is not present
try: try:
res = [] res = []
for l in optionf: for l in optionf:
@ -99,6 +112,43 @@ def parseOpts(overrideArguments=None):
optionf.close() optionf.close()
return res return res
def _readUserConf():
xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
if xdg_config_home:
userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
if not os.path.isfile(userConfFile):
userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
else:
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
if not os.path.isfile(userConfFile):
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
userConf = _readOptions(userConfFile, None)
if userConf is None:
appdata_dir = os.environ.get('appdata')
if appdata_dir:
userConf = _readOptions(
os.path.join(appdata_dir, 'youtube-dl', 'config'),
default=None)
if userConf is None:
userConf = _readOptions(
os.path.join(appdata_dir, 'youtube-dl', 'config.txt'),
default=None)
if userConf is None:
userConf = _readOptions(
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'),
default=None)
if userConf is None:
userConf = _readOptions(
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'),
default=None)
if userConf is None:
userConf = []
return userConf
def _format_option_string(option): def _format_option_string(option):
''' ('-o', '--option') -> -o, --format METAVAR''' ''' ('-o', '--option') -> -o, --format METAVAR'''
@ -118,22 +168,9 @@ def parseOpts(overrideArguments=None):
def _comma_separated_values_options_callback(option, opt_str, value, parser): def _comma_separated_values_options_callback(option, opt_str, value, parser):
setattr(parser.values, option.dest, value.split(',')) setattr(parser.values, option.dest, value.split(','))
def _find_term_columns():
columns = os.environ.get('COLUMNS', None)
if columns:
return int(columns)
try:
sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out,err = sp.communicate()
return int(out.split()[1])
except:
pass
return None
def _hide_login_info(opts): def _hide_login_info(opts):
opts = list(opts) opts = list(opts)
for private_opt in ['-p', '--password', '-u', '--username']: for private_opt in ['-p', '--password', '-u', '--username', '--video-password']:
try: try:
i = opts.index(private_opt) i = opts.index(private_opt)
opts[i+1] = '<PRIVATE>' opts[i+1] = '<PRIVATE>'
@ -145,7 +182,7 @@ def parseOpts(overrideArguments=None):
max_help_position = 80 max_help_position = 80
# No need to wrap help messages if we're on a wide console # No need to wrap help messages if we're on a wide console
columns = _find_term_columns() columns = get_term_width()
if columns: max_width = columns if columns: max_width = columns
fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position) fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
@ -178,7 +215,7 @@ def parseOpts(overrideArguments=None):
general.add_option('-U', '--update', general.add_option('-U', '--update',
action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)') action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
general.add_option('-i', '--ignore-errors', general.add_option('-i', '--ignore-errors',
action='store_true', dest='ignoreerrors', help='continue on download errors, for example to to skip unavailable videos in a playlist', default=False) action='store_true', dest='ignoreerrors', help='continue on download errors, for example to skip unavailable videos in a playlist', default=False)
general.add_option('--abort-on-error', general.add_option('--abort-on-error',
action='store_false', dest='ignoreerrors', action='store_false', dest='ignoreerrors',
help='Abort downloading of further videos (in the playlist or the command line) if an error occurs') help='Abort downloading of further videos (in the playlist or the command line) if an error occurs')
@ -196,36 +233,78 @@ def parseOpts(overrideArguments=None):
general.add_option('--extractor-descriptions', general.add_option('--extractor-descriptions',
action='store_true', dest='list_extractor_descriptions', action='store_true', dest='list_extractor_descriptions',
help='Output descriptions of all supported extractors', default=False) help='Output descriptions of all supported extractors', default=False)
general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL') general.add_option(
'--proxy', dest='proxy', default=None, metavar='URL',
help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.') general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
general.add_option(
'--prefer-insecure', action='store_true', dest='prefer_insecure',
help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
general.add_option( general.add_option(
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR', '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
help='Location in the filesystem where youtube-dl can store downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl .') help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
general.add_option( general.add_option(
'--no-cache-dir', action='store_const', const=None, dest='cachedir', '--no-cache-dir', action='store_const', const=None, dest='cachedir',
help='Disable filesystem caching') help='Disable filesystem caching')
general.add_option(
'--socket-timeout', dest='socket_timeout',
type=float, default=None, help=u'Time to wait before giving up, in seconds')
general.add_option(
'--bidi-workaround', dest='bidi_workaround', action='store_true',
help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
general.add_option('--default-search',
dest='default_search', metavar='PREFIX',
help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
general.add_option(
'--ignore-config',
action='store_true',
help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
selection.add_option(
selection.add_option('--playlist-start', '--playlist-start',
dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is %default)', default=1) dest='playliststart', metavar='NUMBER', default=1, type=int,
selection.add_option('--playlist-end', help='playlist video to start at (default is %default)')
dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1) selection.add_option(
'--playlist-end',
dest='playlistend', metavar='NUMBER', default=None, type=int,
help='playlist video to end at (default is last)')
selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)') selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)') selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None) selection.add_option('--max-downloads', metavar='NUMBER',
dest='max_downloads', type=int, default=None,
help='Abort after downloading NUMBER files')
selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None) selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)
selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None) selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None)
selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None) selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None) selection.add_option(
selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None) '--datebefore', metavar='DATE', dest='datebefore', default=None,
help='download only videos uploaded on or before this date (i.e. inclusive)')
selection.add_option(
'--dateafter', metavar='DATE', dest='dateafter', default=None,
help='download only videos uploaded on or after this date (i.e. inclusive)')
selection.add_option(
'--min-views', metavar='COUNT', dest='min_views',
default=None, type=int,
help="Do not download any videos with less than COUNT views",)
selection.add_option(
'--max-views', metavar='COUNT', dest='max_views',
default=None, type=int,
help="Do not download any videos with more than COUNT views",)
selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False) selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
selection.add_option('--age-limit', metavar='YEARS', dest='age_limit', selection.add_option('--age-limit', metavar='YEARS', dest='age_limit',
help='download only videos suitable for the given age', help='download only videos suitable for the given age',
default=None, type=int) default=None, type=int)
selection.add_option('--download-archive', metavar='FILE', selection.add_option('--download-archive', metavar='FILE',
dest='download_archive', dest='download_archive',
help='Download only videos not present in the archive file. Record all downloaded videos in it.') help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')
selection.add_option(
'--include-ads', dest='include_ads',
action='store_true',
help='Download advertisements as well (experimental)')
selection.add_option(
'--youtube-include-dash-manifest', action='store_true',
dest='youtube_include_dash_manifest', default=False,
help='Try to download the DASH manifest on YouTube videos (experimental)')
authentication.add_option('-u', '--username', authentication.add_option('-u', '--username',
dest='username', metavar='USERNAME', help='account username') dest='username', metavar='USERNAME', help='account username')
@ -234,12 +313,12 @@ def parseOpts(overrideArguments=None):
authentication.add_option('-n', '--netrc', authentication.add_option('-n', '--netrc',
action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
authentication.add_option('--video-password', authentication.add_option('--video-password',
dest='videopassword', metavar='PASSWORD', help='video password (vimeo only)') dest='videopassword', metavar='PASSWORD', help='video password (vimeo, smotri)')
video_format.add_option('-f', '--format', video_format.add_option('-f', '--format',
action='store', dest='format', metavar='FORMAT', default='best', action='store', dest='format', metavar='FORMAT', default=None,
help='video format code, specifiy the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported') help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality.')
video_format.add_option('--all-formats', video_format.add_option('--all-formats',
action='store_const', dest='format', help='download all available video formats', const='all') action='store_const', dest='format', help='download all available video formats', const='all')
video_format.add_option('--prefer-free-formats', video_format.add_option('--prefer-free-formats',
@ -247,7 +326,7 @@ def parseOpts(overrideArguments=None):
video_format.add_option('--max-quality', video_format.add_option('--max-quality',
action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
video_format.add_option('-F', '--list-formats', video_format.add_option('-F', '--list-formats',
action='store_true', dest='listformats', help='list all available formats (currently youtube only)') action='store_true', dest='listformats', help='list all available formats')
subtitles.add_option('--write-sub', '--write-srt', subtitles.add_option('--write-sub', '--write-srt',
action='store_true', dest='writesubtitles', action='store_true', dest='writesubtitles',
@ -298,12 +377,18 @@ def parseOpts(overrideArguments=None):
verbosity.add_option('--get-description', verbosity.add_option('--get-description',
action='store_true', dest='getdescription', action='store_true', dest='getdescription',
help='simulate, quiet but print video description', default=False) help='simulate, quiet but print video description', default=False)
verbosity.add_option('--get-duration',
action='store_true', dest='getduration',
help='simulate, quiet but print video length', default=False)
verbosity.add_option('--get-filename', verbosity.add_option('--get-filename',
action='store_true', dest='getfilename', action='store_true', dest='getfilename',
help='simulate, quiet but print output filename', default=False) help='simulate, quiet but print output filename', default=False)
verbosity.add_option('--get-format', verbosity.add_option('--get-format',
action='store_true', dest='getformat', action='store_true', dest='getformat',
help='simulate, quiet but print output format', default=False) help='simulate, quiet but print output format', default=False)
verbosity.add_option('-j', '--dump-json',
action='store_true', dest='dumpjson',
help='simulate, quiet but print JSON information', default=False)
verbosity.add_option('--newline', verbosity.add_option('--newline',
action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False) action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False)
verbosity.add_option('--no-progress', verbosity.add_option('--no-progress',
@ -315,10 +400,16 @@ def parseOpts(overrideArguments=None):
action='store_true', dest='verbose', help='print various debugging information', default=False) action='store_true', dest='verbose', help='print various debugging information', default=False)
verbosity.add_option('--dump-intermediate-pages', verbosity.add_option('--dump-intermediate-pages',
action='store_true', dest='dump_intermediate_pages', default=False, action='store_true', dest='dump_intermediate_pages', default=False,
help='print downloaded pages to debug problems(very verbose)') help='print downloaded pages to debug problems (very verbose)')
verbosity.add_option('--write-pages',
action='store_true', dest='write_pages', default=False,
help='Write downloaded intermediary pages to files in the current directory to debug problems')
verbosity.add_option('--youtube-print-sig-code', verbosity.add_option('--youtube-print-sig-code',
action='store_true', dest='youtube_print_sig_code', default=False, action='store_true', dest='youtube_print_sig_code', default=False,
help=optparse.SUPPRESS_HELP) help=optparse.SUPPRESS_HELP)
verbosity.add_option('--print-traffic',
dest='debug_printtraffic', action='store_true', default=False,
help='Display sent and read HTTP traffic')
filesystem.add_option('-t', '--title', filesystem.add_option('-t', '--title',
@ -336,25 +427,31 @@ def parseOpts(overrideArguments=None):
'%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, ' '%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, '
'%(autonumber)s to get an automatically incremented number, ' '%(autonumber)s to get an automatically incremented number, '
'%(ext)s for the filename extension, ' '%(ext)s for the filename extension, '
'%(format)s for the format description (like "22 - 1280x720" or "HD")' '%(format)s for the format description (like "22 - 1280x720" or "HD"), '
'%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"), '
'%(upload_date)s for the upload date (YYYYMMDD), ' '%(upload_date)s for the upload date (YYYYMMDD), '
'%(extractor)s for the provider (youtube, metacafe, etc), ' '%(extractor)s for the provider (youtube, metacafe, etc), '
'%(id)s for the video id , %(playlist)s for the playlist the video is in, ' '%(id)s for the video id, %(playlist)s for the playlist the video is in, '
'%(playlist_index)s for the position in the playlist and %% for a literal percent. ' '%(playlist_index)s for the position in the playlist and %% for a literal percent. '
'%(height)s and %(width)s for the width and height of the video format. '
'%(resolution)s for a textual description of the resolution of the video format. '
'Use - to output to stdout. Can also be used to download to a different directory, ' 'Use - to output to stdout. Can also be used to download to a different directory, '
'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .')) 'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
filesystem.add_option('--autonumber-size', filesystem.add_option('--autonumber-size',
dest='autonumber_size', metavar='NUMBER', dest='autonumber_size', metavar='NUMBER',
help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --autonumber option is given') help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given')
filesystem.add_option('--restrict-filenames', filesystem.add_option('--restrict-filenames',
action='store_true', dest='restrictfilenames', action='store_true', dest='restrictfilenames',
help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False) help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
filesystem.add_option('-a', '--batch-file', filesystem.add_option('-a', '--batch-file',
dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
filesystem.add_option('--load-info',
dest='load_info_filename', metavar='FILE',
help='json file containing the video information (created with the "--write-json" option)')
filesystem.add_option('-w', '--no-overwrites', filesystem.add_option('-w', '--no-overwrites',
action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
filesystem.add_option('-c', '--continue', filesystem.add_option('-c', '--continue',
action='store_true', dest='continue_dl', help='resume partially downloaded files', default=True) action='store_true', dest='continue_dl', help='force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.', default=True)
filesystem.add_option('--no-continue', filesystem.add_option('--no-continue',
action='store_false', dest='continue_dl', action='store_false', dest='continue_dl',
help='do not resume partially downloaded files (restart from beginning)') help='do not resume partially downloaded files (restart from beginning)')
@ -394,7 +491,13 @@ def parseOpts(overrideArguments=None):
postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False, postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False,
help='embed subtitles in the video (only for mp4 videos)') help='embed subtitles in the video (only for mp4 videos)')
postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False, postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False,
help='add metadata to the files') help='write metadata to the video file')
postproc.add_option('--xattrs', action='store_true', dest='xattrs', default=False,
help='write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
postproc.add_option('--prefer-avconv', action='store_false', dest='prefer_ffmpeg',
help='Prefer avconv over ffmpeg for running the postprocessors (default)')
postproc.add_option('--prefer-ffmpeg', action='store_true', dest='prefer_ffmpeg',
help='Prefer ffmpeg over avconv for running the postprocessors')
parser.add_option_group(general) parser.add_option_group(general)
@ -412,48 +515,39 @@ def parseOpts(overrideArguments=None):
if opts.verbose: if opts.verbose:
write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n') write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
else: else:
xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
if xdg_config_home:
userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
if not os.path.isfile(userConfFile):
userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
else:
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
if not os.path.isfile(userConfFile):
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
systemConf = _readOptions('/etc/youtube-dl.conf')
userConf = _readOptions(userConfFile)
commandLineConf = sys.argv[1:] commandLineConf = sys.argv[1:]
if '--ignore-config' in commandLineConf:
systemConf = []
userConf = []
else:
systemConf = _readOptions('/etc/youtube-dl.conf')
if '--ignore-config' in systemConf:
userConf = []
else:
userConf = _readUserConf()
argv = systemConf + userConf + commandLineConf argv = systemConf + userConf + commandLineConf
opts, args = parser.parse_args(argv) opts, args = parser.parse_args(argv)
if opts.verbose: if opts.verbose:
write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n') write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n') write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n') write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
write_string(u'[debug] Encodings: locale %r, fs %r, out %r, pref: %r\n' %
(locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, preferredencoding()))
return parser, opts, args return parser, opts, args
def _real_main(argv=None): def _real_main(argv=None):
# Compatibility fixes for Windows # Compatibility fixes for Windows
if sys.platform == 'win32': if sys.platform == 'win32':
# https://github.com/rg3/youtube-dl/issues/820 # https://github.com/rg3/youtube-dl/issues/820
codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None) codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
setproctitle(u'youtube-dl')
parser, opts, args = parseOpts(argv) parser, opts, args = parseOpts(argv)
# Open appropriate CookieJar
if opts.cookiefile is None:
jar = compat_cookiejar.CookieJar()
else:
try:
jar = compat_cookiejar.MozillaCookieJar(opts.cookiefile)
if os.access(opts.cookiefile, os.R_OK):
jar.load()
except (IOError, OSError) as err:
if opts.verbose:
traceback.print_exc()
write_string(u'ERROR: unable to open cookie file\n')
sys.exit(101)
# Set user agent # Set user agent
if opts.user_agent is not None: if opts.user_agent is not None:
std_headers['User-Agent'] = opts.user_agent std_headers['User-Agent'] = opts.user_agent
@ -468,24 +562,22 @@ def _real_main(argv=None):
sys.exit(0) sys.exit(0)
# Batch file verification # Batch file verification
batchurls = [] batch_urls = []
if opts.batchfile is not None: if opts.batchfile is not None:
try: try:
if opts.batchfile == '-': if opts.batchfile == '-':
batchfd = sys.stdin batchfd = sys.stdin
else: else:
batchfd = open(opts.batchfile, 'r') batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore')
batchurls = batchfd.readlines() batch_urls = read_batch_urls(batchfd)
batchurls = [x.strip() for x in batchurls]
batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
if opts.verbose: if opts.verbose:
write_string(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n') write_string(u'[debug] Batch file urls: ' + repr(batch_urls) + u'\n')
except IOError: except IOError:
sys.exit(u'ERROR: batch file could not be read') sys.exit(u'ERROR: batch file could not be read')
all_urls = batchurls + args all_urls = batch_urls + args
all_urls = [url.strip() for url in all_urls] all_urls = [url.strip() for url in all_urls]
_enc = preferredencoding()
opener = _setup_opener(jar=jar, opts=opts) all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
extractors = gen_extractors() extractors = gen_extractors()
@ -493,7 +585,6 @@ def _real_main(argv=None):
for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()): for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '')) compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
matchedUrls = [url for url in all_urls if ie.suitable(url)] matchedUrls = [url for url in all_urls if ie.suitable(url)]
all_urls = [url for url in all_urls if url not in matchedUrls]
for mu in matchedUrls: for mu in matchedUrls:
compat_print(u' ' + mu) compat_print(u' ' + mu)
sys.exit(0) sys.exit(0)
@ -516,13 +607,13 @@ def _real_main(argv=None):
if opts.usenetrc and (opts.username is not None or opts.password is not None): if opts.usenetrc and (opts.username is not None or opts.password is not None):
parser.error(u'using .netrc conflicts with giving username/password') parser.error(u'using .netrc conflicts with giving username/password')
if opts.password is not None and opts.username is None: if opts.password is not None and opts.username is None:
parser.error(u' account username missing\n') parser.error(u'account username missing\n')
if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid): if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
parser.error(u'using output template conflicts with using title, video ID or auto number') parser.error(u'using output template conflicts with using title, video ID or auto number')
if opts.usetitle and opts.useid: if opts.usetitle and opts.useid:
parser.error(u'using title conflicts with using video ID') parser.error(u'using title conflicts with using video ID')
if opts.username is not None and opts.password is None: if opts.username is not None and opts.password is None:
opts.password = getpass.getpass(u'Type account password and press return:') opts.password = compat_getpass(u'Type account password and press [Return]: ')
if opts.ratelimit is not None: if opts.ratelimit is not None:
numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
if numeric_limit is None: if numeric_limit is None:
@ -541,25 +632,17 @@ def _real_main(argv=None):
if opts.retries is not None: if opts.retries is not None:
try: try:
opts.retries = int(opts.retries) opts.retries = int(opts.retries)
except (TypeError, ValueError) as err: except (TypeError, ValueError):
parser.error(u'invalid retry count specified') parser.error(u'invalid retry count specified')
if opts.buffersize is not None: if opts.buffersize is not None:
numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize) numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
if numeric_buffersize is None: if numeric_buffersize is None:
parser.error(u'invalid buffer size specified') parser.error(u'invalid buffer size specified')
opts.buffersize = numeric_buffersize opts.buffersize = numeric_buffersize
try: if opts.playliststart <= 0:
opts.playliststart = int(opts.playliststart) raise ValueError(u'Playlist start must be positive')
if opts.playliststart <= 0: if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
raise ValueError(u'Playlist start must be positive') raise ValueError(u'Playlist end must be greater than playlist start')
except (TypeError, ValueError) as err:
parser.error(u'invalid playlist start number specified')
try:
opts.playlistend = int(opts.playlistend)
if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
raise ValueError(u'Playlist end must be greater than playlist start')
except (TypeError, ValueError) as err:
parser.error(u'invalid playlist end number specified')
if opts.extractaudio: if opts.extractaudio:
if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']: if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
parser.error(u'invalid audio format specified') parser.error(u'invalid audio format specified')
@ -574,6 +657,12 @@ def _real_main(argv=None):
date = DateRange.day(opts.date) date = DateRange.day(opts.date)
else: else:
date = DateRange(opts.dateafter, opts.datebefore) date = DateRange(opts.dateafter, opts.datebefore)
if opts.default_search not in ('auto', None) and ':' not in opts.default_search:
parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')
# Do not download videos when there are audio-only formats
if opts.extractaudio and not opts.keepvideo and opts.format is None:
opts.format = 'bestaudio/best'
# --all-sub automatically sets --write-sub if --write-auto-sub is not given # --all-sub automatically sets --write-sub if --write-auto-sub is not given
# this was the old behaviour if only --all-sub was given. # this was the old behaviour if only --all-sub was given.
@ -592,27 +681,31 @@ def _real_main(argv=None):
or (opts.useid and u'%(id)s.%(ext)s') or (opts.useid and u'%(id)s.%(ext)s')
or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
or u'%(title)s-%(id)s.%(ext)s') or u'%(title)s-%(id)s.%(ext)s')
if '%(ext)s' not in outtmpl and opts.extractaudio: if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
parser.error(u'Cannot download a video and extract audio into the same' parser.error(u'Cannot download a video and extract audio into the same'
u' file! Use "%%(ext)s" instead of %r' % u' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
determine_ext(outtmpl, u'')) u' template'.format(outtmpl))
# YoutubeDL any_printing = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson
ydl = YoutubeDL({ download_archive_fn = os.path.expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive
ydl_opts = {
'usenetrc': opts.usenetrc, 'usenetrc': opts.usenetrc,
'username': opts.username, 'username': opts.username,
'password': opts.password, 'password': opts.password,
'videopassword': opts.videopassword, 'videopassword': opts.videopassword,
'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat), 'quiet': (opts.quiet or any_printing),
'forceurl': opts.geturl, 'forceurl': opts.geturl,
'forcetitle': opts.gettitle, 'forcetitle': opts.gettitle,
'forceid': opts.getid, 'forceid': opts.getid,
'forcethumbnail': opts.getthumbnail, 'forcethumbnail': opts.getthumbnail,
'forcedescription': opts.getdescription, 'forcedescription': opts.getdescription,
'forceduration': opts.getduration,
'forcefilename': opts.getfilename, 'forcefilename': opts.getfilename,
'forceformat': opts.getformat, 'forceformat': opts.getformat,
'forcejson': opts.dumpjson,
'simulate': opts.simulate, 'simulate': opts.simulate,
'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat), 'skip_download': (opts.skip_download or opts.simulate or any_printing),
'format': opts.format, 'format': opts.format,
'format_limit': opts.format_limit, 'format_limit': opts.format_limit,
'listformats': opts.listformats, 'listformats': opts.listformats,
@ -651,111 +744,71 @@ def _real_main(argv=None):
'prefer_free_formats': opts.prefer_free_formats, 'prefer_free_formats': opts.prefer_free_formats,
'verbose': opts.verbose, 'verbose': opts.verbose,
'dump_intermediate_pages': opts.dump_intermediate_pages, 'dump_intermediate_pages': opts.dump_intermediate_pages,
'write_pages': opts.write_pages,
'test': opts.test, 'test': opts.test,
'keepvideo': opts.keepvideo, 'keepvideo': opts.keepvideo,
'min_filesize': opts.min_filesize, 'min_filesize': opts.min_filesize,
'max_filesize': opts.max_filesize, 'max_filesize': opts.max_filesize,
'min_views': opts.min_views,
'max_views': opts.max_views,
'daterange': date, 'daterange': date,
'cachedir': opts.cachedir, 'cachedir': opts.cachedir,
'youtube_print_sig_code': opts.youtube_print_sig_code, 'youtube_print_sig_code': opts.youtube_print_sig_code,
'age_limit': opts.age_limit, 'age_limit': opts.age_limit,
'download_archive': opts.download_archive, 'download_archive': download_archive_fn,
}) 'cookiefile': opts.cookiefile,
'nocheckcertificate': opts.no_check_certificate,
'prefer_insecure': opts.prefer_insecure,
'proxy': opts.proxy,
'socket_timeout': opts.socket_timeout,
'bidi_workaround': opts.bidi_workaround,
'debug_printtraffic': opts.debug_printtraffic,
'prefer_ffmpeg': opts.prefer_ffmpeg,
'include_ads': opts.include_ads,
'default_search': opts.default_search,
'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
}
with YoutubeDL(ydl_opts) as ydl:
ydl.print_debug_header()
ydl.add_default_info_extractors()
# PostProcessors
# Add the metadata pp first, the other pps will copy it
if opts.addmetadata:
ydl.add_post_processor(FFmpegMetadataPP())
if opts.extractaudio:
ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
if opts.recodevideo:
ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
if opts.embedsubtitles:
ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
if opts.xattrs:
ydl.add_post_processor(XAttrMetadataPP())
# Update version
if opts.update_self:
update_self(ydl.to_screen, opts.verbose)
# Maybe do nothing
if (len(all_urls) < 1) and (opts.load_info_filename is None):
if not opts.update_self:
parser.error(u'you must provide at least one URL')
else:
sys.exit()
if opts.verbose:
write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
try: try:
sp = subprocess.Popen( if opts.load_info_filename is not None:
['git', 'rev-parse', '--short', 'HEAD'], retcode = ydl.download_with_info_file(opts.load_info_filename)
stdout=subprocess.PIPE, stderr=subprocess.PIPE, else:
cwd=os.path.dirname(os.path.abspath(__file__))) retcode = ydl.download(all_urls)
out, err = sp.communicate() except MaxDownloadsReached:
out = out.decode().strip() ydl.to_screen(u'--max-download limit reached, aborting.')
if re.match('[0-9a-f]+', out): retcode = 101
write_string(u'[debug] Git HEAD: ' + out + u'\n')
except:
try:
sys.exc_clear()
except:
pass
write_string(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
proxy_map = {}
for handler in opener.handlers:
if hasattr(handler, 'proxies'):
proxy_map.update(handler.proxies)
write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
ydl.add_default_info_extractors()
# PostProcessors
# Add the metadata pp first, the other pps will copy it
if opts.addmetadata:
ydl.add_post_processor(FFmpegMetadataPP())
if opts.extractaudio:
ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
if opts.recodevideo:
ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
if opts.embedsubtitles:
ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
# Update version
if opts.update_self:
update_self(ydl.to_screen, opts.verbose)
# Maybe do nothing
if len(all_urls) < 1:
if not opts.update_self:
parser.error(u'you must provide at least one URL')
else:
sys.exit()
try:
retcode = ydl.download(all_urls)
except MaxDownloadsReached:
ydl.to_screen(u'--max-download limit reached, aborting.')
retcode = 101
# Dump cookie jar if requested
if opts.cookiefile is not None:
try:
jar.save()
except (IOError, OSError):
sys.exit(u'ERROR: unable to save cookie jar')
sys.exit(retcode) sys.exit(retcode)
def _setup_opener(jar=None, opts=None, timeout=300):
    """Build, globally install and return the urllib opener.

    jar     -- cookie jar handed to the HTTPCookieProcessor
    opts    -- object exposing `proxy` and `no_check_certificate`; when None,
               a stand-in with proxy=None and no_check_certificate=False is used
    timeout -- value passed to socket.setdefaulttimeout()

    Side effects: installs the opener via install_opener() and changes the
    process-wide default socket timeout.
    """
    if opts is None:
        FakeOptions = collections.namedtuple(
            'FakeOptions', ['proxy', 'no_check_certificate'])
        opts = FakeOptions(proxy=None, no_check_certificate=False)

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)

    if opts.proxy is None:
        # No explicit proxy option: fall back to what getproxies() reports
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif opts.proxy == '':
        # An empty proxy string results in an empty proxy map
        proxies = {}
    else:
        proxies = dict.fromkeys(('http', 'https'), opts.proxy)

    handlers = [
        make_HTTPS_handler(opts),
        compat_urllib_request.ProxyHandler(proxies),
        cookie_processor,
        YoutubeDLHandler(),
    ]
    opener = compat_urllib_request.build_opener(*handlers)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    compat_urllib_request.install_opener(opener)
    socket.setdefaulttimeout(timeout)
    return opener
def main(argv=None): def main(argv=None):
try: try:
_real_main(argv) _real_main(argv)

View File

@ -1,4 +1,4 @@
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_decrypt_text'] __all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
import base64 import base64
from math import ceil from math import ceil
@ -32,6 +32,31 @@ def aes_ctr_decrypt(data, key, counter):
return decrypted_data return decrypted_data
def aes_cbc_decrypt(data, key, iv):
    """
    Decrypt with aes in CBC mode

    @param {int[]} data        cipher
    @param {int[]} key         16/24/32-Byte cipher key
    @param {int[]} iv          16-Byte IV
    @returns {int[]}           decrypted data
    """
    round_keys = key_expansion(key)
    total_blocks = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))

    plain = []
    # The first block is chained with the IV, every later one with the
    # (padded) cipher block that preceded it.
    chain_block = iv
    for start in range(0, total_blocks * BLOCK_SIZE_BYTES, BLOCK_SIZE_BYTES):
        cipher_block = data[start:start + BLOCK_SIZE_BYTES]
        # Zero-fill a short trailing block up to the full block size
        cipher_block = cipher_block + [0] * (BLOCK_SIZE_BYTES - len(cipher_block))
        plain.extend(xor(aes_decrypt(cipher_block, round_keys), chain_block))
        chain_block = cipher_block

    # Drop whatever the zero-fill of the last block added
    return plain[:len(data)]
def key_expansion(data): def key_expansion(data):
""" """
Generate key schedule Generate key schedule
@ -75,7 +100,7 @@ def aes_encrypt(data, expanded_key):
@returns {int[]} 16-Byte cipher @returns {int[]} 16-Byte cipher
""" """
rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1 rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
for i in range(1, rounds+1): for i in range(1, rounds+1):
data = sub_bytes(data) data = sub_bytes(data)
@ -83,6 +108,26 @@ def aes_encrypt(data, expanded_key):
if i != rounds: if i != rounds:
data = mix_columns(data) data = mix_columns(data)
data = xor(data, expanded_key[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES]) data = xor(data, expanded_key[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES])
return data
def aes_decrypt(data, expanded_key):
    """
    Decrypt one block with aes

    @param {int[]} data          16-Byte cipher
    @param {int[]} expanded_key  176/208/240-Byte expanded key
    @returns {int[]}             16-Byte state
    """
    rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1

    state = data
    # Walk the round keys from the last one down to round 1
    for round_no in reversed(range(1, rounds + 1)):
        offset = round_no * BLOCK_SIZE_BYTES
        state = xor(state, expanded_key[offset:offset + BLOCK_SIZE_BYTES])
        # The highest-numbered round is the only one without a column mix
        if round_no != rounds:
            state = mix_columns_inv(state)
        state = sub_bytes_inv(shift_rows_inv(state))

    # Finish with the first BLOCK_SIZE_BYTES of the expanded key
    return xor(state, expanded_key[:BLOCK_SIZE_BYTES])
@ -139,14 +184,69 @@ SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B,
0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16) 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16)
MIX_COLUMN_MATRIX = ((2,3,1,1), SBOX_INV = (0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
(1,2,3,1), 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
(1,1,2,3), 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
(3,1,1,2)) 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d)
MIX_COLUMN_MATRIX = ((0x2,0x3,0x1,0x1),
(0x1,0x2,0x3,0x1),
(0x1,0x1,0x2,0x3),
(0x3,0x1,0x1,0x2))
MIX_COLUMN_MATRIX_INV = ((0xE,0xB,0xD,0x9),
(0x9,0xE,0xB,0xD),
(0xD,0x9,0xE,0xB),
(0xB,0xD,0x9,0xE))
RIJNDAEL_EXP_TABLE = (0x01, 0x03, 0x05, 0x0F, 0x11, 0x33, 0x55, 0xFF, 0x1A, 0x2E, 0x72, 0x96, 0xA1, 0xF8, 0x13, 0x35,
0x5F, 0xE1, 0x38, 0x48, 0xD8, 0x73, 0x95, 0xA4, 0xF7, 0x02, 0x06, 0x0A, 0x1E, 0x22, 0x66, 0xAA,
0xE5, 0x34, 0x5C, 0xE4, 0x37, 0x59, 0xEB, 0x26, 0x6A, 0xBE, 0xD9, 0x70, 0x90, 0xAB, 0xE6, 0x31,
0x53, 0xF5, 0x04, 0x0C, 0x14, 0x3C, 0x44, 0xCC, 0x4F, 0xD1, 0x68, 0xB8, 0xD3, 0x6E, 0xB2, 0xCD,
0x4C, 0xD4, 0x67, 0xA9, 0xE0, 0x3B, 0x4D, 0xD7, 0x62, 0xA6, 0xF1, 0x08, 0x18, 0x28, 0x78, 0x88,
0x83, 0x9E, 0xB9, 0xD0, 0x6B, 0xBD, 0xDC, 0x7F, 0x81, 0x98, 0xB3, 0xCE, 0x49, 0xDB, 0x76, 0x9A,
0xB5, 0xC4, 0x57, 0xF9, 0x10, 0x30, 0x50, 0xF0, 0x0B, 0x1D, 0x27, 0x69, 0xBB, 0xD6, 0x61, 0xA3,
0xFE, 0x19, 0x2B, 0x7D, 0x87, 0x92, 0xAD, 0xEC, 0x2F, 0x71, 0x93, 0xAE, 0xE9, 0x20, 0x60, 0xA0,
0xFB, 0x16, 0x3A, 0x4E, 0xD2, 0x6D, 0xB7, 0xC2, 0x5D, 0xE7, 0x32, 0x56, 0xFA, 0x15, 0x3F, 0x41,
0xC3, 0x5E, 0xE2, 0x3D, 0x47, 0xC9, 0x40, 0xC0, 0x5B, 0xED, 0x2C, 0x74, 0x9C, 0xBF, 0xDA, 0x75,
0x9F, 0xBA, 0xD5, 0x64, 0xAC, 0xEF, 0x2A, 0x7E, 0x82, 0x9D, 0xBC, 0xDF, 0x7A, 0x8E, 0x89, 0x80,
0x9B, 0xB6, 0xC1, 0x58, 0xE8, 0x23, 0x65, 0xAF, 0xEA, 0x25, 0x6F, 0xB1, 0xC8, 0x43, 0xC5, 0x54,
0xFC, 0x1F, 0x21, 0x63, 0xA5, 0xF4, 0x07, 0x09, 0x1B, 0x2D, 0x77, 0x99, 0xB0, 0xCB, 0x46, 0xCA,
0x45, 0xCF, 0x4A, 0xDE, 0x79, 0x8B, 0x86, 0x91, 0xA8, 0xE3, 0x3E, 0x42, 0xC6, 0x51, 0xF3, 0x0E,
0x12, 0x36, 0x5A, 0xEE, 0x29, 0x7B, 0x8D, 0x8C, 0x8F, 0x8A, 0x85, 0x94, 0xA7, 0xF2, 0x0D, 0x17,
0x39, 0x4B, 0xDD, 0x7C, 0x84, 0x97, 0xA2, 0xFD, 0x1C, 0x24, 0x6C, 0xB4, 0xC7, 0x52, 0xF6, 0x01)
RIJNDAEL_LOG_TABLE = (0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03,
0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1,
0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78,
0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e,
0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38,
0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10,
0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba,
0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57,
0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8,
0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0,
0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7,
0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d,
0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1,
0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab,
0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07)
def sub_bytes(data): def sub_bytes(data):
return [SBOX[x] for x in data] return [SBOX[x] for x in data]
def sub_bytes_inv(data):
    """Return a new list with every element of data replaced by SBOX_INV[element]."""
    substituted = []
    for byte in data:
        substituted.append(SBOX_INV[byte])
    return substituted
def rotate(data): def rotate(data):
return data[1:] + [data[0]] return data[1:] + [data[0]]
@ -160,30 +260,31 @@ def key_schedule_core(data, rcon_iteration):
def xor(data1, data2): def xor(data1, data2):
return [x^y for x, y in zip(data1, data2)] return [x^y for x, y in zip(data1, data2)]
def mix_column(data): def rijndael_mul(a, b):
if(a==0 or b==0):
return 0
return RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF]
def mix_column(data, matrix):
data_mixed = [] data_mixed = []
for row in range(4): for row in range(4):
mixed = 0 mixed = 0
for column in range(4): for column in range(4):
addend = data[column] # xor is (+) and (-)
if MIX_COLUMN_MATRIX[row][column] in (2,3): mixed ^= rijndael_mul(data[column], matrix[row][column])
addend <<= 1
if addend > 0xff:
addend &= 0xff
addend ^= 0x1b
if MIX_COLUMN_MATRIX[row][column] == 3:
addend ^= data[column]
mixed ^= addend & 0xff
data_mixed.append(mixed) data_mixed.append(mixed)
return data_mixed return data_mixed
def mix_columns(data): def mix_columns(data, matrix=MIX_COLUMN_MATRIX):
data_mixed = [] data_mixed = []
for i in range(4): for i in range(4):
column = data[i*4 : (i+1)*4] column = data[i*4 : (i+1)*4]
data_mixed += mix_column(column) data_mixed += mix_column(column, matrix)
return data_mixed return data_mixed
def mix_columns_inv(data):
    """Apply mix_columns to data using MIX_COLUMN_MATRIX_INV as the matrix."""
    inverse_matrix = MIX_COLUMN_MATRIX_INV
    return mix_columns(data, inverse_matrix)
def shift_rows(data): def shift_rows(data):
data_shifted = [] data_shifted = []
for column in range(4): for column in range(4):
@ -191,6 +292,13 @@ def shift_rows(data):
data_shifted.append( data[((column + row) & 0b11) * 4 + row] ) data_shifted.append( data[((column + row) & 0b11) * 4 + row] )
return data_shifted return data_shifted
def shift_rows_inv(data):
    """Return data re-ordered by the inverse row shift.

    data is indexed as 16 elements laid out column after column (four
    elements per column).  Output position ``column * 4 + row`` receives the
    input element at ``((column - row) & 0b11) * 4 + row``, i.e. the same
    formula as shift_rows with the column offset subtracted instead of added.
    """
    return [
        data[((column - row) & 0b11) * 4 + row]
        for column in range(4)
        for row in range(4)
    ]
def inc(data): def inc(data):
data = data[:] # copy data = data[:] # copy
for i in range(len(data)-1,-1,-1): for i in range(len(data)-1,-1,-1):

View File

@ -0,0 +1,29 @@
from __future__ import unicode_literals
from .common import FileDownloader
from .hls import HlsFD
from .http import HttpFD
from .mplayer import MplayerFD
from .rtmp import RtmpFD
from .f4m import F4mFD
from ..utils import (
determine_ext,
)
def get_suitable_downloader(info_dict):
    """Pick the downloader class matching the info dict's URL and protocol.

    Checks are made in order: rtmp URLs, m3u8 (explicit protocol, or by file
    extension when no protocol is given), mms/rtsp URLs, f4m manifests, and
    finally plain HTTP as the fallback.
    """
    url = info_dict['url']
    protocol = info_dict.get('protocol')

    if url.startswith('rtmp'):
        return RtmpFD

    # The extension is only consulted when the extractor gave no protocol.
    wants_hls = protocol == 'm3u8' or (
        protocol is None and determine_ext(url) == 'm3u8')
    if wants_hls:
        return HlsFD

    if url.startswith(('mms', 'rtsp')):
        return MplayerFD

    return F4mFD if determine_ext(url) == 'f4m' else HttpFD

View File

@ -0,0 +1,316 @@
import os
import re
import sys
import time
from ..utils import (
encodeFilename,
timeconvert,
format_bytes,
)
class FileDownloader(object):
    """File Downloader class.

    File downloader objects are the ones responsible for downloading the
    actual video file and writing it to disk.

    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead.

    Available options:

    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    ratelimit:         Download speed limit, in bytes/sec.
    retries:           Number of times to retry for HTTP error 5xx
    buffersize:        Size of download buffer in bytes.
    noresizebuffer:    Do not automatically resize the download buffer.
    continuedl:        Try to continue downloads if possible.
    noprogress:        Do not print the progress bar.
    logtostderr:       Log messages to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    nopart:            Do not use temporary .part files.
    updatetime:        Use the Last-modified header to set output file timestamps.
    test:              Download only first bytes to test the downloader.
    min_filesize:      Skip files smaller than this size
    max_filesize:      Skip files larger than this size

    Subclasses of this one must re-define the real_download method.
    """

    params = None

    def __init__(self, ydl, params):
        """Create a FileDownloader object with the given options."""
        self.ydl = ydl
        self._progress_hooks = []
        self.params = params

    @staticmethod
    def format_seconds(seconds):
        """Format a duration as MM:SS or HH:MM:SS ('--:--:--' above 99 hours)."""
        (mins, secs) = divmod(seconds, 60)
        (hours, mins) = divmod(mins, 60)
        if hours > 99:
            return '--:--:--'
        if hours == 0:
            return '%02d:%02d' % (mins, secs)
        else:
            return '%02d:%02d:%02d' % (hours, mins, secs)

    @staticmethod
    def calc_percent(byte_counter, data_len):
        """Return byte_counter as a percentage of data_len, or None if data_len is None."""
        if data_len is None:
            return None
        return float(byte_counter) / float(data_len) * 100.0

    @staticmethod
    def format_percent(percent):
        """Format a percentage right-aligned in 6 columns ('---.-%' for None)."""
        if percent is None:
            return '---.-%'
        return '%6s' % ('%3.1f%%' % percent)

    @staticmethod
    def calc_eta(start, now, total, current):
        """Estimate the remaining whole seconds from the average rate so far.

        Returns None when total is unknown, nothing has been transferred yet,
        or less than a millisecond has elapsed.
        """
        if total is None:
            return None
        dif = now - start
        if current == 0 or dif < 0.001:  # One millisecond
            return None
        rate = float(current) / dif
        return int((float(total) - float(current)) / rate)

    @staticmethod
    def format_eta(eta):
        """Format an ETA in seconds ('--:--' for None)."""
        if eta is None:
            return '--:--'
        return FileDownloader.format_seconds(eta)

    @staticmethod
    def calc_speed(start, now, bytes):
        """Return the average bytes/second, or None if it cannot be computed yet."""
        dif = now - start
        if bytes == 0 or dif < 0.001:  # One millisecond
            return None
        return float(bytes) / dif

    @staticmethod
    def format_speed(speed):
        """Format a speed right-aligned in 10 columns ('---b/s' for None)."""
        if speed is None:
            return '%10s' % '---b/s'
        return '%10s' % ('%s/s' % format_bytes(speed))

    @staticmethod
    def best_block_size(elapsed_time, bytes):
        """Choose the next read size from how fast the last block arrived.

        The result is the observed rate clamped to [bytes / 2, bytes * 2],
        with the upper bound itself capped at 4 MB.
        """
        new_min = max(bytes / 2.0, 1.0)
        new_max = min(max(bytes * 2.0, 1.0), 4194304)  # Do not surpass 4 MB
        if elapsed_time < 0.001:
            return int(new_max)
        rate = bytes / elapsed_time
        if rate > new_max:
            return int(new_max)
        if rate < new_min:
            return int(new_min)
        return int(rate)

    @staticmethod
    def parse_bytes(bytestr):
        """Parse a string indicating a byte quantity into an integer.

        Accepts a decimal number with an optional single-letter suffix
        (k, M, G, ... case-insensitive; powers of 1024).  Returns None when
        the string does not match.
        """
        matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
        if matchobj is None:
            return None
        number = float(matchobj.group(1))
        # An empty suffix is found at index 0, giving a multiplier of 1.
        multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
        return int(round(number * multiplier))

    def to_screen(self, *args, **kargs):
        """Forward to the owning ydl object's to_screen."""
        self.ydl.to_screen(*args, **kargs)

    def to_stderr(self, message):
        """Emit message through the owning ydl object.

        NOTE(review): this forwards to ydl.to_screen, not to a stderr
        writer -- confirm whether that is intentional.
        """
        self.ydl.to_screen(message)

    def to_console_title(self, message):
        """Forward to the owning ydl object's to_console_title."""
        self.ydl.to_console_title(message)

    def trouble(self, *args, **kargs):
        """Forward to the owning ydl object's trouble."""
        self.ydl.trouble(*args, **kargs)

    def report_warning(self, *args, **kargs):
        """Forward to the owning ydl object's report_warning."""
        self.ydl.report_warning(*args, **kargs)

    def report_error(self, *args, **kargs):
        """Forward to the owning ydl object's report_error."""
        self.ydl.report_error(*args, **kargs)

    def slow_down(self, start_time, byte_counter):
        """Sleep if the download speed is over the rate limit."""
        rate_limit = self.params.get('ratelimit', None)
        if rate_limit is None or byte_counter == 0:
            return
        now = time.time()
        elapsed = now - start_time
        if elapsed <= 0.0:
            return
        speed = float(byte_counter) / elapsed
        if speed > rate_limit:
            # Sleep exactly long enough for the average to fall back to the limit.
            time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)

    def temp_name(self, filename):
        """Returns a temporary filename for the given filename.

        The name is returned unchanged when 'nopart' is set, when writing to
        '-', or when the path exists but is not a regular file.
        """
        if self.params.get('nopart', False) or filename == u'-' or \
                (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
            return filename
        return filename + u'.part'

    def undo_temp_name(self, filename):
        """Strip a trailing '.part' from filename, if present."""
        if filename.endswith(u'.part'):
            return filename[:-len(u'.part')]
        return filename

    def try_rename(self, old_filename, new_filename):
        """Rename old_filename to new_filename, reporting (not raising) failures."""
        try:
            if old_filename == new_filename:
                return
            os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
        except (IOError, OSError) as err:
            self.report_error(u'unable to rename file: %s' % str(err))

    def try_utime(self, filename, last_modified_hdr):
        """Try to set the last-modified time of the given file.

        Returns the converted timestamp, or None when the header is missing
        or unparsable, the file does not exist, or the timestamp is 0.
        Failures of os.utime itself are ignored (best effort).
        """
        if last_modified_hdr is None:
            return
        if not os.path.isfile(encodeFilename(filename)):
            return
        filetime = timeconvert(last_modified_hdr)
        if filetime is None:
            return filetime
        # Ignore obviously invalid dates
        if filetime == 0:
            return
        try:
            os.utime(filename, (time.time(), filetime))
        except Exception:
            # Setting the timestamp is optional; unlike a bare except this
            # still lets KeyboardInterrupt and SystemExit propagate.
            pass
        return filetime

    def report_destination(self, filename):
        """Report destination filename."""
        self.to_screen(u'[download] Destination: ' + filename)

    def _report_progress_status(self, msg, is_last_line=False):
        """Print a progress line, overwriting the previous one where possible."""
        fullmsg = u'[download] ' + msg
        if self.params.get('progress_with_newline', False):
            self.to_screen(fullmsg)
        else:
            if os.name == 'nt':
                # No erase-line sequence is used here, so pad with spaces to
                # cover whatever the previous (longer) line left behind.
                prev_len = getattr(self, '_report_progress_prev_line_length', 0)
                if prev_len > len(fullmsg):
                    fullmsg += u' ' * (prev_len - len(fullmsg))
                self._report_progress_prev_line_length = len(fullmsg)
                clear_line = u'\r'
            else:
                clear_line = (u'\r\x1b[K' if sys.stderr.isatty() else u'\r')
            self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
        self.to_console_title(u'youtube-dl ' + msg)

    def report_progress(self, percent, data_len_str, speed, eta):
        """Report download progress."""
        if self.params.get('noprogress', False):
            return
        if eta is not None:
            eta_str = self.format_eta(eta)
        else:
            eta_str = 'Unknown ETA'
        if percent is not None:
            percent_str = self.format_percent(percent)
        else:
            percent_str = 'Unknown %'
        speed_str = self.format_speed(speed)

        msg = (u'%s of %s at %s ETA %s' %
               (percent_str, data_len_str, speed_str, eta_str))
        self._report_progress_status(msg)

    def report_progress_live_stream(self, downloaded_data_len, speed, elapsed):
        """Report progress when the total size is unknown (bytes, speed, elapsed)."""
        if self.params.get('noprogress', False):
            return
        downloaded_str = format_bytes(downloaded_data_len)
        speed_str = self.format_speed(speed)
        elapsed_str = FileDownloader.format_seconds(elapsed)
        msg = u'%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str)
        self._report_progress_status(msg)

    def report_finish(self, data_len_str, tot_time):
        """Report download finished."""
        if self.params.get('noprogress', False):
            self.to_screen(u'[download] Download completed')
        else:
            self._report_progress_status(
                (u'100%% of %s in %s' %
                 (data_len_str, self.format_seconds(tot_time))),
                is_last_line=True)

    def report_resuming_byte(self, resume_len):
        """Report attempt to resume at given byte."""
        self.to_screen(u'[download] Resuming download at byte %s' % resume_len)

    def report_retry(self, count, retries):
        """Report retry in case of HTTP error 5xx"""
        self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen(u'[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen(u'[download] The file has already been downloaded')

    def report_unable_to_resume(self):
        """Report it was impossible to resume download."""
        self.to_screen(u'[download] Unable to resume')

    def download(self, filename, info_dict):
        """Download to a filename using the info from info_dict
        Return True on success and False otherwise
        """
        # Check file already present
        if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
            self.report_file_already_downloaded(filename)
            self._hook_progress({
                'filename': filename,
                'status': 'finished',
                'total_bytes': os.path.getsize(encodeFilename(filename)),
            })
            return True

        return self.real_download(filename, info_dict)

    def real_download(self, filename, info_dict):
        """Real download process. Redefine in subclasses."""
        raise NotImplementedError(u'This method must be implemented by sublcasses')

    def _hook_progress(self, status):
        """Call every registered progress hook with the status dictionary."""
        for ph in self._progress_hooks:
            ph(status)

    def add_progress_hook(self, ph):
        """ ph gets called on download progress, with a dictionary with the entries
        * filename: The final filename
        * status: One of "downloading" and "finished"

        It can also have some of the following entries:

        * downloaded_bytes: Bytes on disks
        * total_bytes: Total bytes, None if unknown
        * tmpfilename: The filename we're currently writing to
        * eta: The estimated time in seconds, None if unknown
        * speed: The download speed in bytes/second, None if unknown

        Hooks are guaranteed to be called at least once (with status "finished")
        if the download is successful.
        """
        self._progress_hooks.append(ph)

View File

@ -0,0 +1,314 @@
from __future__ import unicode_literals
import base64
import io
import itertools
import os
import time
import xml.etree.ElementTree as etree
from .common import FileDownloader
from .http import HttpFD
from ..utils import (
struct_pack,
struct_unpack,
compat_urlparse,
format_bytes,
encodeFilename,
sanitize_open,
)
class FlvReader(io.BytesIO):
    """
    Reader for Flv files
    The file format is documented in https://www.adobe.com/devnet/f4v.html
    """

    # Utility functions for reading numbers and strings
    def read_unsigned_long_long(self):
        """Read 8 bytes as a big-endian unsigned integer."""
        return struct_unpack('!Q', self.read(8))[0]

    def read_unsigned_int(self):
        """Read 4 bytes as a big-endian unsigned integer."""
        return struct_unpack('!I', self.read(4))[0]

    def read_unsigned_char(self):
        """Read 1 byte as an unsigned integer."""
        return struct_unpack('!B', self.read(1))[0]

    def read_string(self):
        """Read bytes up to (and consuming) the next NUL byte.

        Returns the bytes before the terminator.  If the data ends before a
        NUL is found, the bytes read so far are returned instead of looping
        forever on the empty reads that follow end-of-data.
        """
        chunks = []
        while True:
            char = self.read(1)
            # read() yields b'' at end-of-data; treat it like a terminator.
            if not char or char == b'\x00':
                break
            chunks.append(char)
        return b''.join(chunks)

    def read_box_info(self):
        """
        Read a box and return the info as a tuple: (box_size, box_type, box_data)
        """
        real_size = size = self.read_unsigned_int()
        box_type = self.read(4)
        header_end = 8
        if size == 1:
            # A 32-bit size of 1 means the real size follows as 64 bits,
            # making the header 16 bytes long instead of 8.
            real_size = self.read_unsigned_long_long()
            header_end = 16
        return real_size, box_type, self.read(real_size-header_end)

    def read_asrt(self):
        """Parse an 'asrt' box body.

        Returns {'segment_run': [(first_segment, fragments_per_segment), ...]}.
        """
        # version
        self.read_unsigned_char()
        # flags
        self.read(3)
        quality_entry_count = self.read_unsigned_char()
        # QualityEntryCount
        for i in range(quality_entry_count):
            self.read_string()

        segment_run_count = self.read_unsigned_int()
        segments = []
        for i in range(segment_run_count):
            first_segment = self.read_unsigned_int()
            fragments_per_segment = self.read_unsigned_int()
            segments.append((first_segment, fragments_per_segment))

        return {
            'segment_run': segments,
        }

    def read_afrt(self):
        """Parse an 'afrt' box body.

        Returns {'fragments': [...]}, one dict per entry with the keys
        'first', 'ts', 'duration' and 'discontinuity_indicator'.
        """
        # version
        self.read_unsigned_char()
        # flags
        self.read(3)
        # time scale
        self.read_unsigned_int()

        quality_entry_count = self.read_unsigned_char()
        # QualitySegmentUrlModifiers
        for i in range(quality_entry_count):
            self.read_string()

        fragments_count = self.read_unsigned_int()
        fragments = []
        for i in range(fragments_count):
            first = self.read_unsigned_int()
            first_ts = self.read_unsigned_long_long()
            duration = self.read_unsigned_int()
            # The indicator byte is only present when the duration is zero.
            if duration == 0:
                discontinuity_indicator = self.read_unsigned_char()
            else:
                discontinuity_indicator = None
            fragments.append({
                'first': first,
                'ts': first_ts,
                'duration': duration,
                'discontinuity_indicator': discontinuity_indicator,
            })

        return {
            'fragments': fragments,
        }

    def read_abst(self):
        """Parse an 'abst' box body.

        Header fields are read and discarded; only the nested tables are
        kept.  Returns {'segments': [...], 'fragments': [...]} holding the
        results of read_asrt and read_afrt for each nested box.
        """
        # version
        self.read_unsigned_char()
        # flags
        self.read(3)

        self.read_unsigned_int()  # BootstrapinfoVersion
        # Profile,Live,Update,Reserved
        self.read(1)
        # time scale
        self.read_unsigned_int()
        # CurrentMediaTime
        self.read_unsigned_long_long()
        # SmpteTimeCodeOffset
        self.read_unsigned_long_long()

        self.read_string()  # MovieIdentifier
        server_count = self.read_unsigned_char()
        # ServerEntryTable
        for i in range(server_count):
            self.read_string()
        quality_count = self.read_unsigned_char()
        # QualityEntryTable
        for i in range(quality_count):
            self.read_string()
        # DrmData
        self.read_string()
        # MetaData
        self.read_string()

        segments_count = self.read_unsigned_char()
        segments = []
        for i in range(segments_count):
            box_size, box_type, box_data = self.read_box_info()
            assert box_type == b'asrt'
            segment = FlvReader(box_data).read_asrt()
            segments.append(segment)
        fragments_run_count = self.read_unsigned_char()
        fragments = []
        for i in range(fragments_run_count):
            box_size, box_type, box_data = self.read_box_info()
            assert box_type == b'afrt'
            fragments.append(FlvReader(box_data).read_afrt())

        return {
            'segments': segments,
            'fragments': fragments,
        }

    def read_bootstrap_info(self):
        """Read the top-level box, which must be 'abst', and parse its body."""
        total_size, box_type, box_data = self.read_box_info()
        assert box_type == b'abst'
        return FlvReader(box_data).read_abst()
def read_bootstrap_info(bootstrap_bytes):
    """Parse raw bootstrap bytes with FlvReader and return the parsed info."""
    reader = FlvReader(bootstrap_bytes)
    return reader.read_bootstrap_info()
def build_fragments_list(boot_info):
    """ Return a list of (segment, fragment) for each fragment in the video

    Only the first segment run entry and the first fragment run table are
    used; every fragment is attributed to segment 1.  Fragment numbers count
    up consecutively from the first entry's 'first' value.
    """
    segment_run_table = boot_info['segments'][0]
    # I've only found videos with one segment
    segment_run_entry = segment_run_table['segment_run'][0]
    n_frags = segment_run_entry[1]
    fragment_run_entry_table = boot_info['fragments'][0]['fragments']
    first_frag_number = fragment_run_entry_table[0]['first']
    return [
        (1, frag_number)
        for frag_number in range(first_frag_number, first_frag_number + n_frags)
    ]
def write_flv_header(stream, metadata):
    """Emit the FLV preamble followed by a script-data tag carrying metadata."""
    emit = stream.write

    # FLV header, then the first four bytes of the FLV file body, then the
    # type byte of the FLVTAG (script data).
    for fixed_chunk in (
            b'FLV\x01',
            b'\x05',
            b'\x00\x00\x00\x09',
            b'\x00\x00\x00\x00',
            b'\x12'):
        emit(fixed_chunk)

    # The metadata size is stored in 3 bytes: pack as 4 and drop the first.
    emit(struct_pack('!L', len(metadata))[1:])
    emit(b'\x00\x00\x00\x00\x00\x00\x00')
    emit(metadata)

    # Magic numbers extracted from the output files produced by AdobeHDS.php
    # (https://github.com/K-S-V/Scripts)
    emit(b'\x00\x00\x01\x73')
def _add_ns(prop):
return '{http://ns.adobe.com/f4m/1.0}%s' % prop
class HttpQuietDownloader(HttpFD):
    """HttpFD variant whose to_screen discards every message."""

    def to_screen(self, *args, **kargs):
        """Accept any arguments and print nothing."""
        return None
class F4mFD(FileDownloader):
    """
    A downloader for f4m manifests or AdobeHDS.
    """

    def real_download(self, filename, info_dict):
        """Download every fragment listed by the manifest and join them.

        The manifest at info_dict['url'] is fetched, the media entry with the
        highest bitrate is selected, and each fragment is downloaded to its
        own file next to the destination.  The 'mdat' box of each fragment is
        appended to the destination after an FLV header.  Returns True on
        success and False as soon as one fragment fails to download.
        """
        man_url = info_dict['url']
        self.to_screen('[download] Downloading f4m manifest')
        manifest = self.ydl.urlopen(man_url).read()
        self.report_destination(filename)
        http_dl = HttpQuietDownloader(
            self.ydl,
            {
                'continuedl': True,
                'quiet': True,
                'noprogress': True,
                'test': self.params.get('test', False),
            })

        doc = etree.fromstring(manifest)
        formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
        formats = sorted(formats, key=lambda f: f[0])
        # Highest bitrate last; the bitrate itself is not needed afterwards.
        _, media = formats[-1]
        base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
        bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text)
        metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
        boot_info = read_bootstrap_info(bootstrap)
        fragments_list = build_fragments_list(boot_info)
        if self.params.get('test', False):
            # We only download the first fragment
            fragments_list = fragments_list[:1]
        total_frags = len(fragments_list)

        tmpfilename = self.temp_name(filename)
        (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')

        # This dict stores the download progress, it's updated by the progress
        # hook
        state = {
            'downloaded_bytes': 0,
            'frag_counter': 0,
        }
        start = time.time()

        def frag_progress_hook(status):
            # The HTTP downloader reports total_bytes as None when the size
            # is unknown; count it as 0 so the arithmetic below still works.
            frag_total_bytes = status.get('total_bytes') or 0
            estimated_size = (state['downloaded_bytes'] +
                              (total_frags - state['frag_counter']) * frag_total_bytes)
            if status['status'] == 'finished':
                state['downloaded_bytes'] += frag_total_bytes
                state['frag_counter'] += 1
                progress = self.calc_percent(state['frag_counter'], total_frags)
                byte_counter = state['downloaded_bytes']
            else:
                frag_downloaded_bytes = status['downloaded_bytes']
                byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes
                if frag_total_bytes:
                    frag_progress = self.calc_percent(frag_downloaded_bytes,
                                                      frag_total_bytes)
                else:
                    # Unknown fragment size: no partial credit for it.
                    frag_progress = 0.0
                progress = self.calc_percent(state['frag_counter'], total_frags)
                progress += frag_progress / float(total_frags)

            eta = self.calc_eta(start, time.time(), estimated_size, byte_counter)
            self.report_progress(progress, format_bytes(estimated_size),
                                 status.get('speed'), eta)
        http_dl.add_progress_hook(frag_progress_hook)

        frags_filenames = []
        try:
            write_flv_header(dest_stream, metadata)
            for (seg_i, frag_i) in fragments_list:
                name = 'Seg%d-Frag%d' % (seg_i, frag_i)
                url = base_url + name
                frag_filename = '%s-%s' % (tmpfilename, name)
                success = http_dl.download(frag_filename, {'url': url})
                if not success:
                    return False
                with open(frag_filename, 'rb') as down:
                    down_data = down.read()
                reader = FlvReader(down_data)
                # Skip boxes until the media data box, which is the payload.
                while True:
                    _, box_type, box_data = reader.read_box_info()
                    if box_type == b'mdat':
                        dest_stream.write(box_data)
                        break
                frags_filenames.append(frag_filename)
        finally:
            # Always release the destination, also on the early return, and
            # before it gets renamed below.
            dest_stream.close()

        self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)

        self.try_rename(tmpfilename, filename)
        for frag_file in frags_filenames:
            os.remove(frag_file)

        fsize = os.path.getsize(encodeFilename(filename))
        self._hook_progress({
            'downloaded_bytes': fsize,
            'total_bytes': fsize,
            'filename': filename,
            'status': 'finished',
        })

        return True

View File

@ -0,0 +1,44 @@
import os
import subprocess
from .common import FileDownloader
from ..utils import (
encodeFilename,
)
class HlsFD(FileDownloader):
    """Downloader that hands m3u8 URLs to avconv or ffmpeg."""

    def real_download(self, filename, info_dict):
        """Copy the streams at info_dict['url'] into an mp4 file.

        avconv is tried first, then ffmpeg.  Returns True when the program
        exits with status 0, and False when neither program can be run or
        the program exits with a non-zero status.
        """
        url = info_dict['url']
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)

        args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy',
                '-bsf:a', 'aac_adtstoasc', tmpfilename]

        for program in ['avconv', 'ffmpeg']:
            try:
                # Probe only: the output is discarded, and the devnull handle
                # is closed again right away.
                with open(os.path.devnull, 'w') as devnull:
                    subprocess.call([program, '-version'], stdout=devnull, stderr=subprocess.STDOUT)
                break
            except (OSError, IOError):
                pass
        else:
            self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found')
            # Without this the code would go on to run a missing program.
            return False
        cmd = [program] + args

        retval = subprocess.call(cmd)
        if retval == 0:
            fsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen(u'\r[%s] %s bytes' % (cmd[0], fsize))
            self.try_rename(tmpfilename, filename)
            self._hook_progress({
                'downloaded_bytes': fsize,
                'total_bytes': fsize,
                'filename': filename,
                'status': 'finished',
            })
            return True
        else:
            self.to_stderr(u"\n")
            # Name the program that actually ran (it may have been avconv).
            self.report_error(u'%s exited with code %d' % (program, retval))
            return False

View File

@ -0,0 +1,187 @@
import os
import time
from .common import FileDownloader
from ..utils import (
compat_urllib_request,
compat_urllib_error,
ContentTooShortError,
encodeFilename,
sanitize_open,
format_bytes,
)
class HttpFD(FileDownloader):
    """Downloader for plain HTTP(S) URLs, with resume, retry and rate limiting."""

    # NOTE(review): the scraped source carries no indentation; the nesting
    # below is reconstructed from the statement order -- confirm against the
    # real file, especially the min/max size checks and the retry counter.
    def real_download(self, filename, info_dict):
        """Download info_dict['url'] into filename.

        Returns True on success (including when the file turns out to be
        complete already) and False on reported errors.  Raises
        ContentTooShortError when fewer bytes arrive than were announced, and
        re-raises HTTP errors outside the 5xx range (other than 416).
        """
        url = info_dict['url']
        tmpfilename = self.temp_name(filename)
        stream = None

        # Do not include the Accept-Encoding header
        headers = {'Youtubedl-no-compression': 'True'}
        if 'user_agent' in info_dict:
            headers['Youtubedl-user-agent'] = info_dict['user_agent']
        # basic_request never gets a Range header; it is the fallback used
        # when the server rejects the ranged request with 416.
        basic_request = compat_urllib_request.Request(url, None, headers)
        request = compat_urllib_request.Request(url, None, headers)

        if self.params.get('test', False):
            request.add_header('Range', 'bytes=0-10240')

        # Establish possible resume length
        if os.path.isfile(encodeFilename(tmpfilename)):
            resume_len = os.path.getsize(encodeFilename(tmpfilename))
        else:
            resume_len = 0

        open_mode = 'wb'
        if resume_len != 0:
            if self.params.get('continuedl', False):
                self.report_resuming_byte(resume_len)
                request.add_header('Range', 'bytes=%d-' % resume_len)
                open_mode = 'ab'
            else:
                # Not continuing: the existing partial file gets overwritten.
                resume_len = 0

        count = 0
        retries = self.params.get('retries', 0)
        while count <= retries:
            # Establish connection
            try:
                data = self.ydl.urlopen(request)
                break
            except (compat_urllib_error.HTTPError, ) as err:
                if (err.code < 500 or err.code >= 600) and err.code != 416:
                    # Unexpected HTTP error
                    raise
                elif err.code == 416:
                    # Unable to resume (requested range not satisfiable)
                    try:
                        # Open the connection again without the range header
                        data = self.ydl.urlopen(basic_request)
                        content_length = data.info()['Content-Length']
                    except (compat_urllib_error.HTTPError, ) as err:
                        if err.code < 500 or err.code >= 600:
                            raise
                    else:
                        # Examine the reported length
                        if (content_length is not None and
                                (resume_len - 100 < int(content_length) < resume_len + 100)):
                            # The file had already been fully downloaded.
                            # Explanation to the above condition: in issue #175 it was revealed that
                            # YouTube sometimes adds or removes a few bytes from the end of the file,
                            # changing the file size slightly and causing problems for some users. So
                            # I decided to implement a suggested change and consider the file
                            # completely downloaded if the file size differs less than 100 bytes from
                            # the one in the hard drive.
                            self.report_file_already_downloaded(filename)
                            self.try_rename(tmpfilename, filename)
                            self._hook_progress({
                                'filename': filename,
                                'status': 'finished',
                            })
                            return True
                        else:
                            # The length does not match, we start the download over
                            self.report_unable_to_resume()
                            resume_len = 0
                            open_mode = 'wb'
                            break
            # Retry
            count += 1
            if count <= retries:
                self.report_retry(count, retries)

        # The loop only ends without a break once every attempt has failed.
        if count > retries:
            self.report_error(u'giving up after %s retries' % retries)
            return False

        data_len = data.info().get('Content-length', None)
        if data_len is not None:
            # Content-length covers only what is still to come; add what is
            # already on disk to get the full size.
            data_len = int(data_len) + resume_len
            min_data_len = self.params.get("min_filesize", None)
            max_data_len = self.params.get("max_filesize", None)
            if min_data_len is not None and data_len < min_data_len:
                self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
                return False
            if max_data_len is not None and data_len > max_data_len:
                self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
                return False

        data_len_str = format_bytes(data_len)
        byte_counter = 0 + resume_len
        block_size = self.params.get('buffersize', 1024)
        start = time.time()
        while True:
            # Download and write
            before = time.time()
            data_block = data.read(block_size)
            after = time.time()
            if len(data_block) == 0:
                break
            byte_counter += len(data_block)

            # Open file just in time
            if stream is None:
                try:
                    # sanitize_open may hand back a different name than asked for.
                    (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
                    assert stream is not None
                    filename = self.undo_temp_name(tmpfilename)
                    self.report_destination(filename)
                except (OSError, IOError) as err:
                    self.report_error(u'unable to open for writing: %s' % str(err))
                    return False
            try:
                stream.write(data_block)
            except (IOError, OSError) as err:
                self.to_stderr(u"\n")
                self.report_error(u'unable to write data: %s' % str(err))
                return False
            if not self.params.get('noresizebuffer', False):
                block_size = self.best_block_size(after - before, len(data_block))

            # Progress message
            # Speed and ETA only count bytes fetched in this session.
            speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
            if data_len is None:
                eta = percent = None
            else:
                percent = self.calc_percent(byte_counter, data_len)
                eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
            self.report_progress(percent, data_len_str, speed, eta)

            self._hook_progress({
                'downloaded_bytes': byte_counter,
                'total_bytes': data_len,
                'tmpfilename': tmpfilename,
                'filename': filename,
                'status': 'downloading',
                'eta': eta,
                'speed': speed,
            })

            # Apply rate limit
            self.slow_down(start, byte_counter - resume_len)

        # stream is still None when the very first read returned no data.
        if stream is None:
            self.to_stderr(u"\n")
            self.report_error(u'Did not get any data blocks')
            return False
        stream.close()
        self.report_finish(data_len_str, (time.time() - start))
        if data_len is not None and byte_counter != data_len:
            raise ContentTooShortError(byte_counter, int(data_len))
        self.try_rename(tmpfilename, filename)

        # Update file modification time
        if self.params.get('updatetime', True):
            info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))

        self._hook_progress({
            'downloaded_bytes': byte_counter,
            'total_bytes': byte_counter,
            'filename': filename,
            'status': 'finished',
        })

        return True

View File

@ -0,0 +1,40 @@
import os
import subprocess
from .common import FileDownloader
from ..utils import (
encodeFilename,
)
class MplayerFD(FileDownloader):
    """Downloader that dumps a stream to disk by running mplayer."""

    def real_download(self, filename, info_dict):
        """Dump the stream at info_dict['url'] with ``mplayer -dumpstream``.

        Returns True when mplayer exits with status 0, and False when mplayer
        cannot be run or exits with a non-zero status.
        """
        url = info_dict['url']
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)

        args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url]
        # Check for mplayer first
        try:
            # Probe only: the output is discarded, and the devnull handle is
            # closed again right away.
            with open(os.path.devnull, 'w') as devnull:
                subprocess.call(['mplayer', '-h'], stdout=devnull, stderr=subprocess.STDOUT)
        except (OSError, IOError):
            self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0])
            return False

        # Download using mplayer.
        retval = subprocess.call(args)
        if retval == 0:
            fsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
            self.try_rename(tmpfilename, filename)
            self._hook_progress({
                'downloaded_bytes': fsize,
                'total_bytes': fsize,
                'filename': filename,
                'status': 'finished',
            })
            return True
        else:
            self.to_stderr(u"\n")
            self.report_error(u'mplayer exited with code %d' % retval)
            return False

View File

@ -0,0 +1,195 @@
from __future__ import unicode_literals
import os
import re
import subprocess
import sys
import time
from .common import FileDownloader
from ..utils import (
encodeFilename,
format_bytes,
)
class RtmpFD(FileDownloader):
    """Downloader that fetches RTMP streams by running rtmpdump."""

    def real_download(self, filename, info_dict):
        """Download info_dict['url'] with rtmpdump, resuming while possible.

        Optional info_dict keys (player_url, page_url, app, play_path,
        tc_url, flash_version, rtmp_live, rtmp_conn) are turned into the
        matching rtmpdump options.  Returns True on success and False when
        rtmpdump cannot be run, cannot connect, or ends with an error.
        """
        def run_rtmpdump(args):
            """Run rtmpdump, turning its stderr into progress reports.

            Returns the exit code of the process.
            """
            start = time.time()
            resume_percent = None
            resume_downloaded_data_len = None
            proc = subprocess.Popen(args, stderr=subprocess.PIPE)
            cursor_in_new_line = True
            proc_stderr_closed = False
            while not proc_stderr_closed:
                # read line from stderr
                # (byte by byte, because progress updates end in '\r')
                line = ''
                while True:
                    char = proc.stderr.read(1)
                    if not char:
                        proc_stderr_closed = True
                        break
                    if char in [b'\r', b'\n']:
                        break
                    line += char.decode('ascii', 'replace')
                if not line:
                    # proc_stderr_closed is True
                    continue
                mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
                if mobj:
                    downloaded_data_len = int(float(mobj.group(1))*1024)
                    percent = float(mobj.group(2))
                    # Remember where this run started, so that speed and ETA
                    # only count what this run has fetched.
                    if not resume_percent:
                        resume_percent = percent
                        resume_downloaded_data_len = downloaded_data_len
                    eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent)
                    speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len)
                    data_len = None
                    if percent > 0:
                        # Extrapolate the total size from the percentage.
                        data_len = int(downloaded_data_len * 100 / percent)
                    data_len_str = '~' + format_bytes(data_len)
                    self.report_progress(percent, data_len_str, speed, eta)
                    cursor_in_new_line = False
                    self._hook_progress({
                        'downloaded_bytes': downloaded_data_len,
                        'total_bytes': data_len,
                        'tmpfilename': tmpfilename,
                        'filename': filename,
                        'status': 'downloading',
                        'eta': eta,
                        'speed': speed,
                    })
                else:
                    # no percent for live streams
                    mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
                    if mobj:
                        downloaded_data_len = int(float(mobj.group(1))*1024)
                        time_now = time.time()
                        speed = self.calc_speed(start, time_now, downloaded_data_len)
                        self.report_progress_live_stream(downloaded_data_len, speed, time_now - start)
                        cursor_in_new_line = False
                        self._hook_progress({
                            'downloaded_bytes': downloaded_data_len,
                            'tmpfilename': tmpfilename,
                            'filename': filename,
                            'status': 'downloading',
                            'speed': speed,
                        })
                    elif self.params.get('verbose', False):
                        if not cursor_in_new_line:
                            self.to_screen('')
                        cursor_in_new_line = True
                        self.to_screen('[rtmpdump] '+line)
            proc.wait()
            if not cursor_in_new_line:
                self.to_screen('')
            return proc.returncode

        url = info_dict['url']
        player_url = info_dict.get('player_url', None)
        page_url = info_dict.get('page_url', None)
        app = info_dict.get('app', None)
        play_path = info_dict.get('play_path', None)
        tc_url = info_dict.get('tc_url', None)
        flash_version = info_dict.get('flash_version', None)
        live = info_dict.get('rtmp_live', False)
        conn = info_dict.get('rtmp_conn', None)

        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)
        test = self.params.get('test', False)

        # Check for rtmpdump first
        try:
            # Probe only: the output is discarded, and the devnull handle is
            # closed again right away.
            with open(os.path.devnull, 'w') as devnull:
                subprocess.call(['rtmpdump', '-h'], stdout=devnull, stderr=subprocess.STDOUT)
        except (OSError, IOError):
            self.report_error('RTMP download detected but "rtmpdump" could not be run')
            return False

        # Download using rtmpdump. rtmpdump returns exit code 2 when
        # the connection was interrupted and resuming appears to be
        # possible. This is part of rtmpdump's normal usage, AFAIK.
        basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
        if player_url is not None:
            basic_args += ['--swfVfy', player_url]
        if page_url is not None:
            basic_args += ['--pageUrl', page_url]
        if app is not None:
            basic_args += ['--app', app]
        if play_path is not None:
            basic_args += ['--playpath', play_path]
        if tc_url is not None:
            # Pass the extractor-supplied tcUrl, not the stream URL.
            basic_args += ['--tcUrl', tc_url]
        if test:
            basic_args += ['--stop', '1']
        if flash_version is not None:
            basic_args += ['--flashVer', flash_version]
        if live:
            basic_args += ['--live']
        if conn:
            basic_args += ['--conn', conn]
        if not live and self.params.get('continuedl', False):
            args = basic_args + ['--resume', '--skip', '1']
        else:
            args = list(basic_args)

        if sys.platform == 'win32' and sys.version_info < (3, 0):
            # Windows subprocess module does not actually support Unicode
            # on Python 2.x
            # See http://stackoverflow.com/a/9951851/35070
            subprocess_encoding = sys.getfilesystemencoding()
            args = [a.encode(subprocess_encoding, 'ignore') for a in args]
        else:
            subprocess_encoding = None

        if self.params.get('verbose', False):
            if subprocess_encoding:
                str_args = [
                    a.decode(subprocess_encoding) if isinstance(a, bytes) else a
                    for a in args]
            else:
                str_args = args
            try:
                import pipes
                shell_quote = lambda quoted_args: ' '.join(map(pipes.quote, quoted_args))
            except ImportError:
                shell_quote = repr
            self.to_screen('[debug] rtmpdump command line: ' + shell_quote(str_args))

        RD_SUCCESS = 0
        RD_FAILED = 1
        RD_INCOMPLETE = 2
        RD_NO_CONNECT = 3

        retval = run_rtmpdump(args)

        if retval == RD_NO_CONNECT:
            self.report_error('[rtmpdump] Could not connect to RTMP server.')
            return False

        while (retval == RD_INCOMPLETE or retval == RD_FAILED) and not test and not live:
            prevsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen('[rtmpdump] %s bytes' % prevsize)
            time.sleep(5.0)  # This seems to be needed
            # Resume with -e; after an outright failure also pass -k 1.
            resume_args = basic_args + ['-e']
            if retval == RD_FAILED:
                resume_args += ['-k', '1']
            retval = run_rtmpdump(resume_args)
            cursize = os.path.getsize(encodeFilename(tmpfilename))
            # No new data and still failing: give up.
            if prevsize == cursize and retval == RD_FAILED:
                break
            # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
            if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024:
                self.to_screen('[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
                retval = RD_SUCCESS
                break
        # In test mode only a short piece is requested (--stop 1), so an
        # "incomplete" exit is accepted as success there.
        if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
            fsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen('[rtmpdump] %s bytes' % fsize)
            self.try_rename(tmpfilename, filename)
            self._hook_progress({
                'downloaded_bytes': fsize,
                'total_bytes': fsize,
                'filename': filename,
                'status': 'finished',
            })
            return True
        else:
            self.to_stderr('\n')
            self.report_error('rtmpdump exited with code %d' % retval)
            return False

View File

@ -1,100 +1,193 @@
from .appletrailers import AppleTrailersIE from .academicearth import AcademicEarthCourseIE
from .addanime import AddAnimeIE from .addanime import AddAnimeIE
from .aftonbladet import AftonbladetIE
from .anitube import AnitubeIE
from .aol import AolIE
from .aparat import AparatIE
from .appletrailers import AppleTrailersIE
from .archiveorg import ArchiveOrgIE from .archiveorg import ArchiveOrgIE
from .ard import ARDIE from .ard import ARDIE
from .arte import ( from .arte import (
ArteTvIE, ArteTvIE,
ArteTVPlus7IE, ArteTVPlus7IE,
ArteTVCreativeIE, ArteTVCreativeIE,
ArteTVConcertIE,
ArteTVFutureIE, ArteTVFutureIE,
ArteTVDDCIE,
) )
from .auengine import AUEngineIE from .auengine import AUEngineIE
from .bandcamp import BandcampIE from .bambuser import BambuserIE, BambuserChannelIE
from .bandcamp import BandcampIE, BandcampAlbumIE
from .bbccouk import BBCCoUkIE
from .blinkx import BlinkxIE
from .bliptv import BlipTVIE, BlipTVUserIE from .bliptv import BlipTVIE, BlipTVUserIE
from .bloomberg import BloombergIE from .bloomberg import BloombergIE
from .br import BRIE
from .breakcom import BreakIE from .breakcom import BreakIE
from .brightcove import BrightcoveIE from .brightcove import BrightcoveIE
from .c56 import C56IE from .c56 import C56IE
from .canal13cl import Canal13clIE
from .canalplus import CanalplusIE from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE from .canalc2 import Canalc2IE
from .cbs import CBSIE
from .ceskatelevize import CeskaTelevizeIE
from .channel9 import Channel9IE
from .chilloutzone import ChilloutzoneIE
from .cinemassacre import CinemassacreIE from .cinemassacre import CinemassacreIE
from .cnn import CNNIE from .clipfish import ClipfishIE
from .cliphunter import CliphunterIE
from .clipsyndicate import ClipsyndicateIE
from .cmt import CMTIE
from .cnn import (
CNNIE,
CNNBlogsIE,
)
from .collegehumor import CollegeHumorIE from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
from .condenast import CondeNastIE from .condenast import CondeNastIE
from .criterion import CriterionIE from .criterion import CriterionIE
from .crunchyroll import CrunchyrollIE
from .cspan import CSpanIE from .cspan import CSpanIE
from .d8 import D8IE
from .dailymotion import ( from .dailymotion import (
DailymotionIE, DailymotionIE,
DailymotionPlaylistIE, DailymotionPlaylistIE,
DailymotionUserIE, DailymotionUserIE,
) )
from .daum import DaumIE from .daum import DaumIE
from .depositfiles import DepositFilesIE
from .dotsub import DotsubIE from .dotsub import DotsubIE
from .dreisat import DreiSatIE from .dreisat import DreiSatIE
from .defense import DefenseGouvFrIE from .defense import DefenseGouvFrIE
from .discovery import DiscoveryIE
from .dropbox import DropboxIE
from .ebaumsworld import EbaumsWorldIE from .ebaumsworld import EbaumsWorldIE
from .ehow import EHowIE from .ehow import EHowIE
from .eighttracks import EightTracksIE from .eighttracks import EightTracksIE
from .eitb import EitbIE
from .elpais import ElPaisIE
from .engadget import EngadgetIE
from .escapist import EscapistIE from .escapist import EscapistIE
from .everyonesmixtape import EveryonesMixtapeIE
from .exfm import ExfmIE from .exfm import ExfmIE
from .extremetube import ExtremeTubeIE
from .facebook import FacebookIE from .facebook import FacebookIE
from .faz import FazIE from .faz import FazIE
from .firstpost import FirstpostIE
from .firsttv import FirstTVIE
from .fivemin import FiveMinIE
from .fktv import ( from .fktv import (
FKTVIE, FKTVIE,
FKTVPosteckeIE, FKTVPosteckeIE,
) )
from .flickr import FlickrIE from .flickr import FlickrIE
from .fourtube import FourTubeIE
from .franceinter import FranceInterIE
from .francetv import ( from .francetv import (
PluzzIE, PluzzIE,
FranceTvInfoIE, FranceTvInfoIE,
France2IE, FranceTVIE,
GenerationQuoiIE GenerationQuoiIE,
CultureboxIE,
) )
from .freesound import FreesoundIE from .freesound import FreesoundIE
from .freespeech import FreespeechIE
from .funnyordie import FunnyOrDieIE from .funnyordie import FunnyOrDieIE
from .gamekings import GamekingsIE
from .gamespot import GameSpotIE from .gamespot import GameSpotIE
from .gametrailers import GametrailersIE from .gametrailers import GametrailersIE
from .gdcvault import GDCVaultIE
from .generic import GenericIE from .generic import GenericIE
from .googleplus import GooglePlusIE from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE from .googlesearch import GoogleSearchIE
from .hark import HarkIE from .hark import HarkIE
from .helsinki import HelsinkiIE
from .hotnewhiphop import HotNewHipHopIE from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE from .howcast import HowcastIE
from .huffpost import HuffPostIE
from .hypem import HypemIE from .hypem import HypemIE
from .ign import IGNIE, OneUPIE from .ign import IGNIE, OneUPIE
from .imdb import (
ImdbIE,
ImdbListIE
)
from .ina import InaIE from .ina import InaIE
from .infoq import InfoQIE from .infoq import InfoQIE
from .instagram import InstagramIE from .instagram import InstagramIE, InstagramUserIE
from .internetvideoarchive import InternetVideoArchiveIE from .internetvideoarchive import InternetVideoArchiveIE
from .iprima import IPrimaIE
from .ivi import (
IviIE,
IviCompilationIE
)
from .jadorecettepub import JadoreCettePubIE
from .jeuxvideo import JeuxVideoIE from .jeuxvideo import JeuxVideoIE
from .jukebox import JukeboxIE from .jukebox import JukeboxIE
from .justintv import JustinTVIE from .justintv import JustinTVIE
from .jpopsukitv import JpopsukiIE
from .kankan import KankanIE from .kankan import KankanIE
from .keezmovies import KeezMoviesIE
from .khanacademy import KhanAcademyIE
from .kickstarter import KickStarterIE from .kickstarter import KickStarterIE
from .keek import KeekIE from .keek import KeekIE
from .kontrtube import KontrTubeIE
from .la7 import LA7IE
from .lifenews import LifeNewsIE
from .liveleak import LiveLeakIE from .liveleak import LiveLeakIE
from .livestream import LivestreamIE from .livestream import LivestreamIE, LivestreamOriginalIE
from .lynda import (
LyndaIE,
LyndaCourseIE
)
from .m6 import M6IE
from .macgamestore import MacGameStoreIE
from .mailru import MailRuIE
from .malemotion import MalemotionIE
from .mdr import MDRIE
from .metacafe import MetacafeIE from .metacafe import MetacafeIE
from .metacritic import MetacriticIE from .metacritic import MetacriticIE
from .mit import TechTVMITIE, MITIE from .mit import TechTVMITIE, MITIE, OCWMITIE
from .mixcloud import MixcloudIE from .mixcloud import MixcloudIE
from .mtv import MTVIE from .mpora import MporaIE
from .mofosex import MofosexIE
from .mooshare import MooshareIE
from .mtv import (
MTVIE,
MTVIggyIE,
)
from .muzu import MuzuTVIE from .muzu import MuzuTVIE
from .myspace import MySpaceIE
from .myspass import MySpassIE from .myspass import MySpassIE
from .myvideo import MyVideoIE from .myvideo import MyVideoIE
from .naver import NaverIE from .naver import NaverIE
from .nba import NBAIE from .nba import NBAIE
from .nbc import NBCNewsIE from .nbc import (
NBCIE,
NBCNewsIE,
)
from .ndr import NDRIE
from .ndtv import NDTVIE
from .newgrounds import NewgroundsIE from .newgrounds import NewgroundsIE
from .nfb import NFBIE
from .nhl import NHLIE, NHLVideocenterIE from .nhl import NHLIE, NHLVideocenterIE
from .niconico import NiconicoIE
from .ninegag import NineGagIE
from .normalboots import NormalbootsIE
from .novamov import NovaMovIE
from .nowness import NownessIE
from .nowvideo import NowVideoIE from .nowvideo import NowVideoIE
from .ooyala import OoyalaIE from .ooyala import OoyalaIE
from .orf import ORFIE from .orf import ORFIE
from .parliamentliveuk import ParliamentLiveUKIE
from .pbs import PBSIE from .pbs import PBSIE
from .photobucket import PhotobucketIE from .photobucket import PhotobucketIE
from .playvid import PlayvidIE
from .podomatic import PodomaticIE
from .pornhd import PornHdIE
from .pornhub import PornHubIE
from .pornotube import PornotubeIE from .pornotube import PornotubeIE
from .prosiebensat1 import ProSiebenSat1IE
from .pyvideo import PyvideoIE
from .radiofrance import RadioFranceIE
from .rbmaradio import RBMARadioIE from .rbmaradio import RBMARadioIE
from .redtube import RedTubeIE from .redtube import RedTubeIE
from .ringtv import RingTVIE from .ringtv import RingTVIE
@ -102,65 +195,126 @@ from .ro220 import Ro220IE
from .rottentomatoes import RottenTomatoesIE from .rottentomatoes import RottenTomatoesIE
from .roxwel import RoxwelIE from .roxwel import RoxwelIE
from .rtlnow import RTLnowIE from .rtlnow import RTLnowIE
from .rutube import RutubeIE from .rts import RTSIE
from .rutube import (
RutubeIE,
RutubeChannelIE,
RutubeMovieIE,
RutubePersonIE,
)
from .rutv import RUTVIE
from .savefrom import SaveFromIE
from .servingsys import ServingSysIE
from .sina import SinaIE from .sina import SinaIE
from .slashdot import SlashdotIE from .slashdot import SlashdotIE
from .slideshare import SlideshareIE from .slideshare import SlideshareIE
from .smotri import (
SmotriIE,
SmotriCommunityIE,
SmotriUserIE,
SmotriBroadcastIE,
)
from .sohu import SohuIE from .sohu import SohuIE
from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
from .southparkstudios import SouthParkStudiosIE from .southparkstudios import (
SouthParkStudiosIE,
SouthparkDeIE,
)
from .space import SpaceIE
from .spankwire import SpankwireIE
from .spiegel import SpiegelIE from .spiegel import SpiegelIE
from .spike import SpikeIE
from .stanfordoc import StanfordOpenClassroomIE from .stanfordoc import StanfordOpenClassroomIE
from .statigram import StatigramIE from .statigram import StatigramIE
from .steam import SteamIE from .steam import SteamIE
from .streamcloud import StreamcloudIE
from .streamcz import StreamCZIE
from .syfy import SyfyIE
from .sztvhu import SztvHuIE from .sztvhu import SztvHuIE
from .teamcoco import TeamcocoIE from .teamcoco import TeamcocoIE
from .techtalks import TechTalksIE from .techtalks import TechTalksIE
from .ted import TEDIE from .ted import TEDIE
from .testurl import TestURLIE
from .tf1 import TF1IE from .tf1 import TF1IE
from .theplatform import ThePlatformIE
from .thisav import ThisAVIE from .thisav import ThisAVIE
from .tinypic import TinyPicIE
from .toutv import TouTvIE
from .toypics import ToypicsUserIE, ToypicsIE
from .traileraddict import TrailerAddictIE from .traileraddict import TrailerAddictIE
from .trilulilu import TriluliluIE from .trilulilu import TriluliluIE
from .trutube import TruTubeIE
from .tube8 import Tube8IE
from .tudou import TudouIE from .tudou import TudouIE
from .tumblr import TumblrIE from .tumblr import TumblrIE
from .tutv import TutvIE from .tutv import TutvIE
from .tvigle import TvigleIE
from .tvp import TvpIE
from .udemy import (
UdemyIE,
UdemyCourseIE
)
from .unistra import UnistraIE from .unistra import UnistraIE
from .ustream import UstreamIE, UstreamChannelIE from .ustream import UstreamIE, UstreamChannelIE
from .vbox7 import Vbox7IE from .vbox7 import Vbox7IE
from .veehd import VeeHDIE from .veehd import VeeHDIE
from .veoh import VeohIE from .veoh import VeohIE
from .vesti import VestiIE
from .vevo import VevoIE from .vevo import VevoIE
from .vice import ViceIE from .vice import ViceIE
from .viddler import ViddlerIE from .viddler import ViddlerIE
from .videobam import VideoBamIE
from .videodetective import VideoDetectiveIE from .videodetective import VideoDetectiveIE
from .videolecturesnet import VideoLecturesNetIE
from .videofyme import VideofyMeIE from .videofyme import VideofyMeIE
from .videopremium import VideoPremiumIE from .videopremium import VideoPremiumIE
from .vimeo import VimeoIE, VimeoChannelIE from .vimeo import (
VimeoIE,
VimeoChannelIE,
VimeoUserIE,
VimeoAlbumIE,
VimeoGroupsIE,
VimeoReviewIE,
)
from .vine import VineIE from .vine import VineIE
from .viki import VikiIE
from .vk import VKIE
from .vube import VubeIE
from .wat import WatIE from .wat import WatIE
from .websurg import WeBSurgIE from .wdr import WDRIE
from .weibo import WeiboIE from .weibo import WeiboIE
from .wimp import WimpIE from .wimp import WimpIE
from .wistia import WistiaIE
from .worldstarhiphop import WorldStarHipHopIE from .worldstarhiphop import WorldStarHipHopIE
from .xbef import XBefIE
from .xhamster import XHamsterIE from .xhamster import XHamsterIE
from .xnxx import XNXXIE from .xnxx import XNXXIE
from .xvideos import XVideosIE from .xvideos import XVideosIE
from .yahoo import YahooIE, YahooSearchIE from .xtube import XTubeUserIE, XTubeIE
from .yahoo import (
YahooIE,
YahooNewsIE,
YahooSearchIE,
)
from .youjizz import YouJizzIE from .youjizz import YouJizzIE
from .youku import YoukuIE from .youku import YoukuIE
from .youporn import YouPornIE from .youporn import YouPornIE
from .youtube import ( from .youtube import (
YoutubeIE, YoutubeIE,
YoutubePlaylistIE,
YoutubeSearchIE,
YoutubeUserIE,
YoutubeChannelIE, YoutubeChannelIE,
YoutubeFavouritesIE,
YoutubeHistoryIE,
YoutubePlaylistIE,
YoutubeRecommendedIE,
YoutubeSearchDateIE,
YoutubeSearchIE,
YoutubeSearchURLIE,
YoutubeShowIE, YoutubeShowIE,
YoutubeSubscriptionsIE, YoutubeSubscriptionsIE,
YoutubeRecommendedIE, YoutubeTopListIE,
YoutubeTruncatedURLIE, YoutubeTruncatedURLIE,
YoutubeUserIE,
YoutubeWatchLaterIE, YoutubeWatchLaterIE,
YoutubeFavouritesIE,
) )
from .zdf import ZDFIE from .zdf import ZDFIE

View File

@ -0,0 +1,32 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class AcademicEarthCourseIE(InfoExtractor):
    """Extractor for academicearth.org course playlists."""

    _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
    IE_NAME = 'AcademicEarth:Course'

    def _real_extract(self, url):
        """Build a playlist result from the course page at ``url``.

        The playlist id is the ``id`` group of ``_VALID_URL``.  The title is
        required; the description is looked up non-fatally.  Each lecture
        link found on the page is wrapped with ``self.url_result``.
        """
        playlist_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, playlist_id)

        # Title first (fatal), then description (non-fatal), then the links.
        title = self._html_search_regex(
            r'<h1 class="playlist-name"[^>]*?>(.*?)</h1>', webpage, u'title')
        description = self._html_search_regex(
            r'<p class="excerpt"[^>]*?>(.*?)</p>',
            webpage, u'description', fatal=False)

        # One href per "lecture-preview" list item.
        lecture_urls = re.findall(
            r'<li class="lecture-preview">\s*?<a target="_blank" href="([^"]+)">',
            webpage)

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': title,
            'description': description,
            'entries': [
                self.url_result(lecture_url) for lecture_url in lecture_urls],
        }

View File

@ -13,12 +13,12 @@ from ..utils import (
class AddAnimeIE(InfoExtractor): class AddAnimeIE(InfoExtractor):
_VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)' _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
IE_NAME = u'AddAnime' IE_NAME = u'AddAnime'
_TEST = { _TEST = {
u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9', u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
u'file': u'24MR3YO5SAS9.flv', u'file': u'24MR3YO5SAS9.mp4',
u'md5': u'1036a0e0cd307b95bd8a8c3a5c8cfaf1', u'md5': u'72954ea10bc979ab5e2eb288b21425a0',
u'info_dict': { u'info_dict': {
u"description": u"One Piece 606", u"description": u"One Piece 606",
u"title": u"One Piece 606" u"title": u"One Piece 606"
@ -31,7 +31,8 @@ class AddAnimeIE(InfoExtractor):
video_id = mobj.group('video_id') video_id = mobj.group('video_id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
except ExtractorError as ee: except ExtractorError as ee:
if not isinstance(ee.cause, compat_HTTPError): if not isinstance(ee.cause, compat_HTTPError) or \
ee.cause.code != 503:
raise raise
redir_webpage = ee.cause.read().decode('utf-8') redir_webpage = ee.cause.read().decode('utf-8')
@ -60,16 +61,26 @@ class AddAnimeIE(InfoExtractor):
note=u'Confirming after redirect') note=u'Confirming after redirect')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(r"var normal_video_file = '(.*?)';", formats = []
webpage, u'video file URL') for format_id in ('normal', 'hq'):
rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id)
video_url = self._search_regex(rex, webpage, u'video file URLx',
fatal=False)
if not video_url:
continue
formats.append({
'format_id': format_id,
'url': video_url,
})
if not formats:
raise ExtractorError(u'Cannot find any video format!')
video_title = self._og_search_title(webpage) video_title = self._og_search_title(webpage)
video_description = self._og_search_description(webpage) video_description = self._og_search_description(webpage)
return { return {
'_type': 'video', '_type': 'video',
'id': video_id, 'id': video_id,
'url': video_url, 'formats': formats,
'ext': 'flv',
'title': video_title, 'title': video_title,
'description': video_description 'description': video_description
} }

View File

@ -0,0 +1,69 @@
# encoding: utf-8
from __future__ import unicode_literals
import datetime
import re
from .common import InfoExtractor
class AftonbladetIE(InfoExtractor):
    """Extractor for web TV articles on tv.aftonbladet.se."""

    _VALID_URL = r'^http://tv\.aftonbladet\.se/webbtv.+?(?P<video_id>article[0-9]+)\.ab(?:$|[?#])'
    _TEST = {
        'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab',
        'info_dict': {
            'id': 'article36015',
            'ext': 'mp4',
            'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna',
            'description': 'Jupiters måne mest aktiv av alla himlakroppar',
            'upload_date': '20140306',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the article video at ``url``.

        Three requests are made: the article page (to find the internal
        aptoma id), the metadata JSON for that id, and the formats JSON for
        the ``videoId`` named in the metadata.

        The title and the format list are required.  Thumbnail, description,
        upload date, duration and view count are optional and come back as
        ``None`` when the metadata JSON does not carry them.
        """
        mobj = re.search(self._VALID_URL, url)
        video_id = mobj.group('video_id')
        webpage = self._download_webpage(url, video_id)

        # find internal video meta data
        META_URL = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json'
        internal_meta_id = self._html_search_regex(
            r'data-aptomaId="([\w\d]+)"', webpage, 'internal_meta_id')
        internal_meta_url = META_URL % internal_meta_id
        internal_meta_json = self._download_json(
            internal_meta_url, video_id, 'Downloading video meta data')

        # find internal video formats
        FORMATS_URL = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s'
        internal_video_id = internal_meta_json['videoId']
        internal_formats_url = FORMATS_URL % internal_video_id
        internal_formats_json = self._download_json(
            internal_formats_url, video_id, 'Downloading video formats')

        formats = []
        for fmt in internal_formats_json['formats']['http']['pseudostreaming']['mp4']:
            # Only the first listed path of each format is used.
            p = fmt['paths'][0]
            formats.append({
                'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']),
                'ext': 'mp4',
                'width': fmt['width'],
                'height': fmt['height'],
                'tbr': fmt['bitrate'],
                'protocol': 'http',
            })
        self._sort_formats(formats)

        # timePublished is treated as a Unix timestamp.  Format it in UTC so
        # the resulting date does not depend on the local timezone of the
        # machine running the extractor.
        upload_date = None
        time_published = internal_meta_json.get('timePublished')
        if time_published is not None:
            upload_date = datetime.datetime.utcfromtimestamp(
                time_published).strftime('%Y%m%d')

        return {
            'id': video_id,
            'title': internal_meta_json['title'],
            'formats': formats,
            # Optional metadata: a missing key must not abort the extraction.
            'thumbnail': internal_meta_json.get('imageUrl'),
            'description': internal_meta_json.get('shortPreamble'),
            'upload_date': upload_date,
            'duration': internal_meta_json.get('duration'),
            'view_count': internal_meta_json.get('views'),
        }

View File

@ -0,0 +1,53 @@
import re
from .common import InfoExtractor
class AnitubeIE(InfoExtractor):
    """Extractor for anitube.se video pages."""

    IE_NAME = u'anitube.se'
    _VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.anitube.se/video/36621',
        u'md5': u'59d0eeae28ea0bc8c05e7af429998d43',
        u'file': u'36621.mp4',
        u'info_dict': {
            u'id': u'36621',
            u'ext': u'mp4',
            u'title': u'Recorder to Randoseru 01',
        },
        u'skip': u'Blocked in the US',
    }

    def _real_extract(self, url):
        """Return id, title and the available formats for ``url``.

        The video page yields an embed key; the XML config fetched with that
        key supplies the title and up to two media URLs.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        webpage = self._download_webpage(url, video_id)
        key = self._html_search_regex(
            r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)',
            webpage, u'key')

        config_xml = self._download_xml(
            'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key)
        video_title = config_xml.find('title').text

        # <file> is listed as "sd" and <filehd> as "hd"; either element may
        # be absent from the config, in which case that format is omitted.
        formats = []
        for tag, format_id in (('file', 'sd'), ('filehd', 'hd')):
            node = config_xml.find(tag)
            if node is None:
                continue
            formats.append({
                'format_id': format_id,
                'url': node.text,
            })

        return {
            'id': video_id,
            'title': video_title,
            'formats': formats
        }

View File

@ -0,0 +1,28 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .fivemin import FiveMinIE
class AolIE(InfoExtractor):
    """Extractor for on.aol.com video pages, delegating to FiveMinIE."""

    IE_NAME = 'on.aol.com'
    _VALID_URL = r'http://on\.aol\.com/video/.*-(?P<id>\d+)($|\?)'

    _TEST = {
        'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
        'md5': '18ef68f48740e86ae94b98da815eec42',
        'info_dict': {
            'id': '518167793',
            'ext': 'mp4',
            'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
        },
        'add_ie': ['FiveMin'],
    }

    def _real_extract(self, url):
        """Return the FiveMinIE result for the numeric id in ``url``.

        No page is downloaded here: the trailing digits of the URL are
        passed straight to ``FiveMinIE._build_result``.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        self.to_screen('Downloading 5min.com video %s' % video_id)
        return FiveMinIE._build_result(video_id)

View File

@ -0,0 +1,56 @@
#coding: utf-8
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
HEADRequest,
)
class AparatIE(InfoExtractor):
    """Extractor for aparat.com videos (watch pages and embed URLs)."""

    _VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'

    _TEST = {
        u'url': u'http://www.aparat.com/v/wP8On',
        u'file': u'wP8On.mp4',
        u'md5': u'6714e0af7e0d875c5a39c4dc4ab46ad1',
        u'info_dict': {
            u"title": u"تیم گلکسی 11 - زومیت",
        },
        #u'skip': u'Extremely unreliable',
    }

    def _real_extract(self, url):
        """Return the info dict for the first working media URL of ``url``.

        The embed frame page lists candidate file URLs.  Each one is probed
        with a HEAD request in order, and the first that gets a response is
        used.  Raises ExtractorError when no candidate responds.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        # Note: There is an easier-to-parse configuration at
        # http://www.aparat.com/video/video/config/videohash/%video_id
        # but the URL in there does not work
        embed_url = (u'http://www.aparat.com/video/video/embed/videohash/' +
                     video_id + u'/vt/frame')
        webpage = self._download_webpage(embed_url, video_id)

        candidates = re.findall(
            r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage)

        working_url = None
        for index, candidate in enumerate(candidates):
            # errnote=False: a failing candidate is not reported as an error,
            # the loop just moves on to the next one.
            response = self._request_webpage(
                HEADRequest(candidate), video_id,
                note=u'Testing video URL %d' % index, errnote=False)
            if response:
                working_url = candidate
                break
        if working_url is None:
            raise ExtractorError(u'No working video URLs found')

        title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, u'title')
        thumbnail = self._search_regex(
            r'\s+image:\s*"([^"]+)"', webpage, u'thumbnail', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'url': working_url,
            'ext': 'mp4',
            'thumbnail': thumbnail,
        }

View File

@ -1,5 +1,6 @@
from __future__ import unicode_literals
import re import re
import xml.etree.ElementTree
import json import json
from .common import InfoExtractor from .common import InfoExtractor
@ -10,48 +11,48 @@ from ..utils import (
class AppleTrailersIE(InfoExtractor): class AppleTrailersIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?trailers.apple.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)' _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
_TEST = { _TEST = {
u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/", "url": "http://trailers.apple.com/trailers/wb/manofsteel/",
u"playlist": [ "playlist": [
{ {
u"file": u"manofsteel-trailer4.mov", "file": "manofsteel-trailer4.mov",
u"md5": u"d97a8e575432dbcb81b7c3acb741f8a8", "md5": "d97a8e575432dbcb81b7c3acb741f8a8",
u"info_dict": { "info_dict": {
u"duration": 111, "duration": 111,
u"title": u"Trailer 4", "title": "Trailer 4",
u"upload_date": u"20130523", "upload_date": "20130523",
u"uploader_id": u"wb", "uploader_id": "wb",
}, },
}, },
{ {
u"file": u"manofsteel-trailer3.mov", "file": "manofsteel-trailer3.mov",
u"md5": u"b8017b7131b721fb4e8d6f49e1df908c", "md5": "b8017b7131b721fb4e8d6f49e1df908c",
u"info_dict": { "info_dict": {
u"duration": 182, "duration": 182,
u"title": u"Trailer 3", "title": "Trailer 3",
u"upload_date": u"20130417", "upload_date": "20130417",
u"uploader_id": u"wb", "uploader_id": "wb",
}, },
}, },
{ {
u"file": u"manofsteel-trailer.mov", "file": "manofsteel-trailer.mov",
u"md5": u"d0f1e1150989b9924679b441f3404d48", "md5": "d0f1e1150989b9924679b441f3404d48",
u"info_dict": { "info_dict": {
u"duration": 148, "duration": 148,
u"title": u"Trailer", "title": "Trailer",
u"upload_date": u"20121212", "upload_date": "20121212",
u"uploader_id": u"wb", "uploader_id": "wb",
}, },
}, },
{ {
u"file": u"manofsteel-teaser.mov", "file": "manofsteel-teaser.mov",
u"md5": u"5fe08795b943eb2e757fa95cb6def1cb", "md5": "5fe08795b943eb2e757fa95cb6def1cb",
u"info_dict": { "info_dict": {
u"duration": 93, "duration": 93,
u"title": u"Teaser", "title": "Teaser",
u"upload_date": u"20120721", "upload_date": "20120721",
u"uploader_id": u"wb", "uploader_id": "wb",
}, },
} }
] ]
@ -65,18 +66,18 @@ class AppleTrailersIE(InfoExtractor):
uploader_id = mobj.group('company') uploader_id = mobj.group('company')
playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc') playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc')
playlist_snippet = self._download_webpage(playlist_url, movie) def fix_html(s):
playlist_cleaned = re.sub(r'(?s)<script[^<]*?>.*?</script>', u'', playlist_snippet) s = re.sub(r'(?s)<script[^<]*?>.*?</script>', u'', s)
playlist_cleaned = re.sub(r'<img ([^<]*?)>', r'<img \1/>', playlist_cleaned) s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)
# The ' in the onClick attributes are not escaped, it couldn't be parsed # The ' in the onClick attributes are not escaped, it couldn't be parsed
# with xml.etree.ElementTree.fromstring # like: http://trailers.apple.com/trailers/wb/gravity/
# like: http://trailers.apple.com/trailers/wb/gravity/ def _clean_json(m):
def _clean_json(m): return u'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
return u'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;') s = re.sub(self._JSON_RE, _clean_json, s)
playlist_cleaned = re.sub(self._JSON_RE, _clean_json, playlist_cleaned) s = u'<html>' + s + u'</html>'
playlist_html = u'<html>' + playlist_cleaned + u'</html>' return s
doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
doc = xml.etree.ElementTree.fromstring(playlist_html)
playlist = [] playlist = []
for li in doc.findall('./div/ul/li'): for li in doc.findall('./div/ul/li'):
on_click = li.find('.//a').attrib['onClick'] on_click = li.find('.//a').attrib['onClick']
@ -111,9 +112,10 @@ class AppleTrailersIE(InfoExtractor):
'width': format['width'], 'width': format['width'],
'height': int(format['height']), 'height': int(format['height']),
}) })
formats = sorted(formats, key=lambda f: (f['height'], f['width']))
info = { self._sort_formats(formats)
playlist.append({
'_type': 'video', '_type': 'video',
'id': video_id, 'id': video_id,
'title': title, 'title': title,
@ -124,12 +126,7 @@ class AppleTrailersIE(InfoExtractor):
'upload_date': upload_date, 'upload_date': upload_date,
'uploader_id': uploader_id, 'uploader_id': uploader_id,
'user_agent': 'QuickTime compatible (youtube-dl)', 'user_agent': 'QuickTime compatible (youtube-dl)',
} })
# TODO: Remove when #980 has been merged
info['url'] = formats[-1]['url']
info['ext'] = formats[-1]['ext']
playlist.append(info)
return { return {
'_type': 'playlist', '_type': 'playlist',

View File

@ -1,9 +1,10 @@
from __future__ import unicode_literals
import json import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext,
unified_strdate, unified_strdate,
) )
@ -11,25 +12,24 @@ from ..utils import (
class ArchiveOrgIE(InfoExtractor): class ArchiveOrgIE(InfoExtractor):
IE_NAME = 'archive.org' IE_NAME = 'archive.org'
IE_DESC = 'archive.org videos' IE_DESC = 'archive.org videos'
_VALID_URL = r'(?:https?://)?(?:www\.)?archive.org/details/(?P<id>[^?/]+)(?:[?].*)?$' _VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
_TEST = { _TEST = {
u"url": u"http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect", "url": "http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect",
u'file': u'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv', 'file': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv',
u'md5': u'8af1d4cf447933ed3c7f4871162602db', 'md5': '8af1d4cf447933ed3c7f4871162602db',
u'info_dict': { 'info_dict': {
u"title": u"1968 Demo - FJCC Conference Presentation Reel #1", "title": "1968 Demo - FJCC Conference Presentation Reel #1",
u"description": u"Reel 1 of 3: Also known as the \"Mother of All Demos\", Doug Engelbart's presentation at the Fall Joint Computer Conference in San Francisco, December 9, 1968 titled \"A Research Center for Augmenting Human Intellect.\" For this presentation, Doug and his team astonished the audience by not only relating their research, but demonstrating it live. This was the debut of the mouse, interactive computing, hypermedia, computer supported software engineering, video teleconferencing, etc. See also <a href=\"http://dougengelbart.org/firsts/dougs-1968-demo.html\" rel=\"nofollow\">Doug's 1968 Demo page</a> for more background, highlights, links, and the detailed paper published in this conference proceedings. Filmed on 3 reels: Reel 1 | <a href=\"http://www.archive.org/details/XD300-24_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 2</a> | <a href=\"http://www.archive.org/details/XD300-25_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 3</a>", "description": "Reel 1 of 3: Also known as the \"Mother of All Demos\", Doug Engelbart's presentation at the Fall Joint Computer Conference in San Francisco, December 9, 1968 titled \"A Research Center for Augmenting Human Intellect.\" For this presentation, Doug and his team astonished the audience by not only relating their research, but demonstrating it live. This was the debut of the mouse, interactive computing, hypermedia, computer supported software engineering, video teleconferencing, etc. See also <a href=\"http://dougengelbart.org/firsts/dougs-1968-demo.html\" rel=\"nofollow\">Doug's 1968 Demo page</a> for more background, highlights, links, and the detailed paper published in this conference proceedings. 
Filmed on 3 reels: Reel 1 | <a href=\"http://www.archive.org/details/XD300-24_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 2</a> | <a href=\"http://www.archive.org/details/XD300-25_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 3</a>",
u"upload_date": u"19681210", "upload_date": "19681210",
u"uploader": u"SRI International" "uploader": "SRI International"
} }
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
json_url = url + (u'?' if u'?' in url else '&') + u'output=json' json_url = url + ('?' if '?' in url else '&') + 'output=json'
json_data = self._download_webpage(json_url, video_id) json_data = self._download_webpage(json_url, video_id)
data = json.loads(json_data) data = json.loads(json_data)
@ -38,18 +38,18 @@ class ArchiveOrgIE(InfoExtractor):
uploader = data['metadata']['creator'][0] uploader = data['metadata']['creator'][0]
upload_date = unified_strdate(data['metadata']['date'][0]) upload_date = unified_strdate(data['metadata']['date'][0])
formats = [{ formats = [
{
'format': fdata['format'], 'format': fdata['format'],
'url': 'http://' + data['server'] + data['dir'] + fn, 'url': 'http://' + data['server'] + data['dir'] + fn,
'file_size': int(fdata['size']), 'file_size': int(fdata['size']),
} }
for fn,fdata in data['files'].items() for fn, fdata in data['files'].items()
if 'Video' in fdata['format']] if 'Video' in fdata['format']]
formats.sort(key=lambda fdata: fdata['file_size'])
for f in formats:
f['ext'] = determine_ext(f['url'])
info = { self._sort_formats(formats)
return {
'_type': 'video', '_type': 'video',
'id': video_id, 'id': video_id,
'title': title, 'title': title,
@ -57,12 +57,5 @@ class ArchiveOrgIE(InfoExtractor):
'description': description, 'description': description,
'uploader': uploader, 'uploader': uploader,
'upload_date': upload_date, 'upload_date': upload_date,
'thumbnail': data.get('misc', {}).get('image'),
} }
thumbnail = data.get('misc', {}).get('image')
if thumbnail:
info['thumbnail'] = thumbnail
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@ -1,22 +1,28 @@
# coding: utf-8
from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext,
ExtractorError, ExtractorError,
) )
class ARDIE(InfoExtractor): class ARDIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?' _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?'
_TITLE = r'<h1(?: class="boxTopHeadline")?>(?P<title>.*)</h1>'
_MEDIA_STREAM = r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)'
_TEST = { _TEST = {
u'url': u'http://www.ardmediathek.de/das-erste/tagesschau-in-100-sek?documentId=14077640', 'url': 'http://www.ardmediathek.de/das-erste/guenther-jauch/edward-snowden-im-interview-held-oder-verraeter?documentId=19288786',
u'file': u'14077640.mp4', 'file': '19288786.mp4',
u'md5': u'6ca8824255460c787376353f9e20bbd8', 'md5': '515bf47ce209fb3f5a61b7aad364634c',
u'info_dict': { 'info_dict': {
u"title": u"11.04.2013 09:23 Uhr - Tagesschau in 100 Sekunden" 'title': 'Edward Snowden im Interview - Held oder Verräter?',
'description': 'Edward Snowden hat alles aufs Spiel gesetzt, um die weltweite \xdcberwachung durch die Geheimdienste zu enttarnen. Nun stellt sich der ehemalige NSA-Mitarbeiter erstmals weltweit in einem TV-Interview den Fragen eines NDR-Journalisten. Die Sendung vom Sonntagabend.',
'thumbnail': 'http://www.ardmediathek.de/ard/servlet/contentblob/19/28/87/90/19288790/bild/2250037',
}, },
u'skip': u'Requires rtmpdump' 'skip': 'Blocked outside of Germany',
} }
def _real_extract(self, url): def _real_extract(self, url):
@ -29,26 +35,49 @@ class ARDIE(InfoExtractor):
else: else:
video_id = m.group('video_id') video_id = m.group('video_id')
# determine title and media streams from webpage webpage = self._download_webpage(url, video_id)
html = self._download_webpage(url, video_id)
title = re.search(self._TITLE, html).group('title') title = self._html_search_regex(
streams = [mo.groupdict() for mo in re.finditer(self._MEDIA_STREAM, html)] r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', webpage, 'title')
description = self._html_search_meta(
'dcterms.abstract', webpage, 'description')
thumbnail = self._og_search_thumbnail(webpage)
streams = [
mo.groupdict()
for mo in re.finditer(
r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)', webpage)]
if not streams: if not streams:
assert '"fsk"' in html if '"fsk"' in webpage:
raise ExtractorError(u'This video is only available after 8:00 pm') raise ExtractorError('This video is only available after 20:00')
# choose default media type and highest quality for now formats = []
stream = max([s for s in streams if int(s["media_type"]) == 0], for s in streams:
key=lambda s: int(s["quality"])) format = {
'quality': int(s['quality']),
}
if s.get('rtmp_url'):
format['protocol'] = 'rtmp'
format['url'] = s['rtmp_url']
format['playpath'] = s['video_url']
else:
format['url'] = s['video_url']
# there's two possibilities: RTMP stream or HTTP download quality_name = self._search_regex(
info = {'id': video_id, 'title': title, 'ext': 'mp4'} r'[,.]([a-zA-Z0-9_-]+),?\.mp4', format['url'],
if stream['rtmp_url']: 'quality name', default='NA')
self.to_screen(u'RTMP download detected') format['format_id'] = '%s-%s-%s-%s' % (
assert stream['video_url'].startswith('mp4:') determine_ext(format['url']), quality_name, s['media_type'],
info["url"] = stream["rtmp_url"] s['quality'])
info["play_path"] = stream['video_url']
else: formats.append(format)
assert stream["video_url"].endswith('.mp4')
info["url"] = stream["video_url"] self._sort_formats(formats)
return [info]
return {
'id': video_id,
'title': title,
'description': description,
'formats': formats,
'thumbnail': thumbnail,
}

View File

@ -1,7 +1,8 @@
# encoding: utf-8 # encoding: utf-8
from __future__ import unicode_literals
import re import re
import json import json
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -10,6 +11,8 @@ from ..utils import (
unified_strdate, unified_strdate,
determine_ext, determine_ext,
get_element_by_id, get_element_by_id,
compat_str,
get_element_by_attribute,
) )
# There are different sources of video in arte.tv, the extraction process # There are different sources of video in arte.tv, the extraction process
@ -17,11 +20,11 @@ from ..utils import (
# add tests. # add tests.
class ArteTvIE(InfoExtractor): class ArteTvIE(InfoExtractor):
_VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?P<lang>fr|de)/.*-(?P<id>.*?).html' _VIDEOS_URL = r'(?:http://)?videos\.arte\.tv/(?P<lang>fr|de)/.*-(?P<id>.*?)\.html'
_LIVEWEB_URL = r'(?:http://)?liveweb.arte.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)' _LIVEWEB_URL = r'(?:http://)?liveweb\.arte\.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
_LIVE_URL = r'index-[0-9]+\.html$' _LIVE_URL = r'index-[0-9]+\.html$'
IE_NAME = u'arte.tv' IE_NAME = 'arte.tv'
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
@ -36,7 +39,7 @@ class ArteTvIE(InfoExtractor):
# r'src="(.*?/videothek_js.*?\.js)', # r'src="(.*?/videothek_js.*?\.js)',
# 0, # 0,
# [ # [
# (1, 'url', u'Invalid URL: %s' % url) # (1, 'url', 'Invalid URL: %s' % url)
# ] # ]
# ) # )
# http_host = url.split('/')[2] # http_host = url.split('/')[2]
@ -48,12 +51,12 @@ class ArteTvIE(InfoExtractor):
# '(rtmp://.*?)\'', # '(rtmp://.*?)\'',
# re.DOTALL, # re.DOTALL,
# [ # [
# (1, 'path', u'could not extract video path: %s' % url), # (1, 'path', 'could not extract video path: %s' % url),
# (2, 'player', u'could not extract video player: %s' % url), # (2, 'player', 'could not extract video player: %s' % url),
# (3, 'url', u'could not extract video url: %s' % url) # (3, 'url', 'could not extract video url: %s' % url)
# ] # ]
# ) # )
# video_url = u'%s/%s' % (info.get('url'), info.get('path')) # video_url = '%s/%s' % (info.get('url'), info.get('path'))
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VIDEOS_URL, url) mobj = re.match(self._VIDEOS_URL, url)
@ -68,20 +71,23 @@ class ArteTvIE(InfoExtractor):
lang = mobj.group('lang') lang = mobj.group('lang')
return self._extract_liveweb(url, name, lang) return self._extract_liveweb(url, name, lang)
if re.search(self._LIVE_URL, video_id) is not None: if re.search(self._LIVE_URL, url) is not None:
raise ExtractorError(u'Arte live streams are not yet supported, sorry') raise ExtractorError('Arte live streams are not yet supported, sorry')
# self.extractLiveStream(url) # self.extractLiveStream(url)
# return # return
raise ExtractorError('No video found')
def _extract_video(self, url, video_id, lang): def _extract_video(self, url, video_id, lang):
"""Extract from videos.arte.tv""" """Extract from videos.arte.tv"""
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/') ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml') ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
ref_xml = self._download_webpage(ref_xml_url, video_id, note=u'Downloading metadata') ref_xml_doc = self._download_xml(
ref_xml_doc = xml.etree.ElementTree.fromstring(ref_xml) ref_xml_url, video_id, note='Downloading metadata')
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang) config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
config_xml_url = config_node.attrib['ref'] config_xml_url = config_node.attrib['ref']
config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration') config_xml = self._download_webpage(
config_xml_url, video_id, note='Downloading configuration')
video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml)) video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml))
def _key(m): def _key(m):
@ -107,14 +113,13 @@ class ArteTvIE(InfoExtractor):
def _extract_liveweb(self, url, name, lang): def _extract_liveweb(self, url, name, lang):
"""Extract form http://liveweb.arte.tv/""" """Extract form http://liveweb.arte.tv/"""
webpage = self._download_webpage(url, name) webpage = self._download_webpage(url, name)
video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id') video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, 'event id')
config_xml = self._download_webpage('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id, config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
video_id, u'Downloading information') video_id, 'Downloading information')
config_doc = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
event_doc = config_doc.find('event') event_doc = config_doc.find('event')
url_node = event_doc.find('video').find('urlHd') url_node = event_doc.find('video').find('urlHd')
if url_node is None: if url_node is None:
url_node = video_doc.find('urlSd') url_node = event_doc.find('urlSd')
return {'id': video_id, return {'id': video_id,
'title': event_doc.find('name%s' % lang.capitalize()).text, 'title': event_doc.find('name%s' % lang.capitalize()).text,
@ -125,8 +130,8 @@ class ArteTvIE(InfoExtractor):
class ArteTVPlus7IE(InfoExtractor): class ArteTVPlus7IE(InfoExtractor):
IE_NAME = u'arte.tv:+7' IE_NAME = 'arte.tv:+7'
_VALID_URL = r'https?://www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?' _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
@classmethod @classmethod
def _extract_url_info(cls, url): def _extract_url_info(cls, url):
@ -144,7 +149,9 @@ class ArteTVPlus7IE(InfoExtractor):
def _extract_from_webpage(self, webpage, video_id, lang): def _extract_from_webpage(self, webpage, video_id, lang):
json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url') json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')
return self._extract_from_json_url(json_url, video_id, lang)
def _extract_from_json_url(self, json_url, video_id, lang):
json_info = self._download_webpage(json_url, video_id, 'Downloading info json') json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
self.report_extraction(video_id) self.report_extraction(video_id)
info = json.loads(json_info) info = json.loads(json_info)
@ -158,7 +165,9 @@ class ArteTVPlus7IE(InfoExtractor):
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'), 'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
} }
formats = player_info['VSR'].values() all_formats = player_info['VSR'].values()
# Some formats use the m3u8 protocol
all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats))
def _match_lang(f): def _match_lang(f):
if f.get('versionCode') is None: if f.get('versionCode') is None:
return True return True
@ -170,35 +179,52 @@ class ArteTVPlus7IE(InfoExtractor):
regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l] regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
return any(re.match(r, f['versionCode']) for r in regexes) return any(re.match(r, f['versionCode']) for r in regexes)
# Some formats may not be in the same language as the url # Some formats may not be in the same language as the url
formats = filter(_match_lang, formats) formats = filter(_match_lang, all_formats)
# Some formats use the m3u8 protocol
formats = filter(lambda f: f.get('videoFormat') != 'M3U8', formats)
# We order the formats by quality
formats = list(formats) # in python3 filter returns an iterator formats = list(formats) # in python3 filter returns an iterator
if not formats:
# Some videos are only available in the 'Originalversion'
# they aren't tagged as being in French or German
if all(f['versionCode'] == 'VO' for f in all_formats):
formats = all_formats
else:
raise ExtractorError(u'The formats list is empty')
if re.match(r'[A-Z]Q', formats[0]['quality']) is not None: if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
sort_key = lambda f: ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality']) def sort_key(f):
return ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
else: else:
sort_key = lambda f: int(f.get('height',-1)) def sort_key(f):
return (
# Sort first by quality
int(f.get('height',-1)),
int(f.get('bitrate',-1)),
# The original version with subtitles has lower relevance
re.match(r'VO-ST(F|A)', f.get('versionCode', '')) is None,
# The version with sourds/mal subtitles has also lower relevance
re.match(r'VO?(F|A)-STM\1', f.get('versionCode', '')) is None,
# Prefer http downloads over m3u8
0 if f['url'].endswith('m3u8') else 1,
)
formats = sorted(formats, key=sort_key) formats = sorted(formats, key=sort_key)
# Prefer videos without subtitles in the same language
formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f.get('versionCode', '')) is None)
# Pick the best quality
def _format(format_info): def _format(format_info):
quality = format_info['quality'] quality = ''
m_quality = re.match(r'\w*? - (\d*)p', quality) height = format_info.get('height')
if m_quality is not None: if height is not None:
quality = m_quality.group(1) quality = compat_str(height)
bitrate = format_info.get('bitrate')
if bitrate is not None:
quality += '-%d' % bitrate
if format_info.get('versionCode') is not None: if format_info.get('versionCode') is not None:
format_id = u'%s-%s' % (quality, format_info['versionCode']) format_id = '%s-%s' % (quality, format_info['versionCode'])
else: else:
format_id = quality format_id = quality
info = { info = {
'format_id': format_id, 'format_id': format_id,
'format_note': format_info.get('versionLibelle'), 'format_note': format_info.get('versionLibelle'),
'width': format_info.get('width'), 'width': format_info.get('width'),
'height': format_info.get('height'), 'height': height,
} }
if format_info['mediaType'] == u'rtmp': if format_info['mediaType'] == 'rtmp':
info['url'] = format_info['streamer'] info['url'] = format_info['streamer']
info['play_path'] = 'mp4:' + format_info['url'] info['play_path'] = 'mp4:' + format_info['url']
info['ext'] = 'flv' info['ext'] = 'flv'
@ -213,27 +239,29 @@ class ArteTVPlus7IE(InfoExtractor):
# It also uses the arte_vp_url url from the webpage to extract the information # It also uses the arte_vp_url url from the webpage to extract the information
class ArteTVCreativeIE(ArteTVPlus7IE): class ArteTVCreativeIE(ArteTVPlus7IE):
IE_NAME = u'arte.tv:creative' IE_NAME = 'arte.tv:creative'
_VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de)/magazine?/(?P<id>.+)' _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de)/magazine?/(?P<id>.+)'
_TEST = { _TEST = {
u'url': u'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design', 'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
u'file': u'050489-002.mp4', 'info_dict': {
u'info_dict': { 'id': '050489-002',
u'title': u'Agentur Amateur / Agence Amateur #2 : Corporate Design', 'ext': 'mp4',
'title': 'Agentur Amateur / Agence Amateur #2 : Corporate Design',
}, },
} }
class ArteTVFutureIE(ArteTVPlus7IE): class ArteTVFutureIE(ArteTVPlus7IE):
IE_NAME = u'arte.tv:future' IE_NAME = 'arte.tv:future'
_VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de)/(thema|sujet)/.*?#article-anchor-(?P<id>\d+)' _VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de)/(thema|sujet)/.*?#article-anchor-(?P<id>\d+)'
_TEST = { _TEST = {
u'url': u'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081', 'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081',
u'file': u'050940-003.mp4', 'info_dict': {
u'info_dict': { 'id': '050940-003',
u'title': u'Les champignons au secours de la planète', 'ext': 'mp4',
'title': 'Les champignons au secours de la planète',
}, },
} }
@ -242,3 +270,38 @@ class ArteTVFutureIE(ArteTVPlus7IE):
webpage = self._download_webpage(url, anchor_id) webpage = self._download_webpage(url, anchor_id)
row = get_element_by_id(anchor_id, webpage) row = get_element_by_id(anchor_id, webpage)
return self._extract_from_webpage(row, anchor_id, lang) return self._extract_from_webpage(row, anchor_id, lang)
class ArteTVDDCIE(ArteTVPlus7IE):
    """Extractor for ddc.arte.tv pages.

    The URL's first path component (``emission`` or ``folge``) is captured
    as the ``lang`` group and translated to a language code before the
    shared JSON-based extraction is invoked.
    """

    IE_NAME = 'arte.tv:ddc'
    _VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'

    def _real_extract(self, url):
        """Locate the player's JSON URL through the page's script and extract it."""
        video_id, lang = self._extract_url_info(url)
        # Map the site's path keyword to a language code; any other value
        # is passed through unchanged.
        lang = {'folge': 'de', 'emission': 'fr'}.get(lang, lang)

        webpage = self._download_webpage(url, video_id)
        script_element = get_element_by_attribute(
            'class', 'visu_video_block', webpage)
        script_url = self._html_search_regex(
            r'src="(.*?)"', script_element, 'script url')
        # The downloaded script embeds the JSON URL as a query parameter.
        player_generator = self._download_webpage(
            script_url, video_id, 'Download javascript player generator')
        json_url = self._search_regex(
            r"json_url=(.*)&rendering_place.*", player_generator, 'json url')
        return self._extract_from_json_url(json_url, video_id, lang)
class ArteTVConcertIE(ArteTVPlus7IE):
    """Extractor for concert.arte.tv pages.

    Declares only its name, URL pattern and test case; it defines no
    extraction logic of its own and relies on what ArteTVPlus7IE provides.
    """

    IE_NAME = 'arte.tv:concert'
    _VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>de|fr)/(?P<id>.+)'

    _TEST = {
        'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde',
        'md5': '9ea035b7bd69696b67aa2ccaaa218161',
        'info_dict': {
            'id': '186',
            'ext': 'mp4',
            'title': 'The Notwist im Pariser Konzertclub "Divan du Monde"',
            'upload_date': '20140128',
            # Compared by checksum rather than literal text.
            'description': 'md5:486eb08f991552ade77439fe6d82c305',
        },
    }

View File

@ -1,46 +1,52 @@
import os.path from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_urllib_parse, compat_urllib_parse,
compat_urllib_parse_urlparse, determine_ext,
ExtractorError,
) )
class AUEngineIE(InfoExtractor): class AUEngineIE(InfoExtractor):
_TEST = { _TEST = {
u'url': u'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370', 'url': 'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370',
u'file': u'lfvlytY6.mp4', 'file': 'lfvlytY6.mp4',
u'md5': u'48972bdbcf1a3a2f5533e62425b41d4f', 'md5': '48972bdbcf1a3a2f5533e62425b41d4f',
u'info_dict': { 'info_dict': {
u"title": u"[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]" 'title': '[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]'
} }
} }
_VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed.php\?.*?file=([^&]+).*?' _VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed\.php\?.*?file=([^&]+).*?'
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1) video_id = mobj.group(1)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<title>(?P<title>.+?)</title>', title = self._html_search_regex(r'<title>(?P<title>.+?)</title>',
webpage, u'title') webpage, 'title')
title = title.strip() title = title.strip()
links = re.findall(r'[^A-Za-z0-9]?(?:file|url):\s*["\'](http[^\'"&]*)', webpage) links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage)
links = [compat_urllib_parse.unquote(l) for l in links] links = map(compat_urllib_parse.unquote, links)
thumbnail = None
video_url = None
for link in links: for link in links:
root, pathext = os.path.splitext(compat_urllib_parse_urlparse(link).path) if link.endswith('.png'):
if pathext == '.png':
thumbnail = link thumbnail = link
elif pathext == '.mp4': elif '/videos/' in link:
url = link video_url = link
ext = pathext if not video_url:
raise ExtractorError(u'Could not find video URL')
ext = '.' + determine_ext(video_url)
if ext == title[-len(ext):]: if ext == title[-len(ext):]:
title = title[:-len(ext)] title = title[:-len(ext)]
ext = ext[1:]
return [{ return {
'id': video_id, 'id': video_id,
'url': url, 'url': video_url,
'ext': ext,
'title': title, 'title': title,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
}] }

View File

@ -0,0 +1,88 @@
from __future__ import unicode_literals
import re
import json
import itertools
from .common import InfoExtractor
from ..utils import (
compat_urllib_request,
)
class BambuserIE(InfoExtractor):
    """Extractor for single Bambuser broadcasts (``bambuser.com/v/<id>``)."""

    IE_NAME = 'bambuser'
    _VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)'
    _API_KEY = '005f64509e19a868399060af746a00aa'

    _TEST = {
        'url': 'http://bambuser.com/v/4050584',
        # MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388
        #u'md5': 'fba8f7693e48fd4e8641b3fd5539a641',
        'info_dict': {
            'id': '4050584',
            'ext': 'flv',
            'title': 'Education engineering days - lightning talks',
            'duration': 3741,
            'uploader': 'pixelversity',
            'uploader_id': '344706',
        },
        'params': {
            # It doesn't respect the 'Range' header, it would download the whole video
            # caused the travis builds to fail: https://travis-ci.org/rg3/youtube-dl/jobs/14493845#L59
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Query the player API for the broadcast and return its info dict."""
        video_id = re.match(self._VALID_URL, url).group('id')

        api_url = ('http://player-c.api.bambuser.com/getVideo.json?'
                   '&api_key=%s&vid=%s' % (self._API_KEY, video_id))
        response = self._download_webpage(api_url, video_id)
        # The payload of interest sits under the top-level 'result' key.
        result = json.loads(response)['result']

        return {
            'id': video_id,
            'title': result['title'],
            'url': result['url'],
            'thumbnail': result.get('preview'),
            'duration': int(result['length']),
            'view_count': int(result['views_total']),
            'uploader': result['username'],
            'uploader_id': result['uid'],
        }
class BambuserChannelIE(InfoExtractor):
    """Extractor that turns a Bambuser channel page into a playlist."""

    IE_NAME = 'bambuser:channel'
    _VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
    # The maximum number we can get with each request
    _STEP = 50

    def _real_extract(self, url):
        """Page through the channel's broadcasts and return them as a playlist."""
        user = re.match(self._VALID_URL, url).group('user')

        page_template = (
            'http://bambuser.com/xhr-api/index.php?username={user}'
            '&sort=created&access_mode=0%2C1%2C2&limit={count}'
            '&method=broadcast&format=json&vid_older_than={last}'
        )

        entries = []
        oldest_vid = ''
        page_num = 1
        while True:
            request = compat_urllib_request.Request(page_template.format(
                user=user, count=self._STEP, last=oldest_vid))
            # Without setting this header, we wouldn't get any result
            request.add_header(
                'Referer', 'http://bambuser.com/channel/%s' % user)
            page_json = self._download_webpage(
                request, user, 'Downloading page %d' % page_num)
            broadcasts = json.loads(page_json)['result']
            if len(broadcasts) == 0:
                # An empty page ends the listing.
                break
            # The next request asks for items older than the last one seen.
            oldest_vid = broadcasts[-1]['vid']
            for broadcast in broadcasts:
                entries.append(self.url_result(broadcast['page'], 'Bambuser'))
            page_num += 1

        return {
            '_type': 'playlist',
            'title': user,
            'entries': entries,
        }

View File

@ -1,23 +1,28 @@
from __future__ import unicode_literals
import json import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_str,
compat_urlparse,
ExtractorError, ExtractorError,
) )
class BandcampIE(InfoExtractor): class BandcampIE(InfoExtractor):
_VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)' _VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
_TEST = { _TESTS = [{
u'url': u'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song', 'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
u'file': u'1812978515.mp3', 'file': '1812978515.mp3',
u'md5': u'cdeb30cdae1921719a3cbcab696ef53c', 'md5': 'c557841d5e50261777a6585648adf439',
u'info_dict': { 'info_dict': {
u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad" "title": "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
"duration": 10,
}, },
u'skip': u'There is a limit of 200 free downloads / month for the test song' '_skip': 'There is a limit of 200 free downloads / month for the test song'
} }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
@ -26,38 +31,114 @@ class BandcampIE(InfoExtractor):
# We get the link to the free download page # We get the link to the free download page
m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage) m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
if m_download is None: if m_download is None:
raise ExtractorError(u'No free songs found') m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
if m_trackinfo:
json_code = m_trackinfo.group(1)
data = json.loads(json_code)
d = data[0]
duration = int(round(d['duration']))
formats = []
for format_id, format_url in d['file'].items():
ext, _, abr_str = format_id.partition('-')
formats.append({
'format_id': format_id,
'url': format_url,
'ext': format_id.partition('-')[0],
'vcodec': 'none',
'acodec': format_id.partition('-')[0],
'abr': int(format_id.partition('-')[2]),
})
self._sort_formats(formats)
return {
'id': compat_str(d['id']),
'title': d['title'],
'formats': formats,
'duration': duration,
}
else:
raise ExtractorError('No free songs found')
download_link = m_download.group(1) download_link = m_download.group(1)
id = re.search(r'var TralbumData = {(.*?)id: (?P<id>\d*?)$', video_id = re.search(
webpage, re.MULTILINE|re.DOTALL).group('id') r'var TralbumData = {(.*?)id: (?P<id>\d*?)$',
webpage, re.MULTILINE | re.DOTALL).group('id')
download_webpage = self._download_webpage(download_link, id, download_webpage = self._download_webpage(download_link, video_id,
'Downloading free downloads page') 'Downloading free downloads page')
# We get the dictionary of the track from some javascrip code # We get the dictionary of the track from some javascrip code
info = re.search(r'items: (.*?),$', info = re.search(r'items: (.*?),$',
download_webpage, re.MULTILINE).group(1) download_webpage, re.MULTILINE).group(1)
info = json.loads(info)[0] info = json.loads(info)[0]
# We pick mp3-320 for now, until format selection can be easily implemented. # We pick mp3-320 for now, until format selection can be easily implemented.
mp3_info = info[u'downloads'][u'mp3-320'] mp3_info = info['downloads']['mp3-320']
# If we try to use this url it says the link has expired # If we try to use this url it says the link has expired
initial_url = mp3_info[u'url'] initial_url = mp3_info['url']
re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$' re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$'
m_url = re.match(re_url, initial_url) m_url = re.match(re_url, initial_url)
#We build the url we will use to get the final track url #We build the url we will use to get the final track url
# This url is build in Bandcamp in the script download_bunde_*.js # This url is build in Bandcamp in the script download_bunde_*.js
request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), id, m_url.group('ts')) request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts'))
final_url_webpage = self._download_webpage(request_url, id, 'Requesting download url') final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url')
# If we could correctly generate the .rand field the url would be # If we could correctly generate the .rand field the url would be
#in the "download_url" key #in the "download_url" key
final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1) final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1)
track_info = {'id':id, return {
'title' : info[u'title'], 'id': video_id,
'ext' : 'mp3', 'title': info['title'],
'url' : final_url, 'ext': 'mp3',
'thumbnail' : info[u'thumb_url'], 'vcodec': 'none',
'uploader' : info[u'artist'] 'url': final_url,
} 'thumbnail': info.get('thumb_url'),
'uploader': info.get('artist'),
}
return [track_info]
class BandcampAlbumIE(InfoExtractor):
    """Extractor that expands a Bandcamp album page into a playlist of tracks."""

    IE_NAME = 'Bandcamp:album'
    _VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)'

    _TEST = {
        'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
        'playlist': [
            {
                'file': '1353101989.mp3',
                'md5': '39bc1eded3476e927c724321ddf116cf',
                'info_dict': {
                    'title': 'Intro',
                }
            },
            {
                'file': '38097443.mp3',
                'md5': '1a2c32e2691474643e912cc6cd4bffaa',
                'info_dict': {
                    'title': 'Kero One - Keep It Alive (Blazo remix)',
                }
            },
        ],
        'params': {
            'playlistend': 2
        },
        'skip': 'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
    }

    def _real_extract(self, url):
        """Collect the album's track links and return them as a playlist."""
        # The URL slug is only used as the identifier while downloading.
        album_slug = re.match(self._VALID_URL, url).group('title')
        webpage = self._download_webpage(url, album_slug)

        track_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
        if not track_paths:
            raise ExtractorError('The page doesn\'t contain any tracks')

        entries = []
        for track_path in track_paths:
            # Track links are resolved against the album URL.
            track_url = compat_urlparse.urljoin(url, track_path)
            entries.append(
                self.url_result(track_url, ie=BandcampIE.ie_key()))

        album_title = self._search_regex(
            r'album_title : "(.*?)"', webpage, 'title')
        return {
            '_type': 'playlist',
            'title': album_title,
            'entries': entries,
        }

View File

@ -0,0 +1,223 @@
from __future__ import unicode_literals
import re
from .subtitles import SubtitlesInfoExtractor
from ..utils import ExtractorError
class BBCCoUkIE(SubtitlesInfoExtractor):
IE_NAME = 'bbc.co.uk'
IE_DESC = 'BBC iPlayer'
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/episode)/(?P<id>[\da-z]{8})'
_TESTS = [
{
'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
'info_dict': {
'id': 'b039d07m',
'ext': 'flv',
'title': 'Kaleidoscope: Leonard Cohen',
'description': 'md5:db4755d7a665ae72343779f7dacb402c',
'duration': 1740,
},
'params': {
# rtmp download
'skip_download': True,
}
},
{
'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
'info_dict': {
'id': 'b00yng1d',
'ext': 'flv',
'title': 'The Man in Black: Series 3: The Printed Name',
'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
'duration': 1800,
},
'params': {
# rtmp download
'skip_download': True,
},
'skip': 'Episode is no longer available on BBC iPlayer Radio',
},
{
'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
'info_dict': {
'id': 'b00yng1d',
'ext': 'flv',
'title': 'The Voice UK: Series 3: Blind Auditions 5',
'description': "Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.",
'duration': 5100,
},
'params': {
# rtmp download
'skip_download': True,
},
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
}
]
def _extract_asx_playlist(self, connection, programme_id):
    """Download the ASX playlist named by ``connection`` and list its URLs.

    Returns the ``href`` attribute of every ``Entry/ref`` element, in
    document order.
    """
    playlist_doc = self._download_xml(
        connection.get('href'), programme_id, 'Downloading ASX playlist')
    hrefs = []
    for ref in playlist_doc.findall('./Entry/ref'):
        hrefs.append(ref.get('href'))
    return hrefs
def _extract_connection(self, connection, programme_id):
formats = []
protocol = connection.get('protocol')
supplier = connection.get('supplier')
if protocol == 'http':
href = connection.get('href')
# ASX playlist
if supplier == 'asx':
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
formats.append({
'url': ref,
'format_id': 'ref%s_%s' % (i, supplier),
})
# Direct link
else:
formats.append({
'url': href,
'format_id': supplier,
})
elif protocol == 'rtmp':
application = connection.get('application', 'ondemand')
auth_string = connection.get('authString')
identifier = connection.get('identifier')
server = connection.get('server')
formats.append({
'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
'play_path': identifier,
'app': '%s?%s' % (application, auth_string),
'page_url': 'http://www.bbc.co.uk',
'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
'rtmp_live': False,
'ext': 'flv',
'format_id': supplier,
})
return formats
def _extract_items(self, playlist):
return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item')
def _extract_medias(self, media_selection):
return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')
def _extract_connections(self, media):
return media.findall('./{http://bbc.co.uk/2008/mp/mediaselection}connection')
def _extract_video(self, media, programme_id):
    """Build the format dicts for one video ``media`` element.

    Every connection of *media* is expanded via ``_extract_connection`` and
    each resulting format is annotated with the video metadata read from the
    element's attributes. ``format_id`` is prefixed with the media's
    ``service`` attribute.

    Numeric attributes (``bitrate``, ``width``, ``height``,
    ``media_file_size``) that are absent or empty are reported as ``None``
    instead of raising ``TypeError``/``ValueError`` from ``int()``.
    """
    def int_attr(name):
        # Element.get() returns None for a missing attribute; int(None)
        # would abort the whole extraction over one optional field.
        value = media.get(name)
        if value is None or value == '':
            return None
        return int(value)

    service = media.get('service')
    video_fields = {
        'width': int_attr('width'),
        'height': int_attr('height'),
        'vbr': int_attr('bitrate'),
        'vcodec': media.get('encoding'),
        'filesize': int_attr('media_file_size'),
    }

    formats = []
    for connection in self._extract_connections(media):
        conn_formats = self._extract_connection(connection, programme_id)
        for fmt in conn_formats:
            fmt['format_id'] = '%s_%s' % (service, fmt['format_id'])
            fmt.update(video_fields)
        formats.extend(conn_formats)
    return formats
def _extract_audio(self, media, programme_id):
    """Build the format dicts for one audio ``media`` element.

    Every connection of *media* is expanded via ``_extract_connection`` and
    each resulting format gets the audio bitrate (``abr``) and codec
    (``acodec``) from the element's attributes. ``format_id`` is prefixed
    with the media's ``service`` attribute.

    A missing or empty ``bitrate`` attribute yields ``abr = None`` instead
    of raising from ``int()``.
    """
    # Element.get() returns None for a missing attribute; int(None) would
    # abort the whole extraction over one optional field.
    bitrate = media.get('bitrate')
    abr = int(bitrate) if bitrate not in (None, '') else None
    acodec = media.get('encoding')
    service = media.get('service')

    formats = []
    for connection in self._extract_connections(media):
        conn_formats = self._extract_connection(connection, programme_id)
        for fmt in conn_formats:
            fmt.update({
                'format_id': '%s_%s' % (service, fmt['format_id']),
                'abr': abr,
                'acodec': acodec,
            })
        formats.extend(conn_formats)
    return formats
def _extract_captions(self, media, programme_id):
    """Download the caption documents of *media* and convert them to SRT text.

    Returns a dict mapping a language code (the document root's ``xml:lang``
    attribute, ``'en'`` when absent) to an SRT-formatted string built from
    the ``body/div/p`` elements in the TTAF1 namespace. If several
    connections report the same language, the last one downloaded wins.
    """
    ttaf_ns = '{http://www.w3.org/2006/10/ttaf1}'
    subtitles = {}
    for connection in self._extract_connections(media):
        captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
        lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
        paragraphs = captions.findall('./{0}body/{0}div/{0}p'.format(ttaf_ns))
        # SubRip cue counters are 1-based, hence enumerate(..., 1).
        # NOTE(review): begin/end are copied verbatim from the source
        # document and only the text before the first child element of <p>
        # is kept -- confirm whether timestamps or nested markup need
        # converting.
        cues = [
            '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (
                counter, p.get('begin'), p.get('end'),
                p.text.strip() if p.text is not None else '')
            for counter, p in enumerate(paragraphs, 1)
        ]
        subtitles[lang] = ''.join(cues)
    return subtitles
def _real_extract(self, url):
    """Extract formats, subtitles and metadata for a BBC iPlayer episode.

    Steps: download the episode page (to detect the "not in UK" error),
    download the playlist XML, then for every ``programme`` /
    ``radioProgramme`` item download its media selection XML and collect
    audio/video formats and captions.

    Returns the info dict, or ``None`` after listing subtitles when the
    ``listsubtitles`` option is set.

    Raises ExtractorError when the programme is geo-blocked, when the
    playlist reports ``noItems``, or when the playlist contains no
    programme item at all.
    """
    mobj = re.match(self._VALID_URL, url)
    group_id = mobj.group('id')

    webpage = self._download_webpage(url, group_id, 'Downloading video page')
    if re.search(r'id="emp-error" class="notinuk">', webpage):
        raise ExtractorError('Currently BBC iPlayer TV programmes are available to play in the UK only',
                             expected=True)

    playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
                                  'Downloading playlist XML')

    no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
    if no_items is not None:
        reason = no_items.get('reason')
        if reason == 'preAvailability':
            msg = 'Episode %s is not yet available' % group_id
        elif reason == 'postAvailability':
            msg = 'Episode %s is no longer available' % group_id
        else:
            msg = 'Episode %s is not available: %s' % (group_id, reason)
        raise ExtractorError(msg, expected=True)

    formats = []
    subtitles = None
    programme_found = False

    for item in self._extract_items(playlist):
        kind = item.get('kind')
        if kind != 'programme' and kind != 'radioProgramme':
            continue
        programme_found = True
        title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
        description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text

        programme_id = item.get('identifier')
        duration = int(item.get('duration'))

        media_selection = self._download_xml(
            'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
            programme_id, 'Downloading media selection XML')

        for media in self._extract_medias(media_selection):
            kind = media.get('kind')
            if kind == 'audio':
                formats.extend(self._extract_audio(media, programme_id))
            elif kind == 'video':
                formats.extend(self._extract_video(media, programme_id))
            elif kind == 'captions':
                subtitles = self._extract_captions(media, programme_id)

    if not programme_found:
        # Without this guard programme_id/title/description/duration are
        # never bound and the code below fails with UnboundLocalError.
        raise ExtractorError('No programme found in playlist for episode %s' % group_id)

    if self._downloader.params.get('listsubtitles', False):
        self._list_available_subtitles(programme_id, subtitles)
        return

    self._sort_formats(formats)

    return {
        'id': programme_id,
        'title': title,
        'description': description,
        'duration': duration,
        'formats': formats,
        'subtitles': subtitles,
    }

View File

@ -0,0 +1,95 @@
from __future__ import unicode_literals
import datetime
import json
import re
from .common import InfoExtractor
from ..utils import (
remove_start,
)
class BlinkxIE(InfoExtractor):
    """Information extractor for blinkx.com (``/ce/<id>`` URLs and ``blinkx:<id>``)."""

    # The "www." prefix is optional: the group previously had no quantifier,
    # which made it mandatory even though it was written as a group.
    _VALID_URL = r'^(?:https?://(?:www\.)?blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
    IE_NAME = 'blinkx'

    _TEST = {
        'url': 'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB',
        'file': '8aQUy7GV.mp4',
        'md5': '2e9a07364af40163a908edbf10bb2492',
        'info_dict': {
            "title": "Police Car Rolls Away",
            "uploader": "stupidvideos.com",
            "upload_date": "20131215",
            "description": "A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!",
            "duration": 14.886,
            "thumbnails": [{
                "width": 100,
                "height": 76,
                "url": "http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg",
            }],
        },
    }

    def _real_extract(self, rl):
        """Query the blinkx play_video API and build the info dict.

        *rl* is the URL (or ``blinkx:`` id) matched by ``_VALID_URL``. The
        first 8 characters of the matched id are used as the display id.
        If the API lists a ``youtube`` media entry, extraction is delegated
        to the Youtube extractor via ``url_result``.
        """
        mobj = re.match(self._VALID_URL, rl)
        video_id = mobj.group('id')
        display_id = video_id[:8]

        api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' +
                   'video=%s' % video_id)
        data_json = self._download_webpage(api_url, display_id)
        data = json.loads(data_json)['api']['results'][0]

        # Use UTC: fromtimestamp() converts to the machine's local timezone,
        # so the date could differ by a day depending on where this runs.
        published = datetime.datetime.utcfromtimestamp(data['pubdate_epoch'])
        upload_date = published.strftime('%Y%m%d')

        duration = None
        thumbnails = []
        formats = []
        for media in data['media']:
            media_type = media['type']
            if media_type == 'jpg':
                thumbnails.append({
                    'url': media['link'],
                    'width': int(media['w']),
                    'height': int(media['h']),
                })
            elif media_type == 'original':
                duration = media['d']
            elif media_type == 'youtube':
                yt_id = media['link']
                self.to_screen('Youtube video detected: %s' % yt_id)
                return self.url_result(yt_id, 'Youtube', video_id=yt_id)
            elif media_type in ('flv', 'mp4'):
                vcodec = remove_start(media['vcodec'], 'ff')
                acodec = remove_start(media['acodec'], 'ff')
                # Source bitrates are divided by 1000 before being reported.
                abr = int(media['abr']) // 1000
                vbr = int(media['vbr']) // 1000
                tbr = (int(media['vbr']) + int(media['abr'])) // 1000
                formats.append({
                    'format_id': '%s-%sk-%s' % (vcodec, tbr, media['w']),
                    'url': media['link'],
                    'vcodec': vcodec,
                    'acodec': acodec,
                    'abr': abr,
                    'vbr': vbr,
                    'tbr': tbr,
                    'width': int(media['w']),
                    'height': int(media['h']),
                })
        self._sort_formats(formats)

        return {
            'id': display_id,
            'fullid': video_id,
            'title': data['title'],
            'formats': formats,
            'uploader': data['channel_name'],
            'upload_date': upload_date,
            'description': data.get('description'),
            'thumbnails': thumbnails,
            'duration': duration,
        }

View File

@ -1,160 +1,145 @@
from __future__ import unicode_literals
import datetime import datetime
import json
import os
import re import re
import socket
from .common import InfoExtractor from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..utils import ( from ..utils import (
compat_http_client,
compat_parse_qs,
compat_str, compat_str,
compat_urllib_error,
compat_urllib_parse_urlparse,
compat_urllib_request, compat_urllib_request,
ExtractorError,
unescapeHTML, unescapeHTML,
) )
class BlipTVIE(InfoExtractor): class BlipTVIE(SubtitlesInfoExtractor):
"""Information extractor for blip.tv""" """Information extractor for blip.tv"""
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$' _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(?P<presumptive_id>.+)$'
_URL_EXT = r'^.*\.([a-z0-9]+)$'
IE_NAME = u'blip.tv'
_TEST = {
u'url': u'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
u'file': u'5779306.m4v',
u'md5': u'80baf1ec5c3d2019037c1c707d676b9f',
u'info_dict': {
u"upload_date": u"20111205",
u"description": u"md5:9bc31f227219cde65e47eeec8d2dc596",
u"uploader": u"Comic Book Resources - CBR TV",
u"title": u"CBR EXCLUSIVE: \"Gotham City Imposters\" Bats VS Jokerz Short 3"
}
}
def report_direct_download(self, title): _TESTS = [{
"""Report information extraction.""" 'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
self.to_screen(u'%s: Direct download detected' % title) 'md5': 'c6934ad0b6acf2bd920720ec888eb812',
'info_dict': {
'id': '5779306',
'ext': 'mov',
'upload_date': '20111205',
'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
'uploader': 'Comic Book Resources - CBR TV',
'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
}
}, {
# https://github.com/rg3/youtube-dl/pull/2274
'note': 'Video with subtitles',
'url': 'http://blip.tv/play/h6Uag5OEVgI.html',
'md5': '309f9d25b820b086ca163ffac8031806',
'info_dict': {
'id': '6586561',
'ext': 'mp4',
'uploader': 'Red vs. Blue',
'description': 'One-Zero-One',
'upload_date': '20130614',
'title': 'Red vs. Blue Season 11 Episode 1',
}
}]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
if mobj is None: presumptive_id = mobj.group('presumptive_id')
raise ExtractorError(u'Invalid URL: %s' % url)
# See https://github.com/rg3/youtube-dl/issues/857 # See https://github.com/rg3/youtube-dl/issues/857
api_mobj = re.match(r'http://a\.blip\.tv/api\.swf#(?P<video_id>[\d\w]+)', url) embed_mobj = re.match(r'https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', url)
if api_mobj is not None: if embed_mobj:
url = 'http://blip.tv/play/g_%s' % api_mobj.group('video_id') info_url = 'http://blip.tv/play/%s.x?p=1' % embed_mobj.group(1)
urlp = compat_urllib_parse_urlparse(url) info_page = self._download_webpage(info_url, embed_mobj.group(1))
if urlp.path.startswith('/play/'): video_id = self._search_regex(
request = compat_urllib_request.Request(url) r'data-episode-id="([0-9]+)', info_page, 'video_id')
response = compat_urllib_request.urlopen(request) return self.url_result('http://blip.tv/a/a-' + video_id, 'BlipTV')
redirecturl = response.geturl()
rurlp = compat_urllib_parse_urlparse(redirecturl) cchar = '&' if '?' in url else '?'
file_id = compat_parse_qs(rurlp.fragment)['file'][0].rpartition('/')[2]
url = 'http://blip.tv/a/a-' + file_id
return self._real_extract(url)
if '?' in url:
cchar = '&'
else:
cchar = '?'
json_url = url + cchar + 'skin=json&version=2&no_wrap=1' json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
request = compat_urllib_request.Request(json_url) request = compat_urllib_request.Request(json_url)
request.add_header('User-Agent', 'iTunes/10.6.1') request.add_header('User-Agent', 'iTunes/10.6.1')
self.report_extraction(mobj.group(1))
info = None
try:
urlh = compat_urllib_request.urlopen(request)
if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
basename = url.split('/')[-1]
title,ext = os.path.splitext(basename)
title = title.decode('UTF-8')
ext = ext.replace('.', '')
self.report_direct_download(title)
info = {
'id': title,
'url': url,
'uploader': None,
'upload_date': None,
'title': title,
'ext': ext,
'urlhandle': urlh
}
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
if info is None: # Regular URL
try:
json_code_bytes = urlh.read()
json_code = json_code_bytes.decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'Unable to read video info webpage: %s' % compat_str(err))
try: json_data = self._download_json(request, video_id=presumptive_id)
json_data = json.loads(json_code)
if 'Post' in json_data:
data = json_data['Post']
else:
data = json_data
upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') if 'Post' in json_data:
if 'additionalMedia' in data: data = json_data['Post']
formats = sorted(data['additionalMedia'], key=lambda f: int(f['media_height'])) else:
best_format = formats[-1] data = json_data
video_url = best_format['url']
else:
video_url = data['media']['url']
umobj = re.match(self._URL_EXT, video_url)
if umobj is None:
raise ValueError('Can not determine filename extension')
ext = umobj.group(1)
info = { video_id = compat_str(data['item_id'])
'id': compat_str(data['item_id']), upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
'url': video_url, subtitles = {}
'uploader': data['display_name'], formats = []
'upload_date': upload_date, if 'additionalMedia' in data:
'title': data['title'], for f in data['additionalMedia']:
'ext': ext, if f.get('file_type_srt') == 1:
'format': data['media']['mimeType'], LANGS = {
'thumbnail': data['thumbnailUrl'], 'english': 'en',
'description': data['description'], }
'player_url': data['embedUrl'], lang = f['role'].rpartition('-')[-1].strip().lower()
'user_agent': 'iTunes/10.6.1', langcode = LANGS.get(lang, lang)
} subtitles[langcode] = f['url']
except (ValueError,KeyError) as err: continue
raise ExtractorError(u'Unable to parse video information: %s' % repr(err)) if not int(f['media_width']): # filter m3u8
continue
formats.append({
'url': f['url'],
'format_id': f['role'],
'width': int(f['media_width']),
'height': int(f['media_height']),
})
else:
formats.append({
'url': data['media']['url'],
'width': int(data['media']['width']),
'height': int(data['media']['height']),
})
self._sort_formats(formats)
return [info] # subtitles
video_subtitles = self.extract_subtitles(video_id, subtitles)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id, subtitles)
return
return {
'id': video_id,
'uploader': data['display_name'],
'upload_date': upload_date,
'title': data['title'],
'thumbnail': data['thumbnailUrl'],
'description': data['description'],
'user_agent': 'iTunes/10.6.1',
'formats': formats,
'subtitles': video_subtitles,
}
def _download_subtitle_url(self, sub_lang, url):
# For some weird reason, blip.tv serves a video instead of subtitles
# when we request with a common UA
req = compat_urllib_request.Request(url)
req.add_header('Youtubedl-user-agent', 'youtube-dl')
return self._download_webpage(req, None, note=False)
class BlipTVUserIE(InfoExtractor): class BlipTVUserIE(InfoExtractor):
"""Information Extractor for blip.tv users."""
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$' _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$'
_PAGE_SIZE = 12 _PAGE_SIZE = 12
IE_NAME = u'blip.tv:user' IE_NAME = 'blip.tv:user'
def _real_extract(self, url): def _real_extract(self, url):
# Extract username
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
username = mobj.group(1) username = mobj.group(1)
page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1' page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'
page = self._download_webpage(url, username, u'Downloading user page') page = self._download_webpage(url, username, 'Downloading user page')
mobj = re.search(r'data-users-id="([^"]+)"', page) mobj = re.search(r'data-users-id="([^"]+)"', page)
page_base = page_base % mobj.group(1) page_base = page_base % mobj.group(1)
# Download video ids using BlipTV Ajax calls. Result size per # Download video ids using BlipTV Ajax calls. Result size per
# query is limited (currently to 12 videos) so we need to query # query is limited (currently to 12 videos) so we need to query
# page by page until there are no video ids - it means we got # page by page until there are no video ids - it means we got
@ -165,8 +150,8 @@ class BlipTVUserIE(InfoExtractor):
while True: while True:
url = page_base + "&page=" + str(pagenum) url = page_base + "&page=" + str(pagenum)
page = self._download_webpage(url, username, page = self._download_webpage(
u'Downloading video ids from page %d' % pagenum) url, username, 'Downloading video ids from page %d' % pagenum)
# Extract video identifiers # Extract video identifiers
ids_in_page = [] ids_in_page = []
@ -188,6 +173,6 @@ class BlipTVUserIE(InfoExtractor):
pagenum += 1 pagenum += 1
urls = [u'http://blip.tv/%s' % video_id for video_id in video_ids] urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]
url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls] url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
return [self.playlist_result(url_entries, playlist_title = username)] return [self.playlist_result(url_entries, playlist_title=username)]

View File

@ -1,10 +1,11 @@
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from .ooyala import OoyalaIE
class BloombergIE(InfoExtractor): class BloombergIE(InfoExtractor):
_VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?).html' _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'
_TEST = { _TEST = {
u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html', u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
@ -23,5 +24,7 @@ class BloombergIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
name = mobj.group('name') name = mobj.group('name')
webpage = self._download_webpage(url, name) webpage = self._download_webpage(url, name)
ooyala_url = self._og_search_video_url(webpage) embed_code = self._search_regex(
return self.url_result(ooyala_url, ie='Ooyala') r'<source src="https?://[^/]+/[^/]+/[^/]+/([^/]+)', webpage,
'embed code')
return OoyalaIE._build_url_result(embed_code)

View File

@ -0,0 +1,99 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import ExtractorError
class BRIE(InfoExtractor):
    """Information extractor for the Bayerischer Rundfunk Mediathek (br.de)."""

    IE_DESC = "Bayerischer Rundfunk Mediathek"
    _VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?:[a-z0-9\-/]+/)?(?P<id>[a-z0-9\-]+)\.html$"
    _BASE_URL = "http://www.br.de"

    _TESTS = [
        {
            "url": "http://www.br.de/mediathek/video/anselm-gruen-114.html",
            "md5": "c4f83cf0f023ba5875aba0bf46860df2",
            "info_dict": {
                "id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532",
                "ext": "mp4",
                "title": "Feiern und Verzichten",
                "description": "Anselm Grün: Feiern und Verzichten",
                "uploader": "BR/Birgit Baier",
                "upload_date": "20140301"
            }
        },
        {
            "url": "http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html",
            "md5": "ab451b09d861dbed7d7cc9ab0be19ebe",
            "info_dict": {
                "id": "2c060e69-3a27-4e13-b0f0-668fac17d812",
                "ext": "mp4",
                "title": "Über den Pass",
                "description": "Die Eroberung der Alpen: Über den Pass",
                "uploader": None,
                "upload_date": None
            }
        }
    ]

    @staticmethod
    def _child_text(element, tag):
        """Return the text of child *tag* of *element*, or None if either is missing."""
        if element is None:
            return None
        child = element.find(tag)
        return None if child is None else child.text

    @staticmethod
    def _child_int(element, tag):
        """Return the text of child *tag* as an int, or None if missing or empty."""
        text = BRIE._child_text(element, tag)
        return int(text) if text else None

    def _real_extract(self, url):
        """Locate the player's data XML on the page and return the first video it lists.

        Emits a warning if the XML lists more than one video and raises
        ExtractorError if it lists none. ``uploader`` and ``upload_date``
        are only set when the corresponding XML elements carry text.
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('id')
        page = self._download_webpage(url, display_id)
        xml_url = self._search_regex(
            r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL")
        xml = self._download_xml(self._BASE_URL + xml_url, None)

        videos = []
        for xml_video in xml.findall("video"):
            share_title = self._child_text(xml_video, "shareTitle")
            video = {
                "id": xml_video.get("externalId"),
                "title": xml_video.find("title").text,
                "formats": self._extract_formats(xml_video.find("assets")),
                "thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")),
                # Collapse a multi-line share title into a single line.
                "description": " ".join(share_title.splitlines()) if share_title is not None else None,
                "webpage_url": self._child_text(xml_video, "permalink"),
            }
            author = self._child_text(xml_video, "author")
            if author:
                video["uploader"] = author
            broadcast_date = self._child_text(xml_video, "broadcastDate")
            if broadcast_date:
                # Dot-separated parts are reversed and concatenated,
                # e.g. "01.03.2014" -> "20140301".
                video["upload_date"] = "".join(reversed(broadcast_date.split(".")))
            videos.append(video)

        if len(videos) > 1:
            self._downloader.report_warning(
                'found multiple videos; please '
                'report this with the video URL to http://yt-dl.org/bug')
        if not videos:
            raise ExtractorError('No video entries found')
        return videos[0]

    def _extract_formats(self, assets):
        """Build and sort the format list from an ``assets`` element.

        Assets without a ``downloadUrl`` child are skipped. A missing
        *assets* element yields an empty list (which is still handed to
        ``_sort_formats``); missing numeric or text children become None
        rather than raising.
        """
        asset_list = assets.findall("asset") if assets is not None else []
        formats = [{
            "url": asset.find("downloadUrl").text,
            "ext": self._child_text(asset, "mediaType"),
            "format_id": asset.get("type"),
            "width": self._child_int(asset, "frameWidth"),
            "height": self._child_int(asset, "frameHeight"),
            "tbr": self._child_int(asset, "bitrateVideo"),
            "abr": self._child_int(asset, "bitrateAudio"),
            "vcodec": self._child_text(asset, "codecVideo"),
            "container": self._child_text(asset, "mediaType"),
            "filesize": self._child_int(asset, "size"),
        } for asset in asset_list
            if asset.find("downloadUrl") is not None]
        self._sort_formats(formats)
        return formats

    def _extract_thumbnails(self, variants):
        """Return thumbnail dicts from a ``variants`` element, largest area first.

        Returns an empty list when *variants* is None; variants without a
        ``url`` are skipped. Unknown dimensions count as 0 for sorting.
        """
        if variants is None:
            return []
        thumbnails = []
        for variant in variants.findall("variant"):
            path = self._child_text(variant, "url")
            if not path:
                continue
            thumbnails.append({
                "url": self._BASE_URL + path,
                "width": self._child_int(variant, "width"),
                "height": self._child_int(variant, "height"),
            })
        thumbnails.sort(
            key=lambda t: (t["width"] or 0) * (t["height"] or 0), reverse=True)
        return thumbnails

View File

@ -1,18 +1,20 @@
from __future__ import unicode_literals
import re import re
import json import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import determine_ext
class BreakIE(InfoExtractor): class BreakIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?break\.com/video/([^/]+)' _VALID_URL = r'http://(?:www\.)?break\.com/video/([^/]+)'
_TEST = { _TEST = {
u'url': u'http://www.break.com/video/when-girls-act-like-guys-2468056', 'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056',
u'file': u'2468056.mp4', 'md5': 'a3513fb1547fba4fb6cfac1bffc6c46b',
u'md5': u'a3513fb1547fba4fb6cfac1bffc6c46b', 'info_dict': {
u'info_dict': { 'id': '2468056',
u"title": u"When Girls Act Like D-Bags" 'ext': 'mp4',
'title': 'When Girls Act Like D-Bags',
} }
} }
@ -21,18 +23,17 @@ class BreakIE(InfoExtractor):
video_id = mobj.group(1).split("-")[-1] video_id = mobj.group(1).split("-")[-1]
embed_url = 'http://www.break.com/embed/%s' % video_id embed_url = 'http://www.break.com/embed/%s' % video_id
webpage = self._download_webpage(embed_url, video_id) webpage = self._download_webpage(embed_url, video_id)
info_json = self._search_regex(r'var embedVars = ({.*?});', webpage, info_json = self._search_regex(r'var embedVars = ({.*})\s*?</script>',
u'info json', flags=re.DOTALL) webpage, 'info json', flags=re.DOTALL)
info = json.loads(info_json) info = json.loads(info_json)
video_url = info['videoUri'] video_url = info['videoUri']
m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url) m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url)
if m_youtube is not None: if m_youtube is not None:
return self.url_result(m_youtube.group(1), 'Youtube') return self.url_result(m_youtube.group(1), 'Youtube')
final_url = video_url + '?' + info['AuthToken'] final_url = video_url + '?' + info['AuthToken']
return [{ return {
'id': video_id, 'id': video_id,
'url': final_url, 'url': final_url,
'ext': determine_ext(final_url), 'title': info['contentName'],
'title': info['contentName'],
'thumbnail': info['thumbUri'], 'thumbnail': info['thumbUri'],
}] }

View File

@ -1,4 +1,5 @@
# encoding: utf-8 # encoding: utf-8
from __future__ import unicode_literals
import re import re
import json import json
@ -8,39 +9,68 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_urllib_parse, compat_urllib_parse,
find_xpath_attr, find_xpath_attr,
fix_xml_ampersands,
compat_urlparse, compat_urlparse,
compat_str,
compat_urllib_request,
compat_parse_qs,
ExtractorError, ExtractorError,
unsmuggle_url,
unescapeHTML,
) )
class BrightcoveIE(InfoExtractor): class BrightcoveIE(InfoExtractor):
_VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)' _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
_FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s' _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
_PLAYLIST_URL_TEMPLATE = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s'
_TESTS = [ _TESTS = [
{ {
# From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/ # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001', 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
u'file': u'2371591881001.mp4', 'file': '2371591881001.mp4',
u'md5': u'9e80619e0a94663f0bdc849b4566af19', 'md5': '5423e113865d26e40624dce2e4b45d95',
u'note': u'Test Brightcove downloads and detection in GenericIE', 'note': 'Test Brightcove downloads and detection in GenericIE',
u'info_dict': { 'info_dict': {
u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”', 'title': 'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
u'uploader': u'8TV', 'uploader': '8TV',
u'description': u'md5:a950cc4285c43e44d763d036710cd9cd', 'description': 'md5:a950cc4285c43e44d763d036710cd9cd',
} }
}, },
{ {
# From http://medianetwork.oracle.com/video/player/1785452137001 # From http://medianetwork.oracle.com/video/player/1785452137001
u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001', 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001',
u'file': u'1785452137001.flv', 'file': '1785452137001.flv',
u'info_dict': { 'info_dict': {
u'title': u'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges', 'title': 'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges',
u'description': u'John Rose speaks at the JVM Language Summit, August 1, 2012.', 'description': 'John Rose speaks at the JVM Language Summit, August 1, 2012.',
u'uploader': u'Oracle', 'uploader': 'Oracle',
}, },
}, },
{
# From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/
'url': 'http://c.brightcove.com/services/viewer/federated_f9?&playerID=1265504713001&publisherID=AQ%7E%7E%2CAAABBzUwv1E%7E%2CxP-xFHVUstiMFlNYfvF4G9yFnNaqCw_9&videoID=2750934548001',
'info_dict': {
'id': '2750934548001',
'ext': 'mp4',
'title': 'This Bracelet Acts as a Personal Thermostat',
'description': 'md5:547b78c64f4112766ccf4e151c20b6a0',
'uploader': 'Mashable',
},
},
{
# test that the default referer works
# from http://national.ballet.ca/interact/video/Lost_in_Motion_II/
'url': 'http://link.brightcove.com/services/player/bcpid756015033001?bckey=AQ~~,AAAApYJi_Ck~,GxhXCegT1Dp39ilhXuxMJxasUhVNZiil&bctid=2878862109001',
'info_dict': {
'id': '2878862109001',
'ext': 'mp4',
'title': 'Lost in Motion II',
'description': 'md5:363109c02998fee92ec02211bd8000df',
'uploader': 'National Ballet of Canada',
},
}
] ]
@classmethod @classmethod
@ -54,53 +84,120 @@ class BrightcoveIE(InfoExtractor):
object_str = re.sub(r'(<param name="[^"]+" value="[^"]+")>', object_str = re.sub(r'(<param name="[^"]+" value="[^"]+")>',
lambda m: m.group(1) + '/>', object_str) lambda m: m.group(1) + '/>', object_str)
# Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608 # Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608
object_str = object_str.replace(u'<--', u'<!--') object_str = object_str.replace('<--', '<!--')
object_str = fix_xml_ampersands(object_str)
object_doc = xml.etree.ElementTree.fromstring(object_str) object_doc = xml.etree.ElementTree.fromstring(object_str)
assert u'BrightcoveExperience' in object_doc.attrib['class']
params = {'flashID': object_doc.attrib['id'], fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'], if fv_el is not None:
} flashvars = dict(
playerKey = find_xpath_attr(object_doc, './param', 'name', 'playerKey') (k, v[0])
for k, v in compat_parse_qs(fv_el.attrib['value']).items())
else:
flashvars = {}
def find_param(name):
if name in flashvars:
return flashvars[name]
node = find_xpath_attr(object_doc, './param', 'name', name)
if node is not None:
return node.attrib['value']
return None
params = {}
playerID = find_param('playerID')
if playerID is None:
raise ExtractorError('Cannot find player ID')
params['playerID'] = playerID
playerKey = find_param('playerKey')
# Not all pages define this value # Not all pages define this value
if playerKey is not None: if playerKey is not None:
params['playerKey'] = playerKey.attrib['value'] params['playerKey'] = playerKey
videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer') # The three fields hold the id of the video
videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID')
if videoPlayer is not None: if videoPlayer is not None:
params['@videoPlayer'] = videoPlayer.attrib['value'] params['@videoPlayer'] = videoPlayer
linkBase = find_param('linkBaseURL')
if linkBase is not None:
params['linkBaseURL'] = linkBase
data = compat_urllib_parse.urlencode(params) data = compat_urllib_parse.urlencode(params)
return cls._FEDERATED_URL_TEMPLATE % data return cls._FEDERATED_URL_TEMPLATE % data
@classmethod
def _extract_brightcove_url(cls, webpage):
"""Try to extract the brightcove url from the webpage, returns None
if it can't be found
"""
urls = cls._extract_brightcove_urls(webpage)
return urls[0] if urls else None
@classmethod
def _extract_brightcove_urls(cls, webpage):
"""Return a list of all Brightcove URLs from the webpage """
url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
if url_m:
return [unescapeHTML(url_m.group(1))]
matches = re.findall(
r'''(?sx)<object
(?:
[^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
[^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
).+?</object>''',
webpage)
return [cls._build_brighcove_url(m) for m in matches]
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
# Change the 'videoId' and others field to '@videoPlayer'
url = re.sub(r'(?<=[?&])(videoI(d|D)|bctid)', '%40videoPlayer', url)
# Change bckey (used by bcove.me urls) to playerKey
url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
query_str = mobj.group('query') query_str = mobj.group('query')
query = compat_urlparse.parse_qs(query_str) query = compat_urlparse.parse_qs(query_str)
videoPlayer = query.get('@videoPlayer') videoPlayer = query.get('@videoPlayer')
if videoPlayer: if videoPlayer:
return self._get_video_info(videoPlayer[0], query_str) # We set the original url as the default 'Referer' header
referer = smuggled_data.get('Referer', url)
return self._get_video_info(
videoPlayer[0], query_str, query, referer=referer)
else: else:
player_key = query['playerKey'] player_key = query['playerKey']
return self._get_playlist_info(player_key[0]) return self._get_playlist_info(player_key[0])
def _get_video_info(self, video_id, query): def _get_video_info(self, video_id, query_str, query, referer=None):
request_url = self._FEDERATED_URL_TEMPLATE % query request_url = self._FEDERATED_URL_TEMPLATE % query_str
webpage = self._download_webpage(request_url, video_id) req = compat_urllib_request.Request(request_url)
linkBase = query.get('linkBaseURL')
if linkBase is not None:
referer = linkBase[0]
if referer is not None:
req.add_header('Referer', referer)
webpage = self._download_webpage(req, video_id)
self.report_extraction(video_id) self.report_extraction(video_id)
info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json') info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
info = json.loads(info)['data'] info = json.loads(info)['data']
video_info = info['programmedContent']['videoPlayer']['mediaDTO'] video_info = info['programmedContent']['videoPlayer']['mediaDTO']
video_info['_youtubedl_adServerURL'] = info.get('adServerURL')
return self._extract_video_info(video_info) return self._extract_video_info(video_info)
def _get_playlist_info(self, player_key): def _get_playlist_info(self, player_key):
playlist_info = self._download_webpage(self._PLAYLIST_URL_TEMPLATE % player_key, info_url = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' % player_key
player_key, u'Downloading playlist information') playlist_info = self._download_webpage(
info_url, player_key, 'Downloading playlist information')
json_data = json.loads(playlist_info) json_data = json.loads(playlist_info)
if 'videoList' not in json_data: if 'videoList' not in json_data:
raise ExtractorError(u'Empty playlist') raise ExtractorError('Empty playlist')
playlist_info = json_data['videoList'] playlist_info = json_data['videoList']
videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']] videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']]
@ -109,8 +206,8 @@ class BrightcoveIE(InfoExtractor):
def _extract_video_info(self, video_info): def _extract_video_info(self, video_info):
info = { info = {
'id': video_info['id'], 'id': compat_str(video_info['id']),
'title': video_info['displayName'], 'title': video_info['displayName'].strip(),
'description': video_info.get('shortDescription'), 'description': video_info.get('shortDescription'),
'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'), 'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
'uploader': video_info.get('publisherName'), 'uploader': video_info.get('publisherName'),
@ -119,16 +216,32 @@ class BrightcoveIE(InfoExtractor):
renditions = video_info.get('renditions') renditions = video_info.get('renditions')
if renditions: if renditions:
renditions = sorted(renditions, key=lambda r: r['size']) renditions = sorted(renditions, key=lambda r: r['size'])
best_format = renditions[-1] info['formats'] = [{
info.update({ 'url': rend['defaultURL'],
'url': best_format['defaultURL'], 'height': rend.get('frameHeight'),
'ext': 'mp4', 'width': rend.get('frameWidth'),
}) } for rend in renditions]
elif video_info.get('FLVFullLengthURL') is not None: elif video_info.get('FLVFullLengthURL') is not None:
info.update({ info.update({
'url': video_info['FLVFullLengthURL'], 'url': video_info['FLVFullLengthURL'],
'ext': 'flv',
}) })
else:
raise ExtractorError(u'Unable to extract video url for %s' % info['id']) if self._downloader.params.get('include_ads', False):
adServerURL = video_info.get('_youtubedl_adServerURL')
if adServerURL:
ad_info = {
'_type': 'url',
'url': adServerURL,
}
if 'url' in info:
return {
'_type': 'playlist',
'title': info['title'],
'entries': [ad_info, info],
}
else:
return ad_info
if 'url' not in info and not info.get('formats'):
raise ExtractorError('Unable to extract video url for %s' % info['id'])
return info return info

View File

@ -1,21 +1,21 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals
import re import re
import json import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import determine_ext
class C56IE(InfoExtractor): class C56IE(InfoExtractor):
_VALID_URL = r'https?://((www|player)\.)?56\.com/(.+?/)?(v_|(play_album.+-))(?P<textid>.+?)\.(html|swf)' _VALID_URL = r'https?://((www|player)\.)?56\.com/(.+?/)?(v_|(play_album.+-))(?P<textid>.+?)\.(html|swf)'
IE_NAME = u'56.com' IE_NAME = '56.com'
_TEST = {
_TEST ={ 'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html',
u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html', 'file': '93440716.flv',
u'file': u'93440716.flv', 'md5': 'e59995ac63d0457783ea05f93f12a866',
u'md5': u'e59995ac63d0457783ea05f93f12a866', 'info_dict': {
u'info_dict': { 'title': '网事知多少 第32期车怒',
u'title': u'网事知多少 第32期车怒',
}, },
} }
@ -23,14 +23,18 @@ class C56IE(InfoExtractor):
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
text_id = mobj.group('textid') text_id = mobj.group('textid')
info_page = self._download_webpage('http://vxml.56.com/json/%s/' % text_id, info_page = self._download_webpage('http://vxml.56.com/json/%s/' % text_id,
text_id, u'Downloading video info') text_id, 'Downloading video info')
info = json.loads(info_page)['info'] info = json.loads(info_page)['info']
best_format = sorted(info['rfiles'], key=lambda f: int(f['filesize']))[-1] formats = [{
video_url = best_format['url'] 'format_id': f['type'],
'filesize': int(f['filesize']),
'url': f['url']
} for f in info['rfiles']]
self._sort_formats(formats)
return {'id': info['vid'], return {
'title': info['Subject'], 'id': info['vid'],
'url': video_url, 'title': info['Subject'],
'ext': determine_ext(video_url), 'formats': formats,
'thumbnail': info.get('bimg') or info.get('img'), 'thumbnail': info.get('bimg') or info.get('img'),
} }

View File

@ -0,0 +1,48 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class Canal13clIE(InfoExtractor):
    """Information extractor for article pages on 13.cl."""

    _VALID_URL = r'^http://(?:www\.)?13\.cl/(?:[^/?#]+/)*(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://www.13.cl/t13/nacional/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
        'md5': '4cb1fa38adcad8fea88487a078831755',
        'info_dict': {
            'id': '1403022125',
            'display_id': 'el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
            'ext': 'mp4',
            'title': 'El "círculo de hierro" de Michelle Bachelet en su regreso a La Moneda',
            'description': '(Foto: Agencia Uno) En nueve días más, Michelle Bachelet va a asumir por segunda vez como presidenta de la República. Entre aquellos que la acompañarán hay caras que se repiten y otras que se consolidan en su entorno de colaboradores más cercanos.',
        }
    }

    def _real_extract(self, url):
        """Extract the video embedded in the article page at ``url``.

        The last path component of the URL is used as ``display_id``.  The
        media URL is read from the page's ``articuloVideo`` variable; the
        numeric id is the first run of 7+ digits found in that media URL,
        falling back to ``display_id`` when there is none.

        Returns an info dict with id, display_id, url, title, description,
        ext and thumbnail.
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('id')

        webpage = self._download_webpage(url, display_id)

        title = self._html_search_meta(
            'twitter:title', webpage, 'title', fatal=True)
        description = self._html_search_meta(
            'twitter:description', webpage, 'description')
        # Keep the page URL in ``url`` and use a dedicated name for the media
        # URL so the two cannot be confused.
        video_url = self._html_search_regex(
            r'articuloVideo = \"(.*?)\"', webpage, 'url')
        real_id = self._search_regex(
            r'[^0-9]([0-9]{7,})[^0-9]', video_url, 'id', default=display_id)
        # The thumbnail is optional metadata: a page without it must not
        # abort the whole extraction.
        thumbnail = self._html_search_regex(
            r'articuloImagen = \"(.*?)\"', webpage, 'thumbnail', fatal=False)

        return {
            'id': real_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'description': description,
            'ext': 'mp4',
            'thumbnail': thumbnail,
        }

View File

@ -1,4 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
@ -6,19 +8,22 @@ from .common import InfoExtractor
class Canalc2IE(InfoExtractor): class Canalc2IE(InfoExtractor):
IE_NAME = 'canalc2.tv' IE_NAME = 'canalc2.tv'
_VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui' _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P<id>\d+)'
_TEST = { _TEST = {
u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui', 'url': 'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
u'file': u'12163.mp4', 'md5': '060158428b650f896c542dfbb3d6487f',
u'md5': u'060158428b650f896c542dfbb3d6487f', 'info_dict': {
u'info_dict': { 'id': '12163',
u'title': u'Terrasses du Numérique' 'ext': 'mp4',
'title': 'Terrasses du Numérique'
} }
} }
def _real_extract(self, url): def _real_extract(self, url):
video_id = re.match(self._VALID_URL, url).group(1) video_id = re.match(self._VALID_URL, url).group('id')
# We need to set the voir field for getting the file name
url = 'http://www.canalc2.tv/video.asp?idVideo=%s&voir=oui' % video_id
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
file_name = self._search_regex( file_name = self._search_regex(
r"so\.addVariable\('file','(.*?)'\);", r"so\.addVariable\('file','(.*?)'\);",
@ -26,10 +31,11 @@ class Canalc2IE(InfoExtractor):
video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name
title = self._html_search_regex( title = self._html_search_regex(
r'class="evenement8">(.*?)</a>', webpage, u'title') r'class="evenement8">(.*?)</a>', webpage, 'title')
return {'id': video_id, return {
'ext': 'mp4', 'id': video_id,
'url': video_url, 'ext': 'mp4',
'title': title, 'url': video_url,
} 'title': title,
}

View File

@ -1,10 +1,10 @@
# encoding: utf-8 # encoding: utf-8
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import unified_strdate from ..utils import unified_strdate
class CanalplusIE(InfoExtractor): class CanalplusIE(InfoExtractor):
_VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))' _VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))'
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s' _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
@ -25,16 +25,15 @@ class CanalplusIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.groupdict().get('id')
if video_id is None: if video_id is None:
webpage = self._download_webpage(url, mobj.group('path')) webpage = self._download_webpage(url, mobj.group('path'))
video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id') video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
info_url = self._VIDEO_INFO_TEMPLATE % video_id info_url = self._VIDEO_INFO_TEMPLATE % video_id
info_page = self._download_webpage(info_url,video_id, doc = self._download_xml(info_url,video_id,
u'Downloading video info') u'Downloading video info')
self.report_extraction(video_id) self.report_extraction(video_id)
doc = xml.etree.ElementTree.fromstring(info_page.encode('utf-8'))
video_info = [video for video in doc if video.find('ID').text == video_id][0] video_info = [video for video in doc if video.find('ID').text == video_id][0]
infos = video_info.find('INFOS') infos = video_info.find('INFOS')
media = video_info.find('MEDIA') media = video_info.find('MEDIA')

View File

@ -0,0 +1,30 @@
import re
from .common import InfoExtractor
class CBSIE(InfoExtractor):
    """Resolve cbs.com show video pages to a ``theplatform:`` URL result."""

    _VALID_URL = r'https?://(?:www\.)?cbs\.com/shows/[^/]+/video/(?P<id>[^/]+)/.*'

    _TEST = {
        u'url': u'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
        u'file': u'4JUVEwq3wUT7.flv',
        u'info_dict': {
            u'title': u'Connect Chat feat. Garth Brooks',
            u'description': u'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
            u'duration': 1495,
        },
        u'params': {
            # rtmp download
            u'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Download the page and delegate to the id found in ``video.settings.pid``."""
        page_id = re.match(self._VALID_URL, url).group('id')
        html = self._download_webpage(url, page_id)
        # The id in the URL is only a page id; the player is configured with
        # a different "pid", which is what gets handed on.
        pid = self._search_regex(
            r"video\.settings\.pid\s*=\s*'([^']+)';",
            html, u'real video ID')
        return self.url_result(u'theplatform:%s' % pid)

View File

@ -0,0 +1,126 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_request,
compat_urllib_parse,
compat_urllib_parse_urlparse,
ExtractorError,
)
class CeskaTelevizeIE(InfoExtractor):
    """Information extractor for ceskatelevize.cz (porady / ivysilani pages)."""

    _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'

    _TESTS = [
        {
            'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/213512120230004-spanelska-chripka',
            'info_dict': {
                'id': '213512120230004',
                'ext': 'flv',
                'title': 'První republika: Španělská chřipka',
                'duration': 3107.4,
            },
            'params': {
                'skip_download': True,  # requires rtmpdump
            },
            'skip': 'Works only from Czech Republic.',
        },
        {
            'url': 'http://www.ceskatelevize.cz/ivysilani/1030584952-tsatsiki-maminka-a-policajt',
            'info_dict': {
                'id': '20138143440',
                'ext': 'flv',
                'title': 'Tsatsiki, maminka a policajt',
                'duration': 6754.1,
            },
            'params': {
                'skip_download': True,  # requires rtmpdump
            },
            'skip': 'Works only from Czech Republic.',
        },
        {
            'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina',
            'info_dict': {
                'id': '14716',
                'ext': 'flv',
                'title': 'První republika: Zpěvačka z Dupárny Bobina',
                'duration': 90,
            },
            'params': {
                'skip_download': True,  # requires rtmpdump
            },
        },
    ]

    def _real_extract(self, url):
        """Extract the RTMP formats of the episode at ``url``.

        Steps: normalise the URL to its ``/ivysilani/`` form, read the
        playlist type and id from the page's ``getPlaylistUrl`` call, POST
        them to the ``get-playlist-url`` endpoint, then download the playlist
        XML it points to and collect every non-advertisement ``video`` entry.

        Returns an info dict with id, title, duration and formats.
        Raises ExtractorError (expected) when the page shows the
        territory-restriction notice.
        """
        url = url.replace('/porady/', '/ivysilani/').replace('/video/', '')

        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
        if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
            raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)

        typ = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type')
        episode_id = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id')

        data = {
            'playlist[0][type]': typ,
            'playlist[0][id]': episode_id,
            'requestUrl': compat_urllib_parse_urlparse(url).path,
            'requestSource': 'iVysilani',
        }

        # urllib on Python 3 refuses a str POST body.  The urlencoded form is
        # percent-escaped ASCII, so encoding it is safe on Python 2 as well.
        post_data = compat_urllib_parse.urlencode(data).encode('ascii')
        req = compat_urllib_request.Request(
            'http://www.ceskatelevize.cz/ivysilani/ajax/get-playlist-url',
            data=post_data)

        req.add_header('Content-type', 'application/x-www-form-urlencoded')
        req.add_header('x-addr', '127.0.0.1')
        req.add_header('X-Requested-With', 'XMLHttpRequest')
        req.add_header('Referer', url)

        playlistpage = self._download_json(req, video_id)

        req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlistpage['url']))
        req.add_header('Referer', url)

        playlist = self._download_xml(req, video_id)

        formats = []
        # Bound up front so the return below cannot hit an unbound name when
        # the playlist contains nothing but advertisement items.
        duration = None
        for i in playlist.find('smilRoot/body'):
            if 'AD' in i.attrib['id']:
                continue
            base_url = i.attrib['base']
            parsedurl = compat_urllib_parse_urlparse(base_url)
            duration = i.attrib['duration']

            for video in i.findall('video'):
                if video.attrib['label'] == 'AD':
                    continue
                format_id = video.attrib['label']
                play_path = video.attrib['src']
                vbr = int(video.attrib['system-bitrate'])

                formats.append({
                    'format_id': format_id,
                    'url': base_url,
                    'vbr': vbr,
                    'play_path': play_path,
                    'app': parsedurl.path[1:] + '?' + parsedurl.query,
                    'rtmp_live': True,
                    'ext': 'flv',
                })

        self._sort_formats(formats)

        return {
            'id': episode_id,
            'title': self._html_search_regex(r'<title>(.+?) — iVysílání — Česká televize</title>', webpage, 'title'),
            'duration': float(duration) if duration is not None else None,
            'formats': formats,
        }

View File

@ -0,0 +1,273 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import ExtractorError
class Channel9IE(InfoExtractor):
    '''
    Common extractor for channel9.msdn.com.

    The type of provided URL (video or playlist) is determined according to
    meta Search.PageType from web page HTML rather than URL itself, as it is
    not always possible to do.
    '''
    IE_DESC = 'Channel 9'
    IE_NAME = 'channel9'
    # NOTE(review): `.+` is greedy, so the trailing `/?` never consumes
    # anything and a trailing slash ends up inside `contentpath`.
    _VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?'

    _TESTS = [
        {
            'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
            'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
            'info_dict': {
                'id': 'Events/TechEd/Australia/2013/KOS002',
                'ext': 'mp4',
                'title': 'Developer Kick-Off Session: Stuff We Love',
                'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
                'duration': 4576,
                'thumbnail': 'http://media.ch9.ms/ch9/9d51/03902f2d-fc97-4d3c-b195-0bfe15a19d51/KOS002_220.jpg',
                'session_code': 'KOS002',
                'session_day': 'Day 1',
                'session_room': 'Arena 1A',
                'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', 'Mads Kristensen'],
            },
        },
        {
            'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
            'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
            'info_dict': {
                'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing',
                'ext': 'mp4',
                'title': 'Self-service BI with Power BI - nuclear testing',
                'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
                'duration': 1540,
                'thumbnail': 'http://media.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg',
                'authors': ['Mike Wilmot'],
            },
        }
    ]

    _RSS_URL = 'http://channel9.msdn.com/%s/RSS'

    # Sorted by quality
    _known_formats = ['MP3', 'MP4', 'Mid Quality WMV', 'Mid Quality MP4', 'High Quality WMV', 'High Quality MP4']

    def _restore_bytes(self, formatted_size):
        """Convert a size string such as '12.5 MB' into a byte count.

        Returns 0 when the input is empty, does not look like
        '<number> <unit>', or uses a unit outside B..YB.  Units are
        treated as powers of 1024.
        """
        if not formatted_size:
            return 0
        m = re.match(r'^(?P<size>\d+(?:\.\d+)?)\s+(?P<units>[a-zA-Z]+)', formatted_size)
        if not m:
            return 0
        units = m.group('units')
        try:
            exponent = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'].index(units.upper())
        except ValueError:
            return 0
        size = float(m.group('size'))
        return int(size * (1024 ** exponent))

    def _formats_from_html(self, html):
        """Collect the download links of known qualities from the page.

        Only links whose text is listed in ``_known_formats`` are kept; the
        index in that list is used as the format preference.  Returns the
        list of format dicts after passing it through ``_sort_formats``.
        """
        # The global (?x) flag has to be the very first thing in the pattern:
        # Python 3.11+ rejects global inline flags placed anywhere else, and a
        # leading newline before the flag counts as "anywhere else".
        FORMAT_REGEX = r'''(?x)
            <a\s+href="(?P<url>[^"]+)">(?P<quality>[^<]+)</a>\s*
            <span\s+class="usage">\((?P<note>[^\)]+)\)</span>\s*
            (?:<div\s+class="popup\s+rounded">\s*
            <h3>File\s+size</h3>\s*(?P<filesize>.*?)\s*
            </div>)?                                                # File size part may be missing
        '''
        # Extract known formats
        formats = [{
            'url': x.group('url'),
            'format_id': x.group('quality'),
            'format_note': x.group('note'),
            'format': '%s (%s)' % (x.group('quality'), x.group('note')),
            'filesize': self._restore_bytes(x.group('filesize')),  # File size is approximate
            'preference': self._known_formats.index(x.group('quality')),
            'vcodec': 'none' if x.group('note') == 'Audio only' else None,
        } for x in re.finditer(FORMAT_REGEX, html) if x.group('quality') in self._known_formats]
        self._sort_formats(formats)
        return formats

    def _extract_title(self, html):
        """Return the page title with a trailing ' (Channel 9)' removed.

        Uses the ``title`` meta tag and falls back to the OpenGraph title.
        """
        title = self._html_search_meta('title', html, 'title')
        if title is None:
            title = self._og_search_title(html)
        TITLE_SUFFIX = ' (Channel 9)'
        if title is not None and title.endswith(TITLE_SUFFIX):
            title = title[:-len(TITLE_SUFFIX)]
        return title

    def _extract_description(self, html):
        """Return the entry body markup, or the ``description`` meta tag if absent."""
        DESCRIPTION_REGEX = r'''(?sx)
            <div\s+class="entry-content">\s*
            <div\s+id="entry-body">\s*
            (?P<description>.+?)\s*
            </div>\s*
            </div>
        '''
        m = re.search(DESCRIPTION_REGEX, html)
        if m is not None:
            return m.group('description')
        return self._html_search_meta('description', html, 'description')

    def _extract_duration(self, html):
        """Return the HH:MM:SS ``data-video_duration`` in seconds, or None."""
        m = re.search(r'data-video_duration="(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"', html)
        return ((int(m.group('hours')) * 60 * 60) + (int(m.group('minutes')) * 60) + int(m.group('seconds'))) if m else None

    def _extract_slides(self, html):
        """Return the URL of the 'Slides' link, or None."""
        m = re.search(r'<a href="(?P<slidesurl>[^"]+)" class="slides">Slides</a>', html)
        return m.group('slidesurl') if m is not None else None

    def _extract_zip(self, html):
        """Return the URL of the 'Zip' link, or None."""
        m = re.search(r'<a href="(?P<zipurl>[^"]+)" class="zip">Zip</a>', html)
        return m.group('zipurl') if m is not None else None

    def _extract_avg_rating(self, html):
        """Return the average rating as a float, or 0 when not present."""
        m = re.search(r'<p class="avg-rating">Avg Rating: <span>(?P<avgrating>[^<]+)</span></p>', html)
        return float(m.group('avgrating')) if m is not None else 0

    def _extract_rating_count(self, html):
        """Return the number of ratings, or 0 when not present."""
        m = re.search(r'<div class="rating-count">\((?P<ratingcount>[^<]+)\)</div>', html)
        return int(self._fix_count(m.group('ratingcount'))) if m is not None else 0

    def _extract_view_count(self, html):
        """Return the number of views, or 0 when not present."""
        m = re.search(r'<li class="views">\s*<span class="count">(?P<viewcount>[^<]+)</span> Views\s*</li>', html)
        return int(self._fix_count(m.group('viewcount'))) if m is not None else 0

    def _extract_comment_count(self, html):
        """Return the number of comments, or 0 when not present."""
        m = re.search(r'<li class="comments">\s*<a href="#comments">\s*<span class="count">(?P<commentcount>[^<]+)</span> Comments\s*</a>\s*</li>', html)
        return int(self._fix_count(m.group('commentcount'))) if m is not None else 0

    def _fix_count(self, count):
        """Strip thousands separators (',') from ``count`` and return an int; None stays None."""
        return int(str(count).replace(',', '')) if count is not None else None

    def _extract_authors(self, html):
        """Return the names linked inside the 'author' list item, or None if it is missing."""
        m = re.search(r'(?s)<li class="author">(.*?)</li>', html)
        if m is None:
            return None
        return re.findall(r'<a href="/Niners/[^"]+">([^<]+)</a>', m.group(1))

    def _extract_session_code(self, html):
        """Return the text of the 'code' list item, or None."""
        m = re.search(r'<li class="code">\s*(?P<code>.+?)\s*</li>', html)
        return m.group('code') if m is not None else None

    def _extract_session_day(self, html):
        """Return the link text of the 'day' list item, or None."""
        m = re.search(r'<li class="day">\s*<a href="/Events/[^"]+">(?P<day>[^<]+)</a>\s*</li>', html)
        return m.group('day') if m is not None else None

    def _extract_session_room(self, html):
        """Return the text of the 'room' list item, or None."""
        m = re.search(r'<li class="room">\s*(?P<room>.+?)\s*</li>', html)
        return m.group('room') if m is not None else None

    def _extract_session_speakers(self, html):
        """Return the link texts of all /Events/Speakers/ links (possibly empty)."""
        return re.findall(r'<a href="/Events/Speakers/[^"]+">([^<]+)</a>', html)

    def _extract_content(self, html, content_path):
        """Build one result dict per downloadable item found on the page.

        Items are, in this order: slides, zip, and the recording (with all
        its formats).  All of them share the same id and metadata and
        differ in title and url/formats.  Returns None, after emitting a
        warning, when the page offers none of the three.
        """
        # Look for downloadable content
        formats = self._formats_from_html(html)
        slides = self._extract_slides(html)
        zip_ = self._extract_zip(html)

        # Nothing to download
        if not formats and slides is None and zip_ is None:
            self._downloader.report_warning('None of recording, slides or zip are available for %s' % content_path)
            return

        # Extract meta
        title = self._extract_title(html)
        description = self._extract_description(html)
        thumbnail = self._og_search_thumbnail(html)
        duration = self._extract_duration(html)
        avg_rating = self._extract_avg_rating(html)
        rating_count = self._extract_rating_count(html)
        view_count = self._extract_view_count(html)
        comment_count = self._extract_comment_count(html)

        common = {
            '_type': 'video',
            'id': content_path,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'avg_rating': avg_rating,
            'rating_count': rating_count,
            'view_count': view_count,
            'comment_count': comment_count,
        }

        result = []

        if slides is not None:
            d = common.copy()
            d.update({'title': title + '-Slides', 'url': slides})
            result.append(d)

        if zip_ is not None:
            d = common.copy()
            d.update({'title': title + '-Zip', 'url': zip_})
            result.append(d)

        if formats:
            d = common.copy()
            d.update({'title': title, 'formats': formats})
            result.append(d)

        return result

    def _extract_entry_item(self, html, content_path):
        """Extract an 'Entry.Item' page: the common content plus its authors."""
        contents = self._extract_content(html, content_path)
        if contents is None:
            return contents

        authors = self._extract_authors(html)

        for content in contents:
            content['authors'] = authors

        return contents

    def _extract_session(self, html, content_path):
        """Extract a 'Session' page: the common content plus session code/day/room/speakers."""
        contents = self._extract_content(html, content_path)
        if contents is None:
            return contents

        session_meta = {
            'session_code': self._extract_session_code(html),
            'session_day': self._extract_session_day(html),
            'session_room': self._extract_session_room(html),
            'session_speakers': self._extract_session_speakers(html),
        }

        for content in contents:
            content.update(session_meta)

        return contents

    def _extract_list(self, content_path):
        """Return a playlist made of every item link in the RSS feed for ``content_path``."""
        rss = self._download_xml(self._RSS_URL % content_path, content_path, 'Downloading RSS')
        entries = [self.url_result(session_url.text, 'Channel9')
                   for session_url in rss.findall('./channel/item/link')]
        title_text = rss.find('./channel/title').text
        return self.playlist_result(entries, content_path, title_text)

    def _real_extract(self, url):
        """Dispatch on the page's ``Search.PageType`` meta tag.

        'List' pages become playlists; 'Entry.Item' and 'Session' pages are
        scanned for downloadable content.  Raises ExtractorError (expected)
        when the meta tag is missing or has any other value.
        """
        mobj = re.match(self._VALID_URL, url)
        content_path = mobj.group('contentpath')

        webpage = self._download_webpage(url, content_path, 'Downloading web page')

        page_type_m = re.search(r'<meta name="Search.PageType" content="(?P<pagetype>[^"]+)"/>', webpage)
        if page_type_m is None:
            raise ExtractorError('Search.PageType not found, don\'t know how to process this page', expected=True)

        page_type = page_type_m.group('pagetype')
        if page_type == 'List':         # List page, may contain list of 'item'-like objects
            return self._extract_list(content_path)
        elif page_type == 'Entry.Item':  # Any 'item'-like page, may contain downloadable content
            return self._extract_entry_item(webpage, content_path)
        elif page_type == 'Session':    # Event session page, may contain downloadable content
            return self._extract_session(webpage, content_path)
        else:
            raise ExtractorError('Unexpected Search.PageType %s' % page_type, expected=True)

View File

@ -0,0 +1,97 @@
from __future__ import unicode_literals
import re
import base64
import json
from .common import InfoExtractor
from ..utils import (
clean_html,
ExtractorError
)
class ChilloutzoneIE(InfoExtractor):
    """Information extractor for chilloutzone.net video pages."""

    _VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w|-]+)\.html'
    _TESTS = [{
        'url': 'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html',
        'md5': 'a76f3457e813ea0037e5244f509e66d1',
        'info_dict': {
            'id': 'enemene-meck-alle-katzen-weg',
            'ext': 'mp4',
            'title': 'Enemene Meck - Alle Katzen weg',
            'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?',
        },
    }, {
        'note': 'Video hosted at YouTube',
        'url': 'http://www.chilloutzone.net/video/eine-sekunde-bevor.html',
        'info_dict': {
            'id': '1YVQaAgHyRU',
            'ext': 'mp4',
            'title': '16 Photos Taken 1 Second Before Disaster',
            'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814',
            'uploader': 'BuzzFeedVideo',
            'uploader_id': 'BuzzFeedVideo',
            'upload_date': '20131105',
        },
    }, {
        'note': 'Video hosted at Vimeo',
        'url': 'http://www.chilloutzone.net/video/icon-blending.html',
        'md5': '2645c678b8dc4fefcc0e1b60db18dac1',
        'info_dict': {
            'id': '85523671',
            'ext': 'mp4',
            'title': 'The Sunday Times - Icons',
            'description': 'md5:3e1c0dc6047498d6728dcdaad0891762',
            'uploader': 'Us',
            'uploader_id': 'usfilms',
            'upload_date': '20140131'
        },
    }]

    def _real_extract(self, url):
        """Extract the video described by the page's base64 ``cozVidData`` blob.

        Depending on the decoded data the result is either a delegation to
        the Youtube/Vimeo extractors or a direct mp4 info dict.  Raises
        ExtractorError when no media URL is available for the direct case.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        # The page embeds its video metadata as base64-encoded JSON.
        encoded = self._html_search_regex(
            r'var cozVidData = "(.+?)";', webpage, 'video data')
        meta = json.loads(base64.b64decode(encoded).decode("utf-8"))

        media_url = meta['mediaUrl']
        description = clean_html(meta.get('description'))
        title = meta['title']
        platform = meta['nativePlatform']
        platform_video_id = meta['nativeVideoId']
        priority = meta['sourcePriority']

        # Without a native platform, fall back to a YouTube iframe if the
        # page happens to embed one.
        if platform is None:
            embed_url = self._html_search_regex(
                r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
                webpage, 'fallback video URL', default=None)
            if embed_url is not None:
                return self.url_result(embed_url, ie='Youtube')

        # Otherwise the data itself says whether the native host (YouTube or
        # Vimeo) or the site's own CDN should be used.
        if priority == 'native':
            if platform == 'youtube':
                return self.url_result(platform_video_id, ie='Youtube')
            if platform == 'vimeo':
                vimeo_url = 'http://vimeo.com/' + platform_video_id
                return self.url_result(vimeo_url, ie='Vimeo')

        if not media_url:
            raise ExtractorError('No video found')

        info = {
            'id': video_id,
            'url': media_url,
            'ext': 'mp4',
            'title': title,
            'description': description,
        }
        return info

View File

@ -1,4 +1,5 @@
# encoding: utf-8 # encoding: utf-8
from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
@ -8,77 +9,69 @@ from ..utils import (
class CinemassacreIE(InfoExtractor): class CinemassacreIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?(?P<url>cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/.+?)(?:[/?].*)?' _VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/.+?'
_TESTS = [{ _TESTS = [
u'url': u'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', {
u'file': u'19911.flv', 'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
u'info_dict': { 'file': '19911.mp4',
u'upload_date': u'20121110', 'md5': 'fde81fbafaee331785f58cd6c0d46190',
u'title': u'“Angry Video Game Nerd: The Movie” Trailer', 'info_dict': {
u'description': u'md5:fb87405fcb42a331742a0dce2708560b', 'upload_date': '20121110',
'title': '“Angry Video Game Nerd: The Movie” Trailer',
'description': 'md5:fb87405fcb42a331742a0dce2708560b',
},
}, },
u'params': { {
# rtmp download 'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
u'skip_download': True, 'file': '521be8ef82b16.mp4',
}, 'md5': 'd72f10cd39eac4215048f62ab477a511',
}, 'info_dict': {
{ 'upload_date': '20131002',
u'url': u'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', 'title': 'The Mummys Hand (1940)',
u'file': u'521be8ef82b16.flv', },
u'info_dict': { }
u'upload_date': u'20131002', ]
u'title': u'The Mummys Hand (1940)',
},
u'params': {
# rtmp download
u'skip_download': True,
},
}]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
webpage_url = u'http://' + mobj.group('url') webpage = self._download_webpage(url, None) # Don't know video id yet
webpage = self._download_webpage(webpage_url, None) # Don't know video id yet
video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d') video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/(?:embed|player)\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage) mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
if not mobj: if not mobj:
raise ExtractorError(u'Can\'t extract embed url and video id') raise ExtractorError('Can\'t extract embed url and video id')
playerdata_url = mobj.group(u'embed_url') playerdata_url = mobj.group('embed_url')
video_id = mobj.group(u'video_id') video_id = mobj.group('video_id')
video_title = self._html_search_regex(r'<title>(?P<title>.+?)\|', video_title = self._html_search_regex(r'<title>(?P<title>.+?)\|',
webpage, u'title') webpage, 'title')
video_description = self._html_search_regex(r'<div class="entry-content">(?P<description>.+?)</div>', video_description = self._html_search_regex(r'<div class="entry-content">(?P<description>.+?)</div>',
webpage, u'description', flags=re.DOTALL, fatal=False) webpage, 'description', flags=re.DOTALL, fatal=False)
if len(video_description) == 0: if len(video_description) == 0:
video_description = None video_description = None
playerdata = self._download_webpage(playerdata_url, video_id) playerdata = self._download_webpage(playerdata_url, video_id)
base_url = self._html_search_regex(r'\'streamer\': \'(?P<base_url>rtmp://.*?)/(?:vod|Cinemassacre)\'',
playerdata, u'base_url') sd_url = self._html_search_regex(r'file: \'(?P<sd_file>[^\']+)\', label: \'SD\'', playerdata, 'sd_file')
base_url += '/Cinemassacre/' hd_url = self._html_search_regex(r'file: \'(?P<hd_file>[^\']+)\', label: \'HD\'', playerdata, 'hd_file')
# Important: The file names in playerdata are not used by the player and even wrong for some videos video_thumbnail = self._html_search_regex(r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False)
sd_file = 'Cinemassacre-%s_high.mp4' % video_id
hd_file = 'Cinemassacre-%s.mp4' % video_id
video_thumbnail = 'http://image.screenwavemedia.com/Cinemassacre/Cinemassacre-%s_thumb_640x360.jpg' % video_id
formats = [ formats = [
{ {
'url': base_url + sd_file, 'url': sd_url,
'ext': 'flv', 'ext': 'mp4',
'format': 'sd', 'format': 'sd',
'format_id': 'sd', 'format_id': 'sd',
}, },
{ {
'url': base_url + hd_file, 'url': hd_url,
'ext': 'flv', 'ext': 'mp4',
'format': 'hd', 'format': 'hd',
'format_id': 'hd', 'format_id': 'hd',
}, },
] ]
info = { return {
'id': video_id, 'id': video_id,
'title': video_title, 'title': video_title,
'formats': formats, 'formats': formats,
@ -86,6 +79,3 @@ class CinemassacreIE(InfoExtractor):
'upload_date': video_date, 'upload_date': video_date,
'thumbnail': video_thumbnail, 'thumbnail': video_thumbnail,
} }
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@ -0,0 +1,58 @@
import re
import time
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import ExtractorError
class ClipfishIE(InfoExtractor):
    """Information extractor for videos hosted on clipfish.de.

    All metadata is read from the site's ``devxml/videoinfo`` XML document.
    """
    IE_NAME = u'clipfish'

    _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
    _TEST = {
        u'url': u'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
        u'file': u'3966754.mp4',
        u'md5': u'2521cd644e862936cf2e698206e47385',
        u'info_dict': {
            u'title': u'FIFA 14 - E3 2013 Trailer',
            u'duration': 82,
        },
        u'skip': 'Blocked in the US'
    }

    def _real_extract(self, url):
        """Return the info dictionary for the clipfish video at ``url``.

        Raises ExtractorError when the info document carries no video URL.
        The thumbnail and the duration are optional: when they are missing
        or unparsable the corresponding fields are set to None.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # NOTE(review): the ``ts`` parameter is presumably a cache buster;
        # it is simply the current UNIX time.
        info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
                    (video_id, int(time.time())))
        doc = self._download_xml(
            info_url, video_id, note=u'Downloading info page')
        title = doc.find('title').text

        # findtext() yields None for a missing element and '' for an empty
        # one; either way there is nothing to download, so report it with
        # the document attached instead of failing with an AttributeError.
        video_url = doc.findtext('filename')
        if not video_url:
            xml_bytes = xml.etree.ElementTree.tostring(doc)
            raise ExtractorError(u'Cannot find video URL in document %r' %
                                 xml_bytes)

        # Optional metadata must not abort the extraction when absent.
        thumbnail = doc.findtext('imageurl') or None
        duration_str = doc.findtext('duration') or ''

        # Expected layout is hours:minutes:seconds:milliseconds; the
        # milliseconds part is matched but not used.
        m = re.match(
            r'^(?P<hours>[0-9]+):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9]{2}):(?P<ms>[0-9]*)$',
            duration_str)
        if m:
            duration = (
                (int(m.group('hours')) * 60 * 60) +
                (int(m.group('minutes')) * 60) +
                (int(m.group('seconds')))
            )
        else:
            duration = None

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'thumbnail': thumbnail,
            'duration': duration,
        }

View File

@ -0,0 +1,56 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
# Character substitution table used by CliphunterIE: every character of the
# obfuscated ``pl_fiji`` value found in the page is replaced by its mapped
# character to obtain the video URL; characters without an entry are kept
# unchanged.
translation_table = {
'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',
'm': 'a', 'n': 'm', 'p': 'u', 'q': 't', 'r': 's', 'v': 'p', 'x': 'r',
'y': 'l', 'z': 'i',
'$': ':', '&': '.', '(': '=', '^': '&', '=': '/',
}
class CliphunterIE(InfoExtractor):
    """Information extractor for cliphunter.com video pages.

    The page embeds the media URL in obfuscated form; it is decoded
    character by character through ``translation_table``.
    """
    IE_NAME = 'cliphunter'

    _VALID_URL = r'''(?x)http://(?:www\.)?cliphunter\.com/w/
(?P<id>[0-9]+)/
(?P<seo>.+?)(?:$|[#\?])
'''
    _TEST = {
        'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo',
        'file': '1012420.flv',
        'md5': '15e7740f30428abf70f4223478dc1225',
        'info_dict': {
            'title': 'Fun Jynx Maze solo',
        }
    }

    def _real_extract(self, url):
        """Download the video page and return its info dictionary."""
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)

        # Each value is a JavaScript variable assignment in the page source.
        encoded_url = self._search_regex(
            r'pl_fiji = \'([^\']+)\'', page, 'video data')
        quality = self._search_regex(
            r'pl_c_qual = "(.)"', page, 'video quality')
        title = self._search_regex(
            r'mediaTitle = "([^"]+)"', page, 'title')

        # Undo the substitution; unmapped characters pass through unchanged.
        decoded_chars = [translation_table.get(ch, ch) for ch in encoded_url]

        # The page exposes a single format, labelled with its quality code.
        only_format = {
            'url': ''.join(decoded_chars),
            'format_id': quality,
        }

        return {
            'id': video_id,
            'title': title,
            'formats': [only_format],
        }

View File

@ -0,0 +1,50 @@
import re
from .common import InfoExtractor
from ..utils import (
find_xpath_attr,
fix_xml_ampersands
)
class ClipsyndicateIE(InfoExtractor):
    """Information extractor for clipsyndicate.com video pages.

    The metadata comes from the embed player's playlist XML, which can only
    be requested with the parameters published in the player script.
    """
    _VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
        u'md5': u'4d7d549451bad625e0ff3d7bd56d776c',
        u'info_dict': {
            u'id': u'4629301',
            u'ext': u'mp4',
            u'title': u'Brick Briscoe',
            u'duration': 612,
        },
    }

    def _real_extract(self, url):
        """Return the info dictionary for the video at ``url``.

        The duration is optional: it is None when the playlist does not
        provide a ``duration`` param.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        js_player = self._download_webpage(
            'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
            video_id, u'Downloading player')
        # it includes a required token
        flvars = self._search_regex(r'flvars: "(.*?)"', js_player, u'flvars')

        pdoc = self._download_xml(
            'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
            video_id, u'Downloading video info',
            transform_source=fix_xml_ampersands)

        track_doc = pdoc.find('trackList/track')

        def find_param(name):
            """Return the value of the track's <param name=...>, or None."""
            node = find_xpath_attr(track_doc, './/param', 'name', name)
            if node is not None:
                return node.attrib['value']
            return None

        # A missing duration must not abort the extraction with a TypeError.
        duration = find_param('duration')

        return {
            'id': video_id,
            'title': find_param('title'),
            'url': track_doc.find('location').text,
            'thumbnail': find_param('thumbnail'),
            'duration': int(duration) if duration is not None else None,
        }

View File

@ -0,0 +1,19 @@
from .mtv import MTVIE
class CMTIE(MTVIE):
    """Information extractor for cmt.com videos.

    Only configuration is defined here; the extraction logic is inherited
    from MTVIE.
    """
    IE_NAME = u'cmt.com'

    # Pages look like /videos/<artist>/<videoid>/<slug>.jhtml
    _VALID_URL = r'https?://www\.cmt\.com/videos/.+?/(?P<videoid>[^/]+)\.jhtml'
    _FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/'

    _TESTS = [{
        u'url': u'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061',
        u'md5': u'e6b7ef3c4c45bbfae88061799bbba6c2',
        u'info_dict': {
            u'id': u'989124',
            u'ext': u'mp4',
            u'title': u'Garth Brooks - "The Call (featuring Trisha Yearwood)"',
            u'description': u'Blame It All On My Roots',
        },
    }]

View File

@ -1,30 +1,38 @@
from __future__ import unicode_literals
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import determine_ext from ..utils import (
int_or_none,
parse_duration,
url_basename,
)
class CNNIE(InfoExtractor): class CNNIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(edition\.)?cnn\.com/video/(data/.+?|\?)/ _VALID_URL = r'''(?x)https?://((edition|www)\.)?cnn\.com/video/(data/.+?|\?)/
(?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))''' (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''
_TESTS = [{ _TESTS = [{
u'url': u'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn', 'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
u'file': u'sports_2013_06_09_nadal-1-on-1.cnn.mp4', 'file': 'sports_2013_06_09_nadal-1-on-1.cnn.mp4',
u'md5': u'3e6121ea48df7e2259fe73a0628605c4', 'md5': '3e6121ea48df7e2259fe73a0628605c4',
u'info_dict': { 'info_dict': {
u'title': u'Nadal wins 8th French Open title', 'title': 'Nadal wins 8th French Open title',
u'description': u'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.', 'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
'duration': 135,
'upload_date': '20130609',
}, },
}, },
{ {
u"url": u"http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29", "url": "http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
u"file": u"us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4", "file": "us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4",
u"md5": u"b5cc60c60a3477d185af8f19a2a26f4e", "md5": "b5cc60c60a3477d185af8f19a2a26f4e",
u"info_dict": { "info_dict": {
u"title": "Student's epic speech stuns new freshmen", "title": "Student's epic speech stuns new freshmen",
u"description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"" "description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"",
"upload_date": "20130821",
} }
}] }]
@ -32,27 +40,87 @@ class CNNIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
path = mobj.group('path') path = mobj.group('path')
page_title = mobj.group('title') page_title = mobj.group('title')
info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path info_url = 'http://cnn.com/video/data/3.0/%s/index.xml' % path
info_xml = self._download_webpage(info_url, page_title) info = self._download_xml(info_url, page_title)
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
formats = [] formats = []
rex = re.compile(r'''(?x)
(?P<width>[0-9]+)x(?P<height>[0-9]+)
(?:_(?P<bitrate>[0-9]+)k)?
''')
for f in info.findall('files/file'): for f in info.findall('files/file'):
mf = re.match(r'(\d+)x(\d+)(?:_(.*)k)?',f.attrib['bitrate']) video_url = 'http://ht.cdn.turner.com/cnn/big%s' % (f.text.strip())
if mf is not None: fdct = {
formats.append((int(mf.group(1)), int(mf.group(2)), int(mf.group(3) or 0), f.text)) 'format_id': f.attrib['bitrate'],
formats = sorted(formats) 'url': video_url,
(_,_,_, video_path) = formats[-1] }
video_url = 'http://ht.cdn.turner.com/cnn/big%s' % video_path
mf = rex.match(f.attrib['bitrate'])
if mf:
fdct['width'] = int(mf.group('width'))
fdct['height'] = int(mf.group('height'))
fdct['tbr'] = int_or_none(mf.group('bitrate'))
else:
mf = rex.search(f.text)
if mf:
fdct['width'] = int(mf.group('width'))
fdct['height'] = int(mf.group('height'))
fdct['tbr'] = int_or_none(mf.group('bitrate'))
else:
mi = re.match(r'ios_(audio|[0-9]+)$', f.attrib['bitrate'])
if mi:
if mi.group(1) == 'audio':
fdct['vcodec'] = 'none'
fdct['ext'] = 'm4a'
else:
fdct['tbr'] = int(mi.group(1))
formats.append(fdct)
self._sort_formats(formats)
thumbnails = sorted([((int(t.attrib['height']),int(t.attrib['width'])), t.text) for t in info.findall('images/image')]) thumbnails = sorted([((int(t.attrib['height']),int(t.attrib['width'])), t.text) for t in info.findall('images/image')])
thumbs_dict = [{'resolution': res, 'url': t_url} for (res, t_url) in thumbnails] thumbs_dict = [{'resolution': res, 'url': t_url} for (res, t_url) in thumbnails]
return {'id': info.attrib['id'], metas_el = info.find('metas')
'title': info.find('headline').text, upload_date = (
'url': video_url, metas_el.attrib.get('version') if metas_el is not None else None)
'ext': determine_ext(video_url),
'thumbnail': thumbnails[-1][1], duration_el = info.find('length')
'thumbnails': thumbs_dict, duration = parse_duration(duration_el.text)
'description': info.find('description').text,
} return {
'id': info.attrib['id'],
'title': info.find('headline').text,
'formats': formats,
'thumbnail': thumbnails[-1][1],
'thumbnails': thumbs_dict,
'description': info.find('description').text,
'duration': duration,
'upload_date': upload_date,
}
class CNNBlogsIE(InfoExtractor):
    """Information extractor for posts on *.blogs.cnn.com.

    A blog post only references a regular CNN video, so the extraction is
    delegated to CNNIE.
    """
    _VALID_URL = r'https?://[^\.]+\.blogs\.cnn\.com/.+'
    _TEST = {
        'url': 'http://reliablesources.blogs.cnn.com/2014/02/09/criminalizing-journalism/',
        'md5': '3e56f97b0b6ffb4b79f4ea0749551084',
        'info_dict': {
            'id': 'bestoftv/2014/02/09/criminalizing-journalism.cnn',
            'ext': 'mp4',
            'title': 'Criminalizing journalism?',
            'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.',
            'upload_date': '20140209',
        },
        'add_ie': ['CNN'],
    }

    def _real_extract(self, url):
        """Return a URL result pointing at the CNN video embedded in the post."""
        # The last component of the URL serves as the identifier shown
        # while downloading the page.
        page_id = url_basename(url)
        blog_page = self._download_webpage(url, page_id)

        # The embedded video is referenced through a data-url attribute.
        target_url = self._html_search_regex(
            r'data-url="(.+?)"', blog_page, 'cnn url')

        return {
            '_type': 'url',
            'url': target_url,
            'ie_key': CNNIE.ie_key(),
        }

View File

@ -1,87 +1,102 @@
from __future__ import unicode_literals
import json
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import int_or_none
compat_urllib_parse_urlparse,
determine_ext,
ExtractorError,
)
class CollegeHumorIE(InfoExtractor): class CollegeHumorIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$' _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'
_TESTS = [{ _TESTS = [{
u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe', 'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
u'file': u'6902724.mp4', 'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd',
u'md5': u'1264c12ad95dca142a9f0bf7968105a0', 'info_dict': {
u'info_dict': { 'id': '6902724',
u'title': u'Comic-Con Cosplay Catastrophe', 'ext': 'mp4',
u'description': u'Fans get creative this year at San Diego. Too creative. And yes, that\'s really Joss Whedon.', 'title': 'Comic-Con Cosplay Catastrophe',
'description': "Fans get creative this year at San Diego. Too creative. And yes, that's really Joss Whedon.",
'age_limit': 13,
'duration': 187,
}, },
}, },
{ {
u'url': u'http://www.collegehumor.com/video/3505939/font-conference', 'url': 'http://www.collegehumor.com/video/3505939/font-conference',
u'file': u'3505939.mp4', 'md5': '72fa701d8ef38664a4dbb9e2ab721816',
u'md5': u'c51ca16b82bb456a4397987791a835f5', 'info_dict': {
u'info_dict': { 'id': '3505939',
u'title': u'Font Conference', 'ext': 'mp4',
u'description': u'This video wasn\'t long enough, so we made it double-spaced.', 'title': 'Font Conference',
'description': "This video wasn't long enough, so we made it double-spaced.",
'age_limit': 10,
'duration': 179,
}, },
}] },
# embedded youtube video
{
'url': 'http://www.collegehumor.com/embed/6950306',
'info_dict': {
'id': 'Z-bao9fg6Yc',
'ext': 'mp4',
'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!',
'uploader': 'Mark Dice',
'uploader_id': 'MarkDice',
'description': 'md5:62c3dab9351fac7bb44b53b69511d87f',
'upload_date': '20140127',
},
'params': {
'skip_download': True,
},
'add_ie': ['Youtube'],
},
]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group('videoid') video_id = mobj.group('videoid')
info = { jsonUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id + '.json'
'id': video_id, data = json.loads(self._download_webpage(
'uploader': None, jsonUrl, video_id, 'Downloading info JSON'))
'upload_date': None, vdata = data['video']
} if vdata.get('youtubeId') is not None:
return {
'_type': 'url',
'url': vdata['youtubeId'],
'ie_key': 'Youtube',
}
self.report_extraction(video_id) AGE_LIMITS = {'nc17': 18, 'r': 18, 'pg13': 13, 'pg': 10, 'g': 0}
xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id rating = vdata.get('rating')
metaXml = self._download_webpage(xmlUrl, video_id, if rating:
u'Downloading info XML', age_limit = AGE_LIMITS.get(rating.lower())
u'Unable to download video info XML')
mdoc = xml.etree.ElementTree.fromstring(metaXml)
try:
videoNode = mdoc.findall('./video')[0]
youtubeIdNode = videoNode.find('./youtubeID')
if youtubeIdNode is not None:
return self.url_result(youtubeIdNode.text, 'Youtube')
info['description'] = videoNode.findall('./description')[0].text
info['title'] = videoNode.findall('./caption')[0].text
info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
next_url = videoNode.findall('./file')[0].text
except IndexError:
raise ExtractorError(u'Invalid metadata XML file')
if next_url.endswith(u'manifest.f4m'):
manifest_url = next_url + '?hdcore=2.10.3'
manifestXml = self._download_webpage(manifest_url, video_id,
u'Downloading XML manifest',
u'Unable to download video info XML')
adoc = xml.etree.ElementTree.fromstring(manifestXml)
try:
media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
node_id = media_node.attrib['url']
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
except IndexError as err:
raise ExtractorError(u'Invalid manifest file')
url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
info['ext'] = 'mp4'
else: else:
# Old-style direct links age_limit = None # None = No idea
info['url'] = next_url
info['ext'] = determine_ext(info['url'])
return info PREFS = {'high_quality': 2, 'low_quality': 0}
formats = []
for format_key in ('mp4', 'webm'):
for qname, qurl in vdata.get(format_key, {}).items():
formats.append({
'format_id': format_key + '_' + qname,
'url': qurl,
'format': format_key,
'preference': PREFS.get(qname),
})
self._sort_formats(formats)
duration = int_or_none(vdata.get('duration'), 1000)
like_count = int_or_none(vdata.get('likes'))
return {
'id': video_id,
'title': vdata['title'],
'description': vdata.get('description'),
'thumbnail': vdata.get('thumbnail'),
'formats': formats,
'age_limit': age_limit,
'duration': duration,
'like_count': like_count,
}

View File

@ -1,7 +1,9 @@
from __future__ import unicode_literals
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from .mtv import MTVServicesInfoExtractor
from ..utils import ( from ..utils import (
compat_str, compat_str,
compat_urllib_parse, compat_urllib_parse,
@ -11,8 +13,26 @@ from ..utils import (
) )
class ComedyCentralIE(InfoExtractor): class ComedyCentralIE(MTVServicesInfoExtractor):
IE_DESC = u'The Daily Show / Colbert Report' _VALID_URL = r'''(?x)https?://(?:www\.)?(comedycentral|cc)\.com/
(video-clips|episodes|cc-studios|video-collections)
/(?P<title>.*)'''
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
_TEST = {
'url': 'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
'md5': '4167875aae411f903b751a21f357f1ee',
'info_dict': {
'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
'ext': 'mp4',
'title': 'CC:Stand-Up|Greg Fitzsimmons: Life on Stage|Uncensored - Too Good of a Mother',
'description': 'After a certain point, breastfeeding becomes c**kblocking.',
},
}
class ComedyCentralShowsIE(InfoExtractor):
IE_DESC = 'The Daily Show / Colbert Report'
# urls can be abbreviations like :thedailyshow or :colbert # urls can be abbreviations like :thedailyshow or :colbert
# urls for episodes like: # urls for episodes like:
# or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
@ -29,14 +49,14 @@ class ComedyCentralIE(InfoExtractor):
extended-interviews/(?P<interID>[0-9]+)/playlist_tds_extended_(?P<interview_title>.*?)/.*?))) extended-interviews/(?P<interID>[0-9]+)/playlist_tds_extended_(?P<interview_title>.*?)/.*?)))
$""" $"""
_TEST = { _TEST = {
u'url': u'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart', 'url': 'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart',
u'file': u'422212.mp4', 'file': '422212.mp4',
u'md5': u'4e2f5cb088a83cd8cdb7756132f9739d', 'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
u'info_dict': { 'info_dict': {
u"upload_date": u"20121214", "upload_date": "20121214",
u"description": u"Kristen Stewart", "description": "Kristen Stewart",
u"uploader": u"thedailyshow", "uploader": "thedailyshow",
u"title": u"thedailyshow-kristen-stewart part 1" "title": "thedailyshow-kristen-stewart part 1"
} }
} }
@ -66,22 +86,22 @@ class ComedyCentralIE(InfoExtractor):
@staticmethod @staticmethod
def _transform_rtmp_url(rtmp_video_url): def _transform_rtmp_url(rtmp_video_url):
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url) m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\.comedystor/.*)$', rtmp_video_url)
if not m: if not m:
raise ExtractorError(u'Cannot transform RTMP url') raise ExtractorError('Cannot transform RTMP url')
base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/' base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
return base + m.group('finalid') return base + m.group('finalid')
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url, re.VERBOSE) mobj = re.match(self._VALID_URL, url, re.VERBOSE)
if mobj is None: if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url) raise ExtractorError('Invalid URL: %s' % url)
if mobj.group('shortname'): if mobj.group('shortname'):
if mobj.group('shortname') in ('tds', 'thedailyshow'): if mobj.group('shortname') in ('tds', 'thedailyshow'):
url = u'http://www.thedailyshow.com/full-episodes/' url = 'http://www.thedailyshow.com/full-episodes/'
else: else:
url = u'http://www.colbertnation.com/full-episodes/' url = 'http://www.colbertnation.com/full-episodes/'
mobj = re.match(self._VALID_URL, url, re.VERBOSE) mobj = re.match(self._VALID_URL, url, re.VERBOSE)
assert mobj is not None assert mobj is not None
@ -107,9 +127,9 @@ class ComedyCentralIE(InfoExtractor):
url = htmlHandle.geturl() url = htmlHandle.geturl()
mobj = re.match(self._VALID_URL, url, re.VERBOSE) mobj = re.match(self._VALID_URL, url, re.VERBOSE)
if mobj is None: if mobj is None:
raise ExtractorError(u'Invalid redirected URL: ' + url) raise ExtractorError('Invalid redirected URL: ' + url)
if mobj.group('episode') == '': if mobj.group('episode') == '':
raise ExtractorError(u'Redirected URL is still not specific: ' + url) raise ExtractorError('Redirected URL is still not specific: ' + url)
epTitle = mobj.group('episode') epTitle = mobj.group('episode')
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage) mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
@ -121,19 +141,18 @@ class ComedyCentralIE(InfoExtractor):
altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', webpage) altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', webpage)
if len(altMovieParams) == 0: if len(altMovieParams) == 0:
raise ExtractorError(u'unable to find Flash URL in webpage ' + url) raise ExtractorError('unable to find Flash URL in webpage ' + url)
else: else:
mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])] mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
uri = mMovieParams[0][1] uri = mMovieParams[0][1]
indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri}) indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
indexXml = self._download_webpage(indexUrl, epTitle, idoc = self._download_xml(indexUrl, epTitle,
u'Downloading show index', 'Downloading show index',
u'unable to download episode index') 'unable to download episode index')
results = [] results = []
idoc = xml.etree.ElementTree.fromstring(indexXml)
itemEls = idoc.findall('.//item') itemEls = idoc.findall('.//item')
for partNum,itemEl in enumerate(itemEls): for partNum,itemEl in enumerate(itemEls):
mediaId = itemEl.findall('./guid')[0].text mediaId = itemEl.findall('./guid')[0].text
@ -144,17 +163,16 @@ class ComedyCentralIE(InfoExtractor):
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' + configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
compat_urllib_parse.urlencode({'uri': mediaId})) compat_urllib_parse.urlencode({'uri': mediaId}))
configXml = self._download_webpage(configUrl, epTitle, cdoc = self._download_xml(configUrl, epTitle,
u'Downloading configuration for %s' % shortMediaId) 'Downloading configuration for %s' % shortMediaId)
cdoc = xml.etree.ElementTree.fromstring(configXml)
turls = [] turls = []
for rendition in cdoc.findall('.//rendition'): for rendition in cdoc.findall('.//rendition'):
finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text) finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
turls.append(finfo) turls.append(finfo)
if len(turls) == 0: if len(turls) == 0:
self._downloader.report_error(u'unable to download ' + mediaId + ': No videos found') self._downloader.report_error('unable to download ' + mediaId + ': No videos found')
continue continue
formats = [] formats = []
@ -168,8 +186,8 @@ class ComedyCentralIE(InfoExtractor):
'width': w, 'width': w,
}) })
effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1) effTitle = showId + '-' + epTitle + ' part ' + compat_str(partNum+1)
info = { results.append({
'id': shortMediaId, 'id': shortMediaId,
'formats': formats, 'formats': formats,
'uploader': showId, 'uploader': showId,
@ -177,11 +195,6 @@ class ComedyCentralIE(InfoExtractor):
'title': effTitle, 'title': effTitle,
'thumbnail': None, 'thumbnail': None,
'description': compat_str(officialTitle), 'description': compat_str(officialTitle),
} })
# TODO: Remove when #980 has been merged
info.update(info['formats'][-1])
results.append(info)
return results return results

View File

@ -1,22 +1,28 @@
import base64 import base64
import hashlib
import json
import os import os
import re import re
import socket import socket
import sys import sys
import netrc import netrc
import xml.etree.ElementTree
from ..utils import ( from ..utils import (
compat_http_client, compat_http_client,
compat_urllib_error, compat_urllib_error,
compat_urllib_request, compat_urllib_parse_urlparse,
compat_str, compat_str,
clean_html, clean_html,
compiled_regex_type, compiled_regex_type,
ExtractorError, ExtractorError,
RegexNotFoundError, RegexNotFoundError,
sanitize_filename,
unescapeHTML, unescapeHTML,
) )
_NO_DEFAULT = object()
class InfoExtractor(object): class InfoExtractor(object):
"""Information Extractor class. """Information Extractor class.
@ -32,44 +38,81 @@ class InfoExtractor(object):
The dictionaries must include the following fields: The dictionaries must include the following fields:
id: Video identifier. id: Video identifier.
url: Final video URL.
title: Video title, unescaped. title: Video title, unescaped.
ext: Video filename extension.
Instead of url and ext, formats can also specified. Additionally, it must contain either a formats entry or a url one:
formats: A list of dictionaries for each format available, ordered
from worst to best quality.
Potential fields:
* url Mandatory. The URL of the video file
* ext Will be calculated from url if missing
* format A human-readable description of the format
("mp4 container with h264/opus").
Calculated from the format_id, width, height.
and format_note fields if missing.
* format_id A short description of the format
("mp4_h264_opus" or "19").
Technically optional, but strongly recommended.
* format_note Additional info about the format
("3D" or "DASH video")
* width Width of the video, if known
* height Height of the video, if known
* resolution Textual description of width and height
* tbr Average bitrate of audio and video in KBit/s
* abr Average audio bitrate in KBit/s
* acodec Name of the audio codec in use
* asr Audio sampling rate in Hertz
* vbr Average video bitrate in KBit/s
* vcodec Name of the video codec in use
* container Name of the container format
* filesize The number of bytes, if known in advance
* player_url SWF Player URL (used for rtmpdump).
* protocol The protocol that will be used for the actual
download, lower-case.
"http", "https", "rtsp", "rtmp", "m3u8" or so.
* preference Order number of this format. If this field is
present and not None, the formats get sorted
by this field, regardless of all other values.
-1 for default (order by other properties),
-2 or smaller for less than default.
* quality Order number of the video quality of this
format, irrespective of the file format.
-1 for default (order by other properties),
-2 or smaller for less than default.
url: Final video URL.
ext: Video filename extension.
format: The video format, defaults to ext (used for --get-format)
player_url: SWF Player URL (used for rtmpdump).
The following fields are optional: The following fields are optional:
format: The video format, defaults to ext (used for --get-format) display_id An alternative identifier for the video, not necessarily
unique, but available before title. Typically, id is
something like "4234987", title "Dancing naked mole rats",
and display_id "dancing-naked-mole-rats"
thumbnails: A list of dictionaries (with the entries "resolution" and thumbnails: A list of dictionaries (with the entries "resolution" and
"url") for the varying thumbnails "url") for the varying thumbnails
thumbnail: Full URL to a video thumbnail image. thumbnail: Full URL to a video thumbnail image.
description: One-line video description. description: One-line video description.
uploader: Full name of the video uploader. uploader: Full name of the video uploader.
timestamp: UNIX timestamp of the moment the video became available.
upload_date: Video upload date (YYYYMMDD). upload_date: Video upload date (YYYYMMDD).
If not explicitly set, calculated from timestamp.
uploader_id: Nickname or id of the video uploader. uploader_id: Nickname or id of the video uploader.
location: Physical location of the video. location: Physical location of the video.
player_url: SWF Player URL (used for rtmpdump).
subtitles: The subtitle file contents as a dictionary in the format subtitles: The subtitle file contents as a dictionary in the format
{language: subtitles}. {language: subtitles}.
duration: Length of the video in seconds, as an integer.
view_count: How many users have watched the video on the platform. view_count: How many users have watched the video on the platform.
urlhandle: [internal] The urlHandle to be used to download the file, like_count: Number of positive ratings of the video
like returned by urllib.request.urlopen dislike_count: Number of negative ratings of the video
comment_count: Number of comments on the video
age_limit: Age restriction for the video, as an integer (years) age_limit: Age restriction for the video, as an integer (years)
formats: A list of dictionaries for each format available, it must webpage_url: The url to the video webpage, if given to youtube-dl it
be ordered from worst to best quality. Potential fields: should allow to get the same result again. (It will be set
* url Mandatory. The URL of the video file by YoutubeDL if it's missing)
* ext Will be calculated from url if missing
* format A human-readable description of the format
("mp4 container with h264/opus").
Calculated from the format_id, width, height
and format_note fields if missing.
* format_id A short description of the format
("mp4_h264_opus" or "19")
* format_note Additional info about the format
("3D" or "DASH video")
* width Width of the video, if known
* height Height of the video, if known
Unless mentioned otherwise, the fields should be Unicode strings. Unless mentioned otherwise, the fields should be Unicode strings.
@ -77,9 +120,6 @@ class InfoExtractor(object):
_real_extract() methods and define a _VALID_URL regexp. _real_extract() methods and define a _VALID_URL regexp.
Probably, they should also be added to the list of extractors. Probably, they should also be added to the list of extractors.
_real_extract() must return a *list* of information dictionaries as
described above.
Finally, the _WORKING attribute should be set to False for broken IEs Finally, the _WORKING attribute should be set to False for broken IEs
in order to warn the users and skip the tests. in order to warn the users and skip the tests.
""" """
@ -141,27 +181,40 @@ class InfoExtractor(object):
def IE_NAME(self): def IE_NAME(self):
return type(self).__name__[:-2] return type(self).__name__[:-2]
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None): def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
""" Returns the response handle """ """ Returns the response handle """
if note is None: if note is None:
self.report_download_webpage(video_id) self.report_download_webpage(video_id)
elif note is not False: elif note is not False:
self.to_screen(u'%s: %s' % (video_id, note)) if video_id is None:
self.to_screen(u'%s' % (note,))
else:
self.to_screen(u'%s: %s' % (video_id, note))
try: try:
return compat_urllib_request.urlopen(url_or_request) return self._downloader.urlopen(url_or_request)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
if errnote is False:
return False
if errnote is None: if errnote is None:
errnote = u'Unable to download webpage' errnote = u'Unable to download webpage'
raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2], cause=err) errmsg = u'%s: %s' % (errnote, compat_str(err))
if fatal:
raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
else:
self._downloader.report_warning(errmsg)
return False
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None): def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
""" Returns a tuple (page content as string, URL handle) """ """ Returns a tuple (page content as string, URL handle) """
# Strip hashes from the URL (#1038) # Strip hashes from the URL (#1038)
if isinstance(url_or_request, (compat_str, str)): if isinstance(url_or_request, (compat_str, str)):
url_or_request = url_or_request.partition('#')[0] url_or_request = url_or_request.partition('#')[0]
urlh = self._request_webpage(url_or_request, video_id, note, errnote) urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal)
if urlh is False:
assert not fatal
return False
content_type = urlh.headers.get('Content-Type', '') content_type = urlh.headers.get('Content-Type', '')
webpage_bytes = urlh.read() webpage_bytes = urlh.read()
m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type) m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
@ -172,6 +225,8 @@ class InfoExtractor(object):
webpage_bytes[:1024]) webpage_bytes[:1024])
if m: if m:
encoding = m.group(1).decode('ascii') encoding = m.group(1).decode('ascii')
elif webpage_bytes.startswith(b'\xff\xfe'):
encoding = 'utf-16'
else: else:
encoding = 'utf-8' encoding = 'utf-8'
if self._downloader.params.get('dump_intermediate_pages', False): if self._downloader.params.get('dump_intermediate_pages', False):
@ -182,12 +237,57 @@ class InfoExtractor(object):
self.to_screen(u'Dumping request to ' + url) self.to_screen(u'Dumping request to ' + url)
dump = base64.b64encode(webpage_bytes).decode('ascii') dump = base64.b64encode(webpage_bytes).decode('ascii')
self._downloader.to_screen(dump) self._downloader.to_screen(dump)
if self._downloader.params.get('write_pages', False):
try:
url = url_or_request.get_full_url()
except AttributeError:
url = url_or_request
if len(url) > 200:
h = u'___' + hashlib.md5(url.encode('utf-8')).hexdigest()
url = url[:200 - len(h)] + h
raw_filename = ('%s_%s.dump' % (video_id, url))
filename = sanitize_filename(raw_filename, restricted=True)
self.to_screen(u'Saving request to ' + filename)
with open(filename, 'wb') as outf:
outf.write(webpage_bytes)
content = webpage_bytes.decode(encoding, 'replace') content = webpage_bytes.decode(encoding, 'replace')
return (content, urlh) return (content, urlh)
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None): def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
""" Returns the data of the page as a string """ """ Returns the data of the page as a string """
return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0] res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal)
if res is False:
return res
else:
content, _ = res
return content
def _download_xml(self, url_or_request, video_id,
note=u'Downloading XML', errnote=u'Unable to download XML',
transform_source=None):
"""Return the xml as an xml.etree.ElementTree.Element"""
xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
if transform_source:
xml_string = transform_source(xml_string)
return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
def _download_json(self, url_or_request, video_id,
note=u'Downloading JSON metadata',
errnote=u'Unable to download JSON metadata',
transform_source=None):
json_string = self._download_webpage(url_or_request, video_id, note, errnote)
if transform_source:
json_string = transform_source(json_string)
try:
return json.loads(json_string)
except ValueError as ve:
raise ExtractorError('Failed to download JSON', cause=ve)
def report_warning(self, msg, video_id=None):
    """Forward a warning to the downloader, tagged with this extractor.

    The message is prefixed with '[IE_NAME] ' and, when video_id is given,
    additionally with 'video_id: '.
    """
    if video_id is None:
        id_prefix = u''
    else:
        id_prefix = u'%s: ' % video_id
    text = u'[%s] %s%s' % (self.IE_NAME, id_prefix, msg)
    self._downloader.report_warning(text)
def to_screen(self, msg): def to_screen(self, msg):
"""Print msg to screen, prefixing it with '[ie_name]'""" """Print msg to screen, prefixing it with '[ie_name]'"""
@ -210,14 +310,18 @@ class InfoExtractor(object):
self.to_screen(u'Logging in') self.to_screen(u'Logging in')
#Methods for following #608 #Methods for following #608
def url_result(self, url, ie=None): @staticmethod
def url_result(url, ie=None, video_id=None):
"""Returns a url that points to a page that should be processed""" """Returns a url that points to a page that should be processed"""
#TODO: ie should be the class used for getting the info #TODO: ie should be the class used for getting the info
video_info = {'_type': 'url', video_info = {'_type': 'url',
'url': url, 'url': url,
'ie_key': ie} 'ie_key': ie}
if video_id is not None:
video_info['id'] = video_id
return video_info return video_info
def playlist_result(self, entries, playlist_id=None, playlist_title=None): @staticmethod
def playlist_result(entries, playlist_id=None, playlist_title=None):
"""Returns a playlist""" """Returns a playlist"""
video_info = {'_type': 'playlist', video_info = {'_type': 'playlist',
'entries': entries} 'entries': entries}
@ -227,7 +331,7 @@ class InfoExtractor(object):
video_info['title'] = playlist_title video_info['title'] = playlist_title
return video_info return video_info
def _search_regex(self, pattern, string, name, default=None, fatal=True, flags=0): def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
""" """
Perform a regex search on the given string, using a single or a list of Perform a regex search on the given string, using a single or a list of
patterns returning the first matching group. patterns returning the first matching group.
@ -241,7 +345,7 @@ class InfoExtractor(object):
mobj = re.search(p, string, flags) mobj = re.search(p, string, flags)
if mobj: break if mobj: break
if sys.stderr.isatty() and os.name != 'nt': if os.name != 'nt' and sys.stderr.isatty():
_name = u'\033[0;34m%s\033[0m' % name _name = u'\033[0;34m%s\033[0m' % name
else: else:
_name = name _name = name
@ -249,7 +353,7 @@ class InfoExtractor(object):
if mobj: if mobj:
# return the first matching group # return the first matching group
return next(g for g in mobj.groups() if g is not None) return next(g for g in mobj.groups() if g is not None)
elif default is not None: elif default is not _NO_DEFAULT:
return default return default
elif fatal: elif fatal:
raise RegexNotFoundError(u'Unable to extract %s' % _name) raise RegexNotFoundError(u'Unable to extract %s' % _name)
@ -258,7 +362,7 @@ class InfoExtractor(object):
u'please report this issue on http://yt-dl.org/bug' % _name) u'please report this issue on http://yt-dl.org/bug' % _name)
return None return None
def _html_search_regex(self, pattern, string, name, default=None, fatal=True, flags=0): def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
""" """
Like _search_regex, but strips HTML tags and unescapes entities. Like _search_regex, but strips HTML tags and unescapes entities.
""" """
@ -300,13 +404,21 @@ class InfoExtractor(object):
# Helper functions for extracting OpenGraph info # Helper functions for extracting OpenGraph info
@staticmethod @staticmethod
def _og_regex(prop): def _og_regexes(prop):
return r'<meta.+?property=[\'"]og:%s[\'"].+?content=(?:"(.+?)"|\'(.+?)\')' % re.escape(prop) content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\')'
property_re = r'(?:name|property)=[\'"]og:%s[\'"]' % re.escape(prop)
template = r'<meta[^>]+?%s[^>]+?%s'
return [
template % (property_re, content_re),
template % (content_re, property_re),
]
def _og_search_property(self, prop, html, name=None, **kargs): def _og_search_property(self, prop, html, name=None, **kargs):
if name is None: if name is None:
name = 'OpenGraph %s' % prop name = 'OpenGraph %s' % prop
escaped = self._search_regex(self._og_regex(prop), html, name, flags=re.DOTALL, **kargs) escaped = self._search_regex(self._og_regexes(prop), html, name, flags=re.DOTALL, **kargs)
if escaped is None:
return None
return unescapeHTML(escaped) return unescapeHTML(escaped)
def _og_search_thumbnail(self, html, **kargs): def _og_search_thumbnail(self, html, **kargs):
@ -318,10 +430,22 @@ class InfoExtractor(object):
def _og_search_title(self, html, **kargs): def _og_search_title(self, html, **kargs):
return self._og_search_property('title', html, **kargs) return self._og_search_property('title', html, **kargs)
def _og_search_video_url(self, html, name='video url', **kargs): def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
return self._html_search_regex([self._og_regex('video:secure_url'), regexes = self._og_regexes('video')
self._og_regex('video')], if secure: regexes = self._og_regexes('video:secure_url') + regexes
html, name, **kargs) return self._html_search_regex(regexes, html, name, **kargs)
def _html_search_meta(self, name, html, display_name=None, fatal=False):
if display_name is None:
display_name = name
return self._html_search_regex(
r'''(?ix)<meta
(?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
[^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
html, display_name, fatal=fatal)
def _dc_search_uploader(self, html):
return self._html_search_meta('dc.creator', html, 'uploader')
def _rta_search(self, html): def _rta_search(self, html):
# See http://www.rtalabel.org/index.php?content=howtofaq#single # See http://www.rtalabel.org/index.php?content=howtofaq#single
@ -331,6 +455,82 @@ class InfoExtractor(object):
return 18 return 18
return 0 return 0
def _media_rating_search(self, html):
# See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/
rating = self._html_search_meta('rating', html)
if not rating:
return None
RATING_TABLE = {
'safe for kids': 0,
'general': 8,
'14 years': 14,
'mature': 17,
'restricted': 19,
}
return RATING_TABLE.get(rating.lower(), None)
def _twitter_search_player(self, html):
return self._html_search_meta('twitter:player', html,
'twitter card player')
def _sort_formats(self, formats):
if not formats:
raise ExtractorError(u'No video formats found')
def _formats_key(f):
# TODO remove the following workaround
from ..utils import determine_ext
if not f.get('ext') and 'url' in f:
f['ext'] = determine_ext(f['url'])
preference = f.get('preference')
if preference is None:
proto = f.get('protocol')
if proto is None:
proto = compat_urllib_parse_urlparse(f.get('url', '')).scheme
preference = 0 if proto in ['http', 'https'] else -0.1
if f.get('ext') in ['f4f', 'f4m']: # Not yet supported
preference -= 0.5
if f.get('vcodec') == 'none': # audio only
if self._downloader.params.get('prefer_free_formats'):
ORDER = [u'aac', u'mp3', u'm4a', u'webm', u'ogg', u'opus']
else:
ORDER = [u'webm', u'opus', u'ogg', u'mp3', u'aac', u'm4a']
ext_preference = 0
try:
audio_ext_preference = ORDER.index(f['ext'])
except ValueError:
audio_ext_preference = -1
else:
if self._downloader.params.get('prefer_free_formats'):
ORDER = [u'flv', u'mp4', u'webm']
else:
ORDER = [u'webm', u'flv', u'mp4']
try:
ext_preference = ORDER.index(f['ext'])
except ValueError:
ext_preference = -1
audio_ext_preference = 0
return (
preference,
f.get('quality') if f.get('quality') is not None else -1,
f.get('height') if f.get('height') is not None else -1,
f.get('width') if f.get('width') is not None else -1,
ext_preference,
f.get('tbr') if f.get('tbr') is not None else -1,
f.get('vbr') if f.get('vbr') is not None else -1,
f.get('abr') if f.get('abr') is not None else -1,
audio_ext_preference,
f.get('filesize') if f.get('filesize') is not None else -1,
f.get('format_id'),
)
formats.sort(key=_formats_key)
class SearchInfoExtractor(InfoExtractor): class SearchInfoExtractor(InfoExtractor):
""" """

View File

@ -1,4 +1,5 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals
import re import re
import json import json
@ -20,30 +21,31 @@ class CondeNastIE(InfoExtractor):
# The keys are the supported sites and the values are the name to be shown # The keys are the supported sites and the values are the name to be shown
# to the user and in the extractor description. # to the user and in the extractor description.
_SITES = {'wired': u'WIRED', _SITES = {
'gq': u'GQ', 'wired': 'WIRED',
'vogue': u'Vogue', 'gq': 'GQ',
'glamour': u'Glamour', 'vogue': 'Vogue',
'wmagazine': u'W Magazine', 'glamour': 'Glamour',
'vanityfair': u'Vanity Fair', 'wmagazine': 'W Magazine',
} 'vanityfair': 'Vanity Fair',
}
_VALID_URL = r'http://(video|www).(?P<site>%s).com/(?P<type>watch|series|video)/(?P<id>.+)' % '|'.join(_SITES.keys()) _VALID_URL = r'http://(video|www)\.(?P<site>%s)\.com/(?P<type>watch|series|video)/(?P<id>.+)' % '|'.join(_SITES.keys())
IE_DESC = u'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values())) IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
_TEST = { _TEST = {
u'url': u'http://video.wired.com/watch/3d-printed-speakers-lit-with-led', 'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
u'file': u'5171b343c2b4c00dd0c1ccb3.mp4', 'file': '5171b343c2b4c00dd0c1ccb3.mp4',
u'md5': u'1921f713ed48aabd715691f774c451f7', 'md5': '1921f713ed48aabd715691f774c451f7',
u'info_dict': { 'info_dict': {
u'title': u'3D Printed Speakers Lit With LED', 'title': '3D Printed Speakers Lit With LED',
u'description': u'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.', 'description': 'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
} }
} }
def _extract_series(self, url, webpage): def _extract_series(self, url, webpage):
title = self._html_search_regex(r'<div class="cne-series-info">.*?<h1>(.+?)</h1>', title = self._html_search_regex(r'<div class="cne-series-info">.*?<h1>(.+?)</h1>',
webpage, u'series title', flags=re.DOTALL) webpage, 'series title', flags=re.DOTALL)
url_object = compat_urllib_parse_urlparse(url) url_object = compat_urllib_parse_urlparse(url)
base_url = '%s://%s' % (url_object.scheme, url_object.netloc) base_url = '%s://%s' % (url_object.scheme, url_object.netloc)
m_paths = re.finditer(r'<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', m_paths = re.finditer(r'<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]',
@ -57,39 +59,41 @@ class CondeNastIE(InfoExtractor):
description = self._html_search_regex([r'<div class="cne-video-description">(.+?)</div>', description = self._html_search_regex([r'<div class="cne-video-description">(.+?)</div>',
r'<div class="video-post-content">(.+?)</div>', r'<div class="video-post-content">(.+?)</div>',
], ],
webpage, u'description', webpage, 'description',
fatal=False, flags=re.DOTALL) fatal=False, flags=re.DOTALL)
params = self._search_regex(r'var params = {(.+?)}[;,]', webpage, params = self._search_regex(r'var params = {(.+?)}[;,]', webpage,
u'player params', flags=re.DOTALL) 'player params', flags=re.DOTALL)
video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, u'video id') video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id')
player_id = self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, u'player id') player_id = self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id')
target = self._search_regex(r'target: [\'"](.+?)[\'"]', params, u'target') target = self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target')
data = compat_urllib_parse.urlencode({'videoId': video_id, data = compat_urllib_parse.urlencode({'videoId': video_id,
'playerId': player_id, 'playerId': player_id,
'target': target, 'target': target,
}) })
base_info_url = self._search_regex(r'url = [\'"](.+?)[\'"][,;]', base_info_url = self._search_regex(r'url = [\'"](.+?)[\'"][,;]',
webpage, u'base info url', webpage, 'base info url',
default='http://player.cnevids.com/player/loader.js?') default='http://player.cnevids.com/player/loader.js?')
info_url = base_info_url + data info_url = base_info_url + data
info_page = self._download_webpage(info_url, video_id, info_page = self._download_webpage(info_url, video_id,
u'Downloading video info') 'Downloading video info')
video_info = self._search_regex(r'var video = ({.+?});', info_page, u'video info') video_info = self._search_regex(r'var video = ({.+?});', info_page, 'video info')
video_info = json.loads(video_info) video_info = json.loads(video_info)
def _formats_sort_key(f): formats = [{
type_ord = 1 if f['type'] == 'video/mp4' else 0 'format_id': '%s-%s' % (fdata['type'].split('/')[-1], fdata['quality']),
quality_ord = 1 if f['quality'] == 'high' else 0 'url': fdata['src'],
return (quality_ord, type_ord) 'ext': fdata['type'].split('/')[-1],
best_format = sorted(video_info['sources'][0], key=_formats_sort_key)[-1] 'quality': 1 if fdata['quality'] == 'high' else 0,
} for fdata in video_info['sources'][0]]
self._sort_formats(formats)
return {'id': video_id, return {
'url': best_format['src'], 'id': video_id,
'ext': best_format['type'].split('/')[-1], 'formats': formats,
'title': video_info['title'], 'title': video_info['title'],
'thumbnail': video_info['poster_frame'], 'thumbnail': video_info['poster_frame'],
'description': description, 'description': description,
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -0,0 +1,188 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
import json
import base64
import zlib
from hashlib import sha1
from math import pow, sqrt, floor
from .common import InfoExtractor
from ..utils import (
ExtractorError,
compat_urllib_parse,
compat_urllib_request,
bytes_to_intlist,
intlist_to_bytes,
unified_strdate,
clean_html,
)
from ..aes import (
aes_cbc_decrypt,
inc,
)
class CrunchyrollIE(InfoExtractor):
    """Extractor for crunchyroll.com video pages (desktop and mobile URLs).

    Collects the stream host and play path for each quality advertised on
    the page, and decrypts the subtitle scripts into SRT text.
    """

    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
    _TEST = {
        'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
        #'md5': 'b1639fd6ddfaa43788c85f6d1dddd412',
        'info_dict': {
            'id': '645513',
            'ext': 'flv',
            'title': 'Wanna be the Strongest in the World Episode 1 An Idol-Wrestler is Born!',
            'description': 'md5:2d17137920c64f2f49981a7797d275ef',
            'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
            'uploader': 'Yomiuri Telecasting Corporation (YTV)',
            'upload_date': '20131013',
        },
        'params': {
            # rtmp
            'skip_download': True,
        },
    }

    # Page quality label -> (video_encode_quality, video_format) request values.
    _FORMAT_IDS = {
        '360': ('60', '106'),
        '480': ('61', '106'),
        '720': ('62', '106'),
        '1080': ('80', '108'),
    }

    def _decrypt_subtitles(self, data, iv, id):
        """Decrypt and decompress one subtitle script.

        data and iv are the raw (already base64-decoded) bytes; id is the
        numeric subtitle id the AES key is derived from. Returns the
        zlib-decompressed plaintext as bytes.
        """
        data = bytes_to_intlist(data)
        iv = bytes_to_intlist(iv)
        id = int(id)

        def obfuscate_key_aux(count, modulo, start):
            # Fibonacci-style sequence seeded with `start`, reduced modulo
            # `modulo` and shifted by 33.
            output = list(start)
            for _ in range(count):
                output.append(output[-1] + output[-2])
            # cut off start values
            output = output[2:]
            output = list(map(lambda x: x % modulo + 33, output))
            return output

        def obfuscate_key(key):
            num1 = int(floor(pow(2, 25) * sqrt(6.9)))
            num2 = (num1 ^ key) << 5
            num3 = key ^ num1
            num4 = num3 ^ (num3 >> 3) ^ num2
            prefix = intlist_to_bytes(obfuscate_key_aux(20, 97, (1, 2)))
            shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode('ascii')).digest())
            # Extend 160 Bit hash to 256 Bit
            return shaHash + [0] * 12

        key = obfuscate_key(id)
        decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
        return zlib.decompress(decrypted_data)

    def _convert_subtitles_to_srt(self, subtitles):
        """Convert the <event ...> entries of a subtitle script to SRT text.

        Events whose text is empty after HTML cleaning are dropped; the
        remaining cues are numbered consecutively starting at 1.
        """
        cues = []
        for start, end, text in re.findall(r'<event [^>]*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles):
            text = clean_html(text).replace('\\N', '\n')
            if not text:
                continue
            # Number by emitted cue, not by source event, so that skipped
            # events do not leave gaps in the SRT sequence.
            cues.append('%d\n%s --> %s\n%s\n\n' % (
                len(cues) + 1,
                start.replace('.', ','),
                end.replace('.', ','),
                text))
        return ''.join(cues)

    def _real_extract(self, url):
        """Extract metadata, formats and subtitles for the video at url."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('video_id')

        if mobj.group('prefix') == 'm':
            # The mobile page only serves to find the canonical desktop URL.
            mobile_webpage = self._download_webpage(url, video_id, 'Downloading mobile webpage')
            webpage_url = self._search_regex(r'<link rel="canonical" href="([^"]+)" />', mobile_webpage, 'webpage_url')
        else:
            webpage_url = 'http://www.' + mobj.group('url')

        webpage = self._download_webpage(webpage_url, video_id, 'Downloading webpage')
        note_m = self._html_search_regex(r'<div class="showmedia-trailer-notice">(.+?)</div>', webpage, 'trailer-notice', default='')
        if note_m:
            raise ExtractorError(note_m)

        mobj = re.search(r'Page\.messaging_box_controller\.addItems\(\[(?P<msg>{.+?})\]\)', webpage)
        if mobj:
            msg = json.loads(mobj.group('msg'))
            if msg.get('type') == 'error':
                raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True)

        video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, 'video_title', flags=re.DOTALL)
        video_title = re.sub(r' {2,}', ' ', video_title)
        video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='')
        if not video_description:
            video_description = None
        video_upload_date = self._html_search_regex(r'<div>Availability for free users:(.+?)</div>', webpage, 'video_upload_date', fatal=False, flags=re.DOTALL)
        if video_upload_date:
            video_upload_date = unified_strdate(video_upload_date)
        video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, 'video_uploader', fatal=False, flags=re.DOTALL)

        playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
        playerdata_req = compat_urllib_request.Request(playerdata_url)
        # Request bodies must be bytes on Python 3; the urlencoded text is
        # pure ASCII, so encoding is lossless on Python 2 as well.
        playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url}).encode('utf-8')
        playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
        playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info')

        stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, 'stream_id')
        video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False)

        formats = []
        for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage):
            if fmt not in self._FORMAT_IDS:
                # A quality we have no request values for; skip it rather
                # than abort the whole extraction with a KeyError.
                continue
            stream_quality, stream_format = self._FORMAT_IDS[fmt]
            video_format = fmt + 'p'
            streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/')
            # urlencode doesn't work!
            streamdata_req.data = (
                'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality=' + stream_quality
                + '&media%5Fid=' + stream_id
                + '&video%5Fformat=' + stream_format).encode('utf-8')
            streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
            streamdata_req.add_header('Content-Length', str(len(streamdata_req.data)))
            streamdata = self._download_webpage(streamdata_req, video_id, note='Downloading media info for ' + video_format)
            video_url = self._search_regex(r'<host>([^<]+)', streamdata, 'video_url')
            video_play_path = self._search_regex(r'<file>([^<]+)', streamdata, 'video_play_path')
            formats.append({
                'url': video_url,
                'play_path': video_play_path,
                'ext': 'flv',
                'format': video_format,
                'format_id': video_format,
            })

        subtitles = {}
        for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
            sub_page = self._download_webpage(
                'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
                video_id, note='Downloading subtitles for ' + sub_name)
            subtitle_id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
            iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
            data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
            if not subtitle_id or not iv or not data:
                # Incomplete subtitle record; nothing to decrypt.
                continue
            subtitle_id = int(subtitle_id)
            iv = base64.b64decode(iv)
            data = base64.b64decode(data)

            subtitle = self._decrypt_subtitles(data, iv, subtitle_id).decode('utf-8')
            lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
            if not lang_code:
                continue
            subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle)

        return {
            'id': video_id,
            'title': video_title,
            'description': video_description,
            'thumbnail': video_thumbnail,
            'uploader': video_uploader,
            'upload_date': video_upload_date,
            'subtitles': subtitles,
            'formats': formats,
        }

View File

@ -1,51 +1,71 @@
from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_urllib_parse, unescapeHTML,
find_xpath_attr,
) )
class CSpanIE(InfoExtractor): class CSpanIE(InfoExtractor):
_VALID_URL = r'http://www.c-spanvideo.org/program/(.*)' _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)'
_TEST = { IE_DESC = 'C-SPAN'
u'url': u'http://www.c-spanvideo.org/program/HolderonV', _TESTS = [{
u'file': u'315139.flv', 'url': 'http://www.c-span.org/video/?313572-1/HolderonV',
u'md5': u'74a623266956f69e4df0068ab6c80fe4', 'md5': '8e44ce11f0f725527daccc453f553eb0',
u'info_dict': { 'info_dict': {
u"title": u"Attorney General Eric Holder on Voting Rights Act Decision" 'id': '315139',
'ext': 'mp4',
'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in Shelby County v. Holder in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
}, },
u'skip': u'Requires rtmpdump' 'skip': 'Regularly fails on travis, for unknown reasons',
} }, {
'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
# For whatever reason, the served video alternates between
# two different ones
#'md5': 'dbb0f047376d457f2ab8b3929cbb2d0c',
'info_dict': {
'id': '340723',
'ext': 'mp4',
'title': 'International Health Care Models',
'description': 'md5:7a985a2d595dba00af3d9c9f0783c967',
}
}]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
prog_name = mobj.group(1) page_id = mobj.group('id')
webpage = self._download_webpage(url, prog_name) webpage = self._download_webpage(url, page_id)
video_id = self._search_regex(r'programid=(.*?)&', webpage, 'video id') video_id = self._search_regex(r'progid=\'?([0-9]+)\'?>', webpage, 'video id')
data = compat_urllib_parse.urlencode({'programid': video_id,
'dynamic':'1'})
info_url = 'http://www.c-spanvideo.org/common/services/flashXml.php?' + data
video_info = self._download_webpage(info_url, video_id, u'Downloading video info')
self.report_extraction(video_id) description = self._html_search_regex(
[
# The full description
r'<div class=\'expandable\'>(.*?)<a href=\'#\'',
# If the description is small enough the other div is not
# present, otherwise this is a stripped version
r'<p class=\'initial\'>(.*?)</p>'
],
webpage, 'description', flags=re.DOTALL)
title = self._html_search_regex(r'<string name="title">(.*?)</string>', info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
video_info, 'title') data = self._download_json(info_url, video_id)
description = self._html_search_regex(r'<meta (?:property="og:|name=")description" content="(.*?)"',
webpage, 'description',
flags=re.MULTILINE|re.DOTALL)
url = self._search_regex(r'<string name="URL">(.*?)</string>', url = unescapeHTML(data['video']['files'][0]['path']['#text'])
video_info, 'video url')
url = url.replace('$(protocol)', 'rtmp').replace('$(port)', '443')
path = self._search_regex(r'<string name="path">(.*?)</string>',
video_info, 'rtmp play path')
return {'id': video_id, doc = self._download_xml('http://www.c-span.org/common/services/flashXml.php?programid=' + video_id,
'title': title, video_id)
'ext': 'flv',
'url': url, def find_string(s):
'play_path': path, return find_xpath_attr(doc, './/string', 'name', s).text
'description': description,
'thumbnail': self._og_search_thumbnail(webpage), return {
} 'id': video_id,
'title': find_string('title'),
'url': url,
'description': description,
'thumbnail': find_string('poster'),
}

View File

@ -0,0 +1,25 @@
# encoding: utf-8
from __future__ import unicode_literals
from .canalplus import CanalplusIE
class D8IE(CanalplusIE):
    """d8.tv extractor.

    Subclasses CanalplusIE and overrides only its URL pattern, its
    video-info URL template, its name and its test case.
    """

    IE_NAME = 'd8.tv'
    _VALID_URL = r'https?://www\.d8\.tv/.*?/(?P<path>.*)'
    _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/d8/%s'

    _TEST = {
        'url': 'http://www.d8.tv/d8-docs-mags/pid6589-d8-campagne-intime.html',
        'file': '966289.flv',
        'info_dict': {
            'title': 'Campagne intime - Documentaire exceptionnel',
            'description': 'md5:d2643b799fb190846ae09c61e59a859f',
            'upload_date': '20131108',
        },
        'params': {
            # rtmp
            'skip_download': True,
        },
        'skip': 'videos get deleted after a while',
    }

View File

@ -11,6 +11,8 @@ from ..utils import (
get_element_by_attribute, get_element_by_attribute,
get_element_by_id, get_element_by_id,
orderedSet, orderedSet,
str_to_int,
int_or_none,
ExtractorError, ExtractorError,
) )
@ -21,13 +23,23 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
"""Build a request with the family filter disabled""" """Build a request with the family filter disabled"""
request = compat_urllib_request.Request(url) request = compat_urllib_request.Request(url)
request.add_header('Cookie', 'family_filter=off') request.add_header('Cookie', 'family_filter=off')
request.add_header('Cookie', 'ff=off')
return request return request
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
"""Information Extractor for Dailymotion""" """Information Extractor for Dailymotion"""
_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)' _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
IE_NAME = u'dailymotion' IE_NAME = u'dailymotion'
_FORMATS = [
(u'stream_h264_ld_url', u'ld'),
(u'stream_h264_url', u'standard'),
(u'stream_h264_hq_url', u'hq'),
(u'stream_h264_hd_url', u'hd'),
(u'stream_h264_hd1080_url', u'hd180'),
]
_TESTS = [ _TESTS = [
{ {
u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech', u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
@ -52,15 +64,26 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
}, },
u'skip': u'VEVO is only available in some countries', u'skip': u'VEVO is only available in some countries',
}, },
# age-restricted video
{
u'url': u'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
u'file': u'xyh2zz.mp4',
u'md5': u'0d667a7b9cebecc3c89ee93099c4159d',
u'info_dict': {
u'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
u'uploader': 'HotWaves1012',
u'age_limit': 18,
}
}
] ]
def _real_extract(self, url): def _real_extract(self, url):
# Extract id and simplified title from URL # Extract id and simplified title from URL
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1).split('_')[0].split('?')[0] video_id = mobj.group('id')
video_extension = 'mp4'
url = 'http://www.dailymotion.com/video/%s' % video_id url = 'http://www.dailymotion.com/video/%s' % video_id
# Retrieve video webpage to extract further information # Retrieve video webpage to extract further information
@ -79,10 +102,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
self.to_screen(u'Vevo video detected: %s' % vevo_id) self.to_screen(u'Vevo video detected: %s' % vevo_id)
return self.url_result(u'vevo:%s' % vevo_id, ie='Vevo') return self.url_result(u'vevo:%s' % vevo_id, ie='Vevo')
video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>', age_limit = self._rta_search(webpage)
# Looking for official user
r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
webpage, 'video uploader')
video_upload_date = None video_upload_date = None
mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage) mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
@ -99,37 +119,49 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title'] msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
raise ExtractorError(msg, expected=True) raise ExtractorError(msg, expected=True)
# TODO: support choosing qualities formats = []
for (key, format_id) in self._FORMATS:
for key in ['stream_h264_hd1080_url','stream_h264_hd_url', video_url = info.get(key)
'stream_h264_hq_url','stream_h264_url', if video_url is not None:
'stream_h264_ld_url']: m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
if info.get(key):#key in info and info[key]: if m_size is not None:
max_quality = key width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))
self.to_screen(u'Using %s' % key) else:
break width, height = None, None
else: formats.append({
'url': video_url,
'ext': 'mp4',
'format_id': format_id,
'width': width,
'height': height,
})
if not formats:
raise ExtractorError(u'Unable to extract video URL') raise ExtractorError(u'Unable to extract video URL')
video_url = info[max_quality]
# subtitles # subtitles
video_subtitles = self.extract_subtitles(video_id) video_subtitles = self.extract_subtitles(video_id, webpage)
if self._downloader.params.get('listsubtitles', False): if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id) self._list_available_subtitles(video_id, webpage)
return return
return [{ view_count = self._search_regex(
r'video_views_count[^>]+>\s+([\d\.,]+)', webpage, u'view count', fatal=False)
if view_count is not None:
view_count = str_to_int(view_count)
return {
'id': video_id, 'id': video_id,
'url': video_url, 'formats': formats,
'uploader': video_uploader, 'uploader': info['owner_screenname'],
'upload_date': video_upload_date, 'upload_date': video_upload_date,
'title': self._og_search_title(webpage), 'title': self._og_search_title(webpage),
'ext': video_extension,
'subtitles': video_subtitles, 'subtitles': video_subtitles,
'thumbnail': info['thumbnail_url'] 'thumbnail': info['thumbnail_url'],
}] 'age_limit': age_limit,
'view_count': view_count,
}
def _get_available_subtitles(self, video_id): def _get_available_subtitles(self, video_id, webpage):
try: try:
sub_list = self._download_webpage( sub_list = self._download_webpage(
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id, 'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
@ -158,7 +190,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
webpage = self._download_webpage(request, webpage = self._download_webpage(request,
id, u'Downloading page %s' % pagenum) id, u'Downloading page %s' % pagenum)
playlist_el = get_element_by_attribute(u'class', u'video_list', webpage) playlist_el = get_element_by_attribute(u'class', u'row video_list', webpage)
video_ids.extend(re.findall(r'data-id="(.+?)"', playlist_el)) video_ids.extend(re.findall(r'data-id="(.+?)"', playlist_el))
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:

View File

@ -1,26 +1,28 @@
# encoding: utf-8 # encoding: utf-8
from __future__ import unicode_literals
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_urllib_parse, compat_urllib_parse,
determine_ext,
) )
class DaumIE(InfoExtractor): class DaumIE(InfoExtractor):
_VALID_URL = r'https?://tvpot\.daum\.net/.*?clipid=(?P<id>\d+)' _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
IE_NAME = u'daum.net' IE_NAME = 'daum.net'
_TEST = { _TEST = {
u'url': u'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690', 'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
u'file': u'52554690.mp4', 'info_dict': {
u'info_dict': { 'id': '52554690',
u'title': u'DOTA 2GETHER 시즌2 6회 - 2부', 'ext': 'mp4',
u'description': u'DOTA 2GETHER 시즌2 6회 - 2부', 'title': 'DOTA 2GETHER 시즌2 6회 - 2부',
u'upload_date': u'20130831', 'description': 'DOTA 2GETHER 시즌2 6회 - 2부',
u'duration': 3868, 'upload_date': '20130831',
'duration': 3868,
}, },
} }
@ -29,17 +31,16 @@ class DaumIE(InfoExtractor):
video_id = mobj.group(1) video_id = mobj.group(1)
canonical_url = 'http://tvpot.daum.net/v/%s' % video_id canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
webpage = self._download_webpage(canonical_url, video_id) webpage = self._download_webpage(canonical_url, video_id)
full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"', full_id = self._search_regex(
webpage, u'full id') r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]',
webpage, 'full id')
query = compat_urllib_parse.urlencode({'vid': full_id}) query = compat_urllib_parse.urlencode({'vid': full_id})
info_xml = self._download_webpage( info = self._download_xml(
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id, 'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
u'Downloading video info') 'Downloading video info')
urls_xml = self._download_webpage( urls = self._download_xml(
'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query, 'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
video_id, u'Downloading video formats info') video_id, 'Downloading video formats info')
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))
self.to_screen(u'%s: Getting video urls' % video_id) self.to_screen(u'%s: Getting video urls' % video_id)
formats = [] formats = []
@ -49,18 +50,16 @@ class DaumIE(InfoExtractor):
'vid': full_id, 'vid': full_id,
'profile': profile, 'profile': profile,
}) })
url_xml = self._download_webpage( url_doc = self._download_xml(
'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query, 'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
video_id, note=False) video_id, note=False)
url_doc = xml.etree.ElementTree.fromstring(url_xml.encode('utf-8'))
format_url = url_doc.find('result/url').text format_url = url_doc.find('result/url').text
formats.append({ formats.append({
'url': format_url, 'url': format_url,
'ext': determine_ext(format_url),
'format_id': profile, 'format_id': profile,
}) })
info = { return {
'id': video_id, 'id': video_id,
'title': info.find('TITLE').text, 'title': info.find('TITLE').text,
'formats': formats, 'formats': formats,
@ -69,6 +68,3 @@ class DaumIE(InfoExtractor):
'duration': int(info.find('DURATION').text), 'duration': int(info.find('DURATION').text),
'upload_date': info.find('REGDTTM').text[:8], 'upload_date': info.find('REGDTTM').text[:8],
} }
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@ -1,3 +1,5 @@
from __future__ import unicode_literals
import re import re
import json import json
@ -5,15 +7,14 @@ from .common import InfoExtractor
class DefenseGouvFrIE(InfoExtractor): class DefenseGouvFrIE(InfoExtractor):
_IE_NAME = 'defense.gouv.fr' IE_NAME = 'defense.gouv.fr'
_VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/' _VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/'
r'ligthboxvideo/base-de-medias/webtv/(.*)') r'ligthboxvideo/base-de-medias/webtv/(.*)')
_TEST = { _TEST = {
u'url': (u'http://www.defense.gouv.fr/layout/set/ligthboxvideo/' 'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1',
u'base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1'), 'file': '11213.mp4',
u'file': u'11213.mp4', 'md5': '75bba6124da7e63d2d60b5244ec9430c',
u'md5': u'75bba6124da7e63d2d60b5244ec9430c',
"info_dict": { "info_dict": {
"title": "attaque-chimique-syrienne-du-21-aout-2013-1" "title": "attaque-chimique-syrienne-du-21-aout-2013-1"
} }

View File

@ -1,60 +0,0 @@
import re
import os
import socket
from .common import InfoExtractor
from ..utils import (
compat_http_client,
compat_str,
compat_urllib_error,
compat_urllib_parse,
compat_urllib_request,
ExtractorError,
)
class DepositFilesIE(InfoExtractor):
    """Information extractor for depositfiles.com"""

    _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'

    def _real_extract(self, url):
        """Resolve a depositfiles.com file page to its direct download URL.

        The file id is the last path component of *url*.  The page is
        requested from the English locale with the "free download" form
        field set, and the download link is read from the returned HTML.

        Raises ExtractorError if the page cannot be fetched, if the site
        reports a restriction ("Attention ..." message), or if no download
        URL is found.
        """
        file_id = url.split('/')[-1]
        # Rebuild url in english locale
        url = 'http://depositfiles.com/en/files/' + file_id

        # Retrieve file webpage with 'Free download' button pressed
        free_download_indication = {'gateway_result': '1'}
        request = compat_urllib_request.Request(
            url, compat_urllib_parse.urlencode(free_download_indication))
        try:
            self.report_download_webpage(file_id)
            # NOTE(review): this is the raw result of .read(); together with
            # the .decode('utf-8') calls below it looks like the code assumes
            # Python 2 byte strings -- confirm before relying on it elsewhere.
            webpage = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to retrieve file webpage: %s' % compat_str(err))

        # Search for the real file URL
        mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
        if mobj is None:
            # Try to figure out reason of the error.
            mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
            if mobj is not None:
                # Collapse runs of whitespace so the message fits on one line.
                restriction_message = re.sub(r'\s+', ' ', mobj.group(1)).strip()
                raise ExtractorError(u'%s' % restriction_message)
            raise ExtractorError(u'Unable to extract download URL from: %s' % url)

        file_url = mobj.group(1)
        # Extension without the leading dot, taken from the download URL.
        file_extension = os.path.splitext(file_url)[1][1:]

        # Search for file title
        file_title = self._search_regex(r'<b title="(.*?)">', webpage, u'title')

        return [{
            'id': file_id.decode('utf-8'),
            'url': file_url.decode('utf-8'),
            'uploader': None,
            'upload_date': None,
            'title': file_title,
            'ext': file_extension.decode('utf-8'),
        }]

View File

@ -0,0 +1,46 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
class DiscoveryIE(InfoExtractor):
_VALID_URL = r'http://dsc\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
_TEST = {
'url': 'http://dsc.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
'file': '614784.mp4',
'md5': 'e12614f9ee303a6ccef415cb0793eba2',
'info_dict': {
'title': 'MythBusters: Mission Impossible Outtakes',
'description': ('Watch Jamie Hyneman and Adam Savage practice being'
' each other -- to the point of confusing Jamie\'s dog -- and '
'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'
' back.'),
'duration': 156,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
video_list_json = self._search_regex(r'var videoListJSON = ({.*?});',
webpage, 'video list', flags=re.DOTALL)
video_list = json.loads(video_list_json)
info = video_list['clips'][0]
formats = []
for f in info['mp4']:
formats.append(
{'url': f['src'], r'ext': r'mp4', 'tbr': int(f['bitrate'][:-1])})
return {
'id': info['contentId'],
'title': video_list['name'],
'formats': formats,
'description': info['videoCaption'],
'thumbnail': info.get('videoStillURL') or info.get('thumbnailURL'),
'duration': info['duration'],
}

View File

@ -1,41 +1,42 @@
from __future__ import unicode_literals
import re import re
import json
import time import time
from .common import InfoExtractor from .common import InfoExtractor
class DotsubIE(InfoExtractor): class DotsubIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?dotsub\.com/view/([^/]+)' _VALID_URL = r'http://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
_TEST = { _TEST = {
u'url': u'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27', 'url': 'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27',
u'file': u'aed3b8b2-1889-4df5-ae63-ad85f5572f27.flv', 'md5': '0914d4d69605090f623b7ac329fea66e',
u'md5': u'0914d4d69605090f623b7ac329fea66e', 'info_dict': {
u'info_dict': { 'id': 'aed3b8b2-1889-4df5-ae63-ad85f5572f27',
u"title": u"Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary", 'ext': 'flv',
u"uploader": u"4v4l0n42", 'title': 'Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary',
u'description': u'Pyramids of Waste (2010) also known as "The lightbulb conspiracy" is a documentary about how our economic system based on consumerism and planned obsolescence is breaking our planet down.\r\n\r\nSolutions to this can be found at:\r\nhttp://robotswillstealyourjob.com\r\nhttp://www.federicopistono.org\r\n\r\nhttp://opensourceecology.org\r\nhttp://thezeitgeistmovement.com', 'uploader': '4v4l0n42',
u'thumbnail': u'http://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p', 'description': 'Pyramids of Waste (2010) also known as "The lightbulb conspiracy" is a documentary about how our economic system based on consumerism and planned obsolescence is breaking our planet down.\r\n\r\nSolutions to this can be found at:\r\nhttp://robotswillstealyourjob.com\r\nhttp://www.federicopistono.org\r\n\r\nhttp://opensourceecology.org\r\nhttp://thezeitgeistmovement.com',
u'upload_date': u'20101213', 'thumbnail': 'http://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
'upload_date': '20101213',
} }
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1) video_id = mobj.group('id')
info_url = "https://dotsub.com/api/media/%s/metadata" %(video_id) info_url = "https://dotsub.com/api/media/%s/metadata" % video_id
webpage = self._download_webpage(info_url, video_id) info = self._download_json(info_url, video_id)
info = json.loads(webpage)
date = time.gmtime(info['dateCreated']/1000) # The timestamp is in miliseconds date = time.gmtime(info['dateCreated']/1000) # The timestamp is in miliseconds
return [{ return {
'id': video_id, 'id': video_id,
'url': info['mediaURI'], 'url': info['mediaURI'],
'ext': 'flv', 'ext': 'flv',
'title': info['title'], 'title': info['title'],
'thumbnail': info['screenshotURI'], 'thumbnail': info['screenshotURI'],
'description': info['description'], 'description': info['description'],
'uploader': info['user'], 'uploader': info['user'],
'view_count': info['numberOfViews'], 'view_count': info['numberOfViews'],
'upload_date': u'%04i%02i%02i' % (date.tm_year, date.tm_mon, date.tm_mday), 'upload_date': '%04i%02i%02i' % (date.tm_year, date.tm_mon, date.tm_mday),
}] }

View File

@ -1,22 +1,20 @@
# coding: utf-8 # coding: utf-8
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext,
unified_strdate, unified_strdate,
) )
class DreiSatIE(InfoExtractor): class DreiSatIE(InfoExtractor):
IE_NAME = '3sat' IE_NAME = '3sat'
_VALID_URL = r'(?:http://)?(?:www\.)?3sat.de/mediathek/index.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$' _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
_TEST = { _TEST = {
u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983", u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983",
u'file': u'36983.webm', u'file': u'36983.mp4',
u'md5': u'57c97d0469d71cf874f6815aa2b7c944', u'md5': u'9dcfe344732808dbfcc901537973c922',
u'info_dict': { u'info_dict': {
u"title": u"Kaffeeland Schweiz", u"title": u"Kaffeeland Schweiz",
u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. SCHWEIZWEIT nimmt die Kaffeekultur unter die...", u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. SCHWEIZWEIT nimmt die Kaffeekultur unter die...",
@ -30,8 +28,7 @@ class DreiSatIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
details_xml = self._download_webpage(details_url, video_id, note=u'Downloading video details') details_doc = self._download_xml(details_url, video_id, note=u'Downloading video details')
details_doc = xml.etree.ElementTree.fromstring(details_xml.encode('utf-8'))
thumbnail_els = details_doc.findall('.//teaserimage') thumbnail_els = details_doc.findall('.//teaserimage')
thumbnails = [{ thumbnails = [{
@ -54,20 +51,14 @@ class DreiSatIE(InfoExtractor):
'width': int(fe.find('./width').text), 'width': int(fe.find('./width').text),
'height': int(fe.find('./height').text), 'height': int(fe.find('./height').text),
'url': fe.find('./url').text, 'url': fe.find('./url').text,
'ext': determine_ext(fe.find('./url').text),
'filesize': int(fe.find('./filesize').text), 'filesize': int(fe.find('./filesize').text),
'video_bitrate': int(fe.find('./videoBitrate').text), 'video_bitrate': int(fe.find('./videoBitrate').text),
'3sat_qualityname': fe.find('./quality').text,
} for fe in format_els } for fe in format_els
if not fe.find('./url').text.startswith('http://www.metafilegenerator.de/')] if not fe.find('./url').text.startswith('http://www.metafilegenerator.de/')]
def _sortkey(format): self._sort_formats(formats)
qidx = ['low', 'med', 'high', 'veryhigh'].index(format['3sat_qualityname'])
prefer_http = 1 if 'rtmp' in format['url'] else 0
return (qidx, prefer_http, format['video_bitrate'])
formats.sort(key=_sortkey)
info = { return {
'_type': 'video', '_type': 'video',
'id': video_id, 'id': video_id,
'title': video_title, 'title': video_title,
@ -78,8 +69,3 @@ class DreiSatIE(InfoExtractor):
'uploader': video_uploader, 'uploader': video_uploader,
'upload_date': upload_date, 'upload_date': upload_date,
} }
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@ -0,0 +1,32 @@
# coding: utf-8
from __future__ import unicode_literals
import os.path
import re
from .common import InfoExtractor
class DropboxIE(InfoExtractor):
    """Information extractor for Dropbox shared-file links (/s/<id>/<name>)."""

    _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/s/(?P<id>[a-zA-Z0-9]{15})/(?P<title>[^?#]*)'
    _TEST = {
        'url': 'https://www.dropbox.com/s/0qr9sai2veej4f8/THE_DOCTOR_GAMES.mp4',
        'md5': '8ae17c51172fb7f93bdd6a214cc8c896',
        'info_dict': {
            'id': '0qr9sai2veej4f8',
            'ext': 'mp4',
            'title': 'THE_DOCTOR_GAMES'
        }
    }

    def _real_extract(self, url):
        """Return the info dict for a shared file, using a ``dl=1`` URL.

        The id and title come straight from the URL; the title is the file
        name with its extension removed.  No network request is made here.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        title = os.path.splitext(mobj.group('title'))[0]

        # _VALID_URL accepts links that already carry a query string or a
        # fragment (e.g. "...mp4?dl=0"), so blindly appending "?dl=1" would
        # produce a malformed URL.  Drop the fragment, discard any existing
        # "dl" parameter, keep the other parameters and add dl=1.
        base, _, query = url.partition('#')[0].partition('?')
        params = [p for p in query.split('&') if p and not p.startswith('dl=')]
        params.append('dl=1')
        video_url = base + '?' + '&'.join(params)

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
        }

View File

@ -1,5 +1,4 @@
import re import re
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import determine_ext from ..utils import determine_ext
@ -21,9 +20,8 @@ class EbaumsWorldIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
config_xml = self._download_webpage( config = self._download_xml(
'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id) 'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id)
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
video_url = config.find('file').text video_url = config.find('file').text
return { return {

View File

@ -1,4 +1,3 @@
import itertools
import json import json
import random import random
import re import re
@ -11,7 +10,7 @@ from ..utils import (
class EightTracksIE(InfoExtractor): class EightTracksIE(InfoExtractor):
IE_NAME = '8tracks' IE_NAME = '8tracks'
_VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$' _VALID_URL = r'https?://8tracks\.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
_TEST = { _TEST = {
u"name": u"EightTracks", u"name": u"EightTracks",
u"url": u"http://8tracks.com/ytdl/youtube-dl-test-tracks-a", u"url": u"http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
@ -101,7 +100,7 @@ class EightTracksIE(InfoExtractor):
first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id) first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
next_url = first_url next_url = first_url
res = [] res = []
for i in itertools.count(): for i in range(track_count):
api_json = self._download_webpage(next_url, playlist_id, api_json = self._download_webpage(next_url, playlist_id,
note=u'Downloading song information %s/%s' % (str(i+1), track_count), note=u'Downloading song information %s/%s' % (str(i+1), track_count),
errnote=u'Failed to download song information') errnote=u'Failed to download song information')
@ -116,7 +115,5 @@ class EightTracksIE(InfoExtractor):
'ext': 'm4a', 'ext': 'm4a',
} }
res.append(info) res.append(info)
if api_data['set']['at_last_track']:
break
next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id']) next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
return res return res

View File

@ -0,0 +1,37 @@
# encoding: utf-8
import re
from .common import InfoExtractor
from .brightcove import BrightcoveIE
from ..utils import ExtractorError
class EitbIE(InfoExtractor):
    """Information extractor for eitb.tv; hands the video off to Brightcove."""

    IE_NAME = u'eitb.tv'
    _VALID_URL = r'https?://www\.eitb\.tv/(eu/bideoa|es/video)/[^/]+/(?P<playlist_id>\d+)/(?P<chapter_id>\d+)'

    _TEST = {
        u'add_ie': ['Brightcove'],
        u'url': u'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/2677100210001/2743577154001/lasa-y-zabala-30-anos/',
        u'md5': u'edf4436247185adee3ea18ce64c47998',
        u'info_dict': {
            u'id': u'2743577154001',
            u'ext': u'mp4',
            u'title': u'60 minutos (Lasa y Zabala, 30 años)',
            # Every eitb video has this description in its Brightcove info
            u'description': u'.',
            u'uploader': u'Euskal Telebista',
        },
    }

    def _real_extract(self, url):
        """Locate the Brightcove player URL on the page and delegate to it.

        Raises ExtractorError when the page contains no Brightcove URL.
        """
        chapter_id = re.match(self._VALID_URL, url).group('chapter_id')
        page = self._download_webpage(url, chapter_id)

        brightcove_url = BrightcoveIE._extract_brightcove_url(page)
        if brightcove_url is None:
            raise ExtractorError(u'Could not extract the Brightcove url')

        # The BrightcoveExperience object lacks the video id, so it is
        # appended by hand as the (URL-encoded) "@videoPlayer" parameter.
        player_param = '&%40videoPlayer={0}'.format(chapter_id)
        return self.url_result(
            brightcove_url + player_param, BrightcoveIE.ie_key())

View File

@ -0,0 +1,58 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import unified_strdate
class ElPaisIE(InfoExtractor):
    """Information extractor for videos embedded in elpais.com pages."""

    _VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
    IE_DESC = 'El País'

    _TEST = {
        'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html',
        'md5': '98406f301f19562170ec071b83433d55',
        'info_dict': {
            'id': 'tiempo-nuevo-recetas-viejas',
            'ext': 'mp4',
            'title': 'Tiempo nuevo, recetas viejas',
            'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.',
            'upload_date': '20140206',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video on the page at *url*.

        The page's script defines a ``url_cache`` prefix and builds the media
        and still-image URLs by appending suffixes to it; both pieces are
        scraped and joined here.  Thumbnail and upload date are optional.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        # Common prefix shared by the media file and its still image.
        prefix = self._html_search_regex(
            r'var url_cache = "([^"]+)";', webpage, 'URL prefix')
        video_suffix = self._search_regex(
            r"URLMediaFile = url_cache \+ '([^']+)'", webpage, 'video URL')

        thumbnail_suffix = self._search_regex(
            r"URLMediaStill = url_cache \+ '([^']+)'", webpage, 'thumbnail URL',
            fatal=False)
        thumbnail = None
        if thumbnail_suffix is not None:
            thumbnail = prefix + thumbnail_suffix

        title = self._html_search_regex(
            '<h2 class="entry-header entry-title.*?>(.*?)</h2>',
            webpage, 'title')

        date_str = self._search_regex(
            r'<p class="date-header date-int updated"\s+title="([^"]+)">',
            webpage, 'upload date', fatal=False)
        upload_date = None
        if date_str is not None:
            upload_date = unified_strdate(date_str)

        return {
            'id': video_id,
            'url': prefix + video_suffix,
            'title': title,
            'description': self._og_search_description(webpage),
            'thumbnail': thumbnail,
            'upload_date': upload_date,
        }

View File

@ -0,0 +1,43 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .fivemin import FiveMinIE
from ..utils import (
url_basename,
)
class EngadgetIE(InfoExtractor):
    """Information extractor for engadget.com pages that embed 5min videos."""

    # Two URL shapes are accepted: a direct "video/5min/<id>" link (the id
    # group is set) or a dated article path (the id group stays None).
    # Dots in the host name are escaped so they only match literal dots.
    _VALID_URL = r'''(?x)https?://www\.engadget\.com/
        (?:video/5min/(?P<id>\d+)|
            [\d/]+/.*?)
        '''

    _TEST = {
        'url': 'http://www.engadget.com/video/5min/518153925/',
        'md5': 'c6820d4828a5064447a4d9fc73f312c9',
        'info_dict': {
            'id': '518153925',
            'ext': 'mp4',
            'title': 'Samsung Galaxy Tab Pro 8.4 Review',
        },
        'add_ie': ['FiveMin'],
    }

    def _real_extract(self, url):
        """Return a single FiveMin result or a playlist of them.

        A direct 5min URL is handed to FiveMinIE immediately.  For any other
        page the HTML is downloaded and every ``playList=<digits>`` value
        found inside an ``<iframe>`` tag becomes one playlist entry; the
        playlist title is the URL's basename.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        if video_id is not None:
            return FiveMinIE._build_result(video_id)

        title = url_basename(url)
        webpage = self._download_webpage(url, title)
        playlist_ids = re.findall(r'<iframe[^>]+?playList=(\d+)', webpage)
        return {
            '_type': 'playlist',
            'title': title,
            'entries': [
                FiveMinIE._build_result(playlist_id)
                for playlist_id in playlist_ids
            ],
        }

Some files were not shown because too many files have changed in this diff Show More