Compare commits


220 Commits

Author SHA1 Message Date
59282080c8 release 2013.09.06.1 2013-09-06 10:53:35 +02:00
98f3da4040 Merge remote-tracking branch 'origin/master' 2013-09-06 10:53:24 +02:00
1d213233cd Do not re-download files for hashsum generation (Fixes #1383) 2013-09-06 10:51:53 +02:00
fd9cf73836 [youtube] Users: download from the api in json to simplify extraction (fixes #1358)
There could be duplicate videos or other videos if the description has links.
2013-09-06 10:43:02 +02:00
0638ad9999 [youtube] Fix detection of tags from HLS videos. 2013-09-06 10:25:31 +02:00
1eb527692a release 2013.09.06 2013-09-06 10:13:33 +02:00
09bb17e108 Merge pull request #1378 from patrickslin/patch-6
Vevo sig changed again, please update for us! Thanks very much! (fixes #...
2013-09-06 09:53:23 +02:00
1cf911bc82 Vevo sig changed again, please update for us! Thanks very much! (fixes #1375) 2013-09-05 17:38:03 -07:00
f4b052321b [youtube] Urls like youtube.com/NASA are now interpreted as users (fixes #1069)
Video urls like http://youtube.com/BaW_jenozKc are not valid, but http://youtu.be/BaW_jenozKc is correct.
2013-09-05 22:39:15 +02:00
a636203ea5 release 2013.09.05 2013-09-05 22:30:50 +02:00
c215217e39 [youtube] Playlists: extract the videos id from ['media$group']['yt$videoid'] (fixes #1374)
'media$player' is not defined for private videos.
2013-09-05 21:40:04 +02:00
08e291b54d [generic] Recognize html5 video in the format '<video src=".+?"' and only unquote the url when extracting the id (fixes #1372) 2013-09-05 18:02:17 +02:00
6b95b065be Add extractor for tvcast.naver.com (closes #1331) 2013-09-05 10:53:40 +02:00
9363169b67 [daum] Get the video page from a canonical url to extract the full id (fixes #1373) and extract description. 2013-09-05 10:08:17 +02:00
085bea4513 Credit @Huarong for tv.sohu.com 2013-09-04 22:09:22 +02:00
150f20828b Add extractor for daum.net (closes #1330) 2013-09-04 22:06:50 +02:00
08523ee20a release 2013.09.04 2013-09-04 14:33:32 +02:00
5d5171d26a Merge pull request #1341 from xanadu/master
add support for "-f mp4" for YouTube
2013-09-03 18:52:12 -07:00
96fb5605b2 AHLS -> Apple HTTP Live Streaming 2013-09-03 18:49:35 -07:00
7011de0bc2 Merge pull request #1363 from Rudloff/defense
defense.gouv.fr
2013-09-03 18:23:08 -07:00
c3dd69eab4 Merge remote-tracking branch 'upstream/master' 2013-09-03 12:22:29 -07:00
025171c476 Suggested by @phihag 2013-09-03 12:03:19 +02:00
c8dbccde30 [orf] Remove the test video; they seem to expire after one week 2013-09-03 11:51:01 +02:00
4ff7a0f1f6 [dailymotion] improve the regex for extracting the video info 2013-09-03 11:33:59 +02:00
9c2ade40de [vimeo] Handle Assertions Error when trying to get the description
On some pages the HTML tags are not closed, and Python 2.6 cannot handle that.
2013-09-03 11:11:36 +02:00
aa32314d09 [vimeo] add support for videos that embed the download url in the player page (fixes #1364) 2013-09-03 10:48:56 +02:00
52afe99665 Extractor for defense.gouv.fr 2013-09-03 01:51:17 +02:00
b0446d6a33 Merge remote-tracking branch 'upstream/master' 2013-09-03 01:27:49 +02:00
8e4e89f1c2 Add an extractor for VeeHD (closes #1359) 2013-09-02 11:54:09 +02:00
6c758d79de [metacafe] Add more cases to the uploader detection (reported in #1343) 2013-08-31 22:35:39 +02:00
691008087b Add an automatic page generator for the supported sites (related #156)
They are listed in the "supportedsites.html" page.
2013-08-31 15:18:52 +02:00
85f03346eb Merge remote-tracking branch 'upstream/master' 2013-08-30 17:51:59 -07:00
bdc6b3fc64 add support for "-f mp4" for YouTube 2013-08-30 17:51:50 -07:00
847f582290 Merge remote-tracking branch 'upstream/master' 2013-08-31 00:37:29 +02:00
10f5c016ec release 2013.08.30 2013-08-30 21:02:07 +02:00
2e756879f1 [youtube] update algo for length 86 2013-08-30 20:49:51 +02:00
c7a7750d3b [youtube] Fix typo in the _VALID_URL for YoutubeFavouritesIE, it was intended to also match :ytfavourites 2013-08-30 20:13:05 +02:00
9193c1eede Add youtube keywords to the bash completion script 2013-08-30 20:11:53 +02:00
3243d0f7b6 release 2013.08.29 2013-08-29 23:29:34 +02:00
23b00bc0e4 [youtube] update algo for length 84
Only appears sometimes, nearly identical to length 86.
2013-08-29 22:44:29 +02:00
52e1eea18b [youtube] update algo for length 86 (fixes #1349) 2013-08-29 22:33:58 +02:00
ee80d66727 [ign] update 1up extractor to work with the updated IGNIE 2013-08-29 21:51:09 +02:00
f1fb2d12b3 [ign] extract videos from articles pages 2013-08-29 21:39:36 +02:00
deb2c73212 Merge pull request #1347 from whydoubt/fix_orf_at
Fix orf.at extractor by adding file coding mark
2013-08-29 11:05:38 -07:00
8928491074 Fix orf.at extractor by adding file coding mark 2013-08-29 12:51:38 -05:00
545434670b Add an extractor for orf.at (closes #1346)
Make find_xpath_attr also accept numbers in the value
2013-08-29 19:16:07 +02:00
54fda45bac Merge pull request #1342 from whydoubt/fix_mit_26
Fix MIT extractor for Python 2.6
2013-08-29 13:42:08 +02:00
c7bf7366bc Update descriptions checksum for some test for Unistra and Youtube 2013-08-29 13:41:59 +02:00
b7052e5087 Also print the field that fails if it is a md5 checksum 2013-08-29 12:15:45 +02:00
0d75ae2ce3 Fix detection of the webpage charset if it's declared using ' instead of "
Like in "<meta charset='utf-8'/>"
2013-08-29 11:35:15 +02:00
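(Illustration, not the actual youtube-dl code: the fix amounts to accepting either quote character, or none, in the charset regex. A minimal sketch with a hypothetical helper name:

    import re

    def detect_charset(webpage_head):
        # matches <meta charset="utf-8">, <meta charset='utf-8'/> and <meta charset=utf-8>
        m = re.search(r"""<meta\s+charset=["']?([a-zA-Z0-9_-]+)""", webpage_head)
        return m.group(1) if m else None

    assert detect_charset('<meta charset="utf-8"/>') == 'utf-8'
    assert detect_charset("<meta charset='utf-8'/>") == 'utf-8'
)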
b5ba7b9dcf Fix MIT extractor for Python 2.6
The HTML for the MIT page does not parse cleanly for Python 2.6 due
to script tags within an actual script element.  The offending piece
is inside a comment block, so removing all such comment blocks
fixes the parsing.
2013-08-28 14:24:42 -05:00
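(Illustration: removing all comment blocks before parsing can be done with one regex; a minimal sketch, the helper name is hypothetical:

    import re

    def strip_html_comments(html):
        # (?s) lets '.' span newlines, so multi-line comment blocks are removed too
        return re.sub(r'(?s)<!--.*?-->', '', html)
)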
483e0ddd4d Merge remote-tracking branch 'upstream/master' 2013-08-28 10:19:28 -07:00
2891932bf0 release 2013.08.28.1 2013-08-28 19:00:17 +02:00
591078babf Merge remote-tracking branch 'upstream/master' 2013-08-28 09:57:28 -07:00
9868c781a1 Merge remote-tracking branch 'origin/master' 2013-08-28 18:22:33 +02:00
c257baff85 Merge remote-tracking branch 'rzhxeo/youporn-hd'
Conflicts:
	youtube_dl/utils.py
2013-08-28 18:22:28 +02:00
878e83c5a4 YoupornIE: Clean up extraction of hd video 2013-08-28 16:04:48 +02:00
0012690aae Let aes_decrypt_text return bytes instead of unicode 2013-08-28 16:03:35 +02:00
6e74bc41ca Fix division bug in aes.py 2013-08-28 16:01:43 +02:00
cba892fa1f Add intlist_to_bytes to utils.py 2013-08-28 15:59:07 +02:00
550bfd4cbd Merge pull request #1 from phihag/youporn-hd-pr
Allow changes to run under Python 3
2013-08-28 06:23:33 -07:00
920ef0779b Hide the password and username in verbose mode (closes #1089) 2013-08-28 15:14:02 +02:00
48ea9cea77 Allow changes to run under Python 3 2013-08-28 14:34:49 +02:00
ccf4b799df Merge remote-tracking branch 'origin/master' 2013-08-28 14:02:40 +02:00
f143d86ad2 [sohu] Handle encoding, and fix tests 2013-08-28 14:00:05 +02:00
8ae97d76ee PostProcessingError holds the message in the 'msg' property, not in 'message' (fixes #1323)
Causes DeprecationWarning: http://www.python.org/dev/peps/pep-0352/
2013-08-28 13:37:31 +02:00
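(A sketch of the pattern this commit relies on, not the exact utils.py code: the exception stores its text in a msg attribute, because BaseException.message is deprecated per PEP 352:

    class PostProcessingError(Exception):
        def __init__(self, msg):
            super(PostProcessingError, self).__init__(msg)
            self.msg = msg  # read err.msg; err.message triggers a DeprecationWarning on Python 2.6

    try:
        raise PostProcessingError(u'audio conversion failed')
    except PostProcessingError as err:
        print(err.msg)
)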
f8b362739e Merge remote-tracking branch 'Huarong/master' 2013-08-28 13:10:59 +02:00
6d69d03bac Merge remote-tracking branch 'origin/reuse_ies' 2013-08-28 13:05:21 +02:00
204da0d3e3 Merge remote-tracking branch 'origin/master' 2013-08-28 12:57:44 +02:00
c496ca96e7 Fix platform name in Python 2 with --verbose (Closes #1228) 2013-08-28 12:57:10 +02:00
67b22dd036 Add extractors for video.mit.edu and techtv.mit.edu (closes #1327)
video.mit.edu just embeds the videos from techtv.mit.edu
2013-08-28 12:55:42 +02:00
ce6a696e4d Remove unused imports 2013-08-28 12:47:38 +02:00
a5caba1eb0 [generic] simply use urljoin 2013-08-28 12:47:27 +02:00
cd9c100963 Merge remote-tracking branch 'upstream/master' 2013-08-28 12:20:12 +02:00
edde6c56ac Print playpath with --get-url (Fixes #1334) 2013-08-28 12:14:45 +02:00
b7f89fe692 Merge remote-tracking branch 'upstream/master' 2013-08-28 12:10:34 +02:00
ae3531adf9 [generic] Fix URL concatenation
When the url is something like http://example.org/foo/bar?x=y and the appended part is file/video.mp4, we want http://example.org/foo/file/video.mp4.
Fixes #1268.
2013-08-28 12:08:17 +02:00
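(This is exactly the resolution rule of the standard library's urljoin, which the related commit a5caba1eb0 switches to; for illustration:

    try:
        from urllib.parse import urljoin  # Python 3
    except ImportError:
        from urlparse import urljoin      # Python 2

    # the base URL's last path segment and query are dropped before joining:
    print(urljoin('http://example.org/foo/bar?x=y', 'file/video.mp4'))
    # -> http://example.org/foo/file/video.mp4
)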
8cf5ee7831 Merge branch 'master' of github.com:rg3/youtube-dl 2013-08-28 11:57:18 +02:00
aa3e950764 Tolerate junk at the end of gzip-compressed content (#1268) 2013-08-28 11:57:13 +02:00
1301a0dd42 Merge remote-tracking branch 'upstream/master' 2013-08-28 11:02:12 +02:00
af8bd6a82d Show the time taken to download in the same format as the ETA 2013-08-28 10:56:11 +02:00
6d38616e67 Merge pull request #1181 from h3xx/master
Add some verbosity when reporting finished downloads

Remove the mixed use of tabs and spaces for indentation.
2013-08-28 10:54:07 +02:00
4f5f18acb9 [addanime] add file 2013-08-28 10:28:16 +02:00
3e223834d9 [youtube] update algo for length 88, thanks to @Ramhack (fixes #1328) 2013-08-28 10:26:44 +02:00
a1bb0f8773 [cnn] remove debug print call. 2013-08-28 10:20:37 +02:00
0e283428f7 HTTPError is in urllib.error in Python 3, not in http.error 2013-08-28 10:18:39 +02:00
2eabb80254 [addanime] improve 2013-08-28 04:25:38 +02:00
44586389e4 [appletrailers] Add support 2013-08-28 02:18:44 +02:00
273f603efb [cnn] Allow more URLs 2013-08-28 00:14:19 +02:00
1619e22f40 release 2013.08.28 2013-08-27 23:31:36 +02:00
88a79ce6a6 Delete default user agent (Fixes #1309) 2013-08-27 23:31:24 +02:00
acebc9cd6b Revert "Install our own HTTPS handler as well (#1309)"
This reverts commit 36399e8576 and fixes #1322.
2013-08-27 23:28:20 +02:00
443c12a703 Merge pull request #1324 from whydoubt/fix_gplus
Initial slash in Google+ photos link was removed
2013-08-27 13:36:39 -07:00
7f3c4f4f65 Initial slash in Google+ photos link was removed 2013-08-27 14:38:50 -05:00
0bc56fa66a Add an extractor for NBC news (closes #1320) 2013-08-27 12:38:57 +02:00
1a582dd49d Add an extractor for CNN (closes #1318) 2013-08-27 11:56:48 +02:00
c5b921b597 Merge remote-tracking branch 'upstream/master' 2013-08-27 10:47:47 +02:00
e86ea47c02 [canalc2] Small improvements 2013-08-27 10:35:20 +02:00
aa5a63a5b5 Merge remote-tracking branch 'Rudloff/canalc2' 2013-08-27 10:31:46 +02:00
2a7b4da9b2 [hark] get the song info in JSON and extract more information. 2013-08-27 10:25:38 +02:00
069d098f84 [canalplus] Accept player.canalplus.fr urls 2013-08-27 10:21:57 +02:00
b3889f7023 release 2013.08.27 2013-08-27 02:30:47 +02:00
65883c8dbd Merge branch 'master' of github.com:rg3/youtube-dl 2013-08-27 02:00:23 +02:00
341ca8d74c [trilulilu] Add support for trilulilu.ro
Fun fact: The ads (not yet supported) are loaded from youtube ;)
2013-08-27 01:59:00 +02:00
99859d436c Merge remote-tracking branch 'upstream/master' 2013-08-26 15:16:13 -07:00
1b01e2b085 Merge pull request #1315 from yasoob/master
fixed tests for c56 and dailymotion
2013-08-26 13:38:48 -07:00
976fc7d137 fixed tests for c56 and dailymotion 2013-08-27 01:00:17 +05:00
c3b7b29c23 Merge remote-tracking branch 'origin/master' 2013-08-26 21:29:44 +02:00
627a91a9a8 [generic] small typo 2013-08-26 21:29:31 +02:00
6dc6302599 Merge pull request #1231 from yasoob/master
Added an IE for hark.com
2013-08-26 12:29:04 -07:00
7a20e2e1f8 Merge remote-tracking branch 'upstream/master' 2013-08-26 03:16:42 +02:00
90648143c3 Merge pull request #1310 from rzhxeo/rtlnow
Add support for http://superrtlnow.de
2013-08-25 15:45:22 -07:00
5c6658d4dd Merge remote-tracking branch 'upstream/master' 2013-08-24 23:01:39 +02:00
9585f890f8 [generic] add support for relative URLs (Fixes #1308) 2013-08-24 22:56:37 +02:00
0838239e8e [generic] Support double slash URLs (Fixes #1309) 2013-08-24 22:52:45 +02:00
36399e8576 Install our own HTTPS handler as well (#1309) 2013-08-24 22:49:22 +02:00
9460db832c [ro220] Add support for 220.ro 2013-08-24 21:10:03 +02:00
d68730a56e Add SUPER RTL NOW to RTLnow extractor 2013-08-24 13:22:28 +02:00
f2aeefe29c [youtube] update algo for length 84 2013-08-24 10:48:12 +02:00
39c6f507df Merge remote-tracking branch 'upstream/master' 2013-08-23 15:33:36 -07:00
d2d1eb5b0a Switch to domain yt-dl.org 2013-08-23 23:57:23 +02:00
8ae7be3ef4 release 2013.08.23 2013-08-23 23:09:53 +02:00
306170518f [youtube] update algo for length 86 (fixes #1302) 2013-08-23 22:36:59 +02:00
aa6a10c44a Allow to specify multiple subtitles languages separated by commas (closes #518) 2013-08-23 18:34:57 +02:00
9af73dc4fc Print a message before embedding the subtitles 2013-08-23 18:17:43 +02:00
fc483bb6af [xhamster] use determine_ext 2013-08-23 17:23:34 +02:00
53b0f3e4e2 Merge pull request #1301 from rzhxeo/xhamster
XHamsterIE: Fix video extension and add video description
2013-08-23 17:21:30 +02:00
4353cf51a0 XHamsterIE: Add video description 2013-08-23 16:40:20 +02:00
ce34e9ce5e XHamsterIE: Fix video extension
Cut off GET parameter
2013-08-23 16:33:41 +02:00
d4051a8e05 Add a post processor for embedding subtitles in mp4 videos (closes #1052) 2013-08-23 15:06:19 +02:00
df3df7fb64 [youtube] Fix download of subtitles with '--all-subs'
If _extract_subtitles is called the option 'write subtitles' is always true.
2013-08-23 13:14:22 +02:00
9e9c164052 Merge pull request #937 from jaimeMF/subtitles_rework
Subtitles rework
2013-08-23 02:40:25 -07:00
066090dd3f [youtube] add algo for length 80 and update player info 2013-08-23 11:33:56 +02:00
614d9c19c1 Merge remote-tracking branch 'upstream/master' 2013-08-22 17:02:41 -07:00
74e6672beb Merge pull request #1297 from iemejia/master
[subtitles] separated subtitle options in their own group
2013-08-22 16:30:14 -07:00
02bcf0d389 release 2013.08.22 2013-08-22 23:29:42 +02:00
10204dc898 [videofyme] Add an additional quality (they change between downloads of the info) and update md5 sum of the test video 2013-08-22 23:23:52 +02:00
1865ed31b9 [subtitles] separated subtitle options in their own group 2013-08-22 22:44:04 +02:00
3669cdba10 [youtube] update algo for length 82 (fixes #1296) 2013-08-22 22:35:15 +02:00
939fbd26ac [youtube] fix the order of DASH formats 2013-08-22 19:45:24 +02:00
b4e60dac23 Merge remote-tracking branch 'upstream/master' 2013-08-22 10:43:51 -07:00
e6ddb4e7af Merge pull request #1279 from xanadu/master
Add YouTube DASH formats to YouTubeIE
2013-08-22 19:33:34 +02:00
83390b83d9 Merge pull request #1266 from MiLk/py-generator
Update the youtube algorithm generator
2013-08-22 10:18:58 -07:00
ff2424595a lxml is not part of the standard library. 2013-08-22 14:47:51 +02:00
adeb9c73d6 Merge remote-tracking branch 'upstream/master' 2013-08-22 14:04:30 +02:00
cd0abcc0bb Extractor for canalc2.tv 2013-08-22 13:54:23 +02:00
4a55479fa9 Credit Pierre Rudloff for JeuxVideoIE and UnistraIE 2013-08-22 13:21:32 +02:00
f527115b5f Rename utv.py to unistra.py and extract more info
There are other sites that could be named utv, which would conflict if they are added
2013-08-22 13:19:35 +02:00
75e1b46add Download from utv.unistra.fr (PR #1271)
Squashed to a single commit to keep the file 'youtube-dl' unchanged and remove the revert commit.
2013-08-22 12:58:12 +02:00
05a2926c5c Merge remote-tracking branch 'upstream/master' 2013-08-22 12:55:58 +02:00
7070b83687 Merge remote-tracking branch 'upstream/master' 2013-08-22 12:54:17 +02:00
8d212e604a Merge remote-tracking branch 'upstream/master'
Conflicts:
	youtube_dl/extractor/jeuxvideo.py
2013-08-22 12:52:05 +02:00
063fcc9676 [jeuxvideo] Extract more information and add test 2013-08-22 12:37:34 +02:00
8403612258 Merge pull request #1267 from Rudloff/master
Download videos from jeuxvideo.com

Edited to keep the file 'youtube-dl' unchanged.
2013-08-22 12:25:21 +02:00
25b51c7816 Download videos from jeuxvideo.com 2013-08-22 12:12:34 +02:00
9779b63bb6 Add an extractor for PBS (closes #870 and #873) 2013-08-22 11:57:21 +02:00
d81aef3adf Add an extractor for tv.slashdot.org (closes #1192)
It uses the ooyala platform, so it just extracts the ooyala url.
2013-08-21 21:51:58 +02:00
5af7e056a7 Merge remote-tracking branch 'upstream/master' 2013-08-21 10:53:42 -07:00
45ed795cb0 [youtube] update uploader name for a test video: 'IconaPop' has changed to 'Icona Pop' 2013-08-21 19:28:48 +02:00
683e98a8a4 [statigram] change test video
The old one cannot be accessed.
2013-08-21 19:20:27 +02:00
e0cfeb2ea7 [funnyordie] fix extraction of video url and title 2013-08-21 18:58:25 +02:00
75340ee383 [vevo] Fix urls with a query (#1258) 2013-08-21 18:20:03 +02:00
668de34c6b [soundcloud] Support widget urls (fixes #1252) 2013-08-21 17:06:37 +02:00
a91b954bb4 [vimeo] extract information for Vimeo Pro videos from http://player.vimeo.com/video/{video_id} (fixes #1197)
For some videos https://vimeo.com/{video_id} doesn't work
2013-08-21 13:48:19 +02:00
a3f62b8255 Merge remote-tracking branch 'upstream/master' 2013-08-21 00:07:03 -07:00
37b6d5f684 fix hls test 2013-08-20 23:51:05 -07:00
b7a6838407 address review comment 2013-08-20 21:57:32 -07:00
cde846b3d3 fix code style 2013-08-20 21:42:49 -07:00
6c3e6e88d3 Allow hours in ETA display (Fixes #1280) 2013-08-21 05:44:19 +02:00
739674cd77 [rtlnow] Add support for error message for queries from outside of Germany 2013-08-21 05:24:58 +02:00
4b2d7cae11 release 2013.08.21 2013-08-21 04:33:57 +02:00
7fea7156cb [generic] support HTML5 video 2013-08-21 04:32:22 +02:00
3093468977 [generic] Ignore stupid HTTP servers (#1284) 2013-08-21 04:32:07 +02:00
79cb25776f Cache suitable regular expressions
This speeds up TestAllURLsMatching.test_no_duplicates by about 8000% at the cost of minimal memory overhead.
2013-08-21 04:06:48 +02:00
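(A minimal sketch of the caching trick, with illustrative names: compile _VALID_URL once per extractor class and store the result on the class, so repeated suitable() calls reuse the compiled pattern:

    import re

    class SomeIE(object):
        _VALID_URL = r'https?://(?:www\.)?example\.com/watch/(?P<id>[0-9]+)'

        @classmethod
        def suitable(cls, url):
            # check cls.__dict__ so each subclass caches its own compiled pattern
            if '_VALID_URL_RE' not in cls.__dict__:
                cls._VALID_URL_RE = re.compile(cls._VALID_URL)
            return cls._VALID_URL_RE.match(url) is not None
)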
87f78946a5 [collegehumor] Allow old-style videos (Fixes #1285) 2013-08-21 03:50:56 +02:00
211fbc1328 fix failed tests 2013-08-19 18:57:55 -07:00
836a086ce9 Add YouTube DASH formats to YouTubeIE 2013-08-19 18:22:25 -07:00
90d3989b99 Merge remote-tracking branch 'upstream/master' 2013-08-19 17:11:52 -07:00
d741e55a42 [youtube] Support watch_popup URLs (Fixes #1275) 2013-08-19 10:27:42 +02:00
17d3aaaf16 Merge pull request #1273 from rzhxeo/rtlnow
Add support for http://voxnow.de
2013-08-19 00:19:06 -07:00
ea55b2a4ca Add VOXnow to RTLnow extractor 2013-08-19 08:57:36 +02:00
3f0537dd4a Merge remote-tracking branch 'rzhxeo/rtlnow' 2013-08-19 00:25:34 +02:00
943f7f7a39 Download videos from jeuxvideo.com 2013-08-18 16:11:47 +02:00
12e895fc5a Merge branch 'master' into py-generator 2013-08-18 11:12:38 +02:00
bda2c49d75 Update algo - see #1254
Signed-off-by: Emilien Kenler <hello@emilienkenler.com>
2013-08-18 11:10:39 +02:00
01b32990da Add RTLnow extractor 2013-08-18 08:16:53 +02:00
dbda1b5147 Add RTLnow extractor
Supports http://rtl2now.rtl2.de and http://rtl-now.rtl.de
2013-08-18 08:15:18 +02:00
ddf3bd328b release 2013.08.17 2013-08-17 08:33:36 +02:00
b9c37b92cf Merge pull request #1256 from patrickslin/patch-5
Length 85 changed again? (fixes #1254)
2013-08-16 14:07:49 -07:00
5a27ecdd2e Update AddAnime.py 2013-08-16 23:54:09 +03:00
f9c3c90ca8 Length 85 changed again? (fixes #1254) 2013-08-16 08:54:01 -07:00
6daccbe317 release 2013.08.15 2013-08-15 22:40:00 +02:00
71ea844c0e Merge pull request #1248 from patrickslin/patch-4
Unable to Download Video (fixes #1247)
2013-08-15 13:38:32 -07:00
3a7256697e Unable to Download Video (fixes #1247) 2013-08-15 13:00:20 -07:00
d1ba998274 release 2013.08.14 2013-08-14 10:19:53 +02:00
718ced8d8c Merge pull request #1239 from patrickslin/patch-3
Updated Vevo Signature Length (fixes #1237)
2013-08-14 01:18:58 -07:00
e1842025d0 Updated Vevo Signature Length (fixes #1237) 2013-08-13 17:57:35 -07:00
2b9213cdc1 Update generator
Signed-off-by: Emilien Kenler <hello@emilienkenler.com>
2013-08-12 10:48:40 +02:00
e3a88568b0 Added an IE for hark.com 2013-08-11 22:23:05 +05:00
0577177e3e [vevo] fix testcase 2013-08-11 07:12:38 +02:00
298f833b16 Note update possibility on errors (thanks @chbrown, #1229) 2013-08-11 06:46:24 +02:00
97b3656c2e YoupornIE: Add support for hd videos and update Test 2013-08-09 18:37:33 +02:00
f3bcebb1d2 add an aes implementation 2013-08-09 18:36:01 +02:00
0f399e6e5e release 2013.08.09 2013-08-09 15:49:09 +02:00
5b075e27cb Merge pull request #1218 from patrickslin/patch-2
New sig len 89 algo
2013-08-09 03:42:13 -07:00
8a9d86a2a7 New sig len 89 algo
Fixes new YT encrypted sig len 89.
2013-08-08 21:48:12 -07:00
d5b00ee6e0 improve sohu extractor 2013-08-06 10:26:57 +08:00
461cead4f7 changes 2013-08-06 04:34:24 +03:00
b5a6d40818 fix parse title bug 2013-08-05 22:51:54 +08:00
968b5e0112 Add some verbosity when reporting finished downloads
For example:

    [download] Resuming download at byte 1868140
    [download] Destination: Entry #1-Bn59FJ4HrmU.flv
    [download] 100% of 3.27MiB in 4s

This format is meant to somewhat mirror the behavior of wget(1) when reporting finished downloads:

    100%[==================>] 54,836,682   788KB/s   in 74s

    2013-08-04 12:32:05 (728 KB/s) - 'google-chrome-stable_current_x86_64.rpm' saved [54836682/54836682]
2013-08-04 12:45:24 -05:00
4ec929dc9b use ..utils/clean_html() 2013-08-03 10:29:58 +08:00
6624a2b07d add an extractor for tv.sohu.com 2013-08-02 17:58:46 +08:00
6d3a7d03e1 fix bug: kankan extractor not support http://vod.kankan.com/v/70/70309.shtml 2013-08-02 15:26:11 +08:00
95fdc7d69c Merge remote-tracking branch 'upstream/master' 2013-08-01 10:57:12 -07:00
6804038d06 Don't try to write the subtitles if it's None 2013-07-20 12:59:47 +02:00
2f799533ae YoutubeIE: don't crash when trying to get automatic captions if the videos has standard subtitles. 2013-07-20 12:56:10 +02:00
88ae5991cd YoutubeIE: use the same function for getting the subtitles for the "--write-sub" and "--all-sub" options 2013-07-20 12:56:06 +02:00
5d51a883c2 Use a dictionary for storing the subtitles
The errors while getting the subtitles are reported as warnings; if no subtitles are found, return an empty dict.
2013-07-20 12:52:25 +02:00
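(Illustrative shape of the new structure, with hypothetical values: language codes map directly to subtitle data, replacing the old list of (error, lang, text) tuples:

    # old: [(sub_error, sub_lang, sub), ...]
    # new: a dict keyed by language code; no subtitles -> {}
    subtitles = {
        'en': u'WEBVTT\n\n00:00.000 --> 00:02.000\nHello',
        'it': u'WEBVTT\n\n00:00.000 --> 00:02.000\nCiao',
    }
    sub = subtitles.get('en')
)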
c4a91be726 Save subtitles using the same code for all the options 2013-07-20 12:52:24 +02:00
56c7366547 YoutubeIE: reuse instances of InfoExtractors (closes #998)
When an IE is added to the list, it's also added to a dictionary. When an IE is requested, it first looks in the dictionary, and if there's no instance it will create a new one.

That way _real_initialize is only called once for each IE, saving time if it needs to log in, for example.
2013-07-08 15:14:27 +02:00
58 changed files with 2410 additions and 324 deletions

View File

@@ -113,25 +113,28 @@ which means you can modify it, redistribute it or use it however you like.
 ## Video Format Options:
     -f, --format FORMAT        video format code, specifiy the order of
-                               preference using slashes: "-f 22/17/18"
+                               preference using slashes: "-f 22/17/18". "-f mp4"
+                               and "-f flv" are also supported
     --all-formats              download all available video formats
     --prefer-free-formats      prefer free video formats unless a specific one
                                is requested
     --max-quality FORMAT       highest quality format to download
     -F, --list-formats         list all available formats (currently youtube
                                only)
+
+## Subtitle Options:
     --write-sub                write subtitle file (currently youtube only)
     --write-auto-sub           write automatic subtitle file (currently youtube
                                only)
     --only-sub                 [deprecated] alias of --skip-download
     --all-subs                 downloads all the available subtitles of the
-                               video (currently youtube only)
+                               video
     --list-subs                lists all available subtitles for the video
-                               (currently youtube only)
-    --sub-format FORMAT        subtitle format [srt/sbv/vtt] (default=srt)
-                               (currently youtube only)
-    --sub-lang LANG            language of the subtitles to download (optional)
-                               use IETF language tags like 'en'
+    --sub-format FORMAT        subtitle format (default=srt) ([sbv/vtt] youtube
+                               only)
+    --sub-lang LANGS           languages of the subtitles to download (optional)
+                               separated by commas, use IETF language tags like
+                               'en,pt'
 
 ## Authentication Options:
     -u, --username USERNAME    account username
@@ -153,6 +156,8 @@ which means you can modify it, redistribute it or use it however you like.
                                processing; the video is erased by default
     --no-post-overwrites       do not overwrite post-processed files; the post-
                                processed files are overwritten by default
+    --embed-subs               embed subtitles in the video (only for mp4
+                               videos)
 
 # CONFIGURATION

View File

@@ -4,8 +4,12 @@ __youtube-dl()
     COMPREPLY=()
     cur="${COMP_WORDS[COMP_CWORD]}"
     opts="{{flags}}"
+    keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater"
 
-    if [[ ${cur} == * ]] ; then
+    if [[ ${cur} =~ : ]]; then
+        COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
+        return 0
+    elif [[ ${cur} == * ]] ; then
         COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
         return 0
     fi

View File

@@ -19,15 +19,22 @@ if 'signature' in versions_info:
 
 new_version = {}
 
-filenames = {'bin': 'youtube-dl', 'exe': 'youtube-dl.exe', 'tar': 'youtube-dl-%s.tar.gz' % version}
+filenames = {
+    'bin': 'youtube-dl',
+    'exe': 'youtube-dl.exe',
+    'tar': 'youtube-dl-%s.tar.gz' % version}
+build_dir = os.path.join('..', '..', 'build', version)
 for key, filename in filenames.items():
-    print('Downloading and checksumming %s...' %filename)
-    url = 'http://youtube-dl.org/downloads/%s/%s' % (version, filename)
-    data = urllib.request.urlopen(url).read()
+    url = 'https://yt-dl.org/downloads/%s/%s' % (version, filename)
+    fn = os.path.join(build_dir, filename)
+    with open(fn, 'rb') as f:
+        data = f.read()
+    if not data:
+        raise ValueError('File %s is empty!' % fn)
     sha256sum = hashlib.sha256(data).hexdigest()
     new_version[key] = (url, sha256sum)
 
 versions_info['versions'][version] = new_version
 versions_info['latest'] = version
-json.dump(versions_info, open('update/versions.json', 'w'), indent=4, sort_keys=True)
+
+with open('update/versions.json', 'w') as jsonf:
+    json.dump(versions_info, jsonf, indent=4, sort_keys=True)

View File

@@ -22,7 +22,7 @@ entry_template=textwrap.dedent("""
     <atom:link href="http://rg3.github.io/youtube-dl" />
     <atom:content type="xhtml">
         <div xmlns="http://www.w3.org/1999/xhtml">
-            Downloads available at <a href="http://youtube-dl.org/downloads/@VERSION@/">http://youtube-dl.org/downloads/@VERSION@/</a>
+            Downloads available at <a href="https://yt-dl.org/downloads/@VERSION@/">https://yt-dl.org/downloads/@VERSION@/</a>
         </div>
     </atom:content>
     <atom:author>
@@ -54,4 +54,3 @@ atom_template = atom_template.replace('@ENTRIES@', entries_str)
 
 with open('update/releases.atom','w',encoding='utf-8') as atom_file:
     atom_file.write(atom_template)
-

View File

@@ -0,0 +1,33 @@
+#!/usr/bin/env python3
+
+import sys
+import os
+import textwrap
+
+# We must be able to import youtube_dl
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+import youtube_dl
+
+
+def main():
+    with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf:
+        template = tmplf.read()
+
+    ie_htmls = []
+    for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME):
+        ie_html = '<b>{}</b>'.format(ie.IE_NAME)
+        try:
+            ie_html += ': {}'.format(ie.IE_DESC)
+        except AttributeError:
+            pass
+        if ie.working() == False:
+            ie_html += ' (Currently broken)'
+        ie_htmls.append('<li>{}</li>'.format(ie_html))
+
+    template = template.replace('@SITES@', textwrap.indent('\n'.join(ie_htmls), '\t'))
+
+    with open('supportedsites.html', 'w', encoding='utf-8') as sitesf:
+        sitesf.write(template)
+
+if __name__ == '__main__':
+    main()

View File

@@ -67,7 +67,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
 (cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
 git checkout HEAD -- youtube-dl youtube-dl.exe
 
-/bin/echo -e "\n### Signing and uploading the new binaries to youtube-dl.org..."
+/bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..."
 for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done
 scp -r "build/$version" ytdl@yt-dl.org:html/tmp/
 ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/"
@@ -85,6 +85,7 @@ ROOT=$(pwd)
     "$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem"
     "$ROOT/devscripts/gh-pages/generate-download.py"
     "$ROOT/devscripts/gh-pages/update-copyright.py"
+    "$ROOT/devscripts/gh-pages/update-sites.py"
     git add *.html *.html.in update
     git commit -m "release $version"
     git show HEAD

View File

@@ -11,30 +11,36 @@ tests = [
     # 90
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`",
      "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"),
-    # 88
+    # 89
+    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'",
+     "/?;:|}<[{=+-_)(*&^%$#@!MqBVCXZASDFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuyt"),
+    # 88 - vflapUV9V 2013/08/28
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
-     "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
+     "ioplkjhgfdsazxcvbnm12<4567890QWERTYUIOZLKJHGFDSAeXCVBNM!@#$%^&*()_-+={[]}|:;?/>.3"),
-    # 87 - vflART1Nf 2013/07/24
+    # 87
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
-     "tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>"),
+     "uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"),
-    # 86 - vflm_D8eE 2013/07/31
+    # 86 - vfluy6kdb 2013/09/06
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
-     ">.1}|[{=+-_)(*&^%$#@!MNBVCXZASDFGHJK<POIUYTREW509876L432/mnbvcxzasdfghjklpoiuytre"),
+     "yuioplkjhgfdsazxcvbnm12345678q0QWrRTYUIOELKJHGFD-AZXCVBNM!@#$%^&*()_<+={[|};?/>.S"),
-    # 85 - vflSAFCP9 2013/07/19
+    # 85
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
-     "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"),
+     ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
-    # 84
+    # 84 - vflg0g8PQ 2013/08/29 (sporadic)
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
-     "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"),
+     ">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),
-    # 83 - vflTWC9KW 2013/08/01
+    # 83
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
-     "qwertyuioplkjhg>dsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/f"),
+     ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
-    # 82
+    # 82 - vflZK4ZYR 2013/08/23
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
-     "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"),
+     "wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>.<"),
     # 81 - vflLC8JvQ 2013/07/25
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.",
      "C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
+    # 80 - vflZK4ZYR 2013/08/23 (sporadic)
+    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>",
+     "wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>"),
     # 79 - vflLC8JvQ 2013/07/25 (sporadic)
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/",
      "Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),

View File

@@ -11,6 +11,15 @@ from youtube_dl.extractor import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE,
 from helper import get_testcases
 
 class TestAllURLsMatching(unittest.TestCase):
+    def setUp(self):
+        self.ies = gen_extractors()
+
+    def matching_ies(self, url):
+        return [ie.IE_NAME for ie in self.ies if ie.suitable(url) and ie.IE_NAME != 'generic']
+
+    def assertMatch(self, url, ie_list):
+        self.assertEqual(self.matching_ies(url), ie_list)
+
     def test_youtube_playlist_matching(self):
         self.assertTrue(YoutubePlaylistIE.suitable(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8'))
         self.assertTrue(YoutubePlaylistIE.suitable(u'UUBABnxM4Ar9ten8Mdjj1j0Q')) #585
@@ -24,12 +33,17 @@ class TestAllURLsMatching(unittest.TestCase):
     def test_youtube_matching(self):
         self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M'))
         self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
+        self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
+        self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
 
     def test_youtube_channel_matching(self):
         self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM'))
         self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec'))
         self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM/videos'))
 
+    def test_youtube_user_matching(self):
+        self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
+
     def test_justin_tv_channelid_matching(self):
         self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
         self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
@@ -50,6 +64,7 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
         self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
         self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc')
+        self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch_popup?v=BaW_jenozKc'), 'BaW_jenozKc')
 
     def test_no_duplicates(self):
         ies = gen_extractors()
@@ -62,15 +77,12 @@ class TestAllURLsMatching(unittest.TestCase):
                 self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))
 
     def test_keywords(self):
-        ies = gen_extractors()
-        matching_ies = lambda url: [ie.IE_NAME for ie in ies
-                                    if ie.suitable(url) and ie.IE_NAME != 'generic']
-        self.assertEqual(matching_ies(':ytsubs'), ['youtube:subscriptions'])
-        self.assertEqual(matching_ies(':ytsubscriptions'), ['youtube:subscriptions'])
-        self.assertEqual(matching_ies(':thedailyshow'), ['ComedyCentral'])
-        self.assertEqual(matching_ies(':tds'), ['ComedyCentral'])
-        self.assertEqual(matching_ies(':colbertreport'), ['ComedyCentral'])
-        self.assertEqual(matching_ies(':cr'), ['ComedyCentral'])
+        self.assertMatch(':ytsubs', ['youtube:subscriptions'])
+        self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
+        self.assertMatch(':thedailyshow', ['ComedyCentral'])
+        self.assertMatch(':tds', ['ComedyCentral'])
+        self.assertMatch(':colbertreport', ['ComedyCentral'])
+        self.assertMatch(':cr', ['ComedyCentral'])
 
 if __name__ == '__main__':

View File

@@ -127,11 +127,10 @@ def generator(test_case):
                 info_dict = json.load(infof)
             for (info_field, expected) in tc.get('info_dict', {}).items():
                 if isinstance(expected, compat_str) and expected.startswith('md5:'):
-                    self.assertEqual(expected, 'md5:' + md5(info_dict.get(info_field)))
+                    got = 'md5:' + md5(info_dict.get(info_field))
                 else:
                     got = info_dict.get(info_field)
-                    self.assertEqual(
-                        expected, got,
-                        u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
+                self.assertEqual(expected, got,
+                    u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
 
             # If checkable fields are missing from the test case, print the info_dict

View File

@@ -35,47 +35,47 @@ class TestYoutubeSubtitles(unittest.TestCase):
         DL.params['writesubtitles'] = True
         IE = YoutubeIE(DL)
         info_dict = IE.extract('QRS8MkLhQmM')
-        sub = info_dict[0]['subtitles'][0]
-        self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260')
+        sub = info_dict[0]['subtitles']['en']
+        self.assertEqual(md5(sub), '4cd9278a35ba2305f47354ee13472260')
     def test_youtube_subtitles_it(self):
         DL = FakeYDL()
         DL.params['writesubtitles'] = True
-        DL.params['subtitleslang'] = 'it'
+        DL.params['subtitleslangs'] = ['it']
         IE = YoutubeIE(DL)
         info_dict = IE.extract('QRS8MkLhQmM')
-        sub = info_dict[0]['subtitles'][0]
-        self.assertEqual(md5(sub[2]), '164a51f16f260476a05b50fe4c2f161d')
+        sub = info_dict[0]['subtitles']['it']
+        self.assertEqual(md5(sub), '164a51f16f260476a05b50fe4c2f161d')
     def test_youtube_onlysubtitles(self):
         DL = FakeYDL()
         DL.params['writesubtitles'] = True
         DL.params['onlysubtitles'] = True
         IE = YoutubeIE(DL)
         info_dict = IE.extract('QRS8MkLhQmM')
-        sub = info_dict[0]['subtitles'][0]
-        self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260')
+        sub = info_dict[0]['subtitles']['en']
+        self.assertEqual(md5(sub), '4cd9278a35ba2305f47354ee13472260')
     def test_youtube_allsubtitles(self):
         DL = FakeYDL()
         DL.params['allsubtitles'] = True
         IE = YoutubeIE(DL)
         info_dict = IE.extract('QRS8MkLhQmM')
         subtitles = info_dict[0]['subtitles']
-        self.assertEqual(len(subtitles), 13)
+        self.assertEqual(len(subtitles.keys()), 13)
     def test_youtube_subtitles_sbv_format(self):
         DL = FakeYDL()
         DL.params['writesubtitles'] = True
         DL.params['subtitlesformat'] = 'sbv'
         IE = YoutubeIE(DL)
         info_dict = IE.extract('QRS8MkLhQmM')
-        sub = info_dict[0]['subtitles'][0]
-        self.assertEqual(md5(sub[2]), '13aeaa0c245a8bed9a451cb643e3ad8b')
+        sub = info_dict[0]['subtitles']['en']
+        self.assertEqual(md5(sub), '13aeaa0c245a8bed9a451cb643e3ad8b')
     def test_youtube_subtitles_vtt_format(self):
         DL = FakeYDL()
         DL.params['writesubtitles'] = True
         DL.params['subtitlesformat'] = 'vtt'
         IE = YoutubeIE(DL)
         info_dict = IE.extract('QRS8MkLhQmM')
-        sub = info_dict[0]['subtitles'][0]
-        self.assertEqual(md5(sub[2]), '356cdc577fde0c6783b9b822e7206ff7')
+        sub = info_dict[0]['subtitles']['en']
+        self.assertEqual(md5(sub), '356cdc577fde0c6783b9b822e7206ff7')
     def test_youtube_list_subtitles(self):
         DL = FakeYDL()
         DL.params['listsubtitles'] = True
@@ -85,11 +85,20 @@ class TestYoutubeSubtitles(unittest.TestCase):
     def test_youtube_automatic_captions(self):
         DL = FakeYDL()
         DL.params['writeautomaticsub'] = True
-        DL.params['subtitleslang'] = 'it'
+        DL.params['subtitleslangs'] = ['it']
         IE = YoutubeIE(DL)
         info_dict = IE.extract('8YoUxe5ncPo')
-        sub = info_dict[0]['subtitles'][0]
-        self.assertTrue(sub[2] is not None)
+        sub = info_dict[0]['subtitles']['it']
+        self.assertTrue(sub is not None)
+    def test_youtube_multiple_langs(self):
+        DL = FakeYDL()
+        DL.params['writesubtitles'] = True
+        langs = ['it', 'fr', 'de']
+        DL.params['subtitleslangs'] = langs
+        IE = YoutubeIE(DL)
+        subtitles = IE.extract('QRS8MkLhQmM')[0]['subtitles']
+        for lang in langs:
+            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
 
 if __name__ == '__main__':
     unittest.main()

View File

@@ -63,6 +63,17 @@ class FileDownloader(object):
             converted = float(bytes) / float(1024 ** exponent)
             return '%.2f%s' % (converted, suffix)
 
+    @staticmethod
+    def format_seconds(seconds):
+        (mins, secs) = divmod(seconds, 60)
+        (hours, eta_mins) = divmod(mins, 60)
+        if hours > 99:
+            return '--:--:--'
+        if hours == 0:
+            return '%02d:%02d' % (mins, secs)
+        else:
+            return '%02d:%02d:%02d' % (hours, mins, secs)
+
     @staticmethod
     def calc_percent(byte_counter, data_len):
         if data_len is None:
@@ -78,10 +89,7 @@ class FileDownloader(object):
             return '--:--'
         rate = float(current) / dif
         eta = int((float(total) - float(current)) / rate)
-        (eta_mins, eta_secs) = divmod(eta, 60)
-        if eta_mins > 99:
-            return '--:--'
-        return '%02d:%02d' % (eta_mins, eta_secs)
+        return FileDownloader.format_seconds(eta)
 
     @staticmethod
     def calc_speed(start, now, bytes):
@@ -230,12 +238,14 @@ class FileDownloader(object):
         """Report it was impossible to resume download."""
         self.to_screen(u'[download] Unable to resume')
 
-    def report_finish(self):
+    def report_finish(self, data_len_str, tot_time):
         """Report download finished."""
         if self.params.get('noprogress', False):
             self.to_screen(u'[download] Download completed')
         else:
-            self.to_screen(u'')
+            clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
+            self.to_screen(u'\r%s[download] 100%% of %s in %s' %
+                (clear_line, data_len_str, self.format_seconds(tot_time)))
 
     def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
         self.report_destination(filename)
@@ -538,7 +548,7 @@ class FileDownloader(object):
             self.report_error(u'Did not get any data blocks')
             return False
         stream.close()
-        self.report_finish()
+        self.report_finish(data_len_str, (time.time() - start))
         if data_len is not None and byte_counter != data_len:
             raise ContentTooShortError(byte_counter, int(data_len))
         self.try_rename(tmpfilename, filename)

View File

@@ -71,12 +71,17 @@ class FFmpegPostProcessor(PostProcessor):
         programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
         return dict((program, executable(program)) for program in programs)
 
-    def run_ffmpeg(self, path, out_path, opts):
+    def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
         if not self._exes['ffmpeg'] and not self._exes['avconv']:
             raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')
+
+        files_cmd = []
+        for path in input_paths:
+            files_cmd.extend(['-i', encodeFilename(path)])
-        cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y', '-i', encodeFilename(path)]
+        cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y'] + files_cmd
                + opts +
                [encodeFilename(self._ffmpeg_filename_argument(out_path))])
+
         p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         stdout,stderr = p.communicate()
         if p.returncode != 0:
@@ -84,6 +89,9 @@ class FFmpegPostProcessor(PostProcessor):
             msg = stderr.strip().split('\n')[-1]
             raise FFmpegPostProcessorError(msg)
 
+    def run_ffmpeg(self, path, out_path, opts):
+        self.run_ffmpeg_multiple_files([path], out_path, opts)
+
     def _ffmpeg_filename_argument(self, fn):
         # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
         if fn.startswith(u'-'):
@@ -129,7 +137,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
         try:
             FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
         except FFmpegPostProcessorError as err:
-            raise AudioConversionError(err.message)
+            raise AudioConversionError(err.msg)
 
     def run(self, information):
         path = information['filepath']
@@ -199,7 +207,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
         except:
             etype,e,tb = sys.exc_info()
             if isinstance(e, AudioConversionError):
-                msg = u'audio conversion failed: ' + e.message
+                msg = u'audio conversion failed: ' + e.msg
             else:
                 msg = u'error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg')
             raise PostProcessingError(msg)
@@ -232,3 +240,227 @@ class FFmpegVideoConvertor(FFmpegPostProcessor):
         information['format'] = self._preferedformat
         information['ext'] = self._preferedformat
         return False,information
+
+
+class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
+    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
+    _lang_map = {
+        'aa': 'aar',
'aa': 'aar',
'ab': 'abk',
'ae': 'ave',
'af': 'afr',
'ak': 'aka',
'am': 'amh',
'an': 'arg',
'ar': 'ara',
'as': 'asm',
'av': 'ava',
'ay': 'aym',
'az': 'aze',
'ba': 'bak',
'be': 'bel',
'bg': 'bul',
'bh': 'bih',
'bi': 'bis',
'bm': 'bam',
'bn': 'ben',
'bo': 'bod',
'br': 'bre',
'bs': 'bos',
'ca': 'cat',
'ce': 'che',
'ch': 'cha',
'co': 'cos',
'cr': 'cre',
'cs': 'ces',
'cu': 'chu',
'cv': 'chv',
'cy': 'cym',
'da': 'dan',
'de': 'deu',
'dv': 'div',
'dz': 'dzo',
'ee': 'ewe',
'el': 'ell',
'en': 'eng',
'eo': 'epo',
'es': 'spa',
'et': 'est',
'eu': 'eus',
'fa': 'fas',
'ff': 'ful',
'fi': 'fin',
'fj': 'fij',
'fo': 'fao',
'fr': 'fra',
'fy': 'fry',
'ga': 'gle',
'gd': 'gla',
'gl': 'glg',
'gn': 'grn',
'gu': 'guj',
'gv': 'glv',
'ha': 'hau',
'he': 'heb',
'hi': 'hin',
'ho': 'hmo',
'hr': 'hrv',
'ht': 'hat',
'hu': 'hun',
'hy': 'hye',
'hz': 'her',
'ia': 'ina',
'id': 'ind',
'ie': 'ile',
'ig': 'ibo',
'ii': 'iii',
'ik': 'ipk',
'io': 'ido',
'is': 'isl',
'it': 'ita',
'iu': 'iku',
'ja': 'jpn',
'jv': 'jav',
'ka': 'kat',
'kg': 'kon',
'ki': 'kik',
'kj': 'kua',
'kk': 'kaz',
'kl': 'kal',
'km': 'khm',
'kn': 'kan',
'ko': 'kor',
'kr': 'kau',
'ks': 'kas',
'ku': 'kur',
'kv': 'kom',
'kw': 'cor',
'ky': 'kir',
'la': 'lat',
'lb': 'ltz',
'lg': 'lug',
'li': 'lim',
'ln': 'lin',
'lo': 'lao',
'lt': 'lit',
'lu': 'lub',
'lv': 'lav',
'mg': 'mlg',
'mh': 'mah',
'mi': 'mri',
'mk': 'mkd',
'ml': 'mal',
'mn': 'mon',
'mr': 'mar',
'ms': 'msa',
'mt': 'mlt',
'my': 'mya',
'na': 'nau',
'nb': 'nob',
'nd': 'nde',
'ne': 'nep',
'ng': 'ndo',
'nl': 'nld',
'nn': 'nno',
'no': 'nor',
'nr': 'nbl',
'nv': 'nav',
'ny': 'nya',
'oc': 'oci',
'oj': 'oji',
'om': 'orm',
'or': 'ori',
'os': 'oss',
'pa': 'pan',
'pi': 'pli',
'pl': 'pol',
'ps': 'pus',
'pt': 'por',
'qu': 'que',
'rm': 'roh',
'rn': 'run',
'ro': 'ron',
'ru': 'rus',
'rw': 'kin',
'sa': 'san',
'sc': 'srd',
'sd': 'snd',
'se': 'sme',
'sg': 'sag',
'si': 'sin',
'sk': 'slk',
'sl': 'slv',
'sm': 'smo',
'sn': 'sna',
'so': 'som',
'sq': 'sqi',
'sr': 'srp',
'ss': 'ssw',
'st': 'sot',
'su': 'sun',
'sv': 'swe',
'sw': 'swa',
'ta': 'tam',
'te': 'tel',
'tg': 'tgk',
'th': 'tha',
'ti': 'tir',
'tk': 'tuk',
'tl': 'tgl',
'tn': 'tsn',
'to': 'ton',
'tr': 'tur',
'ts': 'tso',
'tt': 'tat',
'tw': 'twi',
'ty': 'tah',
'ug': 'uig',
'uk': 'ukr',
'ur': 'urd',
'uz': 'uzb',
've': 'ven',
'vi': 'vie',
'vo': 'vol',
'wa': 'wln',
'wo': 'wol',
'xh': 'xho',
'yi': 'yid',
'yo': 'yor',
'za': 'zha',
'zh': 'zho',
'zu': 'zul',
+    }
+
+    def __init__(self, downloader=None, subtitlesformat='srt'):
+        super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
+        self._subformat = subtitlesformat
+
+    @classmethod
+    def _conver_lang_code(cls, code):
+        """Convert language code from ISO 639-1 to ISO 639-2/T"""
+        return cls._lang_map.get(code[:2])
+
+    def run(self, information):
+        if information['ext'] != u'mp4':
+            self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
+            return True, information
+        sub_langs = [key for key in information['subtitles']]
+
+        filename = information['filepath']
+        input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs]
+
+        opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy']
+        for (i, lang) in enumerate(sub_langs):
+            opts.extend(['-map', '%d:0' % (i+1), '-c:s:%d' % i, 'mov_text'])
+            lang_code = self._conver_lang_code(lang)
+            if lang_code is not None:
+                opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
+        opts.extend(['-f', 'mp4'])
+
+        temp_filename = filename + u'.temp'
+        self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename)
+        self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
+        os.remove(encodeFilename(filename))
+        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+        return True, information

View File

@@ -76,7 +76,7 @@ class YoutubeDL(object):
     allsubtitles:      Downloads all the subtitles of the video
     listsubtitles:     Lists all available subtitles for the video
     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
-    subtitleslang:     Language of the subtitles to download
+    subtitleslangs:    List of languages of the subtitles to download
     keepvideo:         Keep the video file after post-processing
     daterange:         A DateRange object, download only if the upload_date is in the range.
     skip_download:     Skip the actual download of the video file
@@ -97,6 +97,7 @@ class YoutubeDL(object):
     def __init__(self, params):
         """Create a FileDownloader object with the given options."""
         self._ies = []
+        self._ies_instances = {}
         self._pps = []
         self._progress_hooks = []
         self._download_retcode = 0
@@ -111,8 +112,21 @@ class YoutubeDL(object):
     def add_info_extractor(self, ie):
         """Add an InfoExtractor object to the end of the list."""
         self._ies.append(ie)
+        self._ies_instances[ie.ie_key()] = ie
         ie.set_downloader(self)

+    def get_info_extractor(self, ie_key):
+        """
+        Get an instance of an IE with name ie_key, it will try to get one from
+        the _ies list, if there's no instance it will create a new one and add
+        it to the extractor list.
+        """
+        ie = self._ies_instances.get(ie_key)
+        if ie is None:
+            ie = get_info_extractor(ie_key)()
+            self.add_info_extractor(ie)
+        return ie
+
     def add_default_info_extractors(self):
         """
         Add the InfoExtractors returned by gen_extractors to the end of the list
@@ -294,9 +308,7 @@ class YoutubeDL(object):
         '''
         if ie_key:
-            ie = get_info_extractor(ie_key)()
-            ie.set_downloader(self)
-            ies = [ie]
+            ies = [self.get_info_extractor(ie_key)]
         else:
             ies = self._ies
@@ -448,7 +460,8 @@ class YoutubeDL(object):
         if self.params.get('forceid', False):
             compat_print(info_dict['id'])
         if self.params.get('forceurl', False):
-            compat_print(info_dict['url'])
+            # For RTMP URLs, also include the playpath
+            compat_print(info_dict['url'] + info_dict.get('play_path', u''))
         if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
             compat_print(info_dict['thumbnail'])
         if self.params.get('forcedescription', False) and 'description' in info_dict:
@@ -483,34 +496,21 @@ class YoutubeDL(object):
                 self.report_error(u'Cannot write description file ' + descfn)
                 return

-        if (self.params.get('writesubtitles', False) or self.params.get('writeautomaticsub')) and 'subtitles' in info_dict and info_dict['subtitles']:
+        subtitles_are_requested = any([self.params.get('writesubtitles', False),
+                                       self.params.get('writeautomaticsub'),
+                                       self.params.get('allsubtitles', False)])
+        if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
             # subtitles download errors are already managed as troubles in relevant IE
             # that way it will silently go on when used with unsupporting IE
-            subtitle = info_dict['subtitles'][0]
-            (sub_error, sub_lang, sub) = subtitle
-            sub_format = self.params.get('subtitlesformat')
-            if sub_error:
-                self.report_warning("Some error while getting the subtitles")
-            else:
-                try:
-                    sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
-                    self.report_writesubtitles(sub_filename)
-                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
-                        subfile.write(sub)
-                except (OSError, IOError):
-                    self.report_error(u'Cannot write subtitles file ' + descfn)
-                    return
-
-        if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
             subtitles = info_dict['subtitles']
             sub_format = self.params.get('subtitlesformat')
-            for subtitle in subtitles:
-                (sub_error, sub_lang, sub) = subtitle
-                if sub_error:
-                    self.report_warning("Some error while getting the subtitles")
-                else:
-                    try:
-                        sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
-                        self.report_writesubtitles(sub_filename)
-                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
-                            subfile.write(sub)
+            for sub_lang in subtitles.keys():
+                sub = subtitles[sub_lang]
+                if sub is None:
+                    continue
+                try:
+                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
+                    self.report_writesubtitles(sub_filename)
+                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
+                        subfile.write(sub)
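The net effect of this hunk is that info_dict['subtitles'] is now a dictionary mapping language codes to subtitle data (see the matching docstring change in extractor/common.py further down) instead of a list of (error, language, data) tuples. A minimal sketch of the structure the writing loop above expects (values hypothetical):

    info_dict = {
        'id': 'BaW_jenozKc',
        'subtitles': {
            'en': u'1\n00:00:00,000 --> 00:00:01,000\nHello\n',
            'pt': None,  # a failed download; the loop skips None entries
        },
    }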

youtube_dl/__init__.py

@@ -27,6 +27,8 @@ __authors__ = (
     'Johny Mo Swag',
     'Axel Noack',
     'Albert Kim',
+    'Pierre Rudloff',
+    'Huarong Huo',
 )

 __license__ = 'Public Domain'
@@ -44,6 +46,7 @@ import sys
 import warnings
 import platform

 from .utils import *
 from .update import update_self
 from .version import __version__
@@ -82,6 +85,9 @@ def parseOpts(overrideArguments=None):
         return "".join(opts)

+    def _comma_separated_values_options_callback(option, opt_str, value, parser):
+        setattr(parser.values, option.dest, value.split(','))
+
     def _find_term_columns():
         columns = os.environ.get('COLUMNS', None)
         if columns:
@@ -95,6 +101,16 @@ def parseOpts(overrideArguments=None):
             pass
         return None

+    def _hide_login_info(opts):
+        opts = list(opts)
+        for private_opt in ['-p', '--password', '-u', '--username']:
+            try:
+                i = opts.index(private_opt)
+                opts[i+1] = '<PRIVATE>'
+            except ValueError:
+                pass
+        return opts
+
     max_width = 80
     max_help_position = 80
@@ -119,6 +135,7 @@ def parseOpts(overrideArguments=None):
     selection      = optparse.OptionGroup(parser, 'Video Selection')
     authentication = optparse.OptionGroup(parser, 'Authentication Options')
     video_format   = optparse.OptionGroup(parser, 'Video Format Options')
+    subtitles      = optparse.OptionGroup(parser, 'Subtitle Options')
     downloader     = optparse.OptionGroup(parser, 'Download Options')
     postproc       = optparse.OptionGroup(parser, 'Post-processing Options')
     filesystem     = optparse.OptionGroup(parser, 'Filesystem Options')
@@ -176,7 +193,7 @@ def parseOpts(overrideArguments=None):
     video_format.add_option('-f', '--format',
             action='store', dest='format', metavar='FORMAT',
-            help='video format code, specifiy the order of preference using slashes: "-f 22/17/18"')
+            help='video format code, specifiy the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
     video_format.add_option('--all-formats',
             action='store_const', dest='format', help='download all available video formats', const='all')
     video_format.add_option('--prefer-free-formats',
@@ -185,27 +202,29 @@ def parseOpts(overrideArguments=None):
             action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
     video_format.add_option('-F', '--list-formats',
             action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
-    video_format.add_option('--write-sub', '--write-srt',
+
+    subtitles.add_option('--write-sub', '--write-srt',
             action='store_true', dest='writesubtitles',
             help='write subtitle file (currently youtube only)', default=False)
-    video_format.add_option('--write-auto-sub', '--write-automatic-sub',
+    subtitles.add_option('--write-auto-sub', '--write-automatic-sub',
             action='store_true', dest='writeautomaticsub',
             help='write automatic subtitle file (currently youtube only)', default=False)
-    video_format.add_option('--only-sub',
+    subtitles.add_option('--only-sub',
             action='store_true', dest='skip_download',
             help='[deprecated] alias of --skip-download', default=False)
-    video_format.add_option('--all-subs',
+    subtitles.add_option('--all-subs',
             action='store_true', dest='allsubtitles',
-            help='downloads all the available subtitles of the video (currently youtube only)', default=False)
-    video_format.add_option('--list-subs',
+            help='downloads all the available subtitles of the video', default=False)
+    subtitles.add_option('--list-subs',
             action='store_true', dest='listsubtitles',
-            help='lists all available subtitles for the video (currently youtube only)', default=False)
-    video_format.add_option('--sub-format',
+            help='lists all available subtitles for the video', default=False)
+    subtitles.add_option('--sub-format',
             action='store', dest='subtitlesformat', metavar='FORMAT',
-            help='subtitle format [srt/sbv/vtt] (default=srt) (currently youtube only)', default='srt')
-    video_format.add_option('--sub-lang', '--srt-lang',
-            action='store', dest='subtitleslang', metavar='LANG',
-            help='language of the subtitles to download (optional) use IETF language tags like \'en\'')
+            help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt')
+    subtitles.add_option('--sub-lang', '--sub-langs', '--srt-lang',
+            action='callback', dest='subtitleslang', metavar='LANGS', type='str',
+            default=[], callback=_comma_separated_values_options_callback,
+            help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'')

     downloader.add_option('-r', '--rate-limit',
             dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)')
@@ -320,6 +339,8 @@ def parseOpts(overrideArguments=None):
             help='keeps the video file on disk after the post-processing; the video is erased by default')
     postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False,
             help='do not overwrite post-processed files; the post-processed files are overwritten by default')
+    postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False,
+            help='embed subtitles in the video (only for mp4 videos)')

     parser.add_option_group(general)
@@ -328,6 +349,7 @@ def parseOpts(overrideArguments=None):
     parser.add_option_group(filesystem)
     parser.add_option_group(verbosity)
     parser.add_option_group(video_format)
+    parser.add_option_group(subtitles)
     parser.add_option_group(authentication)
     parser.add_option_group(postproc)
@@ -347,9 +369,9 @@ def parseOpts(overrideArguments=None):
         argv = systemConf + userConf + commandLineConf
         opts, args = parser.parse_args(argv)
         if opts.verbose:
-            sys.stderr.write(u'[debug] System config: ' + repr(systemConf) + '\n')
-            sys.stderr.write(u'[debug] User config: ' + repr(userConf) + '\n')
-            sys.stderr.write(u'[debug] Command-line args: ' + repr(commandLineConf) + '\n')
+            sys.stderr.write(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
+            sys.stderr.write(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
+            sys.stderr.write(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')

     return parser, opts, args
@@ -420,6 +442,10 @@ def _real_main(argv=None):
     proxy_handler = compat_urllib_request.ProxyHandler(proxies)
     https_handler = make_HTTPS_handler(opts)
     opener = compat_urllib_request.build_opener(https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
+    # Delete the default user-agent header, which would otherwise apply in
+    # cases where our custom HTTP handler doesn't come into play
+    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
+    opener.addheaders =[]
     compat_urllib_request.install_opener(opener)
     socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
@@ -567,7 +593,7 @@ def _real_main(argv=None):
         'allsubtitles': opts.allsubtitles,
         'listsubtitles': opts.listsubtitles,
         'subtitlesformat': opts.subtitlesformat,
-        'subtitleslang': opts.subtitleslang,
+        'subtitleslangs': opts.subtitleslang,
         'matchtitle': decodeOption(opts.matchtitle),
         'rejecttitle': decodeOption(opts.rejecttitle),
         'max_downloads': opts.max_downloads,
@@ -597,7 +623,7 @@ def _real_main(argv=None):
                 sys.exc_clear()
             except:
                 pass
-            sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform()) + u'\n')
+            sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
             sys.stderr.write(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')

         ydl.add_default_info_extractors()
@@ -607,6 +633,8 @@ def _real_main(argv=None):
             ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
         if opts.recodevideo:
             ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
+        if opts.embedsubtitles:
+            ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))

     # Update version
     if opts.update_self:
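The new --sub-lang handling relies on an optparse callback that splits the comma-separated value into a list; a standalone sketch of the same mechanism (a trimmed-down parser, not the full youtube-dl one):

    import optparse

    def _comma_separated_values_options_callback(option, opt_str, value, parser):
        setattr(parser.values, option.dest, value.split(','))

    parser = optparse.OptionParser()
    parser.add_option('--sub-lang', '--sub-langs', '--srt-lang',
                      action='callback', dest='subtitleslang', metavar='LANGS',
                      type='str', default=[],
                      callback=_comma_separated_values_options_callback)
    opts, args = parser.parse_args(['--sub-lang', 'en,pt'])
    assert opts.subtitleslang == ['en', 'pt']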

youtube_dl/aes.py (new file, 202 lines)

@@ -0,0 +1,202 @@
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_decrypt_text']

import base64
from math import ceil

from .utils import bytes_to_intlist, intlist_to_bytes

BLOCK_SIZE_BYTES = 16

def aes_ctr_decrypt(data, key, counter):
    """
    Decrypt with aes in counter mode

    @param {int[]} data        cipher
    @param {int[]} key         16/24/32-Byte cipher key
    @param {instance} counter  Instance whose next_value function (@returns {int[]} 16-Byte block)
                               returns the next counter block
    @returns {int[]}           decrypted data
    """
    expanded_key = key_expansion(key)
    block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))

    decrypted_data=[]
    for i in range(block_count):
        counter_block = counter.next_value()
        block = data[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES]
        block += [0]*(BLOCK_SIZE_BYTES - len(block))

        cipher_counter_block = aes_encrypt(counter_block, expanded_key)
        decrypted_data += xor(block, cipher_counter_block)
    decrypted_data = decrypted_data[:len(data)]

    return decrypted_data

def key_expansion(data):
    """
    Generate key schedule

    @param {int[]} data  16/24/32-Byte cipher key
    @returns {int[]}     176/208/240-Byte expanded key
    """
    data = data[:] # copy
    rcon_iteration = 1
    key_size_bytes = len(data)
    expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES

    while len(data) < expanded_key_size_bytes:
        temp = data[-4:]
        temp = key_schedule_core(temp, rcon_iteration)
        rcon_iteration += 1
        data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])

        for _ in range(3):
            temp = data[-4:]
            data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])

        if key_size_bytes == 32:
            temp = data[-4:]
            temp = sub_bytes(temp)
            data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])

        for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0):
            temp = data[-4:]
            data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
    data = data[:expanded_key_size_bytes]

    return data

def aes_encrypt(data, expanded_key):
    """
    Encrypt one block with aes

    @param {int[]} data          16-Byte state
    @param {int[]} expanded_key  176/208/240-Byte expanded key
    @returns {int[]}             16-Byte cipher
    """
    rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1

    data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
    for i in range(1, rounds+1):
        data = sub_bytes(data)
        data = shift_rows(data)
        if i != rounds:
            data = mix_columns(data)
        data = xor(data, expanded_key[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES])

    return data

def aes_decrypt_text(data, password, key_size_bytes):
    """
    Decrypt text
    - The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter
    - The cipher key is retrieved by encrypting the first 16 Byte of 'password'
      with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 0's)
    - Mode of operation is 'counter'

    @param {str} data              Base64 encoded string
    @param {str,unicode} password  Password (will be encoded with utf-8)
    @param {int} key_size_bytes    Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit
    @returns {str}                 Decrypted data
    """
    NONCE_LENGTH_BYTES = 8

    data = bytes_to_intlist(base64.b64decode(data))
    password = bytes_to_intlist(password.encode('utf-8'))

    key = password[:key_size_bytes] + [0]*(key_size_bytes - len(password))
    key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES)

    nonce = data[:NONCE_LENGTH_BYTES]
    cipher = data[NONCE_LENGTH_BYTES:]

    class Counter:
        __value = nonce + [0]*(BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)
        def next_value(self):
            temp = self.__value
            self.__value = inc(self.__value)
            return temp

    decrypted_data = aes_ctr_decrypt(cipher, key, Counter())
    plaintext = intlist_to_bytes(decrypted_data)

    return plaintext

RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
        0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
        0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
        0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
        0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
        0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
        0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
        0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
        0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
        0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
        0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
        0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
        0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
        0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
        0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
        0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16)
MIX_COLUMN_MATRIX = ((2,3,1,1),
                     (1,2,3,1),
                     (1,1,2,3),
                     (3,1,1,2))

def sub_bytes(data):
    return [SBOX[x] for x in data]

def rotate(data):
    return data[1:] + [data[0]]

def key_schedule_core(data, rcon_iteration):
    data = rotate(data)
    data = sub_bytes(data)
    data[0] = data[0] ^ RCON[rcon_iteration]

    return data

def xor(data1, data2):
    return [x^y for x, y in zip(data1, data2)]

def mix_column(data):
    data_mixed = []
    for row in range(4):
        mixed = 0
        for column in range(4):
            addend = data[column]
            if MIX_COLUMN_MATRIX[row][column] in (2,3):
                addend <<= 1
                if addend > 0xff:
                    addend &= 0xff
                    addend ^= 0x1b
            if MIX_COLUMN_MATRIX[row][column] == 3:
                addend ^= data[column]
            mixed ^= addend & 0xff
        data_mixed.append(mixed)

    return data_mixed

def mix_columns(data):
    data_mixed = []
    for i in range(4):
        column = data[i*4 : (i+1)*4]
        data_mixed += mix_column(column)
    return data_mixed

def shift_rows(data):
    data_shifted = []
    for column in range(4):
        for row in range(4):
            data_shifted.append( data[((column + row) & 0b11) * 4 + row] )
    return data_shifted

def inc(data):
    data = data[:] # copy
    for i in range(len(data)-1,-1,-1):
        if data[i] == 255:
            data[i] = 0
        else:
            data[i] = data[i] + 1
            break
    return data
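The module exports enough to check itself against the FIPS-197 AES-128 test vector (key 000102030405060708090a0b0c0d0e0f, plaintext 00112233445566778899aabbccddeeff); a quick sanity-check sketch using only the functions defined above:

    key = list(range(16))  # 0x00, 0x01, ..., 0x0f
    plaintext = [0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
                 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff]
    expected = [0x69, 0xc4, 0xe0, 0xd8, 0x6a, 0x7b, 0x04, 0x30,
                0xd8, 0xcd, 0xb7, 0x80, 0x70, 0xb4, 0xc5, 0x5a]
    assert aes_encrypt(plaintext, key_expansion(key)) == expected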

youtube_dl/extractor/__init__.py

@@ -1,3 +1,5 @@
+from .appletrailers import AppleTrailersIE
+from .addanime import AddAnimeIE
 from .archiveorg import ArchiveOrgIE
 from .ard import ARDIE
 from .arte import ArteTvIE
@@ -6,16 +8,21 @@ from .bandcamp import BandcampIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .breakcom import BreakIE
 from .brightcove import BrightcoveIE
+from .c56 import C56IE
 from .canalplus import CanalplusIE
+from .canalc2 import Canalc2IE
+from .cnn import CNNIE
 from .collegehumor import CollegeHumorIE
 from .comedycentral import ComedyCentralIE
 from .condenast import CondeNastIE
 from .criterion import CriterionIE
 from .cspan import CSpanIE
 from .dailymotion import DailymotionIE, DailymotionPlaylistIE
+from .daum import DaumIE
 from .depositfiles import DepositFilesIE
 from .dotsub import DotsubIE
 from .dreisat import DreiSatIE
+from .defense import DefenseGouvFrIE
 from .ehow import EHowIE
 from .eighttracks import EightTracksIE
 from .escapist import EscapistIE
@@ -29,6 +36,7 @@ from .gametrailers import GametrailersIE
 from .generic import GenericIE
 from .googleplus import GooglePlusIE
 from .googlesearch import GoogleSearchIE
+from .hark import HarkIE
 from .hotnewhiphop import HotNewHipHopIE
 from .howcast import HowcastIE
 from .hypem import HypemIE
@@ -36,6 +44,7 @@ from .ign import IGNIE, OneUPIE
 from .ina import InaIE
 from .infoq import InfoQIE
 from .instagram import InstagramIE
+from .jeuxvideo import JeuxVideoIE
 from .jukebox import JukeboxIE
 from .justintv import JustinTVIE
 from .kankan import KankanIE
@@ -43,20 +52,29 @@ from .keek import KeekIE
 from .liveleak import LiveLeakIE
 from .livestream import LivestreamIE
 from .metacafe import MetacafeIE
+from .mit import TechTVMITIE, MITIE
 from .mixcloud import MixcloudIE
 from .mtv import MTVIE
 from .muzu import MuzuTVIE
 from .myspass import MySpassIE
 from .myvideo import MyVideoIE
+from .naver import NaverIE
 from .nba import NBAIE
+from .nbc import NBCNewsIE
 from .ooyala import OoyalaIE
+from .orf import ORFIE
+from .pbs import PBSIE
 from .photobucket import PhotobucketIE
 from .pornotube import PornotubeIE
 from .rbmaradio import RBMARadioIE
 from .redtube import RedTubeIE
 from .ringtv import RingTVIE
+from .ro220 import Ro220IE
 from .roxwel import RoxwelIE
+from .rtlnow import RTLnowIE
 from .sina import SinaIE
+from .slashdot import SlashdotIE
+from .sohu import SohuIE
 from .soundcloud import SoundcloudIE, SoundcloudSetIE
 from .spiegel import SpiegelIE
 from .stanfordoc import StanfordOpenClassroomIE
@@ -67,17 +85,19 @@ from .ted import TEDIE
 from .tf1 import TF1IE
 from .thisav import ThisAVIE
 from .traileraddict import TrailerAddictIE
+from .trilulilu import TriluliluIE
 from .tudou import TudouIE
 from .tumblr import TumblrIE
 from .tutv import TutvIE
+from .unistra import UnistraIE
 from .ustream import UstreamIE
 from .vbox7 import Vbox7IE
+from .veehd import VeeHDIE
 from .veoh import VeohIE
 from .vevo import VevoIE
 from .videofyme import VideofyMeIE
 from .vimeo import VimeoIE, VimeoChannelIE
 from .vine import VineIE
-from .c56 import C56IE
 from .wat import WatIE
 from .weibo import WeiboIE
 from .wimp import WimpIE
@@ -111,12 +131,14 @@ _ALL_CLASSES = [
 ]
 _ALL_CLASSES.append(GenericIE)

 def gen_extractors():
     """ Return a list of an instance of every supported extractor.
     The order does matter; the first extractor matched is the one handling the URL.
     """
     return [klass() for klass in _ALL_CLASSES]

 def get_info_extractor(ie_name):
     """Returns the info extractor class with the given ie_name"""
     return globals()[ie_name+'IE']

youtube_dl/extractor/addanime.py (new file)

@@ -0,0 +1,75 @@
import re

from .common import InfoExtractor
from ..utils import (
    compat_HTTPError,
    compat_str,
    compat_urllib_parse,
    compat_urllib_parse_urlparse,

    ExtractorError,
)

class AddAnimeIE(InfoExtractor):
    _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
    IE_NAME = u'AddAnime'
    _TEST = {
        u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
        u'file': u'24MR3YO5SAS9.flv',
        u'md5': u'1036a0e0cd307b95bd8a8c3a5c8cfaf1',
        u'info_dict': {
            u"description": u"One Piece 606",
            u"title": u"One Piece 606"
        }
    }

    def _real_extract(self, url):
        try:
            mobj = re.match(self._VALID_URL, url)
            video_id = mobj.group('video_id')
            webpage = self._download_webpage(url, video_id)
        except ExtractorError as ee:
            if not isinstance(ee.cause, compat_HTTPError):
                raise

            redir_webpage = ee.cause.read().decode('utf-8')
            action = self._search_regex(
                r'<form id="challenge-form" action="([^"]+)"',
                redir_webpage, u'Redirect form')
            vc = self._search_regex(
                r'<input type="hidden" name="jschl_vc" value="([^"]+)"/>',
                redir_webpage, u'redirect vc value')
            av = re.search(
                r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
                redir_webpage)
            if av is None:
                raise ExtractorError(u'Cannot find redirect math task')
            av_res = int(av.group(1)) + int(av.group(2)) * int(av.group(3))

            parsed_url = compat_urllib_parse_urlparse(url)
            av_val = av_res + len(parsed_url.netloc)
            confirm_url = (
                parsed_url.scheme + u'://' + parsed_url.netloc +
                action + '?' +
                compat_urllib_parse.urlencode({
                    'jschl_vc': vc, 'jschl_answer': compat_str(av_val)}))
            self._download_webpage(
                confirm_url, video_id,
                note=u'Confirming after redirect')
            webpage = self._download_webpage(url, video_id)

        video_url = self._search_regex(r"var normal_video_file = '(.*?)';",
                                       webpage, u'video file URL')
        video_title = self._og_search_title(webpage)
        video_description = self._og_search_description(webpage)

        return {
            '_type': 'video',
            'id': video_id,
            'url': video_url,
            'ext': 'flv',
            'title': video_title,
            'description': video_description
        }
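The interstitial page served on redirect embeds a small arithmetic challenge of the form a.value = A+B*C;, which the extractor solves and then offsets by the host name length. A sketch with hypothetical values:

    import re

    redir_js = 'a.value = 12+34*2;'  # hypothetical challenge
    av = re.search(r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);', redir_js)
    av_res = int(av.group(1)) + int(av.group(2)) * int(av.group(3))  # 12 + 68 = 80
    jschl_answer = av_res + len('www.add-anime.net')                 # 80 + 17 = 97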

youtube_dl/extractor/appletrailers.py (new file)

@@ -0,0 +1,166 @@
import re
import xml.etree.ElementTree

from .common import InfoExtractor
from ..utils import (
    determine_ext,
)

class AppleTrailersIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?trailers.apple.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
    _TEST = {
        u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
        u"playlist": [
            {
                u"file": u"manofsteel-trailer4.mov",
                u"md5": u"11874af099d480cc09e103b189805d5f",
                u"info_dict": {
                    u"duration": 111,
                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_11624.jpg",
                    u"title": u"Trailer 4",
                    u"upload_date": u"20130523",
                    u"uploader_id": u"wb",
                },
            },
            {
                u"file": u"manofsteel-trailer3.mov",
                u"md5": u"07a0a262aae5afe68120eed61137ab34",
                u"info_dict": {
                    u"duration": 182,
                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_10793.jpg",
                    u"title": u"Trailer 3",
                    u"upload_date": u"20130417",
                    u"uploader_id": u"wb",
                },
            },
            {
                u"file": u"manofsteel-trailer.mov",
                u"md5": u"e401fde0813008e3307e54b6f384cff1",
                u"info_dict": {
                    u"duration": 148,
                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_8703.jpg",
                    u"title": u"Trailer",
                    u"upload_date": u"20121212",
                    u"uploader_id": u"wb",
                },
            },
            {
                u"file": u"manofsteel-teaser.mov",
                u"md5": u"76b392f2ae9e7c98b22913c10a639c97",
                u"info_dict": {
                    u"duration": 93,
                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_6899.jpg",
                    u"title": u"Teaser",
                    u"upload_date": u"20120721",
                    u"uploader_id": u"wb",
                },
            }
        ]
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        movie = mobj.group('movie')
        uploader_id = mobj.group('company')

        playlist_url = url.partition(u'?')[0] + u'/includes/playlists/web.inc'
        playlist_snippet = self._download_webpage(playlist_url, movie)
        playlist_cleaned = re.sub(r'(?s)<script>.*?</script>', u'', playlist_snippet)
        playlist_html = u'<html>' + playlist_cleaned + u'</html>'

        size_cache = {}

        doc = xml.etree.ElementTree.fromstring(playlist_html)
        playlist = []
        for li in doc.findall('./div/ul/li'):
            title = li.find('.//h3').text
            video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
            thumbnail = li.find('.//img').attrib['src']

            date_el = li.find('.//p')
            upload_date = None
            m = re.search(r':\s?(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<year>[0-9]{2})', date_el.text)
            if m:
                upload_date = u'20' + m.group('year') + m.group('month') + m.group('day')
            runtime_el = date_el.find('./br')
            m = re.search(r':\s?(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime_el.tail)
            duration = None
            if m:
                duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))

            formats = []
            for formats_el in li.findall('.//a'):
                if formats_el.attrib['class'] != 'OverlayPanel':
                    continue
                target = formats_el.attrib['target']

                format_code = formats_el.text
                if 'Automatic' in format_code:
                    continue

                size_q = formats_el.attrib['href']
                size_id = size_q.rpartition('#videos-')[2]
                if size_id not in size_cache:
                    size_url = url + size_q
                    sizepage_html = self._download_webpage(
                        size_url, movie,
                        note=u'Downloading size info %s' % size_id,
                        errnote=u'Error while downloading size info %s' % size_id,
                    )
                    _doc = xml.etree.ElementTree.fromstring(sizepage_html)
                    size_cache[size_id] = _doc

                sizepage_doc = size_cache[size_id]
                links = sizepage_doc.findall('.//{http://www.w3.org/1999/xhtml}ul/{http://www.w3.org/1999/xhtml}li/{http://www.w3.org/1999/xhtml}a')
                for vid_a in links:
                    href = vid_a.get('href')
                    if not href.endswith(target):
                        continue
                    detail_q = href.partition('#')[0]
                    detail_url = url + '/' + detail_q

                    m = re.match(r'includes/(?P<detail_id>[^/]+)/', detail_q)
                    detail_id = m.group('detail_id')

                    detail_html = self._download_webpage(
                        detail_url, movie,
                        note=u'Downloading detail %s %s' % (detail_id, size_id),
                        errnote=u'Error while downloading detail %s %s' % (detail_id, size_id)
                    )
                    detail_doc = xml.etree.ElementTree.fromstring(detail_html)
                    movie_link_el = detail_doc.find('.//{http://www.w3.org/1999/xhtml}a')
                    assert movie_link_el.get('class') == 'movieLink'

                    movie_link = movie_link_el.get('href').partition('?')[0].replace('_', '_h')
                    ext = determine_ext(movie_link)
                    assert ext == 'mov'

                    formats.append({
                        'format': format_code,
                        'ext': ext,
                        'url': movie_link,
                    })

            info = {
                '_type': 'video',
                'id': video_id,
                'title': title,
                'formats': formats,
                'duration': duration,
                'thumbnail': thumbnail,
                'upload_date': upload_date,
                'uploader_id': uploader_id,
                'user_agent': 'QuickTime compatible (youtube-dl)',
            }
            # TODO: Remove when #980 has been merged
            info['url'] = formats[-1]['url']
            info['ext'] = formats[-1]['ext']

            playlist.append(info)

        return {
            '_type': 'playlist',
            'id': movie,
            'entries': playlist,
        }

youtube_dl/extractor/c56.py

@@ -12,8 +12,8 @@ class C56IE(InfoExtractor):
     _TEST ={
         u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
-        u'file': u'93440716.mp4',
-        u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e',
+        u'file': u'93440716.flv',
+        u'md5': u'e59995ac63d0457783ea05f93f12a866',
         u'info_dict': {
             u'title': u'网事知多少 第32期车怒',
         },

youtube_dl/extractor/canalc2.py (new file)

@@ -0,0 +1,35 @@
# coding: utf-8
import re

from .common import InfoExtractor

class Canalc2IE(InfoExtractor):
    _IE_NAME = 'canalc2.tv'
    _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui'

    _TEST = {
        u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
        u'file': u'12163.mp4',
        u'md5': u'060158428b650f896c542dfbb3d6487f',
        u'info_dict': {
            u'title': u'Terrasses du Numérique'
        }
    }

    def _real_extract(self, url):
        video_id = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, video_id)
        file_name = self._search_regex(
            r"so\.addVariable\('file','(.*?)'\);",
            webpage, 'file name')
        video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name

        title = self._html_search_regex(
            r'class="evenement8">(.*?)</a>', webpage, u'title')

        return {'id': video_id,
                'ext': 'mp4',
                'url': video_url,
                'title': title,
                }

youtube_dl/extractor/canalplus.py

@@ -5,7 +5,7 @@ from .common import InfoExtractor
 from ..utils import unified_strdate

 class CanalplusIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.canalplus\.fr/.*?\?vid=(?P<id>\d+)'
+    _VALID_URL = r'https?://(www\.canalplus\.fr/.*?\?vid=|player\.canalplus\.fr/#/)(?P<id>\d+)'
     _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
     IE_NAME = u'canalplus.fr'

youtube_dl/extractor/cnn.py (new file)

@@ -0,0 +1,58 @@
import re
import xml.etree.ElementTree

from .common import InfoExtractor
from ..utils import determine_ext

class CNNIE(InfoExtractor):
    _VALID_URL = r'''(?x)https?://(edition\.)?cnn\.com/video/(data/.+?|\?)/
        (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''

    _TESTS = [{
        u'url': u'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
        u'file': u'sports_2013_06_09_nadal-1-on-1.cnn.mp4',
        u'md5': u'3e6121ea48df7e2259fe73a0628605c4',
        u'info_dict': {
            u'title': u'Nadal wins 8th French Open title',
            u'description': u'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
        },
    },
    {
        u"url": u"http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
        u"file": u"us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4",
        u"md5": u"b5cc60c60a3477d185af8f19a2a26f4e",
        u"info_dict": {
            u"title": "Student's epic speech stuns new freshmen",
            u"description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\""
        }
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        path = mobj.group('path')
        page_title = mobj.group('title')
        info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
        info_xml = self._download_webpage(info_url, page_title)
        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))

        formats = []
        for f in info.findall('files/file'):
            mf = re.match(r'(\d+)x(\d+)(?:_(.*)k)?',f.attrib['bitrate'])
            if mf is not None:
                formats.append((int(mf.group(1)), int(mf.group(2)), int(mf.group(3) or 0), f.text))
        formats = sorted(formats)
        (_,_,_, video_path) = formats[-1]
        video_url = 'http://ht.cdn.turner.com/cnn/big%s' % video_path

        thumbnails = sorted([((int(t.attrib['height']),int(t.attrib['width'])), t.text) for t in info.findall('images/image')])
        thumbs_dict = [{'resolution': res, 'url': t_url} for (res, t_url) in thumbnails]

        return {'id': info.attrib['id'],
                'title': info.find('headline').text,
                'url': video_url,
                'ext': determine_ext(video_url),
                'thumbnail': thumbnails[-1][1],
                'thumbnails': thumbs_dict,
                'description': info.find('description').text,
                }
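CNN's bitrate attribute strings look like '640x360_1000k'; parsing them into (width, height, bitrate) tuples lets a plain tuple sort pick the best variant. A quick sketch with sample values (hypothetical, but matching the regex above):

    import re

    formats = []
    for s in ('416x234_550k', '1920x1080_5500k', '640x360_1000k'):
        mf = re.match(r'(\d+)x(\d+)(?:_(.*)k)?', s)
        formats.append((int(mf.group(1)), int(mf.group(2)), int(mf.group(3) or 0), s))
    assert sorted(formats)[-1][3] == '1920x1080_5500k'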

youtube_dl/extractor/collegehumor.py

@@ -4,6 +4,7 @@ import xml.etree.ElementTree
 from .common import InfoExtractor
 from ..utils import (
     compat_urllib_parse_urlparse,
+    determine_ext,

     ExtractorError,
 )
@@ -12,7 +13,7 @@ from ..utils import (
 class CollegeHumorIE(InfoExtractor):
     _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'

-    _TEST = {
+    _TESTS = [{
         u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
         u'file': u'6902724.mp4',
         u'md5': u'1264c12ad95dca142a9f0bf7968105a0',
@@ -20,7 +21,16 @@ class CollegeHumorIE(InfoExtractor):
             u'title': u'Comic-Con Cosplay Catastrophe',
             u'description': u'Fans get creative this year at San Diego. Too creative. And yes, that\'s really Joss Whedon.',
         },
-    }
+    },
+    {
+        u'url': u'http://www.collegehumor.com/video/3505939/font-conference',
+        u'file': u'3505939.mp4',
+        u'md5': u'c51ca16b82bb456a4397987791a835f5',
+        u'info_dict': {
+            u'title': u'Font Conference',
+            u'description': u'This video wasn\'t long enough, so we made it double-spaced.',
+        },
+    }]

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -49,11 +59,12 @@ class CollegeHumorIE(InfoExtractor):
             info['description'] = videoNode.findall('./description')[0].text
             info['title'] = videoNode.findall('./caption')[0].text
             info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
-            manifest_url = videoNode.findall('./file')[0].text
+            next_url = videoNode.findall('./file')[0].text
         except IndexError:
             raise ExtractorError(u'Invalid metadata XML file')

-        manifest_url += '?hdcore=2.10.3'
-        manifestXml = self._download_webpage(manifest_url, video_id,
-                     u'Downloading XML manifest',
-                     u'Unable to download video info XML')
+        if next_url.endswith(u'manifest.f4m'):
+            manifest_url = next_url + '?hdcore=2.10.3'
+            manifestXml = self._download_webpage(manifest_url, video_id,
+                         u'Downloading XML manifest',
+                         u'Unable to download video info XML')
@@ -65,9 +76,12 @@ class CollegeHumorIE(InfoExtractor):
                 video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
             except IndexError as err:
                 raise ExtractorError(u'Invalid manifest file')

-        url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
-        info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
-        info['ext'] = 'mp4'
-        return [info]
+            url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
+            info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
+            info['ext'] = 'mp4'
+        else:
+            # Old-style direct links
+            info['url'] = next_url
+            info['ext'] = determine_ext(info['url'])

+        return info

youtube_dl/extractor/common.py

@@ -47,7 +47,8 @@ class InfoExtractor(object):
     uploader_id:    Nickname or id of the video uploader.
     location:       Physical location of the video.
     player_url:     SWF Player URL (used for rtmpdump).
-    subtitles:      The subtitle file contents.
+    subtitles:      The subtitle file contents as a dictionary in the format
+                    {language: subtitles}.
     view_count:     How many users have watched the video on the platform.
     urlhandle:      [internal] The urlHandle to be used to download the file,
                     like returned by urllib.request.urlopen
@@ -77,7 +78,13 @@ class InfoExtractor(object):
     @classmethod
     def suitable(cls, url):
         """Receives a URL and returns True if suitable for this IE."""
-        return re.match(cls._VALID_URL, url) is not None
+
+        # This does not use has/getattr intentionally - we want to know whether
+        # we have cached the regexp for *this* class, whereas getattr would also
+        # match the superclass
+        if '_VALID_URL_RE' not in cls.__dict__:
+            cls._VALID_URL_RE = re.compile(cls._VALID_URL)
+        return cls._VALID_URL_RE.match(url) is not None

     @classmethod
     def working(cls):
@@ -107,6 +114,11 @@ class InfoExtractor(object):
         """Real extraction process. Redefine in subclasses."""
         pass

+    @classmethod
+    def ie_key(cls):
+        """A string for getting the InfoExtractor with get_info_extractor"""
+        return cls.__name__[:-2]
+
     @property
     def IE_NAME(self):
         return type(self).__name__[:-2]
@@ -122,7 +134,7 @@ class InfoExtractor(object):
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
             if errnote is None:
                 errnote = u'Unable to download webpage'
-            raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2])
+            raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2], cause=err)

     def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None):
         """ Returns a tuple (page content as string, URL handle) """
@@ -133,12 +145,17 @@ class InfoExtractor(object):
         urlh = self._request_webpage(url_or_request, video_id, note, errnote)
         content_type = urlh.headers.get('Content-Type', '')
+        webpage_bytes = urlh.read()
         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
         if m:
             encoding = m.group(1)
         else:
-            encoding = 'utf-8'
-        webpage_bytes = urlh.read()
+            m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]',
+                          webpage_bytes[:1024])
+            if m:
+                encoding = m.group(1).decode('ascii')
+            else:
+                encoding = 'utf-8'
         if self._downloader.params.get('dump_intermediate_pages', False):
             try:
                 url = url_or_request.get_full_url()
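The reworked suitable() deliberately checks cls.__dict__ rather than hasattr so that each subclass compiles and caches its own _VALID_URL. A sketch of the failure mode hasattr would introduce (class names hypothetical):

    import re

    class BaseIE(object):
        _VALID_URL = r'https?://base\.example/.*'

        @classmethod
        def suitable(cls, url):
            if '_VALID_URL_RE' not in cls.__dict__:
                cls._VALID_URL_RE = re.compile(cls._VALID_URL)
            return cls._VALID_URL_RE.match(url) is not None

    class SubIE(BaseIE):
        _VALID_URL = r'https?://sub\.example/.*'

    BaseIE.suitable('https://base.example/x')  # caches the regexp on BaseIE only
    # With hasattr, SubIE would now appear to already have _VALID_URL_RE
    # (inherited from BaseIE) and would wrongly match BaseIE's pattern;
    # checking cls.__dict__ avoids that.
    assert SubIE.suitable('https://sub.example/x')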

youtube_dl/extractor/dailymotion.py

@@ -21,7 +21,7 @@ class DailymotionIE(InfoExtractor):
         u'file': u'x33vw9.mp4',
         u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
         u'info_dict': {
-            u"uploader": u"Alex and Van .",
+            u"uploader": u"Amphora Alex and Van .",
             u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
         }
     }
@@ -55,7 +55,8 @@ class DailymotionIE(InfoExtractor):
         embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
         embed_page = self._download_webpage(embed_url, video_id,
                                             u'Downloading embed page')
-        info = self._search_regex(r'var info = ({.*?}),', embed_page, 'video info')
+        info = self._search_regex(r'var info = ({.*?}),$', embed_page,
+            'video info', flags=re.MULTILINE)
         info = json.loads(info)

         # TODO: support choosing qualities

youtube_dl/extractor/daum.py (new file)

@@ -0,0 +1,74 @@
# encoding: utf-8
import re
import xml.etree.ElementTree

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
    determine_ext,
)

class DaumIE(InfoExtractor):
    _VALID_URL = r'https?://tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
    IE_NAME = u'daum.net'

    _TEST = {
        u'url': u'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
        u'file': u'52554690.mp4',
        u'info_dict': {
            u'title': u'DOTA 2GETHER 시즌2 6회 - 2부',
            u'description': u'DOTA 2GETHER 시즌2 6회 - 2부',
            u'upload_date': u'20130831',
            u'duration': 3868,
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
        webpage = self._download_webpage(canonical_url, video_id)
        full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"',
            webpage, u'full id')
        query = compat_urllib_parse.urlencode({'vid': full_id})
        info_xml = self._download_webpage(
            'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
            u'Downloading video info')
        urls_xml = self._download_webpage(
            'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
            video_id, u'Downloading video formats info')
        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
        urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))

        self.to_screen(u'%s: Getting video urls' % video_id)
        formats = []
        for format_el in urls.findall('result/output_list/output_list'):
            profile = format_el.attrib['profile']
            format_query = compat_urllib_parse.urlencode({
                'vid': full_id,
                'profile': profile,
            })
            url_xml = self._download_webpage(
                'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
                video_id, note=False)
            url_doc = xml.etree.ElementTree.fromstring(url_xml.encode('utf-8'))
            format_url = url_doc.find('result/url').text
            formats.append({
                'url': format_url,
                'ext': determine_ext(format_url),
                'format_id': profile,
            })

        info = {
            'id': video_id,
            'title': info.find('TITLE').text,
            'formats': formats,
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': info.find('CONTENTS').text,
            'duration': int(info.find('DURATION').text),
            'upload_date': info.find('REGDTTM').text[:8],
        }
        # TODO: Remove when #980 has been merged
        info.update(formats[-1])
        return info

youtube_dl/extractor/defense.py (new file)

@@ -0,0 +1,39 @@
import re
import json

from .common import InfoExtractor

class DefenseGouvFrIE(InfoExtractor):
    _IE_NAME = 'defense.gouv.fr'
    _VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/'
                  r'ligthboxvideo/base-de-medias/webtv/(.*)')

    _TEST = {
        u'url': (u'http://www.defense.gouv.fr/layout/set/ligthboxvideo/'
                 u'base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1'),
        u'file': u'11213.mp4',
        u'md5': u'75bba6124da7e63d2d60b5244ec9430c',
        "info_dict": {
            "title": "attaque-chimique-syrienne-du-21-aout-2013-1"
        }
    }

    def _real_extract(self, url):
        title = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, title)
        video_id = self._search_regex(
            r"flashvars.pvg_id=\"(\d+)\";",
            webpage, 'ID')

        json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
                    + video_id)
        info = self._download_webpage(json_url, title,
                                      'Downloading JSON config')
        video_url = json.loads(info)['renditions'][0]['url']

        return {'id': video_id,
                'ext': 'mp4',
                'url': video_url,
                'title': title,
                }

youtube_dl/extractor/funnyordie.py

@@ -21,17 +21,14 @@ class FunnyOrDieIE(InfoExtractor):
         video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)

-        video_url = self._html_search_regex(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"',
+        video_url = self._search_regex(r'type: "video/mp4", src: "(.*?)"',
             webpage, u'video URL', flags=re.DOTALL)

-        title = self._html_search_regex((r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>",
-            r'<title>(?P<title>[^<]+?)</title>'), webpage, 'title', flags=re.DOTALL)
-
         info = {
             'id': video_id,
             'url': video_url,
             'ext': 'mp4',
-            'title': title,
+            'title': self._og_search_title(webpage),
             'description': self._og_search_description(webpage),
         }
         return [info]

youtube_dl/extractor/generic.py

@@ -8,11 +8,13 @@ from ..utils import (
     compat_urllib_error,
     compat_urllib_parse,
     compat_urllib_request,
+    compat_urlparse,

     ExtractorError,
 )
 from .brightcove import BrightcoveIE

+
 class GenericIE(InfoExtractor):
     IE_DESC = u'Generic downloader that works on some sites'
     _VALID_URL = r'.*'
@@ -107,8 +109,13 @@ class GenericIE(InfoExtractor):
             return new_url

     def _real_extract(self, url):
-        new_url = self._test_redirect(url)
-        if new_url: return [self.url_result(new_url)]
+        try:
+            new_url = self._test_redirect(url)
+            if new_url:
+                return [self.url_result(new_url)]
+        except compat_urllib_error.HTTPError:
+            # This may be a stupid server that doesn't like HEAD, our UA, or so
+            pass

         video_id = url.split('/')[-1]
         try:
@@ -119,7 +126,7 @@ class GenericIE(InfoExtractor):
             raise ExtractorError(u'Invalid URL: %s' % url)

         self.report_extraction(video_id)
-        # Look for BrigthCove:
+        # Look for BrightCove:
         m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
         if m_brightcove is not None:
             self.to_screen(u'Brightcove video detected.')
@@ -144,6 +151,9 @@ class GenericIE(InfoExtractor):
         # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
         if m_video_type is not None:
             mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
+        if mobj is None:
+            # HTML5 video
+            mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL)
         if mobj is None:
             raise ExtractorError(u'Invalid URL: %s' % url)
@@ -152,8 +162,9 @@ class GenericIE(InfoExtractor):
         if mobj.group(1) is None:
             raise ExtractorError(u'Invalid URL: %s' % url)

-        video_url = compat_urllib_parse.unquote(mobj.group(1))
-        video_id = os.path.basename(video_url)
+        video_url = mobj.group(1)
+        video_url = compat_urlparse.urljoin(url, video_url)
+        video_id = compat_urllib_parse.unquote(os.path.basename(video_url))

         # here's a fun little line of code for you:
         video_extension = os.path.splitext(video_id)[1][1:]
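The added HTML5 fallback regex covers both a src attribute on the <video> tag itself and one on a nested <source>; a quick check against hypothetical markup:

    import re

    _HTML5_VIDEO_RE = r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"'
    pages = (
        u'<video controls src="/media/clip.mp4"></video>',
        u'<video controls><source type="video/mp4" src="/media/clip.mp4"></video>',
    )
    for page in pages:
        mobj = re.search(_HTML5_VIDEO_RE, page, flags=re.DOTALL)
        assert mobj.group(1) == '/media/clip.mp4'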

youtube_dl/extractor/googleplus.py

@@ -57,8 +57,8 @@ class GooglePlusIE(InfoExtractor):
             webpage, 'title', default=u'NA')

         # Step 2, Simulate clicking the image box to launch video
-        DOMAIN = 'https://plus.google.com'
-        video_page = self._search_regex(r'<a href="((?:%s)?/photos/.*?)"' % re.escape(DOMAIN),
+        DOMAIN = 'https://plus.google.com/'
+        video_page = self._search_regex(r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN),
             webpage, u'video page URL')
         if not video_page.startswith(DOMAIN):
             video_page = DOMAIN + video_page

youtube_dl/extractor/hark.py (new file)

@@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-

import re
import json

from .common import InfoExtractor
from ..utils import determine_ext

class HarkIE(InfoExtractor):
    _VALID_URL = r'https?://www\.hark\.com/clips/(.+?)-.+'
    _TEST = {
        u'url': u'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
        u'file': u'mmbzyhkgny.mp3',
        u'md5': u'6783a58491b47b92c7c1af5a77d4cbee',
        u'info_dict': {
            u'title': u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' on May 23, 2013",
            u'description': u'President Barack Obama addressed the nation live on May 23, 2013 in a speech aimed at addressing counter-terrorism policies including the use of drone strikes, detainees at Guantanamo Bay prison facility, and American citizens who are terrorists.',
            u'duration': 11,
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        json_url = "http://www.hark.com/clips/%s.json" %(video_id)
        info_json = self._download_webpage(json_url, video_id)
        info = json.loads(info_json)
        final_url = info['url']

        return {'id': video_id,
                'url' : final_url,
                'title': info['name'],
                'ext': determine_ext(final_url),
                'description': info['description'],
                'thumbnail': info['image_original'],
                'duration': info['duration'],
                }

youtube_dl/extractor/ign.py

@@ -13,7 +13,7 @@ class IGNIE(InfoExtractor):
     Some videos of it.ign.com are also supported
     """

-    _VALID_URL = r'https?://.+?\.ign\.com/(?:videos|show_videos)(/.+)?/(?P<name_or_id>.+)'
+    _VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles)(/.+)?/(?P<name_or_id>.+)'
     IE_NAME = u'ign.com'

     _CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'
@@ -41,7 +41,11 @@ class IGNIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         name_or_id = mobj.group('name_or_id')
+        page_type = mobj.group('type')
         webpage = self._download_webpage(url, name_or_id)
+        if page_type == 'articles':
+            video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, u'video url')
+            return self.url_result(video_url, ie='IGN')
         video_id = self._find_video_id(webpage)
         result = self._get_video_info(video_id)
         description = self._html_search_regex(self._DESCRIPTION_RE,
@@ -68,7 +72,7 @@ class IGNIE(InfoExtractor):
 class OneUPIE(IGNIE):
     """Extractor for 1up.com, it uses the ign videos system."""

-    _VALID_URL = r'https?://gamevideos.1up.com/video/id/(?P<name_or_id>.+)'
+    _VALID_URL = r'https?://gamevideos.1up.com/(?P<type>video)/id/(?P<name_or_id>.+)'
     IE_NAME = '1up.com'

     _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'

youtube_dl/extractor/jeuxvideo.py (new file)

@@ -0,0 +1,47 @@
# coding: utf-8

import json
import re
import xml.etree.ElementTree

from .common import InfoExtractor

class JeuxVideoIE(InfoExtractor):
    _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'

    _TEST = {
        u'url': u'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
        u'file': u'5182.mp4',
        u'md5': u'e0fdb0cd3ce98713ef9c1e1e025779d0',
        u'info_dict': {
            u'title': u'GC 2013 : Tearaway nous présente ses papiers d\'identité',
            u'description': u'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        title = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, title)
        m_download = re.search(r'<param name="flashvars" value="config=(.*?)" />', webpage)

        xml_link = m_download.group(1)

        id = re.search(r'http://www.jeuxvideo.com/config/\w+/0011/(.*?)/\d+_player\.xml', xml_link).group(1)

        xml_config = self._download_webpage(xml_link, title,
                                            'Downloading XML config')
        config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8'))
        info = re.search(r'<format\.json>(.*?)</format\.json>',
                         xml_config, re.MULTILINE|re.DOTALL).group(1)
        info = json.loads(info)['versions'][0]

        video_url = 'http://video720.jeuxvideo.com/' + info['file']

        return {'id': id,
                'title' : config.find('titre_video').text,
                'ext' : 'mp4',
                'url' : video_url,
                'description': self._og_search_description(webpage),
                'thumbnail': config.find('image').text,
                }

View File

@@ -21,8 +21,10 @@ class KankanIE(InfoExtractor):
         video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)

-        title = self._search_regex(r'G_TITLE=[\'"](.+?)[\'"]', webpage, u'video title')
-        gcid = self._search_regex(r'lurl:[\'"]http://.+?/.+?/(.+?)/', webpage, u'gcid')
+        title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, u'video title')
+        surls = re.search(r'surls:\[\'.+?\'\]|lurl:\'.+?\.flv\'', webpage).group(0)
+        gcids = re.findall(r"http://.+?/.+?/(.+?)/", surls)
+        gcid = gcids[-1]

         video_info_page = self._download_webpage('http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid,
                                                  video_id, u'Downloading video url info')

View File

@@ -122,7 +122,7 @@ class MetacafeIE(InfoExtractor):
         video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, u'title')
         description = self._og_search_description(webpage)
         video_uploader = self._html_search_regex(
-                r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("channel","([^"]+)"\);',
+                r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
                 webpage, u'uploader nickname', fatal=False)

         return {

View File

@@ -0,0 +1,74 @@
import re
import json

from .common import InfoExtractor
from ..utils import (
    clean_html,
    get_element_by_id,
)


class TechTVMITIE(InfoExtractor):
    IE_NAME = u'techtv.mit.edu'
    _VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
        u'file': u'25418.mp4',
        u'md5': u'1f8cb3e170d41fd74add04d3c9330e5f',
        u'info_dict': {
            u'title': u'MIT DNA Learning Center Set',
            u'description': u'md5:82313335e8a8a3f243351ba55bc1b474',
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        raw_page = self._download_webpage(
            'http://techtv.mit.edu/videos/%s' % video_id, video_id)
        clean_page = re.compile(u'<!--.*?-->', re.S).sub(u'', raw_page)

        base_url = self._search_regex(r'ipadUrl: \'(.+?cloudfront.net/)',
            raw_page, u'base url')
        formats_json = self._search_regex(r'bitrates: (\[.+?\])', raw_page,
            u'video formats')
        formats = json.loads(formats_json)
        formats = sorted(formats, key=lambda f: f['bitrate'])

        title = get_element_by_id('edit-title', clean_page)
        description = clean_html(get_element_by_id('edit-description', clean_page))
        thumbnail = self._search_regex(r'playlist:.*?url: \'(.+?)\'',
            raw_page, u'thumbnail', flags=re.DOTALL)

        return {'id': video_id,
                'title': title,
                'url': base_url + formats[-1]['url'].replace('mp4:', ''),
                'ext': 'mp4',
                'description': description,
                'thumbnail': thumbnail,
                }


class MITIE(TechTVMITIE):
    IE_NAME = u'video.mit.edu'
    _VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)'

    _TEST = {
        u'url': u'http://video.mit.edu/watch/the-government-is-profiling-you-13222/',
        u'file': u'21783.mp4',
        u'md5': u'7db01d5ccc1895fc5010e9c9e13648da',
        u'info_dict': {
            u'title': u'The Government is Profiling You',
            u'description': u'md5:ad5795fe1e1623b73620dbfd47df9afd',
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        page_title = mobj.group('title')
        webpage = self._download_webpage(url, page_title)
        self.to_screen('%s: Extracting %s url' % (page_title, TechTVMITIE.IE_NAME))
        embed_url = self._search_regex(r'<iframe .*?src="(.+?)"', webpage,
            u'embed url')
        return self.url_result(embed_url, ie='TechTVMIT')

View File

@@ -0,0 +1,73 @@
# encoding: utf-8

import re
import xml.etree.ElementTree

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
    ExtractorError,
)


class NaverIE(InfoExtractor):
    _VALID_URL = r'https?://tvcast\.naver\.com/v/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://tvcast.naver.com/v/81652',
        u'file': u'81652.mp4',
        u'info_dict': {
            u'title': u'[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
            u'description': u'합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
            u'upload_date': u'20130903',
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        webpage = self._download_webpage(url, video_id)

        m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
            webpage)
        if m_id is None:
            raise ExtractorError(u'couldn\'t extract vid and key')
        vid = m_id.group(1)
        key = m_id.group(2)
        query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key,})
        query_urls = compat_urllib_parse.urlencode({
            'masterVid': vid,
            'protocol': 'p2p',
            'inKey': key,
        })
        info_xml = self._download_webpage(
            'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
            video_id, u'Downloading video info')
        urls_xml = self._download_webpage(
            'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
            video_id, u'Downloading video formats info')
        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
        urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))

        formats = []
        for format_el in urls.findall('EncodingOptions/EncodingOption'):
            domain = format_el.find('Domain').text
            if domain.startswith('rtmp'):
                continue
            formats.append({
                'url': domain + format_el.find('uri').text,
                'ext': 'mp4',
                'width': int(format_el.find('width').text),
                'height': int(format_el.find('height').text),
            })

        info = {
            'id': video_id,
            'title': info.find('Subject').text,
            'formats': formats,
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'upload_date': info.find('WriteDate').text.replace('.', ''),
            'view_count': int(info.find('PlayCount').text),
        }
        # TODO: Remove when #980 has been merged
        info.update(formats[-1])
        return info

View File

@@ -0,0 +1,33 @@
import re
import xml.etree.ElementTree

from .common import InfoExtractor
from ..utils import find_xpath_attr, compat_str


class NBCNewsIE(InfoExtractor):
    _VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.nbcnews.com/video/nbc-news/52753292',
        u'file': u'52753292.flv',
        u'md5': u'47abaac93c6eaf9ad37ee6c4463a5179',
        u'info_dict': {
            u'title': u'Crew emerges after four-month Mars food study',
            u'description': u'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        info_xml = self._download_webpage('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')).find('video')

        return {'id': video_id,
                'title': info.find('headline').text,
                'ext': 'flv',
                'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
                'description': compat_str(info.find('caption').text),
                'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
                }

View File

@@ -0,0 +1,54 @@
# coding: utf-8

import re
import xml.etree.ElementTree
import json

from .common import InfoExtractor
from ..utils import (
    compat_urlparse,
    ExtractorError,
    find_xpath_attr,
)


class ORFIE(InfoExtractor):
    _VALID_URL = r'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')
        webpage = self._download_webpage(url, playlist_id)

        flash_xml = self._search_regex('ORF.flashXML = \'(.+?)\'', webpage, u'flash xml')
        flash_xml = compat_urlparse.parse_qs('xml=' + flash_xml)['xml'][0]
        flash_config = xml.etree.ElementTree.fromstring(flash_xml.encode('utf-8'))
        playlist_json = self._search_regex(r'playlist\': \'(\[.*?\])\'', webpage, u'playlist').replace(r'\"', '"')
        playlist = json.loads(playlist_json)

        videos = []
        ns = '{http://tempuri.org/XMLSchema.xsd}'
        xpath = '%(ns)sPlaylist/%(ns)sItems/%(ns)sItem' % {'ns': ns}
        webpage_description = self._og_search_description(webpage)
        for (i, (item, info)) in enumerate(zip(flash_config.findall(xpath), playlist), 1):
            # Get best quality url
            rtmp_url = None
            for q in ['Q6A', 'Q4A', 'Q1A']:
                video_url = find_xpath_attr(item, '%sVideoUrl' % ns, 'quality', q)
                if video_url is not None:
                    rtmp_url = video_url.text
                    break
            if rtmp_url is None:
                raise ExtractorError(u'Couldn\'t get video url: %s' % info['id'])
            description = self._html_search_regex(
                r'id="playlist_entry_%s".*?<p>(.*?)</p>' % i, webpage,
                u'description', default=webpage_description, flags=re.DOTALL)
            videos.append({
                '_type': 'video',
                'id': info['id'],
                'title': info['title'],
                'url': rtmp_url,
                'ext': 'flv',
                'description': description,
            })

        return videos

View File

@@ -0,0 +1,34 @@
import re
import json

from .common import InfoExtractor


class PBSIE(InfoExtractor):
    _VALID_URL = r'https?://video.pbs.org/video/(?P<id>\d+)/?'

    _TEST = {
        u'url': u'http://video.pbs.org/video/2365006249/',
        u'file': u'2365006249.mp4',
        u'md5': u'ce1888486f0908d555a8093cac9a7362',
        u'info_dict': {
            u'title': u'A More Perfect Union',
            u'description': u'md5:ba0c207295339c8d6eced00b7c363c6a',
            u'duration': 3190,
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
        info_page = self._download_webpage(info_url, video_id)
        info = json.loads(info_page)
        return {'id': video_id,
                'title': info['title'],
                'url': info['alternate_encoding']['url'],
                'ext': 'mp4',
                'description': info['program'].get('description'),
                'thumbnail': info.get('image_url'),
                'duration': info.get('duration'),
                }

View File

@@ -0,0 +1,42 @@
import re

from .common import InfoExtractor
from ..utils import (
    clean_html,
    compat_parse_qs,
)


class Ro220IE(InfoExtractor):
    IE_NAME = '220.ro'
    _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)'
    _TEST = {
        u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
        u'file': u'LYV6doKo7f.mp4',
        u'md5': u'03af18b73a07b4088753930db7a34add',
        u'info_dict': {
            u"title": u"Luati-le Banii sez 4 ep 1",
            u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('video_id')

        webpage = self._download_webpage(url, video_id)
        flashVars_str = self._search_regex(
            r'<param name="flashVars" value="([^"]+)"',
            webpage, u'flashVars')
        flashVars = compat_parse_qs(flashVars_str)

        info = {
            '_type': 'video',
            'id': video_id,
            'ext': 'mp4',
            'url': flashVars['videoURL'][0],
            'title': flashVars['title'][0],
            'description': clean_html(flashVars['desc'][0]),
            'thumbnail': flashVars['preview'][0],
        }
        return info

View File

@@ -0,0 +1,126 @@
# encoding: utf-8

import re

from .common import InfoExtractor
from ..utils import (
    clean_html,
    ExtractorError,
)


class RTLnowIE(InfoExtractor):
    """Information Extractor for RTL NOW, RTL2 NOW, SUPER RTL NOW and VOX NOW"""
    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?superrtlnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
    _TESTS = [{
        u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
        u'file': u'90419.flv',
        u'info_dict': {
            u'upload_date': u'20070416',
            u'title': u'Ahornallee - Folge 1 - Der Einzug',
            u'description': u'Folge 1 - Der Einzug',
        },
        u'params': {
            u'skip_download': True,
        },
        u'skip': u'Only works from Germany',
    },
    {
        u'url': u'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5',
        u'file': u'69756.flv',
        u'info_dict': {
            u'upload_date': u'20120519',
            u'title': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...',
            u'description': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.',
            u'thumbnail': u'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg',
        },
        u'params': {
            u'skip_download': True,
        },
        u'skip': u'Only works from Germany',
    },
    {
        u'url': u'www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17',
        u'file': u'13883.flv',
        u'info_dict': {
            u'upload_date': u'20090627',
            u'title': u'Voxtours - Südafrika-Reporter II',
            u'description': u'Südafrika-Reporter II',
        },
        u'params': {
            u'skip_download': True,
        },
    },
    {
        u'url': u'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1',
        u'file': u'99205.flv',
        u'info_dict': {
            u'upload_date': u'20080928',
            u'title': u'Medicopter 117 - Angst!',
            u'description': u'Angst!',
            u'thumbnail': u'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg'
        },
        u'params': {
            u'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)

        webpage_url = u'http://' + mobj.group('url')
        video_page_url = u'http://' + mobj.group('base_url')
        video_id = mobj.group(u'video_id')

        webpage = self._download_webpage(webpage_url, video_id)

        note_m = re.search(r'''(?sx)
            <div[ ]style="margin-left:[ ]20px;[ ]font-size:[ ]13px;">(.*?)
            <div[ ]id="playerteaser">''', webpage)
        if note_m:
            msg = clean_html(note_m.group(1))
            raise ExtractorError(msg)

        video_title = self._html_search_regex(r'<title>(?P<title>[^<]+)</title>',
            webpage, u'title')
        playerdata_url = self._html_search_regex(r'\'playerdata\': \'(?P<playerdata_url>[^\']+)\'',
            webpage, u'playerdata_url')

        playerdata = self._download_webpage(playerdata_url, video_id)
        mobj = re.search(r'<title><!\[CDATA\[(?P<description>.+?)\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr\]\]></title>', playerdata)
        if mobj:
            video_description = mobj.group(u'description')
            if mobj.group('upload_date_Y'):
                video_upload_date = mobj.group('upload_date_Y')
            else:
                video_upload_date = u'20' + mobj.group('upload_date_y')
            video_upload_date += mobj.group('upload_date_m') + mobj.group('upload_date_d')
        else:
            video_description = None
            video_upload_date = None
            self._downloader.report_warning(u'Unable to extract description and upload date')

        # Thumbnail: not every video has a thumbnail
        mobj = re.search(r'<meta property="og:image" content="(?P<thumbnail>[^"]+)">', webpage)
        if mobj:
            video_thumbnail = mobj.group(u'thumbnail')
        else:
            video_thumbnail = None

        mobj = re.search(r'<filename [^>]+><!\[CDATA\[(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>[^\]]+)\]\]></filename>', playerdata)
        if mobj is None:
            raise ExtractorError(u'Unable to extract media URL')
        video_url = mobj.group(u'url')
        video_play_path = u'mp4:' + mobj.group(u'play_path')
        video_player_url = video_page_url + u'includes/vodplayer.swf'

        return [{
            'id': video_id,
            'url': video_url,
            'play_path': video_play_path,
            'page_url': video_page_url,
            'player_url': video_player_url,
            'ext': 'flv',
            'title': video_title,
            'description': video_description,
            'upload_date': video_upload_date,
            'thumbnail': video_thumbnail,
        }]

View File

@@ -0,0 +1,23 @@
import re

from .common import InfoExtractor


class SlashdotIE(InfoExtractor):
    _VALID_URL = r'https?://tv.slashdot.org/video/\?embed=(?P<id>.*?)(&|$)'

    _TEST = {
        u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz',
        u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4',
        u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735',
        u'info_dict': {
            u'title': u' Meet the Stampede Supercomputing Cluster\'s Administrator',
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        ooyala_url = self._search_regex(r'<script src="(.*?)"', webpage, 'ooyala url')
        return self.url_result(ooyala_url, 'Ooyala')

View File

@@ -0,0 +1,90 @@
# encoding: utf-8

import json
import re

from .common import InfoExtractor
from ..utils import ExtractorError


class SohuIE(InfoExtractor):
    _VALID_URL = r'https?://tv\.sohu\.com/\d+?/n(?P<id>\d+)\.shtml.*?'

    _TEST = {
        u'url': u'http://tv.sohu.com/20130724/n382479172.shtml#super',
        u'file': u'382479172.mp4',
        u'md5': u'bde8d9a6ffd82c63a1eefaef4eeefec7',
        u'info_dict': {
            u'title': u'MVFar East Movement《The Illest》',
        },
    }

    def _real_extract(self, url):

        def _fetch_data(vid_id):
            base_data_url = u'http://hot.vrs.sohu.com/vrs_flash.action?vid='
            data_url = base_data_url + str(vid_id)
            data_json = self._download_webpage(
                data_url, video_id,
                note=u'Downloading JSON data for ' + str(vid_id))
            return json.loads(data_json)

        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        raw_title = self._html_search_regex(r'(?s)<title>(.+?)</title>',
                                            webpage, u'video title')
        title = raw_title.partition('-')[0].strip()

        vid = self._html_search_regex(r'var vid="(\d+)"', webpage,
                                      u'video path')
        data = _fetch_data(vid)

        QUALITIES = ('ori', 'super', 'high', 'nor')
        vid_ids = [data['data'][q + 'Vid']
                   for q in QUALITIES
                   if data['data'][q + 'Vid'] != 0]
        if not vid_ids:
            raise ExtractorError(u'No formats available for this video')

        # For now, we just pick the highest available quality
        vid_id = vid_ids[-1]

        format_data = data if vid == vid_id else _fetch_data(vid_id)
        part_count = format_data['data']['totalBlocks']
        allot = format_data['allot']
        prot = format_data['prot']
        clipsURL = format_data['data']['clipsURL']
        su = format_data['data']['su']

        playlist = []
        for i in range(part_count):
            part_url = ('http://%s/?prot=%s&file=%s&new=%s' %
                        (allot, prot, clipsURL[i], su[i]))
            part_str = self._download_webpage(
                part_url, video_id,
                note=u'Downloading part %d of %d' % (i + 1, part_count))

            part_info = part_str.split('|')
            video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])

            video_info = {
                'id': '%s_part%02d' % (video_id, i + 1),
                'title': title,
                'url': video_url,
                'ext': 'mp4',
            }
            playlist.append(video_info)

        if len(playlist) == 1:
            info = playlist[0]
            info['id'] = video_id
        else:
            info = {
                '_type': 'playlist',
                'entries': playlist,
                'id': video_id,
            }

        return info
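
Note: the tail of SohuIE illustrates youtube-dl's playlist-result convention, which several extractors in this batch use. A hedged standalone sketch with dummy values; only the '_type'/'entries' structure is taken from the code above:

    # Multi-part videos become a '_type': 'playlist' container;
    # a single part is returned as a plain info dict instead.
    def build_result(video_id, title, part_urls):
        entries = [{
            'id': '%s_part%02d' % (video_id, i + 1),
            'title': title,
            'url': part_url,
            'ext': 'mp4',
        } for (i, part_url) in enumerate(part_urls)]
        if len(entries) == 1:
            info = entries[0]
            info['id'] = video_id
            return info
        return {'_type': 'playlist', 'id': video_id, 'entries': entries}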

View File

@@ -4,6 +4,7 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     compat_str,
+    compat_urlparse,
     ExtractorError,
     unified_strdate,
@@ -22,6 +23,7 @@ class SoundcloudIE(InfoExtractor):
     _VALID_URL = r'''^(?:https?://)?
                     (?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$)
                        |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
+                       |(?P<widget>w.soundcloud.com/player/?.*?url=.*)
                     )
                     '''
     IE_NAME = u'soundcloud'
@@ -79,6 +81,9 @@ class SoundcloudIE(InfoExtractor):
         if track_id is not None:
             info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
             full_title = track_id
+        elif mobj.group('widget'):
+            query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+            return self.url_result(query['url'][0], ie='Soundcloud')
         else:
             # extract uploader (which is in the url)
             uploader = mobj.group(1)
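
Note: the widget branch above only needs the `url` query parameter of the player address. A standalone sketch of that extraction (the widget URL value is a dummy):

    try:
        from urllib.parse import urlparse, parse_qs  # Python 3
    except ImportError:
        from urlparse import urlparse, parse_qs      # Python 2

    widget = 'https://w.soundcloud.com/player/?url=http%3A//api.soundcloud.com/tracks/12345'
    query = parse_qs(urlparse(widget).query)  # parse_qs also unquotes the value
    print(query['url'][0])  # -> http://api.soundcloud.com/tracks/12345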

View File

@@ -5,13 +5,13 @@ from .common import InfoExtractor
 class StatigramIE(InfoExtractor):
     _VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)'
     _TEST = {
-        u'url': u'http://statigr.am/p/484091715184808010_284179915',
-        u'file': u'484091715184808010_284179915.mp4',
-        u'md5': u'deda4ff333abe2e118740321e992605b',
+        u'url': u'http://statigr.am/p/522207370455279102_24101272',
+        u'file': u'522207370455279102_24101272.mp4',
+        u'md5': u'6eb93b882a3ded7c378ee1d6884b1814',
         u'info_dict': {
-            u"uploader_id": u"videoseconds",
-            u"title": u"Instagram photo by @videoseconds"
-        }
+            u'uploader_id': u'aguynamedpatrick',
+            u'title': u'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
+        },
     }

     def _real_extract(self, url):

View File

@@ -0,0 +1,73 @@
import json
import re
import xml.etree.ElementTree

from .common import InfoExtractor


class TriluliluIE(InfoExtractor):
    _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?trilulilu\.ro/video-(?P<category>[^/]+)/(?P<video_id>[^/]+)'
    _TEST = {
        u"url": u"http://www.trilulilu.ro/video-animatie/big-buck-bunny-1",
        u'file': u"big-buck-bunny-1.mp4",
        u'info_dict': {
            u"title": u"Big Buck Bunny",
            u"description": u":) pentru copilul din noi",
        },
        # Server ignores Range headers (--test)
        u"params": {
            u"skip_download": True
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('video_id')

        webpage = self._download_webpage(url, video_id)

        title = self._og_search_title(webpage)
        thumbnail = self._og_search_thumbnail(webpage)
        description = self._og_search_description(webpage)

        log_str = self._search_regex(
            r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, u'log info')
        log = json.loads(log_str)

        format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
                      u'video-formats2' % log)
        format_str = self._download_webpage(
            format_url, video_id,
            note=u'Downloading formats',
            errnote=u'Error while downloading formats')

        format_doc = xml.etree.ElementTree.fromstring(format_str)

        video_url_template = (
            u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
            u'&source=site&hash=%(hash)s&username=%(userid)s&'
            u'key=ministhebest&format=%%s&sig=&exp=' %
            log)
        formats = [
            {
                'format': fnode.text,
                'url': video_url_template % fnode.text,
            }
            for fnode in format_doc.findall('./formats/format')
        ]

        info = {
            '_type': 'video',
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }

        # TODO: Remove when #980 has been merged
        info['url'] = formats[-1]['url']
        info['ext'] = formats[-1]['format'].partition('-')[0]

        return info
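
Note: the format-url template above relies on two-stage %-formatting: the first pass fills the fields from `log` while `%%s` survives as a literal `%s`, which the per-format pass then fills. A sketch with dummy values:

    log = {'server': '102', 'hash': 'abc123', 'userid': 'someuser'}
    template = ('http://fs%(server)s.trilulilu.ro/stream.php?type=video'
                '&hash=%(hash)s&username=%(userid)s&format=%%s' % log)
    # Second pass: each format name fills the remaining %s placeholder.
    print(template % 'mp4-360p')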

View File

@@ -0,0 +1,32 @@
import re

from .common import InfoExtractor


class UnistraIE(InfoExtractor):
    _VALID_URL = r'http://utv.unistra.fr/(?:index|video).php\?id_video\=(\d+)'

    _TEST = {
        u'url': u'http://utv.unistra.fr/video.php?id_video=154',
        u'file': u'154.mp4',
        u'md5': u'736f605cfdc96724d55bb543ab3ced24',
        u'info_dict': {
            u'title': u'M!ss Yella',
            u'description': u'md5:104892c71bd48e55d70b902736b81bbf',
        },
    }

    def _real_extract(self, url):
        id = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, id)
        file = re.search(r'file: "(.*?)",', webpage).group(1)
        title = self._html_search_regex(r'<title>UTV - (.*?)</', webpage, u'title')

        video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file

        return {'id': id,
                'title': title,
                'ext': 'mp4',
                'url': video_url,
                'description': self._html_search_regex(r'<meta name="Description" content="(.*?)"', webpage, u'description', flags=re.DOTALL),
                'thumbnail': self._search_regex(r'image: "(.*?)"', webpage, u'thumbnail'),
                }

View File

@@ -0,0 +1,56 @@
import re
import json

from .common import InfoExtractor
from ..utils import (
    compat_urlparse,
    get_element_by_id,
    clean_html,
)


class VeeHDIE(InfoExtractor):
    _VALID_URL = r'https?://veehd.com/video/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://veehd.com/video/4686958',
        u'file': u'4686958.mp4',
        u'info_dict': {
            u'title': u'Time Lapse View from Space ( ISS)',
            u'uploader_id': u'spotted',
            u'description': u'md5:f0094c4cf3a72e22bc4e4239ef767ad7',
        },
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        player_path = self._search_regex(r'\$\("#playeriframe"\).attr\({src : "(.+?)"',
            webpage, u'player path')
        player_url = compat_urlparse.urljoin(url, player_path)
        player_page = self._download_webpage(player_url, video_id,
            u'Downloading player page')
        config_json = self._search_regex(r'value=\'config=({.+?})\'',
            player_page, u'config json')
        config = json.loads(config_json)

        video_url = compat_urlparse.unquote(config['clip']['url'])
        title = clean_html(get_element_by_id('videoName', webpage).rpartition('|')[0])
        uploader_id = self._html_search_regex(r'<a href="/profile/\d+">(.+?)</a>',
            webpage, u'uploader')
        thumbnail = self._search_regex(r'<img id="veehdpreview" src="(.+?)"',
            webpage, u'thumbnail')
        description = self._html_search_regex(r'<td class="infodropdown".*?<div>(.*?)<ul',
            webpage, u'description', flags=re.DOTALL)

        return {
            '_type': 'video',
            'id': video_id,
            'title': title,
            'url': video_url,
            'ext': 'mp4',
            'uploader_id': uploader_id,
            'thumbnail': thumbnail,
            'description': description,
        }

View File

@@ -8,10 +8,10 @@ from ..utils import (
 class VevoIE(InfoExtractor):
     """
-    Accecps urls from vevo.com or in the format 'vevo:{id}'
+    Accepts urls from vevo.com or in the format 'vevo:{id}'
     (currently used by MTVIE)
     """
-    _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*)$'
+    _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'
     _TEST = {
         u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
         u'file': u'GB1101300280.mp4',
@@ -19,7 +19,7 @@ class VevoIE(InfoExtractor):
         u'info_dict': {
             u"upload_date": u"20130624",
             u"uploader": u"Hurts",
-            u"title": u"Somebody To Die For"
+            u"title": u"Somebody to Die For"
         }
     }

View File

@@ -14,7 +14,7 @@ class VideofyMeIE(InfoExtractor):
     _TEST = {
         u'url': u'http://www.videofy.me/thisisvideofyme/1100701',
         u'file': u'1100701.mp4',
-        u'md5': u'2046dd5758541d630bfa93e741e2fd79',
+        u'md5': u'c77d700bdc16ae2e9f3c26019bd96143',
         u'info_dict': {
             u'title': u'This is VideofyMe',
             u'description': None,
@@ -32,9 +32,8 @@ class VideofyMeIE(InfoExtractor):
         config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
         video = config.find('video')
         sources = video.find('sources')
-        url_node = find_xpath_attr(sources, 'source', 'id', 'HQ on')
-        if url_node is None:
-            url_node = find_xpath_attr(sources, 'source', 'id', 'HQ off')
+        url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key)
+            for key in ['on', 'av', 'off']] if node is not None)
         video_url = url_node.find('url').text

         return {'id': video_id,

View File

@@ -20,7 +20,8 @@ class VimeoIE(InfoExtractor):
     _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)(?:[?].*)?$'
     _NETRC_MACHINE = 'vimeo'
     IE_NAME = u'vimeo'
-    _TEST = {
+    _TESTS = [
+        {
             u'url': u'http://vimeo.com/56015672',
             u'file': u'56015672.mp4',
             u'md5': u'8879b6cc097e987f02484baf890129e5',
@@ -29,9 +30,31 @@ class VimeoIE(InfoExtractor):
                 u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
                 u"uploader_id": u"user7108434",
                 u"uploader": u"Filippo Valsorda",
-                u"title": u"youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550"
-        }
-    }
+                u"title": u"youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
+            },
+        },
+        {
+            u'url': u'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876',
+            u'file': u'68093876.mp4',
+            u'md5': u'3b5ca6aa22b60dfeeadf50b72e44ed82',
+            u'note': u'Vimeo Pro video (#1197)',
+            u'info_dict': {
+                u'uploader_id': u'openstreetmapus',
+                u'uploader': u'OpenStreetMap US',
+                u'title': u'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
+            },
+        },
+        {
+            u'url': u'http://player.vimeo.com/video/54469442',
+            u'file': u'54469442.mp4',
+            u'md5': u'619b811a4417aa4abe78dc653becf511',
+            u'note': u'Videos that embed the url in the player page',
+            u'info_dict': {
+                u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software',
+                u'uploader': u'The BLN & Business of Software',
+            },
+        },
+    ]

     def _login(self):
         (username, password) = self._get_login_info()
@@ -83,7 +106,9 @@ class VimeoIE(InfoExtractor):
         video_id = mobj.group('id')
         if not mobj.group('proto'):
             url = 'https://' + url
-        if mobj.group('direct_link') or mobj.group('pro'):
+        elif mobj.group('pro'):
+            url = 'http://player.vimeo.com/video/' + video_id
+        elif mobj.group('direct_link'):
             url = 'https://vimeo.com/' + video_id

         # Retrieve video webpage to extract further information
@@ -97,7 +122,8 @@ class VimeoIE(InfoExtractor):
         # Extract the config JSON
         try:
-            config = webpage.split(' = {config:')[1].split(',assets:')[0]
+            config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'],
+                webpage, u'info section', flags=re.DOTALL)
             config = json.loads(config)
         except:
             if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
@@ -117,12 +143,22 @@ class VimeoIE(InfoExtractor):
         video_uploader_id = config["video"]["owner"]["url"].split('/')[-1] if config["video"]["owner"]["url"] else None

         # Extract video thumbnail
-        video_thumbnail = config["video"]["thumbnail"]
+        video_thumbnail = config["video"].get("thumbnail")
+        if video_thumbnail is None:
+            _, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in config["video"]["thumbs"].items())[-1]

         # Extract video description
-        video_description = get_element_by_attribute("itemprop", "description", webpage)
-        if video_description: video_description = clean_html(video_description)
-        else: video_description = u''
+        video_description = None
+        try:
+            video_description = get_element_by_attribute("itemprop", "description", webpage)
+            if video_description: video_description = clean_html(video_description)
+        except AssertionError as err:
+            # On some pages like (http://player.vimeo.com/video/54469442) the
+            # html tags are not closed, python 2.6 cannot handle it
+            if err.args[0] == 'we should not get here!':
+                pass
+            else:
+                raise

         # Extract upload date
         video_upload_date = None
@@ -139,14 +175,15 @@ class VimeoIE(InfoExtractor):
         # TODO bind to format param
         codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')]
         files = { 'hd': [], 'sd': [], 'other': []}
+        config_files = config["video"].get("files") or config["request"].get("files")
         for codec_name, codec_extension in codecs:
-            if codec_name in config["video"]["files"]:
-                if 'hd' in config["video"]["files"][codec_name]:
+            if codec_name in config_files:
+                if 'hd' in config_files[codec_name]:
                     files['hd'].append((codec_name, codec_extension, 'hd'))
-                elif 'sd' in config["video"]["files"][codec_name]:
+                elif 'sd' in config_files[codec_name]:
                     files['sd'].append((codec_name, codec_extension, 'sd'))
                 else:
-                    files['other'].append((codec_name, codec_extension, config["video"]["files"][codec_name][0]))
+                    files['other'].append((codec_name, codec_extension, config_files[codec_name][0]))

         for quality in ('hd', 'sd', 'other'):
             if len(files[quality]) > 0:
@@ -158,6 +195,10 @@ class VimeoIE(InfoExtractor):
             else:
                 raise ExtractorError(u'No known codec found')

+        video_url = None
+        if isinstance(config_files[video_codec], dict):
+            video_url = config_files[video_codec][video_quality].get("url")
+        if video_url is None:
             video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
                         %(video_id, sig, timestamp, video_quality, video_codec.upper())
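
Note: the thumbnail fallback introduced above picks the widest entry of config["video"]["thumbs"], a dict keyed by pixel width as a string. The same selection on dummy data:

    thumbs = {'200': 'http://example.com/t200.jpg',
              '1280': 'http://example.com/t1280.jpg'}
    # Sort numerically by width and keep the url of the largest one.
    _, thumbnail = sorted((int(width), t_url) for (width, t_url) in thumbs.items())[-1]
    print(thumbnail)  # -> the 1280px variant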

View File

@@ -6,7 +6,6 @@ import re
 from .common import InfoExtractor
 from ..utils import (
-    compat_urllib_parse,
     unified_strdate,
 )

View File

@@ -3,7 +3,8 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     compat_urllib_parse,
+    unescapeHTML,
+    determine_ext,
     ExtractorError,
 )
@@ -36,15 +37,16 @@ class XHamsterIE(InfoExtractor):
             video_url = compat_urllib_parse.unquote(mobj.group('file'))
         else:
             video_url = mobj.group('server')+'/key='+mobj.group('file')
-        video_extension = video_url.split('.')[-1]

         video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
             webpage, u'title')

-        # Can't see the description anywhere in the UI
-        # video_description = self._html_search_regex(r'<span>Description: </span>(?P<description>[^<]+)',
-        #     webpage, u'description', fatal=False)
-        # if video_description: video_description = unescapeHTML(video_description)
+        # Only a few videos have a description
+        mobj = re.search('<span>Description: </span>(?P<description>[^<]+)', webpage)
+        if mobj:
+            video_description = unescapeHTML(mobj.group('description'))
+        else:
+            video_description = None

         mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
         if mobj:
@@ -62,9 +64,9 @@ class XHamsterIE(InfoExtractor):
         return [{
             'id': video_id,
             'url': video_url,
-            'ext': video_extension,
+            'ext': determine_ext(video_url),
             'title': video_title,
-            # 'description': video_description,
+            'description': video_description,
             'upload_date': video_upload_date,
             'uploader_id': video_uploader_id,
             'thumbnail': video_thumbnail

View File

@@ -12,14 +12,16 @@ from ..utils import (
     unescapeHTML,
     unified_strdate,
 )
+from ..aes import (
+    aes_decrypt_text
+)

 class YouPornIE(InfoExtractor):
     _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'
     _TEST = {
         u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
         u'file': u'505835.mp4',
-        u'md5': u'c37ddbaaa39058c76a7e86c6813423c1',
+        u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89',
         u'info_dict': {
             u"upload_date": u"20101221",
             u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?",
@@ -75,7 +77,15 @@ class YouPornIE(InfoExtractor):
         # Get all of the links from the page
         LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
         links = re.findall(LINK_RE, download_list_html)
-        if(len(links) == 0):
+
+        # Get link of hd video if available
+        mobj = re.search(r'var encryptedQuality720URL = \'(?P<encrypted_video_url>[a-zA-Z0-9+/]+={0,2})\';', webpage)
+        if mobj != None:
+            encrypted_video_url = mobj.group(u'encrypted_video_url')
+            video_url = aes_decrypt_text(encrypted_video_url, video_title, 32).decode('utf-8')
+            links = [video_url] + links
+
+        if not links:
             raise ExtractorError(u'ERROR: no known formats available for video')

         self.to_screen(u'Links found: %d' % len(links))
@@ -112,7 +122,7 @@ class YouPornIE(InfoExtractor):
             self._print_formats(formats)
             return

-        req_format = self._downloader.params.get('format', None)
+        req_format = self._downloader.params.get('format', 'best')
         self.to_screen(u'Format: %s' % req_format)

         if req_format is None or req_format == 'best':
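
Note: the HD fallback added above decodes a base64-like blob with youtube-dl's own AES helper, keyed on the video title. A hedged sketch of the same step as a standalone function; the import path is an assumption for running outside the package, while the regex and call signature are taken from the diff:

    import re
    from youtube_dl.aes import aes_decrypt_text  # helper shipped alongside this change

    def extract_hd_url(webpage, video_title):
        mobj = re.search(r"var encryptedQuality720URL = '(?P<u>[a-zA-Z0-9+/]+={0,2})';", webpage)
        if mobj is None:
            return None
        # The title acts as the password; 32 is the key size used above.
        return aes_decrypt_text(mobj.group('u'), video_title, 32).decode('utf-8')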

View File

@@ -135,37 +135,60 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
     _VALID_URL = r"""^
                      (
                          (?:https?://)?                                       # http(s):// (optional)
-                         (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|
+                         (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
                             tube\.majestyc\.net/)                             # the various hostnames, with wildcard subdomains
                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                          (?:                                                  # the various things that can precede the ID:
                              (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                              |(?:                                             # or the v= param in all its forms
-                                 (?:watch|movie(?:_popup)?(?:\.php)?)?        # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
+                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                  (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                  v=
                              )
-                         )?                                                   # optional -> youtube.com/xxxx is OK
+                         ))
+                         |youtu\.be/                                          # just youtu.be/xxxx
+                         )
                      )?                                                       # all until now is optional -> you can pass the naked ID
                      ([0-9A-Za-z_-]+)                                         # here is it! the YouTube video ID
                      (?(1).+)?                                                # if we found the ID, everything can follow
                      $"""
     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
     # Listed in order of quality
-    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13',
-                          '95', '94', '93', '92', '132', '151',
+    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
+                          # Apple HTTP Live Streaming
+                          '96', '95', '94', '93', '92', '132', '151',
+                          # 3D
                           '85', '84', '102', '83', '101', '82', '100',
+                          # Dash video
+                          '138', '137', '248', '136', '247', '135', '246',
+                          '245', '244', '134', '243', '133', '242', '160',
+                          # Dash audio
+                          '141', '172', '140', '171', '139',
                           ]
-    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13',
-                                      '95', '94', '93', '92', '132', '151',
+    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
+                                      # Apple HTTP Live Streaming
+                                      '96', '95', '94', '93', '92', '132', '151',
+                                      # 3D
                                       '85', '102', '84', '101', '83', '100', '82',
+                                      # Dash video
+                                      '138', '248', '137', '247', '136', '246', '245',
+                                      '244', '135', '243', '134', '242', '133', '160',
+                                      # Dash audio
+                                      '172', '141', '171', '140', '139',
                                       ]
+    _video_formats_map = {
+        'flv': ['35', '34', '6', '5'],
+        '3gp': ['36', '17', '13'],
+        'mp4': ['38', '37', '22', '18'],
+        'webm': ['46', '45', '44', '43'],
+    }
     _video_extensions = {
         '13': '3gp',
-        '17': 'mp4',
+        '17': '3gp',
         '18': 'mp4',
         '22': 'mp4',
+        '36': '3gp',
         '37': 'mp4',
         '38': 'mp4',
         '43': 'webm',
@@ -182,7 +205,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         '101': 'webm',
         '102': 'webm',

-        # videos that use m3u8
+        # Apple HTTP Live Streaming
         '92': 'mp4',
         '93': 'mp4',
         '94': 'mp4',
@@ -190,6 +213,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         '96': 'mp4',
         '132': 'mp4',
         '151': 'mp4',
+        # Dash mp4
+        '133': 'mp4',
+        '134': 'mp4',
+        '135': 'mp4',
+        '136': 'mp4',
+        '137': 'mp4',
+        '138': 'mp4',
+        '139': 'mp4',
+        '140': 'mp4',
+        '141': 'mp4',
+        '160': 'mp4',
+        # Dash webm
+        '171': 'webm',
+        '172': 'webm',
+        '242': 'webm',
+        '243': 'webm',
+        '244': 'webm',
+        '245': 'webm',
+        '246': 'webm',
+        '247': 'webm',
+        '248': 'webm',
     }
     _video_dimensions = {
         '5': '240x400',
@@ -200,6 +246,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         '22': '720x1280',
         '34': '360x640',
         '35': '480x854',
+        '36': '240x320',
         '37': '1080x1920',
         '38': '3072x4096',
         '43': '360x640',
@@ -220,8 +267,55 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         '102': '720p',
         '132': '240p',
         '151': '72p',
+        '133': '240p',
+        '134': '360p',
+        '135': '480p',
+        '136': '720p',
+        '137': '1080p',
+        '138': '>1080p',
+        '139': '48k',
+        '140': '128k',
+        '141': '256k',
+        '160': '192p',
+        '171': '128k',
+        '172': '256k',
+        '242': '240p',
+        '243': '360p',
+        '244': '480p',
+        '245': '480p',
+        '246': '480p',
+        '247': '720p',
+        '248': '1080p',
     }
-    _3d_itags = ['85', '84', '102', '83', '101', '82', '100']
+    _special_itags = {
+        '82': '3D',
+        '83': '3D',
+        '84': '3D',
+        '85': '3D',
+        '100': '3D',
+        '101': '3D',
+        '102': '3D',
+        '133': 'DASH Video',
+        '134': 'DASH Video',
+        '135': 'DASH Video',
+        '136': 'DASH Video',
+        '137': 'DASH Video',
+        '138': 'DASH Video',
+        '139': 'DASH Audio',
+        '140': 'DASH Audio',
+        '141': 'DASH Audio',
+        '160': 'DASH Video',
+        '171': 'DASH Audio',
+        '172': 'DASH Audio',
+        '242': 'DASH Video',
+        '243': 'DASH Video',
+        '244': 'DASH Video',
+        '245': 'DASH Video',
+        '246': 'DASH Video',
+        '247': 'DASH Video',
+        '248': 'DASH Video',
+    }

     IE_NAME = u'youtube'
     _TESTS = [
         {
@@ -254,8 +348,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             u"info_dict": {
                 u"upload_date": u"20120506",
                 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
-                u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c",
-                u"uploader": u"IconaPop",
+                u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
+                u"uploader": u"Icona Pop",
                 u"uploader_id": u"IconaPop"
             }
         },
@@ -335,22 +429,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
         elif len(s) == 90:
             return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
+        elif len(s) == 89:
+            return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
         elif len(s) == 88:
-            return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
+            return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
         elif len(s) == 87:
-            return s[83:53:-1] + s[3] + s[52:40:-1] + s[86] + s[39:10:-1] + s[0] + s[9:3:-1] + s[53]
+            return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
         elif len(s) == 86:
-            return s[5:20] + s[2] + s[21:]
+            return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
         elif len(s) == 85:
-            return s[2:8] + s[0] + s[9:21] + s[65] + s[22:65] + s[84] + s[66:82] + s[21]
+            return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
         elif len(s) == 84:
-            return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27]
+            return s[81:36:-1] + s[0] + s[35:2:-1]
         elif len(s) == 83:
-            return s[:15] + s[80] + s[16:80] + s[15]
+            return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
         elif len(s) == 82:
-            return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34]
+            return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82]
         elif len(s) == 81:
             return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
+        elif len(s) == 80:
+            return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
         elif len(s) == 79:
             return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
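
Note: each branch above is a fixed rearrangement of the scrambled signature, built purely from string slices; some positions are deliberately dropped, so the output can be shorter than the input. The len(s) == 82 rule applied to a dummy signature:

    s = ''.join(chr(33 + i) for i in range(82))  # dummy 82-char signature
    descrambled = s[1:19] + s[0] + s[20:68] + s[19] + s[69:82]
    print(len(s), len(descrambled))  # 82 -> 81: position 68 is discarded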
@@ -373,11 +471,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         try:
             sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            return (u'unable to download video subtitles: %s' % compat_str(err), None)
+            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
+            return {}
         sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
         sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list)
         if not sub_lang_list:
-            return (u'video doesn\'t have subtitles', None)
+            self._downloader.report_warning(u'video doesn\'t have subtitles')
+            return {}
         return sub_lang_list

     def _list_available_subtitles(self, video_id):
@@ -386,8 +486,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
     def _request_subtitle(self, sub_lang, sub_name, video_id, format):
         """
-        Return tuple:
-        (error_message, sub_lang, sub)
+        Return the subtitle as a string or None if they are not found
         """
         self.report_video_subtitles_request(video_id, sub_lang, format)
         params = compat_urllib_parse.urlencode({
@@ -400,21 +499,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         try:
             sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            return (u'unable to download video subtitles: %s' % compat_str(err), None, None)
+            self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
+            return
         if not sub:
-            return (u'Did not fetch video subtitles', None, None)
-        return (None, sub_lang, sub)
+            self._downloader.report_warning(u'Did not fetch video subtitles')
+            return
+        return sub

     def _request_automatic_caption(self, video_id, webpage):
         """We need the webpage for getting the captions url, pass it as an
         argument to speed up the process."""
-        sub_lang = self._downloader.params.get('subtitleslang') or 'en'
+        sub_lang = (self._downloader.params.get('subtitleslangs') or ['en'])[0]
         sub_format = self._downloader.params.get('subtitlesformat')
         self.to_screen(u'%s: Looking for automatic captions' % video_id)
         mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
         err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
         if mobj is None:
-            return [(err_msg, None, None)]
+            self._downloader.report_warning(err_msg)
+            return {}
         player_config = json.loads(mobj.group(1))
         try:
             args = player_config[u'args']
@@ -429,40 +531,43 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             })
             subtitles_url = caption_url + '&' + params
             sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
-            return [(None, sub_lang, sub)]
-        except KeyError:
-            return [(err_msg, None, None)]
+            return {sub_lang: sub}
+        # An extractor error can be raised by the download process if there are
+        # no automatic captions but there are subtitles
+        except (KeyError, ExtractorError):
+            self._downloader.report_warning(err_msg)
+            return {}

-    def _extract_subtitle(self, video_id):
+    def _extract_subtitles(self, video_id):
         """
-        Return a list with a tuple:
-        [(error_message, sub_lang, sub)]
+        Return a dictionary: {language: subtitles} or {} if the subtitles
+        couldn't be found
         """
-        sub_lang_list = self._get_available_subtitles(video_id)
+        available_subs_list = self._get_available_subtitles(video_id)
         sub_format = self._downloader.params.get('subtitlesformat')
-        if isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles
-            return [(sub_lang_list[0], None, None)]
-        if self._downloader.params.get('subtitleslang', False):
-            sub_lang = self._downloader.params.get('subtitleslang')
-        elif 'en' in sub_lang_list:
-            sub_lang = 'en'
-        else:
-            sub_lang = list(sub_lang_list.keys())[0]
-        if not sub_lang in sub_lang_list:
-            return [(u'no closed captions found in the specified language "%s"' % sub_lang, None, None)]
+        if not available_subs_list: #There was some error, it didn't get the available subtitles
+            return {}
+        if self._downloader.params.get('allsubtitles', False):
+            sub_lang_list = available_subs_list
+        else:
+            if self._downloader.params.get('subtitleslangs', False):
+                requested_langs = self._downloader.params.get('subtitleslangs')
+            elif 'en' in available_subs_list:
+                requested_langs = ['en']
+            else:
+                requested_langs = [list(available_subs_list.keys())[0]]

-        subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
-        return [subtitle]
-
-    def _extract_all_subtitles(self, video_id):
-        sub_lang_list = self._get_available_subtitles(video_id)
-        sub_format = self._downloader.params.get('subtitlesformat')
-        if isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles
-            return [(sub_lang_list[0], None, None)]
-        subtitles = []
+            sub_lang_list = {}
+            for sub_lang in requested_langs:
+                if not sub_lang in available_subs_list:
+                    self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
+                    continue
+                sub_lang_list[sub_lang] = available_subs_list[sub_lang]
+
+        subtitles = {}
         for sub_lang in sub_lang_list:
             subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
-            subtitles.append(subtitle)
+            if subtitle:
+                subtitles[sub_lang] = subtitle
         return subtitles

     def _print_formats(self, formats):
@@ -470,7 +575,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         for x in formats:
             print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
                                         self._video_dimensions.get(x, '???'),
-                                        ' (3D)' if x in self._3d_itags else ''))
+                                        ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))

     def _extract_id(self, url):
         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@@ -505,13 +610,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
        else:
            # Specific formats. We pick the first in a slash-delimited sequence.
-           # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
+           # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
+           # available in the specified format. For example,
+           # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
+           # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
+           # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
            req_formats = req_format.split('/')
            video_url_list = None
            for rf in req_formats:
                if rf in url_map:
                    video_url_list = [(rf, url_map[rf])]
                    break
+               if rf in self._video_formats_map:
+                   for srf in self._video_formats_map[rf]:
+                       if srf in url_map:
+                           video_url_list = [(srf, url_map[srf])]
+                           break
+                   else:
+                       continue
+                   break
            if video_url_list is None:
                raise ExtractorError(u'requested format not available')
            return video_url_list
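To illustrate the named-format selection the comment describes, here is a minimal standalone sketch of resolving a slash-delimited request against an itag-to-URL map; `video_formats_map` and its orderings are assumptions for the example, not the extractor's exact table:

    # Named formats map to itags ordered best-first (assumed ordering).
    video_formats_map = {
        'mp4': ['38', '37', '22', '18'],
        'flv': ['35', '34', '5'],
    }

    def pick_format(req_format, url_map):
        for rf in req_format.split('/'):
            if rf in url_map:                # exact itag match wins immediately
                return rf
            for itag in video_formats_map.get(rf, []):
                if itag in url_map:          # best available itag of a named format
                    return itag
        return None

    # '22' is the best available mp4 here, so 'mp4' resolves to it:
    print(pick_format('1/mp4/3', {'22': 'http://e/22', '5': 'http://e/5'}))  # -> '22'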
@@ -526,7 +643,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
        formats_urls = _get_urls(manifest)
        for format_url in formats_urls:
-           itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
+           itag = self._search_regex(r'itag%3D(\d+?)/', format_url, 'itag')
            url_map[itag] = format_url
        return url_map
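The new pattern matches the percent-encoded form: `%3D` is the URL encoding of `=`, so the itag now appears in the manifest URLs as `itag%3D22/` rather than `itag/22/`. A quick check (the sample URL is invented):

    import re

    format_url = 'http://example.com/videoplayback/id/abc/itag%3D22/source/youtube/'
    print(re.search(r'itag%3D(\d+?)/', format_url).group(1))  # -> '22'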
@@ -653,25 +770,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        # subtitles
        video_subtitles = None
-       if self._downloader.params.get('writesubtitles', False):
-           video_subtitles = self._extract_subtitle(video_id)
-           if video_subtitles:
-               (sub_error, sub_lang, sub) = video_subtitles[0]
-               if sub_error:
-                   self._downloader.report_warning(sub_error)
-       if self._downloader.params.get('writeautomaticsub', False):
+       if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
+           video_subtitles = self._extract_subtitles(video_id)
+       elif self._downloader.params.get('writeautomaticsub', False):
            video_subtitles = self._request_automatic_caption(video_id, video_webpage)
-           (sub_error, sub_lang, sub) = video_subtitles[0]
-           if sub_error:
-               self._downloader.report_warning(sub_error)
-       if self._downloader.params.get('allsubtitles', False):
-           video_subtitles = self._extract_all_subtitles(video_id)
-           for video_subtitle in video_subtitles:
-               (sub_error, sub_lang, sub) = video_subtitle
-               if sub_error:
-                   self._downloader.report_warning(sub_error)

        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id)
@@ -697,6 +799,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                if m_s is not None:
                    self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
                    video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
+               m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
+               if m_s is not None:
+                   if 'url_encoded_fmt_stream_map' in video_info:
+                       video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
+                   else:
+                       video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
+               elif 'adaptive_fmts' in video_info:
+                   if 'url_encoded_fmt_stream_map' in video_info:
+                       video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
+                   else:
+                       video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
            except ValueError:
                pass
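The added branch folds `adaptive_fmts` into the same comma-separated, URL-encoded `url_encoded_fmt_stream_map` string, so one parser can handle both stream lists downstream. Roughly, such a merged string splits like this (sample data invented):

    try:
        from urllib.parse import parse_qs  # Python 3
    except ImportError:
        from urlparse import parse_qs      # Python 2

    stream_map = 'itag=22&url=http%3A%2F%2Fe%2F22,itag=140&url=http%3A%2F%2Fe%2F140'
    streams = [parse_qs(s) for s in stream_map.split(',')]
    print([s['itag'][0] for s in streams])  # -> ['22', '140']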
@@ -756,7 +869,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
                                                 self._video_dimensions.get(format_param, '???'),
-                                                ' (3D)' if format_param in self._3d_itags else '')
+                                                ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')

            results.append({
                'id': video_id,
@@ -832,8 +945,11 @@ class YoutubePlaylistIE(InfoExtractor):
        for entry in response['feed']['entry']:
            index = entry['yt$position']['$t']
-           if 'media$group' in entry and 'media$player' in entry['media$group']:
-               videos.append((index, entry['media$group']['media$player']['url']))
+           if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
+               videos.append((
+                   index,
+                   'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
+               ))

        videos = [v[1] for v in sorted(videos)]
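The id is read from 'yt$videoid' because 'media$player' is not defined for private videos. For reference, a playlist entry is shaped roughly like this (values invented):

    entry = {
        'yt$position': {'$t': 1},
        'media$group': {'yt$videoid': {'$t': 'BaW_jenozKc'}},  # no 'media$player' needed
    }
    print('https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t'])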
@@ -899,13 +1015,16 @@ class YoutubeChannelIE(InfoExtractor):

class YoutubeUserIE(InfoExtractor):
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
-   _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
+   _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
-   _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
+   _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
-   _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
    IE_NAME = u'youtube:user'

+   @classmethod
+   def suitable(cls, url):
+       if YoutubeIE.suitable(url): return False
+       else: return super(YoutubeUserIE, cls).suitable(url)
+
    def _real_extract(self, url):
        # Extract username
        mobj = re.match(self._VALID_URL, url)
@@ -928,13 +1047,15 @@ class YoutubeUserIE(InfoExtractor):
            page = self._download_webpage(gdata_url, username,
                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))

+           try:
+               response = json.loads(page)
+           except ValueError as err:
+               raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
+
            # Extract video identifiers
            ids_in_page = []
-           for mobj in re.finditer(self._VIDEO_INDICATOR, page):
-               if mobj.group(1) not in ids_in_page:
-                   ids_in_page.append(mobj.group(1))
+           for entry in response['feed']['entry']:
+               ids_in_page.append(entry['id']['$t'].split('/')[-1])
            video_ids.extend(ids_in_page)

            # A little optimization - if current page is not
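With `alt=json` appended to the GData URL, video ids come straight from the feed's entry list instead of regex-scraping HTML. A minimal sketch of the parsing, assuming the usual GData id format:

    import json

    page = ('{"feed": {"entry": [{"id": '
            '{"$t": "http://gdata.youtube.com/feeds/api/videos/BaW_jenozKc"}}]}}')
    response = json.loads(page)
    print([entry['id']['$t'].split('/')[-1] for entry in response['feed']['entry']])
    # -> ['BaW_jenozKc']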
@@ -1073,7 +1194,7 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):

class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
-   _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:o?rites)?'
+   _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):

View File

@@ -1,19 +1,20 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

+import datetime
+import email.utils
import errno
import gzip
import io
import json
import locale
import os
+import platform
import re
+import socket
import sys
import traceback
import zlib
-import email.utils
-import socket
-import datetime

try:
    import urllib.request as compat_urllib_request
@@ -60,6 +61,11 @@ try:
except ImportError: # Python 2
    import httplib as compat_http_client

+try:
+    from urllib.error import HTTPError as compat_HTTPError
+except ImportError: # Python 2
+    from urllib2 import HTTPError as compat_HTTPError
+
try:
    from subprocess import DEVNULL
    compat_subprocess_get_DEVNULL = lambda: DEVNULL
@@ -207,7 +213,7 @@ if sys.version_info >= (2,7):
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z]+$', key)
-       assert re.match(r'^[a-zA-Z@\s]*$', val)
+       assert re.match(r'^[a-zA-Z0-9@\s]*$', val)

        expr = xpath + u"[@%s='%s']" % (key, val)
        return node.find(expr)
else:
@@ -489,7 +495,7 @@ def make_HTTPS_handler(opts):
class ExtractorError(Exception):
    """Error during info extraction."""
-   def __init__(self, msg, tb=None, expected=False):
+   def __init__(self, msg, tb=None, expected=False, cause=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        """
@@ -497,11 +503,12 @@ class ExtractorError(Exception):
        if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if not expected:
-           msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output.'
+           msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info() # preserve original exception
+       self.cause = cause

    def format_traceback(self):
        if self.traceback is None:
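The new `cause` argument preserves the underlying exception next to the user-facing message. A trimmed, self-contained illustration of how a caller might use it (the call site is hypothetical):

    import sys

    class ExtractorError(Exception):
        def __init__(self, msg, tb=None, expected=False, cause=None):
            super(ExtractorError, self).__init__(msg)
            self.traceback = tb
            self.exc_info = sys.exc_info()  # preserve original exception
            self.cause = cause

    try:
        raise IOError('connection reset')
    except IOError as err:
        wrapped = ExtractorError(u'Unable to download page', expected=True, cause=err)
        print(wrapped.cause)  # -> the original IOError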
@@ -622,8 +629,23 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
-           gz = gzip.GzipFile(fileobj=io.BytesIO(resp.read()), mode='r')
-           resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
+           content = resp.read()
+           gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
+           try:
+               uncompressed = io.BytesIO(gz.read())
+           except IOError as original_ioerror:
+               # There may be junk at the end of the file
+               # See http://stackoverflow.com/q/4928560/35070 for details
+               for i in range(1, 1024):
+                   try:
+                       gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
+                       uncompressed = io.BytesIO(gz.read())
+                   except IOError:
+                       continue
+                   break
+               else:
+                   raise original_ioerror
+           resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
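The retry loop above trims up to 1023 trailing bytes until gunzipping succeeds, which recovers responses from servers that append junk after the gzip stream. The same idea as a standalone function (the demo line assumes Python >= 3.2 for gzip.compress):

    import gzip
    import io

    def gunzip_trim_junk(content):
        # Gunzip `content`, trimming up to 1023 trailing junk bytes if needed.
        try:
            return gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb').read()
        except IOError:
            for i in range(1, 1024):
                try:
                    return gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb').read()
                except IOError:
                    continue
            raise

    data = gzip.compress(b'hello') + b'JUNK'  # gzip.compress needs Python >= 3.2
    print(gunzip_trim_junk(data))             # -> b'hello'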
@@ -657,6 +679,9 @@ def determine_ext(url, default_ext=u'unknown_video'):
    else:
        return default_ext

+def subtitles_filename(filename, sub_lang, sub_format):
+    return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
+
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
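The new `subtitles_filename` helper swaps the media extension for a language-tagged subtitle extension; for instance:

    print(subtitles_filename('video.mp4', 'en', 'srt'))  # -> video.en.srt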
@@ -708,3 +733,31 @@ class DateRange(object):
        return self.start <= date <= self.end
    def __str__(self):
        return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
+
+def platform_name():
+    """ Returns the platform name as a compat_str """
+    res = platform.platform()
+    if isinstance(res, bytes):
+        res = res.decode(preferredencoding())
+
+    assert isinstance(res, compat_str)
+    return res
+
+def bytes_to_intlist(bs):
+    if not bs:
+        return []
+    if isinstance(bs[0], int):  # Python 3
+        return list(bs)
+    else:
+        return [ord(c) for c in bs]
+
+def intlist_to_bytes(xs):
+    if not xs:
+        return b''
+    if isinstance(chr(0), bytes):  # Python 2
+        return ''.join([chr(x) for x in xs])
+    else:
+        return bytes(xs)
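These two helpers round-trip between byte strings and lists of integers on both Python 2 and 3, which is handy for code that manipulates raw bytes numerically. Usage of the functions defined above:

    data = b'\x00\x10\xff'
    ints = bytes_to_intlist(data)          # -> [0, 16, 255]
    print(intlist_to_bytes(ints) == data)  # -> True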

View File

@@ -1,2 +1,2 @@
-__version__ = '2013.08.08.1'
+__version__ = '2013.09.06.1'