Compare commits

..

284 Commits

Author SHA1 Message Date
59282080c8 release 2013.09.06.1 2013-09-06 10:53:35 +02:00
98f3da4040 Merge remote-tracking branch 'origin/master' 2013-09-06 10:53:24 +02:00
1d213233cd Do not re-download files for hashsum generation (Fixes #1383) 2013-09-06 10:51:53 +02:00
fd9cf73836 [youtube] Users: download from the api in json to simplify extraction (fixes #1358)
There could be duplicate videos or other videos if the description have links.
2013-09-06 10:43:02 +02:00
0638ad9999 [youtube] Fix detection of tags from HLS videos. 2013-09-06 10:25:31 +02:00
1eb527692a release 2013.09.06 2013-09-06 10:13:33 +02:00
09bb17e108 Merge pull request #1378 from patrickslin/patch-6
Vevo sig changed again, please update for us! Thanks very much! (fixes #...
2013-09-06 09:53:23 +02:00
1cf911bc82 Vevo sig changed again, please update for us! Thanks very much! (fixes #1375) 2013-09-05 17:38:03 -07:00
f4b052321b [youtube] Urls like youtube.com/NASA are now interpreted as users (fixes #1069)
Video urls like http://youtube.com/BaW_jenozKc are not valid, but http://youtu.be/BaW_jenozKc is correct.
2013-09-05 22:39:15 +02:00
a636203ea5 release 2013.09.05 2013-09-05 22:30:50 +02:00
c215217e39 [youtube] Playlists: extract the videos id from ['media$group']['yt$videoid'] (fixes #1374)
'media$player' is not defined for private videos.
2013-09-05 21:40:04 +02:00
08e291b54d [generic] Recognize html5 video in the format '<video src=".+?"' and only unquote the url when extracting the id (fixes #1372) 2013-09-05 18:02:17 +02:00
6b95b065be Add extractor for tvcast.naver.com (closes #1331) 2013-09-05 10:53:40 +02:00
9363169b67 [daum] Get the video page from a canonical url to extract the full id (fixes #1373) and extract description. 2013-09-05 10:08:17 +02:00
085bea4513 Credit @Huarong for tv.sohu.com 2013-09-04 22:09:22 +02:00
150f20828b Add extractor for daum.net (closes #1330) 2013-09-04 22:06:50 +02:00
08523ee20a release 2013.09.04 2013-09-04 14:33:32 +02:00
5d5171d26a Merge pull request #1341 from xanadu/master
add support for "-f mp4" for YouTube
2013-09-03 18:52:12 -07:00
96fb5605b2 AHLS -> Apple HTTP Live Streaming 2013-09-03 18:49:35 -07:00
7011de0bc2 Merge pull request #1363 from Rudloff/defense
defense.gouv.fr
2013-09-03 18:23:08 -07:00
c3dd69eab4 Merge remote-tracking branch 'upstream/master' 2013-09-03 12:22:29 -07:00
025171c476 Suggested by @phihag 2013-09-03 12:03:19 +02:00
c8dbccde30 [orf] Remove the test video, they seem to expire in one week 2013-09-03 11:51:01 +02:00
4ff7a0f1f6 [dailymotion] improve the regex for extracting the video info 2013-09-03 11:33:59 +02:00
9c2ade40de [vimeo] Handle Assertions Error when trying to get the description
In some pages the html tags are not closed, python 2.6 cannot handle it.
2013-09-03 11:11:36 +02:00
aa32314d09 [vimeo] add support for videos that embed the download url in the player page (fixes #1364) 2013-09-03 10:48:56 +02:00
52afe99665 Extractor for defense.gouv.fr 2013-09-03 01:51:17 +02:00
b0446d6a33 Merge remote-tracking branch 'upstream/master' 2013-09-03 01:27:49 +02:00
8e4e89f1c2 Add an extractor for VeeHD (closes #1359) 2013-09-02 11:54:09 +02:00
6c758d79de [metacafe] Add more cases for detecting the uploader detection (reported in #1343) 2013-08-31 22:35:39 +02:00
691008087b Add an automatic page generator for the supported sites (related #156)
They are listed in the "supportedsites.html" page.
2013-08-31 15:18:52 +02:00
85f03346eb Merge remote-tracking branch 'upstream/master' 2013-08-30 17:51:59 -07:00
bdc6b3fc64 add support for "-f mp4" for YouTube 2013-08-30 17:51:50 -07:00
847f582290 Merge remote-tracking branch 'upstream/master' 2013-08-31 00:37:29 +02:00
10f5c016ec release 2013.08.30 2013-08-30 21:02:07 +02:00
2e756879f1 [youtube] update algo for length 86 2013-08-30 20:49:51 +02:00
c7a7750d3b [youtube] Fix typo in the _VALID_URL for YoutubeFavouritesIE, it was intended to also match :ytfavourites 2013-08-30 20:13:05 +02:00
9193c1eede Add youtube keywords to the bash completion script 2013-08-30 20:11:53 +02:00
3243d0f7b6 release 2013.08.29 2013-08-29 23:29:34 +02:00
23b00bc0e4 [youtube] update algo for length 84
Only appears sometimes, nearly identical to length 86.
2013-08-29 22:44:29 +02:00
52e1eea18b [youtube] update algo for length 86 (fixes #1349) 2013-08-29 22:33:58 +02:00
ee80d66727 [ign] update 1up extractor to work with the updated IGNIE 2013-08-29 21:51:09 +02:00
f1fb2d12b3 [ign] extract videos from articles pages 2013-08-29 21:39:36 +02:00
deb2c73212 Merge pull request #1347 from whydoubt/fix_orf_at
Fix orf.at extractor by adding file coding mark
2013-08-29 11:05:38 -07:00
8928491074 Fix orf.at extractor by adding file coding mark 2013-08-29 12:51:38 -05:00
545434670b Add an extractor for orf.at (closes #1346)
Make find_xpath_attr also accept numbers in the value
2013-08-29 19:16:07 +02:00
54fda45bac Merge pull request #1342 from whydoubt/fix_mit_26
Fix MIT extractor for Python 2.6
2013-08-29 13:42:08 +02:00
c7bf7366bc Update descriptions checksum for some test for Unistra and Youtube 2013-08-29 13:41:59 +02:00
b7052e5087 Also print the field that fails if it is a md5 checksum 2013-08-29 12:15:45 +02:00
0d75ae2ce3 Fix detection of the webpage charset if it's declared using ' instead of "
Like in "<meta charset='utf-8'/>"
2013-08-29 11:35:15 +02:00
b5ba7b9dcf Fix MIT extractor for Python 2.6
The HTML for the MIT page does not parse cleanly for Python 2.6 due
to script tags within an actual script element.  The offending piece
is inside a comment block, so removing all such comment blocks
fixes the parsing.
2013-08-28 14:24:42 -05:00
483e0ddd4d Merge remote-tracking branch 'upstream/master' 2013-08-28 10:19:28 -07:00
2891932bf0 release 2013.08.28.1 2013-08-28 19:00:17 +02:00
591078babf Merge remote-tracking branch 'upstream/master' 2013-08-28 09:57:28 -07:00
9868c781a1 Merge remote-tracking branch 'origin/master' 2013-08-28 18:22:33 +02:00
c257baff85 Merge remote-tracking branch 'rzhxeo/youporn-hd'
Conflicts:
	youtube_dl/utils.py
2013-08-28 18:22:28 +02:00
878e83c5a4 YoupornIE: Clean up extraction of hd video 2013-08-28 16:04:48 +02:00
0012690aae Let aes_decrypt_text return bytes instead of unicode 2013-08-28 16:03:35 +02:00
6e74bc41ca Fix division bug in aes.py 2013-08-28 16:01:43 +02:00
cba892fa1f Add intlist_to_bytes to utils.py 2013-08-28 15:59:07 +02:00
550bfd4cbd Merge pull request #1 from phihag/youporn-hd-pr
Allow changes to run under Python 3
2013-08-28 06:23:33 -07:00
920ef0779b Hide the password and username in verbose mode (closes #1089) 2013-08-28 15:14:02 +02:00
48ea9cea77 Allow changes to run under Python 3 2013-08-28 14:34:49 +02:00
ccf4b799df Merge remote-tracking branch 'origin/master' 2013-08-28 14:02:40 +02:00
f143d86ad2 [sohu] Handle encoding, and fix tests 2013-08-28 14:00:05 +02:00
8ae97d76ee PostProcessingError holds the message in the 'msg' property, not in 'message' (fixes #1323)
Causes DeprecationWarning: http://www.python.org/dev/peps/pep-0352/
2013-08-28 13:37:31 +02:00
f8b362739e Merge remote-tracking branch 'Huarong/master' 2013-08-28 13:10:59 +02:00
6d69d03bac Merge remote-tracking branch 'origin/reuse_ies' 2013-08-28 13:05:21 +02:00
204da0d3e3 Merge remote-tracking branch 'origin/master' 2013-08-28 12:57:44 +02:00
c496ca96e7 Fix platform name in Python 2 with --verbose (Closes #1228) 2013-08-28 12:57:10 +02:00
67b22dd036 Add extractors for video.mit.edu and techtv.mit.edu (closes #1327)
video.mit.edu just embeds the videos from techtv.mit.edu
2013-08-28 12:55:42 +02:00
ce6a696e4d Remove unused imports 2013-08-28 12:47:38 +02:00
a5caba1eb0 [generic] simply use urljoin 2013-08-28 12:47:27 +02:00
cd9c100963 Merge remote-tracking branch 'upstream/master' 2013-08-28 12:20:12 +02:00
edde6c56ac Print playpath with --get-url (Fixes #1334) 2013-08-28 12:14:45 +02:00
b7f89fe692 Merge remote-tracking branch 'upstream/master' 2013-08-28 12:10:34 +02:00
ae3531adf9 [generic] Fix URL concatenation
When the url is something like http://example.org/foo/bar?x=y  and the added is file/video.mp4 , we want http://example.org/foo/file/video.mp4
Fixes #1268.
2013-08-28 12:08:17 +02:00
8cf5ee7831 Merge branch 'master' of github.com:rg3/youtube-dl 2013-08-28 11:57:18 +02:00
aa3e950764 Tolerate junk at the end of gzip-compressed content (#1268) 2013-08-28 11:57:13 +02:00
1301a0dd42 Merge remote-tracking branch 'upstream/master' 2013-08-28 11:02:12 +02:00
af8bd6a82d Show the time taken to download in the same format as the ETA 2013-08-28 10:56:11 +02:00
6d38616e67 Merge pull request #1181 from h3xx/master
Add some verbosity when reporting finished downloads

Remove the mixed use of tabs and spaces for indentation.
2013-08-28 10:54:07 +02:00
4f5f18acb9 [addanime] add file 2013-08-28 10:28:16 +02:00
3e223834d9 [youtube] update algo for length 88, thanks to @Ramhack (fixes #1328) 2013-08-28 10:26:44 +02:00
a1bb0f8773 [cnn] remove debug print call. 2013-08-28 10:20:37 +02:00
0e283428f7 HTTPError is in urllib.error in Python 3, not in http.error 2013-08-28 10:18:39 +02:00
2eabb80254 [addanime] improve 2013-08-28 04:25:38 +02:00
44586389e4 [appletrailers] Add support 2013-08-28 02:18:44 +02:00
273f603efb [cnn] Allow more URLs 2013-08-28 00:14:19 +02:00
1619e22f40 release 2013.08.28 2013-08-27 23:31:36 +02:00
88a79ce6a6 Delete default user agent (Fixes #1309) 2013-08-27 23:31:24 +02:00
acebc9cd6b Revert "Install our own HTTPS handler as well (#1309)"
This reverts commit 36399e8576 and fixes #1322.
2013-08-27 23:28:20 +02:00
443c12a703 Merge pull request #1324 from whydoubt/fix_gplus
Initial slash in Google+ photos link was removed
2013-08-27 13:36:39 -07:00
7f3c4f4f65 Initial slash in Google+ photos link was removed 2013-08-27 14:38:50 -05:00
0bc56fa66a Add an extractor for NBC news (closes #1320) 2013-08-27 12:38:57 +02:00
1a582dd49d Add an extractor for CNN (closes #1318) 2013-08-27 11:56:48 +02:00
c5b921b597 Merge remote-tracking branch 'upstream/master' 2013-08-27 10:47:47 +02:00
e86ea47c02 [canalc2] Small improvements 2013-08-27 10:35:20 +02:00
aa5a63a5b5 Merge remote-tracking branch 'Rudloff/canalc2' 2013-08-27 10:31:46 +02:00
2a7b4da9b2 [hark] get the song info in JSON and extract more information. 2013-08-27 10:25:38 +02:00
069d098f84 [canalplus] Accept player.canalplus.fr urls 2013-08-27 10:21:57 +02:00
b3889f7023 release 2013.08.27 2013-08-27 02:30:47 +02:00
65883c8dbd Merge branch 'master' of github.com:rg3/youtube-dl 2013-08-27 02:00:23 +02:00
341ca8d74c [trilulilu] Add support for trilulilu.ro
Fun fact: The ads (not yet supported) are loaded from youtube ;)
2013-08-27 01:59:00 +02:00
99859d436c Merge remote-tracking branch 'upstream/master' 2013-08-26 15:16:13 -07:00
1b01e2b085 Merge pull request #1315 from yasoob/master
fixed tests for c56 and dailymotion
2013-08-26 13:38:48 -07:00
976fc7d137 fixed tests for c56 and dailymotion 2013-08-27 01:00:17 +05:00
c3b7b29c23 Merge remote-tracking branch 'origin/master' 2013-08-26 21:29:44 +02:00
627a91a9a8 [generic] small typo 2013-08-26 21:29:31 +02:00
6dc6302599 Merge pull request #1231 from yasoob/master
Added an IE for hark.com
2013-08-26 12:29:04 -07:00
7a20e2e1f8 Merge remote-tracking branch 'upstream/master' 2013-08-26 03:16:42 +02:00
90648143c3 Merge pull request #1310 from rzhxeo/rtlnow
Add support for http://superrtlnow.de
2013-08-25 15:45:22 -07:00
5c6658d4dd Merge remote-tracking branch 'upstream/master' 2013-08-24 23:01:39 +02:00
9585f890f8 [generic] add support for relative URLs (Fixes #1308) 2013-08-24 22:56:37 +02:00
0838239e8e [generic] Support double slash URLs (Fixes #1309) 2013-08-24 22:52:45 +02:00
36399e8576 Install our own HTTPS handler as well (#1309) 2013-08-24 22:49:22 +02:00
9460db832c [ro220] Add support for 220.ro 2013-08-24 21:10:03 +02:00
d68730a56e Add SUPER RTL NOW to RTLnow extractor 2013-08-24 13:22:28 +02:00
f2aeefe29c [youtube] update algo for length 84 2013-08-24 10:48:12 +02:00
39c6f507df Merge remote-tracking branch 'upstream/master' 2013-08-23 15:33:36 -07:00
d2d1eb5b0a Switch to domain yt-dl.org 2013-08-23 23:57:23 +02:00
8ae7be3ef4 release 2013.08.23 2013-08-23 23:09:53 +02:00
306170518f [youtube] update algo for length 86 (fixes #1302) 2013-08-23 22:36:59 +02:00
aa6a10c44a Allow to specify multiple subtitles languages separated by commas (closes #518) 2013-08-23 18:34:57 +02:00
9af73dc4fc Print a message before embedding the subtitles 2013-08-23 18:17:43 +02:00
fc483bb6af [xhamster] use determine_ext 2013-08-23 17:23:34 +02:00
53b0f3e4e2 Merge pull request #1301 from rzhxeo/xhamster
XHamsterIE: Fix video extension and add video description
2013-08-23 17:21:30 +02:00
4353cf51a0 XHamsterIE: Add video description 2013-08-23 16:40:20 +02:00
ce34e9ce5e XHamsterIE: Fix video extension
Cut off GET parameter
2013-08-23 16:33:41 +02:00
d4051a8e05 Add a post processor for embedding subtitles in mp4 videos (closes #1052) 2013-08-23 15:06:19 +02:00
df3df7fb64 [youtube] Fix download of subtitles with '--all-subs'
If _extract_subtitles is called the option 'write subtitles' is always true.
2013-08-23 13:14:22 +02:00
9e9c164052 Merge pull request #937 from jaimeMF/subtitles_rework
Subtitles rework
2013-08-23 02:40:25 -07:00
066090dd3f [youtube] add algo for length 80 and update player info 2013-08-23 11:33:56 +02:00
614d9c19c1 Merge remote-tracking branch 'upstream/master' 2013-08-22 17:02:41 -07:00
74e6672beb Merge pull request #1297 from iemejia/master
[subtitles] separated subtitle options in their own group
2013-08-22 16:30:14 -07:00
02bcf0d389 release 2013.08.22 2013-08-22 23:29:42 +02:00
10204dc898 [videofyme] Add an additional quality (they change between downloads of the info) and update md5 sum of the test video 2013-08-22 23:23:52 +02:00
1865ed31b9 [subtitles] separated subtitle options in their own group 2013-08-22 22:44:04 +02:00
3669cdba10 [youtube] update algo for length 82 (fixes #1296) 2013-08-22 22:35:15 +02:00
939fbd26ac [youtube] fix the order of DASH formats 2013-08-22 19:45:24 +02:00
b4e60dac23 Merge remote-tracking branch 'upstream/master' 2013-08-22 10:43:51 -07:00
e6ddb4e7af Merge pull request #1279 from xanadu/master
Add YouTube DASH formats to YouTubeIE
2013-08-22 19:33:34 +02:00
83390b83d9 Merge pull request #1266 from MiLk/py-generator
Update the youtube algorithm generator
2013-08-22 10:18:58 -07:00
ff2424595a lxml is not part of the standard library. 2013-08-22 14:47:51 +02:00
adeb9c73d6 Merge remote-tracking branch 'upstream/master' 2013-08-22 14:04:30 +02:00
cd0abcc0bb Extractor for canalc2.tv 2013-08-22 13:54:23 +02:00
4a55479fa9 Credit Pierre Rudloff for JeuxVideoIE and UnistraIE 2013-08-22 13:21:32 +02:00
f527115b5f Rename utv.py to unistra.py and extract more info
There are other sites that could be named utv, which would conflict if they are added
2013-08-22 13:19:35 +02:00
75e1b46add Download from utv.unistra.fr (PR #1271)
Squashed to a single commit to keep the file 'youtube-dl' unchanged and remove the revert commit.
2013-08-22 12:58:12 +02:00
05a2926c5c Merge remote-tracking branch 'upstream/master' 2013-08-22 12:55:58 +02:00
7070b83687 Merge remote-tracking branch 'upstream/master' 2013-08-22 12:54:17 +02:00
8d212e604a Merge remote-tracking branch 'upstream/master'
Conflicts:
	youtube_dl/extractor/jeuxvideo.py
2013-08-22 12:52:05 +02:00
063fcc9676 [jeuxvideo] Extract more information and add test 2013-08-22 12:37:34 +02:00
8403612258 Merge pull request #1267 from Rudloff/master
Download videos from jeuxvideo.com

Edited to keep the file 'youtube-dl' unchanged.
2013-08-22 12:25:21 +02:00
25b51c7816 Download videos from jeuxvideo.com 2013-08-22 12:12:34 +02:00
9779b63bb6 Add an extractor for PBS (closes #870 and #873) 2013-08-22 11:57:21 +02:00
d81aef3adf Add an extractor for tv.slashdot.org (closes #1192)
It uses the ooyala platform, so it just extracts the ooyala url.
2013-08-21 21:51:58 +02:00
5af7e056a7 Merge remote-tracking branch 'upstream/master' 2013-08-21 10:53:42 -07:00
45ed795cb0 [youtube] update uploader name for a test video: 'IconaPop' has changed to 'Icona Pop' 2013-08-21 19:28:48 +02:00
683e98a8a4 [statigram] change test video
The old one cannot be accessed.
2013-08-21 19:20:27 +02:00
e0cfeb2ea7 [funnyordie] fix extraction of video url and title 2013-08-21 18:58:25 +02:00
75340ee383 [vevo] Fix urls with a query (#1258) 2013-08-21 18:20:03 +02:00
668de34c6b [soundcloud] Support widget urls (fixes #1252) 2013-08-21 17:06:37 +02:00
a91b954bb4 [vimeo] extract information for Vimeo Pro videos from http://player.vimeo.com/video/{video_id} (fixes #1197)
For some videos https://vimeo.com/{video_id} doesn't work
2013-08-21 13:48:19 +02:00
a3f62b8255 Merge remote-tracking branch 'upstream/master' 2013-08-21 00:07:03 -07:00
37b6d5f684 fix hls test 2013-08-20 23:51:05 -07:00
b7a6838407 address review comment 2013-08-20 21:57:32 -07:00
cde846b3d3 fix code style 2013-08-20 21:42:49 -07:00
6c3e6e88d3 Allow hours in ETA display (Fixes #1280) 2013-08-21 05:44:19 +02:00
739674cd77 [rtlnow] Add support for error message for queries from outside of Germany 2013-08-21 05:24:58 +02:00
4b2d7cae11 release 2013.08.21 2013-08-21 04:33:57 +02:00
7fea7156cb [generic] support HTML5 video 2013-08-21 04:32:22 +02:00
3093468977 [generic] Ignore stupid HTTP servers (#1284) 2013-08-21 04:32:07 +02:00
79cb25776f Cache suitable regular expressions
This speeds up TestAllURLsMatching.test_no_duplicates by about 8000% at the cost of minimal memory overhead.
2013-08-21 04:06:48 +02:00
87f78946a5 [collegehumor] Allow old-style videos (Fixes #1285) 2013-08-21 03:50:56 +02:00
211fbc1328 fix failed tests 2013-08-19 18:57:55 -07:00
836a086ce9 Add YouTube DASH formats to YouTubeIE 2013-08-19 18:22:25 -07:00
90d3989b99 Merge remote-tracking branch 'upstream/master' 2013-08-19 17:11:52 -07:00
d741e55a42 [youtube] Support watch_popup URLs (Fixes #1275) 2013-08-19 10:27:42 +02:00
17d3aaaf16 Merge pull request #1273 from rzhxeo/rtlnow
Add support for http://voxnow.de
2013-08-19 00:19:06 -07:00
ea55b2a4ca Add VOXnow to RTLnow extractor 2013-08-19 08:57:36 +02:00
3f0537dd4a Merge remote-tracking branch 'rzhxeo/rtlnow' 2013-08-19 00:25:34 +02:00
943f7f7a39 Download videos from jeuxvideo.com 2013-08-18 16:11:47 +02:00
12e895fc5a Merge branch 'master' into py-generator 2013-08-18 11:12:38 +02:00
bda2c49d75 Update algo - see #1254
Signed-off-by: Emilien Kenler <hello@emilienkenler.com>
2013-08-18 11:10:39 +02:00
01b32990da Add RTLnow extractor 2013-08-18 08:16:53 +02:00
dbda1b5147 Add RTLnow extractor
Supports http://rtl2now.rtl2.de and http://rtl-now.rtl.de
2013-08-18 08:15:18 +02:00
ddf3bd328b release 2013.08.17 2013-08-17 08:33:36 +02:00
b9c37b92cf Merge pull request #1256 from patrickslin/patch-5
Length 85 changed again? (fixes #1254)
2013-08-16 14:07:49 -07:00
5a27ecdd2e Update AddAnime.py 2013-08-16 23:54:09 +03:00
f9c3c90ca8 Length 85 changed again? (fixes #1254) 2013-08-16 08:54:01 -07:00
6daccbe317 release 2013.08.15 2013-08-15 22:40:00 +02:00
71ea844c0e Merge pull request #1248 from patrickslin/patch-4
Unable to Download Video (fixes #1247)
2013-08-15 13:38:32 -07:00
3a7256697e Unable to Download Video (fixes #1247) 2013-08-15 13:00:20 -07:00
d1ba998274 release 2013.08.14 2013-08-14 10:19:53 +02:00
718ced8d8c Merge pull request #1239 from patrickslin/patch-3
Updated Vevo Signature Length (fixes #1237)
2013-08-14 01:18:58 -07:00
e1842025d0 Updated Vevo Signature Length (fixes #1237) 2013-08-13 17:57:35 -07:00
2b9213cdc1 Update generator
Signed-off-by: Emilien Kenler <hello@emilienkenler.com>
2013-08-12 10:48:40 +02:00
e3a88568b0 Added an IE for hark.com 2013-08-11 22:23:05 +05:00
0577177e3e [vevo] fix testcase 2013-08-11 07:12:38 +02:00
298f833b16 Note update possibility on errors (thanks @chbrown, #1229) 2013-08-11 06:46:24 +02:00
97b3656c2e YoupornIE: Add support for hd videos and update Test 2013-08-09 18:37:33 +02:00
f3bcebb1d2 add an aes implementation 2013-08-09 18:36:01 +02:00
0f399e6e5e release 2013.08.09 2013-08-09 15:49:09 +02:00
5b075e27cb Merge pull request #1218 from patrickslin/patch-2
New sig len 89 algo
2013-08-09 03:42:13 -07:00
8a9d86a2a7 New sig len 89 algo
Fixes new YT encrypted sig len 89.
2013-08-08 21:48:12 -07:00
d468a09789 release 2013.08.08.1 2013-08-08 20:45:16 +02:00
9f4ab73d7f Merge pull request #1216 from patrickslin/patch-5
Invalid signature again (fixes #1215)
2013-08-08 11:44:29 -07:00
02cf62e240 Invalid signature again (fixes #1215) 2013-08-08 11:28:50 -07:00
67fb0c5495 Merge branch 'master' of github.com:rg3/youtube-dl 2013-08-08 08:56:59 +02:00
4efba05c56 Clarify template error message (#1209) 2013-08-08 08:55:26 +02:00
0f90943e45 Merge pull request #1189 from cyisfor/master
More informative error
2013-08-07 17:30:28 -07:00
526e638c8a release 2013.08.08 2013-08-08 00:39:23 +02:00
356e067390 Merge remote-tracking branch 'patrickslin/patch-4' 2013-08-07 20:19:51 +02:00
e2f48f9643 Remove youtube sig tests
The signature algo changes too often for the static test to make sense.
2013-08-07 20:11:40 +02:00
b513a251f8 Merge commit '7a4c6cc92f9ffec9135652a49153caffa5520c29' 2013-08-07 20:11:04 +02:00
36cb11f068 Encrypted sig 87 broken again (fixes #1200) 2013-08-06 21:35:37 -07:00
7a4c6cc92f Updated the 84 length signature decryption
Updated the right 84 length signature decryption 06.08.2013
2013-08-06 15:41:13 +03:00
7edcb8f39c More informative error 2013-08-05 19:43:09 -07:00
d5b00ee6e0 improve sohu extractor 2013-08-06 10:26:57 +08:00
461cead4f7 changes 2013-08-06 04:34:24 +03:00
b5a6d40818 fix parse title bug 2013-08-05 22:51:54 +08:00
968b5e0112 Add some verbosity when reporting finished downloads
For example:

    [download] Resuming download at byte 1868140
    [download] Destination: Entry #1-Bn59FJ4HrmU.flv
    [download] 100% of 3.27MiB in 4s

This format is meant to somewhat mirror the behavior of wget(1) when reporting finished downloads:

    100%[==================>] 54,836,682   788KB/s   in 74s

    2013-08-04 12:32:05 (728 KB/s) - 'google-chrome-stable_current_x86_64.rpm' saved [54836682/54836682]
2013-08-04 12:45:24 -05:00
39b782b390 [collegehumor] support urls in the format www.collegehumor.com/e/{video_id} (fixes #1179) 2013-08-04 16:36:48 +02:00
577664c8e8 Add an extractor from muzu.tv (closes #1177) 2013-08-04 11:10:57 +02:00
bba12cec89 Add an extractor for videofy.me (closes #1171)
Also modify find_xpath_attr to accept values with spaces like for id="HQ on"
2013-08-03 22:50:27 +02:00
70c4c03cb8 [arte] add support for downloading from http://liveweb.arte.tv (fixes #1014) 2013-08-03 19:07:04 +02:00
f5791ed136 [arte] Prefer vídeos without subtitles in the same language (fixes #1173) and fix crash when there's no description 2013-08-03 17:32:29 +02:00
4ec929dc9b use ..utils/clean_html() 2013-08-03 10:29:58 +08:00
fbf189a6ee [myvideo] add support for videos that place the video info inside www.myvideo.de/service/data/video/{id}/config (fixes #616) 2013-08-02 21:09:17 +02:00
09825cb5c0 Add an extractor for Ooyala (closes #833)
Only works for some sites, it doesn't work for videos that use a f4m manifest
2013-08-02 16:53:16 +02:00
ed27d35674 [youtube] don't crash in verbose mode if 'ad3_module' is not defined in age protected videos (fixes #1159) 2013-08-02 14:17:01 +02:00
fd5539eb41 release 2013.08.02 2013-08-02 13:35:13 +02:00
04bca64bde [youtube]: new algo for length 83 (fixes #1164) 2013-08-02 12:38:17 +02:00
03cc7c20c1 [youtube] show which formats are in 3D with "-F" and in the format field 2013-08-02 12:21:28 +02:00
4075311d94 Merge pull request #1163 from xanadu/master
add support for download YouTube 3d format of 3d content
2013-08-02 12:06:34 +02:00
6624a2b07d add an extractor for tv.sohu.com 2013-08-02 17:58:46 +08:00
6d3a7d03e1 fix bug: kankan extractor not support http://vod.kankan.com/v/70/70309.shtml 2013-08-02 15:26:11 +08:00
95fdc7d69c Merge remote-tracking branch 'upstream/master' 2013-08-01 10:57:12 -07:00
86fe61c8f9 add support for download YouTube 3d format of 3d content 2013-08-01 10:47:48 -07:00
9bb6d2f21d Merge pull request #1161 from meyerd/master
Fix regex error when only subtitled video is available on arte.
2013-08-01 04:49:05 -07:00
e3f4593e76 Fix regex error when only subtitled video is available on arte. 2013-08-01 11:48:17 +02:00
1d043b93cf [youtube] Add support for downloading videos with hlsvp (fixes #1083)
They are downloaded with a m3u8 manifest, they seem to be encrypted, but ffmpeg can handle them.
2013-07-31 23:41:05 +02:00
b15d4f624f Allow to download from m3u8 manifests with ffmpeg
They are detected by the extension of the url.
2013-07-31 22:33:37 +02:00
4aa16a50f5 Log a better error message if ffprobe or avconv are not found (related #1134) 2013-07-31 21:22:08 +02:00
bbcbf4d459 Switch some calls to to_stderr to report_error and report_warning 2013-07-31 21:20:46 +02:00
930ad9eecc release 2013.07.31 2013-07-31 10:55:02 +02:00
b072a9defd YoutubeIE: with age protected videos, add a missing "return" to return the signature decrypted with _decrypt_signature 2013-07-31 10:51:00 +02:00
75952c6e3d YoutubeIE: new algo for length 86 (fixes #1156)
Now is using the same length as the flash player used for age protected videos, but the algorithm is different, so now for age protected videos it first tries to use the old algo.
2013-07-31 10:45:13 +02:00
05afc96b73 Print urls from the batch file with --verbose (related #1155) 2013-07-30 23:11:44 +02:00
fa80026915 Disable way and tf1 tests, the whole videos are served sometimes, so the md5 sum doesn't match. 2013-07-30 11:19:07 +02:00
2bc3de0f28 [worldstarhiphop] Small cleanup
The second check for the Vevo id is not necessary.
2013-07-30 11:10:17 +02:00
99c7bc94af Merge pull request #1148 from JohnyMoSwag/master
[worldstarhiphop] support vevo videos
2013-07-30 11:05:40 +02:00
152c8f349d Merge pull request #1149 from pishposhmcgee/patch-3
[vevo] Modified m_urls regex and video_url
2013-07-30 01:57:37 -07:00
d75654c15e using re.search 2013-07-29 14:39:14 -07:00
0725f584e1 [wat] fix the extraction of the video url (fixes #1103)
Use the direct download link for Android.
2013-07-29 23:38:02 +02:00
8cda9241d1 Add an extractor for kankan.com (closes #1133) 2013-07-29 23:13:12 +02:00
a3124ba49f Modified m_urls regex and video_url
Some videos have a leading slash, some do not
2013-07-29 15:45:20 -05:00
579e2691fe detect vevo embed fix 2013-07-29 12:24:26 -07:00
63f05de10b detect vevo embed 2013-07-29 12:11:57 -07:00
caeefc29eb [vimeo] add an extractor for channels 2013-07-29 13:12:09 +02:00
a3c736def2 [dailymotion] Add an extractor for Dailymotion playlists 2013-07-29 12:07:38 +02:00
58261235f0 Add an extractor for roxwell.com (closes #1044) 2013-07-26 13:00:59 +02:00
da70877a1b release 2013.07.25.2 2013-07-25 22:58:40 +02:00
5c468ca8a8 YoutubeIE: add algo for length 79 (fixes #1126) 2013-07-25 22:50:24 +02:00
aedd6bb97d YoutubeIE: new algo for length 81 (fixes #1127) 2013-07-25 22:06:53 +02:00
733d9cacb8 Merge pull request #1120 from pishposhmcgee/patch-1
[collegehumor] Added an option 'e' to go with 'video' or 'embed'
2013-07-25 01:14:43 -07:00
42f2805e48 [keek] Fix testcase (Broken by accident in 6625f82940) 2013-07-25 10:10:37 +02:00
0ffcb7c6fc release 2013.07.25.1 2013-07-25 09:53:15 +02:00
27669bd11d [ina] Allow I at start of video IDs 2013-07-25 09:52:58 +02:00
6625f82940 [keek] Allow httpS URLs (Fixes #1123) 2013-07-25 09:40:19 +02:00
d0866f0bb4 release 2013.07.25 2013-07-25 09:35:25 +02:00
09eeb75130 Merge remote-tracking branch 'pishposhmcgee/patch-2' 2013-07-25 09:34:56 +02:00
0a99956f71 [ina] Fix URL detection (Fixes #1121) 2013-07-25 09:34:12 +02:00
12ef6aefa8 changed video_url regex
Some older videos contain an extra properties such as 'embed' before 'type'.
2013-07-24 21:51:08 -05:00
e93aa81aa6 Added an option 'e' to go with 'video' or 'embed'
Based on links that I've seen, /e/<videoid> also occurs in the wild, and making this substitution yields effective results.
2013-07-24 16:55:28 -05:00
755eb0320e [youtube] use itertools.count instead of a "while True" loop and a manual counter 2013-07-24 22:27:33 +02:00
43ba5456b1 [youtube] add an extractor for the "Watch Later" list 2013-07-24 22:13:39 +02:00
6804038d06 Don't try to write the subtitles if it's None 2013-07-20 12:59:47 +02:00
2f799533ae YoutubeIE: don't crash when trying to get automatic captions if the videos has standard subtitles. 2013-07-20 12:56:10 +02:00
88ae5991cd YoutubeIE: use the same function for getting the subtitles for the "--write-sub" and "--all-sub" options 2013-07-20 12:56:06 +02:00
5d51a883c2 Use a dictionary for storing the subtitles
The errors while getting the subtitles are reported as warnings, if no subtitles are found return and empty dict.
2013-07-20 12:52:25 +02:00
c4a91be726 Save subtitles using the same code for all the options 2013-07-20 12:52:24 +02:00
56c7366547 YoutubeIE: reuse instances of InfoExtractors (closes #998)
When a IE is added to the list, it's also added to a dictionary. When a IE is requested it first looks in the dictionary and if there's no instance it will create a new one.

That way _real_initialize is only called once for each IE, saving time if it needs to login for example.
2013-07-08 15:14:27 +02:00
70 changed files with 3036 additions and 471 deletions

View File

@ -113,25 +113,28 @@ which means you can modify it, redistribute it or use it however you like.
## Video Format Options:
-f, --format FORMAT video format code, specifiy the order of
preference using slashes: "-f 22/17/18"
preference using slashes: "-f 22/17/18". "-f mp4"
and "-f flv" are also supported
--all-formats download all available video formats
--prefer-free-formats prefer free video formats unless a specific one
is requested
--max-quality FORMAT highest quality format to download
-F, --list-formats list all available formats (currently youtube
only)
## Subtitle Options:
--write-sub write subtitle file (currently youtube only)
--write-auto-sub write automatic subtitle file (currently youtube
only)
--only-sub [deprecated] alias of --skip-download
--all-subs downloads all the available subtitles of the
video (currently youtube only)
video
--list-subs lists all available subtitles for the video
(currently youtube only)
--sub-format FORMAT subtitle format [srt/sbv/vtt] (default=srt)
(currently youtube only)
--sub-lang LANG language of the subtitles to download (optional)
use IETF language tags like 'en'
--sub-format FORMAT subtitle format (default=srt) ([sbv/vtt] youtube
only)
--sub-lang LANGS languages of the subtitles to download (optional)
separated by commas, use IETF language tags like
'en,pt'
## Authentication Options:
-u, --username USERNAME account username
@ -153,6 +156,8 @@ which means you can modify it, redistribute it or use it however you like.
processing; the video is erased by default
--no-post-overwrites do not overwrite post-processed files; the post-
processed files are overwritten by default
--embed-subs embed subtitles in the video (only for mp4
videos)
# CONFIGURATION

View File

@ -4,8 +4,12 @@ __youtube-dl()
COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}"
opts="{{flags}}"
keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater"
if [[ ${cur} == * ]] ; then
if [[ ${cur} =~ : ]]; then
COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
return 0
elif [[ ${cur} == * ]] ; then
COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
return 0
fi

View File

@ -19,15 +19,22 @@ if 'signature' in versions_info:
new_version = {}
filenames = {'bin': 'youtube-dl', 'exe': 'youtube-dl.exe', 'tar': 'youtube-dl-%s.tar.gz' % version}
filenames = {
'bin': 'youtube-dl',
'exe': 'youtube-dl.exe',
'tar': 'youtube-dl-%s.tar.gz' % version}
build_dir = os.path.join('..', '..', 'build', version)
for key, filename in filenames.items():
print('Downloading and checksumming %s...' %filename)
url = 'http://youtube-dl.org/downloads/%s/%s' % (version, filename)
data = urllib.request.urlopen(url).read()
fn = os.path.join(build_dir, filename)
with open(fn, 'rb') as f:
data = f.read()
if not data:
raise ValueError('File %s is empty!' % fn)
sha256sum = hashlib.sha256(data).hexdigest()
new_version[key] = (url, sha256sum)
versions_info['versions'][version] = new_version
versions_info['latest'] = version
json.dump(versions_info, open('update/versions.json', 'w'), indent=4, sort_keys=True)
with open('update/versions.json', 'w') as jsonf:
json.dump(versions_info, jsonf, indent=4, sort_keys=True)

View File

@ -22,7 +22,7 @@ entry_template=textwrap.dedent("""
<atom:link href="http://rg3.github.io/youtube-dl" />
<atom:content type="xhtml">
<div xmlns="http://www.w3.org/1999/xhtml">
Downloads available at <a href="http://youtube-dl.org/downloads/@VERSION@/">http://youtube-dl.org/downloads/@VERSION@/</a>
Downloads available at <a href="https://yt-dl.org/downloads/@VERSION@/">https://yt-dl.org/downloads/@VERSION@/</a>
</div>
</atom:content>
<atom:author>
@ -54,4 +54,3 @@ atom_template = atom_template.replace('@ENTRIES@', entries_str)
with open('update/releases.atom','w',encoding='utf-8') as atom_file:
atom_file.write(atom_template)

View File

@ -0,0 +1,33 @@
#!/usr/bin/env python3
import sys
import os
import textwrap
# We must be able to import youtube_dl
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
import youtube_dl
def main():
with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf:
template = tmplf.read()
ie_htmls = []
for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME):
ie_html = '<b>{}</b>'.format(ie.IE_NAME)
try:
ie_html += ': {}'.format(ie.IE_DESC)
except AttributeError:
pass
if ie.working() == False:
ie_html += ' (Currently broken)'
ie_htmls.append('<li>{}</li>'.format(ie_html))
template = template.replace('@SITES@', textwrap.indent('\n'.join(ie_htmls), '\t'))
with open('supportedsites.html', 'w', encoding='utf-8') as sitesf:
sitesf.write(template)
if __name__ == '__main__':
main()

View File

@ -67,7 +67,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
(cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
git checkout HEAD -- youtube-dl youtube-dl.exe
/bin/echo -e "\n### Signing and uploading the new binaries to youtube-dl.org..."
/bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..."
for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done
scp -r "build/$version" ytdl@yt-dl.org:html/tmp/
ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/"
@ -85,6 +85,7 @@ ROOT=$(pwd)
"$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem"
"$ROOT/devscripts/gh-pages/generate-download.py"
"$ROOT/devscripts/gh-pages/update-copyright.py"
"$ROOT/devscripts/gh-pages/update-sites.py"
git add *.html *.html.in update
git commit -m "release $version"
git show HEAD

View File

@ -11,30 +11,45 @@ tests = [
# 90
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`",
"mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"),
# 88
# 89
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'",
"/?;:|}<[{=+-_)(*&^%$#@!MqBVCXZASDFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuyt"),
# 88 - vflapUV9V 2013/08/28
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
"J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
# 87 - vflART1Nf 2013/07/24
"ioplkjhgfdsazxcvbnm12<4567890QWERTYUIOZLKJHGFDSAeXCVBNM!@#$%^&*()_-+={[]}|:;?/>.3"),
# 87
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
"tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>"),
# 86 - vfl_ymO4Z 2013/06/27
"uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"),
# 86 - vfluy6kdb 2013/09/06
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
"yuioplkjhgfdsazxcvbnm12345678q0QWrRTYUIOELKJHGFD-AZXCVBNM!@#$%^&*()_<+={[|};?/>.S"),
# 85
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
# 84 - vflg0g8PQ 2013/08/29 (sporadic)
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),
# 83
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
# 82 - vflZK4ZYR 2013/08/23
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
"wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>.<"),
# 81 - vflLC8JvQ 2013/07/25
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.",
"C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
# 80 - vflZK4ZYR 2013/08/23 (sporadic)
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>",
"wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>"),
# 79 - vflLC8JvQ 2013/07/25 (sporadic)
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/",
"Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
]
tests_age_gate = [
# 86 - vflqinMWD
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
"ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
# 85 - vflSAFCP9 2013/07/19
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
"ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"),
# 84
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
"<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"),
# 83 - vflcaqGO8 2013/07/11
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
"urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<"),
# 82
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
"Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"),
# 81
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.",
"urty8ioplkjhgfdsazxcvbqm1234567e90QWERTYUIOPLKHGFDSnZXCVBNM!@#$%^&*(-+={[};?/>."),
]
def find_matching(wrong, right):
@ -87,6 +102,8 @@ def genall(tests):
def main():
print(genall(tests))
print(u' Age gate:')
print(genall(tests_age_gate))
if __name__ == '__main__':
main()

View File

@ -11,6 +11,15 @@ from youtube_dl.extractor import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE,
from helper import get_testcases
class TestAllURLsMatching(unittest.TestCase):
def setUp(self):
self.ies = gen_extractors()
def matching_ies(self, url):
return [ie.IE_NAME for ie in self.ies if ie.suitable(url) and ie.IE_NAME != 'generic']
def assertMatch(self, url, ie_list):
self.assertEqual(self.matching_ies(url), ie_list)
def test_youtube_playlist_matching(self):
self.assertTrue(YoutubePlaylistIE.suitable(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8'))
self.assertTrue(YoutubePlaylistIE.suitable(u'UUBABnxM4Ar9ten8Mdjj1j0Q')) #585
@ -24,12 +33,17 @@ class TestAllURLsMatching(unittest.TestCase):
def test_youtube_matching(self):
self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M'))
self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
def test_youtube_channel_matching(self):
self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM'))
self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec'))
self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM/videos'))
def test_youtube_user_matching(self):
self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
def test_justin_tv_channelid_matching(self):
self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
@ -50,6 +64,7 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc')
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch_popup?v=BaW_jenozKc'), 'BaW_jenozKc')
def test_no_duplicates(self):
ies = gen_extractors()
@ -62,15 +77,12 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))
def test_keywords(self):
ies = gen_extractors()
matching_ies = lambda url: [ie.IE_NAME for ie in ies
if ie.suitable(url) and ie.IE_NAME != 'generic']
self.assertEqual(matching_ies(':ytsubs'), ['youtube:subscriptions'])
self.assertEqual(matching_ies(':ytsubscriptions'), ['youtube:subscriptions'])
self.assertEqual(matching_ies(':thedailyshow'), ['ComedyCentral'])
self.assertEqual(matching_ies(':tds'), ['ComedyCentral'])
self.assertEqual(matching_ies(':colbertreport'), ['ComedyCentral'])
self.assertEqual(matching_ies(':cr'), ['ComedyCentral'])
self.assertMatch(':ytsubs', ['youtube:subscriptions'])
self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
self.assertMatch(':thedailyshow', ['ComedyCentral'])
self.assertMatch(':tds', ['ComedyCentral'])
self.assertMatch(':colbertreport', ['ComedyCentral'])
self.assertMatch(':cr', ['ComedyCentral'])
if __name__ == '__main__':

View File

@ -127,11 +127,10 @@ def generator(test_case):
info_dict = json.load(infof)
for (info_field, expected) in tc.get('info_dict', {}).items():
if isinstance(expected, compat_str) and expected.startswith('md5:'):
self.assertEqual(expected, 'md5:' + md5(info_dict.get(info_field)))
got = 'md5:' + md5(info_dict.get(info_field))
else:
got = info_dict.get(info_field)
self.assertEqual(
expected, got,
self.assertEqual(expected, got,
u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
# If checkable fields are missing from the test case, print the info_dict

38
test/test_playlists.py Normal file
View File

@ -0,0 +1,38 @@
#!/usr/bin/env python
import sys
import unittest
import json
# Allow direct execution
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE
from youtube_dl.utils import *
from helper import FakeYDL
class TestPlaylists(unittest.TestCase):
def assertIsPlaylist(self, info):
"""Make sure the info has '_type' set to 'playlist'"""
self.assertEqual(info['_type'], 'playlist')
def test_dailymotion_playlist(self):
dl = FakeYDL()
ie = DailymotionPlaylistIE(dl)
result = ie.extract('http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'SPORT')
self.assertTrue(len(result['entries']) > 20)
def test_vimeo_channel(self):
dl = FakeYDL()
ie = VimeoChannelIE(dl)
result = ie.extract('http://vimeo.com/channels/tributes')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'Vimeo Tributes')
self.assertTrue(len(result['entries']) > 24)
if __name__ == '__main__':
unittest.main()

View File

@ -1,67 +0,0 @@
#!/usr/bin/env python
import unittest
import sys
# Allow direct execution
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.extractor.youtube import YoutubeIE
from helper import FakeYDL
sig = YoutubeIE(FakeYDL())._decrypt_signature
class TestYoutubeSig(unittest.TestCase):
def test_92(self):
wrong = "F9F9B6E6FD47029957AB911A964CC20D95A181A5D37A2DBEFD67D403DB0E8BE4F4910053E4E8A79.0B70B.0B80B8"
right = "69B6E6FD47029957AB911A9F4CC20D95A181A5D3.A2DBEFD67D403DB0E8BE4F4910053E4E8A7980B7"
self.assertEqual(sig(wrong), right)
def test_90(self):
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`"
right = "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"
self.assertEqual(sig(wrong), right)
def test_88(self):
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<"
right = "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"
self.assertEqual(sig(wrong), right)
def test_87(self):
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<"
right = "tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>"
self.assertEqual(sig(wrong), right)
def test_86(self):
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"
right = "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"
self.assertEqual(sig(wrong), right)
def test_85(self):
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<"
right = "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"
self.assertEqual(sig(wrong), right)
def test_84(self):
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"
right = "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"
self.assertEqual(sig(wrong), right)
def test_83(self):
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<"
right = "urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<"
self.assertEqual(sig(wrong), right)
def test_82(self):
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<"
right = "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"
self.assertEqual(sig(wrong), right)
def test_81(self):
wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>."
right = "urty8ioplkjhgfdsazxcvbqm1234567e90QWERTYUIOPLKHGFDSnZXCVBNM!@#$%^&*(-+={[};?/>."
self.assertEqual(sig(wrong), right)
if __name__ == '__main__':
unittest.main()

View File

@ -35,47 +35,47 @@ class TestYoutubeSubtitles(unittest.TestCase):
DL.params['writesubtitles'] = True
IE = YoutubeIE(DL)
info_dict = IE.extract('QRS8MkLhQmM')
sub = info_dict[0]['subtitles'][0]
self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260')
sub = info_dict[0]['subtitles']['en']
self.assertEqual(md5(sub), '4cd9278a35ba2305f47354ee13472260')
def test_youtube_subtitles_it(self):
DL = FakeYDL()
DL.params['writesubtitles'] = True
DL.params['subtitleslang'] = 'it'
DL.params['subtitleslangs'] = ['it']
IE = YoutubeIE(DL)
info_dict = IE.extract('QRS8MkLhQmM')
sub = info_dict[0]['subtitles'][0]
self.assertEqual(md5(sub[2]), '164a51f16f260476a05b50fe4c2f161d')
sub = info_dict[0]['subtitles']['it']
self.assertEqual(md5(sub), '164a51f16f260476a05b50fe4c2f161d')
def test_youtube_onlysubtitles(self):
DL = FakeYDL()
DL.params['writesubtitles'] = True
DL.params['onlysubtitles'] = True
IE = YoutubeIE(DL)
info_dict = IE.extract('QRS8MkLhQmM')
sub = info_dict[0]['subtitles'][0]
self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260')
sub = info_dict[0]['subtitles']['en']
self.assertEqual(md5(sub), '4cd9278a35ba2305f47354ee13472260')
def test_youtube_allsubtitles(self):
DL = FakeYDL()
DL.params['allsubtitles'] = True
IE = YoutubeIE(DL)
info_dict = IE.extract('QRS8MkLhQmM')
subtitles = info_dict[0]['subtitles']
self.assertEqual(len(subtitles), 13)
self.assertEqual(len(subtitles.keys()), 13)
def test_youtube_subtitles_sbv_format(self):
DL = FakeYDL()
DL.params['writesubtitles'] = True
DL.params['subtitlesformat'] = 'sbv'
IE = YoutubeIE(DL)
info_dict = IE.extract('QRS8MkLhQmM')
sub = info_dict[0]['subtitles'][0]
self.assertEqual(md5(sub[2]), '13aeaa0c245a8bed9a451cb643e3ad8b')
sub = info_dict[0]['subtitles']['en']
self.assertEqual(md5(sub), '13aeaa0c245a8bed9a451cb643e3ad8b')
def test_youtube_subtitles_vtt_format(self):
DL = FakeYDL()
DL.params['writesubtitles'] = True
DL.params['subtitlesformat'] = 'vtt'
IE = YoutubeIE(DL)
info_dict = IE.extract('QRS8MkLhQmM')
sub = info_dict[0]['subtitles'][0]
self.assertEqual(md5(sub[2]), '356cdc577fde0c6783b9b822e7206ff7')
sub = info_dict[0]['subtitles']['en']
self.assertEqual(md5(sub), '356cdc577fde0c6783b9b822e7206ff7')
def test_youtube_list_subtitles(self):
DL = FakeYDL()
DL.params['listsubtitles'] = True
@ -85,11 +85,20 @@ class TestYoutubeSubtitles(unittest.TestCase):
def test_youtube_automatic_captions(self):
DL = FakeYDL()
DL.params['writeautomaticsub'] = True
DL.params['subtitleslang'] = 'it'
DL.params['subtitleslangs'] = ['it']
IE = YoutubeIE(DL)
info_dict = IE.extract('8YoUxe5ncPo')
sub = info_dict[0]['subtitles'][0]
self.assertTrue(sub[2] is not None)
sub = info_dict[0]['subtitles']['it']
self.assertTrue(sub is not None)
def test_youtube_multiple_langs(self):
DL = FakeYDL()
DL.params['writesubtitles'] = True
langs = ['it', 'fr', 'de']
DL.params['subtitleslangs'] = langs
IE = YoutubeIE(DL)
subtitles = IE.extract('QRS8MkLhQmM')[0]['subtitles']
for lang in langs:
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
if __name__ == '__main__':
unittest.main()

View File

@ -63,6 +63,17 @@ class FileDownloader(object):
converted = float(bytes) / float(1024 ** exponent)
return '%.2f%s' % (converted, suffix)
@staticmethod
def format_seconds(seconds):
(mins, secs) = divmod(seconds, 60)
(hours, eta_mins) = divmod(mins, 60)
if hours > 99:
return '--:--:--'
if hours == 0:
return '%02d:%02d' % (mins, secs)
else:
return '%02d:%02d:%02d' % (hours, mins, secs)
@staticmethod
def calc_percent(byte_counter, data_len):
if data_len is None:
@ -78,10 +89,7 @@ class FileDownloader(object):
return '--:--'
rate = float(current) / dif
eta = int((float(total) - float(current)) / rate)
(eta_mins, eta_secs) = divmod(eta, 60)
if eta_mins > 99:
return '--:--'
return '%02d:%02d' % (eta_mins, eta_secs)
return FileDownloader.format_seconds(eta)
@staticmethod
def calc_speed(start, now, bytes):
@ -230,12 +238,14 @@ class FileDownloader(object):
"""Report it was impossible to resume download."""
self.to_screen(u'[download] Unable to resume')
def report_finish(self):
def report_finish(self, data_len_str, tot_time):
"""Report download finished."""
if self.params.get('noprogress', False):
self.to_screen(u'[download] Download completed')
else:
self.to_screen(u'')
clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
self.to_screen(u'\r%s[download] 100%% of %s in %s' %
(clear_line, data_len_str, self.format_seconds(tot_time)))
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
self.report_destination(filename)
@ -329,6 +339,35 @@ class FileDownloader(object):
self.report_error(u'mplayer exited with code %d' % retval)
return False
def _download_m3u8_with_ffmpeg(self, filename, url):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
args = ['ffmpeg', '-y', '-i', url, '-f', 'mp4', tmpfilename]
# Check for ffmpeg first
try:
subprocess.call(['ffmpeg', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
except (OSError, IOError):
self.report_error(u'm3u8 download detected but "%s" could not be run' % args[0] )
return False
retval = subprocess.call(args)
if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
'total_bytes': fsize,
'filename': filename,
'status': 'finished',
})
return True
else:
self.to_stderr(u"\n")
self.report_error(u'ffmpeg exited with code %d' % retval)
return False
def _do_download(self, filename, info_dict):
url = info_dict['url']
@ -354,6 +393,10 @@ class FileDownloader(object):
if url.startswith('mms') or url.startswith('rtsp'):
return self._download_with_mplayer(filename, url)
# m3u8 manifest are downloaded with ffmpeg
if determine_ext(url) == u'm3u8':
return self._download_m3u8_with_ffmpeg(filename, url)
tmpfilename = self.temp_name(filename)
stream = None
@ -505,7 +548,7 @@ class FileDownloader(object):
self.report_error(u'Did not get any data blocks')
return False
stream.close()
self.report_finish()
self.report_finish(data_len_str, (time.time() - start))
if data_len is not None and byte_counter != data_len:
raise ContentTooShortError(byte_counter, int(data_len))
self.try_rename(tmpfilename, filename)

View File

@ -71,12 +71,17 @@ class FFmpegPostProcessor(PostProcessor):
programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
return dict((program, executable(program)) for program in programs)
def run_ffmpeg(self, path, out_path, opts):
def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
if not self._exes['ffmpeg'] and not self._exes['avconv']:
raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')
cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y', '-i', encodeFilename(path)]
files_cmd = []
for path in input_paths:
files_cmd.extend(['-i', encodeFilename(path)])
cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y'] + files_cmd
+ opts +
[encodeFilename(self._ffmpeg_filename_argument(out_path))])
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout,stderr = p.communicate()
if p.returncode != 0:
@ -84,6 +89,9 @@ class FFmpegPostProcessor(PostProcessor):
msg = stderr.strip().split('\n')[-1]
raise FFmpegPostProcessorError(msg)
def run_ffmpeg(self, path, out_path, opts):
self.run_ffmpeg_multiple_files([path], out_path, opts)
def _ffmpeg_filename_argument(self, fn):
# ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
if fn.startswith(u'-'):
@ -100,7 +108,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
self._nopostoverwrites = nopostoverwrites
def get_audio_codec(self, path):
if not self._exes['ffprobe'] and not self._exes['avprobe']: return None
if not self._exes['ffprobe'] and not self._exes['avprobe']:
raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')
try:
cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))]
handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
@ -128,7 +137,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
try:
FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
except FFmpegPostProcessorError as err:
raise AudioConversionError(err.message)
raise AudioConversionError(err.msg)
def run(self, information):
path = information['filepath']
@ -198,7 +207,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
except:
etype,e,tb = sys.exc_info()
if isinstance(e, AudioConversionError):
msg = u'audio conversion failed: ' + e.message
msg = u'audio conversion failed: ' + e.msg
else:
msg = u'error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg')
raise PostProcessingError(msg)
@ -208,7 +217,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
try:
os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
except:
self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')
self._downloader.report_warning(u'Cannot update utime of audio file')
information['filepath'] = new_path
return self._nopostoverwrites,information
@ -231,3 +240,227 @@ class FFmpegVideoConvertor(FFmpegPostProcessor):
information['format'] = self._preferedformat
information['ext'] = self._preferedformat
return False,information
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
# See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
_lang_map = {
'aa': 'aar',
'ab': 'abk',
'ae': 'ave',
'af': 'afr',
'ak': 'aka',
'am': 'amh',
'an': 'arg',
'ar': 'ara',
'as': 'asm',
'av': 'ava',
'ay': 'aym',
'az': 'aze',
'ba': 'bak',
'be': 'bel',
'bg': 'bul',
'bh': 'bih',
'bi': 'bis',
'bm': 'bam',
'bn': 'ben',
'bo': 'bod',
'br': 'bre',
'bs': 'bos',
'ca': 'cat',
'ce': 'che',
'ch': 'cha',
'co': 'cos',
'cr': 'cre',
'cs': 'ces',
'cu': 'chu',
'cv': 'chv',
'cy': 'cym',
'da': 'dan',
'de': 'deu',
'dv': 'div',
'dz': 'dzo',
'ee': 'ewe',
'el': 'ell',
'en': 'eng',
'eo': 'epo',
'es': 'spa',
'et': 'est',
'eu': 'eus',
'fa': 'fas',
'ff': 'ful',
'fi': 'fin',
'fj': 'fij',
'fo': 'fao',
'fr': 'fra',
'fy': 'fry',
'ga': 'gle',
'gd': 'gla',
'gl': 'glg',
'gn': 'grn',
'gu': 'guj',
'gv': 'glv',
'ha': 'hau',
'he': 'heb',
'hi': 'hin',
'ho': 'hmo',
'hr': 'hrv',
'ht': 'hat',
'hu': 'hun',
'hy': 'hye',
'hz': 'her',
'ia': 'ina',
'id': 'ind',
'ie': 'ile',
'ig': 'ibo',
'ii': 'iii',
'ik': 'ipk',
'io': 'ido',
'is': 'isl',
'it': 'ita',
'iu': 'iku',
'ja': 'jpn',
'jv': 'jav',
'ka': 'kat',
'kg': 'kon',
'ki': 'kik',
'kj': 'kua',
'kk': 'kaz',
'kl': 'kal',
'km': 'khm',
'kn': 'kan',
'ko': 'kor',
'kr': 'kau',
'ks': 'kas',
'ku': 'kur',
'kv': 'kom',
'kw': 'cor',
'ky': 'kir',
'la': 'lat',
'lb': 'ltz',
'lg': 'lug',
'li': 'lim',
'ln': 'lin',
'lo': 'lao',
'lt': 'lit',
'lu': 'lub',
'lv': 'lav',
'mg': 'mlg',
'mh': 'mah',
'mi': 'mri',
'mk': 'mkd',
'ml': 'mal',
'mn': 'mon',
'mr': 'mar',
'ms': 'msa',
'mt': 'mlt',
'my': 'mya',
'na': 'nau',
'nb': 'nob',
'nd': 'nde',
'ne': 'nep',
'ng': 'ndo',
'nl': 'nld',
'nn': 'nno',
'no': 'nor',
'nr': 'nbl',
'nv': 'nav',
'ny': 'nya',
'oc': 'oci',
'oj': 'oji',
'om': 'orm',
'or': 'ori',
'os': 'oss',
'pa': 'pan',
'pi': 'pli',
'pl': 'pol',
'ps': 'pus',
'pt': 'por',
'qu': 'que',
'rm': 'roh',
'rn': 'run',
'ro': 'ron',
'ru': 'rus',
'rw': 'kin',
'sa': 'san',
'sc': 'srd',
'sd': 'snd',
'se': 'sme',
'sg': 'sag',
'si': 'sin',
'sk': 'slk',
'sl': 'slv',
'sm': 'smo',
'sn': 'sna',
'so': 'som',
'sq': 'sqi',
'sr': 'srp',
'ss': 'ssw',
'st': 'sot',
'su': 'sun',
'sv': 'swe',
'sw': 'swa',
'ta': 'tam',
'te': 'tel',
'tg': 'tgk',
'th': 'tha',
'ti': 'tir',
'tk': 'tuk',
'tl': 'tgl',
'tn': 'tsn',
'to': 'ton',
'tr': 'tur',
'ts': 'tso',
'tt': 'tat',
'tw': 'twi',
'ty': 'tah',
'ug': 'uig',
'uk': 'ukr',
'ur': 'urd',
'uz': 'uzb',
've': 'ven',
'vi': 'vie',
'vo': 'vol',
'wa': 'wln',
'wo': 'wol',
'xh': 'xho',
'yi': 'yid',
'yo': 'yor',
'za': 'zha',
'zh': 'zho',
'zu': 'zul',
}
def __init__(self, downloader=None, subtitlesformat='srt'):
super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
self._subformat = subtitlesformat
@classmethod
def _conver_lang_code(cls, code):
"""Convert language code from ISO 639-1 to ISO 639-2/T"""
return cls._lang_map.get(code[:2])
def run(self, information):
if information['ext'] != u'mp4':
self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
return True, information
sub_langs = [key for key in information['subtitles']]
filename = information['filepath']
input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs]
opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy']
for (i, lang) in enumerate(sub_langs):
opts.extend(['-map', '%d:0' % (i+1), '-c:s:%d' % i, 'mov_text'])
lang_code = self._conver_lang_code(lang)
if lang_code is not None:
opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
opts.extend(['-f', 'mp4'])
temp_filename = filename + u'.temp'
self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename)
self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
os.remove(encodeFilename(filename))
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
return True, information

View File

@ -76,7 +76,7 @@ class YoutubeDL(object):
allsubtitles: Downloads all the subtitles of the video
listsubtitles: Lists all available subtitles for the video
subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
subtitleslang: Language of the subtitles to download
subtitleslangs: List of languages of the subtitles to download
keepvideo: Keep the video file after post-processing
daterange: A DateRange object, download only if the upload_date is in the range.
skip_download: Skip the actual download of the video file
@ -97,6 +97,7 @@ class YoutubeDL(object):
def __init__(self, params):
"""Create a FileDownloader object with the given options."""
self._ies = []
self._ies_instances = {}
self._pps = []
self._progress_hooks = []
self._download_retcode = 0
@ -111,8 +112,21 @@ class YoutubeDL(object):
def add_info_extractor(self, ie):
"""Add an InfoExtractor object to the end of the list."""
self._ies.append(ie)
self._ies_instances[ie.ie_key()] = ie
ie.set_downloader(self)
def get_info_extractor(self, ie_key):
"""
Get an instance of an IE with name ie_key, it will try to get one from
the _ies list, if there's no instance it will create a new one and add
it to the extractor list.
"""
ie = self._ies_instances.get(ie_key)
if ie is None:
ie = get_info_extractor(ie_key)()
self.add_info_extractor(ie)
return ie
def add_default_info_extractors(self):
"""
Add the InfoExtractors returned by gen_extractors to the end of the list
@ -264,7 +278,7 @@ class YoutubeDL(object):
self.report_error(u'Erroneous output template')
return None
except ValueError as err:
self.report_error(u'Insufficient system charset ' + repr(preferredencoding()))
self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
return None
def _match_entry(self, info_dict):
@ -294,9 +308,7 @@ class YoutubeDL(object):
'''
if ie_key:
ie = get_info_extractor(ie_key)()
ie.set_downloader(self)
ies = [ie]
ies = [self.get_info_extractor(ie_key)]
else:
ies = self._ies
@ -448,7 +460,8 @@ class YoutubeDL(object):
if self.params.get('forceid', False):
compat_print(info_dict['id'])
if self.params.get('forceurl', False):
compat_print(info_dict['url'])
# For RTMP URLs, also include the playpath
compat_print(info_dict['url'] + info_dict.get('play_path', u''))
if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
compat_print(info_dict['thumbnail'])
if self.params.get('forcedescription', False) and 'description' in info_dict:
@ -483,34 +496,21 @@ class YoutubeDL(object):
self.report_error(u'Cannot write description file ' + descfn)
return
if (self.params.get('writesubtitles', False) or self.params.get('writeautomaticsub')) and 'subtitles' in info_dict and info_dict['subtitles']:
subtitles_are_requested = any([self.params.get('writesubtitles', False),
self.params.get('writeautomaticsub'),
self.params.get('allsubtitles', False)])
if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
subtitle = info_dict['subtitles'][0]
(sub_error, sub_lang, sub) = subtitle
sub_format = self.params.get('subtitlesformat')
if sub_error:
self.report_warning("Some error while getting the subtitles")
else:
try:
sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
self.report_writesubtitles(sub_filename)
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
subfile.write(sub)
except (OSError, IOError):
self.report_error(u'Cannot write subtitles file ' + descfn)
return
if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
subtitles = info_dict['subtitles']
sub_format = self.params.get('subtitlesformat')
for subtitle in subtitles:
(sub_error, sub_lang, sub) = subtitle
if sub_error:
self.report_warning("Some error while getting the subtitles")
else:
for sub_lang in subtitles.keys():
sub = subtitles[sub_lang]
if sub is None:
continue
try:
sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
self.report_writesubtitles(sub_filename)
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
subfile.write(sub)
@ -547,7 +547,7 @@ class YoutubeDL(object):
try:
success = self.fd._do_download(filename, info_dict)
except (OSError, IOError) as err:
raise UnavailableVideoError()
raise UnavailableVideoError(err)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_error(u'unable to download video data: %s' % str(err))
return
@ -594,7 +594,7 @@ class YoutubeDL(object):
# No clear decision yet, let IE decide
keep_video = keep_video_wish
except PostProcessingError as e:
self.to_stderr(u'ERROR: ' + e.msg)
self.report_error(e.msg)
if keep_video is False and not self.params.get('keepvideo', False):
try:
self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)

View File

@ -27,6 +27,8 @@ __authors__ = (
'Johny Mo Swag',
'Axel Noack',
'Albert Kim',
'Pierre Rudloff',
'Huarong Huo',
)
__license__ = 'Public Domain'
@ -44,6 +46,7 @@ import sys
import warnings
import platform
from .utils import *
from .update import update_self
from .version import __version__
@ -82,6 +85,9 @@ def parseOpts(overrideArguments=None):
return "".join(opts)
def _comma_separated_values_options_callback(option, opt_str, value, parser):
setattr(parser.values, option.dest, value.split(','))
def _find_term_columns():
columns = os.environ.get('COLUMNS', None)
if columns:
@ -95,6 +101,16 @@ def parseOpts(overrideArguments=None):
pass
return None
def _hide_login_info(opts):
opts = list(opts)
for private_opt in ['-p', '--password', '-u', '--username']:
try:
i = opts.index(private_opt)
opts[i+1] = '<PRIVATE>'
except ValueError:
pass
return opts
max_width = 80
max_help_position = 80
@ -119,6 +135,7 @@ def parseOpts(overrideArguments=None):
selection = optparse.OptionGroup(parser, 'Video Selection')
authentication = optparse.OptionGroup(parser, 'Authentication Options')
video_format = optparse.OptionGroup(parser, 'Video Format Options')
subtitles = optparse.OptionGroup(parser, 'Subtitle Options')
downloader = optparse.OptionGroup(parser, 'Download Options')
postproc = optparse.OptionGroup(parser, 'Post-processing Options')
filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
@ -176,7 +193,7 @@ def parseOpts(overrideArguments=None):
video_format.add_option('-f', '--format',
action='store', dest='format', metavar='FORMAT',
help='video format code, specifiy the order of preference using slashes: "-f 22/17/18"')
help='video format code, specifiy the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
video_format.add_option('--all-formats',
action='store_const', dest='format', help='download all available video formats', const='all')
video_format.add_option('--prefer-free-formats',
@ -185,27 +202,29 @@ def parseOpts(overrideArguments=None):
action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
video_format.add_option('-F', '--list-formats',
action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
video_format.add_option('--write-sub', '--write-srt',
subtitles.add_option('--write-sub', '--write-srt',
action='store_true', dest='writesubtitles',
help='write subtitle file (currently youtube only)', default=False)
video_format.add_option('--write-auto-sub', '--write-automatic-sub',
subtitles.add_option('--write-auto-sub', '--write-automatic-sub',
action='store_true', dest='writeautomaticsub',
help='write automatic subtitle file (currently youtube only)', default=False)
video_format.add_option('--only-sub',
subtitles.add_option('--only-sub',
action='store_true', dest='skip_download',
help='[deprecated] alias of --skip-download', default=False)
video_format.add_option('--all-subs',
subtitles.add_option('--all-subs',
action='store_true', dest='allsubtitles',
help='downloads all the available subtitles of the video (currently youtube only)', default=False)
video_format.add_option('--list-subs',
help='downloads all the available subtitles of the video', default=False)
subtitles.add_option('--list-subs',
action='store_true', dest='listsubtitles',
help='lists all available subtitles for the video (currently youtube only)', default=False)
video_format.add_option('--sub-format',
help='lists all available subtitles for the video', default=False)
subtitles.add_option('--sub-format',
action='store', dest='subtitlesformat', metavar='FORMAT',
help='subtitle format [srt/sbv/vtt] (default=srt) (currently youtube only)', default='srt')
video_format.add_option('--sub-lang', '--srt-lang',
action='store', dest='subtitleslang', metavar='LANG',
help='language of the subtitles to download (optional) use IETF language tags like \'en\'')
help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt')
subtitles.add_option('--sub-lang', '--sub-langs', '--srt-lang',
action='callback', dest='subtitleslang', metavar='LANGS', type='str',
default=[], callback=_comma_separated_values_options_callback,
help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'')
downloader.add_option('-r', '--rate-limit',
dest='ratelimit', metavar='LIMIT', help='maximum download rate (e.g. 50k or 44.6m)')
@ -320,6 +339,8 @@ def parseOpts(overrideArguments=None):
help='keeps the video file on disk after the post-processing; the video is erased by default')
postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False,
help='do not overwrite post-processed files; the post-processed files are overwritten by default')
postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False,
help='embed subtitles in the video (only for mp4 videos)')
parser.add_option_group(general)
@ -328,6 +349,7 @@ def parseOpts(overrideArguments=None):
parser.add_option_group(filesystem)
parser.add_option_group(verbosity)
parser.add_option_group(video_format)
parser.add_option_group(subtitles)
parser.add_option_group(authentication)
parser.add_option_group(postproc)
@ -347,9 +369,9 @@ def parseOpts(overrideArguments=None):
argv = systemConf + userConf + commandLineConf
opts, args = parser.parse_args(argv)
if opts.verbose:
sys.stderr.write(u'[debug] System config: ' + repr(systemConf) + '\n')
sys.stderr.write(u'[debug] User config: ' + repr(userConf) + '\n')
sys.stderr.write(u'[debug] Command-line args: ' + repr(commandLineConf) + '\n')
sys.stderr.write(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
sys.stderr.write(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
sys.stderr.write(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
return parser, opts, args
@ -398,6 +420,8 @@ def _real_main(argv=None):
batchurls = batchfd.readlines()
batchurls = [x.strip() for x in batchurls]
batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
if opts.verbose:
sys.stderr.write(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
except IOError:
sys.exit(u'ERROR: batch file could not be read')
all_urls = batchurls + args
@ -418,6 +442,10 @@ def _real_main(argv=None):
proxy_handler = compat_urllib_request.ProxyHandler(proxies)
https_handler = make_HTTPS_handler(opts)
opener = compat_urllib_request.build_opener(https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
# Delete the default user-agent header, which would otherwise apply in
# cases where our custom HTTP handler doesn't come into play
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
opener.addheaders =[]
compat_urllib_request.install_opener(opener)
socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
@ -565,7 +593,7 @@ def _real_main(argv=None):
'allsubtitles': opts.allsubtitles,
'listsubtitles': opts.listsubtitles,
'subtitlesformat': opts.subtitlesformat,
'subtitleslang': opts.subtitleslang,
'subtitleslangs': opts.subtitleslang,
'matchtitle': decodeOption(opts.matchtitle),
'rejecttitle': decodeOption(opts.rejecttitle),
'max_downloads': opts.max_downloads,
@ -595,7 +623,7 @@ def _real_main(argv=None):
sys.exc_clear()
except:
pass
sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform()) + u'\n')
sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
sys.stderr.write(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
ydl.add_default_info_extractors()
@ -605,6 +633,8 @@ def _real_main(argv=None):
ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
if opts.recodevideo:
ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
if opts.embedsubtitles:
ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
# Update version
if opts.update_self:

202
youtube_dl/aes.py Normal file
View File

@ -0,0 +1,202 @@
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_decrypt_text']
import base64
from math import ceil
from .utils import bytes_to_intlist, intlist_to_bytes
BLOCK_SIZE_BYTES = 16
def aes_ctr_decrypt(data, key, counter):
"""
Decrypt with aes in counter mode
@param {int[]} data cipher
@param {int[]} key 16/24/32-Byte cipher key
@param {instance} counter Instance whose next_value function (@returns {int[]} 16-Byte block)
returns the next counter block
@returns {int[]} decrypted data
"""
expanded_key = key_expansion(key)
block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
decrypted_data=[]
for i in range(block_count):
counter_block = counter.next_value()
block = data[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES]
block += [0]*(BLOCK_SIZE_BYTES - len(block))
cipher_counter_block = aes_encrypt(counter_block, expanded_key)
decrypted_data += xor(block, cipher_counter_block)
decrypted_data = decrypted_data[:len(data)]
return decrypted_data
def key_expansion(data):
"""
Generate key schedule
@param {int[]} data 16/24/32-Byte cipher key
@returns {int[]} 176/208/240-Byte expanded key
"""
data = data[:] # copy
rcon_iteration = 1
key_size_bytes = len(data)
expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES
while len(data) < expanded_key_size_bytes:
temp = data[-4:]
temp = key_schedule_core(temp, rcon_iteration)
rcon_iteration += 1
data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
for _ in range(3):
temp = data[-4:]
data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
if key_size_bytes == 32:
temp = data[-4:]
temp = sub_bytes(temp)
data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0):
temp = data[-4:]
data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
data = data[:expanded_key_size_bytes]
return data
def aes_encrypt(data, expanded_key):
"""
Encrypt one block with aes
@param {int[]} data 16-Byte state
@param {int[]} expanded_key 176/208/240-Byte expanded key
@returns {int[]} 16-Byte cipher
"""
rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
for i in range(1, rounds+1):
data = sub_bytes(data)
data = shift_rows(data)
if i != rounds:
data = mix_columns(data)
data = xor(data, expanded_key[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES])
return data
def aes_decrypt_text(data, password, key_size_bytes):
"""
Decrypt text
- The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter
- The cipher key is retrieved by encrypting the first 16 Byte of 'password'
with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 0's)
- Mode of operation is 'counter'
@param {str} data Base64 encoded string
@param {str,unicode} password Password (will be encoded with utf-8)
@param {int} key_size_bytes Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit
@returns {str} Decrypted data
"""
NONCE_LENGTH_BYTES = 8
data = bytes_to_intlist(base64.b64decode(data))
password = bytes_to_intlist(password.encode('utf-8'))
key = password[:key_size_bytes] + [0]*(key_size_bytes - len(password))
key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES)
nonce = data[:NONCE_LENGTH_BYTES]
cipher = data[NONCE_LENGTH_BYTES:]
class Counter:
__value = nonce + [0]*(BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)
def next_value(self):
temp = self.__value
self.__value = inc(self.__value)
return temp
decrypted_data = aes_ctr_decrypt(cipher, key, Counter())
plaintext = intlist_to_bytes(decrypted_data)
return plaintext
RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16)
MIX_COLUMN_MATRIX = ((2,3,1,1),
(1,2,3,1),
(1,1,2,3),
(3,1,1,2))
def sub_bytes(data):
return [SBOX[x] for x in data]
def rotate(data):
return data[1:] + [data[0]]
def key_schedule_core(data, rcon_iteration):
data = rotate(data)
data = sub_bytes(data)
data[0] = data[0] ^ RCON[rcon_iteration]
return data
def xor(data1, data2):
return [x^y for x, y in zip(data1, data2)]
def mix_column(data):
data_mixed = []
for row in range(4):
mixed = 0
for column in range(4):
addend = data[column]
if MIX_COLUMN_MATRIX[row][column] in (2,3):
addend <<= 1
if addend > 0xff:
addend &= 0xff
addend ^= 0x1b
if MIX_COLUMN_MATRIX[row][column] == 3:
addend ^= data[column]
mixed ^= addend & 0xff
data_mixed.append(mixed)
return data_mixed
def mix_columns(data):
data_mixed = []
for i in range(4):
column = data[i*4 : (i+1)*4]
data_mixed += mix_column(column)
return data_mixed
def shift_rows(data):
data_shifted = []
for column in range(4):
for row in range(4):
data_shifted.append( data[((column + row) & 0b11) * 4 + row] )
return data_shifted
def inc(data):
data = data[:] # copy
for i in range(len(data)-1,-1,-1):
if data[i] == 255:
data[i] = 0
else:
data[i] = data[i] + 1
break
return data

View File

@ -1,3 +1,5 @@
from .appletrailers import AppleTrailersIE
from .addanime import AddAnimeIE
from .archiveorg import ArchiveOrgIE
from .ard import ARDIE
from .arte import ArteTvIE
@ -6,16 +8,21 @@ from .bandcamp import BandcampIE
from .bliptv import BlipTVIE, BlipTVUserIE
from .breakcom import BreakIE
from .brightcove import BrightcoveIE
from .c56 import C56IE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .cnn import CNNIE
from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE
from .condenast import CondeNastIE
from .criterion import CriterionIE
from .cspan import CSpanIE
from .dailymotion import DailymotionIE
from .dailymotion import DailymotionIE, DailymotionPlaylistIE
from .daum import DaumIE
from .depositfiles import DepositFilesIE
from .dotsub import DotsubIE
from .dreisat import DreiSatIE
from .defense import DefenseGouvFrIE
from .ehow import EHowIE
from .eighttracks import EightTracksIE
from .escapist import EscapistIE
@ -29,6 +36,7 @@ from .gametrailers import GametrailersIE
from .generic import GenericIE
from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE
from .hark import HarkIE
from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE
from .hypem import HypemIE
@ -36,23 +44,37 @@ from .ign import IGNIE, OneUPIE
from .ina import InaIE
from .infoq import InfoQIE
from .instagram import InstagramIE
from .jeuxvideo import JeuxVideoIE
from .jukebox import JukeboxIE
from .justintv import JustinTVIE
from .kankan import KankanIE
from .keek import KeekIE
from .liveleak import LiveLeakIE
from .livestream import LivestreamIE
from .metacafe import MetacafeIE
from .mit import TechTVMITIE, MITIE
from .mixcloud import MixcloudIE
from .mtv import MTVIE
from .muzu import MuzuTVIE
from .myspass import MySpassIE
from .myvideo import MyVideoIE
from .naver import NaverIE
from .nba import NBAIE
from .nbc import NBCNewsIE
from .ooyala import OoyalaIE
from .orf import ORFIE
from .pbs import PBSIE
from .photobucket import PhotobucketIE
from .pornotube import PornotubeIE
from .rbmaradio import RBMARadioIE
from .redtube import RedTubeIE
from .ringtv import RingTVIE
from .ro220 import Ro220IE
from .roxwel import RoxwelIE
from .rtlnow import RTLnowIE
from .sina import SinaIE
from .slashdot import SlashdotIE
from .sohu import SohuIE
from .soundcloud import SoundcloudIE, SoundcloudSetIE
from .spiegel import SpiegelIE
from .stanfordoc import StanfordOpenClassroomIE
@ -63,16 +85,19 @@ from .ted import TEDIE
from .tf1 import TF1IE
from .thisav import ThisAVIE
from .traileraddict import TrailerAddictIE
from .trilulilu import TriluliluIE
from .tudou import TudouIE
from .tumblr import TumblrIE
from .tutv import TutvIE
from .unistra import UnistraIE
from .ustream import UstreamIE
from .vbox7 import Vbox7IE
from .veehd import VeeHDIE
from .veoh import VeohIE
from .vevo import VevoIE
from .vimeo import VimeoIE
from .videofyme import VideofyMeIE
from .vimeo import VimeoIE, VimeoChannelIE
from .vine import VineIE
from .c56 import C56IE
from .wat import WatIE
from .weibo import WeiboIE
from .wimp import WimpIE
@ -93,6 +118,7 @@ from .youtube import (
YoutubeShowIE,
YoutubeSubscriptionsIE,
YoutubeRecommendedIE,
YoutubeWatchLaterIE,
YoutubeFavouritesIE,
)
from .zdf import ZDFIE
@ -105,12 +131,14 @@ _ALL_CLASSES = [
]
_ALL_CLASSES.append(GenericIE)
def gen_extractors():
""" Return a list of an instance of every supported extractor.
The order does matter; the first extractor matched is the one handling the URL.
"""
return [klass() for klass in _ALL_CLASSES]
def get_info_extractor(ie_name):
"""Returns the info extractor class with the given ie_name"""
return globals()[ie_name+'IE']

View File

@ -0,0 +1,75 @@
import re
from .common import InfoExtractor
from ..utils import (
compat_HTTPError,
compat_str,
compat_urllib_parse,
compat_urllib_parse_urlparse,
ExtractorError,
)
class AddAnimeIE(InfoExtractor):
_VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
IE_NAME = u'AddAnime'
_TEST = {
u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
u'file': u'24MR3YO5SAS9.flv',
u'md5': u'1036a0e0cd307b95bd8a8c3a5c8cfaf1',
u'info_dict': {
u"description": u"One Piece 606",
u"title": u"One Piece 606"
}
}
def _real_extract(self, url):
try:
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
webpage = self._download_webpage(url, video_id)
except ExtractorError as ee:
if not isinstance(ee.cause, compat_HTTPError):
raise
redir_webpage = ee.cause.read().decode('utf-8')
action = self._search_regex(
r'<form id="challenge-form" action="([^"]+)"',
redir_webpage, u'Redirect form')
vc = self._search_regex(
r'<input type="hidden" name="jschl_vc" value="([^"]+)"/>',
redir_webpage, u'redirect vc value')
av = re.search(
r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
redir_webpage)
if av is None:
raise ExtractorError(u'Cannot find redirect math task')
av_res = int(av.group(1)) + int(av.group(2)) * int(av.group(3))
parsed_url = compat_urllib_parse_urlparse(url)
av_val = av_res + len(parsed_url.netloc)
confirm_url = (
parsed_url.scheme + u'://' + parsed_url.netloc +
action + '?' +
compat_urllib_parse.urlencode({
'jschl_vc': vc, 'jschl_answer': compat_str(av_val)}))
self._download_webpage(
confirm_url, video_id,
note=u'Confirming after redirect')
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(r"var normal_video_file = '(.*?)';",
webpage, u'video file URL')
video_title = self._og_search_title(webpage)
video_description = self._og_search_description(webpage)
return {
'_type': 'video',
'id': video_id,
'url': video_url,
'ext': 'flv',
'title': video_title,
'description': video_description
}

View File

@ -0,0 +1,166 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
determine_ext,
)
class AppleTrailersIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?trailers.apple.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
_TEST = {
u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
u"playlist": [
{
u"file": u"manofsteel-trailer4.mov",
u"md5": u"11874af099d480cc09e103b189805d5f",
u"info_dict": {
u"duration": 111,
u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_11624.jpg",
u"title": u"Trailer 4",
u"upload_date": u"20130523",
u"uploader_id": u"wb",
},
},
{
u"file": u"manofsteel-trailer3.mov",
u"md5": u"07a0a262aae5afe68120eed61137ab34",
u"info_dict": {
u"duration": 182,
u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_10793.jpg",
u"title": u"Trailer 3",
u"upload_date": u"20130417",
u"uploader_id": u"wb",
},
},
{
u"file": u"manofsteel-trailer.mov",
u"md5": u"e401fde0813008e3307e54b6f384cff1",
u"info_dict": {
u"duration": 148,
u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_8703.jpg",
u"title": u"Trailer",
u"upload_date": u"20121212",
u"uploader_id": u"wb",
},
},
{
u"file": u"manofsteel-teaser.mov",
u"md5": u"76b392f2ae9e7c98b22913c10a639c97",
u"info_dict": {
u"duration": 93,
u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_6899.jpg",
u"title": u"Teaser",
u"upload_date": u"20120721",
u"uploader_id": u"wb",
},
}
]
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
movie = mobj.group('movie')
uploader_id = mobj.group('company')
playlist_url = url.partition(u'?')[0] + u'/includes/playlists/web.inc'
playlist_snippet = self._download_webpage(playlist_url, movie)
playlist_cleaned = re.sub(r'(?s)<script>.*?</script>', u'', playlist_snippet)
playlist_html = u'<html>' + playlist_cleaned + u'</html>'
size_cache = {}
doc = xml.etree.ElementTree.fromstring(playlist_html)
playlist = []
for li in doc.findall('./div/ul/li'):
title = li.find('.//h3').text
video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
thumbnail = li.find('.//img').attrib['src']
date_el = li.find('.//p')
upload_date = None
m = re.search(r':\s?(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<year>[0-9]{2})', date_el.text)
if m:
upload_date = u'20' + m.group('year') + m.group('month') + m.group('day')
runtime_el = date_el.find('./br')
m = re.search(r':\s?(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime_el.tail)
duration = None
if m:
duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
formats = []
for formats_el in li.findall('.//a'):
if formats_el.attrib['class'] != 'OverlayPanel':
continue
target = formats_el.attrib['target']
format_code = formats_el.text
if 'Automatic' in format_code:
continue
size_q = formats_el.attrib['href']
size_id = size_q.rpartition('#videos-')[2]
if size_id not in size_cache:
size_url = url + size_q
sizepage_html = self._download_webpage(
size_url, movie,
note=u'Downloading size info %s' % size_id,
errnote=u'Error while downloading size info %s' % size_id,
)
_doc = xml.etree.ElementTree.fromstring(sizepage_html)
size_cache[size_id] = _doc
sizepage_doc = size_cache[size_id]
links = sizepage_doc.findall('.//{http://www.w3.org/1999/xhtml}ul/{http://www.w3.org/1999/xhtml}li/{http://www.w3.org/1999/xhtml}a')
for vid_a in links:
href = vid_a.get('href')
if not href.endswith(target):
continue
detail_q = href.partition('#')[0]
detail_url = url + '/' + detail_q
m = re.match(r'includes/(?P<detail_id>[^/]+)/', detail_q)
detail_id = m.group('detail_id')
detail_html = self._download_webpage(
detail_url, movie,
note=u'Downloading detail %s %s' % (detail_id, size_id),
errnote=u'Error while downloading detail %s %s' % (detail_id, size_id)
)
detail_doc = xml.etree.ElementTree.fromstring(detail_html)
movie_link_el = detail_doc.find('.//{http://www.w3.org/1999/xhtml}a')
assert movie_link_el.get('class') == 'movieLink'
movie_link = movie_link_el.get('href').partition('?')[0].replace('_', '_h')
ext = determine_ext(movie_link)
assert ext == 'mov'
formats.append({
'format': format_code,
'ext': ext,
'url': movie_link,
})
info = {
'_type': 'video',
'id': video_id,
'title': title,
'formats': formats,
'title': title,
'duration': duration,
'thumbnail': thumbnail,
'upload_date': upload_date,
'uploader_id': uploader_id,
'user_agent': 'QuickTime compatible (youtube-dl)',
}
# TODO: Remove when #980 has been merged
info['url'] = formats[-1]['url']
info['ext'] = formats[-1]['ext']
playlist.append(info)
return {
'_type': 'playlist',
'id': movie,
'entries': playlist,
}

View File

@ -17,13 +17,14 @@ class ArteTvIE(InfoExtractor):
"""
_EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
_VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?P<lang>fr|de)/.*-(?P<id>.*?).html'
_LIVEWEB_URL = r'(?:http://)?liveweb.arte.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
_LIVE_URL = r'index-[0-9]+\.html$'
IE_NAME = u'arte.tv'
@classmethod
def suitable(cls, url):
return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL))
return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL, cls._LIVEWEB_URL))
# TODO implement Live Stream
# from ..utils import compat_urllib_parse
@ -68,6 +69,12 @@ class ArteTvIE(InfoExtractor):
lang = mobj.group('lang')
return self._extract_video(url, id, lang)
mobj = re.match(self._LIVEWEB_URL, url)
if mobj is not None:
name = mobj.group('name')
lang = mobj.group('lang')
return self._extract_liveweb(url, name, lang)
if re.search(self._LIVE_URL, video_id) is not None:
raise ExtractorError(u'Arte live streams are not yet supported, sorry')
# self.extractLiveStream(url)
@ -85,7 +92,7 @@ class ArteTvIE(InfoExtractor):
info_dict = {'id': player_info['VID'],
'title': player_info['VTI'],
'description': player_info['VDE'],
'description': player_info.get('VDE'),
'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]),
'thumbnail': player_info['programImage'],
'ext': 'flv',
@ -98,12 +105,14 @@ class ArteTvIE(InfoExtractor):
l = 'F'
elif lang == 'de':
l = 'A'
regexes = [r'VO?%s' % l, r'V%s-ST.' % l]
regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
return any(re.match(r, f['versionCode']) for r in regexes)
# Some formats may not be in the same language as the url
formats = filter(_match_lang, formats)
# We order the formats by quality
formats = sorted(formats, key=lambda f: int(f['height']))
# Prefer videos without subtitles in the same language
formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f['versionCode']) is None)
# Pick the best quality
format_info = formats[-1]
if format_info['mediaType'] == u'rtmp':
@ -144,3 +153,22 @@ class ArteTvIE(InfoExtractor):
'url': video_url,
'ext': 'flv',
}
def _extract_liveweb(self, url, name, lang):
"""Extract form http://liveweb.arte.tv/"""
webpage = self._download_webpage(url, name)
video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id')
config_xml = self._download_webpage('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
video_id, u'Downloading information')
config_doc = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
event_doc = config_doc.find('event')
url_node = event_doc.find('video').find('urlHd')
if url_node is None:
url_node = video_doc.find('urlSd')
return {'id': video_id,
'title': event_doc.find('name%s' % lang.capitalize()).text,
'url': url_node.text.replace('MP4', 'mp4'),
'ext': 'flv',
'thumbnail': self._og_search_thumbnail(webpage),
}

View File

@ -12,8 +12,8 @@ class C56IE(InfoExtractor):
_TEST ={
u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
u'file': u'93440716.mp4',
u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e',
u'file': u'93440716.flv',
u'md5': u'e59995ac63d0457783ea05f93f12a866',
u'info_dict': {
u'title': u'网事知多少 第32期车怒',
},

View File

@ -0,0 +1,35 @@
# coding: utf-8
import re
from .common import InfoExtractor
class Canalc2IE(InfoExtractor):
_IE_NAME = 'canalc2.tv'
_VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui'
_TEST = {
u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
u'file': u'12163.mp4',
u'md5': u'060158428b650f896c542dfbb3d6487f',
u'info_dict': {
u'title': u'Terrasses du Numérique'
}
}
def _real_extract(self, url):
video_id = re.match(self._VALID_URL, url).group(1)
webpage = self._download_webpage(url, video_id)
file_name = self._search_regex(
r"so\.addVariable\('file','(.*?)'\);",
webpage, 'file name')
video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name
title = self._html_search_regex(
r'class="evenement8">(.*?)</a>', webpage, u'title')
return {'id': video_id,
'ext': 'mp4',
'url': video_url,
'title': title,
}

View File

@ -5,7 +5,7 @@ from .common import InfoExtractor
from ..utils import unified_strdate
class CanalplusIE(InfoExtractor):
_VALID_URL = r'https?://www\.canalplus\.fr/.*?\?vid=(?P<id>\d+)'
_VALID_URL = r'https?://(www\.canalplus\.fr/.*?\?vid=|player\.canalplus\.fr/#/)(?P<id>\d+)'
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
IE_NAME = u'canalplus.fr'

View File

@ -0,0 +1,58 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import determine_ext
class CNNIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(edition\.)?cnn\.com/video/(data/.+?|\?)/
(?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''
_TESTS = [{
u'url': u'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
u'file': u'sports_2013_06_09_nadal-1-on-1.cnn.mp4',
u'md5': u'3e6121ea48df7e2259fe73a0628605c4',
u'info_dict': {
u'title': u'Nadal wins 8th French Open title',
u'description': u'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
},
},
{
u"url": u"http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
u"file": u"us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4",
u"md5": u"b5cc60c60a3477d185af8f19a2a26f4e",
u"info_dict": {
u"title": "Student's epic speech stuns new freshmen",
u"description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\""
}
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
path = mobj.group('path')
page_title = mobj.group('title')
info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
info_xml = self._download_webpage(info_url, page_title)
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
formats = []
for f in info.findall('files/file'):
mf = re.match(r'(\d+)x(\d+)(?:_(.*)k)?',f.attrib['bitrate'])
if mf is not None:
formats.append((int(mf.group(1)), int(mf.group(2)), int(mf.group(3) or 0), f.text))
formats = sorted(formats)
(_,_,_, video_path) = formats[-1]
video_url = 'http://ht.cdn.turner.com/cnn/big%s' % video_path
thumbnails = sorted([((int(t.attrib['height']),int(t.attrib['width'])), t.text) for t in info.findall('images/image')])
thumbs_dict = [{'resolution': res, 'url': t_url} for (res, t_url) in thumbnails]
return {'id': info.attrib['id'],
'title': info.find('headline').text,
'url': video_url,
'ext': determine_ext(video_url),
'thumbnail': thumbnails[-1][1],
'thumbnails': thumbs_dict,
'description': info.find('description').text,
}

View File

@ -4,15 +4,16 @@ import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse_urlparse,
determine_ext,
ExtractorError,
)
class CollegeHumorIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed)/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'
_TEST = {
_TESTS = [{
u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
u'file': u'6902724.mp4',
u'md5': u'1264c12ad95dca142a9f0bf7968105a0',
@ -20,7 +21,16 @@ class CollegeHumorIE(InfoExtractor):
u'title': u'Comic-Con Cosplay Catastrophe',
u'description': u'Fans get creative this year at San Diego. Too creative. And yes, that\'s really Joss Whedon.',
},
}
},
{
u'url': u'http://www.collegehumor.com/video/3505939/font-conference',
u'file': u'3505939.mp4',
u'md5': u'c51ca16b82bb456a4397987791a835f5',
u'info_dict': {
u'title': u'Font Conference',
u'description': u'This video wasn\'t long enough, so we made it double-spaced.',
},
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@ -49,11 +59,12 @@ class CollegeHumorIE(InfoExtractor):
info['description'] = videoNode.findall('./description')[0].text
info['title'] = videoNode.findall('./caption')[0].text
info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
manifest_url = videoNode.findall('./file')[0].text
next_url = videoNode.findall('./file')[0].text
except IndexError:
raise ExtractorError(u'Invalid metadata XML file')
manifest_url += '?hdcore=2.10.3'
if next_url.endswith(u'manifest.f4m'):
manifest_url = next_url + '?hdcore=2.10.3'
manifestXml = self._download_webpage(manifest_url, video_id,
u'Downloading XML manifest',
u'Unable to download video info XML')
@ -65,9 +76,12 @@ class CollegeHumorIE(InfoExtractor):
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
except IndexError as err:
raise ExtractorError(u'Invalid manifest file')
url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
info['ext'] = 'mp4'
return [info]
else:
# Old-style direct links
info['url'] = next_url
info['ext'] = determine_ext(info['url'])
return info

View File

@ -47,7 +47,8 @@ class InfoExtractor(object):
uploader_id: Nickname or id of the video uploader.
location: Physical location of the video.
player_url: SWF Player URL (used for rtmpdump).
subtitles: The subtitle file contents.
subtitles: The subtitle file contents as a dictionary in the format
{language: subtitles}.
view_count: How many users have watched the video on the platform.
urlhandle: [internal] The urlHandle to be used to download the file,
like returned by urllib.request.urlopen
@ -77,7 +78,13 @@ class InfoExtractor(object):
@classmethod
def suitable(cls, url):
"""Receives a URL and returns True if suitable for this IE."""
return re.match(cls._VALID_URL, url) is not None
# This does not use has/getattr intentionally - we want to know whether
# we have cached the regexp for *this* class, whereas getattr would also
# match the superclass
if '_VALID_URL_RE' not in cls.__dict__:
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
return cls._VALID_URL_RE.match(url) is not None
@classmethod
def working(cls):
@ -107,6 +114,11 @@ class InfoExtractor(object):
"""Real extraction process. Redefine in subclasses."""
pass
@classmethod
def ie_key(cls):
"""A string for getting the InfoExtractor with get_info_extractor"""
return cls.__name__[:-2]
@property
def IE_NAME(self):
return type(self).__name__[:-2]
@ -122,7 +134,7 @@ class InfoExtractor(object):
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
if errnote is None:
errnote = u'Unable to download webpage'
raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2])
raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2], cause=err)
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None):
""" Returns a tuple (page content as string, URL handle) """
@ -133,12 +145,17 @@ class InfoExtractor(object):
urlh = self._request_webpage(url_or_request, video_id, note, errnote)
content_type = urlh.headers.get('Content-Type', '')
webpage_bytes = urlh.read()
m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
if m:
encoding = m.group(1)
else:
m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]',
webpage_bytes[:1024])
if m:
encoding = m.group(1).decode('ascii')
else:
encoding = 'utf-8'
webpage_bytes = urlh.read()
if self._downloader.params.get('dump_intermediate_pages', False):
try:
url = url_or_request.get_full_url()

View File

@ -1,9 +1,12 @@
import re
import json
import itertools
from .common import InfoExtractor
from ..utils import (
compat_urllib_request,
get_element_by_attribute,
get_element_by_id,
ExtractorError,
)
@ -18,7 +21,7 @@ class DailymotionIE(InfoExtractor):
u'file': u'x33vw9.mp4',
u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
u'info_dict': {
u"uploader": u"Alex and Van .",
u"uploader": u"Amphora Alex and Van .",
u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
}
}
@ -52,7 +55,8 @@ class DailymotionIE(InfoExtractor):
embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
embed_page = self._download_webpage(embed_url, video_id,
u'Downloading embed page')
info = self._search_regex(r'var info = ({.*?}),', embed_page, 'video info')
info = self._search_regex(r'var info = ({.*?}),$', embed_page,
'video info', flags=re.MULTILINE)
info = json.loads(info)
# TODO: support choosing qualities
@ -77,3 +81,31 @@ class DailymotionIE(InfoExtractor):
'ext': video_extension,
'thumbnail': info['thumbnail_url']
}]
class DailymotionPlaylistIE(InfoExtractor):
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
_MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id')
video_ids = []
for pagenum in itertools.count(1):
webpage = self._download_webpage('https://www.dailymotion.com/playlist/%s/%s' % (playlist_id, pagenum),
playlist_id, u'Downloading page %s' % pagenum)
playlist_el = get_element_by_attribute(u'class', u'video_list', webpage)
video_ids.extend(re.findall(r'data-id="(.+?)" data-ext-id', playlist_el))
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
break
entries = [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
for video_id in video_ids]
return {'_type': 'playlist',
'id': playlist_id,
'title': get_element_by_id(u'playlist_name', webpage),
'entries': entries,
}

View File

@ -0,0 +1,74 @@
# encoding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
determine_ext,
)
class DaumIE(InfoExtractor):
_VALID_URL = r'https?://tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
IE_NAME = u'daum.net'
_TEST = {
u'url': u'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
u'file': u'52554690.mp4',
u'info_dict': {
u'title': u'DOTA 2GETHER 시즌2 6회 - 2부',
u'description': u'DOTA 2GETHER 시즌2 6회 - 2부',
u'upload_date': u'20130831',
u'duration': 3868,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
webpage = self._download_webpage(canonical_url, video_id)
full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"',
webpage, u'full id')
query = compat_urllib_parse.urlencode({'vid': full_id})
info_xml = self._download_webpage(
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
u'Downloading video info')
urls_xml = self._download_webpage(
'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
video_id, u'Downloading video formats info')
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))
self.to_screen(u'%s: Getting video urls' % video_id)
formats = []
for format_el in urls.findall('result/output_list/output_list'):
profile = format_el.attrib['profile']
format_query = compat_urllib_parse.urlencode({
'vid': full_id,
'profile': profile,
})
url_xml = self._download_webpage(
'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
video_id, note=False)
url_doc = xml.etree.ElementTree.fromstring(url_xml.encode('utf-8'))
format_url = url_doc.find('result/url').text
formats.append({
'url': format_url,
'ext': determine_ext(format_url),
'format_id': profile,
})
info = {
'id': video_id,
'title': info.find('TITLE').text,
'formats': formats,
'thumbnail': self._og_search_thumbnail(webpage),
'description': info.find('CONTENTS').text,
'duration': int(info.find('DURATION').text),
'upload_date': info.find('REGDTTM').text[:8],
}
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@ -0,0 +1,39 @@
import re
import json
from .common import InfoExtractor
class DefenseGouvFrIE(InfoExtractor):
_IE_NAME = 'defense.gouv.fr'
_VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/'
r'ligthboxvideo/base-de-medias/webtv/(.*)')
_TEST = {
u'url': (u'http://www.defense.gouv.fr/layout/set/ligthboxvideo/'
u'base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1'),
u'file': u'11213.mp4',
u'md5': u'75bba6124da7e63d2d60b5244ec9430c',
"info_dict": {
"title": "attaque-chimique-syrienne-du-21-aout-2013-1"
}
}
def _real_extract(self, url):
title = re.match(self._VALID_URL, url).group(1)
webpage = self._download_webpage(url, title)
video_id = self._search_regex(
r"flashvars.pvg_id=\"(\d+)\";",
webpage, 'ID')
json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
+ video_id)
info = self._download_webpage(json_url, title,
'Downloading JSON config')
video_url = json.loads(info)['renditions'][0]['url']
return {'id': video_id,
'ext': 'mp4',
'url': video_url,
'title': title,
}

View File

@ -21,17 +21,14 @@ class FunnyOrDieIE(InfoExtractor):
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
video_url = self._html_search_regex(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"',
video_url = self._search_regex(r'type: "video/mp4", src: "(.*?)"',
webpage, u'video URL', flags=re.DOTALL)
title = self._html_search_regex((r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>",
r'<title>(?P<title>[^<]+?)</title>'), webpage, 'title', flags=re.DOTALL)
info = {
'id': video_id,
'url': video_url,
'ext': 'mp4',
'title': title,
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
}
return [info]

View File

@ -8,11 +8,13 @@ from ..utils import (
compat_urllib_error,
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
ExtractorError,
)
from .brightcove import BrightcoveIE
class GenericIE(InfoExtractor):
IE_DESC = u'Generic downloader that works on some sites'
_VALID_URL = r'.*'
@ -107,8 +109,13 @@ class GenericIE(InfoExtractor):
return new_url
def _real_extract(self, url):
try:
new_url = self._test_redirect(url)
if new_url: return [self.url_result(new_url)]
if new_url:
return [self.url_result(new_url)]
except compat_urllib_error.HTTPError:
# This may be a stupid server that doesn't like HEAD, our UA, or so
pass
video_id = url.split('/')[-1]
try:
@ -119,7 +126,7 @@ class GenericIE(InfoExtractor):
raise ExtractorError(u'Invalid URL: %s' % url)
self.report_extraction(video_id)
# Look for BrigthCove:
# Look for BrightCove:
m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
if m_brightcove is not None:
self.to_screen(u'Brightcove video detected.')
@ -144,6 +151,9 @@ class GenericIE(InfoExtractor):
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
if m_video_type is not None:
mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
if mobj is None:
# HTML5 video
mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
@ -152,8 +162,9 @@ class GenericIE(InfoExtractor):
if mobj.group(1) is None:
raise ExtractorError(u'Invalid URL: %s' % url)
video_url = compat_urllib_parse.unquote(mobj.group(1))
video_id = os.path.basename(video_url)
video_url = mobj.group(1)
video_url = compat_urlparse.urljoin(url, video_url)
video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
# here's a fun little line of code for you:
video_extension = os.path.splitext(video_id)[1][1:]

View File

@ -57,8 +57,8 @@ class GooglePlusIE(InfoExtractor):
webpage, 'title', default=u'NA')
# Step 2, Simulate clicking the image box to launch video
DOMAIN = 'https://plus.google.com'
video_page = self._search_regex(r'<a href="((?:%s)?/photos/.*?)"' % re.escape(DOMAIN),
DOMAIN = 'https://plus.google.com/'
video_page = self._search_regex(r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN),
webpage, u'video page URL')
if not video_page.startswith(DOMAIN):
video_page = DOMAIN + video_page

View File

@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
import re
import json
from .common import InfoExtractor
from ..utils import determine_ext
class HarkIE(InfoExtractor):
_VALID_URL = r'https?://www\.hark\.com/clips/(.+?)-.+'
_TEST = {
u'url': u'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
u'file': u'mmbzyhkgny.mp3',
u'md5': u'6783a58491b47b92c7c1af5a77d4cbee',
u'info_dict': {
u'title': u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' on May 23, 2013",
u'description': u'President Barack Obama addressed the nation live on May 23, 2013 in a speech aimed at addressing counter-terrorism policies including the use of drone strikes, detainees at Guantanamo Bay prison facility, and American citizens who are terrorists.',
u'duration': 11,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
json_url = "http://www.hark.com/clips/%s.json" %(video_id)
info_json = self._download_webpage(json_url, video_id)
info = json.loads(info_json)
final_url = info['url']
return {'id': video_id,
'url' : final_url,
'title': info['name'],
'ext': determine_ext(final_url),
'description': info['description'],
'thumbnail': info['image_original'],
'duration': info['duration'],
}

View File

@ -13,7 +13,7 @@ class IGNIE(InfoExtractor):
Some videos of it.ign.com are also supported
"""
_VALID_URL = r'https?://.+?\.ign\.com/(?:videos|show_videos)(/.+)?/(?P<name_or_id>.+)'
_VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles)(/.+)?/(?P<name_or_id>.+)'
IE_NAME = u'ign.com'
_CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'
@ -41,7 +41,11 @@ class IGNIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
name_or_id = mobj.group('name_or_id')
page_type = mobj.group('type')
webpage = self._download_webpage(url, name_or_id)
if page_type == 'articles':
video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, u'video url')
return self.url_result(video_url, ie='IGN')
video_id = self._find_video_id(webpage)
result = self._get_video_info(video_id)
description = self._html_search_regex(self._DESCRIPTION_RE,
@ -68,7 +72,7 @@ class IGNIE(InfoExtractor):
class OneUPIE(IGNIE):
"""Extractor for 1up.com, it uses the ign videos system."""
_VALID_URL = r'https?://gamevideos.1up.com/video/id/(?P<name_or_id>.+)'
_VALID_URL = r'https?://gamevideos.1up.com/(?P<type>video)/id/(?P<name_or_id>.+)'
IE_NAME = '1up.com'
_DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'

View File

@ -5,7 +5,7 @@ from .common import InfoExtractor
class InaIE(InfoExtractor):
"""Information Extractor for Ina.fr"""
_VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
_VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I?[A-F0-9]+)/.*'
_TEST = {
u'url': u'www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
u'file': u'I12055569.mp4',

View File

@ -0,0 +1,47 @@
# coding: utf-8
import json
import re
import xml.etree.ElementTree
from .common import InfoExtractor
class JeuxVideoIE(InfoExtractor):
_VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm'
_TEST = {
u'url': u'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
u'file': u'5182.mp4',
u'md5': u'e0fdb0cd3ce98713ef9c1e1e025779d0',
u'info_dict': {
u'title': u'GC 2013 : Tearaway nous présente ses papiers d\'identité',
u'description': u'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
title = re.match(self._VALID_URL, url).group(1)
webpage = self._download_webpage(url, title)
m_download = re.search(r'<param name="flashvars" value="config=(.*?)" />', webpage)
xml_link = m_download.group(1)
id = re.search(r'http://www.jeuxvideo.com/config/\w+/0011/(.*?)/\d+_player\.xml', xml_link).group(1)
xml_config = self._download_webpage(xml_link, title,
'Downloading XML config')
config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8'))
info = re.search(r'<format\.json>(.*?)</format\.json>',
xml_config, re.MULTILINE|re.DOTALL).group(1)
info = json.loads(info)['versions'][0]
video_url = 'http://video720.jeuxvideo.com/' + info['file']
return {'id': id,
'title' : config.find('titre_video').text,
'ext' : 'mp4',
'url' : video_url,
'description': self._og_search_description(webpage),
'thumbnail': config.find('image').text,
}

View File

@ -0,0 +1,39 @@
import re
from .common import InfoExtractor
from ..utils import determine_ext
class KankanIE(InfoExtractor):
_VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'
_TEST = {
u'url': u'http://yinyue.kankan.com/vod/48/48863.shtml',
u'file': u'48863.flv',
u'md5': u'29aca1e47ae68fc28804aca89f29507e',
u'info_dict': {
u'title': u'Ready To Go',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, u'video title')
surls = re.search(r'surls:\[\'.+?\'\]|lurl:\'.+?\.flv\'', webpage).group(0)
gcids = re.findall(r"http://.+?/.+?/(.+?)/", surls)
gcid = gcids[-1]
video_info_page = self._download_webpage('http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid,
video_id, u'Downloading video url info')
ip = self._search_regex(r'ip:"(.+?)"', video_info_page, u'video url ip')
path = self._search_regex(r'path:"(.+?)"', video_info_page, u'video url path')
video_url = 'http://%s%s' % (ip, path)
return {'id': video_id,
'title': title,
'url': video_url,
'ext': determine_ext(video_url),
}

View File

@ -4,10 +4,10 @@ from .common import InfoExtractor
class KeekIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
_VALID_URL = r'https?://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
IE_NAME = u'keek'
_TEST = {
u'url': u'http://www.keek.com/ytdl/keeks/NODfbab',
u'url': u'https://www.keek.com/ytdl/keeks/NODfbab',
u'file': u'NODfbab.mp4',
u'md5': u'9b0636f8c0f7614afa4ea5e4c6e57e83',
u'info_dict': {

View File

@ -122,7 +122,7 @@ class MetacafeIE(InfoExtractor):
video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, u'title')
description = self._og_search_description(webpage)
video_uploader = self._html_search_regex(
r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("channel","([^"]+)"\);',
r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
webpage, u'uploader nickname', fatal=False)
return {

View File

@ -0,0 +1,74 @@
import re
import json
from .common import InfoExtractor
from ..utils import (
clean_html,
get_element_by_id,
)
class TechTVMITIE(InfoExtractor):
IE_NAME = u'techtv.mit.edu'
_VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)'
_TEST = {
u'url': u'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
u'file': u'25418.mp4',
u'md5': u'1f8cb3e170d41fd74add04d3c9330e5f',
u'info_dict': {
u'title': u'MIT DNA Learning Center Set',
u'description': u'md5:82313335e8a8a3f243351ba55bc1b474',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
raw_page = self._download_webpage(
'http://techtv.mit.edu/videos/%s' % video_id, video_id)
clean_page = re.compile(u'<!--.*?-->', re.S).sub(u'', raw_page)
base_url = self._search_regex(r'ipadUrl: \'(.+?cloudfront.net/)',
raw_page, u'base url')
formats_json = self._search_regex(r'bitrates: (\[.+?\])', raw_page,
u'video formats')
formats = json.loads(formats_json)
formats = sorted(formats, key=lambda f: f['bitrate'])
title = get_element_by_id('edit-title', clean_page)
description = clean_html(get_element_by_id('edit-description', clean_page))
thumbnail = self._search_regex(r'playlist:.*?url: \'(.+?)\'',
raw_page, u'thumbnail', flags=re.DOTALL)
return {'id': video_id,
'title': title,
'url': base_url + formats[-1]['url'].replace('mp4:', ''),
'ext': 'mp4',
'description': description,
'thumbnail': thumbnail,
}
class MITIE(TechTVMITIE):
IE_NAME = u'video.mit.edu'
_VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)'
_TEST = {
u'url': u'http://video.mit.edu/watch/the-government-is-profiling-you-13222/',
u'file': u'21783.mp4',
u'md5': u'7db01d5ccc1895fc5010e9c9e13648da',
u'info_dict': {
u'title': u'The Government is Profiling You',
u'description': u'md5:ad5795fe1e1623b73620dbfd47df9afd',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
page_title = mobj.group('title')
webpage = self._download_webpage(url, page_title)
self.to_screen('%s: Extracting %s url' % (page_title, TechTVMITIE.IE_NAME))
embed_url = self._search_regex(r'<iframe .*?src="(.+?)"', webpage,
u'embed url')
return self.url_result(embed_url, ie='TechTVMIT')

View File

@ -0,0 +1,64 @@
import re
import json
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
determine_ext,
)
class MuzuTVIE(InfoExtractor):
_VALID_URL = r'https?://www.muzu.tv/(.+?)/(.+?)/(?P<id>\d+)'
IE_NAME = u'muzu.tv'
_TEST = {
u'url': u'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/',
u'file': u'1981454.mp4',
u'md5': u'98f8b2c7bc50578d6a0364fff2bfb000',
u'info_dict': {
u'title': u'Cat Walk (Original Mix)',
u'description': u'md5:90e868994de201b2570e4e5854e19420',
u'uploader': u'MarcAshken featuring SOS',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
info_data = compat_urllib_parse.urlencode({'format': 'json',
'url': url,
})
video_info_page = self._download_webpage('http://www.muzu.tv/api/oembed/?%s' % info_data,
video_id, u'Downloading video info')
info = json.loads(video_info_page)
player_info_page = self._download_webpage('http://player.muzu.tv/player/playerInit?ai=%s' % video_id,
video_id, u'Downloading player info')
video_info = json.loads(player_info_page)['videos'][0]
for quality in ['1080' , '720', '480', '360']:
if video_info.get('v%s' % quality):
break
data = compat_urllib_parse.urlencode({'ai': video_id,
# Even if each time you watch a video the hash changes,
# it seems to work for different videos, and it will work
# even if you use any non empty string as a hash
'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k',
'device': 'web',
'qv': quality,
})
video_url_page = self._download_webpage('http://player.muzu.tv/player/requestVideo?%s' % data,
video_id, u'Downloading video url')
video_url_info = json.loads(video_url_page)
video_url = video_url_info['url']
return {'id': video_id,
'title': info['title'],
'url': video_url,
'ext': determine_ext(video_url),
'thumbnail': info['thumbnail_url'],
'description': info['description'],
'uploader': info['author_name'],
}

View File

@ -2,11 +2,13 @@ import binascii
import base64
import hashlib
import re
import json
from .common import InfoExtractor
from ..utils import (
compat_ord,
compat_urllib_parse,
compat_urllib_request,
ExtractorError,
)
@ -16,7 +18,7 @@ from ..utils import (
class MyVideoIE(InfoExtractor):
"""Information Extractor for myvideo.de."""
_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/([0-9]+)/([^?/]+).*'
IE_NAME = u'myvideo'
_TEST = {
u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
@ -85,6 +87,20 @@ class MyVideoIE(InfoExtractor):
'ext': video_ext,
}]
mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
if mobj is not None:
request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
response = self._download_webpage(request, video_id,
u'Downloading video info')
info = json.loads(base64.b64decode(response).decode('utf-8'))
return {'id': video_id,
'title': info['title'],
'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
'play_path': info['filename'],
'ext': 'flv',
'thumbnail': info['thumbnail'][0]['url'],
}
# try encxml
mobj = re.search('var flashvars={(.+?)}', webpage)
if mobj is None:

View File

@ -0,0 +1,73 @@
# encoding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
ExtractorError,
)
class NaverIE(InfoExtractor):
_VALID_URL = r'https?://tvcast\.naver\.com/v/(?P<id>\d+)'
_TEST = {
u'url': u'http://tvcast.naver.com/v/81652',
u'file': u'81652.mp4',
u'info_dict': {
u'title': u'[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
u'description': u'합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
u'upload_date': u'20130903',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
webpage = self._download_webpage(url, video_id)
m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
webpage)
if m_id is None:
raise ExtractorError(u'couldn\'t extract vid and key')
vid = m_id.group(1)
key = m_id.group(2)
query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key,})
query_urls = compat_urllib_parse.urlencode({
'masterVid': vid,
'protocol': 'p2p',
'inKey': key,
})
info_xml = self._download_webpage(
'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
video_id, u'Downloading video info')
urls_xml = self._download_webpage(
'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
video_id, u'Downloading video formats info')
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))
formats = []
for format_el in urls.findall('EncodingOptions/EncodingOption'):
domain = format_el.find('Domain').text
if domain.startswith('rtmp'):
continue
formats.append({
'url': domain + format_el.find('uri').text,
'ext': 'mp4',
'width': int(format_el.find('width').text),
'height': int(format_el.find('height').text),
})
info = {
'id': video_id,
'title': info.find('Subject').text,
'formats': formats,
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'upload_date': info.find('WriteDate').text.replace('.', ''),
'view_count': int(info.find('PlayCount').text),
}
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info

View File

@ -0,0 +1,33 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import find_xpath_attr, compat_str
class NBCNewsIE(InfoExtractor):
_VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)'
_TEST = {
u'url': u'http://www.nbcnews.com/video/nbc-news/52753292',
u'file': u'52753292.flv',
u'md5': u'47abaac93c6eaf9ad37ee6c4463a5179',
u'info_dict': {
u'title': u'Crew emerges after four-month Mars food study',
u'description': u'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
info_xml = self._download_webpage('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')).find('video')
return {'id': video_id,
'title': info.find('headline').text,
'ext': 'flv',
'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
'description': compat_str(info.find('caption').text),
'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
}

View File

@ -0,0 +1,52 @@
import re
import json
from .common import InfoExtractor
from ..utils import unescapeHTML
class OoyalaIE(InfoExtractor):
_VALID_URL = r'https?://.+?\.ooyala\.com/.*?embedCode=(?P<id>.+?)(&|$)'
_TEST = {
# From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
u'url': u'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
u'file': u'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8.mp4',
u'md5': u'3f5cceb3a7bf461d6c29dc466cf8033c',
u'info_dict': {
u'title': u'Explaining Data Recovery from Hard Drives and SSDs',
u'description': u'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
},
}
def _extract_result(self, info, more_info):
return {'id': info['embedCode'],
'ext': 'mp4',
'title': unescapeHTML(info['title']),
'url': info['url'],
'description': unescapeHTML(more_info['description']),
'thumbnail': more_info['promo'],
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
embedCode = mobj.group('id')
player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embedCode
player = self._download_webpage(player_url, embedCode)
mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
player, u'mobile player url')
mobile_player = self._download_webpage(mobile_url, embedCode)
videos_info = self._search_regex(r'eval\("\((\[{.*?stream_redirect.*?}\])\)"\);', mobile_player, u'info').replace('\\"','"')
videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info').replace('\\"','"')
videos_info = json.loads(videos_info)
videos_more_info =json.loads(videos_more_info)
if videos_more_info.get('lineup'):
videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]
return {'_type': 'playlist',
'id': embedCode,
'title': unescapeHTML(videos_more_info['title']),
'entries': videos,
}
else:
return self._extract_result(videos_info[0], videos_more_info)

View File

@ -0,0 +1,54 @@
# coding: utf-8
import re
import xml.etree.ElementTree
import json
from .common import InfoExtractor
from ..utils import (
compat_urlparse,
ExtractorError,
find_xpath_attr,
)
class ORFIE(InfoExtractor):
_VALID_URL = r'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id')
webpage = self._download_webpage(url, playlist_id)
flash_xml = self._search_regex('ORF.flashXML = \'(.+?)\'', webpage, u'flash xml')
flash_xml = compat_urlparse.parse_qs('xml='+flash_xml)['xml'][0]
flash_config = xml.etree.ElementTree.fromstring(flash_xml.encode('utf-8'))
playlist_json = self._search_regex(r'playlist\': \'(\[.*?\])\'', webpage, u'playlist').replace(r'\"','"')
playlist = json.loads(playlist_json)
videos = []
ns = '{http://tempuri.org/XMLSchema.xsd}'
xpath = '%(ns)sPlaylist/%(ns)sItems/%(ns)sItem' % {'ns': ns}
webpage_description = self._og_search_description(webpage)
for (i, (item, info)) in enumerate(zip(flash_config.findall(xpath), playlist), 1):
# Get best quality url
rtmp_url = None
for q in ['Q6A', 'Q4A', 'Q1A']:
video_url = find_xpath_attr(item, '%sVideoUrl' % ns, 'quality', q)
if video_url is not None:
rtmp_url = video_url.text
break
if rtmp_url is None:
raise ExtractorError(u'Couldn\'t get video url: %s' % info['id'])
description = self._html_search_regex(
r'id="playlist_entry_%s".*?<p>(.*?)</p>' % i, webpage,
u'description', default=webpage_description, flags=re.DOTALL)
videos.append({
'_type': 'video',
'id': info['id'],
'title': info['title'],
'url': rtmp_url,
'ext': 'flv',
'description': description,
})
return videos

View File

@ -0,0 +1,34 @@
import re
import json
from .common import InfoExtractor
class PBSIE(InfoExtractor):
_VALID_URL = r'https?://video.pbs.org/video/(?P<id>\d+)/?'
_TEST = {
u'url': u'http://video.pbs.org/video/2365006249/',
u'file': u'2365006249.mp4',
u'md5': 'ce1888486f0908d555a8093cac9a7362',
u'info_dict': {
u'title': u'A More Perfect Union',
u'description': u'md5:ba0c207295339c8d6eced00b7c363c6a',
u'duration': 3190,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
info_page = self._download_webpage(info_url, video_id)
info =json.loads(info_page)
return {'id': video_id,
'title': info['title'],
'url': info['alternate_encoding']['url'],
'ext': 'mp4',
'description': info['program'].get('description'),
'thumbnail': info.get('image_url'),
'duration': info.get('duration'),
}

View File

@ -0,0 +1,42 @@
import re
from .common import InfoExtractor
from ..utils import (
clean_html,
compat_parse_qs,
)
class Ro220IE(InfoExtractor):
IE_NAME = '220.ro'
_VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)'
_TEST = {
u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
u'file': u'LYV6doKo7f.mp4',
u'md5': u'03af18b73a07b4088753930db7a34add',
u'info_dict': {
u"title": u"Luati-le Banii sez 4 ep 1",
u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
webpage = self._download_webpage(url, video_id)
flashVars_str = self._search_regex(
r'<param name="flashVars" value="([^"]+)"',
webpage, u'flashVars')
flashVars = compat_parse_qs(flashVars_str)
info = {
'_type': 'video',
'id': video_id,
'ext': 'mp4',
'url': flashVars['videoURL'][0],
'title': flashVars['title'][0],
'description': clean_html(flashVars['desc'][0]),
'thumbnail': flashVars['preview'][0],
}
return info

View File

@ -0,0 +1,49 @@
import re
import json
from .common import InfoExtractor
from ..utils import unified_strdate, determine_ext
class RoxwelIE(InfoExtractor):
_VALID_URL = r'https?://www\.roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)'
_TEST = {
u'url': u'http://www.roxwel.com/player/passionpittakeawalklive.html',
u'file': u'passionpittakeawalklive.flv',
u'md5': u'd9dea8360a1e7d485d2206db7fe13035',
u'info_dict': {
u'title': u'Take A Walk (live)',
u'uploader': u'Passion Pit',
u'description': u'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. ',
},
u'skip': u'Requires rtmpdump',
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
filename = mobj.group('filename')
info_url = 'http://www.roxwel.com/api/videos/%s' % filename
info_page = self._download_webpage(info_url, filename,
u'Downloading video info')
self.report_extraction(filename)
info = json.loads(info_page)
rtmp_rates = sorted([int(r.replace('flv_', '')) for r in info['media_rates'] if r.startswith('flv_')])
best_rate = rtmp_rates[-1]
url_page_url = 'http://roxwel.com/pl_one_time.php?filename=%s&quality=%s' % (filename, best_rate)
rtmp_url = self._download_webpage(url_page_url, filename, u'Downloading video url')
ext = determine_ext(rtmp_url)
if ext == 'f4v':
rtmp_url = rtmp_url.replace(filename, 'mp4:%s' % filename)
return {'id': filename,
'title': info['title'],
'url': rtmp_url,
'ext': 'flv',
'description': info['description'],
'thumbnail': info.get('player_image_url') or info.get('image_url_large'),
'uploader': info['artist'],
'uploader_id': info['artistname'],
'upload_date': unified_strdate(info['dbdate']),
}

View File

@ -0,0 +1,126 @@
# encoding: utf-8
import re
from .common import InfoExtractor
from ..utils import (
clean_html,
ExtractorError,
)
class RTLnowIE(InfoExtractor):
"""Information Extractor for RTL NOW, RTL2 NOW, SUPER RTL NOW and VOX NOW"""
_VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?superrtlnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
_TESTS = [{
u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
u'file': u'90419.flv',
u'info_dict': {
u'upload_date': u'20070416',
u'title': u'Ahornallee - Folge 1 - Der Einzug',
u'description': u'Folge 1 - Der Einzug',
},
u'params': {
u'skip_download': True,
},
u'skip': u'Only works from Germany',
},
{
u'url': u'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5',
u'file': u'69756.flv',
u'info_dict': {
u'upload_date': u'20120519',
u'title': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...',
u'description': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.',
u'thumbnail': u'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg',
},
u'params': {
u'skip_download': True,
},
u'skip': u'Only works from Germany',
},
{
u'url': u'www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17',
u'file': u'13883.flv',
u'info_dict': {
u'upload_date': u'20090627',
u'title': u'Voxtours - Südafrika-Reporter II',
u'description': u'Südafrika-Reporter II',
},
u'params': {
u'skip_download': True,
},
},
{
u'url': u'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1',
u'file': u'99205.flv',
u'info_dict': {
u'upload_date': u'20080928',
u'title': u'Medicopter 117 - Angst!',
u'description': u'Angst!',
u'thumbnail': u'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg'
},
u'params': {
u'skip_download': True,
},
}]
def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url)
webpage_url = u'http://' + mobj.group('url')
video_page_url = u'http://' + mobj.group('base_url')
video_id = mobj.group(u'video_id')
webpage = self._download_webpage(webpage_url, video_id)
note_m = re.search(r'''(?sx)
<div[ ]style="margin-left:[ ]20px;[ ]font-size:[ ]13px;">(.*?)
<div[ ]id="playerteaser">''', webpage)
if note_m:
msg = clean_html(note_m.group(1))
raise ExtractorError(msg)
video_title = self._html_search_regex(r'<title>(?P<title>[^<]+)</title>',
webpage, u'title')
playerdata_url = self._html_search_regex(r'\'playerdata\': \'(?P<playerdata_url>[^\']+)\'',
webpage, u'playerdata_url')
playerdata = self._download_webpage(playerdata_url, video_id)
mobj = re.search(r'<title><!\[CDATA\[(?P<description>.+?)\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr\]\]></title>', playerdata)
if mobj:
video_description = mobj.group(u'description')
if mobj.group('upload_date_Y'):
video_upload_date = mobj.group('upload_date_Y')
else:
video_upload_date = u'20' + mobj.group('upload_date_y')
video_upload_date += mobj.group('upload_date_m')+mobj.group('upload_date_d')
else:
video_description = None
video_upload_date = None
self._downloader.report_warning(u'Unable to extract description and upload date')
# Thumbnail: not every video has an thumbnail
mobj = re.search(r'<meta property="og:image" content="(?P<thumbnail>[^"]+)">', webpage)
if mobj:
video_thumbnail = mobj.group(u'thumbnail')
else:
video_thumbnail = None
mobj = re.search(r'<filename [^>]+><!\[CDATA\[(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>[^\]]+)\]\]></filename>', playerdata)
if mobj is None:
raise ExtractorError(u'Unable to extract media URL')
video_url = mobj.group(u'url')
video_play_path = u'mp4:' + mobj.group(u'play_path')
video_player_url = video_page_url + u'includes/vodplayer.swf'
return [{
'id': video_id,
'url': video_url,
'play_path': video_play_path,
'page_url': video_page_url,
'player_url': video_player_url,
'ext': 'flv',
'title': video_title,
'description': video_description,
'upload_date': video_upload_date,
'thumbnail': video_thumbnail,
}]

View File

@ -0,0 +1,23 @@
import re
from .common import InfoExtractor
class SlashdotIE(InfoExtractor):
_VALID_URL = r'https?://tv.slashdot.org/video/\?embed=(?P<id>.*?)(&|$)'
_TEST = {
u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz',
u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4',
u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735',
u'info_dict': {
u'title': u' Meet the Stampede Supercomputing Cluster\'s Administrator',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
ooyala_url = self._search_regex(r'<script src="(.*?)"', webpage, 'ooyala url')
return self.url_result(ooyala_url, 'Ooyala')

View File

@ -0,0 +1,90 @@
# encoding: utf-8
import json
import re
from .common import InfoExtractor
from ..utils import ExtractorError
class SohuIE(InfoExtractor):
_VALID_URL = r'https?://tv\.sohu\.com/\d+?/n(?P<id>\d+)\.shtml.*?'
_TEST = {
u'url': u'http://tv.sohu.com/20130724/n382479172.shtml#super',
u'file': u'382479172.mp4',
u'md5': u'bde8d9a6ffd82c63a1eefaef4eeefec7',
u'info_dict': {
u'title': u'MVFar East Movement《The Illest》',
},
}
def _real_extract(self, url):
def _fetch_data(vid_id):
base_data_url = u'http://hot.vrs.sohu.com/vrs_flash.action?vid='
data_url = base_data_url + str(vid_id)
data_json = self._download_webpage(
data_url, video_id,
note=u'Downloading JSON data for ' + str(vid_id))
return json.loads(data_json)
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
raw_title = self._html_search_regex(r'(?s)<title>(.+?)</title>',
webpage, u'video title')
title = raw_title.partition('-')[0].strip()
vid = self._html_search_regex(r'var vid="(\d+)"', webpage,
u'video path')
data = _fetch_data(vid)
QUALITIES = ('ori', 'super', 'high', 'nor')
vid_ids = [data['data'][q + 'Vid']
for q in QUALITIES
if data['data'][q + 'Vid'] != 0]
if not vid_ids:
raise ExtractorError(u'No formats available for this video')
# For now, we just pick the highest available quality
vid_id = vid_ids[-1]
format_data = data if vid == vid_id else _fetch_data(vid_id)
part_count = format_data['data']['totalBlocks']
allot = format_data['allot']
prot = format_data['prot']
clipsURL = format_data['data']['clipsURL']
su = format_data['data']['su']
playlist = []
for i in range(part_count):
part_url = ('http://%s/?prot=%s&file=%s&new=%s' %
(allot, prot, clipsURL[i], su[i]))
part_str = self._download_webpage(
part_url, video_id,
note=u'Downloading part %d of %d' % (i+1, part_count))
part_info = part_str.split('|')
video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
video_info = {
'id': '%s_part%02d' % (video_id, i + 1),
'title': title,
'url': video_url,
'ext': 'mp4',
}
playlist.append(video_info)
if len(playlist) == 1:
info = playlist[0]
info['id'] = video_id
else:
info = {
'_type': 'playlist',
'entries': playlist,
'id': video_id,
}
return info

View File

@ -4,6 +4,7 @@ import re
from .common import InfoExtractor
from ..utils import (
compat_str,
compat_urlparse,
ExtractorError,
unified_strdate,
@ -22,6 +23,7 @@ class SoundcloudIE(InfoExtractor):
_VALID_URL = r'''^(?:https?://)?
(?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$)
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
|(?P<widget>w.soundcloud.com/player/?.*?url=.*)
)
'''
IE_NAME = u'soundcloud'
@ -79,6 +81,9 @@ class SoundcloudIE(InfoExtractor):
if track_id is not None:
info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
full_title = track_id
elif mobj.group('widget'):
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
return self.url_result(query['url'][0], ie='Soundcloud')
else:
# extract uploader (which is in the url)
uploader = mobj.group(1)

View File

@ -5,13 +5,13 @@ from .common import InfoExtractor
class StatigramIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)'
_TEST = {
u'url': u'http://statigr.am/p/484091715184808010_284179915',
u'file': u'484091715184808010_284179915.mp4',
u'md5': u'deda4ff333abe2e118740321e992605b',
u'url': u'http://statigr.am/p/522207370455279102_24101272',
u'file': u'522207370455279102_24101272.mp4',
u'md5': u'6eb93b882a3ded7c378ee1d6884b1814',
u'info_dict': {
u"uploader_id": u"videoseconds",
u"title": u"Instagram photo by @videoseconds"
}
u'uploader_id': u'aguynamedpatrick',
u'title': u'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
},
}
def _real_extract(self, url):

View File

@ -33,7 +33,7 @@ class TeamcocoIE(InfoExtractor):
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
data = self._download_webpage(data_url, video_id, 'Downloading data webpage')
video_url = self._html_search_regex(r'<file type="high".*?>(.*?)</file>',
video_url = self._html_search_regex(r'<file [^>]*type="high".*?>(.*?)</file>',
data, u'video URL')
return [{

View File

@ -6,20 +6,17 @@ import re
from .common import InfoExtractor
class TF1IE(InfoExtractor):
"""
TF1 uses the wat.tv player, currently it can only download videos with the
html5 player enabled, it cannot download HD videos.
"""
_WORKING = False
"""TF1 uses the wat.tv player."""
_VALID_URL = r'http://videos.tf1.fr/.*-(.*?).html'
_TEST = {
u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
u'file': u'10635995.mp4',
u'md5': u'66789d3e91278d332f75e1feb7aea327',
u'md5': u'2e378cc28b9957607d5e88f274e637d8',
u'info_dict': {
u'title': u'Citroën Grand C4 Picasso 2013 : présentation officielle',
u'description': u'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.',
}
},
u'skip': u'Sometimes wat serves the whole file with the --test option',
}
def _real_extract(self, url):

View File

@ -0,0 +1,73 @@
import json
import re
import xml.etree.ElementTree
from .common import InfoExtractor
class TriluliluIE(InfoExtractor):
_VALID_URL = r'(?x)(?:https?://)?(?:www\.)?trilulilu\.ro/video-(?P<category>[^/]+)/(?P<video_id>[^/]+)'
_TEST = {
u"url": u"http://www.trilulilu.ro/video-animatie/big-buck-bunny-1",
u'file': u"big-buck-bunny-1.mp4",
u'info_dict': {
u"title": u"Big Buck Bunny",
u"description": u":) pentru copilul din noi",
},
# Server ignores Range headers (--test)
u"params": {
u"skip_download": True
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
webpage = self._download_webpage(url, video_id)
title = self._og_search_title(webpage)
thumbnail = self._og_search_thumbnail(webpage)
description = self._og_search_description(webpage)
log_str = self._search_regex(
r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, u'log info')
log = json.loads(log_str)
format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
u'video-formats2' % log)
format_str = self._download_webpage(
format_url, video_id,
note=u'Downloading formats',
errnote=u'Error while downloading formats')
format_doc = xml.etree.ElementTree.fromstring(format_str)
video_url_template = (
u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
u'&source=site&hash=%(hash)s&username=%(userid)s&'
u'key=ministhebest&format=%%s&sig=&exp=' %
log)
formats = [
{
'format': fnode.text,
'url': video_url_template % fnode.text,
}
for fnode in format_doc.findall('./formats/format')
]
info = {
'_type': 'video',
'id': video_id,
'formats': formats,
'title': title,
'description': description,
'thumbnail': thumbnail,
}
# TODO: Remove when #980 has been merged
info['url'] = formats[-1]['url']
info['ext'] = formats[-1]['format'].partition('-')[0]
return info

View File

@ -0,0 +1,32 @@
import re
from .common import InfoExtractor
class UnistraIE(InfoExtractor):
_VALID_URL = r'http://utv.unistra.fr/(?:index|video).php\?id_video\=(\d+)'
_TEST = {
u'url': u'http://utv.unistra.fr/video.php?id_video=154',
u'file': u'154.mp4',
u'md5': u'736f605cfdc96724d55bb543ab3ced24',
u'info_dict': {
u'title': u'M!ss Yella',
u'description': u'md5:104892c71bd48e55d70b902736b81bbf',
},
}
def _real_extract(self, url):
id = re.match(self._VALID_URL, url).group(1)
webpage = self._download_webpage(url, id)
file = re.search(r'file: "(.*?)",', webpage).group(1)
title = self._html_search_regex(r'<title>UTV - (.*?)</', webpage, u'title')
video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file
return {'id': id,
'title': title,
'ext': 'mp4',
'url': video_url,
'description': self._html_search_regex(r'<meta name="Description" content="(.*?)"', webpage, u'description', flags=re.DOTALL),
'thumbnail': self._search_regex(r'image: "(.*?)"', webpage, u'thumbnail'),
}

View File

@ -0,0 +1,56 @@
import re
import json
from .common import InfoExtractor
from ..utils import (
compat_urlparse,
get_element_by_id,
clean_html,
)
class VeeHDIE(InfoExtractor):
_VALID_URL = r'https?://veehd.com/video/(?P<id>\d+)'
_TEST = {
u'url': u'http://veehd.com/video/4686958',
u'file': u'4686958.mp4',
u'info_dict': {
u'title': u'Time Lapse View from Space ( ISS)',
u'uploader_id': u'spotted',
u'description': u'md5:f0094c4cf3a72e22bc4e4239ef767ad7',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
player_path = self._search_regex(r'\$\("#playeriframe"\).attr\({src : "(.+?)"',
webpage, u'player path')
player_url = compat_urlparse.urljoin(url, player_path)
player_page = self._download_webpage(player_url, video_id,
u'Downloading player page')
config_json = self._search_regex(r'value=\'config=({.+?})\'',
player_page, u'config json')
config = json.loads(config_json)
video_url = compat_urlparse.unquote(config['clip']['url'])
title = clean_html(get_element_by_id('videoName', webpage).rpartition('|')[0])
uploader_id = self._html_search_regex(r'<a href="/profile/\d+">(.+?)</a>',
webpage, u'uploader')
thumbnail = self._search_regex(r'<img id="veehdpreview" src="(.+?)"',
webpage, u'thumbnail')
description = self._html_search_regex(r'<td class="infodropdown".*?<div>(.*?)<ul',
webpage, u'description', flags=re.DOTALL)
return {
'_type': 'video',
'id': video_id,
'title': title,
'url': video_url,
'ext': 'mp4',
'uploader_id': uploader_id,
'thumbnail': thumbnail,
'description': description,
}

View File

@ -8,10 +8,10 @@ from ..utils import (
class VevoIE(InfoExtractor):
"""
Accecps urls from vevo.com or in the format 'vevo:{id}'
Accepts urls from vevo.com or in the format 'vevo:{id}'
(currently used by MTVIE)
"""
_VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*)$'
_VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*?)(\?|$)'
_TEST = {
u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
u'file': u'GB1101300280.mp4',
@ -19,7 +19,7 @@ class VevoIE(InfoExtractor):
u'info_dict': {
u"upload_date": u"20130624",
u"uploader": u"Hurts",
u"title": u"Somebody To Die For"
u"title": u"Somebody to Die For"
}
}
@ -35,12 +35,12 @@ class VevoIE(InfoExtractor):
self.report_extraction(video_id)
video_info = json.loads(info_json)
m_urls = list(re.finditer(r'<video src="(?P<ext>.*?):(?P<url>.*?)"', links_webpage))
m_urls = list(re.finditer(r'<video src="(?P<ext>.*?):/?(?P<url>.*?)"', links_webpage))
if m_urls is None or len(m_urls) == 0:
raise ExtractorError(u'Unable to extract video url')
# They are sorted from worst to best quality
m_url = m_urls[-1]
video_url = base_url + m_url.group('url')
video_url = base_url + '/' + m_url.group('url')
ext = m_url.group('ext')
return {'url': video_url,

View File

@ -0,0 +1,48 @@
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
find_xpath_attr,
determine_ext,
)
class VideofyMeIE(InfoExtractor):
_VALID_URL = r'https?://(www.videofy.me/.+?|p.videofy.me/v)/(?P<id>\d+)(&|#|$)'
IE_NAME = u'videofy.me'
_TEST = {
u'url': u'http://www.videofy.me/thisisvideofyme/1100701',
u'file': u'1100701.mp4',
u'md5': u'c77d700bdc16ae2e9f3c26019bd96143',
u'info_dict': {
u'title': u'This is VideofyMe',
u'description': None,
u'uploader': u'VideofyMe',
u'uploader_id': u'thisisvideofyme',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
config_xml = self._download_webpage('http://sunshine.videofy.me/?videoId=%s' % video_id,
video_id)
config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
video = config.find('video')
sources = video.find('sources')
url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key)
for key in ['on', 'av', 'off']] if node is not None)
video_url = url_node.find('url').text
return {'id': video_id,
'title': video.find('title').text,
'url': video_url,
'ext': determine_ext(video_url),
'thumbnail': video.find('thumb').text,
'description': video.find('description').text,
'uploader': config.find('blog/name').text,
'uploader_id': video.find('identifier').text,
'view_count': re.search(r'\d+', video.find('views').text).group(),
}

View File

@ -1,5 +1,6 @@
import json
import re
import itertools
from .common import InfoExtractor
from ..utils import (
@ -19,7 +20,8 @@ class VimeoIE(InfoExtractor):
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)(?:[?].*)?$'
_NETRC_MACHINE = 'vimeo'
IE_NAME = u'vimeo'
_TEST = {
_TESTS = [
{
u'url': u'http://vimeo.com/56015672',
u'file': u'56015672.mp4',
u'md5': u'8879b6cc097e987f02484baf890129e5',
@ -28,9 +30,31 @@ class VimeoIE(InfoExtractor):
u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
u"uploader_id": u"user7108434",
u"uploader": u"Filippo Valsorda",
u"title": u"youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550"
}
}
u"title": u"youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
},
},
{
u'url': u'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876',
u'file': u'68093876.mp4',
u'md5': u'3b5ca6aa22b60dfeeadf50b72e44ed82',
u'note': u'Vimeo Pro video (#1197)',
u'info_dict': {
u'uploader_id': u'openstreetmapus',
u'uploader': u'OpenStreetMap US',
u'title': u'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
},
},
{
u'url': u'http://player.vimeo.com/video/54469442',
u'file': u'54469442.mp4',
u'md5': u'619b811a4417aa4abe78dc653becf511',
u'note': u'Videos that embed the url in the player page',
u'info_dict': {
u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software',
u'uploader': u'The BLN & Business of Software',
},
},
]
def _login(self):
(username, password) = self._get_login_info()
@ -82,7 +106,9 @@ class VimeoIE(InfoExtractor):
video_id = mobj.group('id')
if not mobj.group('proto'):
url = 'https://' + url
if mobj.group('direct_link') or mobj.group('pro'):
elif mobj.group('pro'):
url = 'http://player.vimeo.com/video/' + video_id
elif mobj.group('direct_link'):
url = 'https://vimeo.com/' + video_id
# Retrieve video webpage to extract further information
@ -96,7 +122,8 @@ class VimeoIE(InfoExtractor):
# Extract the config JSON
try:
config = webpage.split(' = {config:')[1].split(',assets:')[0]
config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'],
webpage, u'info section', flags=re.DOTALL)
config = json.loads(config)
except:
if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
@ -116,12 +143,22 @@ class VimeoIE(InfoExtractor):
video_uploader_id = config["video"]["owner"]["url"].split('/')[-1] if config["video"]["owner"]["url"] else None
# Extract video thumbnail
video_thumbnail = config["video"]["thumbnail"]
video_thumbnail = config["video"].get("thumbnail")
if video_thumbnail is None:
_, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in config["video"]["thumbs"].items())[-1]
# Extract video description
video_description = None
try:
video_description = get_element_by_attribute("itemprop", "description", webpage)
if video_description: video_description = clean_html(video_description)
else: video_description = u''
except AssertionError as err:
# On some pages like (http://player.vimeo.com/video/54469442) the
# html tags are not closed, python 2.6 cannot handle it
if err.args[0] == 'we should not get here!':
pass
else:
raise
# Extract upload date
video_upload_date = None
@ -138,14 +175,15 @@ class VimeoIE(InfoExtractor):
# TODO bind to format param
codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')]
files = { 'hd': [], 'sd': [], 'other': []}
config_files = config["video"].get("files") or config["request"].get("files")
for codec_name, codec_extension in codecs:
if codec_name in config["video"]["files"]:
if 'hd' in config["video"]["files"][codec_name]:
if codec_name in config_files:
if 'hd' in config_files[codec_name]:
files['hd'].append((codec_name, codec_extension, 'hd'))
elif 'sd' in config["video"]["files"][codec_name]:
elif 'sd' in config_files[codec_name]:
files['sd'].append((codec_name, codec_extension, 'sd'))
else:
files['other'].append((codec_name, codec_extension, config["video"]["files"][codec_name][0]))
files['other'].append((codec_name, codec_extension, config_files[codec_name][0]))
for quality in ('hd', 'sd', 'other'):
if len(files[quality]) > 0:
@ -157,6 +195,10 @@ class VimeoIE(InfoExtractor):
else:
raise ExtractorError(u'No known codec found')
video_url = None
if isinstance(config_files[video_codec], dict):
video_url = config_files[video_codec][video_quality].get("url")
if video_url is None:
video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
%(video_id, sig, timestamp, video_quality, video_codec.upper())
@ -171,3 +213,31 @@ class VimeoIE(InfoExtractor):
'thumbnail': video_thumbnail,
'description': video_description,
}]
class VimeoChannelIE(InfoExtractor):
IE_NAME = u'vimeo:channel'
_VALID_URL = r'(?:https?://)?vimeo.\com/channels/(?P<id>[^/]+)'
_MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
channel_id = mobj.group('id')
video_ids = []
for pagenum in itertools.count(1):
webpage = self._download_webpage('http://vimeo.com/channels/%s/videos/page:%d' % (channel_id, pagenum),
channel_id, u'Downloading page %s' % pagenum)
video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
break
entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
for video_id in video_ids]
channel_title = self._html_search_regex(r'<a href="/channels/%s">(.*?)</a>' % channel_id,
webpage, u'channel title')
return {'_type': 'playlist',
'id': channel_id,
'title': channel_title,
'entries': entries,
}

View File

@ -6,23 +6,22 @@ import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
unified_strdate,
)
class WatIE(InfoExtractor):
_WORKING = False
_VALID_URL=r'http://www.wat.tv/.*-(?P<shortID>.*?)_.*?.html'
IE_NAME = 'wat.tv'
_TEST = {
u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
u'file': u'10631273.mp4',
u'md5': u'0a4fe7870f31eaeabb5e25fd8da8414a',
u'md5': u'd8b2231e1e333acd12aad94b80937e19',
u'info_dict': {
u'title': u'World War Z - Philadelphia VOST',
u'description': u'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr',
}
},
u'skip': u'Sometimes wat serves the whole file with the --test option',
}
def download_video_info(self, real_id):
@ -59,20 +58,8 @@ class WatIE(InfoExtractor):
# Otherwise we can continue and extract just one part, we have to use
# the short id for getting the video url
player_data = compat_urllib_parse.urlencode({'shortVideoId': short_id,
'html5': '1'})
player_info = self._download_webpage('http://www.wat.tv/player?' + player_data,
real_id, u'Downloading player info')
player = json.loads(player_info)['player']
html5_player = self._html_search_regex(r'iframe src="(.*?)"', player,
'html5 player')
player_webpage = self._download_webpage(html5_player, real_id,
u'Downloading player webpage')
video_url = self._search_regex(r'urlhtml5 : "(.*?)"', player_webpage,
'video url')
info = {'id': real_id,
'url': video_url,
'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
'ext': 'mp4',
'title': first_chapter['title'],
'thumbnail': first_chapter['preview'],

View File

@ -21,6 +21,13 @@ class WorldStarHipHopIE(InfoExtractor):
webpage_src = self._download_webpage(url, video_id)
m_vevo_id = re.search(r'videoId=(.*?)&amp?',
webpage_src)
if m_vevo_id is not None:
self.to_screen(u'Vevo video detected:')
return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')
video_url = self._search_regex(r'so\.addVariable\("file","(.*?)"\)',
webpage_src, u'video URL')

View File

@ -3,7 +3,8 @@ import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
unescapeHTML,
determine_ext,
ExtractorError,
)
@ -36,15 +37,16 @@ class XHamsterIE(InfoExtractor):
video_url = compat_urllib_parse.unquote(mobj.group('file'))
else:
video_url = mobj.group('server')+'/key='+mobj.group('file')
video_extension = video_url.split('.')[-1]
video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
webpage, u'title')
# Can't see the description anywhere in the UI
# video_description = self._html_search_regex(r'<span>Description: </span>(?P<description>[^<]+)',
# webpage, u'description', fatal=False)
# if video_description: video_description = unescapeHTML(video_description)
# Only a few videos have an description
mobj = re.search('<span>Description: </span>(?P<description>[^<]+)', webpage)
if mobj:
video_description = unescapeHTML(mobj.group('description'))
else:
video_description = None
mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
if mobj:
@ -62,9 +64,9 @@ class XHamsterIE(InfoExtractor):
return [{
'id': video_id,
'url': video_url,
'ext': video_extension,
'ext': determine_ext(video_url),
'title': video_title,
# 'description': video_description,
'description': video_description,
'upload_date': video_upload_date,
'uploader_id': video_uploader_id,
'thumbnail': video_thumbnail

View File

@ -12,14 +12,16 @@ from ..utils import (
unescapeHTML,
unified_strdate,
)
from ..aes import (
aes_decrypt_text
)
class YouPornIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'
_TEST = {
u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
u'file': u'505835.mp4',
u'md5': u'c37ddbaaa39058c76a7e86c6813423c1',
u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89',
u'info_dict': {
u"upload_date": u"20101221",
u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?",
@ -75,7 +77,15 @@ class YouPornIE(InfoExtractor):
# Get all of the links from the page
LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
links = re.findall(LINK_RE, download_list_html)
if(len(links) == 0):
# Get link of hd video if available
mobj = re.search(r'var encryptedQuality720URL = \'(?P<encrypted_video_url>[a-zA-Z0-9+/]+={0,2})\';', webpage)
if mobj != None:
encrypted_video_url = mobj.group(u'encrypted_video_url')
video_url = aes_decrypt_text(encrypted_video_url, video_title, 32).decode('utf-8')
links = [video_url] + links
if not links:
raise ExtractorError(u'ERROR: no known formats available for video')
self.to_screen(u'Links found: %d' % len(links))
@ -112,7 +122,7 @@ class YouPornIE(InfoExtractor):
self._print_formats(formats)
return
req_format = self._downloader.params.get('format', None)
req_format = self._downloader.params.get('format', 'best')
self.to_screen(u'Format: %s' % req_format)
if req_format is None or req_format == 'best':

View File

@ -135,37 +135,107 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
_VALID_URL = r"""^
(
(?:https?://)? # http(s):// (optional)
(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|
(?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
tube\.majestyc\.net/) # the various hostnames, with wildcard subdomains
(?:.*?\#/)? # handle anchor (#/) redirect urls
(?: # the various things that can precede the ID:
(?:(?:v|embed|e)/) # v/ or embed/ or e/
|(?: # or the v= param in all its forms
(?:watch|movie(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
(?:(?:watch|movie)(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
(?:\?|\#!?) # the params delimiter ? or # or #!
(?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
v=
)
)? # optional -> youtube.com/xxxx is OK
))
|youtu\.be/ # just youtu.be/xxxx
)
)? # all until now is optional -> you can pass the naked ID
([0-9A-Za-z_-]+) # here is it! the YouTube video ID
(?(1).+)? # if we found the ID, everything can follow
$"""
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
# Listed in order of quality
_available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13']
_available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13']
_available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
# Apple HTTP Live Streaming
'96', '95', '94', '93', '92', '132', '151',
# 3D
'85', '84', '102', '83', '101', '82', '100',
# Dash video
'138', '137', '248', '136', '247', '135', '246',
'245', '244', '134', '243', '133', '242', '160',
# Dash audio
'141', '172', '140', '171', '139',
]
_available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
# Apple HTTP Live Streaming
'96', '95', '94', '93', '92', '132', '151',
# 3D
'85', '102', '84', '101', '83', '100', '82',
# Dash video
'138', '248', '137', '247', '136', '246', '245',
'244', '135', '243', '134', '242', '133', '160',
# Dash audio
'172', '141', '171', '140', '139',
]
_video_formats_map = {
'flv': ['35', '34', '6', '5'],
'3gp': ['36', '17', '13'],
'mp4': ['38', '37', '22', '18'],
'webm': ['46', '45', '44', '43'],
}
_video_extensions = {
'13': '3gp',
'17': 'mp4',
'17': '3gp',
'18': 'mp4',
'22': 'mp4',
'36': '3gp',
'37': 'mp4',
'38': 'mp4',
'43': 'webm',
'44': 'webm',
'45': 'webm',
'46': 'webm',
# 3d videos
'82': 'mp4',
'83': 'mp4',
'84': 'mp4',
'85': 'mp4',
'100': 'webm',
'101': 'webm',
'102': 'webm',
# Apple HTTP Live Streaming
'92': 'mp4',
'93': 'mp4',
'94': 'mp4',
'95': 'mp4',
'96': 'mp4',
'132': 'mp4',
'151': 'mp4',
# Dash mp4
'133': 'mp4',
'134': 'mp4',
'135': 'mp4',
'136': 'mp4',
'137': 'mp4',
'138': 'mp4',
'139': 'mp4',
'140': 'mp4',
'141': 'mp4',
'160': 'mp4',
# Dash webm
'171': 'webm',
'172': 'webm',
'242': 'webm',
'243': 'webm',
'244': 'webm',
'245': 'webm',
'246': 'webm',
'247': 'webm',
'248': 'webm',
}
_video_dimensions = {
'5': '240x400',
@ -176,13 +246,76 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'22': '720x1280',
'34': '360x640',
'35': '480x854',
'36': '240x320',
'37': '1080x1920',
'38': '3072x4096',
'43': '360x640',
'44': '480x854',
'45': '720x1280',
'46': '1080x1920',
'82': '360p',
'83': '480p',
'84': '720p',
'85': '1080p',
'92': '240p',
'93': '360p',
'94': '480p',
'95': '720p',
'96': '1080p',
'100': '360p',
'101': '480p',
'102': '720p',
'132': '240p',
'151': '72p',
'133': '240p',
'134': '360p',
'135': '480p',
'136': '720p',
'137': '1080p',
'138': '>1080p',
'139': '48k',
'140': '128k',
'141': '256k',
'160': '192p',
'171': '128k',
'172': '256k',
'242': '240p',
'243': '360p',
'244': '480p',
'245': '480p',
'246': '480p',
'247': '720p',
'248': '1080p',
}
_special_itags = {
'82': '3D',
'83': '3D',
'84': '3D',
'85': '3D',
'100': '3D',
'101': '3D',
'102': '3D',
'133': 'DASH Video',
'134': 'DASH Video',
'135': 'DASH Video',
'136': 'DASH Video',
'137': 'DASH Video',
'138': 'DASH Video',
'139': 'DASH Audio',
'140': 'DASH Audio',
'141': 'DASH Audio',
'160': 'DASH Video',
'171': 'DASH Audio',
'172': 'DASH Audio',
'242': 'DASH Video',
'243': 'DASH Video',
'244': 'DASH Video',
'245': 'DASH Video',
'246': 'DASH Video',
'247': 'DASH Video',
'248': 'DASH Video',
}
IE_NAME = u'youtube'
_TESTS = [
{
@ -215,8 +348,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
u"info_dict": {
u"upload_date": u"20120506",
u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c",
u"uploader": u"IconaPop",
u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
u"uploader": u"Icona Pop",
u"uploader_id": u"IconaPop"
}
},
@ -232,6 +365,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
u"uploader_id": u"justintimberlakeVEVO"
}
},
{
u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
u'file': u'TGi3HqYrWHE.mp4',
u'note': u'm3u8 video',
u'info_dict': {
u'title': u'Triathlon - Men - London 2012 Olympic Games',
u'description': u'- Men - TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
u'uploader': u'olympic',
u'upload_date': u'20120807',
u'uploader_id': u'olympic',
},
u'params': {
u'skip_download': True,
},
},
]
@ -281,37 +429,55 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
elif len(s) == 90:
return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
elif len(s) == 89:
return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
elif len(s) == 88:
return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
elif len(s) == 87:
return s[4:23] + s[86] + s[24:85]
return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
elif len(s) == 86:
return s[2:63] + s[82] + s[64:82] + s[63]
return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
elif len(s) == 85:
return s[2:8] + s[0] + s[9:21] + s[65] + s[22:65] + s[84] + s[66:82] + s[21]
return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
elif len(s) == 84:
return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26]
return s[81:36:-1] + s[0] + s[35:2:-1]
elif len(s) == 83:
return s[6] + s[3:6] + s[33] + s[7:24] + s[0] + s[25:33] + s[53] + s[34:53] + s[24] + s[54:]
return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
elif len(s) == 82:
return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34]
return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82]
elif len(s) == 81:
return s[6] + s[3:6] + s[33] + s[7:24] + s[0] + s[25:33] + s[2] + s[34:53] + s[24] + s[54:81]
return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
elif len(s) == 80:
return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
elif len(s) == 79:
return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
else:
raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
def _decrypt_signature_age_gate(self, s):
# The videos with age protection use another player, so the algorithms
# can be different.
if len(s) == 86:
return s[2:63] + s[82] + s[64:82] + s[63]
else:
# Fallback to the other algortihms
return self._decrypt_signature(s)
def _get_available_subtitles(self, video_id):
self.report_video_subtitles_download(video_id)
request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
try:
sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
return (u'unable to download video subtitles: %s' % compat_str(err), None)
self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
return {}
sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list)
if not sub_lang_list:
return (u'video doesn\'t have subtitles', None)
self._downloader.report_warning(u'video doesn\'t have subtitles')
return {}
return sub_lang_list
def _list_available_subtitles(self, video_id):
@ -320,8 +486,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _request_subtitle(self, sub_lang, sub_name, video_id, format):
"""
Return tuple:
(error_message, sub_lang, sub)
Return the subtitle as a string or None if they are not found
"""
self.report_video_subtitles_request(video_id, sub_lang, format)
params = compat_urllib_parse.urlencode({
@ -334,21 +499,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
try:
sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
return (u'unable to download video subtitles: %s' % compat_str(err), None, None)
self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
return
if not sub:
return (u'Did not fetch video subtitles', None, None)
return (None, sub_lang, sub)
self._downloader.report_warning(u'Did not fetch video subtitles')
return
return sub
def _request_automatic_caption(self, video_id, webpage):
"""We need the webpage for getting the captions url, pass it as an
argument to speed up the process."""
sub_lang = self._downloader.params.get('subtitleslang') or 'en'
sub_lang = (self._downloader.params.get('subtitleslangs') or ['en'])[0]
sub_format = self._downloader.params.get('subtitlesformat')
self.to_screen(u'%s: Looking for automatic captions' % video_id)
mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
if mobj is None:
return [(err_msg, None, None)]
self._downloader.report_warning(err_msg)
return {}
player_config = json.loads(mobj.group(1))
try:
args = player_config[u'args']
@ -363,46 +531,51 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
})
subtitles_url = caption_url + '&' + params
sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
return [(None, sub_lang, sub)]
except KeyError:
return [(err_msg, None, None)]
return {sub_lang: sub}
# An extractor error can be raise by the download process if there are
# no automatic captions but there are subtitles
except (KeyError, ExtractorError):
self._downloader.report_warning(err_msg)
return {}
def _extract_subtitle(self, video_id):
def _extract_subtitles(self, video_id):
"""
Return a list with a tuple:
[(error_message, sub_lang, sub)]
Return a dictionary: {language: subtitles} or {} if the subtitles
couldn't be found
"""
sub_lang_list = self._get_available_subtitles(video_id)
available_subs_list = self._get_available_subtitles(video_id)
sub_format = self._downloader.params.get('subtitlesformat')
if isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles
return [(sub_lang_list[0], None, None)]
if self._downloader.params.get('subtitleslang', False):
sub_lang = self._downloader.params.get('subtitleslang')
elif 'en' in sub_lang_list:
sub_lang = 'en'
if not available_subs_list: #There was some error, it didn't get the available subtitles
return {}
if self._downloader.params.get('allsubtitles', False):
sub_lang_list = available_subs_list
else:
sub_lang = list(sub_lang_list.keys())[0]
if not sub_lang in sub_lang_list:
return [(u'no closed captions found in the specified language "%s"' % sub_lang, None, None)]
if self._downloader.params.get('subtitleslangs', False):
reqested_langs = self._downloader.params.get('subtitleslangs')
elif 'en' in available_subs_list:
reqested_langs = ['en']
else:
reqested_langs = [list(available_subs_list.keys())[0]]
subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
return [subtitle]
def _extract_all_subtitles(self, video_id):
sub_lang_list = self._get_available_subtitles(video_id)
sub_format = self._downloader.params.get('subtitlesformat')
if isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles
return [(sub_lang_list[0], None, None)]
subtitles = []
sub_lang_list = {}
for sub_lang in reqested_langs:
if not sub_lang in available_subs_list:
self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
continue
sub_lang_list[sub_lang] = available_subs_list[sub_lang]
subtitles = {}
for sub_lang in sub_lang_list:
subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
subtitles.append(subtitle)
if subtitle:
subtitles[sub_lang] = subtitle
return subtitles
def _print_formats(self, formats):
print('Available formats:')
for x in formats:
print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???')))
print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
self._video_dimensions.get(x, '???'),
' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
def _extract_id(self, url):
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@ -411,6 +584,69 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
video_id = mobj.group(2)
return video_id
def _get_video_url_list(self, url_map):
"""
Transform a dictionary in the format {itag:url} to a list of (itag, url)
with the requested formats.
"""
req_format = self._downloader.params.get('format', None)
format_limit = self._downloader.params.get('format_limit', None)
available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
if format_limit is not None and format_limit in available_formats:
format_list = available_formats[available_formats.index(format_limit):]
else:
format_list = available_formats
existing_formats = [x for x in format_list if x in url_map]
if len(existing_formats) == 0:
raise ExtractorError(u'no known formats available for video')
if self._downloader.params.get('listformats', None):
self._print_formats(existing_formats)
return
if req_format is None or req_format == 'best':
video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
elif req_format == 'worst':
video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
elif req_format in ('-1', 'all'):
video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
else:
# Specific formats. We pick the first in a slash-delimeted sequence.
# Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
# available in the specified format. For example,
# if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
# if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
# if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
req_formats = req_format.split('/')
video_url_list = None
for rf in req_formats:
if rf in url_map:
video_url_list = [(rf, url_map[rf])]
break
if rf in self._video_formats_map:
for srf in self._video_formats_map[rf]:
if srf in url_map:
video_url_list = [(srf, url_map[srf])]
break
else:
continue
break
if video_url_list is None:
raise ExtractorError(u'requested format not available')
return video_url_list
def _extract_from_m3u8(self, manifest_url, video_id):
url_map = {}
def _get_urls(_manifest):
lines = _manifest.split('\n')
urls = filter(lambda l: l and not l.startswith('#'),
lines)
return urls
manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
formats_urls = _get_urls(manifest)
for format_url in formats_urls:
itag = self._search_regex(r'itag%3D(\d+?)/', format_url, 'itag')
url_map[itag] = format_url
return url_map
def _real_extract(self, url):
if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).')
@ -534,25 +770,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# subtitles
video_subtitles = None
if self._downloader.params.get('writesubtitles', False):
video_subtitles = self._extract_subtitle(video_id)
if video_subtitles:
(sub_error, sub_lang, sub) = video_subtitles[0]
if sub_error:
self._downloader.report_warning(sub_error)
if self._downloader.params.get('writeautomaticsub', False):
if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
video_subtitles = self._extract_subtitles(video_id)
elif self._downloader.params.get('writeautomaticsub', False):
video_subtitles = self._request_automatic_caption(video_id, video_webpage)
(sub_error, sub_lang, sub) = video_subtitles[0]
if sub_error:
self._downloader.report_warning(sub_error)
if self._downloader.params.get('allsubtitles', False):
video_subtitles = self._extract_all_subtitles(video_id)
for video_subtitle in video_subtitles:
(sub_error, sub_lang, sub) = video_subtitle
if sub_error:
self._downloader.report_warning(sub_error)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id)
@ -565,7 +786,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
# Decide which formats to download
req_format = self._downloader.params.get('format', None)
try:
mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
@ -579,6 +799,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if m_s is not None:
self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
if m_s is not None:
if 'url_encoded_fmt_stream_map' in video_info:
video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
else:
video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
elif 'adaptive_fmts' in video_info:
if 'url_encoded_fmt_stream_map' in video_info:
video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
else:
video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
except ValueError:
pass
@ -600,8 +831,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
s = url_data['s'][0]
if age_gate:
player_version = self._search_regex(r'ad3-(.+?)\.swf',
video_info['ad3_module'][0], 'flash player',
fatal=False)
video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND',
'flash player', fatal=False)
player = 'flash player %s' % player_version
else:
player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
@ -609,41 +840,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.'))
self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
(len(s), parts_sizes, url_data['itag'][0], player))
signature = self._decrypt_signature(url_data['s'][0])
encrypted_sig = url_data['s'][0]
if age_gate:
signature = self._decrypt_signature_age_gate(encrypted_sig)
else:
signature = self._decrypt_signature(encrypted_sig)
url += '&signature=' + signature
if 'ratebypass' not in url:
url += '&ratebypass=yes'
url_map[url_data['itag'][0]] = url
format_limit = self._downloader.params.get('format_limit', None)
available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
if format_limit is not None and format_limit in available_formats:
format_list = available_formats[available_formats.index(format_limit):]
else:
format_list = available_formats
existing_formats = [x for x in format_list if x in url_map]
if len(existing_formats) == 0:
raise ExtractorError(u'no known formats available for video')
if self._downloader.params.get('listformats', None):
self._print_formats(existing_formats)
video_url_list = self._get_video_url_list(url_map)
if not video_url_list:
return
if req_format is None or req_format == 'best':
video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
elif req_format == 'worst':
video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
elif req_format in ('-1', 'all'):
video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
else:
# Specific formats. We pick the first in a slash-delimeted sequence.
# For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
req_formats = req_format.split('/')
video_url_list = None
for rf in req_formats:
if rf in url_map:
video_url_list = [(rf, url_map[rf])]
break
if video_url_list is None:
raise ExtractorError(u'requested format not available')
elif video_info.get('hlsvp'):
manifest_url = video_info['hlsvp'][0]
url_map = self._extract_from_m3u8(manifest_url, video_id)
video_url_list = self._get_video_url_list(url_map)
if not video_url_list:
return
else:
raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')
@ -652,8 +867,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Extension
video_extension = self._video_extensions.get(format_param, 'flv')
video_format = '{0} - {1}'.format(format_param if format_param else video_extension,
self._video_dimensions.get(format_param, '???'))
video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
self._video_dimensions.get(format_param, '???'),
' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
results.append({
'id': video_id,
@ -705,10 +921,9 @@ class YoutubePlaylistIE(InfoExtractor):
# Download playlist videos from API
playlist_id = mobj.group(1) or mobj.group(2)
page_num = 1
videos = []
while True:
for page_num in itertools.count(1):
start_index = self._MAX_RESULTS * (page_num - 1) + 1
if start_index >= 1000:
self._downloader.report_warning(u'Max number of results reached')
@ -730,9 +945,11 @@ class YoutubePlaylistIE(InfoExtractor):
for entry in response['feed']['entry']:
index = entry['yt$position']['$t']
if 'media$group' in entry and 'media$player' in entry['media$group']:
videos.append((index, entry['media$group']['media$player']['url']))
page_num += 1
if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
videos.append((
index,
'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
))
videos = [v[1] for v in sorted(videos)]
@ -776,9 +993,7 @@ class YoutubeChannelIE(InfoExtractor):
# Download any subsequent channel pages using the json-based channel_ajax query
if self._MORE_PAGES_INDICATOR in page:
while True:
pagenum = pagenum + 1
for pagenum in itertools.count(1):
url = self._MORE_PAGES_URL % (pagenum, channel_id)
page = self._download_webpage(url, channel_id,
u'Downloading page #%s' % pagenum)
@ -800,13 +1015,16 @@ class YoutubeChannelIE(InfoExtractor):
class YoutubeUserIE(InfoExtractor):
IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)([A-Za-z0-9_-]+)'
_TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
_GDATA_PAGE_SIZE = 50
_GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
_VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
_GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
IE_NAME = u'youtube:user'
def suitable(cls, url):
if YoutubeIE.suitable(url): return False
else: return super(YoutubeUserIE, cls).suitable(url)
def _real_extract(self, url):
# Extract username
mobj = re.match(self._VALID_URL, url)
@ -821,22 +1039,23 @@ class YoutubeUserIE(InfoExtractor):
# all of them.
video_ids = []
pagenum = 0
while True:
for pagenum in itertools.count(0):
start_index = pagenum * self._GDATA_PAGE_SIZE + 1
gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
page = self._download_webpage(gdata_url, username,
u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))
try:
response = json.loads(page)
except ValueError as err:
raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
# Extract video identifiers
ids_in_page = []
for mobj in re.finditer(self._VIDEO_INDICATOR, page):
if mobj.group(1) not in ids_in_page:
ids_in_page.append(mobj.group(1))
for entry in response['feed']['entry']:
ids_in_page.append(entry['id']['$t'].split('/')[-1])
video_ids.extend(ids_in_page)
# A little optimization - if current page is not
@ -848,8 +1067,6 @@ class YoutubeUserIE(InfoExtractor):
if len(ids_in_page) < self._GDATA_PAGE_SIZE:
break
pagenum += 1
urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
return [self.playlist_result(url_results, playlist_title = username)]
@ -920,10 +1137,15 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
"""
_LOGIN_REQUIRED = True
_PAGING_STEP = 30
# use action_load_personal_feed instead of action_load_system_feed
_PERSONAL_FEED = False
@property
def _FEED_TEMPLATE(self):
return 'http://www.youtube.com/feed_ajax?action_load_system_feed=1&feed_name=%s&paging=%%s' % self._FEED_NAME
action = 'action_load_system_feed'
if self._PERSONAL_FEED:
action = 'action_load_personal_feed'
return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)
@property
def IE_NAME(self):
@ -942,7 +1164,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
u'Downloading page %s' % i)
info = json.loads(info)
feed_html = info['feed_html']
m_ids = re.finditer(r'"/watch\?v=(.*?)"', feed_html)
m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
ids = orderedSet(m.group(1) for m in m_ids)
feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
if info['paging'] is None:
@ -961,11 +1183,18 @@ class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
_FEED_NAME = 'recommended'
_PLAYLIST_TITLE = u'Youtube Recommended videos'
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
_VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
_FEED_NAME = 'watch_later'
_PLAYLIST_TITLE = u'Youtube Watch Later'
_PAGING_STEP = 100
_PERSONAL_FEED = True
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
IE_NAME = u'youtube:favorites'
IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
_VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:o?rites)?'
_VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
_LOGIN_REQUIRED = True
def _real_extract(self, url):

View File

@ -1,19 +1,20 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import datetime
import email.utils
import errno
import gzip
import io
import json
import locale
import os
import platform
import re
import socket
import sys
import traceback
import zlib
import email.utils
import socket
import datetime
try:
import urllib.request as compat_urllib_request
@ -60,6 +61,11 @@ try:
except ImportError: # Python 2
import httplib as compat_http_client
try:
from urllib.error import HTTPError as compat_HTTPError
except ImportError: # Python 2
from urllib2 import HTTPError as compat_HTTPError
try:
from subprocess import DEVNULL
compat_subprocess_get_DEVNULL = lambda: DEVNULL
@ -207,7 +213,7 @@ if sys.version_info >= (2,7):
def find_xpath_attr(node, xpath, key, val):
""" Find the xpath xpath[@key=val] """
assert re.match(r'^[a-zA-Z]+$', key)
assert re.match(r'^[a-zA-Z@]*$', val)
assert re.match(r'^[a-zA-Z0-9@\s]*$', val)
expr = xpath + u"[@%s='%s']" % (key, val)
return node.find(expr)
else:
@ -489,7 +495,7 @@ def make_HTTPS_handler(opts):
class ExtractorError(Exception):
"""Error during info extraction."""
def __init__(self, msg, tb=None, expected=False):
def __init__(self, msg, tb=None, expected=False, cause=None):
""" tb, if given, is the original traceback (so that it can be printed out).
If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
"""
@ -497,11 +503,12 @@ class ExtractorError(Exception):
if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
expected = True
if not expected:
msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output.'
msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
super(ExtractorError, self).__init__(msg)
self.traceback = tb
self.exc_info = sys.exc_info() # preserve original exception
self.cause = cause
def format_traceback(self):
if self.traceback is None:
@ -622,8 +629,23 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
old_resp = resp
# gzip
if resp.headers.get('Content-encoding', '') == 'gzip':
gz = gzip.GzipFile(fileobj=io.BytesIO(resp.read()), mode='r')
resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
content = resp.read()
gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
try:
uncompressed = io.BytesIO(gz.read())
except IOError as original_ioerror:
# There may be junk add the end of the file
# See http://stackoverflow.com/q/4928560/35070 for details
for i in range(1, 1024):
try:
gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
uncompressed = io.BytesIO(gz.read())
except IOError:
continue
break
else:
raise original_ioerror
resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg
# deflate
if resp.headers.get('Content-encoding', '') == 'deflate':
@ -657,6 +679,9 @@ def determine_ext(url, default_ext=u'unknown_video'):
else:
return default_ext
def subtitles_filename(filename, sub_lang, sub_format):
return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
def date_from_str(date_str):
"""
Return a datetime object from a string in the format YYYYMMDD or
@ -708,3 +733,31 @@ class DateRange(object):
return self.start <= date <= self.end
def __str__(self):
return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
def platform_name():
""" Returns the platform name as a compat_str """
res = platform.platform()
if isinstance(res, bytes):
res = res.decode(preferredencoding())
assert isinstance(res, compat_str)
return res
def bytes_to_intlist(bs):
if not bs:
return []
if isinstance(bs[0], int): # Python 3
return list(bs)
else:
return [ord(c) for c in bs]
def intlist_to_bytes(xs):
if not xs:
return b''
if isinstance(chr(0), bytes): # Python 2
return ''.join([chr(x) for x in xs])
else:
return bytes(xs)

View File

@ -1,2 +1,2 @@
__version__ = '2013.07.24.2'
__version__ = '2013.09.06.1'