Compare commits

..

120 Commits

Author SHA1 Message Date
7e5e8306fd release 2013.10.06 2013-10-06 07:13:14 +02:00
41e8bca4d0 [viddler] Add basic support (Fixes #1520) 2013-10-06 07:12:47 +02:00
f4aac741d5 Move try_rm to test helpers 2013-10-06 05:47:17 +02:00
226113c880 Merge remote-tracking branch 'origin/tox' 2013-10-05 22:47:44 +02:00
8932a66e49 [fixup] remove unnecessary commented function 2013-10-05 16:38:37 -04:00
79cfb46d42 add tox configuration file for easy testing 2013-10-05 16:08:48 -04:00
00fcc17aee add capability to suppress expected warnings in tests 2013-10-05 15:55:58 -04:00
e94b783c74 [googleplus] Fix upload_date detection 2013-10-05 16:38:33 +02:00
97dae9ae07 [bliptv] Make sure video ID is a string 2013-10-05 16:12:29 +02:00
c3fef636b5 [dailymotion] Fix playlist extraction
The html code has changed, make the video ids extraction more solid.
2013-10-04 14:07:29 +02:00
46e28a84ca [brightcove] Fix up some broken HTML (#1553) 2013-10-04 11:53:49 +02:00
17ad2b3fb1 [yahoo] Switch ext of test 2013-10-04 11:44:56 +02:00
5e2a60db4a [yahoo] Fix test title 2013-10-04 11:44:02 +02:00
cd214418f6 [redtube] pep8 2013-10-04 11:41:57 +02:00
ba2d9f213e [jeuxvideo] fix video file md5sum 2013-10-04 11:38:56 +02:00
7f8ae73a5d Include length in player cache ID
Some videos use the same player with IDs of multiple lengths.
See https://travis-ci.org/rg3/youtube-dl/jobs/12126506#L319 for an example.
2013-10-04 11:36:06 +02:00
466880f531 [yahoo] Do not try to run rtmpdump on travis 2013-10-04 11:34:12 +02:00
9f1f6d2437 [rtlnow] Skip test on travis 2013-10-04 11:33:14 +02:00
9e0f897f6b [francetv] Use common format for ID of generation-quoi subextractor 2013-10-04 11:30:47 +02:00
c0f6aa876f Merge remote-tracking branch 'origin/master' 2013-10-04 11:14:20 +02:00
d93bdee9a6 [comedycentral] Prepare for generic video extraction (#980) 2013-10-04 11:14:10 +02:00
f13d09332d [mtv] Prepare for #980 2013-10-04 11:10:04 +02:00
2f5865cc6d Clarify that url and ext are optional when formats is given (#980) 2013-10-04 11:09:43 +02:00
deefc05b88 Document formats (for #980) 2013-10-04 10:40:42 +02:00
0d8cb1cc14 [ted] Prepare #980 merge 2013-10-04 10:32:34 +02:00
a90b9fd209 Merge pull request #1551 from rzhxeo/flickr
[FlickrIE] Fix HTTPS url
2013-10-03 23:14:12 -07:00
829493439a [FlickrIE] Fix HTTPS url 2013-10-04 07:47:40 +02:00
3cd022f6e6 Merge remote-tracking branch 'rzhxeo/rtl_ntv' 2013-10-04 00:59:11 +02:00
abefd1f7c4 Merge remote-tracking branch 'rzhxeo/rtl_upload_date' 2013-10-04 00:58:35 +02:00
c21315f273 [youtube] new static 82 signature 2013-10-04 00:43:01 +02:00
9ab1018b1a release 2013.10.04 2013-10-04 00:38:19 +02:00
da0a5d2d6e [france2] Add support for URLs without video IDs (Fixes #1547) 2013-10-04 00:34:36 +02:00
ee6adb166c [ign] Support more urls and detect multiple videos in articles (fixes #1543) 2013-10-02 20:59:34 +02:00
be8fe32c92 Fix help of --cachedir 2013-10-02 14:37:19 +02:00
c38b1e776d [youtube] Simplify cache_dir code (#1529) 2013-10-02 08:41:14 +02:00
4f8bf17f23 Merge remote-tracking branch 'holomorph/master' 2013-10-02 08:23:53 +02:00
ca40186c75 [youtube] Fix static 82 signature (Closes #1539) 2013-10-02 08:20:00 +02:00
a8c6b24155 [youtube] Support videos without a title (Fixes #1391, Closes #1542) 2013-10-02 07:25:35 +02:00
bd8e5c7ca2 Merge pull request #1531 from rg3/no-playlist
[youtube] implement --no-playlist to only download current video
2013-10-01 10:08:20 -07:00
7c61bd36bb [youtube] correct --no-playlist for python3 2013-10-01 11:58:13 -04:00
c54283824c [dailymotion] Detect vevo videos (fixes #1532)
All videos from the Vevo user, just embed videos from vevo.com
2013-10-01 15:05:41 +02:00
52f15da2ca release 2013.10.01.1 2013-10-01 14:44:26 +02:00
44d466559e Properly handle stream meap not being present 2013-10-01 14:44:09 +02:00
05751eb047 release 2013.10.01 2013-10-01 11:43:54 +02:00
f10503db67 Handle videos without url_encoded_fmt_stream_map (Fixes #1535) 2013-10-01 11:39:11 +02:00
adfeafe9e1 [RTLnowIE] Allow video description without upload date
Some videos (feature films) have no upload date.
2013-10-01 07:22:49 +02:00
4c62a16f4f [RTLnowIE] Add support for http://n-tvnow.de 2013-10-01 06:55:30 +02:00
c0de39e6d4 Merge pull request #2 from rg3/master
Update
2013-09-30 21:39:58 -07:00
fa55675593 Support XDG base directory specification 2013-09-30 18:22:38 -04:00
d4d9920a26 add test for --no-playlist 2013-09-30 18:01:17 -04:00
47192f92d8 implement --no-playlist to only download current video - closes #755 2013-09-30 16:26:25 -04:00
722076a123 [rtlnow] Replace one of the tests
The video is no longer available.
2013-09-29 23:07:26 +02:00
bb4aa62cf7 [appletrailers] The request for the settings must have the trailer name in lower case (fixes #1329) 2013-09-29 20:59:19 +02:00
843530568f [appletrailers] Rework extraction (fixes #1387)
The exraction was broken:
* The includes page contains img elements that need to be fixed.
* Use the 'itunes.inc' page, it contains a json dictionary for each trailer with information.
* Get the formats from 'includes/settings{trailer_name}.json'
* Use urljoin to allow urls with a fragment identifier to work

Removed the thumbnail urls from the tests, they are different now.
2013-09-29 20:49:58 +02:00
138a5454b5 release 2013.09.29 2013-09-29 14:38:37 +02:00
d279037036 [update] Prevent cmd window popup on Windows (Fixes #1478) 2013-09-29 14:37:06 +02:00
46353f6783 [update] Look for .exe extension on Windows (Fixes #745) 2013-09-29 14:37:00 +02:00
70922df8b5 [dailymotion] Disable the family filter in the playlists (fixes #1524) 2013-09-29 12:44:02 +02:00
9c15e9de84 [yahoo] Fix video extraction (fixes #1521)
There's no need to use two different methods.
Now we can also download videos over http if possible.
Also run the test for rtmp videos, but skip the download.
2013-09-28 21:19:52 +02:00
123c10608d Merge branch 'master' of github.com:rg3/youtube-dl 2013-09-28 15:43:38 +02:00
0b7c2485b6 [zdf] Add support for hash URLs and simplify (#1518) 2013-09-28 15:43:34 +02:00
9abb32045a [youtube] Add hlsvp to the error message if it can't be found and remove the live stream test
It's no longer available, other olympics streams have the same problem.
2013-09-27 15:06:27 +02:00
f490e77e77 [youtube] Set the thumbnail to None if it can't be extracted 2013-09-27 14:22:36 +02:00
2dc592991a [youtube] update description of test 2013-09-27 14:20:52 +02:00
0a60edcfa9 Don't fail if the video thumbnail couldn't be downloaded (fixes #1516)
Just report a warning
2013-09-27 14:19:19 +02:00
c53f9d30c8 Merge branch 'master' of github.com:rg3/youtube-dl 2013-09-27 13:09:58 +02:00
509f398292 Remove youtube_genalgo (#1515)
With the automatic signature extraction, this script has become superfluous now
2013-09-27 13:09:24 +02:00
74bab3f0a4 Don't embed subtitles if the list is empty or the field is not set (fixes #1510) 2013-09-27 08:08:43 +02:00
8574862991 Merge remote-tracking branch 'rzhxeo/RTL_T' 2013-09-27 06:25:04 +02:00
2de957c7e1 Merge remote-tracking branch 'rzhxeo/RTL' 2013-09-27 06:23:10 +02:00
920de7a27d [youtube] Fix 83 signature (Closes #1511) 2013-09-27 06:15:21 +02:00
63efc427cd [RTLnowIE] Clean video title
The title of some videos has the following format:
Series - Episode | Series online schauen bei ... NOW
2013-09-27 06:00:37 +02:00
ce65fb6c76 [RTLnowIE] Add support for http://rtlnitronow.de 2013-09-27 05:50:16 +02:00
4de1994b6e [brightcove] Use direct url for the tests
The test_all_urls.py test failed because BrightcoveIE doesn't match them.
2013-09-26 18:59:56 +02:00
592882aa9f [brightcove] Support videos that only provide flv versions (fixes #1504)
Moved the test from generic.py to brightcove.py
2013-09-26 13:54:31 +02:00
b98d6a1e19 release 2013.09.24.2 2013-09-24 21:55:34 +02:00
29c7a63df8 Remove debugging code 2013-09-24 21:55:25 +02:00
8b25323ae2 release 2013.09.24.1 2013-09-24 21:40:47 +02:00
f426de8460 Merge remote-tracking branch 'origin/master' 2013-09-24 21:40:30 +02:00
695dc094ab Merge branch 'automatic-signatures' 2013-09-24 21:40:08 +02:00
e80d861064 Revert "[southparkstudios] Fix mgid extraction"
This reverts commit 0fd49457f5.

It seems that the redesign was temporary.
2013-09-24 21:39:38 +02:00
2cdeb20135 release 2013.09.24 2013-09-24 21:28:06 +02:00
7f74773254 Add option --no-cache-dir 2013-09-24 21:26:10 +02:00
f2c327fd39 Fix 86 signature (#1494) 2013-09-24 21:20:42 +02:00
e35e4ddc9a Fix output of --youtube-print-sig-code when counting down to 0 2013-09-24 21:18:03 +02:00
c3c88a2664 Allow opts.cachedir == None to disable cache 2013-09-24 21:04:43 +02:00
bb0eee71e7 [youtube] Update one of the test's description 2013-09-24 21:04:13 +02:00
6f56389b88 [youtube] update algos for length 86 and 84 (fixes #1494) 2013-09-24 21:02:00 +02:00
5b333c1ce6 [francetv] Add an extractor for Generation Quoi (closes #1475) 2013-09-23 21:41:54 +02:00
a825f33030 [francetv] Add an extractor for France2 2013-09-23 21:28:33 +02:00
92f618f2e2 Merge remote-tracking branch 'origin/master' 2013-09-23 11:24:49 +02:00
81ec7c7901 [facebook] Allow untitled videos (Fixes #1484) 2013-09-23 11:24:33 +02:00
dd5d2eb03c If the file is already downloaded include the size in the progress hook 2013-09-22 23:39:30 +02:00
4ae720042c Include the eta and the speed in the progress hooks
Useful when listening to the progress hook, for example in a GUI.
2013-09-22 23:31:39 +02:00
c705320f48 Correct test strings 2013-09-22 12:18:16 +02:00
d2d8f89531 Do not warn if fallback is without alternatives (because we did not get the flash player URL) 2013-09-22 12:18:10 +02:00
bdde940e90 [youtube] Improve flash player URL handling 2013-09-22 12:17:42 +02:00
45f4a76dbc Work around nosetests nosiness 2013-09-22 11:45:29 +02:00
13dc64ce74 [youtube] Remove _decrypt_signature_age_gate 2013-09-22 11:17:21 +02:00
c35f9e72ce Move cachedir doc 2013-09-22 11:09:25 +02:00
f8061589e6 [youtube] Actually pass in cachedir option 2013-09-22 10:51:33 +02:00
0ca96d48c7 [youtube] Improve source code quality 2013-09-22 10:37:23 +02:00
4ba146f35d Update static signatures 2013-09-22 10:31:25 +02:00
edf3e38ebd [youtube] Improve cache and add an option to print the extracted signatures 2013-09-22 10:30:02 +02:00
c4417ddb61 [youtube] Add filesystem signature cache 2013-09-22 00:35:03 +02:00
4a2080e407 [youku] better error handling
blocked videos used to cause death by TypeError, now we report what the
server says
2013-09-21 20:50:31 +02:00
2f2ffea9ca Clarify a couple of calls 2013-09-21 15:34:29 +02:00
ba552f542f Use reader instead of indexing 2013-09-21 15:32:37 +02:00
8379969834 Prepare signature function caching 2013-09-21 15:19:48 +02:00
95dbd2f990 Change test target (Verified with node.js) 2013-09-21 15:10:38 +02:00
a7177865b1 Implement more opcodes 2013-09-21 14:48:12 +02:00
e0df6211cc Restore accidentally deleted commits
That's what happens if you let Windows machines write :(
2013-09-21 14:40:35 +02:00
b00ca882a4 [livestream] Fix events extraction (fixes #1467) 2013-09-21 13:50:52 +02:00
39baacc49f [dailymotion] Add an extractor for users (closes #1476) 2013-09-21 12:45:53 +02:00
3a1d48d6de [dailymotion] Raise ExtractorError if the dailymotion response reports an error 2013-09-21 12:15:54 +02:00
34308b30d6 Warn if no locale is set (#1474) 2013-09-21 11:48:07 +02:00
bc1506f8c0 Merge branch 'master' of github.com:rg3/youtube-dl 2013-09-21 11:10:30 +02:00
b61067fa4f Abort if extractaudio is given without a variable extension (#1470) 2013-09-21 11:10:22 +02:00
69b227a9bc [southparkstudios] add support for http://www.southparkstudios.com/full-episodes/* urls (closes #1469) 2013-09-21 10:58:43 +02:00
0fd49457f5 [southparkstudios] Fix mgid extraction 2013-09-21 10:51:25 +02:00
43 changed files with 1644 additions and 540 deletions

2
.gitignore vendored
View File

@ -24,3 +24,5 @@ updates_key.pem
*.flv
*.mp4
*.part
test/testdata
.tox

View File

@ -30,6 +30,11 @@ which means you can modify it, redistribute it or use it however you like.
--extractor-descriptions Output descriptions of all supported extractors
--proxy URL Use the specified HTTP/HTTPS proxy
--no-check-certificate Suppress HTTPS certificate validation.
--cache-dir None Location in the filesystem where youtube-dl can
store downloaded information permanently. By
default $XDG_CACHE_HOME/youtube-dl or ~/.cache
/youtube-dl .
--no-cache-dir Disable filesystem caching
## Video Selection:
--playlist-start NUMBER playlist video to start at (default is 1)
@ -46,6 +51,7 @@ which means you can modify it, redistribute it or use it however you like.
--date DATE download only videos uploaded in this date
--datebefore DATE download only videos uploaded before this date
--dateafter DATE download only videos uploaded after this date
--no-playlist download only the currently playing video
## Download Options:
-r, --rate-limit LIMIT maximum download rate (e.g. 50k or 44.6m)

View File

@ -1,116 +0,0 @@
#!/usr/bin/env python
# encoding: utf-8
# Generate youtube signature algorithm from test cases
import sys
tests = [
# 93 - vfl79wBKW 2013/07/20
(u"qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~\"",
u".>/?;:|}][{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ098765'321mnbvcxzasdfghjklpoiu"),
# 92 - vflQw-fB4 2013/07/17
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~\"",
"mrtyuioplkjhgfdsazxcvbnq1234567890QWERTY}IOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]\"|:;"),
# 91 - vfl79wBKW 2013/07/20 (sporadic)
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~",
"/?;:|}][{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543.1mnbvcxzasdfghjklpoiu"),
# 90
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`",
"mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"),
# 89
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'",
"/?;:|}<[{=+-_)(*&^%$#@!MqBVCXZASDFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuyt"),
# 88 - vflapUV9V 2013/08/28
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
"ioplkjhgfdsazxcvbnm12<4567890QWERTYUIOZLKJHGFDSAeXCVBNM!@#$%^&*()_-+={[]}|:;?/>.3"),
# 87
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
"uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"),
# 86 - vfluy6kdb 2013/09/06
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
"yuioplkjhgfdsazxcvbnm12345678q0QWrRTYUIOELKJHGFD-AZXCVBNM!@#$%^&*()_<+={[|};?/>.S"),
# 85 - vflkuzxcs 2013/09/11
('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[',
'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@'),
# 84 - vflg0g8PQ 2013/08/29 (sporadic)
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),
# 83
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
# 82 - vflGNjMhJ 2013/09/12
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
".>/?;}[<=+-(*&^%$#@!MNBVCXeASDFGHKLPOqUYTREWQ0987654321mnbvcxzasdfghjklpoiuytrIwZ"),
# 81 - vflLC8JvQ 2013/07/25
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.",
"C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
# 80 - vflZK4ZYR 2013/08/23 (sporadic)
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>",
"wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>"),
# 79 - vflLC8JvQ 2013/07/25 (sporadic)
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/",
"Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
]
tests_age_gate = [
# 86 - vflqinMWD
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
"ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
]
def find_matching(wrong, right):
idxs = [wrong.index(c) for c in right]
return compress(idxs)
return ('s[%d]' % i for i in idxs)
def compress(idxs):
def _genslice(start, end, step):
starts = '' if start == 0 else str(start)
ends = ':%d' % (end+step)
steps = '' if step == 1 else (':%d' % step)
return 's[%s%s%s]' % (starts, ends, steps)
step = None
for i, prev in zip(idxs[1:], idxs[:-1]):
if step is not None:
if i - prev == step:
continue
yield _genslice(start, prev, step)
step = None
continue
if i - prev in [-1, 1]:
step = i - prev
start = prev
continue
else:
yield 's[%d]' % prev
if step is None:
yield 's[%d]' % i
else:
yield _genslice(start, i, step)
def _assert_compress(inp, exp):
res = list(compress(inp))
if res != exp:
print('Got %r, expected %r' % (res, exp))
assert res == exp
_assert_compress([0,2,4,6], ['s[0]', 's[2]', 's[4]', 's[6]'])
_assert_compress([0,1,2,4,6,7], ['s[:3]', 's[4]', 's[6:8]'])
_assert_compress([8,0,1,2,4,7,6,9], ['s[8]', 's[:3]', 's[4]', 's[7:5:-1]', 's[9]'])
def gen(wrong, right, indent):
code = ' + '.join(find_matching(wrong, right))
return 'if len(s) == %d:\n%s return %s\n' % (len(wrong), indent, code)
def genall(tests):
indent = ' ' * 8
return indent + (indent + 'el').join(gen(wrong, right, indent) for wrong,right in tests)
def main():
print(genall(tests))
print(u' Age gate:')
print(genall(tests_age_gate))
if __name__ == '__main__':
main()

View File

@ -1,6 +1,9 @@
import errno
import io
import json
import os.path
import re
import types
import youtube_dl.extractor
from youtube_dl import YoutubeDL, YoutubeDLHandler
@ -20,19 +23,41 @@ PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "para
with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
parameters = json.load(pf)
def try_rm(filename):
""" Remove a file if it exists """
try:
os.remove(filename)
except OSError as ose:
if ose.errno != errno.ENOENT:
raise
class FakeYDL(YoutubeDL):
def __init__(self):
self.result = []
# Different instances of the downloader can't share the same dictionary
# some test set the "sublang" parameter, which would break the md5 checks.
self.params = dict(parameters)
def to_screen(self, s):
params = dict(parameters)
super(FakeYDL, self).__init__(params)
self.result = []
def to_screen(self, s, skip_eol=None):
print(s)
def trouble(self, s, tb=None):
raise Exception(s)
def download(self, x):
self.result.append(x)
def expect_warning(self, regex):
# Silence an expected warning matching a regex
old_report_warning = self.report_warning
def report_warning(self, message):
if re.match(regex, message): return
old_report_warning(message)
self.report_warning = types.MethodType(report_warning, self)
def get_testcases():
for ie in youtube_dl.extractor.gen_extractors():
t = getattr(ie, '_TEST', None)

View File

@ -2,8 +2,6 @@
import sys
import unittest
import json
import io
import hashlib
# Allow direct execution
@ -45,15 +43,18 @@ class TestDailymotionSubtitles(unittest.TestCase):
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 5)
def test_list_subtitles(self):
self.DL.expect_warning(u'Automatic Captions not supported by this server')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
def test_automatic_captions(self):
self.DL.expect_warning(u'Automatic Captions not supported by this server')
self.DL.params['writeautomaticsub'] = True
self.DL.params['subtitleslang'] = ['en']
subtitles = self.getSubtitles()
self.assertTrue(len(subtitles.keys()) == 0)
def test_nosubtitles(self):
self.DL.expect_warning(u'video doesn\'t have subtitles')
self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True

View File

@ -1,6 +1,5 @@
#!/usr/bin/env python
import errno
import hashlib
import io
import os
@ -28,14 +27,6 @@ opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, You
compat_urllib_request.install_opener(opener)
socket.setdefaulttimeout(10)
def _try_rm(filename):
""" Remove a file if it exists """
try:
os.remove(filename)
except OSError as ose:
if ose.errno != errno.ENOENT:
raise
md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
class YoutubeDL(youtube_dl.YoutubeDL):
@ -54,7 +45,7 @@ def _file_md5(fn):
with open(fn, 'rb') as f:
return hashlib.md5(f.read()).hexdigest()
from helper import get_testcases
from helper import get_testcases, try_rm
defs = get_testcases()
with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
@ -97,9 +88,9 @@ def generator(test_case):
test_cases = test_case.get('playlist', [test_case])
for tc in test_cases:
_try_rm(tc['file'])
_try_rm(tc['file'] + '.part')
_try_rm(tc['file'] + '.info.json')
try_rm(tc['file'])
try_rm(tc['file'] + '.part')
try_rm(tc['file'] + '.info.json')
try:
for retry in range(1, RETRIES + 1):
try:
@ -145,9 +136,9 @@ def generator(test_case):
self.assertTrue(key in info_dict.keys() and info_dict[key])
finally:
for tc in test_cases:
_try_rm(tc['file'])
_try_rm(tc['file'] + '.part')
_try_rm(tc['file'] + '.info.json')
try_rm(tc['file'])
try_rm(tc['file'] + '.part')
try_rm(tc['file'] + '.info.json')
return test_template

View File

@ -1,4 +1,5 @@
#!/usr/bin/env python
# encoding: utf-8
import sys
import unittest
@ -8,7 +9,14 @@ import json
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE, UstreamChannelIE, SoundcloudUserIE
from youtube_dl.extractor import (
DailymotionPlaylistIE,
DailymotionUserIE,
VimeoChannelIE,
UstreamChannelIE,
SoundcloudUserIE,
LivestreamIE,
)
from youtube_dl.utils import *
from helper import FakeYDL
@ -26,6 +34,14 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['title'], u'SPORT')
self.assertTrue(len(result['entries']) > 20)
def test_dailymotion_user(self):
dl = FakeYDL()
ie = DailymotionUserIE(dl)
result = ie.extract('http://www.dailymotion.com/user/generation-quoi/')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'Génération Quoi')
self.assertTrue(len(result['entries']) >= 26)
def test_vimeo_channel(self):
dl = FakeYDL()
ie = VimeoChannelIE(dl)
@ -50,5 +66,13 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['id'], u'9615865')
self.assertTrue(len(result['entries']) >= 12)
def test_livestream_event(self):
dl = FakeYDL()
ie = LivestreamIE(dl)
result = ie.extract('http://new.livestream.com/tedx/cityenglish')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'TEDCity2.0 (English)')
self.assertTrue(len(result['entries']) >= 4)
if __name__ == '__main__':
unittest.main()

View File

@ -27,6 +27,14 @@ class TestYoutubeLists(unittest.TestCase):
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE'])
def test_youtube_playlist_noplaylist(self):
dl = FakeYDL()
dl.params['noplaylist'] = True
ie = YoutubePlaylistIE(dl)
result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
self.assertEqual(result['_type'], 'url')
self.assertEqual(YoutubeIE()._extract_id(result['url']), 'FXxLjLQi3Fg')
def test_issue_673(self):
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)

View File

@ -0,0 +1,80 @@
#!/usr/bin/env python
import io
import re
import string
import sys
import unittest
# Allow direct execution
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.extractor import YoutubeIE
from youtube_dl.utils import compat_str, compat_urlretrieve
_TESTS = [
(
u'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
u'js',
86,
u'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
),
(
u'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
u'js',
85,
u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
),
(
u'https://s.ytimg.com/yts/swfbin/watch_as3-vflg5GhxU.swf',
u'swf',
82,
u':/.-,+*)=\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBAzyxw>utsrqponmlkjihgfedcba987654321'
),
]
class TestSignature(unittest.TestCase):
def setUp(self):
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata')
if not os.path.exists(self.TESTDATA_DIR):
os.mkdir(self.TESTDATA_DIR)
def make_tfunc(url, stype, sig_length, expected_sig):
basename = url.rpartition('/')[2]
m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename)
assert m, '%r should follow URL format' % basename
test_id = m.group(1)
def test_func(self):
fn = os.path.join(self.TESTDATA_DIR, basename)
if not os.path.exists(fn):
compat_urlretrieve(url, fn)
ie = YoutubeIE()
if stype == 'js':
with io.open(fn, encoding='utf-8') as testf:
jscode = testf.read()
func = ie._parse_sig_js(jscode)
else:
assert stype == 'swf'
with open(fn, 'rb') as testf:
swfcode = testf.read()
func = ie._parse_sig_swf(swfcode)
src_sig = compat_str(string.printable[:sig_length])
got_sig = func(src_sig)
self.assertEqual(got_sig, expected_sig)
test_func.__name__ = str('test_signature_' + stype + '_' + test_id)
setattr(TestSignature, test_func.__name__, test_func)
for test_spec in _TESTS:
make_tfunc(*test_spec)
if __name__ == '__main__':
unittest.main()

View File

@ -2,8 +2,6 @@
import sys
import unittest
import json
import io
import hashlib
# Allow direct execution
@ -56,6 +54,7 @@ class TestYoutubeSubtitles(unittest.TestCase):
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7')
def test_youtube_list_subtitles(self):
self.DL.expect_warning(u'Video doesn\'t have automatic captions')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
@ -66,6 +65,7 @@ class TestYoutubeSubtitles(unittest.TestCase):
subtitles = self.getSubtitles()
self.assertTrue(subtitles['it'] is not None)
def test_youtube_nosubtitles(self):
self.DL.expect_warning(u'video doesn\'t have subtitles')
self.url = 'sAjKT8FhjI8'
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True

5
tox.ini Normal file
View File

@ -0,0 +1,5 @@
[tox]
envlist = py26,py27,py33
[testenv]
deps = nose
commands = nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose test

View File

@ -77,26 +77,43 @@ class FileDownloader(object):
@staticmethod
def calc_percent(byte_counter, data_len):
if data_len is None:
return None
return float(byte_counter) / float(data_len) * 100.0
@staticmethod
def format_percent(percent):
if percent is None:
return '---.-%'
return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
return '%6s' % ('%3.1f%%' % percent)
@staticmethod
def calc_eta(start, now, total, current):
if total is None:
return '--:--'
return None
dif = now - start
if current == 0 or dif < 0.001: # One millisecond
return '--:--'
return None
rate = float(current) / dif
eta = int((float(total) - float(current)) / rate)
return int((float(total) - float(current)) / rate)
@staticmethod
def format_eta(eta):
if eta is None:
return '--:--'
return FileDownloader.format_seconds(eta)
@staticmethod
def calc_speed(start, now, bytes):
dif = now - start
if bytes == 0 or dif < 0.001: # One millisecond
return None
return float(bytes) / dif
@staticmethod
def format_speed(speed):
if speed is None:
return '%10s' % '---b/s'
return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
return '%10s' % ('%s/s' % FileDownloader.format_bytes(speed))
@staticmethod
def best_block_size(elapsed_time, bytes):
@ -205,11 +222,14 @@ class FileDownloader(object):
"""Report destination filename."""
self.to_screen(u'[download] Destination: ' + filename)
def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
def report_progress(self, percent, data_len_str, speed, eta):
"""Report download progress."""
if self.params.get('noprogress', False):
return
clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
eta_str = self.format_eta(eta)
percent_str = self.format_percent(percent)
speed_str = self.format_speed(speed)
if self.params.get('progress_with_newline', False):
self.to_screen(u'[download] %s of %s at %s ETA %s' %
(percent_str, data_len_str, speed_str, eta_str))
@ -378,6 +398,7 @@ class FileDownloader(object):
self._hook_progress({
'filename': filename,
'status': 'finished',
'total_bytes': os.path.getsize(encodeFilename(filename)),
})
return True
@ -524,13 +545,14 @@ class FileDownloader(object):
block_size = self.best_block_size(after - before, len(data_block))
# Progress message
speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
if data_len is None:
self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
eta = None
else:
percent_str = self.calc_percent(byte_counter, data_len)
eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
self.report_progress(percent_str, data_len_str, speed_str, eta_str)
percent = self.calc_percent(byte_counter, data_len)
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
self.report_progress(percent, data_len_str, speed, eta)
self._hook_progress({
'downloaded_bytes': byte_counter,
@ -538,6 +560,8 @@ class FileDownloader(object):
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
'eta': eta,
'speed': speed,
})
# Apply rate limit
@ -580,6 +604,8 @@ class FileDownloader(object):
* downloaded_bytes: Bytes on disks
* total_bytes: Total bytes, None if unknown
* tmpfilename: The filename we're currently writing to
* eta: The estimated time in seconds, None if unknown
* speed: The download speed in bytes/second, None if unknown
Hooks are guaranteed to be called at least once (with status "finished")
if the download is successful.

View File

@ -444,8 +444,11 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
if information['ext'] != u'mp4':
self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
return True, information
sub_langs = [key for key in information['subtitles']]
if not information.get('subtitles'):
self._downloader.to_screen(u'[ffmpeg] There aren\'t any subtitles to embed')
return True, information
sub_langs = [key for key in information['subtitles']]
filename = information['filepath']
input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs]

View File

@ -81,6 +81,9 @@ class YoutubeDL(object):
keepvideo: Keep the video file after post-processing
daterange: A DateRange object, download only if the upload_date is in the range.
skip_download: Skip the actual download of the video file
cachedir: Location of the cache files in the filesystem.
None to disable filesystem cache.
noplaylist: Download single video instead of a playlist if in doubt.
The following parameters are not used by YoutubeDL itself, they are used by
the FileDownloader:
@ -104,6 +107,17 @@ class YoutubeDL(object):
self._download_retcode = 0
self._num_downloads = 0
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
if (sys.version_info >= (3,) and sys.platform != 'win32' and
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
and not params['restrictfilenames']):
# On Python 3, the Unicode filesystem API will throw errors (#1474)
self.report_warning(
u'Assuming --restrict-filenames isnce file system encoding '
u'cannot encode all charactes. '
u'Set the LC_ALL environment variable to fix this.')
params['restrictfilenames'] = True
self.params = params
self.fd = FileDownloader(self, self.params)
@ -532,11 +546,15 @@ class YoutubeDL(object):
thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
(info_dict['extractor'], info_dict['id']))
uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
with open(thumb_filename, 'wb') as thumbf:
shutil.copyfileobj(uf, thumbf)
self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
(info_dict['extractor'], info_dict['id'], thumb_filename))
try:
uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
with open(thumb_filename, 'wb') as thumbf:
shutil.copyfileobj(uf, thumbf)
self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
(info_dict['extractor'], info_dict['id'], thumb_filename))
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_warning(u'Unable to download thumbnail "%s": %s' %
(info_dict['thumbnail'], compat_str(err)))
if not self.params.get('skip_download', False):
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):

View File

@ -167,6 +167,12 @@ def parseOpts(overrideArguments=None):
help='Output descriptions of all supported extractors', default=False)
general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
general.add_option(
'--cache-dir', dest='cachedir', default=get_cachedir(),
help='Location in the filesystem where youtube-dl can store downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl .')
general.add_option(
'--no-cache-dir', action='store_const', const=None, dest='cachedir',
help='Disable filesystem caching')
selection.add_option('--playlist-start',
@ -181,6 +187,7 @@ def parseOpts(overrideArguments=None):
selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
authentication.add_option('-u', '--username',
@ -272,6 +279,10 @@ def parseOpts(overrideArguments=None):
verbosity.add_option('--dump-intermediate-pages',
action='store_true', dest='dump_intermediate_pages', default=False,
help='print downloaded pages to debug problems(very verbose)')
verbosity.add_option('--youtube-print-sig-code',
action='store_true', dest='youtube_print_sig_code', default=False,
help=optparse.SUPPRESS_HELP)
filesystem.add_option('-t', '--title',
action='store_true', dest='usetitle', help='use title in file name (default)', default=False)
@ -359,9 +370,13 @@ def parseOpts(overrideArguments=None):
else:
xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
if xdg_config_home:
userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
if not os.path.isfile(userConfFile):
userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
else:
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
if not os.path.isfile(userConfFile):
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
systemConf = _readOptions('/etc/youtube-dl.conf')
userConf = _readOptions(userConfFile)
commandLineConf = sys.argv[1:]
@ -551,6 +566,10 @@ def _real_main(argv=None):
or (opts.useid and u'%(id)s.%(ext)s')
or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
or u'%(title)s-%(id)s.%(ext)s')
if '%(ext)s' not in outtmpl and opts.extractaudio:
parser.error(u'Cannot download a video and extract audio into the same'
u' file! Use "%%(ext)s" instead of %r' %
determine_ext(outtmpl, u''))
# YoutubeDL
ydl = YoutubeDL({
@ -585,6 +604,7 @@ def _real_main(argv=None):
'progress_with_newline': opts.progress_with_newline,
'playliststart': opts.playliststart,
'playlistend': opts.playlistend,
'noplaylist': opts.noplaylist,
'logtostderr': opts.outtmpl == '-',
'consoletitle': opts.consoletitle,
'nopart': opts.nopart,
@ -609,6 +629,8 @@ def _real_main(argv=None):
'min_filesize': opts.min_filesize,
'max_filesize': opts.max_filesize,
'daterange': date,
'cachedir': opts.cachedir,
'youtube_print_sig_code': opts.youtube_print_sig_code,
})
if opts.verbose:
@ -642,7 +664,7 @@ def _real_main(argv=None):
# Update version
if opts.update_self:
update_self(ydl.to_screen, opts.verbose, sys.argv[0])
update_self(ydl.to_screen, opts.verbose)
# Maybe do nothing
if len(all_urls) < 1:

View File

@ -18,7 +18,11 @@ from .comedycentral import ComedyCentralIE
from .condenast import CondeNastIE
from .criterion import CriterionIE
from .cspan import CSpanIE
from .dailymotion import DailymotionIE, DailymotionPlaylistIE
from .dailymotion import (
DailymotionIE,
DailymotionPlaylistIE,
DailymotionUserIE,
)
from .daum import DaumIE
from .depositfiles import DepositFilesIE
from .dotsub import DotsubIE
@ -38,6 +42,8 @@ from .flickr import FlickrIE
from .francetv import (
PluzzIE,
FranceTvInfoIE,
France2IE,
GenerationQuoiIE
)
from .freesound import FreesoundIE
from .funnyordie import FunnyOrDieIE
@ -111,6 +117,7 @@ from .veehd import VeeHDIE
from .veoh import VeohIE
from .vevo import VevoIE
from .vice import ViceIE
from .viddler import ViddlerIE
from .videofyme import VideofyMeIE
from .vimeo import VimeoIE, VimeoChannelIE
from .vine import VineIE

View File

@ -1,8 +1,10 @@
import re
import xml.etree.ElementTree
import json
from .common import InfoExtractor
from ..utils import (
compat_urlparse,
determine_ext,
)
@ -14,10 +16,9 @@ class AppleTrailersIE(InfoExtractor):
u"playlist": [
{
u"file": u"manofsteel-trailer4.mov",
u"md5": u"11874af099d480cc09e103b189805d5f",
u"md5": u"d97a8e575432dbcb81b7c3acb741f8a8",
u"info_dict": {
u"duration": 111,
u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_11624.jpg",
u"title": u"Trailer 4",
u"upload_date": u"20130523",
u"uploader_id": u"wb",
@ -25,10 +26,9 @@ class AppleTrailersIE(InfoExtractor):
},
{
u"file": u"manofsteel-trailer3.mov",
u"md5": u"07a0a262aae5afe68120eed61137ab34",
u"md5": u"b8017b7131b721fb4e8d6f49e1df908c",
u"info_dict": {
u"duration": 182,
u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_10793.jpg",
u"title": u"Trailer 3",
u"upload_date": u"20130417",
u"uploader_id": u"wb",
@ -36,10 +36,9 @@ class AppleTrailersIE(InfoExtractor):
},
{
u"file": u"manofsteel-trailer.mov",
u"md5": u"e401fde0813008e3307e54b6f384cff1",
u"md5": u"d0f1e1150989b9924679b441f3404d48",
u"info_dict": {
u"duration": 148,
u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_8703.jpg",
u"title": u"Trailer",
u"upload_date": u"20121212",
u"uploader_id": u"wb",
@ -47,10 +46,9 @@ class AppleTrailersIE(InfoExtractor):
},
{
u"file": u"manofsteel-teaser.mov",
u"md5": u"76b392f2ae9e7c98b22913c10a639c97",
u"md5": u"5fe08795b943eb2e757fa95cb6def1cb",
u"info_dict": {
u"duration": 93,
u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_6899.jpg",
u"title": u"Teaser",
u"upload_date": u"20120721",
u"uploader_id": u"wb",
@ -59,87 +57,61 @@ class AppleTrailersIE(InfoExtractor):
]
}
_JSON_RE = r'iTunes.playURL\((.*?)\);'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
movie = mobj.group('movie')
uploader_id = mobj.group('company')
playlist_url = url.partition(u'?')[0] + u'/includes/playlists/web.inc'
playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc')
playlist_snippet = self._download_webpage(playlist_url, movie)
playlist_cleaned = re.sub(r'(?s)<script>.*?</script>', u'', playlist_snippet)
playlist_cleaned = re.sub(r'(?s)<script[^<]*?>.*?</script>', u'', playlist_snippet)
playlist_cleaned = re.sub(r'<img ([^<]*?)>', r'<img \1/>', playlist_cleaned)
# The ' in the onClick attributes are not escaped, it couldn't be parsed
# with xml.etree.ElementTree.fromstring
# like: http://trailers.apple.com/trailers/wb/gravity/
def _clean_json(m):
return u'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
playlist_cleaned = re.sub(self._JSON_RE, _clean_json, playlist_cleaned)
playlist_html = u'<html>' + playlist_cleaned + u'</html>'
size_cache = {}
doc = xml.etree.ElementTree.fromstring(playlist_html)
playlist = []
for li in doc.findall('./div/ul/li'):
title = li.find('.//h3').text
on_click = li.find('.//a').attrib['onClick']
trailer_info_json = self._search_regex(self._JSON_RE,
on_click, u'trailer info')
trailer_info = json.loads(trailer_info_json)
title = trailer_info['title']
video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
thumbnail = li.find('.//img').attrib['src']
upload_date = trailer_info['posted'].replace('-', '')
date_el = li.find('.//p')
upload_date = None
m = re.search(r':\s?(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<year>[0-9]{2})', date_el.text)
if m:
upload_date = u'20' + m.group('year') + m.group('month') + m.group('day')
runtime_el = date_el.find('./br')
m = re.search(r':\s?(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime_el.tail)
runtime = trailer_info['runtime']
m = re.search(r'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime)
duration = None
if m:
duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
first_url = trailer_info['url']
trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
settings_json = self._download_webpage(settings_json_url, trailer_id, u'Downloading settings json')
settings = json.loads(settings_json)
formats = []
for formats_el in li.findall('.//a'):
if formats_el.attrib['class'] != 'OverlayPanel':
continue
target = formats_el.attrib['target']
format_code = formats_el.text
if 'Automatic' in format_code:
continue
size_q = formats_el.attrib['href']
size_id = size_q.rpartition('#videos-')[2]
if size_id not in size_cache:
size_url = url + size_q
sizepage_html = self._download_webpage(
size_url, movie,
note=u'Downloading size info %s' % size_id,
errnote=u'Error while downloading size info %s' % size_id,
)
_doc = xml.etree.ElementTree.fromstring(sizepage_html)
size_cache[size_id] = _doc
sizepage_doc = size_cache[size_id]
links = sizepage_doc.findall('.//{http://www.w3.org/1999/xhtml}ul/{http://www.w3.org/1999/xhtml}li/{http://www.w3.org/1999/xhtml}a')
for vid_a in links:
href = vid_a.get('href')
if not href.endswith(target):
continue
detail_q = href.partition('#')[0]
detail_url = url + '/' + detail_q
m = re.match(r'includes/(?P<detail_id>[^/]+)/', detail_q)
detail_id = m.group('detail_id')
detail_html = self._download_webpage(
detail_url, movie,
note=u'Downloading detail %s %s' % (detail_id, size_id),
errnote=u'Error while downloading detail %s %s' % (detail_id, size_id)
)
detail_doc = xml.etree.ElementTree.fromstring(detail_html)
movie_link_el = detail_doc.find('.//{http://www.w3.org/1999/xhtml}a')
assert movie_link_el.get('class') == 'movieLink'
movie_link = movie_link_el.get('href').partition('?')[0].replace('_', '_h')
ext = determine_ext(movie_link)
assert ext == 'mov'
formats.append({
'format': format_code,
'ext': ext,
'url': movie_link,
})
for format in settings['metadata']['sizes']:
# The src is a file pointing to the real video file
format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src'])
formats.append({
'url': format_url,
'ext': determine_ext(format_url),
'format': format['type'],
'width': format['width'],
'height': int(format['height']),
})
formats = sorted(formats, key=lambda f: (f['height'], f['width']))
info = {
'_type': 'video',

View File

@ -115,7 +115,7 @@ class BlipTVIE(InfoExtractor):
ext = umobj.group(1)
info = {
'id': data['item_id'],
'id': compat_str(data['item_id']),
'url': video_url,
'uploader': data['display_name'],
'upload_date': upload_date,

View File

@ -1,3 +1,5 @@
# encoding: utf-8
import re
import json
import xml.etree.ElementTree
@ -7,15 +9,39 @@ from ..utils import (
compat_urllib_parse,
find_xpath_attr,
compat_urlparse,
ExtractorError,
)
class BrightcoveIE(InfoExtractor):
_VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
_FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
_PLAYLIST_URL_TEMPLATE = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s'
# There is a test for Brigtcove in GenericIE, that way we test both the download
# and the detection of videos, and we don't have to find an URL that is always valid
_TESTS = [
{
# From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
u'file': u'2371591881001.mp4',
u'md5': u'9e80619e0a94663f0bdc849b4566af19',
u'note': u'Test Brightcove downloads and detection in GenericIE',
u'info_dict': {
u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
u'uploader': u'8TV',
u'description': u'md5:a950cc4285c43e44d763d036710cd9cd',
}
},
{
# From http://medianetwork.oracle.com/video/player/1785452137001
u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001',
u'file': u'1785452137001.flv',
u'info_dict': {
u'title': u'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges',
u'description': u'John Rose speaks at the JVM Language Summit, August 1, 2012.',
u'uploader': u'Oracle',
},
},
]
@classmethod
def _build_brighcove_url(cls, object_str):
@ -23,6 +49,11 @@ class BrightcoveIE(InfoExtractor):
Build a Brightcove url from a xml string containing
<object class="BrightcoveExperience">{params}</object>
"""
# Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553
object_str = re.sub(r'(<param name="[^"]+" value="[^"]+")>',
lambda m: m.group(1) + '/>', object_str)
object_doc = xml.etree.ElementTree.fromstring(object_str)
assert u'BrightcoveExperience' in object_doc.attrib['class']
params = {'flashID': object_doc.attrib['id'],
@ -72,15 +103,27 @@ class BrightcoveIE(InfoExtractor):
playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
def _extract_video_info(self, video_info):
renditions = video_info['renditions']
renditions = sorted(renditions, key=lambda r: r['size'])
best_format = renditions[-1]
info = {
'id': video_info['id'],
'title': video_info['displayName'],
'description': video_info.get('shortDescription'),
'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
'uploader': video_info.get('publisherName'),
}
return {'id': video_info['id'],
'title': video_info['displayName'],
'url': best_format['defaultURL'],
renditions = video_info.get('renditions')
if renditions:
renditions = sorted(renditions, key=lambda r: r['size'])
best_format = renditions[-1]
info.update({
'url': best_format['defaultURL'],
'ext': 'mp4',
'description': video_info.get('shortDescription'),
'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
'uploader': video_info.get('publisherName'),
}
})
elif video_info.get('FLVFullLengthURL') is not None:
info.update({
'url': video_info['FLVFullLengthURL'],
'ext': 'flv',
})
else:
raise ExtractorError(u'Unable to extract video url for %s' % info['id'])
return info

View File

@ -51,12 +51,12 @@ class ComedyCentralIE(InfoExtractor):
'400': 'mp4',
}
_video_dimensions = {
'3500': '1280x720',
'2200': '960x540',
'1700': '768x432',
'1200': '640x360',
'750': '512x288',
'400': '384x216',
'3500': (1280, 720),
'2200': (960, 540),
'1700': (768, 432),
'1200': (640, 360),
'750': (512, 288),
'400': (384, 216),
}
@classmethod
@ -64,11 +64,13 @@ class ComedyCentralIE(InfoExtractor):
"""Receives a URL and returns True if suitable for this IE."""
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
def _print_formats(self, formats):
print('Available formats:')
for x in formats:
print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'mp4'), self._video_dimensions.get(x, '???')))
@staticmethod
def _transform_rtmp_url(rtmp_video_url):
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url)
if not m:
raise ExtractorError(u'Cannot transform RTMP url')
base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
return base + m.group('finalid')
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@ -155,40 +157,31 @@ class ComedyCentralIE(InfoExtractor):
self._downloader.report_error(u'unable to download ' + mediaId + ': No videos found')
continue
if self._downloader.params.get('listformats', None):
self._print_formats([i[0] for i in turls])
return
# For now, just pick the highest bitrate
format,rtmp_video_url = turls[-1]
# Get the format arg from the arg stream
req_format = self._downloader.params.get('format', None)
# Select format if we can find one
for f,v in turls:
if f == req_format:
format, rtmp_video_url = f, v
break
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url)
if not m:
raise ExtractorError(u'Cannot transform RTMP url')
base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
video_url = base + m.group('finalid')
formats = []
for format, rtmp_video_url in turls:
w, h = self._video_dimensions.get(format, (None, None))
formats.append({
'url': self._transform_rtmp_url(rtmp_video_url),
'ext': self._video_extensions.get(format, 'mp4'),
'format_id': format,
'height': h,
'width': w,
})
effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1)
info = {
'id': shortMediaId,
'url': video_url,
'formats': formats,
'uploader': showId,
'upload_date': officialDate,
'title': effTitle,
'ext': 'mp4',
'format': format,
'thumbnail': None,
'description': compat_str(officialTitle),
}
# TODO: Remove when #980 has been merged
info.update(info['formats'][-1])
results.append(info)
return results

View File

@ -35,6 +35,8 @@ class InfoExtractor(object):
title: Video title, unescaped.
ext: Video filename extension.
Instead of url and ext, formats can also specified.
The following fields are optional:
format: The video format, defaults to ext (used for --get-format)
@ -52,8 +54,19 @@ class InfoExtractor(object):
view_count: How many users have watched the video on the platform.
urlhandle: [internal] The urlHandle to be used to download the file,
like returned by urllib.request.urlopen
formats: A list of dictionaries for each format available, it must
be ordered from worst to best quality. Potential fields:
* url Mandatory. The URL of the video file
* ext Will be calculated from url if missing
* format A human-readable description of the format
("mp4 container with h264/opus").
Calculated from width and height if missing.
* format_id A short description of the format
("mp4_h264_opus" or "19")
* width Width of the video, if known
* height Height of the video, if known
The fields should all be Unicode strings.
Unless mentioned otherwise, the fields should be Unicode strings.
Subclasses of this one should re-define the _real_initialize() and
_real_extract() methods and define a _VALID_URL regexp.

View File

@ -10,25 +10,49 @@ from ..utils import (
compat_str,
get_element_by_attribute,
get_element_by_id,
orderedSet,
ExtractorError,
)
class DailymotionBaseInfoExtractor(InfoExtractor):
@staticmethod
def _build_request(url):
"""Build a request with the family filter disabled"""
request = compat_urllib_request.Request(url)
request.add_header('Cookie', 'family_filter=off')
return request
class DailymotionIE(SubtitlesInfoExtractor):
class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
"""Information Extractor for Dailymotion"""
_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
IE_NAME = u'dailymotion'
_TEST = {
u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
u'file': u'x33vw9.mp4',
u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
u'info_dict': {
u"uploader": u"Amphora Alex and Van .",
u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
}
}
_TESTS = [
{
u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
u'file': u'x33vw9.mp4',
u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
u'info_dict': {
u"uploader": u"Amphora Alex and Van .",
u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
}
},
# Vevo video
{
u'url': u'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi',
u'file': u'USUV71301934.mp4',
u'info_dict': {
u'title': u'Roar (Official)',
u'uploader': u'Katy Perry',
u'upload_date': u'20130905',
},
u'params': {
u'skip_download': True,
},
u'skip': u'VEVO is only available in some countries',
},
]
def _real_extract(self, url):
# Extract id and simplified title from URL
@ -40,13 +64,21 @@ class DailymotionIE(SubtitlesInfoExtractor):
url = 'http://www.dailymotion.com/video/%s' % video_id
# Retrieve video webpage to extract further information
request = compat_urllib_request.Request(url)
request.add_header('Cookie', 'family_filter=off')
request = self._build_request(url)
webpage = self._download_webpage(request, video_id)
# Extract URL, uploader and title from webpage
self.report_extraction(video_id)
# It may just embed a vevo video:
m_vevo = re.search(
r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?videoId=(?P<id>[\w]*)',
webpage)
if m_vevo is not None:
vevo_id = m_vevo.group('id')
self.to_screen(u'Vevo video detected: %s' % vevo_id)
return self.url_result(u'vevo:%s' % vevo_id, ie='Vevo')
video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
# Looking for official user
r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
@ -63,6 +95,9 @@ class DailymotionIE(SubtitlesInfoExtractor):
info = self._search_regex(r'var info = ({.*?}),$', embed_page,
'video info', flags=re.MULTILINE)
info = json.loads(info)
if info.get('error') is not None:
msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
raise ExtractorError(msg, expected=True)
# TODO: support choosing qualities
@ -110,29 +145,56 @@ class DailymotionIE(SubtitlesInfoExtractor):
return {}
class DailymotionPlaylistIE(InfoExtractor):
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
IE_NAME = u'dailymotion:playlist'
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
_MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>'
_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id')
def _extract_entries(self, id):
video_ids = []
for pagenum in itertools.count(1):
webpage = self._download_webpage('https://www.dailymotion.com/playlist/%s/%s' % (playlist_id, pagenum),
playlist_id, u'Downloading page %s' % pagenum)
request = self._build_request(self._PAGE_TEMPLATE % (id, pagenum))
webpage = self._download_webpage(request,
id, u'Downloading page %s' % pagenum)
playlist_el = get_element_by_attribute(u'class', u'video_list', webpage)
video_ids.extend(re.findall(r'data-id="(.+?)" data-ext-id', playlist_el))
video_ids.extend(re.findall(r'data-id="(.+?)"', playlist_el))
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
break
return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
for video_id in orderedSet(video_ids)]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id')
webpage = self._download_webpage(url, playlist_id)
entries = [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
for video_id in video_ids]
return {'_type': 'playlist',
'id': playlist_id,
'title': get_element_by_id(u'playlist_name', webpage),
'entries': entries,
'entries': self._extract_entries(playlist_id),
}
class DailymotionUserIE(DailymotionPlaylistIE):
IE_NAME = u'dailymotion:user'
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
_MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/user/.+?".*?>.*?</a>.*?</div>'
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
user = mobj.group('user')
webpage = self._download_webpage(url, user)
full_user = self._html_search_regex(
r'<a class="label" href="/%s".*?>(.*?)</' % re.escape(user),
webpage, u'user', flags=re.DOTALL)
return {
'_type': 'playlist',
'id': user,
'title': full_user,
'entries': self._extract_entries(user),
}

View File

@ -106,8 +106,8 @@ class FacebookIE(InfoExtractor):
video_duration = int(video_data['video_duration'])
thumbnail = video_data['thumbnail_src']
video_title = self._html_search_regex('<h2 class="uiHeaderTitle">([^<]+)</h2>',
webpage, u'title')
video_title = self._html_search_regex(
r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, u'title')
info = {
'id': video_id,

View File

@ -9,7 +9,7 @@ from ..utils import (
class FlickrIE(InfoExtractor):
"""Information Extractor for Flickr videos"""
_VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
_VALID_URL = r'(?:https?://)?(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
_TEST = {
u'url': u'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',
u'file': u'5645318632.mp4',

View File

@ -1,6 +1,7 @@
# encoding: utf-8
import re
import xml.etree.ElementTree
import json
from .common import InfoExtractor
from ..utils import (
@ -65,3 +66,64 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
webpage = self._download_webpage(url, page_title)
video_id = self._search_regex(r'id-video=(\d+?)"', webpage, u'video id')
return self._extract_video(video_id)
class France2IE(FranceTVBaseInfoExtractor):
IE_NAME = u'france2.fr'
_VALID_URL = r'''(?x)https?://www\.france2\.fr/
(?:
emissions/.*?/videos/(?P<id>\d+)
| emission/(?P<key>[^/?]+)
)'''
_TEST = {
u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
u'file': u'75540104.mp4',
u'info_dict': {
u'title': u'13h15, le samedi...',
u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d',
},
u'params': {
u'skip_download': True,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj.group('key'):
webpage = self._download_webpage(url, mobj.group('key'))
video_id = self._html_search_regex(
r'''(?x)<div\s+class="video-player">\s*
<a\s+href="http://videos.francetv.fr/video/([0-9]+)"\s+
class="francetv-video-player">''',
webpage, u'video ID')
else:
video_id = mobj.group('id')
return self._extract_video(video_id)
class GenerationQuoiIE(InfoExtractor):
IE_NAME = u'france2.fr:generation-quoi'
_VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*)(\?|$)'
_TEST = {
u'url': u'http://generation-quoi.france2.fr/portrait/garde-a-vous',
u'file': u'k7FJX8VBcvvLmX4wA5Q.mp4',
u'info_dict': {
u'title': u'Génération Quoi - Garde à Vous',
u'uploader': u'Génération Quoi',
},
u'params': {
# It uses Dailymotion
u'skip_download': True,
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % name)
info_json = self._download_webpage(info_url, name)
info = json.loads(info_json)
return self.url_result('http://www.dailymotion.com/video/%s' % info['id'],
ie='Dailymotion')

View File

@ -29,17 +29,6 @@ class GenericIE(InfoExtractor):
u"title": u"R\u00e9gis plante sa Jeep"
}
},
{
u'url': u'http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/',
u'file': u'2371591881001.mp4',
u'md5': u'9e80619e0a94663f0bdc849b4566af19',
u'note': u'Test Brightcove downloads and detection in GenericIE',
u'info_dict': {
u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
u'uploader': u'8TV',
u'description': u'md5:a950cc4285c43e44d763d036710cd9cd',
}
},
]
def report_download_webpage(self, video_id):

View File

@ -41,7 +41,8 @@ class GooglePlusIE(InfoExtractor):
# Extract update date
upload_date = self._html_search_regex(
['title="Timestamp">(.*?)</a>', r'<a.+?class="g-M.+?>(.+?)</a>'],
r'''(?x)<a.+?class="o-T-s\s[^"]+"\s+style="display:\s*none"\s*>
([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''',
webpage, u'upload date', fatal=False)
if upload_date:
# Convert timestring to a format suitable for filename

View File

@ -13,7 +13,7 @@ class IGNIE(InfoExtractor):
Some videos of it.ign.com are also supported
"""
_VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles)(/.+)?/(?P<name_or_id>.+)'
_VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles|(?:[^/]*/feature))(/.+)?/(?P<name_or_id>.+)'
IE_NAME = u'ign.com'
_CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'
@ -21,15 +21,39 @@ class IGNIE(InfoExtractor):
r'id="my_show_video">.*?<p>(.*?)</p>',
]
_TEST = {
u'url': u'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
u'file': u'8f862beef863986b2785559b9e1aa599.mp4',
u'md5': u'eac8bdc1890980122c3b66f14bdd02e9',
u'info_dict': {
u'title': u'The Last of Us Review',
u'description': u'md5:c8946d4260a4d43a00d5ae8ed998870c',
}
}
_TESTS = [
{
u'url': u'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
u'file': u'8f862beef863986b2785559b9e1aa599.mp4',
u'md5': u'eac8bdc1890980122c3b66f14bdd02e9',
u'info_dict': {
u'title': u'The Last of Us Review',
u'description': u'md5:c8946d4260a4d43a00d5ae8ed998870c',
}
},
{
u'url': u'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
u'playlist': [
{
u'file': u'5ebbd138523268b93c9141af17bec937.mp4',
u'info_dict': {
u'title': u'GTA 5 Video Review',
u'description': u'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
},
},
{
u'file': u'638672ee848ae4ff108df2a296418ee2.mp4',
u'info_dict': {
u'title': u'GTA 5\'s Twisted Beauty in Super Slow Motion',
u'description': u'The twisted beauty of GTA 5 in stunning slow motion.',
},
},
],
u'params': {
u'skip_download': True,
},
},
]
def _find_video_id(self, webpage):
res_id = [r'data-video-id="(.+?)"',
@ -46,6 +70,13 @@ class IGNIE(InfoExtractor):
if page_type == 'articles':
video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, u'video url')
return self.url_result(video_url, ie='IGN')
elif page_type != 'video':
multiple_urls = re.findall(
'<param name="flashvars" value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]',
webpage)
if multiple_urls:
return [self.url_result(u, ie='IGN') for u in multiple_urls]
video_id = self._find_video_id(webpage)
result = self._get_video_info(video_id)
description = self._html_search_regex(self._DESCRIPTION_RE,
@ -87,6 +118,9 @@ class OneUPIE(IGNIE):
}
}
# Override IGN tests
_TESTS = []
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
id = mobj.group('name_or_id')

View File

@ -12,7 +12,7 @@ class JeuxVideoIE(InfoExtractor):
_TEST = {
u'url': u'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
u'file': u'5182.mp4',
u'md5': u'e0fdb0cd3ce98713ef9c1e1e025779d0',
u'md5': u'046e491afb32a8aaac1f44dd4ddd54ee',
u'info_dict': {
u'title': u'GC 2013 : Tearaway nous présente ses papiers d\'identité',
u'description': u'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',

View File

@ -2,7 +2,12 @@ import re
import json
from .common import InfoExtractor
from ..utils import compat_urllib_parse_urlparse, compat_urlparse
from ..utils import (
compat_urllib_parse_urlparse,
compat_urlparse,
get_meta_content,
ExtractorError,
)
class LivestreamIE(InfoExtractor):
@ -35,8 +40,11 @@ class LivestreamIE(InfoExtractor):
if video_id is None:
# This is an event page:
api_url = self._search_regex(r'event_design_eventId: \'(.+?)\'',
webpage, 'api url')
player = get_meta_content('twitter:player', webpage)
if player is None:
raise ExtractorError('Couldn\'t extract event api url')
api_url = player.replace('/player', '')
api_url = re.sub(r'^(https?://)(new\.)', r'\1api.\2', api_url)
info = json.loads(self._download_webpage(api_url, event_name,
u'Downloading event info'))
videos = [self._extract_video_info(video_data['data'])

View File

@ -54,23 +54,26 @@ class MTVIE(InfoExtractor):
def _get_thumbnail_url(self, uri, itemdoc):
return 'http://mtv.mtvnimages.com/uri/' + uri
def _extract_video_url(self, metadataXml):
def _extract_video_formats(self, metadataXml):
if '/error_country_block.swf' in metadataXml:
raise ExtractorError(u'This video is not available from your country.', expected=True)
mdoc = xml.etree.ElementTree.fromstring(metadataXml.encode('utf-8'))
renditions = mdoc.findall('.//rendition')
# For now, always pick the highest quality.
rendition = renditions[-1]
try:
_,_,ext = rendition.attrib['type'].partition('/')
format = ext + '-' + rendition.attrib['width'] + 'x' + rendition.attrib['height'] + '_' + rendition.attrib['bitrate']
rtmp_video_url = rendition.find('./src').text
except KeyError:
raise ExtractorError('Invalid rendition field.')
video_url = self._transform_rtmp_url(rtmp_video_url)
return {'ext': ext, 'url': video_url, 'format': format}
formats = []
for rendition in mdoc.findall('.//rendition'):
try:
_, _, ext = rendition.attrib['type'].partition('/')
rtmp_video_url = rendition.find('./src').text
formats.append({'ext': ext,
'url': self._transform_rtmp_url(rtmp_video_url),
'format_id': rendition.get('bitrate'),
'width': int(rendition.get('width')),
'height': int(rendition.get('height')),
})
except (KeyError, TypeError):
raise ExtractorError('Invalid rendition field.')
return formats
def _get_video_info(self, itemdoc):
uri = itemdoc.find('guid').text
@ -81,19 +84,25 @@ class MTVIE(InfoExtractor):
mediagen_url += '&acceptMethods=fms'
mediagen_page = self._download_webpage(mediagen_url, video_id,
u'Downloading video urls')
video_info = self._extract_video_url(mediagen_page)
description_node = itemdoc.find('description')
if description_node is not None:
description = description_node.text
else:
description = None
video_info.update({'title': itemdoc.find('title').text,
'id': video_id,
'thumbnail': self._get_thumbnail_url(uri, itemdoc),
'description': description,
})
return video_info
info = {
'title': itemdoc.find('title').text,
'formats': self._extract_video_formats(mediagen_page),
'id': video_id,
'thumbnail': self._get_thumbnail_url(uri, itemdoc),
'description': description,
}
# TODO: Remove when #980 has been merged
info.update(info['formats'][-1])
return info
def _get_videos_info(self, uri):
video_id = self._id_from_uri(uri)

View File

@ -14,24 +14,25 @@ class RedTubeIE(InfoExtractor):
}
}
def _real_extract(self,url):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_extension = 'mp4'
video_extension = 'mp4'
webpage = self._download_webpage(url, video_id)
self.report_extraction(video_id)
video_url = self._html_search_regex(r'<source src="(.+?)" type="video/mp4">',
webpage, u'video URL')
video_url = self._html_search_regex(
r'<source src="(.+?)" type="video/mp4">', webpage, u'video URL')
video_title = self._html_search_regex('<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
video_title = self._html_search_regex(
r'<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
webpage, u'title')
return [{
return {
'id': video_id,
'url': video_url,
'ext': video_extension,
'title': video_title,
}]
}

View File

@ -8,8 +8,8 @@ from ..utils import (
)
class RTLnowIE(InfoExtractor):
"""Information Extractor for RTL NOW, RTL2 NOW, SUPER RTL NOW and VOX NOW"""
_VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?superrtlnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
"""Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW"""
_VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?rtlnitronow\.de/|(?:www\.)?superrtlnow\.de/|(?:www\.)?n-tvnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
_TESTS = [{
u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
u'file': u'90419.flv',
@ -61,8 +61,35 @@ class RTLnowIE(InfoExtractor):
u'params': {
u'skip_download': True,
},
},
{
u'url': u'http://www.rtlnitronow.de/recht-ordnung/lebensmittelkontrolle-erlangenordnungsamt-berlin.php?film_id=127367&player=1&season=1',
u'file': u'127367.flv',
u'info_dict': {
u'upload_date': u'20130926',
u'title': u'Recht & Ordnung - Lebensmittelkontrolle Erlangen/Ordnungsamt...',
u'description': u'Lebensmittelkontrolle Erlangen/Ordnungsamt Berlin',
u'thumbnail': u'http://autoimg.static-fra.de/nitronow/344787/1500x1500/image2.jpg',
},
u'params': {
u'skip_download': True,
},
},
{
u'url': u'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10',
u'file': u'124903.flv',
u'info_dict': {
u'upload_date': u'20130101',
u'title': u'Top Gear vom 01.01.2013',
u'description': u'Episode 1',
},
u'params': {
u'skip_download': True,
},
u'skip': u'Only works from Germany',
}]
def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url)
@ -79,20 +106,23 @@ class RTLnowIE(InfoExtractor):
msg = clean_html(note_m.group(1))
raise ExtractorError(msg)
video_title = self._html_search_regex(r'<title>(?P<title>[^<]+)</title>',
video_title = self._html_search_regex(r'<title>(?P<title>[^<]+?)( \| [^<]*)?</title>',
webpage, u'title')
playerdata_url = self._html_search_regex(r'\'playerdata\': \'(?P<playerdata_url>[^\']+)\'',
webpage, u'playerdata_url')
playerdata = self._download_webpage(playerdata_url, video_id)
mobj = re.search(r'<title><!\[CDATA\[(?P<description>.+?)\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr\]\]></title>', playerdata)
mobj = re.search(r'<title><!\[CDATA\[(?P<description>.+?)(?:\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr)?\]\]></title>', playerdata)
if mobj:
video_description = mobj.group(u'description')
if mobj.group('upload_date_Y'):
video_upload_date = mobj.group('upload_date_Y')
else:
elif mobj.group('upload_date_y'):
video_upload_date = u'20' + mobj.group('upload_date_y')
video_upload_date += mobj.group('upload_date_m')+mobj.group('upload_date_d')
else:
video_upload_date = None
if video_upload_date:
video_upload_date += mobj.group('upload_date_m')+mobj.group('upload_date_d')
else:
video_description = None
video_upload_date = None

View File

@ -5,7 +5,7 @@ from .mtv import MTVIE, _media_xml_tag
class SouthParkStudiosIE(MTVIE):
IE_NAME = u'southparkstudios.com'
_VALID_URL = r'https?://www\.southparkstudios\.com/clips/(?P<id>\d+)'
_VALID_URL = r'https?://www\.southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$)'
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
@ -23,7 +23,11 @@ class SouthParkStudiosIE(MTVIE):
def _get_thumbnail_url(self, uri, itemdoc):
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
return itemdoc.find(search_path).attrib['url']
thumb_node = itemdoc.find(search_path)
if thumb_node is None:
return None
else:
return thumb_node.attrib['url']
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View File

@ -77,12 +77,20 @@ class TEDIE(InfoExtractor):
thumbnail = self._search_regex(r'</span>[\s.]*</div>[\s.]*<img src="(.*?)"',
webpage, 'thumbnail')
formats = [{
'ext': 'mp4',
'url': stream['file'],
'format': stream['id']
} for stream in info['htmlStreams']]
info = {
'id': info['id'],
'url': info['htmlStreams'][-1]['file'],
'ext': 'mp4',
'title': title,
'thumbnail': thumbnail,
'description': desc,
}
'id': info['id'],
'title': title,
'thumbnail': thumbnail,
'description': desc,
'formats': formats,
}
# TODO: Remove when #980 has been merged
info.update(info['formats'][-1])
return info

View File

@ -0,0 +1,64 @@
import json
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
)
class ViddlerIE(InfoExtractor):
_VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler.com)/(?:v|embed|player)/(?P<id>[0-9]+)'
_TEST = {
u"url": u"http://www.viddler.com/v/43903784",
u'file': u'43903784.mp4',
u'md5': u'fbbaedf7813e514eb7ca30410f439ac9',
u'info_dict': {
u"title": u"Video Made Easy",
u"uploader": u"viddler",
u"duration": 100.89,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
embed_url = mobj.group('domain') + u'/embed/' + video_id
webpage = self._download_webpage(embed_url, video_id)
video_sources_code = self._search_regex(
r"(?ms)sources\s*:\s*(\{.*?\})", webpage, u'video URLs')
video_sources = json.loads(video_sources_code.replace("'", '"'))
formats = [{
'url': video_url,
'format': format_id,
} for video_url, format_id in video_sources.items()]
title = self._html_search_regex(
r"title\s*:\s*'([^']*)'", webpage, u'title')
uploader = self._html_search_regex(
r"authorName\s*:\s*'([^']*)'", webpage, u'uploader', fatal=False)
duration_s = self._html_search_regex(
r"duration\s*:\s*([0-9.]*)", webpage, u'duration', fatal=False)
duration = float(duration_s) if duration_s else None
thumbnail = self._html_search_regex(
r"thumbnail\s*:\s*'([^']*)'",
webpage, u'thumbnail', fatal=False)
info = {
'_type': 'video',
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'uploader': uploader,
'duration': duration,
'formats': formats,
}
# TODO: Remove when #980 has been merged
info['formats'][-1]['ext'] = determine_ext(info['formats'][-1]['url'])
info.update(info['formats'][-1])
return info

View File

@ -1,4 +1,3 @@
import datetime
import itertools
import json
import re
@ -6,86 +5,89 @@ import re
from .common import InfoExtractor, SearchInfoExtractor
from ..utils import (
compat_urllib_parse,
ExtractorError,
compat_urlparse,
determine_ext,
clean_html,
)
class YahooIE(InfoExtractor):
IE_DESC = u'Yahoo screen'
_VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html'
_TEST = {
u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
u'file': u'214727115.flv',
u'md5': u'2e717f169c1be93d84d3794a00d4a325',
u'info_dict': {
u"title": u"Julian Smith & Travis Legg Watch Julian Smith"
_TESTS = [
{
u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
u'file': u'214727115.flv',
u'info_dict': {
u'title': u'Julian Smith & Travis Legg Watch Julian Smith',
u'description': u'Julian and Travis watch Julian Smith',
},
u'params': {
# Requires rtmpdump
u'skip_download': True,
},
},
u'skip': u'Requires rtmpdump'
}
{
u'url': u'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
u'file': u'103000935.flv',
u'info_dict': {
u'title': u'Codefellas - The Cougar Lies with Spanish Moss',
u'description': u'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
},
u'params': {
# Requires rtmpdump
u'skip_download': True,
},
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
m_id = re.search(r'YUI\.namespace\("Media"\)\.CONTENT_ID = "(?P<new_id>.+?)";', webpage)
if m_id is None:
# TODO: Check which url parameters are required
info_url = 'http://cosmos.bcst.yahoo.com/rest/v2/pops;lmsoverride=1;outputformat=mrss;cb=974419660;id=%s;rd=news.yahoo.com;datacontext=mdb;lg=KCa2IihxG3qE60vQ7HtyUy' % video_id
webpage = self._download_webpage(info_url, video_id, u'Downloading info webpage')
info_re = r'''<title><!\[CDATA\[(?P<title>.*?)\]\]></title>.*
<description><!\[CDATA\[(?P<description>.*?)\]\]></description>.*
<media:pubStart><!\[CDATA\[(?P<date>.*?)\ .*\]\]></media:pubStart>.*
<media:content\ medium="image"\ url="(?P<thumb>.*?)"\ name="LARGETHUMB"
'''
self.report_extraction(video_id)
m_info = re.search(info_re, webpage, re.VERBOSE|re.DOTALL)
if m_info is None:
raise ExtractorError(u'Unable to extract video info')
video_title = m_info.group('title')
video_description = m_info.group('description')
video_thumb = m_info.group('thumb')
video_date = m_info.group('date')
video_date = datetime.datetime.strptime(video_date, '%m/%d/%Y').strftime('%Y%m%d')
# TODO: Find a way to get mp4 videos
rest_url = 'http://cosmos.bcst.yahoo.com/rest/v2/pops;element=stream;outputformat=mrss;id=%s;lmsoverride=1;bw=375;dynamicstream=1;cb=83521105;tech=flv,mp4;rd=news.yahoo.com;datacontext=mdb;lg=KCa2IihxG3qE60vQ7HtyUy' % video_id
webpage = self._download_webpage(rest_url, video_id, u'Downloading video url webpage')
m_rest = re.search(r'<media:content url="(?P<url>.*?)" path="(?P<path>.*?)"', webpage)
video_url = m_rest.group('url')
video_path = m_rest.group('path')
if m_rest is None:
raise ExtractorError(u'Unable to extract video url')
items_json = self._search_regex(r'YVIDEO_INIT_ITEMS = ({.*?});$',
webpage, u'items', flags=re.MULTILINE)
items = json.loads(items_json)
info = items['mediaItems']['query']['results']['mediaObj'][0]
meta = info['meta']
else: # We have to use a different method if another id is defined
long_id = m_id.group('new_id')
info_url = 'http://video.query.yahoo.com/v1/public/yql?q=SELECT%20*%20FROM%20yahoo.media.video.streams%20WHERE%20id%3D%22' + long_id + '%22%20AND%20format%3D%22mp4%2Cflv%22%20AND%20protocol%3D%22rtmp%2Chttp%22%20AND%20plrs%3D%2286Gj0vCaSzV_Iuf6hNylf2%22%20AND%20acctid%3D%22389%22%20AND%20plidl%3D%22%22%20AND%20pspid%3D%22792700001%22%20AND%20offnetwork%3D%22false%22%20AND%20site%3D%22ivy%22%20AND%20lang%3D%22en-US%22%20AND%20region%3D%22US%22%20AND%20override%3D%22none%22%3B&env=prod&format=json&callback=YUI.Env.JSONP.yui_3_8_1_1_1368368376830_335'
webpage = self._download_webpage(info_url, video_id, u'Downloading info json')
json_str = re.search(r'YUI.Env.JSONP.yui.*?\((.*?)\);', webpage).group(1)
info = json.loads(json_str)
res = info[u'query'][u'results'][u'mediaObj'][0]
stream = res[u'streams'][0]
video_path = stream[u'path']
video_url = stream[u'host']
meta = res[u'meta']
video_title = meta[u'title']
video_description = meta[u'description']
video_thumb = meta[u'thumbnail']
video_date = None # I can't find it
formats = []
for s in info['streams']:
format_info = {
'width': s.get('width'),
'height': s.get('height'),
'bitrate': s.get('bitrate'),
}
host = s['host']
path = s['path']
if host.startswith('rtmp'):
format_info.update({
'url': host,
'play_path': path,
'ext': 'flv',
})
else:
format_url = compat_urlparse.urljoin(host, path)
format_info['url'] = format_url
format_info['ext'] = determine_ext(format_url)
formats.append(format_info)
formats = sorted(formats, key=lambda f:(f['height'], f['width']))
info = {
'id': video_id,
'title': meta['title'],
'formats': formats,
'description': clean_html(meta['description']),
'thumbnail': meta['thumbnail'],
}
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info
info_dict = {
'id': video_id,
'url': video_url,
'play_path': video_path,
'title':video_title,
'description': video_description,
'thumbnail': video_thumb,
'upload_date': video_date,
'ext': 'flv',
}
return info_dict
class YahooSearchIE(SearchInfoExtractor):
IE_DESC = u'Yahoo screen search'

View File

@ -66,6 +66,12 @@ class YoukuIE(InfoExtractor):
self.report_extraction(video_id)
try:
config = json.loads(jsondata)
error_code = config['data'][0].get('error_code')
if error_code:
# -8 means blocked outside China.
error = config['data'][0].get('error') # Chinese and English, separated by newline.
raise ExtractorError(error or u'Server reported error %i' % error_code,
expected=True)
video_title = config['data'][0]['title']
seed = config['data'][0]['seed']
@ -89,6 +95,7 @@ class YoukuIE(InfoExtractor):
fileid = config['data'][0]['streamfileids'][format]
keys = [s['k'] for s in config['data'][0]['segs'][format]]
# segs is usually a dictionary, but an empty *list* if an error occured.
except (UnicodeDecodeError, ValueError, KeyError):
raise ExtractorError(u'Unable to extract info section')

View File

@ -1,28 +1,39 @@
# coding: utf-8
import collections
import errno
import io
import itertools
import json
import netrc
import os.path
import re
import socket
import itertools
import string
import struct
import traceback
import xml.etree.ElementTree
import zlib
from .common import InfoExtractor, SearchInfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..utils import (
compat_chr,
compat_http_client,
compat_parse_qs,
compat_urllib_error,
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
compat_str,
clean_html,
get_cachedir,
get_element_by_id,
ExtractorError,
unescapeHTML,
unified_strdate,
orderedSet,
write_json_file,
)
class YoutubeBaseInfoExtractor(InfoExtractor):
@ -352,7 +363,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
u"info_dict": {
u"upload_date": u"20120506",
u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
u"description": u"md5:5b292926389560516e384ac437c0ec07",
u"uploader": u"Icona Pop",
u"uploader_id": u"IconaPop"
}
@ -369,21 +380,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
u"uploader_id": u"justintimberlakeVEVO"
}
},
{
u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
u'file': u'TGi3HqYrWHE.mp4',
u'note': u'm3u8 video',
u'info_dict': {
u'title': u'Triathlon - Men - London 2012 Olympic Games',
u'description': u'- Men - TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
u'uploader': u'olympic',
u'upload_date': u'20120807',
u'uploader_id': u'olympic',
},
u'params': {
u'skip_download': True,
},
},
]
@ -393,6 +389,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
if YoutubePlaylistIE.suitable(url): return False
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
def __init__(self, *args, **kwargs):
super(YoutubeIE, self).__init__(*args, **kwargs)
self._player_cache = {}
def report_video_webpage_download(self, video_id):
"""Report attempt to download video webpage."""
self.to_screen(u'%s: Downloading video webpage' % video_id)
@ -413,9 +413,658 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
"""Indicate the download will use the RTMP protocol."""
self.to_screen(u'RTMP download detected')
def _decrypt_signature(self, s):
def _extract_signature_function(self, video_id, player_url, slen):
id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
player_url)
player_type = id_m.group('ext')
player_id = id_m.group('id')
# Read from filesystem cache
func_id = '%s_%s_%d' % (player_type, player_id, slen)
assert os.path.basename(func_id) == func_id
cache_dir = get_cachedir(self._downloader.params)
cache_enabled = cache_dir is not None
if cache_enabled:
cache_fn = os.path.join(os.path.expanduser(cache_dir),
u'youtube-sigfuncs',
func_id + '.json')
try:
with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
cache_spec = json.load(cachef)
return lambda s: u''.join(s[i] for i in cache_spec)
except IOError:
pass # No cache available
if player_type == 'js':
code = self._download_webpage(
player_url, video_id,
note=u'Downloading %s player %s' % (player_type, player_id),
errnote=u'Download of %s failed' % player_url)
res = self._parse_sig_js(code)
elif player_type == 'swf':
urlh = self._request_webpage(
player_url, video_id,
note=u'Downloading %s player %s' % (player_type, player_id),
errnote=u'Download of %s failed' % player_url)
code = urlh.read()
res = self._parse_sig_swf(code)
else:
assert False, 'Invalid player type %r' % player_type
if cache_enabled:
try:
test_string = u''.join(map(compat_chr, range(slen)))
cache_res = res(test_string)
cache_spec = [ord(c) for c in cache_res]
try:
os.makedirs(os.path.dirname(cache_fn))
except OSError as ose:
if ose.errno != errno.EEXIST:
raise
write_json_file(cache_spec, cache_fn)
except Exception:
tb = traceback.format_exc()
self._downloader.report_warning(
u'Writing cache to %r failed: %s' % (cache_fn, tb))
return res
def _print_sig_code(self, func, slen):
def gen_sig_code(idxs):
def _genslice(start, end, step):
starts = u'' if start == 0 else str(start)
ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
steps = u'' if step == 1 else (u':%d' % step)
return u's[%s%s%s]' % (starts, ends, steps)
step = None
start = '(Never used)' # Quelch pyflakes warnings - start will be
# set as soon as step is set
for i, prev in zip(idxs[1:], idxs[:-1]):
if step is not None:
if i - prev == step:
continue
yield _genslice(start, prev, step)
step = None
continue
if i - prev in [-1, 1]:
step = i - prev
start = prev
continue
else:
yield u's[%d]' % prev
if step is None:
yield u's[%d]' % i
else:
yield _genslice(start, i, step)
test_string = u''.join(map(compat_chr, range(slen)))
cache_res = func(test_string)
cache_spec = [ord(c) for c in cache_res]
expr_code = u' + '.join(gen_sig_code(cache_spec))
code = u'if len(s) == %d:\n return %s\n' % (slen, expr_code)
self.to_screen(u'Extracted signature function:\n' + code)
def _parse_sig_js(self, jscode):
funcname = self._search_regex(
r'signature=([a-zA-Z]+)', jscode,
u'Initial JS player signature function name')
functions = {}
def argidx(varname):
return string.lowercase.index(varname)
def interpret_statement(stmt, local_vars, allow_recursion=20):
if allow_recursion < 0:
raise ExtractorError(u'Recursion limit reached')
if stmt.startswith(u'var '):
stmt = stmt[len(u'var '):]
ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
r'=(?P<expr>.*)$', stmt)
if ass_m:
if ass_m.groupdict().get('index'):
def assign(val):
lvar = local_vars[ass_m.group('out')]
idx = interpret_expression(ass_m.group('index'),
local_vars, allow_recursion)
assert isinstance(idx, int)
lvar[idx] = val
return val
expr = ass_m.group('expr')
else:
def assign(val):
local_vars[ass_m.group('out')] = val
return val
expr = ass_m.group('expr')
elif stmt.startswith(u'return '):
assign = lambda v: v
expr = stmt[len(u'return '):]
else:
raise ExtractorError(
u'Cannot determine left side of statement in %r' % stmt)
v = interpret_expression(expr, local_vars, allow_recursion)
return assign(v)
def interpret_expression(expr, local_vars, allow_recursion):
if expr.isdigit():
return int(expr)
if expr.isalpha():
return local_vars[expr]
m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
if m:
member = m.group('member')
val = local_vars[m.group('in')]
if member == 'split("")':
return list(val)
if member == 'join("")':
return u''.join(val)
if member == 'length':
return len(val)
if member == 'reverse()':
return val[::-1]
slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
if slice_m:
idx = interpret_expression(
slice_m.group('idx'), local_vars, allow_recursion-1)
return val[idx:]
m = re.match(
r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
if m:
val = local_vars[m.group('in')]
idx = interpret_expression(m.group('idx'), local_vars,
allow_recursion-1)
return val[idx]
m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
if m:
a = interpret_expression(m.group('a'),
local_vars, allow_recursion)
b = interpret_expression(m.group('b'),
local_vars, allow_recursion)
return a % b
m = re.match(
r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr)
if m:
fname = m.group('func')
if fname not in functions:
functions[fname] = extract_function(fname)
argvals = [int(v) if v.isdigit() else local_vars[v]
for v in m.group('args').split(',')]
return functions[fname](argvals)
raise ExtractorError(u'Unsupported JS expression %r' % expr)
def extract_function(funcname):
func_m = re.search(
r'function ' + re.escape(funcname) +
r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
jscode)
argnames = func_m.group('args').split(',')
def resf(args):
local_vars = dict(zip(argnames, args))
for stmt in func_m.group('code').split(';'):
res = interpret_statement(stmt, local_vars)
return res
return resf
initial_function = extract_function(funcname)
return lambda s: initial_function([s])
def _parse_sig_swf(self, file_contents):
if file_contents[1:3] != b'WS':
raise ExtractorError(
u'Not an SWF file; header is %r' % file_contents[:3])
if file_contents[:1] == b'C':
content = zlib.decompress(file_contents[8:])
else:
raise NotImplementedError(u'Unsupported compression format %r' %
file_contents[:1])
def extract_tags(content):
pos = 0
while pos < len(content):
header16 = struct.unpack('<H', content[pos:pos+2])[0]
pos += 2
tag_code = header16 >> 6
tag_len = header16 & 0x3f
if tag_len == 0x3f:
tag_len = struct.unpack('<I', content[pos:pos+4])[0]
pos += 4
assert pos+tag_len <= len(content)
yield (tag_code, content[pos:pos+tag_len])
pos += tag_len
code_tag = next(tag
for tag_code, tag in extract_tags(content)
if tag_code == 82)
p = code_tag.index(b'\0', 4) + 1
code_reader = io.BytesIO(code_tag[p:])
# Parse ABC (AVM2 ByteCode)
def read_int(reader=None):
if reader is None:
reader = code_reader
res = 0
shift = 0
for _ in range(5):
buf = reader.read(1)
assert len(buf) == 1
b = struct.unpack('<B', buf)[0]
res = res | ((b & 0x7f) << shift)
if b & 0x80 == 0:
break
shift += 7
return res
def u30(reader=None):
res = read_int(reader)
assert res & 0xf0000000 == 0
return res
u32 = read_int
def s32(reader=None):
v = read_int(reader)
if v & 0x80000000 != 0:
v = - ((v ^ 0xffffffff) + 1)
return v
def read_string(reader=None):
if reader is None:
reader = code_reader
slen = u30(reader)
resb = reader.read(slen)
assert len(resb) == slen
return resb.decode('utf-8')
def read_bytes(count, reader=None):
if reader is None:
reader = code_reader
resb = reader.read(count)
assert len(resb) == count
return resb
def read_byte(reader=None):
resb = read_bytes(1, reader=reader)
res = struct.unpack('<B', resb)[0]
return res
# minor_version + major_version
read_bytes(2 + 2)
# Constant pool
int_count = u30()
for _c in range(1, int_count):
s32()
uint_count = u30()
for _c in range(1, uint_count):
u32()
double_count = u30()
read_bytes((double_count-1) * 8)
string_count = u30()
constant_strings = [u'']
for _c in range(1, string_count):
s = read_string()
constant_strings.append(s)
namespace_count = u30()
for _c in range(1, namespace_count):
read_bytes(1) # kind
u30() # name
ns_set_count = u30()
for _c in range(1, ns_set_count):
count = u30()
for _c2 in range(count):
u30()
multiname_count = u30()
MULTINAME_SIZES = {
0x07: 2, # QName
0x0d: 2, # QNameA
0x0f: 1, # RTQName
0x10: 1, # RTQNameA
0x11: 0, # RTQNameL
0x12: 0, # RTQNameLA
0x09: 2, # Multiname
0x0e: 2, # MultinameA
0x1b: 1, # MultinameL
0x1c: 1, # MultinameLA
}
multinames = [u'']
for _c in range(1, multiname_count):
kind = u30()
assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
if kind == 0x07:
u30() # namespace_idx
name_idx = u30()
multinames.append(constant_strings[name_idx])
else:
multinames.append('[MULTINAME kind: %d]' % kind)
for _c2 in range(MULTINAME_SIZES[kind]):
u30()
# Methods
method_count = u30()
MethodInfo = collections.namedtuple(
'MethodInfo',
['NEED_ARGUMENTS', 'NEED_REST'])
method_infos = []
for method_id in range(method_count):
param_count = u30()
u30() # return type
for _ in range(param_count):
u30() # param type
u30() # name index (always 0 for youtube)
flags = read_byte()
if flags & 0x08 != 0:
# Options present
option_count = u30()
for c in range(option_count):
u30() # val
read_bytes(1) # kind
if flags & 0x80 != 0:
# Param names present
for _ in range(param_count):
u30() # param name
mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
method_infos.append(mi)
# Metadata
metadata_count = u30()
for _c in range(metadata_count):
u30() # name
item_count = u30()
for _c2 in range(item_count):
u30() # key
u30() # value
def parse_traits_info():
trait_name_idx = u30()
kind_full = read_byte()
kind = kind_full & 0x0f
attrs = kind_full >> 4
methods = {}
if kind in [0x00, 0x06]: # Slot or Const
u30() # Slot id
u30() # type_name_idx
vindex = u30()
if vindex != 0:
read_byte() # vkind
elif kind in [0x01, 0x02, 0x03]: # Method / Getter / Setter
u30() # disp_id
method_idx = u30()
methods[multinames[trait_name_idx]] = method_idx
elif kind == 0x04: # Class
u30() # slot_id
u30() # classi
elif kind == 0x05: # Function
u30() # slot_id
function_idx = u30()
methods[function_idx] = multinames[trait_name_idx]
else:
raise ExtractorError(u'Unsupported trait kind %d' % kind)
if attrs & 0x4 != 0: # Metadata present
metadata_count = u30()
for _c3 in range(metadata_count):
u30() # metadata index
return methods
# Classes
TARGET_CLASSNAME = u'SignatureDecipher'
searched_idx = multinames.index(TARGET_CLASSNAME)
searched_class_id = None
class_count = u30()
for class_id in range(class_count):
name_idx = u30()
if name_idx == searched_idx:
# We found the class we're looking for!
searched_class_id = class_id
u30() # super_name idx
flags = read_byte()
if flags & 0x08 != 0: # Protected namespace is present
u30() # protected_ns_idx
intrf_count = u30()
for _c2 in range(intrf_count):
u30()
u30() # iinit
trait_count = u30()
for _c2 in range(trait_count):
parse_traits_info()
if searched_class_id is None:
raise ExtractorError(u'Target class %r not found' %
TARGET_CLASSNAME)
method_names = {}
method_idxs = {}
for class_id in range(class_count):
u30() # cinit
trait_count = u30()
for _c2 in range(trait_count):
trait_methods = parse_traits_info()
if class_id == searched_class_id:
method_names.update(trait_methods.items())
method_idxs.update(dict(
(idx, name)
for name, idx in trait_methods.items()))
# Scripts
script_count = u30()
for _c in range(script_count):
u30() # init
trait_count = u30()
for _c2 in range(trait_count):
parse_traits_info()
# Method bodies
method_body_count = u30()
Method = collections.namedtuple('Method', ['code', 'local_count'])
methods = {}
for _c in range(method_body_count):
method_idx = u30()
u30() # max_stack
local_count = u30()
u30() # init_scope_depth
u30() # max_scope_depth
code_length = u30()
code = read_bytes(code_length)
if method_idx in method_idxs:
m = Method(code, local_count)
methods[method_idxs[method_idx]] = m
exception_count = u30()
for _c2 in range(exception_count):
u30() # from
u30() # to
u30() # target
u30() # exc_type
u30() # var_name
trait_count = u30()
for _c2 in range(trait_count):
parse_traits_info()
assert p + code_reader.tell() == len(code_tag)
assert len(methods) == len(method_idxs)
method_pyfunctions = {}
def extract_function(func_name):
if func_name in method_pyfunctions:
return method_pyfunctions[func_name]
if func_name not in methods:
raise ExtractorError(u'Cannot find function %r' % func_name)
m = methods[func_name]
def resfunc(args):
registers = ['(this)'] + list(args) + [None] * m.local_count
stack = []
coder = io.BytesIO(m.code)
while True:
opcode = struct.unpack('!B', coder.read(1))[0]
if opcode == 36: # pushbyte
v = struct.unpack('!B', coder.read(1))[0]
stack.append(v)
elif opcode == 44: # pushstring
idx = u30(coder)
stack.append(constant_strings[idx])
elif opcode == 48: # pushscope
# We don't implement the scope register, so we'll just
# ignore the popped value
stack.pop()
elif opcode == 70: # callproperty
index = u30(coder)
mname = multinames[index]
arg_count = u30(coder)
args = list(reversed(
[stack.pop() for _ in range(arg_count)]))
obj = stack.pop()
if mname == u'split':
assert len(args) == 1
assert isinstance(args[0], compat_str)
assert isinstance(obj, compat_str)
if args[0] == u'':
res = list(obj)
else:
res = obj.split(args[0])
stack.append(res)
elif mname == u'slice':
assert len(args) == 1
assert isinstance(args[0], int)
assert isinstance(obj, list)
res = obj[args[0]:]
stack.append(res)
elif mname == u'join':
assert len(args) == 1
assert isinstance(args[0], compat_str)
assert isinstance(obj, list)
res = args[0].join(obj)
stack.append(res)
elif mname in method_pyfunctions:
stack.append(method_pyfunctions[mname](args))
else:
raise NotImplementedError(
u'Unsupported property %r on %r'
% (mname, obj))
elif opcode == 72: # returnvalue
res = stack.pop()
return res
elif opcode == 79: # callpropvoid
index = u30(coder)
mname = multinames[index]
arg_count = u30(coder)
args = list(reversed(
[stack.pop() for _ in range(arg_count)]))
obj = stack.pop()
if mname == u'reverse':
assert isinstance(obj, list)
obj.reverse()
else:
raise NotImplementedError(
u'Unsupported (void) property %r on %r'
% (mname, obj))
elif opcode == 93: # findpropstrict
index = u30(coder)
mname = multinames[index]
res = extract_function(mname)
stack.append(res)
elif opcode == 97: # setproperty
index = u30(coder)
value = stack.pop()
idx = stack.pop()
obj = stack.pop()
assert isinstance(obj, list)
assert isinstance(idx, int)
obj[idx] = value
elif opcode == 98: # getlocal
index = u30(coder)
stack.append(registers[index])
elif opcode == 99: # setlocal
index = u30(coder)
value = stack.pop()
registers[index] = value
elif opcode == 102: # getproperty
index = u30(coder)
pname = multinames[index]
if pname == u'length':
obj = stack.pop()
assert isinstance(obj, list)
stack.append(len(obj))
else: # Assume attribute access
idx = stack.pop()
assert isinstance(idx, int)
obj = stack.pop()
assert isinstance(obj, list)
stack.append(obj[idx])
elif opcode == 128: # coerce
u30(coder)
elif opcode == 133: # coerce_s
assert isinstance(stack[-1], (type(None), compat_str))
elif opcode == 164: # modulo
value2 = stack.pop()
value1 = stack.pop()
res = value1 % value2
stack.append(res)
elif opcode == 208: # getlocal_0
stack.append(registers[0])
elif opcode == 209: # getlocal_1
stack.append(registers[1])
elif opcode == 210: # getlocal_2
stack.append(registers[2])
elif opcode == 211: # getlocal_3
stack.append(registers[3])
elif opcode == 214: # setlocal_2
registers[2] = stack.pop()
elif opcode == 215: # setlocal_3
registers[3] = stack.pop()
else:
raise NotImplementedError(
u'Unsupported opcode %d' % opcode)
method_pyfunctions[func_name] = resfunc
return resfunc
initial_function = extract_function(u'decipher')
return lambda s: initial_function([s])
def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
"""Turn the encrypted s field into a working signature"""
if player_url is not None:
try:
player_id = (player_url, len(s))
if player_id not in self._player_cache:
func = self._extract_signature_function(
video_id, player_url, len(s)
)
self._player_cache[player_id] = func
func = self._player_cache[player_id]
if self._downloader.params.get('youtube_print_sig_code'):
self._print_sig_code(func, len(s))
return func(s)
except Exception:
tb = traceback.format_exc()
self._downloader.report_warning(
u'Automatic signature extraction failed: ' + tb)
self._downloader.report_warning(
u'Warning: Falling back to static signature algorithm')
return self._static_decrypt_signature(
s, video_id, player_url, age_gate)
def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
if age_gate:
# The videos with age protection use another player, so the
# algorithms can be different.
if len(s) == 86:
return s[2:63] + s[82] + s[64:82] + s[63]
if len(s) == 93:
return s[86:29:-1] + s[88] + s[28:5:-1]
elif len(s) == 92:
@ -431,15 +1080,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
elif len(s) == 87:
return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
elif len(s) == 86:
return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
elif len(s) == 85:
return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
elif len(s) == 84:
return s[81:36:-1] + s[0] + s[35:2:-1]
return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
elif len(s) == 83:
return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
elif len(s) == 82:
return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54]
return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37]
elif len(s) == 81:
return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
elif len(s) == 80:
@ -450,15 +1099,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
else:
raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
def _decrypt_signature_age_gate(self, s):
# The videos with age protection use another player, so the algorithms
# can be different.
if len(s) == 86:
return s[2:63] + s[82] + s[64:82] + s[63]
else:
# Fallback to the other algortihms
return self._decrypt_signature(s)
def _get_available_subtitles(self, video_id):
try:
sub_list = self._download_webpage(
@ -631,7 +1271,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
# Attempt to extract SWF player URL
mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
if mobj is not None:
player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
else:
@ -695,9 +1335,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
self._downloader.report_warning(u'unable to extract uploader nickname')
# title
if 'title' not in video_info:
raise ExtractorError(u'Unable to extract video title')
video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
if 'title' in video_info:
video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
else:
self._downloader.report_warning(u'Unable to extract video title')
video_title = u'_'
# thumbnail image
# We try first to get a high quality image:
@ -707,7 +1349,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
video_thumbnail = m_thumb.group(1)
elif 'thumbnail_url' not in video_info:
self._downloader.report_warning(u'unable to extract video thumbnail')
video_thumbnail = ''
video_thumbnail = None
else: # don't panic if we can't find it
video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
@ -752,6 +1394,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
args = info['args']
# Easy way to know if the 's' value is in url_encoded_fmt_stream_map
# this signatures are encrypted
if 'url_encoded_fmt_stream_map' not in args:
raise ValueError(u'No stream_map present') # caught below
m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
if m_s is not None:
self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
@ -784,21 +1428,34 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
if 'sig' in url_data:
url += '&signature=' + url_data['sig'][0]
elif 's' in url_data:
if self._downloader.params.get('verbose'):
s = url_data['s'][0]
if age_gate:
player = 'flash player'
else:
player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
'html5 player', fatal=False)
parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.'))
self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
(len(s), parts_sizes, url_data['itag'][0], player))
encrypted_sig = url_data['s'][0]
if age_gate:
signature = self._decrypt_signature_age_gate(encrypted_sig)
else:
signature = self._decrypt_signature(encrypted_sig)
if self._downloader.params.get('verbose'):
if age_gate:
if player_url is None:
player_version = 'unknown'
else:
player_version = self._search_regex(
r'-(.+)\.swf$', player_url,
u'flash player', fatal=False)
player_desc = 'flash player %s' % player_version
else:
player_version = self._search_regex(
r'html5player-(.+?)\.js', video_webpage,
'html5 player', fatal=False)
player_desc = u'html5 player %s' % player_version
parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
(len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
if not age_gate:
jsplayer_url_json = self._search_regex(
r'"assets":.+?"js":\s*("[^"]+")',
video_webpage, u'JS player URL')
player_url = json.loads(jsplayer_url_json)
signature = self._decrypt_signature(
encrypted_sig, video_id, player_url, age_gate)
url += '&signature=' + signature
if 'ratebypass' not in url:
url += '&ratebypass=yes'
@ -814,7 +1471,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
return
else:
raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')
raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
results = []
for format_param, video_real_url in video_url_list:
@ -872,9 +1529,19 @@ class YoutubePlaylistIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
playlist_id = mobj.group(1) or mobj.group(2)
# Check if it's a video-specific URL
query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
if 'v' in query_dict:
video_id = query_dict['v'][0]
if self._downloader.params.get('noplaylist'):
self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube')
else:
self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
# Download playlist videos from API
playlist_id = mobj.group(1) or mobj.group(2)
videos = []
for page_num in itertools.count(1):

View File

@ -2,16 +2,14 @@ import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
ExtractorError,
unescapeHTML,
)
class ZDFIE(InfoExtractor):
_VALID_URL = r'^http://www\.zdf\.de\/ZDFmediathek\/(.*beitrag\/video\/)(?P<video_id>[^/\?]+)(?:\?.*)?'
_TITLE = r'<h1(?: class="beitragHeadline")?>(?P<title>.*)</h1>'
_VALID_URL = r'^http://www\.zdf\.de\/ZDFmediathek(?P<hash>#)?\/(.*beitrag\/video\/)(?P<video_id>[^/\?]+)(?:\?.*)?'
_MEDIA_STREAM = r'<a href="(?P<video_url>.+(?P<media_type>.streaming).+/zdf/(?P<quality>[^\/]+)/[^"]*)".+class="play".+>'
_MMS_STREAM = r'href="(?P<video_url>mms://[^"]*)"'
_RTSP_STREAM = r'(?P<video_url>rtsp://[^"]*.mp4)'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@ -19,6 +17,9 @@ class ZDFIE(InfoExtractor):
raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group('video_id')
if mobj.group('hash'):
url = url.replace(u'#', u'', 1)
html = self._download_webpage(url, video_id)
streams = [m.groupdict() for m in re.finditer(self._MEDIA_STREAM, html)]
if streams is None:
@ -27,39 +28,48 @@ class ZDFIE(InfoExtractor):
# s['media_type'] == 'wstreaming' -> use 'Windows Media Player' and mms url
# s['media_type'] == 'hstreaming' -> use 'Quicktime' and rtsp url
# choose first/default media type and highest quality for now
for s in streams: #find 300 - dsl1000mbit
if s['quality'] == '300' and s['media_type'] == 'wstreaming':
stream_=s
break
for s in streams: #find veryhigh - dsl2000mbit
if s['quality'] == 'veryhigh' and s['media_type'] == 'wstreaming': # 'hstreaming' - rtsp is not working
stream_=s
break
if stream_ is None:
def stream_pref(s):
TYPE_ORDER = ['ostreaming', 'hstreaming', 'wstreaming']
try:
type_pref = TYPE_ORDER.index(s['media_type'])
except ValueError:
type_pref = 999
QUALITY_ORDER = ['veryhigh', '300']
try:
quality_pref = QUALITY_ORDER.index(s['quality'])
except ValueError:
quality_pref = 999
return (type_pref, quality_pref)
sorted_streams = sorted(streams, key=stream_pref)
if not sorted_streams:
raise ExtractorError(u'No stream found.')
stream = sorted_streams[0]
media_link = self._download_webpage(stream_['video_url'], video_id,'Get stream URL')
media_link = self._download_webpage(
stream['video_url'],
video_id,
u'Get stream URL')
self.report_extraction(video_id)
mobj = re.search(self._TITLE, html)
MMS_STREAM = r'href="(?P<video_url>mms://[^"]*)"'
RTSP_STREAM = r'(?P<video_url>rtsp://[^"]*.mp4)'
mobj = re.search(self._MEDIA_STREAM, media_link)
if mobj is None:
raise ExtractorError(u'Cannot extract title')
title = unescapeHTML(mobj.group('title'))
mobj = re.search(self._MMS_STREAM, media_link)
if mobj is None:
mobj = re.search(self._RTSP_STREAM, media_link)
mobj = re.search(RTSP_STREAM, media_link)
if mobj is None:
raise ExtractorError(u'Cannot extract mms:// or rtsp:// URL')
mms_url = mobj.group('video_url')
video_url = mobj.group('video_url')
mobj = re.search('(.*)[.](?P<ext>[^.]+)', mms_url)
if mobj is None:
raise ExtractorError(u'Cannot extract extention')
ext = mobj.group('ext')
title = self._html_search_regex(
r'<h1(?: class="beitragHeadline")?>(.*?)</h1>',
html, u'title')
return [{'id': video_id,
'url': mms_url,
'title': title,
'ext': ext
}]
return {
'id': video_id,
'url': video_url,
'title': title,
'ext': determine_ext(video_url)
}

View File

@ -1,6 +1,9 @@
import io
import json
import traceback
import hashlib
import subprocess
import sys
from zipimport import zipimporter
from .utils import *
@ -34,7 +37,7 @@ def rsa_verify(message, signature, key):
if signature != sha256(message).digest(): return False
return True
def update_self(to_screen, verbose, filename):
def update_self(to_screen, verbose):
"""Update the program file with the latest version from the repository"""
UPDATE_URL = "http://rg3.github.io/youtube-dl/update/"
@ -42,7 +45,6 @@ def update_self(to_screen, verbose, filename):
JSON_URL = UPDATE_URL + 'versions.json'
UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
if not isinstance(globals().get('__loader__'), zipimporter) and not hasattr(sys, "frozen"):
to_screen(u'It looks like you installed youtube-dl with a package manager, pip, setup.py or a tarball. Please use that to update.')
return
@ -75,11 +77,18 @@ def update_self(to_screen, verbose, filename):
to_screen(u'ERROR: the versions file signature is invalid. Aborting.')
return
to_screen(u'Updating to version ' + versions_info['latest'] + '...')
version = versions_info['versions'][versions_info['latest']]
version_id = versions_info['latest']
to_screen(u'Updating to version ' + version_id + '...')
version = versions_info['versions'][version_id]
print_notes(to_screen, versions_info['versions'])
filename = sys.argv[0]
# Py2EXE: Filename could be different
if hasattr(sys, "frozen") and not os.path.isfile(filename):
if os.path.isfile(filename + u'.exe'):
filename += u'.exe'
if not os.access(filename, os.W_OK):
to_screen(u'ERROR: no write permissions on %s' % filename)
return
@ -116,16 +125,18 @@ def update_self(to_screen, verbose, filename):
try:
bat = os.path.join(directory, 'youtube-dl-updater.bat')
b = open(bat, 'w')
b.write("""
echo Updating youtube-dl...
with io.open(bat, 'w') as batfile:
batfile.write(u"""
@echo off
echo Waiting for file handle to be closed ...
ping 127.0.0.1 -n 5 -w 1000 > NUL
move /Y "%s.new" "%s"
del "%s"
\n""" %(exe, exe, bat))
b.close()
move /Y "%s.new" "%s" > NUL
echo Updated youtube-dl to version %s.
start /b "" cmd /c del "%%~f0"&exit /b"
\n""" % (exe, exe, version_id))
os.startfile(bat)
subprocess.Popen([bat]) # Continues to run in the background
return # Do not show premature success messages
except (IOError, OSError) as err:
if verbose: to_screen(compat_str(traceback.format_exc()))
to_screen(u'ERROR: unable to overwrite current version')

View File

@ -66,6 +66,12 @@ try:
except ImportError: # Python 2
from urllib2 import HTTPError as compat_HTTPError
try:
from urllib.request import urlretrieve as compat_urlretrieve
except ImportError: # Python 2
from urllib import urlretrieve as compat_urlretrieve
try:
from subprocess import DEVNULL
compat_subprocess_get_DEVNULL = lambda: DEVNULL
@ -169,7 +175,7 @@ def compat_ord(c):
compiled_regex_type = type(re.compile(''))
std_headers = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate',
@ -818,3 +824,9 @@ def intlist_to_bytes(xs):
return ''.join([chr(x) for x in xs])
else:
return bytes(xs)
def get_cachedir(params={}):
cache_root = os.environ.get('XDG_CACHE_HOME',
os.path.expanduser('~/.cache'))
return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))

View File

@ -1,2 +1,2 @@
__version__ = '2013.09.20.1'
__version__ = '2013.10.06'