Compare commits
135 Commits
2014.07.23
...
2014.08.21
Author | SHA1 | Date | |
---|---|---|---|
|
3b88ee9a7d | ||
|
55c49908d2 | ||
|
db9b0b67b7 | ||
|
35f76e0061 | ||
|
3f338cd6de | ||
|
1d01f26ab1 | ||
|
266c71f971 | ||
|
e8ee972c6e | ||
|
f83dda12ad | ||
|
696d49815e | ||
|
fe556f1b0c | ||
|
d5638d974f | ||
|
938dd254e5 | ||
|
6493f5d704 | ||
|
cd6b48365e | ||
|
4d9bd478f9 | ||
|
c1d293cfa6 | ||
|
49807b4ac6 | ||
|
c990bb3633 | ||
|
af8322d2f9 | ||
|
df866e7f2a | ||
|
664718ff63 | ||
|
3258263371 | ||
|
3cfafc4a9b | ||
|
6f600ff5d6 | ||
|
90e075da3a | ||
|
9572013de9 | ||
|
3a5beb0ca1 | ||
|
a6da7b6b96 | ||
|
173a7026d5 | ||
|
40a90862f4 | ||
|
511c4325dc | ||
|
85a699246a | ||
|
4dc5286e13 | ||
|
c767dc74b8 | ||
|
56ca04f662 | ||
|
eb3680123a | ||
|
f5273890ee | ||
|
c7a088a816 | ||
|
fb17b60811 | ||
|
1e58804260 | ||
|
31bf213032 | ||
|
1cccc41ddc | ||
|
a91cf27767 | ||
|
64d02399d8 | ||
|
5961017202 | ||
|
d9760fd43c | ||
|
d42b2d2985 | ||
|
cccfab6412 | ||
|
4665664c92 | ||
|
0adc996bc3 | ||
|
b42a2a720b | ||
|
37edd7dd4a | ||
|
f87b3500c5 | ||
|
66420a2db4 | ||
|
6b8492a782 | ||
|
6de0595eb8 | ||
|
e48a2c646d | ||
|
0f831a1a92 | ||
|
3e510af38d | ||
|
5ecd7b0a92 | ||
|
a229909fa6 | ||
|
548f31d99c | ||
|
78b296b0ff | ||
|
be79b07907 | ||
|
5537dce84d | ||
|
493987fefe | ||
|
c97797a737 | ||
|
8d7d9d3452 | ||
|
7a5e7b303c | ||
|
61aabb9d70 | ||
|
62af3a0eb5 | ||
|
60064c53f1 | ||
|
98eb1c3fa2 | ||
|
201e9eaa0e | ||
|
9afa6ede21 | ||
|
f4776371ae | ||
|
328a20bf9c | ||
|
5622f29ae4 | ||
|
b4f23afbd1 | ||
|
0138968a6a | ||
|
4f31d0f2b7 | ||
|
bff74bdd1a | ||
|
10b04ff7f4 | ||
|
1f7ccb9014 | ||
|
c7b3209668 | ||
|
895ba7d1dd | ||
|
a2a1b0baa2 | ||
|
8646eb790e | ||
|
f036a6328e | ||
|
31bb8d3f51 | ||
|
4958ae2058 | ||
|
7e8d73c183 | ||
|
65bc504db8 | ||
|
0fc74a0d91 | ||
|
8d2cc6fbb1 | ||
|
a954584f63 | ||
|
cb3ff6fb01 | ||
|
71aa656d13 | ||
|
366b1f3cfe | ||
|
64ce58db38 | ||
|
11b85ce62e | ||
|
1220352ff7 | ||
|
8f3034d871 | ||
|
7fa547ab02 | ||
|
3182f3e2dc | ||
|
cbf915f3f6 | ||
|
b490b8849a | ||
|
5d2519e5bf | ||
|
c3415d1bac | ||
|
36f3542883 | ||
|
4cb71e9b6a | ||
|
4bc7009e8a | ||
|
16f8e9df8a | ||
|
b081cebefa | ||
|
916c145217 | ||
|
4192b51c7c | ||
|
052421ff09 | ||
|
4e99f48817 | ||
|
a11165ecc6 | ||
|
fbb2fc5580 | ||
|
2fe3d240cc | ||
|
42f4dcfe41 | ||
|
892e3192fb | ||
|
7272eab9d0 | ||
|
ebe832dc37 | ||
|
825abb8175 | ||
|
8944ec0109 | ||
|
c084c93402 | ||
|
d799b47b82 | ||
|
b7f8116406 | ||
|
6db274e057 | ||
|
0c92b57398 | ||
|
becafcbf0f | ||
|
92a86f4c1a |
8
Makefile
8
Makefile
@@ -6,10 +6,10 @@ clean:
|
||||
cleanall: clean
|
||||
rm -f youtube-dl youtube-dl.exe
|
||||
|
||||
PREFIX=/usr/local
|
||||
BINDIR=$(PREFIX)/bin
|
||||
MANDIR=$(PREFIX)/man
|
||||
PYTHON=/usr/bin/env python
|
||||
PREFIX ?= /usr/local
|
||||
BINDIR ?= $(PREFIX)/bin
|
||||
MANDIR ?= $(PREFIX)/man
|
||||
PYTHON ?= /usr/bin/env python
|
||||
|
||||
# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
|
||||
ifeq ($(PREFIX),/usr)
|
||||
|
71
README.md
71
README.md
@@ -38,12 +38,6 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
playlist or the command line) if an error
|
||||
occurs
|
||||
--dump-user-agent display the current browser identification
|
||||
--user-agent UA specify a custom user agent
|
||||
--referer REF specify a custom referer, use if the video
|
||||
access is restricted to one domain
|
||||
--add-header FIELD:VALUE specify a custom HTTP header and its value,
|
||||
separated by a colon ':'. You can use this
|
||||
option multiple times
|
||||
--list-extractors List all supported extractors and the URLs
|
||||
they would handle
|
||||
--extractor-descriptions Output descriptions of all supported
|
||||
@@ -51,35 +45,22 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--proxy URL Use the specified HTTP/HTTPS proxy. Pass in
|
||||
an empty string (--proxy "") for direct
|
||||
connection
|
||||
--no-check-certificate Suppress HTTPS certificate validation.
|
||||
--prefer-insecure Use an unencrypted connection to retrieve
|
||||
information about the video. (Currently
|
||||
supported only for YouTube)
|
||||
--cache-dir DIR Location in the filesystem where youtube-dl
|
||||
can store some downloaded information
|
||||
permanently. By default $XDG_CACHE_HOME
|
||||
/youtube-dl or ~/.cache/youtube-dl . At the
|
||||
moment, only YouTube player files (for
|
||||
videos with obfuscated signatures) are
|
||||
cached, but that may change.
|
||||
--no-cache-dir Disable filesystem caching
|
||||
--socket-timeout None Time to wait before giving up, in seconds
|
||||
--bidi-workaround Work around terminals that lack
|
||||
bidirectional text support. Requires bidiv
|
||||
or fribidi executable in PATH
|
||||
--default-search PREFIX Use this prefix for unqualified URLs. For
|
||||
example "gvsearch2:" downloads two videos
|
||||
from google videos for youtube-dl "large
|
||||
apple". Use the value "auto" to let
|
||||
youtube-dl guess. The default value "error"
|
||||
just throws an error.
|
||||
youtube-dl guess ("auto_warning" to emit a
|
||||
warning when guessing). "error" just throws
|
||||
an error. The default value "fixup_error"
|
||||
repairs broken URLs, but emits an error if
|
||||
this is not possible instead of searching.
|
||||
--ignore-config Do not read configuration files. When given
|
||||
in the global configuration file /etc
|
||||
/youtube-dl.conf: do not read the user
|
||||
configuration in ~/.config/youtube-dl.conf
|
||||
(%APPDATA%/youtube-dl/config.txt on
|
||||
Windows)
|
||||
--encoding ENCODING Force the specified encoding (experimental)
|
||||
|
||||
## Video Selection:
|
||||
--playlist-start NUMBER playlist video to start at (default is 1)
|
||||
@@ -125,9 +106,9 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
of SIZE.
|
||||
|
||||
## Filesystem Options:
|
||||
-t, --title use title in file name (default)
|
||||
-a, --batch-file FILE file containing URLs to download ('-' for
|
||||
stdin)
|
||||
--id use only video ID in file name
|
||||
-l, --literal [deprecated] alias of --title
|
||||
-A, --auto-number number downloaded files starting from 00000
|
||||
-o, --output TEMPLATE output filename template. Use %(title)s to
|
||||
get the title, %(uploader)s for the
|
||||
@@ -160,18 +141,15 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--restrict-filenames Restrict filenames to only ASCII
|
||||
characters, and avoid "&" and spaces in
|
||||
filenames
|
||||
-a, --batch-file FILE file containing URLs to download ('-' for
|
||||
stdin)
|
||||
--load-info FILE json file containing the video information
|
||||
(created with the "--write-json" option)
|
||||
-t, --title [deprecated] use title in file name
|
||||
(default)
|
||||
-l, --literal [deprecated] alias of --title
|
||||
-w, --no-overwrites do not overwrite files
|
||||
-c, --continue force resume of partially downloaded files.
|
||||
By default, youtube-dl will resume
|
||||
downloads if possible.
|
||||
--no-continue do not resume partially downloaded files
|
||||
(restart from beginning)
|
||||
--cookies FILE file to read cookies from and dump cookie
|
||||
jar in
|
||||
--no-part do not use .part files
|
||||
--no-mtime do not use the Last-modified header to set
|
||||
the file modification time
|
||||
@@ -181,6 +159,19 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--write-annotations write video annotations to a .annotation
|
||||
file
|
||||
--write-thumbnail write thumbnail image to disk
|
||||
--load-info FILE json file containing the video information
|
||||
(created with the "--write-json" option)
|
||||
--cookies FILE file to read cookies from and dump cookie
|
||||
jar in
|
||||
--cache-dir DIR Location in the filesystem where youtube-dl
|
||||
can store some downloaded information
|
||||
permanently. By default $XDG_CACHE_HOME
|
||||
/youtube-dl or ~/.cache/youtube-dl . At the
|
||||
moment, only YouTube player files (for
|
||||
videos with obfuscated signatures) are
|
||||
cached, but that may change.
|
||||
--no-cache-dir Disable filesystem caching
|
||||
--rm-cache-dir Delete all filesystem cache files
|
||||
|
||||
## Verbosity / Simulation Options:
|
||||
-q, --quiet activates quiet mode
|
||||
@@ -210,6 +201,22 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
problems
|
||||
--print-traffic Display sent and read HTTP traffic
|
||||
|
||||
## Workarounds:
|
||||
--encoding ENCODING Force the specified encoding (experimental)
|
||||
--no-check-certificate Suppress HTTPS certificate validation.
|
||||
--prefer-insecure Use an unencrypted connection to retrieve
|
||||
information about the video. (Currently
|
||||
supported only for YouTube)
|
||||
--user-agent UA specify a custom user agent
|
||||
--referer REF specify a custom referer, use if the video
|
||||
access is restricted to one domain
|
||||
--add-header FIELD:VALUE specify a custom HTTP header and its value,
|
||||
separated by a colon ':'. You can use this
|
||||
option multiple times
|
||||
--bidi-workaround Work around terminals that lack
|
||||
bidirectional text support. Requires bidiv
|
||||
or fribidi executable in PATH
|
||||
|
||||
## Video Format Options:
|
||||
-f, --format FORMAT video format code, specify the order of
|
||||
preference using slashes: "-f 22/17/18".
|
||||
|
@@ -117,8 +117,9 @@ def expect_info_dict(self, expected_dict, got_dict):
|
||||
u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
|
||||
|
||||
# Check for the presence of mandatory fields
|
||||
for key in ('id', 'url', 'title', 'ext'):
|
||||
self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
|
||||
if got_dict.get('_type') != 'playlist':
|
||||
for key in ('id', 'url', 'title', 'ext'):
|
||||
self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
|
||||
# Check for mandatory fields that are automatically set by YoutubeDL
|
||||
for key in ['webpage_url', 'extractor', 'extractor_key']:
|
||||
self.assertTrue(got_dict.get(key), u'Missing field: %s' % key)
|
||||
|
@@ -15,7 +15,6 @@ from youtube_dl.extractor import (
|
||||
FacebookIE,
|
||||
gen_extractors,
|
||||
JustinTVIE,
|
||||
PBSIE,
|
||||
YoutubeIE,
|
||||
)
|
||||
|
||||
@@ -100,6 +99,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
|
||||
def test_facebook_matching(self):
|
||||
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
|
||||
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793'))
|
||||
|
||||
def test_no_duplicates(self):
|
||||
ies = gen_extractors()
|
||||
|
@@ -10,7 +10,6 @@ from test.helper import (
|
||||
get_params,
|
||||
gettestcases,
|
||||
expect_info_dict,
|
||||
md5,
|
||||
try_rm,
|
||||
report_warning,
|
||||
)
|
||||
@@ -24,7 +23,6 @@ import socket
|
||||
import youtube_dl.YoutubeDL
|
||||
from youtube_dl.utils import (
|
||||
compat_http_client,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_HTTPError,
|
||||
DownloadError,
|
||||
@@ -65,15 +63,21 @@ def generator(test_case):
|
||||
def test_template(self):
|
||||
ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
|
||||
other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])]
|
||||
is_playlist = any(k.startswith('playlist') for k in test_case)
|
||||
test_cases = test_case.get(
|
||||
'playlist', [] if is_playlist else [test_case])
|
||||
|
||||
def print_skipping(reason):
|
||||
print('Skipping %s: %s' % (test_case['name'], reason))
|
||||
if not ie.working():
|
||||
print_skipping('IE marked as not _WORKING')
|
||||
return
|
||||
if 'playlist' not in test_case:
|
||||
info_dict = test_case.get('info_dict', {})
|
||||
if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
|
||||
|
||||
for tc in test_cases:
|
||||
info_dict = tc.get('info_dict', {})
|
||||
if not tc.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
|
||||
raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')
|
||||
|
||||
if 'skip' in test_case:
|
||||
print_skipping(test_case['skip'])
|
||||
return
|
||||
@@ -83,6 +87,9 @@ def generator(test_case):
|
||||
return
|
||||
|
||||
params = get_params(test_case.get('params', {}))
|
||||
if is_playlist and 'playlist' not in test_case:
|
||||
params.setdefault('extract_flat', True)
|
||||
params.setdefault('skip_download', True)
|
||||
|
||||
ydl = YoutubeDL(params)
|
||||
ydl.add_default_info_extractors()
|
||||
@@ -95,7 +102,6 @@ def generator(test_case):
|
||||
def get_tc_filename(tc):
|
||||
return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
|
||||
|
||||
test_cases = test_case.get('playlist', [test_case])
|
||||
def try_rm_tcs_files():
|
||||
for tc in test_cases:
|
||||
tc_filename = get_tc_filename(tc)
|
||||
@@ -107,7 +113,10 @@ def generator(test_case):
|
||||
try_num = 1
|
||||
while True:
|
||||
try:
|
||||
ydl.download([test_case['url']])
|
||||
# We're not using .download here since that is just a shim
|
||||
# for outside error handling, and returns the exit code
|
||||
# instead of the result dict.
|
||||
res_dict = ydl.extract_info(test_case['url'])
|
||||
except (DownloadError, ExtractorError) as err:
|
||||
# Check if the exception is not a network related one
|
||||
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
|
||||
@@ -123,6 +132,17 @@ def generator(test_case):
|
||||
else:
|
||||
break
|
||||
|
||||
if is_playlist:
|
||||
self.assertEqual(res_dict['_type'], 'playlist')
|
||||
expect_info_dict(self, test_case.get('info_dict', {}), res_dict)
|
||||
if 'playlist_mincount' in test_case:
|
||||
self.assertGreaterEqual(
|
||||
len(res_dict['entries']),
|
||||
test_case['playlist_mincount'],
|
||||
'Expected at least %d in playlist %s, but got only %d' % (
|
||||
test_case['playlist_mincount'], test_case['url'],
|
||||
len(res_dict['entries'])))
|
||||
|
||||
for tc in test_cases:
|
||||
tc_filename = get_tc_filename(tc)
|
||||
if not test_case.get('params', {}).get('skip_download', False):
|
||||
|
@@ -1,6 +1,17 @@
|
||||
#!/usr/bin/env python
|
||||
# encoding: utf-8
|
||||
|
||||
## DEPRECATED FILE!
|
||||
# Add new tests to the extractors themselves, like this:
|
||||
# _TEST = {
|
||||
# 'url': 'http://example.com/playlist/42',
|
||||
# 'playlist_mincount': 99,
|
||||
# 'info_dict': {
|
||||
# 'id': '42',
|
||||
# 'title': 'Playlist number forty-two',
|
||||
# }
|
||||
# }
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
@@ -193,10 +204,10 @@ class TestPlaylists(unittest.TestCase):
|
||||
def test_bandcamp_album(self):
|
||||
dl = FakeYDL()
|
||||
ie = BandcampAlbumIE(dl)
|
||||
result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep')
|
||||
result = ie.extract('http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['title'], 'Nightmare Night EP')
|
||||
assertGreaterEqual(self, len(result['entries']), 4)
|
||||
self.assertEqual(result['title'], 'Hierophany of the Open Grave')
|
||||
assertGreaterEqual(self, len(result['entries']), 9)
|
||||
|
||||
def test_smotri_community(self):
|
||||
dl = FakeYDL()
|
||||
|
@@ -280,7 +280,7 @@ class TestUtil(unittest.TestCase):
|
||||
d = json.loads(stripped)
|
||||
self.assertEqual(d, [{"id": "532cb", "x": 3}])
|
||||
|
||||
def test_uppercase_escpae(self):
|
||||
def test_uppercase_escape(self):
|
||||
self.assertEqual(uppercase_escape(u'aä'), u'aä')
|
||||
self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐')
|
||||
|
||||
|
@@ -1,5 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
@@ -16,52 +18,64 @@ from youtube_dl.utils import compat_str, compat_urlretrieve
|
||||
|
||||
_TESTS = [
|
||||
(
|
||||
u'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
|
||||
u'js',
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
|
||||
'js',
|
||||
86,
|
||||
u'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
|
||||
'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
|
||||
),
|
||||
(
|
||||
u'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
|
||||
u'js',
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
|
||||
'js',
|
||||
85,
|
||||
u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
|
||||
'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
|
||||
),
|
||||
(
|
||||
u'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js',
|
||||
u'js',
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js',
|
||||
'js',
|
||||
90,
|
||||
u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
|
||||
']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
|
||||
),
|
||||
(
|
||||
u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js',
|
||||
u'js',
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js',
|
||||
'js',
|
||||
84,
|
||||
u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=',
|
||||
'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=',
|
||||
),
|
||||
(
|
||||
u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
|
||||
u'js',
|
||||
u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
|
||||
u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
|
||||
'js',
|
||||
'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
|
||||
'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
|
||||
),
|
||||
(
|
||||
u'http://s.ytimg.com/yts/swfbin/player-vfl5vIhK2/watch_as3.swf',
|
||||
u'swf',
|
||||
'http://s.ytimg.com/yts/swfbin/player-vfl5vIhK2/watch_as3.swf',
|
||||
'swf',
|
||||
86,
|
||||
u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVWXY\\!"#$%&\'()*+,-./:;<=>?'
|
||||
'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVWXY\\!"#$%&\'()*+,-./:;<=>?'
|
||||
),
|
||||
(
|
||||
u'http://s.ytimg.com/yts/swfbin/player-vflmDyk47/watch_as3.swf',
|
||||
u'swf',
|
||||
u'F375F75BF2AFDAAF2666E43868D46816F83F13E81C46.3725A8218E446A0DECD33F79DC282994D6AA92C92C9',
|
||||
u'9C29AA6D499282CD97F33DCED0A644E8128A5273.64C18E31F38361864D86834E6662FAADFA2FB57F'
|
||||
'http://s.ytimg.com/yts/swfbin/player-vflmDyk47/watch_as3.swf',
|
||||
'swf',
|
||||
'F375F75BF2AFDAAF2666E43868D46816F83F13E81C46.3725A8218E446A0DECD33F79DC282994D6AA92C92C9',
|
||||
'9C29AA6D499282CD97F33DCED0A644E8128A5273.64C18E31F38361864D86834E6662FAADFA2FB57F'
|
||||
),
|
||||
(
|
||||
u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
|
||||
u'js',
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
|
||||
'js',
|
||||
84,
|
||||
u'123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>'
|
||||
'123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>'
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js',
|
||||
'js',
|
||||
83,
|
||||
'123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F'
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js',
|
||||
'js',
|
||||
'4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
|
||||
'82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B'
|
||||
)
|
||||
]
|
||||
|
||||
@@ -75,7 +89,7 @@ class TestSignature(unittest.TestCase):
|
||||
|
||||
|
||||
def make_tfunc(url, stype, sig_input, expected_sig):
|
||||
m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3)?\.[a-z]+$', url)
|
||||
m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$', url)
|
||||
assert m, '%r should follow URL format' % url
|
||||
test_id = m.group(1)
|
||||
|
||||
|
@@ -1,12 +0,0 @@
|
||||
# Legacy file for backwards compatibility, use youtube_dl.downloader instead!
|
||||
from .downloader import FileDownloader as RealFileDownloader
|
||||
from .downloader import get_suitable_downloader
|
||||
|
||||
|
||||
# This class reproduces the old behaviour of FileDownloader
|
||||
class FileDownloader(RealFileDownloader):
|
||||
def _do_download(self, filename, info_dict):
|
||||
real_fd = get_suitable_downloader(info_dict)(self.ydl, self.params)
|
||||
for ph in self._progress_hooks:
|
||||
real_fd.add_progress_hook(ph)
|
||||
return real_fd.download(filename, info_dict)
|
@@ -162,6 +162,7 @@ class YoutubeDL(object):
|
||||
default_search: Prepend this string if an input url is not valid.
|
||||
'auto' for elaborate guessing
|
||||
encoding: Use this encoding instead of the system-specified.
|
||||
extract_flat: Do not resolve URLs, return the immediate result.
|
||||
|
||||
The following parameters are not used by YoutubeDL itself, they are used by
|
||||
the FileDownloader:
|
||||
@@ -275,7 +276,7 @@ class YoutubeDL(object):
|
||||
return message
|
||||
|
||||
assert hasattr(self, '_output_process')
|
||||
assert type(message) == type('')
|
||||
assert isinstance(message, compat_str)
|
||||
line_count = message.count('\n') + 1
|
||||
self._output_process.stdin.write((message + '\n').encode('utf-8'))
|
||||
self._output_process.stdin.flush()
|
||||
@@ -303,7 +304,7 @@ class YoutubeDL(object):
|
||||
|
||||
def to_stderr(self, message):
|
||||
"""Print message to stderr."""
|
||||
assert type(message) == type('')
|
||||
assert isinstance(message, compat_str)
|
||||
if self.params.get('logger'):
|
||||
self.params['logger'].error(message)
|
||||
else:
|
||||
@@ -558,7 +559,12 @@ class YoutubeDL(object):
|
||||
Returns the resolved ie_result.
|
||||
"""
|
||||
|
||||
result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
|
||||
result_type = ie_result.get('_type', 'video')
|
||||
|
||||
if self.params.get('extract_flat', False):
|
||||
if result_type in ('url', 'url_transparent'):
|
||||
return ie_result
|
||||
|
||||
if result_type == 'video':
|
||||
self.add_extra_info(ie_result, extra_info)
|
||||
return self.process_video_result(ie_result, download=download)
|
||||
@@ -849,7 +855,7 @@ class YoutubeDL(object):
|
||||
# Keep for backwards compatibility
|
||||
info_dict['stitle'] = info_dict['title']
|
||||
|
||||
if not 'format' in info_dict:
|
||||
if 'format' not in info_dict:
|
||||
info_dict['format'] = info_dict['ext']
|
||||
|
||||
reason = self._match_entry(info_dict)
|
||||
@@ -999,7 +1005,7 @@ class YoutubeDL(object):
|
||||
if info_dict.get('requested_formats') is not None:
|
||||
downloaded = []
|
||||
success = True
|
||||
merger = FFmpegMergerPP(self)
|
||||
merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
|
||||
if not merger._get_executable():
|
||||
postprocessors = []
|
||||
self.report_warning('You have requested multiple '
|
||||
@@ -1234,21 +1240,18 @@ class YoutubeDL(object):
|
||||
if not self.params.get('verbose'):
|
||||
return
|
||||
|
||||
if type('') is not compat_str:
|
||||
# Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
|
||||
self.report_warning(
|
||||
'Your Python is broken! Update to a newer and supported version')
|
||||
|
||||
encoding_str = (
|
||||
'[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
|
||||
locale.getpreferredencoding(),
|
||||
sys.getfilesystemencoding(),
|
||||
sys.stdout.encoding,
|
||||
self.get_encoding()))
|
||||
try:
|
||||
write_string(encoding_str, encoding=None)
|
||||
except:
|
||||
errmsg = 'Failed to write encoding string %r' % encoding_str
|
||||
try:
|
||||
sys.stdout.write(errmsg)
|
||||
except:
|
||||
pass
|
||||
raise IOError(errmsg)
|
||||
write_string(encoding_str, encoding=None)
|
||||
|
||||
self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
|
||||
try:
|
||||
|
@@ -66,6 +66,10 @@ __authors__ = (
|
||||
'Naglis Jonaitis',
|
||||
'Charles Chen',
|
||||
'Hassaan Ali',
|
||||
'Dobrosław Żybort',
|
||||
'David Fabijan',
|
||||
'Sebastian Haas',
|
||||
'Alexander Kirk',
|
||||
)
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
@@ -76,6 +80,7 @@ import optparse
|
||||
import os
|
||||
import random
|
||||
import shlex
|
||||
import shutil
|
||||
import sys
|
||||
|
||||
|
||||
@@ -97,7 +102,7 @@ from .utils import (
|
||||
write_string,
|
||||
)
|
||||
from .update import update_self
|
||||
from .FileDownloader import (
|
||||
from .downloader import (
|
||||
FileDownloader,
|
||||
)
|
||||
from .extractor import gen_extractors
|
||||
@@ -222,6 +227,7 @@ def parseOpts(overrideArguments=None):
|
||||
downloader = optparse.OptionGroup(parser, 'Download Options')
|
||||
postproc = optparse.OptionGroup(parser, 'Post-processing Options')
|
||||
filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
|
||||
workarounds = optparse.OptionGroup(parser, 'Workarounds')
|
||||
verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
|
||||
|
||||
general.add_option('-h', '--help',
|
||||
@@ -238,14 +244,6 @@ def parseOpts(overrideArguments=None):
|
||||
general.add_option('--dump-user-agent',
|
||||
action='store_true', dest='dump_user_agent',
|
||||
help='display the current browser identification', default=False)
|
||||
general.add_option('--user-agent',
|
||||
dest='user_agent', help='specify a custom user agent', metavar='UA')
|
||||
general.add_option('--referer',
|
||||
dest='referer', help='specify a custom referer, use if the video access is restricted to one domain',
|
||||
metavar='REF', default=None)
|
||||
general.add_option('--add-header',
|
||||
dest='headers', help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times', action="append",
|
||||
metavar='FIELD:VALUE')
|
||||
general.add_option('--list-extractors',
|
||||
action='store_true', dest='list_extractors',
|
||||
help='List all supported extractors and the URLs they would handle', default=False)
|
||||
@@ -255,33 +253,17 @@ def parseOpts(overrideArguments=None):
|
||||
general.add_option(
|
||||
'--proxy', dest='proxy', default=None, metavar='URL',
|
||||
help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
|
||||
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
|
||||
general.add_option(
|
||||
'--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure',
|
||||
help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
|
||||
general.add_option(
|
||||
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
|
||||
help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
|
||||
general.add_option(
|
||||
'--no-cache-dir', action='store_const', const=None, dest='cachedir',
|
||||
help='Disable filesystem caching')
|
||||
general.add_option(
|
||||
'--socket-timeout', dest='socket_timeout',
|
||||
type=float, default=None, help=u'Time to wait before giving up, in seconds')
|
||||
general.add_option(
|
||||
'--bidi-workaround', dest='bidi_workaround', action='store_true',
|
||||
help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
|
||||
general.add_option(
|
||||
'--default-search',
|
||||
dest='default_search', metavar='PREFIX',
|
||||
help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess. The default value "error" just throws an error.')
|
||||
help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.')
|
||||
general.add_option(
|
||||
'--ignore-config',
|
||||
action='store_true',
|
||||
help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
|
||||
general.add_option(
|
||||
'--encoding', dest='encoding', metavar='ENCODING',
|
||||
help='Force the specified encoding (experimental)')
|
||||
|
||||
selection.add_option(
|
||||
'--playlist-start',
|
||||
@@ -382,6 +364,33 @@ def parseOpts(overrideArguments=None):
|
||||
help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
|
||||
downloader.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP)
|
||||
|
||||
workarounds.add_option(
|
||||
'--encoding', dest='encoding', metavar='ENCODING',
|
||||
help='Force the specified encoding (experimental)')
|
||||
workarounds.add_option(
|
||||
'--no-check-certificate', action='store_true',
|
||||
dest='no_check_certificate', default=False,
|
||||
help='Suppress HTTPS certificate validation.')
|
||||
workarounds.add_option(
|
||||
'--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure',
|
||||
help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
|
||||
workarounds.add_option(
|
||||
'--user-agent', metavar='UA',
|
||||
dest='user_agent', help='specify a custom user agent')
|
||||
workarounds.add_option(
|
||||
'--referer', metavar='REF',
|
||||
dest='referer', default=None,
|
||||
help='specify a custom referer, use if the video access is restricted to one domain',
|
||||
)
|
||||
workarounds.add_option(
|
||||
'--add-header', metavar='FIELD:VALUE',
|
||||
dest='headers', action='append',
|
||||
help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
|
||||
)
|
||||
workarounds.add_option(
|
||||
'--bidi-workaround', dest='bidi_workaround', action='store_true',
|
||||
help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
|
||||
|
||||
verbosity.add_option('-q', '--quiet',
|
||||
action='store_true', dest='quiet', help='activates quiet mode', default=False)
|
||||
verbosity.add_option(
|
||||
@@ -439,12 +448,10 @@ def parseOpts(overrideArguments=None):
|
||||
help='Display sent and read HTTP traffic')
|
||||
|
||||
|
||||
filesystem.add_option('-t', '--title',
|
||||
action='store_true', dest='usetitle', help='use title in file name (default)', default=False)
|
||||
filesystem.add_option('-a', '--batch-file',
|
||||
dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
|
||||
filesystem.add_option('--id',
|
||||
action='store_true', dest='useid', help='use only video ID in file name', default=False)
|
||||
filesystem.add_option('-l', '--literal',
|
||||
action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False)
|
||||
filesystem.add_option('-A', '--auto-number',
|
||||
action='store_true', dest='autonumber',
|
||||
help='number downloaded files starting from 00000', default=False)
|
||||
@@ -470,11 +477,10 @@ def parseOpts(overrideArguments=None):
|
||||
filesystem.add_option('--restrict-filenames',
|
||||
action='store_true', dest='restrictfilenames',
|
||||
help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
|
||||
filesystem.add_option('-a', '--batch-file',
|
||||
dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
|
||||
filesystem.add_option('--load-info',
|
||||
dest='load_info_filename', metavar='FILE',
|
||||
help='json file containing the video information (created with the "--write-json" option)')
|
||||
filesystem.add_option('-t', '--title',
|
||||
action='store_true', dest='usetitle', help='[deprecated] use title in file name (default)', default=False)
|
||||
filesystem.add_option('-l', '--literal',
|
||||
action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False)
|
||||
filesystem.add_option('-w', '--no-overwrites',
|
||||
action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
|
||||
filesystem.add_option('-c', '--continue',
|
||||
@@ -482,8 +488,6 @@ def parseOpts(overrideArguments=None):
|
||||
filesystem.add_option('--no-continue',
|
||||
action='store_false', dest='continue_dl',
|
||||
help='do not resume partially downloaded files (restart from beginning)')
|
||||
filesystem.add_option('--cookies',
|
||||
dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
|
||||
filesystem.add_option('--no-part',
|
||||
action='store_true', dest='nopart', help='do not use .part files', default=False)
|
||||
filesystem.add_option('--no-mtime',
|
||||
@@ -501,6 +505,20 @@ def parseOpts(overrideArguments=None):
|
||||
filesystem.add_option('--write-thumbnail',
|
||||
action='store_true', dest='writethumbnail',
|
||||
help='write thumbnail image to disk', default=False)
|
||||
filesystem.add_option('--load-info',
|
||||
dest='load_info_filename', metavar='FILE',
|
||||
help='json file containing the video information (created with the "--write-json" option)')
|
||||
filesystem.add_option('--cookies',
|
||||
dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
|
||||
filesystem.add_option(
|
||||
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
|
||||
help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
|
||||
filesystem.add_option(
|
||||
'--no-cache-dir', action='store_const', const=None, dest='cachedir',
|
||||
help='Disable filesystem caching')
|
||||
filesystem.add_option(
|
||||
'--rm-cache-dir', action='store_true', dest='rm_cachedir',
|
||||
help='Delete all filesystem cache files')
|
||||
|
||||
|
||||
postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False,
|
||||
@@ -534,6 +552,7 @@ def parseOpts(overrideArguments=None):
|
||||
parser.add_option_group(downloader)
|
||||
parser.add_option_group(filesystem)
|
||||
parser.add_option_group(verbosity)
|
||||
parser.add_option_group(workarounds)
|
||||
parser.add_option_group(video_format)
|
||||
parser.add_option_group(subtitles)
|
||||
parser.add_option_group(authentication)
|
||||
@@ -694,7 +713,7 @@ def _real_main(argv=None):
|
||||
date = DateRange.day(opts.date)
|
||||
else:
|
||||
date = DateRange(opts.dateafter, opts.datebefore)
|
||||
if opts.default_search not in ('auto', 'auto_warning', None) and ':' not in opts.default_search:
|
||||
if opts.default_search not in ('auto', 'auto_warning', 'error', 'fixup_error', None) and ':' not in opts.default_search:
|
||||
parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')
|
||||
|
||||
# Do not download videos when there are audio-only formats
|
||||
@@ -833,9 +852,26 @@ def _real_main(argv=None):
|
||||
if opts.update_self:
|
||||
update_self(ydl.to_screen, opts.verbose)
|
||||
|
||||
# Remove cache dir
|
||||
if opts.rm_cachedir:
|
||||
if opts.cachedir is None:
|
||||
ydl.to_screen(u'No cache dir specified (Did you combine --no-cache-dir and --rm-cache-dir?)')
|
||||
else:
|
||||
if ('.cache' not in opts.cachedir) or ('youtube-dl' not in opts.cachedir):
|
||||
ydl.to_screen(u'Not removing directory %s - this does not look like a cache dir')
|
||||
retcode = 141
|
||||
else:
|
||||
ydl.to_screen(
|
||||
u'Removing cache dir %s .' % opts.cachedir,
|
||||
skip_eol=True)
|
||||
if os.path.exists(opts.cachedir):
|
||||
ydl.to_screen(u'.', skip_eol=True)
|
||||
shutil.rmtree(opts.cachedir)
|
||||
ydl.to_screen(u'.')
|
||||
|
||||
# Maybe do nothing
|
||||
if (len(all_urls) < 1) and (opts.load_info_filename is None):
|
||||
if not opts.update_self:
|
||||
if not (opts.update_self or opts.rm_cachedir):
|
||||
parser.error(u'you must provide at least one URL')
|
||||
else:
|
||||
sys.exit()
|
||||
|
@@ -292,7 +292,7 @@ class FileDownloader(object):
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
"""Real download process. Redefine in subclasses."""
|
||||
raise NotImplementedError(u'This method must be implemented by sublcasses')
|
||||
raise NotImplementedError(u'This method must be implemented by subclasses')
|
||||
|
||||
def _hook_progress(self, status):
|
||||
for ph in self._progress_hooks:
|
||||
|
@@ -220,6 +220,7 @@ class F4mFD(FileDownloader):
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
man_url = info_dict['url']
|
||||
requested_bitrate = info_dict.get('tbr')
|
||||
self.to_screen('[download] Downloading f4m manifest')
|
||||
manifest = self.ydl.urlopen(man_url).read()
|
||||
self.report_destination(filename)
|
||||
@@ -233,8 +234,14 @@ class F4mFD(FileDownloader):
|
||||
|
||||
doc = etree.fromstring(manifest)
|
||||
formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
|
||||
formats = sorted(formats, key=lambda f: f[0])
|
||||
rate, media = formats[-1]
|
||||
if requested_bitrate is None:
|
||||
# get the best format
|
||||
formats = sorted(formats, key=lambda f: f[0])
|
||||
rate, media = formats[-1]
|
||||
else:
|
||||
rate, media = list(filter(
|
||||
lambda f: int(f[0]) == requested_bitrate, formats))[0]
|
||||
|
||||
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
|
||||
bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text)
|
||||
metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
|
||||
|
@@ -1,3 +1,4 @@
|
||||
from .abc import ABCIE
|
||||
from .academicearth import AcademicEarthCourseIE
|
||||
from .addanime import AddAnimeIE
|
||||
from .adultswim import AdultSwimIE
|
||||
@@ -76,6 +77,10 @@ from .ebaumsworld import EbaumsWorldIE
|
||||
from .ehow import EHowIE
|
||||
from .eighttracks import EightTracksIE
|
||||
from .eitb import EitbIE
|
||||
from .ellentv import (
|
||||
EllenTVIE,
|
||||
EllenTVClipsIE,
|
||||
)
|
||||
from .elpais import ElPaisIE
|
||||
from .empflix import EmpflixIE
|
||||
from .engadget import EngadgetIE
|
||||
@@ -111,9 +116,11 @@ from .funnyordie import FunnyOrDieIE
|
||||
from .gamekings import GamekingsIE
|
||||
from .gameone import GameOneIE
|
||||
from .gamespot import GameSpotIE
|
||||
from .gamestar import GameStarIE
|
||||
from .gametrailers import GametrailersIE
|
||||
from .gdcvault import GDCVaultIE
|
||||
from .generic import GenericIE
|
||||
from .godtube import GodTubeIE
|
||||
from .googleplus import GooglePlusIE
|
||||
from .googlesearch import GoogleSearchIE
|
||||
from .gorillavid import GorillaVidIE
|
||||
@@ -123,6 +130,7 @@ from .helsinki import HelsinkiIE
|
||||
from .hentaistigma import HentaiStigmaIE
|
||||
from .hotnewhiphop import HotNewHipHopIE
|
||||
from .howcast import HowcastIE
|
||||
from .howstuffworks import HowStuffWorksIE
|
||||
from .huffpost import HuffPostIE
|
||||
from .hypem import HypemIE
|
||||
from .iconosquare import IconosquareIE
|
||||
@@ -140,8 +148,10 @@ from .ivi import (
|
||||
IviIE,
|
||||
IviCompilationIE
|
||||
)
|
||||
from .izlesene import IzleseneIE
|
||||
from .jadorecettepub import JadoreCettePubIE
|
||||
from .jeuxvideo import JeuxVideoIE
|
||||
from .jove import JoveIE
|
||||
from .jukebox import JukeboxIE
|
||||
from .justintv import JustinTVIE
|
||||
from .jpopsukitv import JpopsukiIE
|
||||
@@ -151,6 +161,7 @@ from .khanacademy import KhanAcademyIE
|
||||
from .kickstarter import KickStarterIE
|
||||
from .keek import KeekIE
|
||||
from .kontrtube import KontrTubeIE
|
||||
from .krasview import KrasViewIE
|
||||
from .ku6 import Ku6IE
|
||||
from .la7 import LA7IE
|
||||
from .lifenews import LifeNewsIE
|
||||
@@ -172,10 +183,12 @@ from .mdr import MDRIE
|
||||
from .metacafe import MetacafeIE
|
||||
from .metacritic import MetacriticIE
|
||||
from .mit import TechTVMITIE, MITIE, OCWMITIE
|
||||
from .mitele import MiTeleIE
|
||||
from .mixcloud import MixcloudIE
|
||||
from .mlb import MLBIE
|
||||
from .mpora import MporaIE
|
||||
from .mofosex import MofosexIE
|
||||
from .mojvideo import MojvideoIE
|
||||
from .mooshare import MooshareIE
|
||||
from .morningstar import MorningstarIE
|
||||
from .motherless import MotherlessIE
|
||||
@@ -219,9 +232,12 @@ from .nrk import (
|
||||
from .ntv import NTVIE
|
||||
from .nytimes import NYTimesIE
|
||||
from .nuvid import NuvidIE
|
||||
from .oe1 import OE1IE
|
||||
from .ooyala import OoyalaIE
|
||||
from .orf import ORFIE
|
||||
from .orf import (
|
||||
ORFTVthekIE,
|
||||
ORFOE1IE,
|
||||
ORFFM4IE,
|
||||
)
|
||||
from .parliamentliveuk import ParliamentLiveUKIE
|
||||
from .pbs import PBSIE
|
||||
from .photobucket import PhotobucketIE
|
||||
@@ -242,6 +258,7 @@ from .ro220 import Ro220IE
|
||||
from .rottentomatoes import RottenTomatoesIE
|
||||
from .roxwel import RoxwelIE
|
||||
from .rtbf import RTBFIE
|
||||
from .rtlnl import RtlXlIE
|
||||
from .rtlnow import RTLnowIE
|
||||
from .rts import RTSIE
|
||||
from .rtve import RTVEALaCartaIE
|
||||
@@ -258,6 +275,7 @@ from .savefrom import SaveFromIE
|
||||
from .scivee import SciVeeIE
|
||||
from .screencast import ScreencastIE
|
||||
from .servingsys import ServingSysIE
|
||||
from .shared import SharedIE
|
||||
from .sina import SinaIE
|
||||
from .slideshare import SlideshareIE
|
||||
from .slutload import SlutloadIE
|
||||
@@ -320,6 +338,8 @@ from .tumblr import TumblrIE
|
||||
from .tutv import TutvIE
|
||||
from .tvigle import TvigleIE
|
||||
from .tvp import TvpIE
|
||||
from .tvplay import TVPlayIE
|
||||
from .ubu import UbuIE
|
||||
from .udemy import (
|
||||
UdemyIE,
|
||||
UdemyCourseIE
|
||||
@@ -341,6 +361,7 @@ from .videofyme import VideofyMeIE
|
||||
from .videopremium import VideoPremiumIE
|
||||
from .videott import VideoTtIE
|
||||
from .videoweed import VideoWeedIE
|
||||
from .vidme import VidmeIE
|
||||
from .vimeo import (
|
||||
VimeoIE,
|
||||
VimeoChannelIE,
|
||||
@@ -374,6 +395,7 @@ from .wistia import WistiaIE
|
||||
from .worldstarhiphop import WorldStarHipHopIE
|
||||
from .wrzuta import WrzutaIE
|
||||
from .xbef import XBefIE
|
||||
from .xboxclips import XboxClipsIE
|
||||
from .xhamster import XHamsterIE
|
||||
from .xnxx import XNXXIE
|
||||
from .xvideos import XVideosIE
|
||||
|
48
youtube_dl/extractor/abc.py
Normal file
48
youtube_dl/extractor/abc.py
Normal file
@@ -0,0 +1,48 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class ABCIE(InfoExtractor):
    """Information extractor for news videos on abc.net.au.

    The article page hands its renditions to ``inlineVideoData.push(...)``;
    that argument is captured, converted to JSON and mapped to the format
    list.
    """

    IE_NAME = 'abc.net.au'
    _VALID_URL = r'http://www\.abc\.net\.au/news/[^/]+/[^/]+/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.abc.net.au/news/2014-07-25/bringing-asylum-seekers-to-australia-would-give/5624716',
        'md5': 'dad6f8ad011a70d9ddf887ce6d5d0742',
        'info_dict': {
            'id': '5624716',
            'ext': 'mp4',
            'title': 'Bringing asylum seekers to Australia would give them right to asylum claims: professor',
            'description': 'md5:ba36fa5e27e5c9251fd929d339aea4af',
        },
    }

    def _real_extract(self, url):
        """Return the info dict (id, title, formats, description, thumbnail) for *url*."""
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        raw_renditions = self._search_regex(
            r'inlineVideoData\.push\((.*?)\);', webpage, 'video urls',
            flags=re.DOTALL)
        # Single quotes are swapped for double quotes before parsing --
        # presumably the page emits a single-quoted JS literal (TODO confirm).
        renditions = json.loads(raw_renditions.replace('\'', '"'))

        formats = []
        for rendition in renditions:
            formats.append({
                'url': rendition['url'],
                'width': int(rendition['width']),
                'height': int(rendition['height']),
                'tbr': int(rendition['bitrate']),
                'filesize': int(rendition['filesize']),
            })
        self._sort_formats(formats)

        # Metadata comes from the page's OpenGraph helpers, looked up in the
        # same order as the keys were originally evaluated.
        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'thumbnail': thumbnail,
        }
|
@@ -6,6 +6,7 @@ import json
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urlparse,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -110,8 +111,8 @@ class AppleTrailersIE(InfoExtractor):
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format': format['type'],
|
||||
'width': format['width'],
|
||||
'height': int(format['height']),
|
||||
'width': int_or_none(format['width']),
|
||||
'height': int_or_none(format['height']),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
@@ -8,6 +8,8 @@ from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
qualities,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
|
||||
@@ -44,8 +46,14 @@ class ARDIE(InfoExtractor):
|
||||
else:
|
||||
video_id = m.group('video_id')
|
||||
|
||||
urlp = compat_urllib_parse_urlparse(url)
|
||||
url = urlp._replace(path=compat_urllib_parse.quote(urlp.path.encode('utf-8'))).geturl()
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
|
||||
raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
|
||||
|
||||
title = self._html_search_regex(
|
||||
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
|
||||
r'<meta name="dcterms.title" content="(.*?)"/>',
|
||||
|
@@ -109,15 +109,19 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
|
||||
return any(re.match(r, f['versionCode']) for r in regexes)
|
||||
# Some formats may not be in the same language as the url
|
||||
# TODO: Might want not to drop videos that does not match requested language
|
||||
# but to process those formats with lower precedence
|
||||
formats = filter(_match_lang, all_formats)
|
||||
formats = list(formats) # in python3 filter returns an iterator
|
||||
formats = list(formats) # in python3 filter returns an iterator
|
||||
if not formats:
|
||||
# Some videos are only available in the 'Originalversion'
|
||||
# they aren't tagged as being in French or German
|
||||
if all(f['versionCode'] == 'VO' or f['versionCode'] == 'VA' for f in all_formats):
|
||||
formats = all_formats
|
||||
else:
|
||||
raise ExtractorError(u'The formats list is empty')
|
||||
# Sometimes there are neither videos of requested lang code
|
||||
# nor original version videos available
|
||||
# For such cases we just take all_formats as is
|
||||
formats = all_formats
|
||||
if not formats:
|
||||
raise ExtractorError('The formats list is empty')
|
||||
|
||||
if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
|
||||
def sort_key(f):
|
||||
|
@@ -52,7 +52,7 @@ class BlinkxIE(InfoExtractor):
|
||||
'height': int(m['h']),
|
||||
})
|
||||
elif m['type'] == 'original':
|
||||
duration = m['d']
|
||||
duration = float(m['d'])
|
||||
elif m['type'] == 'youtube':
|
||||
yt_id = m['link']
|
||||
self.to_screen('Youtube video detected: %s' % yt_id)
|
||||
|
@@ -10,7 +10,7 @@ class BloombergIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
|
||||
'md5': '7bf08858ff7c203c870e8a6190e221e5',
|
||||
# The md5 checksum changes
|
||||
'info_dict': {
|
||||
'id': 'qurhIVlJSB6hzkVi229d8g',
|
||||
'ext': 'flv',
|
||||
@@ -31,8 +31,7 @@ class BloombergIE(InfoExtractor):
|
||||
return {
|
||||
'id': name.split('-')[-1],
|
||||
'title': title,
|
||||
'url': f4m_url,
|
||||
'ext': 'flv',
|
||||
'formats': self._extract_f4m_formats(f4m_url, name),
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
||||
|
@@ -7,6 +7,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
@@ -22,8 +23,9 @@ class BRIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '25e279aa-1ffd-40fd-9955-5325bd48a53a',
|
||||
'ext': 'mp4',
|
||||
'title': 'Am 1. und 2. August in Oberammergau',
|
||||
'description': 'md5:dfd224e5aa6819bc1fcbb7826a932021',
|
||||
'title': 'Wenn das Traditions-Theater wackelt',
|
||||
'description': 'Heimatsound-Festival 2014: Wenn das Traditions-Theater wackelt',
|
||||
'duration': 34,
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -34,6 +36,7 @@ class BRIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Über den Pass',
|
||||
'description': 'Die Eroberung der Alpen: Über den Pass',
|
||||
'duration': 2588,
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -44,6 +47,7 @@ class BRIE(InfoExtractor):
|
||||
'ext': 'aac',
|
||||
'title': '"Keine neuen Schulden im nächsten Jahr"',
|
||||
'description': 'Haushaltsentwurf: "Keine neuen Schulden im nächsten Jahr"',
|
||||
'duration': 64,
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -54,6 +58,7 @@ class BRIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Umweltbewusster Häuslebauer',
|
||||
'description': 'Uwe Erdelt: Umweltbewusster Häuslebauer',
|
||||
'duration': 116,
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -64,6 +69,7 @@ class BRIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Folge 1 - Metaphysik',
|
||||
'description': 'Kant für Anfänger: Folge 1 - Metaphysik',
|
||||
'duration': 893,
|
||||
'uploader': 'Eva Maria Steimle',
|
||||
'upload_date': '20140117',
|
||||
}
|
||||
@@ -84,6 +90,7 @@ class BRIE(InfoExtractor):
|
||||
media = {
|
||||
'id': xml_media.get('externalId'),
|
||||
'title': xml_media.find('title').text,
|
||||
'duration': parse_duration(xml_media.find('duration').text),
|
||||
'formats': self._extract_formats(xml_media.find('assets')),
|
||||
'thumbnails': self._extract_thumbnails(xml_media.find('teaserImage/variants')),
|
||||
'description': ' '.join(xml_media.find('shareTitle').text.splitlines()),
|
||||
|
@@ -18,6 +18,7 @@ from ..utils import (
|
||||
clean_html,
|
||||
compiled_regex_type,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
RegexNotFoundError,
|
||||
sanitize_filename,
|
||||
unescapeHTML,
|
||||
@@ -373,7 +374,8 @@ class InfoExtractor(object):
|
||||
else:
|
||||
for p in pattern:
|
||||
mobj = re.search(p, string, flags)
|
||||
if mobj: break
|
||||
if mobj:
|
||||
break
|
||||
|
||||
if os.name != 'nt' and sys.stderr.isatty():
|
||||
_name = u'\033[0;34m%s\033[0m' % name
|
||||
@@ -589,6 +591,24 @@ class InfoExtractor(object):
|
||||
self.to_screen(msg)
|
||||
time.sleep(timeout)
|
||||
|
||||
def _extract_f4m_formats(self, manifest_url, video_id):
    """Download the f4m manifest at *manifest_url* and list its formats.

    One format dict is produced per ``media`` element of the manifest.
    Every entry carries the manifest URL itself as ``url`` and ``'flv'``
    as ``ext``; ``tbr``, ``width`` and ``height`` are read from the
    element's ``bitrate``, ``width`` and ``height`` attributes through
    ``int_or_none``.  The list is passed through ``_sort_formats`` before
    being returned.
    """
    f4m_doc = self._download_xml(
        manifest_url, video_id, 'Downloading f4m manifest',
        'Unable to download f4m manifest')

    def _format_from(media_el):
        # Translate a single <media> element into a format dict.
        attrs = media_el.attrib
        return {
            'url': manifest_url,
            'ext': 'flv',
            'tbr': int_or_none(attrs.get('bitrate')),
            'width': int_or_none(attrs.get('width')),
            'height': int_or_none(attrs.get('height')),
        }

    formats = [
        _format_from(media_el)
        for media_el in f4m_doc.findall('{http://ns.adobe.com/f4m/1.0}media')
    ]
    self._sort_formats(formats)

    return formats
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
|
@@ -30,7 +30,7 @@ class DFBIE(InfoExtractor):
|
||||
video_id)
|
||||
video_info = player_info.find('video')
|
||||
|
||||
f4m_info = self._download_xml(video_info.find('url').text, video_id)
|
||||
f4m_info = self._download_xml(self._proto_relative_url(video_info.find('url').text.strip()), video_id)
|
||||
token_el = f4m_info.find('token')
|
||||
manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
|
||||
|
||||
|
79
youtube_dl/extractor/ellentv.py
Normal file
79
youtube_dl/extractor/ellentv.py
Normal file
@@ -0,0 +1,79 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class EllenTVIE(InfoExtractor):
    """Information extractor for single video pages under ellentv.com/videos/."""

    _VALID_URL = r'https?://(?:www\.)?ellentv\.com/videos/(?P<id>[a-z0-9_-]+)'
    _TEST = {
        'url': 'http://www.ellentv.com/videos/0-7jqrsr18/',
        'md5': 'e4af06f3bf0d5f471921a18db5764642',
        'info_dict': {
            'id': '0-7jqrsr18',
            'ext': 'mp4',
            'title': 'What\'s Wrong with These Photos? A Whole Lot',
            'timestamp': 1406876400,
            'upload_date': '20140801',
        }
    }

    def _real_extract(self, url):
        """Return the info dict (id, title, url, timestamp) for *url*."""
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)

        # The publish date is taken from the <time datetime="..."> attribute
        # and converted with parse_iso8601.
        published = self._search_regex(
            r'<span class="publish-date"><time datetime="([^"]+)">',
            page, 'timestamp')
        timestamp = parse_iso8601(published)

        info = {'id': video_id}
        info['title'] = self._og_search_title(page)
        info['url'] = self._html_search_meta('VideoURL', page, 'url')
        info['timestamp'] = timestamp
        return info
|
||||
|
||||
|
||||
class EllenTVClipsIE(InfoExtractor):
    """Playlist extractor for episode pages under ellentv.com/episodes/.

    The episode page passes its clips to ``playerView.addClips([...])``.
    That argument is parsed as JSON and every clip's ``url`` is delegated
    to the ``EllenTV`` extractor.
    """

    IE_NAME = 'EllenTV:clips'
    _VALID_URL = r'https?://(?:www\.)?ellentv\.com/episodes/(?P<id>[a-z0-9_-]+)'
    _TEST = {
        'url': 'http://www.ellentv.com/episodes/meryl-streep-vanessa-hudgens/',
        'info_dict': {
            'id': 'meryl-streep-vanessa-hudgens',
            'title': 'Meryl Streep, Vanessa Hudgens',
        },
        'playlist_mincount': 9,
    }

    def _real_extract(self, url):
        """Return a playlist result (id, title, entries) for the episode at *url*."""
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')

        webpage = self._download_webpage(url, playlist_id)
        playlist = self._extract_playlist(webpage)

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': self._og_search_title(webpage),
            'entries': self._extract_entries(playlist)
        }

    def _extract_playlist(self, webpage):
        """Return the list of clip dicts passed to ``playerView.addClips`` in *webpage*.

        Raises ExtractorError when the captured text is not valid JSON.
        """
        # The dot is escaped so that only the literal call
        # ``playerView.addClips(`` matches, not any character in its place.
        json_string = self._search_regex(
            r'playerView\.addClips\(\[\{(.*?)\}\]\);', webpage, 'json')
        try:
            # The regex strips the outer "[{" / "}]" delimiters; restore them.
            return json.loads("[{" + json_string + "}]")
        except ValueError as ve:
            # Nothing is downloaded here; the failure is in parsing the
            # text embedded in the page.
            raise ExtractorError('Failed to parse JSON', cause=ve)

    def _extract_entries(self, playlist):
        """Return one ``url_result`` for the ``EllenTV`` extractor per clip in *playlist*."""
        return [self.url_result(item['url'], 'EllenTV') for item in playlist]
|
@@ -20,7 +20,7 @@ from ..utils import (
|
||||
class FacebookIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:\w+\.)?facebook\.com/
|
||||
(?:[^#?]*\#!/)?
|
||||
(?:[^#]*?\#!/)?
|
||||
(?:video/video\.php|photo\.php|video/embed)\?(?:.*?)
|
||||
(?:v|video_id)=(?P<id>[0-9]+)
|
||||
(?:.*)'''
|
||||
|
@@ -42,7 +42,6 @@ class FiredriveIE(InfoExtractor):
|
||||
fields = dict(re.findall(r'''(?x)<input\s+
|
||||
type="hidden"\s+
|
||||
name="([^"]+)"\s+
|
||||
(?:id="[^"]+"\s+)?
|
||||
value="([^"]*)"
|
||||
''', webpage))
|
||||
|
||||
@@ -66,7 +65,7 @@ class FiredriveIE(InfoExtractor):
|
||||
ext = self._search_regex(r'type:\s?\'([^\']+)\',',
|
||||
webpage, 'extension', fatal=False)
|
||||
video_url = self._search_regex(
|
||||
r'file:\s?\'(http[^\']+)\',', webpage, 'file url')
|
||||
r'file:\s?loadURL\(\'(http[^\']+)\'\),', webpage, 'file url')
|
||||
|
||||
formats = [{
|
||||
'format_id': 'sd',
|
||||
|
@@ -19,17 +19,35 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
+ video_id, video_id, 'Downloading XML config')
|
||||
|
||||
manifest_url = info.find('videos/video/url').text
|
||||
video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
|
||||
video_url = video_url.replace('/z/', '/i/')
|
||||
manifest_url = manifest_url.replace('/z/', '/i/')
|
||||
|
||||
if manifest_url.startswith('rtmp'):
|
||||
formats = [{'url': manifest_url, 'ext': 'flv'}]
|
||||
else:
|
||||
formats = []
|
||||
available_formats = self._search_regex(r'/[^,]*,(.*?),k\.mp4', manifest_url, 'available formats')
|
||||
for index, format_descr in enumerate(available_formats.split(',')):
|
||||
format_info = {
|
||||
'url': manifest_url.replace('manifest.f4m', 'index_%d_av.m3u8' % index),
|
||||
'ext': 'mp4',
|
||||
}
|
||||
m_resolution = re.search(r'(?P<width>\d+)x(?P<height>\d+)', format_descr)
|
||||
if m_resolution is not None:
|
||||
format_info.update({
|
||||
'width': int(m_resolution.group('width')),
|
||||
'height': int(m_resolution.group('height')),
|
||||
})
|
||||
formats.append(format_info)
|
||||
|
||||
thumbnail_path = info.find('image').text
|
||||
|
||||
return {'id': video_id,
|
||||
'ext': 'flv' if video_url.startswith('rtmp') else 'mp4',
|
||||
'url': video_url,
|
||||
'title': info.find('titre').text,
|
||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
|
||||
'description': info.find('synopsis').text,
|
||||
}
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info.find('titre').text,
|
||||
'formats': formats,
|
||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
|
||||
'description': info.find('synopsis').text,
|
||||
}
|
||||
|
||||
|
||||
class PluzzIE(FranceTVBaseInfoExtractor):
|
||||
|
74
youtube_dl/extractor/gamestar.py
Normal file
74
youtube_dl/extractor/gamestar.py
Normal file
@@ -0,0 +1,74 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class GameStarIE(InfoExtractor):
    """Information extractor for video pages on gamestar.de."""

    _VALID_URL = r'http://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html'
    _TEST = {
        'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html',
        'md5': '96974ecbb7fd8d0d20fca5a00810cea7',
        'info_dict': {
            'id': '76110',
            'ext': 'mp4',
            'title': 'Hobbit 3: Die Schlacht der Fünf Heere - Teaser-Trailer zum dritten Teil',
            'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den vollständigen Trailer an.',
            'thumbnail': 'http://images.gamestar.de/images/idgwpgsgp/bdb/2494525/600x.jpg',
            'upload_date': '20140728',
            'duration': 17
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the GameStar video page at *url*.

        Title, description and thumbnail come from the page's OpenGraph
        helpers; upload date, duration, view count and comment count are
        looked up with non-fatal regex searches.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        # The og:title carries a site suffix that is removed from the title.
        title = self._og_search_title(webpage).replace(
            ' - Video bei GameStar.de', '').strip()

        # The media URL is built from the video id alone.
        video_url = (
            'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId='
            + video_id)

        description = self._og_search_description(webpage).strip()

        og_thumbnail = self._og_search_thumbnail(webpage)
        thumbnail = self._proto_relative_url(og_thumbnail, scheme='http:')

        raw_date = self._html_search_regex(
            r'<span style="float:left;font-size:11px;">Datum: ([0-9]+\.[0-9]+\.[0-9]+) ',
            webpage, 'upload_date', fatal=False)
        upload_date = unified_strdate(raw_date)

        raw_duration = self._html_search_regex(
            r' Länge: ([0-9]+:[0-9]+)</span>', webpage, 'duration',
            fatal=False)
        duration = parse_duration(raw_duration)

        raw_views = self._html_search_regex(
            r' Zuschauer: ([0-9\.]+) ', webpage,
            'view_count', fatal=False)
        view_count = str_to_int(raw_views)

        raw_comments = self._html_search_regex(
            r'>Kommentieren \(([0-9]+)\)</a>', webpage, 'comment_count',
            fatal=False)
        comment_count = int_or_none(raw_comments)

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'ext': 'mp4',
            'thumbnail': thumbnail,
            'description': description,
            'upload_date': upload_date,
            'duration': duration,
            'view_count': view_count,
            'comment_count': comment_count
        }
|
@@ -8,6 +8,7 @@ from ..utils import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
|
||||
|
||||
class GDCVaultIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)'
|
||||
_TESTS = [
|
||||
@@ -31,6 +32,15 @@ class GDCVaultIE(InfoExtractor):
|
||||
'skip_download': True, # Requires rtmpdump
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.gdcvault.com/play/1015301/Thexder-Meets-Windows-95-or',
|
||||
'md5': 'a5eb77996ef82118afbbe8e48731b98e',
|
||||
'info_dict': {
|
||||
'id': '1015301',
|
||||
'ext': 'flv',
|
||||
'title': 'Thexder Meets Windows 95, or Writing Great Games in the Windows 95 Environment',
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def _parse_mp4(self, xml_description):
|
||||
@@ -103,18 +113,40 @@ class GDCVaultIE(InfoExtractor):
|
||||
webpage_url = 'http://www.gdcvault.com/play/' + video_id
|
||||
start_page = self._download_webpage(webpage_url, video_id)
|
||||
|
||||
xml_root = self._html_search_regex(r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page, 'xml root', None, False)
|
||||
direct_url = self._search_regex(
|
||||
r's1\.addVariable\("file",\s*encodeURIComponent\("(/[^"]+)"\)\);',
|
||||
start_page, 'url', default=None)
|
||||
if direct_url:
|
||||
video_url = 'http://www.gdcvault.com/' + direct_url
|
||||
title = self._html_search_regex(
|
||||
r'<td><strong>Session Name</strong></td>\s*<td>(.*?)</td>',
|
||||
start_page, 'title')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'flv',
|
||||
'title': title,
|
||||
}
|
||||
|
||||
xml_root = self._html_search_regex(
|
||||
r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>',
|
||||
start_page, 'xml root', default=None)
|
||||
if xml_root is None:
|
||||
# Probably need to authenticate
|
||||
start_page = self._login(webpage_url, video_id)
|
||||
if start_page is None:
|
||||
login_res = self._login(webpage_url, video_id)
|
||||
if login_res is None:
|
||||
self.report_warning('Could not login.')
|
||||
else:
|
||||
start_page = login_res
|
||||
# Grab the url from the authenticated page
|
||||
xml_root = self._html_search_regex(r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page, 'xml root')
|
||||
xml_root = self._html_search_regex(
|
||||
r'<iframe src="(.*?)player.html.*?".*?</iframe>',
|
||||
start_page, 'xml root')
|
||||
|
||||
xml_name = self._html_search_regex(r'<iframe src=".*?\?xml=(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename', None, False)
|
||||
xml_name = self._html_search_regex(
|
||||
r'<iframe src=".*?\?xml=(.+?\.xml).*?".*?</iframe>',
|
||||
start_page, 'xml filename', default=None)
|
||||
if xml_name is None:
|
||||
# Fallback to the older format
|
||||
xml_name = self._html_search_regex(r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename')
|
||||
|
@@ -383,13 +383,13 @@ class GenericIE(InfoExtractor):
|
||||
if not parsed_url.scheme:
|
||||
default_search = self._downloader.params.get('default_search')
|
||||
if default_search is None:
|
||||
default_search = 'error'
|
||||
default_search = 'fixup_error'
|
||||
|
||||
if default_search in ('auto', 'auto_warning'):
|
||||
if default_search in ('auto', 'auto_warning', 'fixup_error'):
|
||||
if '/' in url:
|
||||
self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
|
||||
return self.url_result('http://' + url)
|
||||
else:
|
||||
elif default_search != 'fixup_error':
|
||||
if default_search == 'auto_warning':
|
||||
if re.match(r'^(?:url|URL)$', url):
|
||||
raise ExtractorError(
|
||||
@@ -399,7 +399,8 @@ class GenericIE(InfoExtractor):
|
||||
self._downloader.report_warning(
|
||||
'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
|
||||
return self.url_result('ytsearch:' + url)
|
||||
elif default_search == 'error':
|
||||
|
||||
if default_search in ('error', 'fixup_error'):
|
||||
raise ExtractorError(
|
||||
('%r is not a valid URL. '
|
||||
'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
|
||||
@@ -705,6 +706,13 @@ class GenericIE(InfoExtractor):
|
||||
url = unescapeHTML(mobj.group('url'))
|
||||
return self.url_result(url, ie='MTVServicesEmbedded')
|
||||
|
||||
# Look for embedded yahoo player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'Yahoo')
|
||||
|
||||
# Start with something easy: JW Player in SWFObject
|
||||
found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||
if not found:
|
||||
|
58
youtube_dl/extractor/godtube.py
Normal file
58
youtube_dl/extractor/godtube.py
Normal file
@@ -0,0 +1,58 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class GodTubeIE(InfoExtractor):
    """Information extractor for godtube.com watch pages.

    Metadata is read from two XML documents: the media-player config
    (stream URL, author, date, duration, image) and the media XML (title).
    """

    _VALID_URL = r'https?://(?:www\.)?godtube\.com/watch/\?v=(?P<id>[\da-zA-Z]+)'
    _TESTS = [
        {
            'url': 'https://www.godtube.com/watch/?v=0C0CNNNU',
            'md5': '77108c1e4ab58f48031101a1a2119789',
            'info_dict': {
                'id': '0C0CNNNU',
                'ext': 'mp4',
                'title': 'Woman at the well.',
                'duration': 159,
                'timestamp': 1205712000,
                'uploader': 'beverlybmusic',
                'upload_date': '20080317',
                'thumbnail': 're:^https?://.*\.jpg$',
            },
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for the video addressed by ``url``."""
        video_id = re.match(self._VALID_URL, url).group('id')

        def text_at(doc, xpath):
            # Text of the first element in ``doc`` matching ``xpath``.
            return doc.find(xpath).text

        # The player config is requested with the lower-cased video id.
        player_config = self._download_xml(
            'http://www.godtube.com/resource/mediaplayer/%s.xml' % video_id.lower(),
            video_id, 'Downloading player config XML')

        stream_url = text_at(player_config, './/file')
        author = text_at(player_config, './/author')
        uploaded_at = parse_iso8601(text_at(player_config, './/date'))
        length = parse_duration(text_at(player_config, './/duration'))
        poster = text_at(player_config, './/image')

        # The title is only taken from the separate media XML.
        media_doc = self._download_xml(
            'http://www.godtube.com/media/xml/?v=%s' % video_id,
            video_id, 'Downloading media XML')
        video_title = text_at(media_doc, './/title')

        return {
            'id': video_id,
            'url': stream_url,
            'title': video_title,
            'thumbnail': poster,
            'timestamp': uploaded_at,
            'uploader': author,
            'duration': length,
        }
|
134
youtube_dl/extractor/howstuffworks.py
Normal file
134
youtube_dl/extractor/howstuffworks.py
Normal file
@@ -0,0 +1,134 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
import random
|
||||
import string
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import find_xpath_attr
|
||||
|
||||
|
||||
class HowStuffWorksIE(InfoExtractor):
    """Information extractor for ``*-video.htm`` pages on howstuffworks.com.

    Formats are taken from the ``mp4`` list of the page's matching ``clip``
    object when it can be found; otherwise they are read from the video
    nodes of the SMIL document served for the content id.
    """

    _VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*\d+-(?P<id>.+?)-video\.htm'
    _TESTS = [
        {
            'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm',
            'info_dict': {
                'id': '450221',
                'display_id': 'cool-jobs-iditarod-musher',
                'ext': 'flv',
                'title': 'Cool Jobs - Iditarod Musher',
                'description': 'md5:82bb58438a88027b8186a1fccb365f90',
                'thumbnail': 're:^https?://.*\.jpg$',
            },
            'params': {
                # md5 is not consistent
                'skip_download': True
            }
        },
        {
            'url': 'http://adventure.howstuffworks.com/39516-deadliest-catch-jakes-farewell-pots-video.htm',
            'info_dict': {
                'id': '553470',
                'display_id': 'deadliest-catch-jakes-farewell-pots',
                'ext': 'mp4',
                'title': 'Deadliest Catch: Jake\'s Farewell Pots',
                'description': 'md5:9632c346d5e43ee238028c9cefd8dbbc',
                'thumbnail': 're:^https?://.*\.jpg$',
            },
            'params': {
                # md5 is not consistent
                'skip_download': True
            }
        },
        {
            'url': 'http://entertainment.howstuffworks.com/arts/2706-sword-swallowing-1-by-dan-meyer-video.htm',
            'info_dict': {
                'id': '440011',
                'display_id': 'sword-swallowing-1-by-dan-meyer',
                'ext': 'flv',
                'title': 'Sword Swallowing #1 by Dan Meyer',
                'description': 'md5:b2409e88172913e2e7d3d1159b0ef735',
                'thumbnail': 're:^https?://.*\.jpg$',
            },
            'params': {
                # md5 is not consistent
                'skip_download': True
            }
        },
    ]

    def _real_extract(self, url):
        """Return the info dict (with a ``formats`` list) for ``url``."""
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('id')
        webpage = self._download_webpage(url, display_id)

        content_id = self._search_regex(r'var siteSectionId="(\d+)";', webpage, 'content id')

        # Optional: the raw contents of the ``mp4: [...]`` list of the clip
        # whose content_id matches.  ``default=None`` makes a miss silent.
        mp4 = self._search_regex(
            r'''(?xs)var\s+clip\s*=\s*{\s*
                .+?\s*
                content_id\s*:\s*%s\s*,\s*
                .+?\s*
                mp4\s*:\s*\[(.*?),?\]\s*
            };\s*
            videoData\.push\(clip\);''' % content_id,
            webpage, 'mp4', default=None)

        smil = self._download_xml(
            'http://services.media.howstuffworks.com/videos/%s/smil-service.smil' % content_id,
            content_id, 'Downloading video SMIL')

        # Base URL for media files, as advertised by the SMIL head.
        http_base = find_xpath_attr(
            smil,
            './{0}head/{0}meta'.format('{http://www.w3.org/2001/SMIL20/Language}'),
            'name',
            'httpBase').get('content')

        def random_string(str_len=0):
            return ''.join([random.choice(string.ascii_uppercase) for _ in range(str_len)])

        # Query string appended to every media URL; the r/g values are
        # freshly generated random upper-case strings.
        URL_SUFFIX = '?v=2.11.3&fp=LNX 11,2,202,356&r=%s&g=%s' % (random_string(5), random_string(12))

        formats = []

        if mp4:
            for video in json.loads('[%s]' % mp4):
                bitrate = video['bitrate']
                fmt = {
                    'url': video['src'].replace('http://pmd.video.howstuffworks.com', http_base) + URL_SUFFIX,
                    'format_id': bitrate,
                }
                # Only set vbr when the bitrate label looks like "<digits>k".
                m = re.search(r'(?P<vbr>\d+)[Kk]', bitrate)
                if m:
                    fmt['vbr'] = int(m.group('vbr'))
                formats.append(fmt)
        else:
            for video in smil.findall(
                    './/{0}body/{0}switch/{0}video'.format('{http://www.w3.org/2001/SMIL20/Language}')):
                # Floor division keeps vbr an int on Python 3 as well; plain
                # "/" would yield a float there.
                vbr = int(video.attrib['system-bitrate']) // 1000
                formats.append({
                    'url': '%s/%s%s' % (http_base, video.attrib['src'], URL_SUFFIX),
                    'format_id': '%dk' % vbr,
                    'vbr': vbr,
                })

        self._sort_formats(formats)

        # Strip the site name that is appended to the og:title.
        title = self._og_search_title(webpage)
        TITLE_SUFFIX = ' : HowStuffWorks'
        if title.endswith(TITLE_SUFFIX):
            title = title[:-len(TITLE_SUFFIX)]

        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        return {
            'id': content_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
        }
|
97
youtube_dl/extractor/izlesene.py
Normal file
97
youtube_dl/extractor/izlesene.py
Normal file
@@ -0,0 +1,97 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
get_element_by_id,
|
||||
parse_iso8601,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class IzleseneIE(InfoExtractor):
    """Information extractor for izlesene.com video and embed-player URLs.

    Page metadata is scraped from the canonical video page; one format is
    produced per entry of the page's ``quality`` list by querying the
    stream-URL API.
    """

    _VALID_URL = r'https?://(?:(?:www|m)\.)?izlesene\.com/(?:video|embedplayer)/(?:[^/]+/)?(?P<id>[0-9]+)'
    _STREAM_URL = 'http://panel.izlesene.com/api/streamurl/{id:}/{format:}'
    _TEST = {
        'url': 'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694',
        'md5': '4384f9f0ea65086734b881085ee05ac2',
        'info_dict': {
            'id': '7599694',
            'ext': 'mp4',
            'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi',
            'description': 'Annesi oğluna doğum günü hediyesi olarak minecraft cd si alıyor, ve çocuk hunharca seviniyor',
            'thumbnail': 're:^http://.*\.jpg',
            'uploader_id': 'pelikzzle',
            'timestamp': 1404298698,
            'upload_date': '20140702',
            'duration': 95.395,
            'age_limit': 0,
        }
    }

    def _real_extract(self, url):
        """Return the info dict (with a ``formats`` list) for ``url``."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        # Always fetch the canonical page, whichever URL variant was given.
        url = 'http://www.izlesene.com/video/%s' % video_id

        webpage = self._download_webpage(url, video_id)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        uploader = self._html_search_regex(
            r"adduserUsername\s*=\s*'([^']+)';", webpage, 'uploader', fatal=False, default='')
        timestamp = parse_iso8601(self._html_search_meta(
            'uploadDate', webpage, 'upload date', fatal=False))

        duration = int_or_none(self._html_search_regex(
            r'"videoduration"\s*:\s*"([^"]+)"', webpage, 'duration', fatal=False))
        if duration:
            # The value is divided by 1000 (the test expects 95.395).
            duration /= 1000.0

        view_count = str_to_int(get_element_by_id('videoViewCount', webpage))
        # Label fixed: this lookup was previously reported as 'uploader'.
        comment_count = self._html_search_regex(
            r'comment_count\s*=\s*\'([^\']+)\';', webpage, 'comment count', fatal=False)

        family_friendly = self._html_search_meta(
            'isFamilyFriendly', webpage, 'age limit', fatal=False)

        content_url = self._html_search_meta(
            'contentURL', webpage, 'content URL', fatal=False)
        # The lookup above is non-fatal and may yield nothing; fall back to
        # mp4 directly instead of handing a missing URL to determine_ext.
        ext = determine_ext(content_url, 'mp4') if content_url else 'mp4'

        # Might be empty for some videos.
        qualities = self._html_search_regex(
            r'"quality"\s*:\s*"([^"]+)"', webpage, 'qualities', fatal=False, default='')

        formats = []
        # An empty quality string still yields one iteration (quality == ''),
        # which is reported as the 'sd' format.
        for quality in qualities.split('|'):
            stream_info = self._download_json(
                self._STREAM_URL.format(id=video_id, format=quality), video_id,
                note='Getting video URL for "%s" quality' % quality,
                errnote='Failed to get video URL for "%s" quality' % quality
            )
            formats.append({
                'url': stream_info.get('streamurl'),
                'ext': ext,
                'format_id': '%sp' % quality if quality else 'sd',
            })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader_id': uploader,
            'timestamp': timestamp,
            'duration': duration,
            'view_count': int_or_none(view_count),
            'comment_count': int_or_none(comment_count),
            'age_limit': 18 if family_friendly == 'False' else 0,
            'formats': formats,
        }
|
80
youtube_dl/extractor/jove.py
Normal file
80
youtube_dl/extractor/jove.py
Normal file
@@ -0,0 +1,80 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unified_strdate
|
||||
)
|
||||
|
||||
|
||||
class JoveIE(InfoExtractor):
    """Information extractor for jove.com video pages.

    The media URL is the ``video`` attribute of the root element of the
    chapters XML referenced from the page.
    """

    _VALID_URL = r'https?://(?:www\.)?jove\.com/video/(?P<id>[0-9]+)'
    _CHAPTERS_URL = 'http://www.jove.com/video-chapters?videoid={video_id:}'
    _TESTS = [
        {
            'url': 'http://www.jove.com/video/2744/electrode-positioning-montage-transcranial-direct-current',
            'md5': '93723888d82dbd6ba8b3d7d0cd65dd2b',
            'info_dict': {
                'id': '2744',
                'ext': 'mp4',
                'title': 'Electrode Positioning and Montage in Transcranial Direct Current Stimulation',
                'description': 'md5:015dd4509649c0908bc27f049e0262c6',
                'thumbnail': 're:^https?://.*\.png$',
                'upload_date': '20110523',
            }
        },
        {
            'url': 'http://www.jove.com/video/51796/culturing-caenorhabditis-elegans-axenic-liquid-media-creation',
            'md5': '914aeb356f416811d911996434811beb',
            'info_dict': {
                'id': '51796',
                'ext': 'mp4',
                'title': 'Culturing Caenorhabditis elegans in Axenic Liquid Media and Creation of Transgenic Worms by Microparticle Bombardment',
                'description': 'md5:35ff029261900583970c4023b70f1dc9',
                'thumbnail': 're:^https?://.*\.png$',
                'upload_date': '20140802',
            }
        },

    ]

    def _real_extract(self, url):
        """Return the info dict for ``url``.

        Raises ExtractorError when the chapters XML carries no video URL.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        # The id used by the chapters endpoint is scraped from the page
        # rather than assumed to equal the id in the URL.
        chapters_id = self._html_search_regex(
            r'/video-chapters\?videoid=([0-9]+)', webpage, 'chapters id')

        chapters_xml = self._download_xml(
            self._CHAPTERS_URL.format(video_id=chapters_id),
            video_id, note='Downloading chapters XML',
            errnote='Failed to download chapters XML')

        video_url = chapters_xml.attrib.get('video')
        if not video_url:
            raise ExtractorError('Failed to get the video URL')

        title = self._html_search_meta('citation_title', webpage, 'title')
        thumbnail = self._og_search_thumbnail(webpage)
        description = self._html_search_regex(
            r'<div id="section_body_summary"><p class="jove_content">(.+?)</p>',
            webpage, 'description', fatal=False)
        publish_date = unified_strdate(self._html_search_meta(
            'citation_publication_date', webpage, 'publish date', fatal=False))
        comment_count = self._html_search_regex(
            r'<meta name="num_comments" content="(\d+) Comments?"',
            webpage, 'comment count', fatal=False)
        # Report the count as a number, not the raw matched text.  The
        # pattern captures digits only, so int() cannot fail on a match.
        comment_count = int(comment_count) if comment_count else None

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'thumbnail': thumbnail,
            'description': description,
            'upload_date': publish_date,
            'comment_count': comment_count,
        }
|
@@ -1,5 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
@@ -43,10 +44,11 @@ class JustinTVIE(InfoExtractor):
|
||||
}
|
||||
|
||||
# Return count of items, list of *valid* items
|
||||
def _parse_page(self, url, video_id):
|
||||
info_json = self._download_webpage(url, video_id,
|
||||
'Downloading video info JSON',
|
||||
'unable to download video info JSON')
|
||||
def _parse_page(self, url, video_id, counter):
|
||||
info_json = self._download_webpage(
|
||||
url, video_id,
|
||||
'Downloading video info JSON on page %d' % counter,
|
||||
'Unable to download video info JSON %d' % counter)
|
||||
|
||||
response = json.loads(info_json)
|
||||
if type(response) != list:
|
||||
@@ -138,11 +140,10 @@ class JustinTVIE(InfoExtractor):
|
||||
entries = []
|
||||
offset = 0
|
||||
limit = self._JUSTIN_PAGE_LIMIT
|
||||
while True:
|
||||
if paged:
|
||||
self.report_download_page(video_id, offset)
|
||||
for counter in itertools.count(1):
|
||||
page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
|
||||
page_count, page_info = self._parse_page(page_url, video_id)
|
||||
page_count, page_info = self._parse_page(
|
||||
page_url, video_id, counter)
|
||||
entries.extend(page_info)
|
||||
if not paged or page_count != limit:
|
||||
break
|
||||
|
59
youtube_dl/extractor/krasview.py
Normal file
59
youtube_dl/extractor/krasview.py
Normal file
@@ -0,0 +1,59 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class KrasViewIE(InfoExtractor):
    """Information extractor for krasview.ru video pages.

    Most fields come from the ``flashvars`` JSON object embedded in the
    page; width and height come from Open Graph properties.
    """

    IE_DESC = 'Красвью'
    _VALID_URL = r'https?://krasview\.ru/video/(?P<id>\d+)'

    _TEST = {
        'url': 'http://krasview.ru/video/512228',
        'md5': '3b91003cf85fc5db277870c8ebd98eae',
        'info_dict': {
            'id': '512228',
            'ext': 'mp4',
            'title': 'Снег, лёд, заносы',
            'description': 'Снято в городе Нягань, в Ханты-Мансийском автономном округе.',
            'duration': 27,
            'thumbnail': 're:^https?://.*\.jpg',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for ``url``.

        Only the stream URL and title are required; every other field is
        left as None when the page does not provide it.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        flashvars = json.loads(self._search_regex(
            r'flashvars\s*:\s*({.+?})\s*}\);', webpage, 'flashvars'))

        video_url = flashvars['url']
        title = unescapeHTML(flashvars['title'])

        # Prefer the flashvars subtitle, then og:description.  Only unescape
        # an actual string, since both sources may be missing.
        description = flashvars.get('subtitle') or self._og_search_description(webpage, default=None)
        if description is not None:
            description = unescapeHTML(description)

        # Optional metadata: a missing key must not abort the extraction.
        thumbnail = flashvars.get('image')
        duration = int_or_none(flashvars.get('duration'))
        filesize = int_or_none(flashvars.get('size'))
        # default=None keeps these lookups from failing the whole extraction
        # when the page has no og:video:width / og:video:height.
        width = int_or_none(self._og_search_property(
            'video:width', webpage, 'video width', default=None))
        height = int_or_none(self._og_search_property(
            'video:height', webpage, 'video height', default=None))

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'filesize': filesize,
            'width': width,
            'height': height,
        }
|
@@ -5,11 +5,14 @@ import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
xpath_with_ns,
|
||||
compat_str,
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
xpath_with_ns,
|
||||
)
|
||||
|
||||
|
||||
@@ -24,20 +27,82 @@ class LivestreamIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Live from Webster Hall NYC',
|
||||
'upload_date': '20121012',
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'thumbnail': 're:^http://.*\.jpg$'
|
||||
}
|
||||
}
|
||||
|
||||
def _parse_smil(self, video_id, smil_url):
    """Return a list of format dicts described by the SMIL at ``smil_url``.

    The download is non-fatal: an empty list is returned when it fails.
    The ``switch`` node is selected by the id named in the SMIL ``title``
    meta entry, or the first ``switch`` is used when no such entry exists.

    Raises ExtractorError when no ``switch`` node can be found.
    """
    formats = []
    _SWITCH_XPATH = (
        './/{http://www.w3.org/2001/SMIL20/Language}body/'
        '{http://www.w3.org/2001/SMIL20/Language}switch')
    smil_doc = self._download_xml(
        smil_url, video_id,
        note='Downloading SMIL information',
        errnote='Unable to download SMIL information',
        fatal=False)
    if smil_doc is False:  # Download failed
        return formats
    title_node = find_xpath_attr(
        smil_doc, './/{http://www.w3.org/2001/SMIL20/Language}meta',
        'name', 'title')
    if title_node is None:
        self.report_warning('Cannot find SMIL id')
        switch_node = smil_doc.find(_SWITCH_XPATH)
    else:
        title_id = title_node.attrib['content']
        switch_node = find_xpath_attr(
            smil_doc, _SWITCH_XPATH, 'id', title_id)
    if switch_node is None:
        raise ExtractorError('Cannot find switch node')
    video_nodes = switch_node.findall(
        '{http://www.w3.org/2001/SMIL20/Language}video')

    for vn in video_nodes:
        # system-bitrate is read with .get(), so tbr may be None.
        tbr = int_or_none(vn.attrib.get('system-bitrate'))
        furl = (
            'http://livestream-f.akamaihd.net/%s?v=3.0.3&fp=WIN%%2014,0,0,145' %
            (vn.attrib['src']))
        if 'clipBegin' in vn.attrib:
            furl += '&ssek=' + vn.attrib['clipBegin']
        formats.append({
            'url': furl,
            # '%d' % None raises TypeError; use a plain id when the
            # bitrate is unknown.
            'format_id': 'smil_%d' % tbr if tbr is not None else 'smil',
            'ext': 'flv',
            'tbr': tbr,
            'preference': -1000,
        })
    return formats
|
||||
|
||||
def _extract_video_info(self, video_data):
|
||||
video_url = (
|
||||
video_data.get('progressive_url_hd') or
|
||||
video_data.get('progressive_url')
|
||||
video_id = compat_str(video_data['id'])
|
||||
|
||||
FORMAT_KEYS = (
|
||||
('sd', 'progressive_url'),
|
||||
('hd', 'progressive_url_hd'),
|
||||
)
|
||||
formats = [{
|
||||
'format_id': format_id,
|
||||
'url': video_data[key],
|
||||
'quality': i + 1,
|
||||
} for i, (format_id, key) in enumerate(FORMAT_KEYS)
|
||||
if video_data.get(key)]
|
||||
|
||||
smil_url = video_data.get('smil_url')
|
||||
if smil_url:
|
||||
formats.extend(self._parse_smil(video_id, smil_url))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': compat_str(video_data['id']),
|
||||
'url': video_url,
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': video_data['caption'],
|
||||
'thumbnail': video_data['thumbnail_url'],
|
||||
'thumbnail': video_data.get('thumbnail_url'),
|
||||
'upload_date': video_data['updated_at'].replace('-', '')[:8],
|
||||
'like_count': video_data.get('likes', {}).get('total'),
|
||||
'view_count': video_data.get('views'),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -46,17 +111,28 @@ class LivestreamIE(InfoExtractor):
|
||||
event_name = mobj.group('event_name')
|
||||
webpage = self._download_webpage(url, video_id or event_name)
|
||||
|
||||
if video_id is None:
|
||||
# This is an event page:
|
||||
og_video = self._og_search_video_url(webpage, 'player url', fatal=False, default=None)
|
||||
if og_video is None:
|
||||
config_json = self._search_regex(
|
||||
r'window.config = ({.*?});', webpage, 'window config')
|
||||
info = json.loads(config_json)['event']
|
||||
|
||||
def is_relevant(vdata, vid):
|
||||
result = vdata['type'] == 'video'
|
||||
if video_id is not None:
|
||||
result = result and compat_str(vdata['data']['id']) == vid
|
||||
return result
|
||||
|
||||
videos = [self._extract_video_info(video_data['data'])
|
||||
for video_data in info['feed']['data']
|
||||
if video_data['type'] == 'video']
|
||||
return self.playlist_result(videos, info['id'], info['full_name'])
|
||||
for video_data in info['feed']['data']
|
||||
if is_relevant(video_data, video_id)]
|
||||
if video_id is None:
|
||||
# This is an event page:
|
||||
return self.playlist_result(videos, info['id'], info['full_name'])
|
||||
else:
|
||||
if videos:
|
||||
return videos[0]
|
||||
else:
|
||||
og_video = self._og_search_video_url(webpage, 'player url')
|
||||
query_str = compat_urllib_parse_urlparse(og_video).query
|
||||
query = compat_urlparse.parse_qs(query_str)
|
||||
api_url = query['play_url'][0].replace('.smil', '')
|
||||
|
60
youtube_dl/extractor/mitele.py
Normal file
60
youtube_dl/extractor/mitele.py
Normal file
@@ -0,0 +1,60 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
get_element_by_attribute,
|
||||
parse_duration,
|
||||
strip_jsonp,
|
||||
)
|
||||
|
||||
|
||||
class MiTeleIE(InfoExtractor):
    """Information extractor for mitele.es episode pages.

    The page's ``MSV.embedData`` object names an XML info document; the
    video link found there is exchanged for a tokenized URL via
    token.mitele.es.
    """

    IE_NAME = 'mitele.es'
    _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<episode>[^/]+)/'

    _TEST = {
        'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
        'md5': '6a75fe9d0d3275bead0cb683c616fddb',
        'info_dict': {
            'id': '0fce117d',
            'ext': 'mp4',
            'title': 'Programa 144 - Tor, la web invisible',
            'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
            'display_id': 'programa-144',
            'duration': 2913,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the episode addressed by ``url``."""
        episode = re.match(self._VALID_URL, url).group('episode')
        webpage = self._download_webpage(url, episode)

        raw_embed_data = self._search_regex(
            r'MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data',
            flags=re.DOTALL
        )
        # Single quotes are swapped for double quotes before JSON parsing.
        embed_data = json.loads(raw_embed_data.replace('\'', '"'))

        info_doc = self._download_xml(embed_data['flashvars']['host'], episode)
        info_el = info_doc.find('./video/info')

        # The link from the info XML is sent as the ``id`` query parameter;
        # the response is passed through strip_jsonp before parsing.
        video_link = info_el.find('videoUrl/link').text
        token_query = compat_urllib_parse.urlencode({'id': video_link})
        token_info = self._download_json(
            'http://token.mitele.es/?' + token_query, episode,
            transform_source=strip_jsonp
        )

        result = {}
        result['id'] = embed_data['videoId']
        result['display_id'] = episode
        result['title'] = info_el.find('title').text
        result['url'] = token_info['tokenizedUrl']
        result['description'] = get_element_by_attribute('class', 'text', webpage)
        result['thumbnail'] = info_el.find('thumb').text
        result['duration'] = parse_duration(info_el.find('duration').text)
        return result
|
58
youtube_dl/extractor/mojvideo.py
Normal file
58
youtube_dl/extractor/mojvideo.py
Normal file
@@ -0,0 +1,58 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class MojvideoIE(InfoExtractor):
    """Information extractor for mojvideo.com video pages.

    All fields are scraped with regular expressions from the player API
    response, which is fetched as plain text rather than parsed as XML.
    """

    _VALID_URL = r'https?://(?:www\.)?mojvideo\.com/video-(?P<display_id>[^/]+)/(?P<id>[a-f0-9]+)'
    _TEST = {
        'url': 'http://www.mojvideo.com/video-v-avtu-pred-mano-rdecelaska-alfi-nipic/3d1ed4497707730b2906',
        'md5': 'f7fd662cc8ce2be107b0d4f2c0483ae7',
        'info_dict': {
            'id': '3d1ed4497707730b2906',
            'display_id': 'v-avtu-pred-mano-rdecelaska-alfi-nipic',
            'ext': 'mp4',
            'title': 'V avtu pred mano rdečelaska - Alfi Nipič',
            'thumbnail': 're:^http://.*\.jpg$',
            'duration': 242,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for ``url``.

        Raises an expected ExtractorError when the player API reports an
        error.
        """
        match = re.match(self._VALID_URL, url)
        video_id, display_id = match.group('id'), match.group('display_id')

        # XML is malformed
        api_response = self._download_webpage(
            'http://www.mojvideo.com/playerapi.php?v=%s&t=1' % video_id, display_id)

        def scrape(pattern, name, **kwargs):
            # Search the player API response for one field.
            return self._html_search_regex(pattern, api_response, name, **kwargs)

        if '<error>true</error>' in api_response:
            reason = scrape(
                r'<errordesc>([^<]*)</errordesc>', 'error description', fatal=False)
            raise ExtractorError('%s said: %s' % (self.IE_NAME, reason), expected=True)

        video_title = scrape(r'<title>([^<]+)</title>', 'title')
        media_url = scrape(r'<file>([^<]+)</file>', 'video URL')
        preview = scrape(r'<preview>([^<]+)</preview>', 'thumbnail', fatal=False)
        length = parse_duration(
            scrape(r'<duration>([^<]+)</duration>', 'duration', fatal=False))

        return {
            'id': video_id,
            'display_id': display_id,
            'url': media_url,
            'title': video_title,
            'thumbnail': preview,
            'duration': length,
        }
|
@@ -1,3 +1,4 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@@ -8,19 +9,34 @@ from ..utils import ExtractorError
|
||||
|
||||
|
||||
class NownessIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nowness\.com/[^?#]*?/(?P<id>[0-9]+)/(?P<slug>[^/]+?)(?:$|[?#])'
|
||||
_VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/[^?#]*?/(?P<id>[0-9]+)/(?P<slug>[^/]+?)(?:$|[?#])'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation',
|
||||
'md5': '068bc0202558c2e391924cb8cc470676',
|
||||
'info_dict': {
|
||||
'id': '2520295746001',
|
||||
'ext': 'mp4',
|
||||
'description': 'Candor: The Art of Gesticulation',
|
||||
'uploader': 'Nowness',
|
||||
'title': 'Candor: The Art of Gesticulation',
|
||||
}
|
||||
}
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation',
|
||||
'md5': '068bc0202558c2e391924cb8cc470676',
|
||||
'info_dict': {
|
||||
'id': '2520295746001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Candor: The Art of Gesticulation',
|
||||
'description': 'Candor: The Art of Gesticulation',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'uploader': 'Nowness',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://cn.nowness.com/day/2014/8/7/4069/kasper-bj-rke-ft-jaakko-eino-kalevi--tnr',
|
||||
'md5': 'e79cf125e387216f86b2e0a5b5c63aa3',
|
||||
'info_dict': {
|
||||
'id': '3716354522001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
|
||||
'description': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'uploader': 'Nowness',
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@@ -1,40 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import calendar
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
# audios on oe1.orf.at are only available for 7 days, so we can't
|
||||
# add tests.
|
||||
|
||||
|
||||
class OE1IE(InfoExtractor):
    """Information extractor for oe1.orf.at programme pages.

    All fields come from the ``item`` object of the programme's
    ``konsole`` JSON document.
    """

    IE_DESC = 'oe1.orf.at'
    _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'

    def _real_extract(self, url):
        """Build the info dict for the programme addressed by ``url``."""
        show_id = re.match(self._VALID_URL, url).group('id')

        console = self._download_json(
            'http://oe1.orf.at/programm/%s/konsole' % show_id,
            show_id
        )
        item = console['item']

        # Day label and time are joined and parsed as "dd.mm.YYYY HH:MM".
        broadcast_label = '%s %s' % (item['day_label'], item['time'])
        broadcast_time = datetime.datetime.strptime(broadcast_label, '%d.%m.%Y %H:%M')
        # The parsed time carries no timezone; timegm converts it to a Unix
        # timestamp as if it were UTC.
        unix_timestamp = calendar.timegm(broadcast_time.utctimetuple())

        info = {'id': show_id}
        info['title'] = item['title']
        info['url'] = item['url_stream']
        info['ext'] = 'mp3'
        info['description'] = item.get('info')
        info['timestamp'] = unix_timestamp
        return info
|
@@ -3,23 +3,38 @@ import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unescapeHTML
|
||||
from ..utils import (
|
||||
unescapeHTML,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class OoyalaIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(&|$)'
|
||||
|
||||
_TEST = {
|
||||
# From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
|
||||
'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
|
||||
'md5': '3f5cceb3a7bf461d6c29dc466cf8033c',
|
||||
'info_dict': {
|
||||
'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
|
||||
'ext': 'mp4',
|
||||
'title': 'Explaining Data Recovery from Hard Drives and SSDs',
|
||||
'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
|
||||
_TESTS = [
|
||||
{
|
||||
# From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
|
||||
'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
|
||||
'md5': '3f5cceb3a7bf461d6c29dc466cf8033c',
|
||||
'info_dict': {
|
||||
'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
|
||||
'ext': 'mp4',
|
||||
'title': 'Explaining Data Recovery from Hard Drives and SSDs',
|
||||
'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
|
||||
},
|
||||
}, {
|
||||
# Only available for ipad
|
||||
'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
|
||||
'md5': '4b9754921fddb68106e48c142e2a01e6',
|
||||
'info_dict': {
|
||||
'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
|
||||
'ext': 'mp4',
|
||||
'title': 'Simulation Overview - Levels of Simulation',
|
||||
'description': '',
|
||||
},
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _url_for_embed_code(embed_code):
|
||||
@@ -47,13 +62,30 @@ class OoyalaIE(InfoExtractor):
|
||||
player = self._download_webpage(player_url, embedCode)
|
||||
mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
|
||||
player, 'mobile player url')
|
||||
mobile_player = self._download_webpage(mobile_url, embedCode)
|
||||
videos_info = self._search_regex(
|
||||
r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
|
||||
mobile_player, 'info').replace('\\"','"')
|
||||
videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"','"')
|
||||
# Looks like some videos are only available for particular devices
|
||||
# (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0
|
||||
# is only available for ipad)
|
||||
# Working around with fetching URLs for all the devices found starting with 'unknown'
|
||||
# until we succeed or eventually fail for each device.
|
||||
devices = re.findall(r'device\s*=\s*"([^"]+)";', player)
|
||||
devices.remove('unknown')
|
||||
devices.insert(0, 'unknown')
|
||||
for device in devices:
|
||||
mobile_player = self._download_webpage(
|
||||
'%s&device=%s' % (mobile_url, device), embedCode,
|
||||
'Downloading mobile player JS for %s device' % device)
|
||||
videos_info = self._search_regex(
|
||||
r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
|
||||
mobile_player, 'info', fatal=False, default=None)
|
||||
if videos_info:
|
||||
break
|
||||
if not videos_info:
|
||||
raise ExtractorError('Unable to extract info')
|
||||
videos_info = videos_info.replace('\\"', '"')
|
||||
videos_more_info = self._search_regex(
|
||||
r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"')
|
||||
videos_info = json.loads(videos_info)
|
||||
videos_more_info =json.loads(videos_more_info)
|
||||
videos_more_info = json.loads(videos_more_info)
|
||||
|
||||
if videos_more_info.get('lineup'):
|
||||
videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]
|
||||
|
@@ -3,6 +3,8 @@ from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
import calendar
|
||||
import datetime
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -12,7 +14,9 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class ORFIE(InfoExtractor):
|
||||
class ORFTVthekIE(InfoExtractor):
|
||||
IE_NAME = 'orf:tvthek'
|
||||
IE_DESC = 'ORF TVthek'
|
||||
_VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
@@ -105,3 +109,73 @@ class ORFIE(InfoExtractor):
|
||||
'entries': entries,
|
||||
'id': playlist_id,
|
||||
}
|
||||
|
||||
|
||||
# Audios on ORF radio are only available for 7 days, so we can't add tests.
|
||||
|
||||
|
||||
class ORFOE1IE(InfoExtractor):
    """Extractor for single programme pages on oe1.orf.at."""

    IE_NAME = 'orf:oe1'
    IE_DESC = 'Radio Österreich 1'
    _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'

    def _real_extract(self, url):
        """Download the programme's "konsole" JSON and build the info dict.

        The numeric id is taken from the URL; every metadata field is read
        from the ``item`` object of the downloaded JSON.
        """
        show_id = re.match(self._VALID_URL, url).group('id')

        console_url = 'http://oe1.orf.at/programm/%s/konsole' % show_id
        item = self._download_json(console_url, show_id)['item']

        # Day and time arrive as two separate strings; join them so a single
        # strptime() call can parse both.
        aired_text = '%s %s' % (item['day_label'], item['time'])
        aired = datetime.datetime.strptime(aired_text, '%d.%m.%Y %H:%M')
        # The parsed value is naive and is converted as if it were UTC.
        # NOTE(review): the feed's actual time zone is not visible here.
        unix_timestamp = calendar.timegm(aired.utctimetuple())

        return {
            'id': show_id,
            'title': item['title'],
            'url': item['url_stream'],
            'ext': 'mp3',
            'description': item.get('info'),
            'timestamp': unix_timestamp
        }
|
||||
|
||||
|
||||
class ORFFM4IE(InfoExtractor):
    """Extractor for fm4.orf.at "7tage" broadcast URLs.

    A URL carries a date and a show key in its fragment; the matching
    broadcast JSON is downloaded and each of its streams becomes one entry
    of the returned playlist.
    """

    # Was a second ``IE_DESC`` assignment that the next line overwrote;
    # the sibling ORF extractors expose this kind of value as ``IE_NAME``.
    IE_NAME = 'orf:fm4'
    IE_DESC = 'radio FM4'
    _VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P<date>[0-9]+)/(?P<show>\w+)'

    def _real_extract(self, url):
        """Return a playlist dict with one entry per stream of the broadcast."""
        mobj = re.match(self._VALID_URL, url)
        show_date = mobj.group('date')
        show_id = mobj.group('show')

        data = self._download_json(
            'http://audioapi.orf.at/fm4/json/2.0/broadcasts/%s/4%s' % (show_date, show_id),
            show_id
        )

        def extract_entry_dict(info, title, subtitle):
            """Build the info dict for one element of ``data['streams']``."""
            return {
                # The stream id doubles as the entry id once '.mp3' is removed.
                'id': info['loopStreamId'].replace('.mp3', ''),
                'url': 'http://loopstream01.apa.at/?channel=fm4&id=%s' % info['loopStreamId'],
                'title': title,
                'description': subtitle,
                # NOTE(review): 'start'/'end' are presumably milliseconds,
                # hence the division by 1000 - confirm against the API.
                'duration': (info['end'] - info['start']) / 1000,
                'timestamp': info['start'] / 1000,
                'ext': 'mp3'
            }

        # Every entry shares the broadcast-level title and subtitle.
        entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']]

        return {
            '_type': 'playlist',
            'id': show_id,
            'title': data['title'],
            'description': data['subtitle'],
            'entries': entries
        }
|
@@ -20,25 +20,60 @@ class PBSIE(InfoExtractor):
|
||||
)
|
||||
'''
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
|
||||
'md5': 'ce1888486f0908d555a8093cac9a7362',
|
||||
'info_dict': {
|
||||
'id': '2365006249',
|
||||
'ext': 'mp4',
|
||||
'title': 'A More Perfect Union',
|
||||
'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
|
||||
'duration': 3190,
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
|
||||
'md5': 'ce1888486f0908d555a8093cac9a7362',
|
||||
'info_dict': {
|
||||
'id': '2365006249',
|
||||
'ext': 'mp4',
|
||||
'title': 'A More Perfect Union',
|
||||
'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
|
||||
'duration': 3190,
|
||||
},
|
||||
},
|
||||
}
|
||||
{
|
||||
'url': 'http://www.pbs.org/wgbh/pages/frontline/losing-iraq/',
|
||||
'md5': '143c98aa54a346738a3d78f54c925321',
|
||||
'info_dict': {
|
||||
'id': '2365297690',
|
||||
'ext': 'mp4',
|
||||
'title': 'Losing Iraq',
|
||||
'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
|
||||
'duration': 5050,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.pbs.org/newshour/bb/education-jan-june12-cyberschools_02-23/',
|
||||
'md5': 'b19856d7f5351b17a5ab1dc6a64be633',
|
||||
'info_dict': {
|
||||
'id': '2201174722',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cyber Schools Gain Popularity, but Quality Questions Persist',
|
||||
'description': 'md5:5871c15cba347c1b3d28ac47a73c7c28',
|
||||
'duration': 801,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
def _extract_ids(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
presumptive_id = mobj.group('presumptive_id')
|
||||
display_id = presumptive_id
|
||||
if presumptive_id:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
MEDIA_ID_REGEXES = [
|
||||
r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", # frontline video embed
|
||||
r'class="coveplayerid">([^<]+)<', # coveplayer
|
||||
]
|
||||
|
||||
media_id = self._search_regex(
|
||||
MEDIA_ID_REGEXES, webpage, 'media ID', fatal=False, default=None)
|
||||
if media_id:
|
||||
return media_id, presumptive_id
|
||||
|
||||
url = self._search_regex(
|
||||
r'<iframe\s+id=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
|
||||
webpage, 'player URL')
|
||||
@@ -57,6 +92,11 @@ class PBSIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
display_id = video_id
|
||||
|
||||
return video_id, display_id
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, display_id = self._extract_ids(url)
|
||||
|
||||
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
|
||||
info = self._download_json(info_url, display_id)
|
||||
|
||||
|
@@ -1,23 +1,23 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import strip_jsonp
|
||||
from ..utils import str_or_none
|
||||
|
||||
|
||||
class ReverbNationIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
|
||||
'file': '16965047.mp3',
|
||||
'md5': '3da12ebca28c67c111a7f8b262d3f7a7',
|
||||
'info_dict': {
|
||||
"id": "16965047",
|
||||
"ext": "mp3",
|
||||
"title": "MONA LISA",
|
||||
"uploader": "ALKILADOS",
|
||||
"uploader_id": 216429,
|
||||
"thumbnail": "//gp1.wac.edgecastcdn.net/802892/production_public/Photo/13761700/image/1366002176_AVATAR_MONA_LISA.jpg"
|
||||
"uploader_id": "216429",
|
||||
"thumbnail": "re:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$"
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -26,10 +26,8 @@ class ReverbNationIE(InfoExtractor):
|
||||
song_id = mobj.group('id')
|
||||
|
||||
api_res = self._download_json(
|
||||
'https://api.reverbnation.com/song/%s?callback=api_response_5&_=%d'
|
||||
% (song_id, int(time.time() * 1000)),
|
||||
'https://api.reverbnation.com/song/%s' % song_id,
|
||||
song_id,
|
||||
transform_source=strip_jsonp,
|
||||
note='Downloading information of song %s' % song_id
|
||||
)
|
||||
|
||||
@@ -38,8 +36,9 @@ class ReverbNationIE(InfoExtractor):
|
||||
'title': api_res.get('name'),
|
||||
'url': api_res.get('url'),
|
||||
'uploader': api_res.get('artist', {}).get('name'),
|
||||
'uploader_id': api_res.get('artist', {}).get('id'),
|
||||
'thumbnail': api_res.get('image', api_res.get('thumbnail')),
|
||||
'uploader_id': str_or_none(api_res.get('artist', {}).get('id')),
|
||||
'thumbnail': self._proto_relative_url(
|
||||
api_res.get('image', api_res.get('thumbnail'))),
|
||||
'ext': 'mp3',
|
||||
'vcodec': 'none',
|
||||
}
|
||||
|
52
youtube_dl/extractor/rtlnl.py
Normal file
52
youtube_dl/extractor/rtlnl.py
Normal file
@@ -0,0 +1,52 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class RtlXlIE(InfoExtractor):
    """Extractor for rtlxl.nl videos, identified by the UUID in the URL fragment."""

    IE_NAME = 'rtlxl.nl'
    _VALID_URL = r'https?://www\.rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)'

    _TEST = {
        'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
        'info_dict': {
            'id': '6e4203a6-0a5e-3596-8424-c599a59e0677',
            'ext': 'flv',
            'title': 'RTL Nieuws - Laat',
            'description': 'Dagelijks het laatste nieuws uit binnen- en '
                'buitenland. Voor nog meer nieuws kunt u ook gebruikmaken van '
                'onze mobiele apps.',
            'timestamp': 1408051800,
            'upload_date': '20140814',
        },
        'params': {
            # We download the first bytes of the first fragment, it can't be
            # processed by the f4m downloader because it isn't complete
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Download the video's JSON description and build the info dict.

        Title, formats, timestamp and description all come from the first
        ``material``, ``episodes`` and ``abstracts`` elements of that JSON.
        """
        mobj = re.match(self._VALID_URL, url)
        uuid = mobj.group('uuid')

        info = self._download_json(
            'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
            uuid)
        # The 'meta' object is not needed for any extracted field, so it is
        # no longer read; a response without it must not abort extraction.
        material = info['material'][0]
        episode_info = info['episodes'][0]

        f4m_url = 'http://manifest.us.rtl.nl' + material['videopath']
        progname = info['abstracts'][0]['name']
        # Fall back to the episode name when the material has an empty title.
        subtitle = material['title'] or episode_info['name']

        return {
            'id': uuid,
            'title': '%s - %s' % (progname, subtitle),
            'formats': self._extract_f4m_formats(f4m_url, uuid),
            'timestamp': material['original_date'],
            'description': episode_info['synopsis'],
        }
|
57
youtube_dl/extractor/shared.py
Normal file
57
youtube_dl/extractor/shared.py
Normal file
@@ -0,0 +1,57 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class SharedIE(InfoExtractor):
    """Extractor for files hosted on shared.sx.

    The landing page contains a form of hidden inputs; posting that form
    back to the same URL returns the page holding the actual media URL.
    """

    _VALID_URL = r'http://shared\.sx/(?P<id>[\da-z]{10})'

    _TEST = {
        'url': 'http://shared.sx/0060718775',
        'md5': '106fefed92a8a2adb8c98e6a0652f49b',
        'info_dict': {
            'id': '0060718775',
            'ext': 'mp4',
            'title': 'Bmp4',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the file behind ``url``.

        Raises:
            ExtractorError: (expected) when the page reports that the file
                does not exist.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        page = self._download_webpage(url, video_id)

        if re.search(r'>File does not exist<', page) is not None:
            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        # Collect every hidden input so the form can be replayed verbatim.
        download_form = dict(re.findall(r'<input type="hidden" name="([^"]+)" value="([^"]*)"', page))

        # urlencode() returns text on Python 3, where a request body has to
        # be bytes; the percent-encoded form is pure ASCII, so encoding it
        # is lossless on both Python 2 and 3.
        post_data = compat_urllib_parse.urlencode(download_form).encode('ascii')
        request = compat_urllib_request.Request(url, post_data)
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')

        video_page = self._download_webpage(request, video_id, 'Downloading video page')

        video_url = self._html_search_regex(r'data-url="([^"]+)"', video_page, 'video URL')
        # The title meta tag of the landing page holds base64-encoded UTF-8.
        title = base64.b64decode(self._html_search_meta('full:title', page, 'title')).decode('utf-8')
        filesize = int_or_none(self._html_search_meta('full:size', page, 'file size', fatal=False))
        thumbnail = self._html_search_regex(
            r'data-poster="([^"]+)"', video_page, 'thumbnail', fatal=False, default=None)

        return {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'filesize': filesize,
            'title': title,
            'thumbnail': thumbnail,
        }
|
@@ -1,4 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
|
||||
@@ -10,18 +12,18 @@ from ..utils import (
|
||||
|
||||
|
||||
class StreamcloudIE(InfoExtractor):
|
||||
IE_NAME = u'streamcloud.eu'
|
||||
IE_NAME = 'streamcloud.eu'
|
||||
_VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)/(?P<fname>[^#?]*)\.html'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
|
||||
u'file': u'skp9j99s4bpz.mp4',
|
||||
u'md5': u'6bea4c7fa5daaacc2a946b7146286686',
|
||||
u'info_dict': {
|
||||
u'title': u'youtube-dl test video \'/\\ ä ↭',
|
||||
u'duration': 9,
|
||||
'url': 'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
|
||||
'md5': '6bea4c7fa5daaacc2a946b7146286686',
|
||||
'info_dict': {
|
||||
'id': 'skp9j99s4bpz',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl test video \'/\\ ä ↭',
|
||||
},
|
||||
u'skip': u'Only available from the EU'
|
||||
'skip': 'Only available from the EU'
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -46,21 +48,17 @@ class StreamcloudIE(InfoExtractor):
|
||||
req = compat_urllib_request.Request(url, post, headers)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
req, video_id, note=u'Downloading video page ...')
|
||||
req, video_id, note='Downloading video page ...')
|
||||
title = self._html_search_regex(
|
||||
r'<h1[^>]*>([^<]+)<', webpage, u'title')
|
||||
r'<h1[^>]*>([^<]+)<', webpage, 'title')
|
||||
video_url = self._search_regex(
|
||||
r'file:\s*"([^"]+)"', webpage, u'video URL')
|
||||
duration_str = self._search_regex(
|
||||
r'duration:\s*"?([0-9]+)"?', webpage, u'duration', fatal=False)
|
||||
duration = None if duration_str is None else int(duration_str)
|
||||
r'file:\s*"([^"]+)"', webpage, 'video URL')
|
||||
thumbnail = self._search_regex(
|
||||
r'image:\s*"([^"]+)"', webpage, u'thumbnail URL', fatal=False)
|
||||
r'image:\s*"([^"]+)"', webpage, 'thumbnail URL', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
@@ -8,7 +8,7 @@ from ..utils import parse_duration
|
||||
|
||||
|
||||
class SWRMediathekIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/player\.htm\?show=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
_VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/(?:content/)?player\.htm\?show=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://swrmediathek.de/player.htm?show=849790d0-dab8-11e3-a953-0026b975f2e6',
|
||||
@@ -52,6 +52,20 @@ class SWRMediathekIE(InfoExtractor):
|
||||
'uploader': 'SWR 2',
|
||||
'uploader_id': '284670',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://swrmediathek.de/content/player.htm?show=52dc7e00-15c5-11e4-84bc-0026b975f2e6',
|
||||
'md5': '881531487d0633080a8cc88d31ef896f',
|
||||
'info_dict': {
|
||||
'id': '52dc7e00-15c5-11e4-84bc-0026b975f2e6',
|
||||
'ext': 'mp4',
|
||||
'title': 'Familienspaß am Bodensee',
|
||||
'description': 'md5:0b591225a32cfde7be1629ed49fe4315',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'duration': 1784,
|
||||
'upload_date': '20140727',
|
||||
'uploader': 'SWR Fernsehen BW',
|
||||
'uploader_id': '281130',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -37,7 +37,7 @@ class TeamcocoIE(InfoExtractor):
|
||||
video_id = mobj.group("video_id")
|
||||
if not video_id:
|
||||
video_id = self._html_search_regex(
|
||||
r'<article class="video" data-id="(\d+?)"',
|
||||
r'data-node-id="(\d+?)"',
|
||||
webpage, 'video id')
|
||||
|
||||
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
|
||||
|
85
youtube_dl/extractor/tvplay.py
Normal file
85
youtube_dl/extractor/tvplay.py
Normal file
@@ -0,0 +1,85 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class TVPlayIE(InfoExtractor):
    """Extractor for tvplay.lv programme pages, served by the playapi.mtgx.tv API."""

    _VALID_URL = r'http://(?:www\.)?tvplay\.lv/parraides/[^/]+/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true',
            'info_dict': {
                'id': '418113',
                'ext': 'flv',
                'title': 'Kādi ir īri? - Viņas melo labāk',
                'description': 'Baiba apsmej īrus, kādi tie ir un ko viņi dara.',
                'duration': 25,
                'timestamp': 1406097056,
                'upload_date': '20140723',
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
    ]

    def _real_extract(self, url):
        """Query the video and stream endpoints and assemble the info dict.

        Raises:
            ExtractorError: (expected) when the API marks the video as
                geo-blocked.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        video = self._download_json(
            'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON')

        if video['is_geo_blocked']:
            raise ExtractorError(
                'This content is not available in your country due to copyright reasons', expected=True)

        streams = self._download_json(
            'http://playapi.mtgx.tv/v1/videos/stream/%s' % video_id, video_id, 'Downloading streams JSON')

        rank = qualities(['hls', 'medium', 'high'])
        formats = []
        for stream_name, stream_url in streams['streams'].items():
            # Stream slots without a URL are skipped.
            if not stream_url:
                continue
            entry = {
                'format_id': stream_name,
                'preference': rank(stream_name),
            }
            if not stream_url.startswith('rtmp'):
                entry['url'] = stream_url
            else:
                # Split an rtmp URL into connection URL, app and play path;
                # URLs that do not fit this shape are dropped.
                parts = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', stream_url)
                if parts is None:
                    continue
                entry['ext'] = 'flv'
                entry['url'] = parts.group('url')
                entry['app'] = parts.group('app')
                entry['play_path'] = parts.group('playpath')
            formats.append(entry)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video['title'],
            'description': video['description'],
            'duration': video['duration'],
            'timestamp': parse_iso8601(video['created_at']),
            'view_count': video['views']['total'],
            'age_limit': video.get('age_limit', 0),
            'formats': formats,
        }
|
56
youtube_dl/extractor/ubu.py
Normal file
56
youtube_dl/extractor/ubu.py
Normal file
@@ -0,0 +1,56 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class UbuIE(InfoExtractor):
    """Extractor for film pages on ubu.com."""

    _VALID_URL = r'http://(?:www\.)?ubu\.com/film/(?P<id>[\da-z_-]+)\.html'
    _TEST = {
        'url': 'http://ubu.com/film/her_noise.html',
        'md5': '8edd46ee8aa6b265fb5ed6cf05c36bc9',
        'info_dict': {
            'id': 'her_noise',
            'ext': 'mp4',
            'title': 'Her Noise - The Making Of (2007)',
            'duration': 3600,
        },
    }

    def _real_extract(self, url):
        """Scrape title, duration and the available formats from the film page."""
        video_id = re.match(self._VALID_URL, url).group('id')

        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<title>.+?Film & Video: ([^<]+)</title>', webpage, 'title')

        # The page states the running time in minutes; report seconds.
        minutes = int_or_none(self._html_search_regex(
            r'Duration: (\d+) minutes', webpage, 'duration', fatal=False, default=None))
        duration = minutes * 60 if minutes else minutes

        # (format id, regex whose first group is the media URL)
        FORMAT_REGEXES = [
            ['sq', r"'flashvars'\s*,\s*'file=([^']+)'"],
            ['hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"']
        ]

        found = (
            (format_id, re.search(format_regex, webpage))
            for format_id, format_regex in FORMAT_REGEXES)
        formats = [
            {
                'url': hit.group(1),
                'format_id': format_id,
            }
            for format_id, hit in found if hit
        ]

        return {
            'id': video_id,
            'title': title,
            'duration': duration,
            'formats': formats,
        }
|
@@ -177,6 +177,7 @@ class VevoIE(InfoExtractor):
|
||||
self._downloader.report_warning(
|
||||
'Cannot download SMIL information, falling back to JSON ..')
|
||||
|
||||
self._sort_formats(formats)
|
||||
timestamp_ms = int(self._search_regex(
|
||||
r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date'))
|
||||
|
||||
|
68
youtube_dl/extractor/vidme.py
Normal file
68
youtube_dl/extractor/vidme.py
Normal file
@@ -0,0 +1,68 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class VidmeIE(InfoExtractor):
    """Extractor for vid.me video pages (plain and ``/e/`` URLs)."""

    _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]+)'
    _TEST = {
        'url': 'https://vid.me/QNB',
        'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
        'info_dict': {
            'id': 'QNB',
            'ext': 'mp4',
            'title': 'Fishing for piranha - the easy way',
            'description': 'source: https://www.facebook.com/photo.php?v=312276045600871',
            'duration': 119.92,
            'timestamp': 1406313244,
            'upload_date': '20140725',
            'thumbnail': 're:^https?://.*\.jpg',
        },
    }

    def _real_extract(self, url):
        """Scrape the media URL and metadata from the video page.

        Only the media URL, title and thumbnail lookups use the default
        (fatal) mode; the remaining fields are looked up non-fatally.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        webpage = self._download_webpage(url, video_id)

        def og_int(prop):
            # Optional integer taken from an OpenGraph property.
            return int_or_none(self._og_search_property(prop, webpage, fatal=False))

        def counter(pattern, name):
            # Optional count taken from a page attribute or label.
            return str_to_int(self._html_search_regex(pattern, webpage, name, fatal=False))

        # The values are evaluated top to bottom, i.e. in the order in which
        # the fields are listed.
        return {
            'id': video_id,
            'url': self._html_search_regex(r'<source src="([^"]+)"', webpage, 'video URL'),
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage, default=''),
            'thumbnail': self._og_search_thumbnail(webpage),
            'timestamp': og_int('updated_time'),
            'width': og_int('video:width'),
            'height': og_int('video:height'),
            'duration': float_or_none(self._html_search_regex(
                r'data-duration="([^"]+)"', webpage, 'duration', fatal=False)),
            'view_count': counter(
                r'<span class="video_views">\s*([\d,\.]+)\s*plays?', 'view count'),
            'like_count': counter(
                r'class="score js-video-vote-score"[^>]+data-score="([\d,\.\s]+)">',
                'like count'),
            'comment_count': counter(
                r'class="js-comment-count"[^>]+data-count="([\d,\.\s]+)">',
                'comment count'),
        }
|
@@ -121,6 +121,21 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
'videopassword': 'youtube-dl',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://vimeo.com/channels/keypeele/75629013',
|
||||
'md5': '2f86a05afe9d7abc0b9126d229bbe15d',
|
||||
'note': 'Video is freely available via original URL '
|
||||
'and protected with password when accessed via http://vimeo.com/75629013',
|
||||
'info_dict': {
|
||||
'id': '75629013',
|
||||
'ext': 'mp4',
|
||||
'title': 'Key & Peele: Terrorist Interrogation',
|
||||
'description': 'md5:8678b246399b070816b12313e8b4eb5c',
|
||||
'uploader_id': 'atencio',
|
||||
'uploader': 'Peter Atencio',
|
||||
'duration': 187,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://vimeo.com/76979871',
|
||||
'md5': '3363dd6ffebe3784d56f4132317fd446',
|
||||
@@ -196,8 +211,6 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
if mobj.group('pro') or mobj.group('player'):
|
||||
url = 'http://player.vimeo.com/video/' + video_id
|
||||
else:
|
||||
url = 'https://vimeo.com/' + video_id
|
||||
|
||||
# Retrieve video webpage to extract further information
|
||||
request = compat_urllib_request.Request(url, None, headers)
|
||||
@@ -263,7 +276,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
if video_thumbnail is None:
|
||||
video_thumbs = config["video"].get("thumbs")
|
||||
if video_thumbs and isinstance(video_thumbs, dict):
|
||||
_, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in video_thumbs.items())[-1]
|
||||
_, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1]
|
||||
|
||||
# Extract video description
|
||||
video_description = None
|
||||
|
@@ -1,10 +1,12 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
compat_str,
|
||||
)
|
||||
|
||||
|
||||
class VubeIE(InfoExtractor):
|
||||
@@ -29,6 +31,7 @@ class VubeIE(InfoExtractor):
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'categories': ['pop', 'music', 'cover', 'singing', 'jessie j', 'price tag', 'chiara grispo'],
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -47,6 +50,7 @@ class VubeIE(InfoExtractor):
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'categories': ['seraina', 'jessica', 'krewella', 'alive'],
|
||||
}
|
||||
}, {
|
||||
'url': 'http://vube.com/vote/Siren+Gene/0nmsMY5vEq?n=2&t=s',
|
||||
@@ -56,13 +60,15 @@ class VubeIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Frozen - Let It Go Cover by Siren Gene',
|
||||
'description': 'My rendition of "Let It Go" originally sung by Idina Menzel.',
|
||||
'uploader': 'Siren Gene',
|
||||
'uploader_id': 'Siren',
|
||||
'thumbnail': 're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/10283ab622a-86c9-4681-51f2-30d1f65774af\.jpg$',
|
||||
'uploader': 'Siren',
|
||||
'timestamp': 1395448018,
|
||||
'upload_date': '20140322',
|
||||
'duration': 221.788,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'categories': ['let it go', 'cover', 'idina menzel', 'frozen', 'singing', 'disney', 'siren gene'],
|
||||
}
|
||||
}
|
||||
]
|
||||
@@ -71,47 +77,40 @@ class VubeIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
data_json = self._search_regex(
|
||||
r'(?s)window\["(?:tapiVideoData|vubeOriginalVideoData)"\]\s*=\s*(\{.*?\n});\n',
|
||||
webpage, 'video data'
|
||||
)
|
||||
data = json.loads(data_json)
|
||||
video = (
|
||||
data.get('video') or
|
||||
data)
|
||||
assert isinstance(video, dict)
|
||||
video = self._download_json(
|
||||
'http://vube.com/t-api/v1/video/%s' % video_id, video_id, 'Downloading video JSON')
|
||||
|
||||
public_id = video['public_id']
|
||||
|
||||
formats = [
|
||||
{
|
||||
'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (fmt['media_resolution_id'], public_id),
|
||||
'height': int(fmt['height']),
|
||||
'abr': int(fmt['audio_bitrate']),
|
||||
'vbr': int(fmt['video_bitrate']),
|
||||
'format_id': fmt['media_resolution_id']
|
||||
} for fmt in video['mtm'] if fmt['transcoding_status'] == 'processed'
|
||||
]
|
||||
formats = []
|
||||
|
||||
for media in video['media'].get('video', []) + video['media'].get('audio', []):
|
||||
if media['transcoding_status'] != 'processed':
|
||||
continue
|
||||
fmt = {
|
||||
'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (media['media_resolution_id'], public_id),
|
||||
'abr': int(media['audio_bitrate']),
|
||||
'format_id': compat_str(media['media_resolution_id']),
|
||||
}
|
||||
vbr = int(media['video_bitrate'])
|
||||
if vbr:
|
||||
fmt.update({
|
||||
'vbr': vbr,
|
||||
'height': int(media['height']),
|
||||
})
|
||||
formats.append(fmt)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = video['title']
|
||||
description = video.get('description')
|
||||
thumbnail = self._proto_relative_url(
|
||||
video.get('thumbnail') or video.get('thumbnail_src'),
|
||||
scheme='http:')
|
||||
uploader = data.get('user', {}).get('channel', {}).get('name') or video.get('user_alias')
|
||||
uploader_id = data.get('user', {}).get('name')
|
||||
thumbnail = self._proto_relative_url(video.get('thumbnail_src'), scheme='http:')
|
||||
uploader = video.get('user_alias') or video.get('channel')
|
||||
timestamp = int_or_none(video.get('upload_time'))
|
||||
duration = video['duration']
|
||||
view_count = video.get('raw_view_count')
|
||||
like_count = video.get('rlikes')
|
||||
if like_count is None:
|
||||
like_count = video.get('total_likes')
|
||||
dislike_count = video.get('rhates')
|
||||
if dislike_count is None:
|
||||
dislike_count = video.get('total_hates')
|
||||
like_count = video.get('total_likes')
|
||||
dislike_count = video.get('total_hates')
|
||||
|
||||
comments = video.get('comments')
|
||||
comment_count = None
|
||||
@@ -124,6 +123,8 @@ class VubeIE(InfoExtractor):
|
||||
else:
|
||||
comment_count = len(comments)
|
||||
|
||||
categories = [tag['text'] for tag in video['tags']]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
@@ -131,11 +132,11 @@ class VubeIE(InfoExtractor):
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'dislike_count': dislike_count,
|
||||
'comment_count': comment_count,
|
||||
'categories': categories,
|
||||
}
|
||||
|
@@ -6,7 +6,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_parse_qs,
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
determine_ext,
|
||||
unified_strdate,
|
||||
)
|
||||
|
57
youtube_dl/extractor/xboxclips.py
Normal file
57
youtube_dl/extractor/xboxclips.py
Normal file
@@ -0,0 +1,57 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class XboxClipsIE(InfoExtractor):
    """Extractor for individual clip pages on xboxclips.com.

    The clip id is the 36-character ``vid`` query parameter of a
    ``video.php`` URL; all metadata is scraped from the clip's HTML page.
    """

    _VALID_URL = r'https?://(?:www\.)?xboxclips\.com/video\.php\?.*vid=(?P<id>[\w-]{36})'
    _TEST = {
        'url': 'https://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325',
        'md5': 'fbe1ec805e920aeb8eced3c3e657df5d',
        'info_dict': {
            'id': '074a69a9-5faf-46aa-b93b-9909c1720325',
            'ext': 'mp4',
            'title': 'Iabdulelah playing Upload Studio',
            'filesize_approx': 28101836.8,
            'timestamp': 1407388500,
            'upload_date': '20140807',
            'duration': 56,
        }
    }

    def _real_extract(self, url):
        """Download the clip page for *url* and return its info dict."""
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)

        def optional_field(pattern, name):
            # Looked up with fatal=False, so (presumably) a missing field
            # yields None instead of aborting the extraction.
            return self._html_search_regex(pattern, page, name, fatal=False)

        # Mandatory fields: the direct media link and the page title.
        media_url = self._html_search_regex(
            r'>Link: <a href="([^"]+)">', page, 'video URL')
        clip_title = self._html_search_regex(
            r'<title>XboxClips \| ([^<]+)</title>', page, 'title')

        # Optional metadata, scraped in the same order as the labels are
        # queried in the info dict below.
        recorded_raw = optional_field(r'>Recorded: ([^<]+)<', 'upload date')
        recorded_at = parse_iso8601(recorded_raw)

        size_raw = optional_field(r'>Size: ([\d\.]+)MB<', 'file size')
        # The page reports the size in MB; invscale scales it up to bytes.
        approx_bytes = float_or_none(size_raw, invscale=1024 * 1024)

        seconds = int_or_none(
            optional_field(r'>Duration: (\d+) Seconds<', 'duration'))
        views = int_or_none(
            optional_field(r'>Views: (\d+)<', 'view count'))

        return {
            'id': video_id,
            'url': media_url,
            'title': clip_title,
            'timestamp': recorded_at,
            'filesize_approx': approx_bytes,
            'duration': seconds,
            'view_count': views,
        }
|
@@ -15,7 +15,7 @@ from ..utils import (
|
||||
|
||||
class YahooIE(InfoExtractor):
|
||||
IE_DESC = 'Yahoo screen and movies'
|
||||
_VALID_URL = r'https?://(?:screen|movies)\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html'
|
||||
_VALID_URL = r'(?P<url>https?://(?:screen|movies)\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
|
||||
@@ -46,12 +46,23 @@ class YahooIE(InfoExtractor):
|
||||
'title': 'The World Loves Spider-Man',
|
||||
'description': '''People all over the world are celebrating the release of \"The Amazing Spider-Man 2.\" We're taking a look at the enthusiastic response Spider-Man has received from viewers all over the world.''',
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed',
|
||||
'md5': '60e8ac193d8fb71997caa8fce54c6460',
|
||||
'info_dict': {
|
||||
'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb',
|
||||
'ext': 'mp4',
|
||||
'title': "Yahoo Saves 'Community'",
|
||||
'description': 'md5:4d4145af2fd3de00cbb6c1d664105053',
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
url = mobj.group('url')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
items_json = self._search_regex(
|
||||
|
@@ -344,7 +344,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
"""Indicate the download will use the RTMP protocol."""
|
||||
self.to_screen(u'RTMP download detected')
|
||||
|
||||
def _extract_signature_function(self, video_id, player_url, slen):
|
||||
def _signature_cache_id(self, example_sig):
    """Return the cache key for a signature layout.

    The key is the length of every '.'-separated part of *example_sig*,
    joined with dots.
    """
    part_lengths = [compat_str(len(piece)) for piece in example_sig.split('.')]
    return u'.'.join(part_lengths)
|
||||
|
||||
def _extract_signature_function(self, video_id, player_url, example_sig):
|
||||
id_m = re.match(
|
||||
r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
|
||||
player_url)
|
||||
@@ -354,7 +358,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
player_id = id_m.group('id')
|
||||
|
||||
# Read from filesystem cache
|
||||
func_id = '%s_%s_%d' % (player_type, player_id, slen)
|
||||
func_id = '%s_%s_%s' % (
|
||||
player_type, player_id, self._signature_cache_id(example_sig))
|
||||
assert os.path.basename(func_id) == func_id
|
||||
cache_dir = get_cachedir(self._downloader.params)
|
||||
|
||||
@@ -369,6 +374,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
return lambda s: u''.join(s[i] for i in cache_spec)
|
||||
except IOError:
|
||||
pass # No cache available
|
||||
except ValueError:
|
||||
try:
|
||||
file_size = os.path.getsize(cache_fn)
|
||||
except (OSError, IOError) as oe:
|
||||
file_size = str(oe)
|
||||
self._downloader.report_warning(
|
||||
u'Cache %s failed (%s)' % (cache_fn, file_size))
|
||||
|
||||
if player_type == 'js':
|
||||
code = self._download_webpage(
|
||||
@@ -388,7 +400,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
|
||||
if cache_enabled:
|
||||
try:
|
||||
test_string = u''.join(map(compat_chr, range(slen)))
|
||||
test_string = u''.join(map(compat_chr, range(len(example_sig))))
|
||||
cache_res = res(test_string)
|
||||
cache_spec = [ord(c) for c in cache_res]
|
||||
try:
|
||||
@@ -404,7 +416,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
|
||||
return res
|
||||
|
||||
def _print_sig_code(self, func, slen):
|
||||
def _print_sig_code(self, func, example_sig):
|
||||
def gen_sig_code(idxs):
|
||||
def _genslice(start, end, step):
|
||||
starts = u'' if start == 0 else str(start)
|
||||
@@ -433,11 +445,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
else:
|
||||
yield _genslice(start, i, step)
|
||||
|
||||
test_string = u''.join(map(compat_chr, range(slen)))
|
||||
test_string = u''.join(map(compat_chr, range(len(example_sig))))
|
||||
cache_res = func(test_string)
|
||||
cache_spec = [ord(c) for c in cache_res]
|
||||
expr_code = u' + '.join(gen_sig_code(cache_spec))
|
||||
code = u'if len(s) == %d:\n return %s\n' % (slen, expr_code)
|
||||
signature_id_tuple = '(%s)' % (
|
||||
', '.join(compat_str(len(p)) for p in example_sig.split('.')))
|
||||
code = (u'if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
|
||||
u' return %s\n') % (signature_id_tuple, expr_code)
|
||||
self.to_screen(u'Extracted signature function:\n' + code)
|
||||
|
||||
def _parse_sig_js(self, jscode):
|
||||
@@ -465,20 +480,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
if player_url.startswith(u'//'):
|
||||
player_url = u'https:' + player_url
|
||||
try:
|
||||
player_id = (player_url, len(s))
|
||||
player_id = (player_url, self._signature_cache_id(s))
|
||||
if player_id not in self._player_cache:
|
||||
func = self._extract_signature_function(
|
||||
video_id, player_url, len(s)
|
||||
video_id, player_url, s
|
||||
)
|
||||
self._player_cache[player_id] = func
|
||||
func = self._player_cache[player_id]
|
||||
if self._downloader.params.get('youtube_print_sig_code'):
|
||||
self._print_sig_code(func, len(s))
|
||||
self._print_sig_code(func, s)
|
||||
return func(s)
|
||||
except Exception as e:
|
||||
tb = traceback.format_exc()
|
||||
raise ExtractorError(
|
||||
u'Automatic signature extraction failed: ' + tb, cause=e)
|
||||
u'Signature extraction failed: ' + tb, cause=e)
|
||||
|
||||
def _get_available_subtitles(self, video_id, webpage):
|
||||
try:
|
||||
@@ -611,7 +626,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
data = compat_urllib_parse.urlencode({
|
||||
'video_id': video_id,
|
||||
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
|
||||
'sts':'16268',
|
||||
'sts': self._search_regex(
|
||||
r'"sts"\s*:\s*(\d+)', video_webpage, 'sts'),
|
||||
})
|
||||
video_info_url = proto + '://www.youtube.com/get_video_info?' + data
|
||||
video_info_webpage = self._download_webpage(video_info_url, video_id,
|
||||
@@ -805,50 +821,54 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
url_map = {}
|
||||
for url_data_str in encoded_url_map.split(','):
|
||||
url_data = compat_parse_qs(url_data_str)
|
||||
if 'itag' in url_data and 'url' in url_data:
|
||||
url = url_data['url'][0]
|
||||
if 'sig' in url_data:
|
||||
url += '&signature=' + url_data['sig'][0]
|
||||
elif 's' in url_data:
|
||||
encrypted_sig = url_data['s'][0]
|
||||
if 'itag' not in url_data or 'url' not in url_data:
|
||||
continue
|
||||
format_id = url_data['itag'][0]
|
||||
url = url_data['url'][0]
|
||||
|
||||
if not age_gate:
|
||||
jsplayer_url_json = self._search_regex(
|
||||
r'"assets":.+?"js":\s*("[^"]+")',
|
||||
video_webpage, u'JS player URL')
|
||||
player_url = json.loads(jsplayer_url_json)
|
||||
if 'sig' in url_data:
|
||||
url += '&signature=' + url_data['sig'][0]
|
||||
elif 's' in url_data:
|
||||
encrypted_sig = url_data['s'][0]
|
||||
|
||||
if not age_gate:
|
||||
jsplayer_url_json = self._search_regex(
|
||||
r'"assets":.+?"js":\s*("[^"]+")',
|
||||
video_webpage, u'JS player URL')
|
||||
player_url = json.loads(jsplayer_url_json)
|
||||
if player_url is None:
|
||||
player_url_json = self._search_regex(
|
||||
r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
|
||||
video_webpage, u'age gate player URL')
|
||||
player_url = json.loads(player_url_json)
|
||||
|
||||
if self._downloader.params.get('verbose'):
|
||||
if player_url is None:
|
||||
player_url_json = self._search_regex(
|
||||
r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
|
||||
video_webpage, u'age gate player URL')
|
||||
player_url = json.loads(player_url_json)
|
||||
|
||||
if self._downloader.params.get('verbose'):
|
||||
if player_url is None:
|
||||
player_version = 'unknown'
|
||||
player_desc = 'unknown'
|
||||
player_version = 'unknown'
|
||||
player_desc = 'unknown'
|
||||
else:
|
||||
if player_url.endswith('swf'):
|
||||
player_version = self._search_regex(
|
||||
r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
|
||||
u'flash player', fatal=False)
|
||||
player_desc = 'flash player %s' % player_version
|
||||
else:
|
||||
if player_url.endswith('swf'):
|
||||
player_version = self._search_regex(
|
||||
r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
|
||||
u'flash player', fatal=False)
|
||||
player_desc = 'flash player %s' % player_version
|
||||
else:
|
||||
player_version = self._search_regex(
|
||||
r'html5player-(.+?)\.js', video_webpage,
|
||||
'html5 player', fatal=False)
|
||||
player_desc = u'html5 player %s' % player_version
|
||||
player_version = self._search_regex(
|
||||
r'html5player-([^/]+?)(?:/html5player)?\.js',
|
||||
player_url,
|
||||
'html5 player', fatal=False)
|
||||
player_desc = u'html5 player %s' % player_version
|
||||
|
||||
parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
|
||||
self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
|
||||
(len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
|
||||
parts_sizes = self._signature_cache_id(encrypted_sig)
|
||||
self.to_screen(u'{%s} signature length %s, %s' %
|
||||
(format_id, parts_sizes, player_desc))
|
||||
|
||||
signature = self._decrypt_signature(
|
||||
encrypted_sig, video_id, player_url, age_gate)
|
||||
url += '&signature=' + signature
|
||||
if 'ratebypass' not in url:
|
||||
url += '&ratebypass=yes'
|
||||
url_map[url_data['itag'][0]] = url
|
||||
signature = self._decrypt_signature(
|
||||
encrypted_sig, video_id, player_url, age_gate)
|
||||
url += '&signature=' + signature
|
||||
if 'ratebypass' not in url:
|
||||
url += '&ratebypass=yes'
|
||||
url_map[format_id] = url
|
||||
formats = _map_to_format_list(url_map)
|
||||
elif video_info.get('hlsvp'):
|
||||
manifest_url = video_info['hlsvp'][0]
|
||||
|
@@ -1,5 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .utils import (
|
||||
@@ -40,8 +41,9 @@ class JSInterpreter(object):
|
||||
assign = lambda v: v
|
||||
expr = stmt[len('return '):]
|
||||
else:
|
||||
raise ExtractorError(
|
||||
'Cannot determine left side of statement in %r' % stmt)
|
||||
# Try interpreting it as an expression
|
||||
expr = stmt
|
||||
assign = lambda v: v
|
||||
|
||||
v = self.interpret_expression(expr, local_vars, allow_recursion)
|
||||
return assign(v)
|
||||
@@ -53,35 +55,63 @@ class JSInterpreter(object):
|
||||
if expr.isalpha():
|
||||
return local_vars[expr]
|
||||
|
||||
m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
|
||||
if m:
|
||||
member = m.group('member')
|
||||
variable = m.group('in')
|
||||
try:
|
||||
return json.loads(expr)
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
if variable not in local_vars:
|
||||
m = re.match(
|
||||
r'^(?P<var>[a-zA-Z0-9_]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$',
|
||||
expr)
|
||||
if m:
|
||||
variable = m.group('var')
|
||||
member = m.group('member')
|
||||
arg_str = m.group('args')
|
||||
|
||||
if variable in local_vars:
|
||||
obj = local_vars[variable]
|
||||
else:
|
||||
if variable not in self._objects:
|
||||
self._objects[variable] = self.extract_object(variable)
|
||||
obj = self._objects[variable]
|
||||
key, args = member.split('(', 1)
|
||||
args = args.strip(')')
|
||||
argvals = [int(v) if v.isdigit() else local_vars[v]
|
||||
for v in args.split(',')]
|
||||
return obj[key](argvals)
|
||||
|
||||
val = local_vars[variable]
|
||||
if member == 'split("")':
|
||||
return list(val)
|
||||
if member == 'join("")':
|
||||
return ''.join(val)
|
||||
if member == 'length':
|
||||
return len(val)
|
||||
if member == 'reverse()':
|
||||
return val[::-1]
|
||||
slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
|
||||
if slice_m:
|
||||
idx = self.interpret_expression(
|
||||
slice_m.group('idx'), local_vars, allow_recursion - 1)
|
||||
return val[idx:]
|
||||
if arg_str is None:
|
||||
# Member access
|
||||
if member == 'length':
|
||||
return len(obj)
|
||||
return obj[member]
|
||||
|
||||
assert expr.endswith(')')
|
||||
# Function call
|
||||
if arg_str == '':
|
||||
argvals = tuple()
|
||||
else:
|
||||
argvals = tuple([
|
||||
self.interpret_expression(v, local_vars, allow_recursion)
|
||||
for v in arg_str.split(',')])
|
||||
|
||||
if member == 'split':
|
||||
assert argvals == ('',)
|
||||
return list(obj)
|
||||
if member == 'join':
|
||||
assert len(argvals) == 1
|
||||
return argvals[0].join(obj)
|
||||
if member == 'reverse':
|
||||
assert len(argvals) == 0
|
||||
obj.reverse()
|
||||
return obj
|
||||
if member == 'slice':
|
||||
assert len(argvals) == 1
|
||||
return obj[argvals[0]:]
|
||||
if member == 'splice':
|
||||
assert isinstance(obj, list)
|
||||
index, howMany = argvals
|
||||
res = []
|
||||
for i in range(index, min(index + howMany, len(obj))):
|
||||
res.append(obj.pop(index))
|
||||
return res
|
||||
|
||||
return obj[member](argvals)
|
||||
|
||||
m = re.match(
|
||||
r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
|
||||
@@ -103,10 +133,11 @@ class JSInterpreter(object):
|
||||
r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
|
||||
if m:
|
||||
fname = m.group('func')
|
||||
argvals = tuple([
|
||||
int(v) if v.isdigit() else local_vars[v]
|
||||
for v in m.group('args').split(',')])
|
||||
if fname not in self._functions:
|
||||
self._functions[fname] = self.extract_function(fname)
|
||||
argvals = [int(v) if v.isdigit() else local_vars[v]
|
||||
for v in m.group('args').split(',')]
|
||||
return self._functions[fname](argvals)
|
||||
raise ExtractorError('Unsupported JS expression %r' % expr)
|
||||
|
||||
|
@@ -18,14 +18,15 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
|
||||
class FFmpegPostProcessorError(PostProcessingError):
    """Post-processing error raised by the FFmpeg post-processors."""
|
||||
|
||||
|
||||
class FFmpegPostProcessor(PostProcessor):
|
||||
def __init__(self,downloader=None):
|
||||
def __init__(self, downloader=None, deletetempfiles=False):
|
||||
PostProcessor.__init__(self, downloader)
|
||||
self._exes = self.detect_executables()
|
||||
self._deletetempfiles = deletetempfiles
|
||||
|
||||
@staticmethod
|
||||
def detect_executables():
|
||||
@@ -60,6 +61,9 @@ class FFmpegPostProcessor(PostProcessor):
|
||||
stderr = stderr.decode('utf-8', 'replace')
|
||||
msg = stderr.strip().split('\n')[-1]
|
||||
raise FFmpegPostProcessorError(msg)
|
||||
if self._deletetempfiles:
|
||||
for ipath in input_paths:
|
||||
os.remove(ipath)
|
||||
|
||||
def run_ffmpeg(self, path, out_path, opts):
    """Process a single input file.

    Wraps *path* in a one-element list and delegates to
    run_ffmpeg_multiple_files with the same *out_path* and *opts*.
    """
    single_input = [path]
    self.run_ffmpeg_multiple_files(single_input, out_path, opts)
|
||||
|
@@ -242,8 +242,8 @@ else:
|
||||
if sys.version_info >= (2,7):
|
||||
def find_xpath_attr(node, xpath, key, val):
|
||||
""" Find the xpath xpath[@key=val] """
|
||||
assert re.match(r'^[a-zA-Z]+$', key)
|
||||
assert re.match(r'^[a-zA-Z0-9@\s:._]*$', val)
|
||||
assert re.match(r'^[a-zA-Z-]+$', key)
|
||||
assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
|
||||
expr = xpath + u"[@%s='%s']" % (key, val)
|
||||
return node.find(expr)
|
||||
else:
|
||||
@@ -827,6 +827,7 @@ def unified_strdate(date_str):
|
||||
'%b %dnd %Y %I:%M%p',
|
||||
'%b %dth %Y %I:%M%p',
|
||||
'%Y-%m-%d',
|
||||
'%Y/%m/%d',
|
||||
'%d.%m.%Y',
|
||||
'%d/%m/%Y',
|
||||
'%Y/%m/%d %H:%M:%S',
|
||||
@@ -852,6 +853,8 @@ def unified_strdate(date_str):
|
||||
return upload_date
|
||||
|
||||
def determine_ext(url, default_ext=u'unknown_video'):
|
||||
if url is None:
|
||||
return default_ext
|
||||
guess = url.partition(u'?')[0].rpartition(u'.')[2]
|
||||
if re.match(r'^[A-Za-z0-9]+$', guess):
|
||||
return guess
|
||||
@@ -1271,9 +1274,15 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
|
||||
if get_attr:
|
||||
if v is not None:
|
||||
v = getattr(v, get_attr, None)
|
||||
if v == '':
|
||||
v = None
|
||||
return default if v is None else (int(v) * invscale // scale)
|
||||
|
||||
|
||||
def str_or_none(v, default=None):
    """Return *v* converted with compat_str, or *default* when *v* is None."""
    if v is None:
        return default
    return compat_str(v)
|
||||
|
||||
|
||||
def str_to_int(int_str):
|
||||
if int_str is None:
|
||||
return None
|
||||
|
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2014.07.23.2'
|
||||
__version__ = '2014.08.21.1'
|
||||
|
Reference in New Issue
Block a user