Compare commits
99 Commits
2015.02.17
...
2015.02.23
Author | SHA1 | Date | |
---|---|---|---|
bd61a9e770 | |||
3438e7acd2 | |||
09c200acf2 | |||
716889cab1 | |||
409693984f | |||
04e8c11080 | |||
80af2b73ab | |||
3cc57f9645 | |||
a65d4e7f14 | |||
543ec2136b | |||
93b5071f73 | |||
ddc369f073 | |||
fcc3e6138b | |||
9fe6ef7ab2 | |||
c010af6f19 | |||
35b7982303 | |||
f311cfa231 | |||
e086e0eb6c | |||
314368c822 | |||
c5181ab410 | |||
ea5152cae1 | |||
255fca5eea | |||
4aeccadf4e | |||
93540ee10e | |||
8fb3ac3649 | |||
77b2986b5b | |||
62b013df0d | |||
fad6768bd1 | |||
a78125f925 | |||
a00a8bcc8a | |||
1e9a9e167d | |||
3da0db62e6 | |||
e14ced7918 | |||
ab9d02f53b | |||
a461a11989 | |||
1bd838608f | |||
365577f567 | |||
50efb383f0 | |||
5da6bd0083 | |||
5e9a033e6e | |||
dd0a58f5f0 | |||
a21420389e | |||
6140baf4e1 | |||
8fc642eb5b | |||
e66e1a0046 | |||
d5c69f1da4 | |||
5c8a3f862a | |||
a3b9157f49 | |||
b88ba05356 | |||
b74d505577 | |||
9e2d7dca87 | |||
d236b37ac9 | |||
e880c66bd8 | |||
383456aa29 | |||
1a13940c8d | |||
3d54788495 | |||
71d53ace2f | |||
f37e3f99f0 | |||
bd03ffc16e | |||
1ac1af9b47 | |||
3bf5705316 | |||
1c2528c8a3 | |||
7bd15b1a03 | |||
6b961a85fd | |||
7707004043 | |||
a025d3c5a5 | |||
c460bdd56b | |||
b81a359eb6 | |||
d61aefb24c | |||
d305dd73a3 | |||
93a16ba238 | |||
85d5866177 | |||
9789d7535d | |||
d8443cd3f7 | |||
d47c26e168 | |||
81975f4693 | |||
b8b928d5cb | |||
3eff81fbf7 | |||
785521bf4f | |||
6d1a55a521 | |||
9cad27008b | |||
11e611a7fa | |||
72c1f8de06 | |||
6e99868e4c | |||
4d278fde64 | |||
f21e915fb9 | |||
6f53c63df6 | |||
1def5f359e | |||
15ec669374 | |||
a3fa5da496 | |||
30965ac66a | |||
09ab40b7d1 | |||
fa15607773 | |||
a91a2c1a83 | |||
16e7711e22 | |||
5cda4eda72 | |||
98f000409f | |||
4a8d4a53b1 | |||
4cd95bcbc3 |
1
AUTHORS
1
AUTHORS
@ -111,3 +111,4 @@ Paul Hartmann
|
|||||||
Frans de Jonge
|
Frans de Jonge
|
||||||
Robin de Rooij
|
Robin de Rooij
|
||||||
Ryan Schmidt
|
Ryan Schmidt
|
||||||
|
Leslie P. Polzer
|
||||||
|
2
Makefile
2
Makefile
@ -1,7 +1,7 @@
|
|||||||
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
|
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
|
||||||
|
|
||||||
PREFIX ?= /usr/local
|
PREFIX ?= /usr/local
|
||||||
BINDIR ?= $(PREFIX)/bin
|
BINDIR ?= $(PREFIX)/bin
|
||||||
|
10
README.md
10
README.md
@ -515,11 +515,15 @@ If you want to play the video on a machine that is not running youtube-dl, you c
|
|||||||
|
|
||||||
### ERROR: no fmt_url_map or conn information found in video info
|
### ERROR: no fmt_url_map or conn information found in video info
|
||||||
|
|
||||||
youtube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`.
|
YouTube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
|
||||||
|
|
||||||
### ERROR: unable to download video ###
|
### ERROR: unable to download video ###
|
||||||
|
|
||||||
youtube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`.
|
YouTube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
|
||||||
|
|
||||||
|
### ExtractorError: Could not find JS function u'OF'
|
||||||
|
|
||||||
|
In February 2015, the new YouTube player contained a character sequence in a string that was misinterpreted by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
|
||||||
|
|
||||||
### SyntaxError: Non-ASCII character ###
|
### SyntaxError: Non-ASCII character ###
|
||||||
|
|
||||||
@ -567,7 +571,7 @@ Support requests for services that **do** purchase the rights to distribute thei
|
|||||||
|
|
||||||
### How can I detect whether a given URL is supported by youtube-dl?
|
### How can I detect whether a given URL is supported by youtube-dl?
|
||||||
|
|
||||||
For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
|
For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
|
||||||
|
|
||||||
It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
|
It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
|
||||||
|
|
||||||
|
@ -45,12 +45,12 @@ for test in get_testcases():
|
|||||||
|
|
||||||
RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST)
|
RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST)
|
||||||
|
|
||||||
if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict']
|
if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] or
|
||||||
or test['info_dict']['age_limit'] != 18):
|
test['info_dict']['age_limit'] != 18):
|
||||||
print('\nPotential missing age_limit check: {0}'.format(test['name']))
|
print('\nPotential missing age_limit check: {0}'.format(test['name']))
|
||||||
|
|
||||||
elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict']
|
elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] and
|
||||||
and test['info_dict']['age_limit'] == 18):
|
test['info_dict']['age_limit'] == 18):
|
||||||
print('\nPotential false negative: {0}'.format(test['name']))
|
print('\nPotential false negative: {0}'.format(test['name']))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
@ -68,9 +68,12 @@
|
|||||||
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
||||||
- **CBS**
|
- **CBS**
|
||||||
- **CBSNews**: CBS News
|
- **CBSNews**: CBS News
|
||||||
|
- **CBSSports**
|
||||||
- **CeskaTelevize**
|
- **CeskaTelevize**
|
||||||
- **channel9**: Channel 9
|
- **channel9**: Channel 9
|
||||||
- **Chilloutzone**
|
- **Chilloutzone**
|
||||||
|
- **chirbit**
|
||||||
|
- **chirbit:profile**
|
||||||
- **Cinchcast**
|
- **Cinchcast**
|
||||||
- **Cinemassacre**
|
- **Cinemassacre**
|
||||||
- **clipfish**
|
- **clipfish**
|
||||||
@ -121,6 +124,7 @@
|
|||||||
- **EllenTV**
|
- **EllenTV**
|
||||||
- **EllenTV:clips**
|
- **EllenTV:clips**
|
||||||
- **ElPais**: El País
|
- **ElPais**: El País
|
||||||
|
- **Embedly**
|
||||||
- **EMPFlix**
|
- **EMPFlix**
|
||||||
- **Engadget**
|
- **Engadget**
|
||||||
- **Eporner**
|
- **Eporner**
|
||||||
@ -190,6 +194,7 @@
|
|||||||
- **ign.com**
|
- **ign.com**
|
||||||
- **imdb**: Internet Movie Database trailers
|
- **imdb**: Internet Movie Database trailers
|
||||||
- **imdb:list**: Internet Movie Database lists
|
- **imdb:list**: Internet Movie Database lists
|
||||||
|
- **Imgur**
|
||||||
- **Ina**
|
- **Ina**
|
||||||
- **InfoQ**
|
- **InfoQ**
|
||||||
- **Instagram**
|
- **Instagram**
|
||||||
@ -262,6 +267,7 @@
|
|||||||
- **myvideo**
|
- **myvideo**
|
||||||
- **MyVidster**
|
- **MyVidster**
|
||||||
- **n-tv.de**
|
- **n-tv.de**
|
||||||
|
- **NationalGeographic**
|
||||||
- **Naver**
|
- **Naver**
|
||||||
- **NBA**
|
- **NBA**
|
||||||
- **NBC**
|
- **NBC**
|
||||||
@ -319,12 +325,14 @@
|
|||||||
- **podomatic**
|
- **podomatic**
|
||||||
- **PornHd**
|
- **PornHd**
|
||||||
- **PornHub**
|
- **PornHub**
|
||||||
|
- **PornHubPlaylist**
|
||||||
- **Pornotube**
|
- **Pornotube**
|
||||||
- **PornoXO**
|
- **PornoXO**
|
||||||
- **PromptFile**
|
- **PromptFile**
|
||||||
- **prosiebensat1**: ProSiebenSat.1 Digital
|
- **prosiebensat1**: ProSiebenSat.1 Digital
|
||||||
- **Pyvideo**
|
- **Pyvideo**
|
||||||
- **QuickVid**
|
- **QuickVid**
|
||||||
|
- **R7**
|
||||||
- **radio.de**
|
- **radio.de**
|
||||||
- **radiobremen**
|
- **radiobremen**
|
||||||
- **radiofrance**
|
- **radiofrance**
|
||||||
@ -352,6 +360,7 @@
|
|||||||
- **rutube:movie**: Rutube movies
|
- **rutube:movie**: Rutube movies
|
||||||
- **rutube:person**: Rutube person videos
|
- **rutube:person**: Rutube person videos
|
||||||
- **RUTV**: RUTV.RU
|
- **RUTV**: RUTV.RU
|
||||||
|
- **Sandia**: Sandia National Laboratories
|
||||||
- **Sapo**: SAPO Vídeos
|
- **Sapo**: SAPO Vídeos
|
||||||
- **savefrom.net**
|
- **savefrom.net**
|
||||||
- **SBS**: sbs.com.au
|
- **SBS**: sbs.com.au
|
||||||
@ -379,7 +388,8 @@
|
|||||||
- **soundcloud:playlist**
|
- **soundcloud:playlist**
|
||||||
- **soundcloud:set**
|
- **soundcloud:set**
|
||||||
- **soundcloud:user**
|
- **soundcloud:user**
|
||||||
- **Soundgasm**
|
- **soundgasm**
|
||||||
|
- **soundgasm:profile**
|
||||||
- **southpark.cc.com**
|
- **southpark.cc.com**
|
||||||
- **southpark.de**
|
- **southpark.de**
|
||||||
- **Space**
|
- **Space**
|
||||||
@ -445,6 +455,7 @@
|
|||||||
- **Turbo**
|
- **Turbo**
|
||||||
- **Tutv**
|
- **Tutv**
|
||||||
- **tv.dfb.de**
|
- **tv.dfb.de**
|
||||||
|
- **TV4**: tv4.se and tv4play.se
|
||||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||||
- **tvp.pl**
|
- **tvp.pl**
|
||||||
- **tvp.pl:Series**
|
- **tvp.pl:Series**
|
||||||
@ -552,6 +563,7 @@
|
|||||||
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
|
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
|
||||||
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
|
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
|
||||||
- **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
|
- **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
|
||||||
|
- **Zapiks**
|
||||||
- **ZDF**
|
- **ZDF**
|
||||||
- **ZDFChannel**
|
- **ZDFChannel**
|
||||||
- **zingmp3:album**: mp3.zing.vn albums
|
- **zingmp3:album**: mp3.zing.vn albums
|
||||||
|
@ -113,6 +113,16 @@ def expect_info_dict(self, got_dict, expected_dict):
|
|||||||
self.assertTrue(
|
self.assertTrue(
|
||||||
got.startswith(start_str),
|
got.startswith(start_str),
|
||||||
'field %s (value: %r) should start with %r' % (info_field, got, start_str))
|
'field %s (value: %r) should start with %r' % (info_field, got, start_str))
|
||||||
|
elif isinstance(expected, compat_str) and expected.startswith('contains:'):
|
||||||
|
got = got_dict.get(info_field)
|
||||||
|
contains_str = expected[len('contains:'):]
|
||||||
|
self.assertTrue(
|
||||||
|
isinstance(got, compat_str),
|
||||||
|
'Expected a %s object, but got %s for field %s' % (
|
||||||
|
compat_str.__name__, type(got).__name__, info_field))
|
||||||
|
self.assertTrue(
|
||||||
|
contains_str in got,
|
||||||
|
'field %s (value: %r) should contain %r' % (info_field, got, contains_str))
|
||||||
elif isinstance(expected, type):
|
elif isinstance(expected, type):
|
||||||
got = got_dict.get(info_field)
|
got = got_dict.get(info_field)
|
||||||
self.assertTrue(isinstance(got, expected),
|
self.assertTrue(isinstance(got, expected),
|
||||||
@ -163,12 +173,14 @@ def expect_info_dict(self, got_dict, expected_dict):
|
|||||||
info_dict_str += ''.join(
|
info_dict_str += ''.join(
|
||||||
' %s: %s,\n' % (_repr(k), _repr(v))
|
' %s: %s,\n' % (_repr(k), _repr(v))
|
||||||
for k, v in test_info_dict.items() if k not in missing_keys)
|
for k, v in test_info_dict.items() if k not in missing_keys)
|
||||||
|
|
||||||
|
if info_dict_str:
|
||||||
info_dict_str += '\n'
|
info_dict_str += '\n'
|
||||||
info_dict_str += ''.join(
|
info_dict_str += ''.join(
|
||||||
' %s: %s,\n' % (_repr(k), _repr(test_info_dict[k]))
|
' %s: %s,\n' % (_repr(k), _repr(test_info_dict[k]))
|
||||||
for k in missing_keys)
|
for k in missing_keys)
|
||||||
write_string(
|
write_string(
|
||||||
'\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr)
|
'\n\'info_dict\': {\n' + info_dict_str + '},\n', out=sys.stderr)
|
||||||
self.assertFalse(
|
self.assertFalse(
|
||||||
missing_keys,
|
missing_keys,
|
||||||
'Missing keys in test definition: %s' % (
|
'Missing keys in test definition: %s' % (
|
||||||
|
@ -70,6 +70,8 @@ class TestJSInterpreter(unittest.TestCase):
|
|||||||
self.assertEqual(jsi.call_function('f'), -11)
|
self.assertEqual(jsi.call_function('f'), -11)
|
||||||
|
|
||||||
def test_comments(self):
|
def test_comments(self):
|
||||||
|
'Skipping: Not yet fully implemented'
|
||||||
|
return
|
||||||
jsi = JSInterpreter('''
|
jsi = JSInterpreter('''
|
||||||
function x() {
|
function x() {
|
||||||
var x = /* 1 + */ 2;
|
var x = /* 1 + */ 2;
|
||||||
@ -80,6 +82,15 @@ class TestJSInterpreter(unittest.TestCase):
|
|||||||
''')
|
''')
|
||||||
self.assertEqual(jsi.call_function('x'), 52)
|
self.assertEqual(jsi.call_function('x'), 52)
|
||||||
|
|
||||||
|
jsi = JSInterpreter('''
|
||||||
|
function f() {
|
||||||
|
var x = "/*";
|
||||||
|
var y = 1 /* comment */ + 2;
|
||||||
|
return y;
|
||||||
|
}
|
||||||
|
''')
|
||||||
|
self.assertEqual(jsi.call_function('f'), 3)
|
||||||
|
|
||||||
def test_precedence(self):
|
def test_precedence(self):
|
||||||
jsi = JSInterpreter('''
|
jsi = JSInterpreter('''
|
||||||
function x() {
|
function x() {
|
||||||
|
@ -34,8 +34,8 @@ def _make_testfunc(testfile):
|
|||||||
def test_func(self):
|
def test_func(self):
|
||||||
as_file = os.path.join(TEST_DIR, testfile)
|
as_file = os.path.join(TEST_DIR, testfile)
|
||||||
swf_file = os.path.join(TEST_DIR, test_id + '.swf')
|
swf_file = os.path.join(TEST_DIR, test_id + '.swf')
|
||||||
if ((not os.path.exists(swf_file))
|
if ((not os.path.exists(swf_file)) or
|
||||||
or os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
|
os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
|
||||||
# Recompile
|
# Recompile
|
||||||
try:
|
try:
|
||||||
subprocess.check_call([
|
subprocess.check_call([
|
||||||
|
@ -370,6 +370,10 @@ class TestUtil(unittest.TestCase):
|
|||||||
"playlist":[{"controls":{"all":null}}]
|
"playlist":[{"controls":{"all":null}}]
|
||||||
}''')
|
}''')
|
||||||
|
|
||||||
|
inp = '"SAND Number: SAND 2013-7800P\\nPresenter: Tom Russo\\nHabanero Software Training - Xyce Software\\nXyce, Sandia\\u0027s"'
|
||||||
|
json_code = js_to_json(inp)
|
||||||
|
self.assertEqual(json.loads(json_code), json.loads(inp))
|
||||||
|
|
||||||
def test_js_to_json_edgecases(self):
|
def test_js_to_json_edgecases(self):
|
||||||
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
|
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
|
||||||
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
|
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
|
||||||
|
@ -64,6 +64,12 @@ _TESTS = [
|
|||||||
'js',
|
'js',
|
||||||
'4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
|
'4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
|
||||||
'82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B'
|
'82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B'
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
|
||||||
|
'js',
|
||||||
|
'312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
|
||||||
|
'112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -199,18 +199,25 @@ class YoutubeDL(object):
|
|||||||
postprocessor.
|
postprocessor.
|
||||||
progress_hooks: A list of functions that get called on download
|
progress_hooks: A list of functions that get called on download
|
||||||
progress, with a dictionary with the entries
|
progress, with a dictionary with the entries
|
||||||
* status: One of "downloading" and "finished".
|
* status: One of "downloading", "error", or "finished".
|
||||||
Check this first and ignore unknown values.
|
Check this first and ignore unknown values.
|
||||||
|
|
||||||
If status is one of "downloading" or "finished", the
|
If status is one of "downloading", or "finished", the
|
||||||
following properties may also be present:
|
following properties may also be present:
|
||||||
* filename: The final filename (always present)
|
* filename: The final filename (always present)
|
||||||
|
* tmpfilename: The filename we're currently writing to
|
||||||
* downloaded_bytes: Bytes on disk
|
* downloaded_bytes: Bytes on disk
|
||||||
* total_bytes: Size of the whole file, None if unknown
|
* total_bytes: Size of the whole file, None if unknown
|
||||||
* tmpfilename: The filename we're currently writing to
|
* total_bytes_estimate: Guess of the eventual file size,
|
||||||
|
None if unavailable.
|
||||||
|
* elapsed: The number of seconds since download started.
|
||||||
* eta: The estimated time in seconds, None if unknown
|
* eta: The estimated time in seconds, None if unknown
|
||||||
* speed: The download speed in bytes/second, None if
|
* speed: The download speed in bytes/second, None if
|
||||||
unknown
|
unknown
|
||||||
|
* fragment_index: The counter of the currently
|
||||||
|
downloaded video fragment.
|
||||||
|
* fragment_count: The number of fragments (= individual
|
||||||
|
files that will be merged)
|
||||||
|
|
||||||
Progress hooks are guaranteed to be called at least once
|
Progress hooks are guaranteed to be called at least once
|
||||||
(with status "finished") if the download is successful.
|
(with status "finished") if the download is successful.
|
||||||
@ -301,8 +308,8 @@ class YoutubeDL(object):
|
|||||||
raise
|
raise
|
||||||
|
|
||||||
if (sys.version_info >= (3,) and sys.platform != 'win32' and
|
if (sys.version_info >= (3,) and sys.platform != 'win32' and
|
||||||
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
|
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
|
||||||
and not params.get('restrictfilenames', False)):
|
not params.get('restrictfilenames', False)):
|
||||||
# On Python 3, the Unicode filesystem API will throw errors (#1474)
|
# On Python 3, the Unicode filesystem API will throw errors (#1474)
|
||||||
self.report_warning(
|
self.report_warning(
|
||||||
'Assuming --restrict-filenames since file system encoding '
|
'Assuming --restrict-filenames since file system encoding '
|
||||||
@ -1359,8 +1366,8 @@ class YoutubeDL(object):
|
|||||||
"""Download a given list of URLs."""
|
"""Download a given list of URLs."""
|
||||||
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
||||||
if (len(url_list) > 1 and
|
if (len(url_list) > 1 and
|
||||||
'%' not in outtmpl
|
'%' not in outtmpl and
|
||||||
and self.params.get('max_downloads') != 1):
|
self.params.get('max_downloads') != 1):
|
||||||
raise SameFileError(outtmpl)
|
raise SameFileError(outtmpl)
|
||||||
|
|
||||||
for url in url_list:
|
for url in url_list:
|
||||||
@ -1527,29 +1534,18 @@ class YoutubeDL(object):
|
|||||||
return res
|
return res
|
||||||
|
|
||||||
def list_formats(self, info_dict):
|
def list_formats(self, info_dict):
|
||||||
def line(format, idlen=20):
|
|
||||||
return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
|
|
||||||
format['format_id'],
|
|
||||||
format['ext'],
|
|
||||||
self.format_resolution(format),
|
|
||||||
self._format_note(format),
|
|
||||||
))
|
|
||||||
|
|
||||||
formats = info_dict.get('formats', [info_dict])
|
formats = info_dict.get('formats', [info_dict])
|
||||||
idlen = max(len('format code'),
|
table = [
|
||||||
max(len(f['format_id']) for f in formats))
|
[f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
|
||||||
formats_s = [
|
for f in formats
|
||||||
line(f, idlen) for f in formats
|
|
||||||
if f.get('preference') is None or f['preference'] >= -1000]
|
if f.get('preference') is None or f['preference'] >= -1000]
|
||||||
if len(formats) > 1:
|
if len(formats) > 1:
|
||||||
formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
|
table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
|
||||||
|
|
||||||
header_line = line({
|
header_line = ['format code', 'extension', 'resolution', 'note']
|
||||||
'format_id': 'format code', 'ext': 'extension',
|
|
||||||
'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
|
|
||||||
self.to_screen(
|
self.to_screen(
|
||||||
'[info] Available formats for %s:\n%s\n%s' %
|
'[info] Available formats for %s:\n%s' %
|
||||||
(info_dict['id'], header_line, '\n'.join(formats_s)))
|
(info_dict['id'], render_table(header_line, table)))
|
||||||
|
|
||||||
def list_thumbnails(self, info_dict):
|
def list_thumbnails(self, info_dict):
|
||||||
thumbnails = info_dict.get('thumbnails')
|
thumbnails = info_dict.get('thumbnails')
|
||||||
|
@ -189,14 +189,14 @@ def _real_main(argv=None):
|
|||||||
# In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
|
# In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
|
||||||
if opts.outtmpl is not None:
|
if opts.outtmpl is not None:
|
||||||
opts.outtmpl = opts.outtmpl.decode(preferredencoding())
|
opts.outtmpl = opts.outtmpl.decode(preferredencoding())
|
||||||
outtmpl = ((opts.outtmpl is not None and opts.outtmpl)
|
outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or
|
||||||
or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s')
|
(opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or
|
||||||
or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s')
|
(opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or
|
||||||
or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s')
|
(opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') or
|
||||||
or (opts.usetitle and '%(title)s-%(id)s.%(ext)s')
|
(opts.usetitle and '%(title)s-%(id)s.%(ext)s') or
|
||||||
or (opts.useid and '%(id)s.%(ext)s')
|
(opts.useid and '%(id)s.%(ext)s') or
|
||||||
or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s')
|
(opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') or
|
||||||
or DEFAULT_OUTTMPL)
|
DEFAULT_OUTTMPL)
|
||||||
if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
|
if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
|
||||||
parser.error('Cannot download a video and extract audio into the same'
|
parser.error('Cannot download a video and extract audio into the same'
|
||||||
' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
|
' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import division, unicode_literals
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
@ -54,6 +54,7 @@ class FileDownloader(object):
|
|||||||
self.ydl = ydl
|
self.ydl = ydl
|
||||||
self._progress_hooks = []
|
self._progress_hooks = []
|
||||||
self.params = params
|
self.params = params
|
||||||
|
self.add_progress_hook(self.report_progress)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def format_seconds(seconds):
|
def format_seconds(seconds):
|
||||||
@ -226,42 +227,64 @@ class FileDownloader(object):
|
|||||||
self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
|
self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
|
||||||
self.to_console_title('youtube-dl ' + msg)
|
self.to_console_title('youtube-dl ' + msg)
|
||||||
|
|
||||||
def report_progress(self, percent, data_len_str, speed, eta):
|
def report_progress(self, s):
|
||||||
"""Report download progress."""
|
if s['status'] == 'finished':
|
||||||
if self.params.get('noprogress', False):
|
|
||||||
return
|
|
||||||
if eta is not None:
|
|
||||||
eta_str = self.format_eta(eta)
|
|
||||||
else:
|
|
||||||
eta_str = 'Unknown ETA'
|
|
||||||
if percent is not None:
|
|
||||||
percent_str = self.format_percent(percent)
|
|
||||||
else:
|
|
||||||
percent_str = 'Unknown %'
|
|
||||||
speed_str = self.format_speed(speed)
|
|
||||||
|
|
||||||
msg = ('%s of %s at %s ETA %s' %
|
|
||||||
(percent_str, data_len_str, speed_str, eta_str))
|
|
||||||
self._report_progress_status(msg)
|
|
||||||
|
|
||||||
def report_progress_live_stream(self, downloaded_data_len, speed, elapsed):
|
|
||||||
if self.params.get('noprogress', False):
|
|
||||||
return
|
|
||||||
downloaded_str = format_bytes(downloaded_data_len)
|
|
||||||
speed_str = self.format_speed(speed)
|
|
||||||
elapsed_str = FileDownloader.format_seconds(elapsed)
|
|
||||||
msg = '%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str)
|
|
||||||
self._report_progress_status(msg)
|
|
||||||
|
|
||||||
def report_finish(self, data_len_str, tot_time):
|
|
||||||
"""Report download finished."""
|
|
||||||
if self.params.get('noprogress', False):
|
if self.params.get('noprogress', False):
|
||||||
self.to_screen('[download] Download completed')
|
self.to_screen('[download] Download completed')
|
||||||
else:
|
else:
|
||||||
|
s['_total_bytes_str'] = format_bytes(s['total_bytes'])
|
||||||
|
if s.get('elapsed') is not None:
|
||||||
|
s['_elapsed_str'] = self.format_seconds(s['elapsed'])
|
||||||
|
msg_template = '100%% of %(_total_bytes_str)s in %(_elapsed_str)s'
|
||||||
|
else:
|
||||||
|
msg_template = '100%% of %(_total_bytes_str)s'
|
||||||
self._report_progress_status(
|
self._report_progress_status(
|
||||||
('100%% of %s in %s' %
|
msg_template % s, is_last_line=True)
|
||||||
(data_len_str, self.format_seconds(tot_time))),
|
|
||||||
is_last_line=True)
|
if self.params.get('noprogress'):
|
||||||
|
return
|
||||||
|
|
||||||
|
if s['status'] != 'downloading':
|
||||||
|
return
|
||||||
|
|
||||||
|
if s.get('eta') is not None:
|
||||||
|
s['_eta_str'] = self.format_eta(s['eta'])
|
||||||
|
else:
|
||||||
|
s['_eta_str'] = 'Unknown ETA'
|
||||||
|
|
||||||
|
if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
|
||||||
|
s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
|
||||||
|
elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
|
||||||
|
s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
|
||||||
|
else:
|
||||||
|
if s.get('downloaded_bytes') == 0:
|
||||||
|
s['_percent_str'] = self.format_percent(0)
|
||||||
|
else:
|
||||||
|
s['_percent_str'] = 'Unknown %'
|
||||||
|
|
||||||
|
if s.get('speed') is not None:
|
||||||
|
s['_speed_str'] = self.format_speed(s['speed'])
|
||||||
|
else:
|
||||||
|
s['_speed_str'] = 'Unknown speed'
|
||||||
|
|
||||||
|
if s.get('total_bytes') is not None:
|
||||||
|
s['_total_bytes_str'] = format_bytes(s['total_bytes'])
|
||||||
|
msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
|
||||||
|
elif s.get('total_bytes_estimate') is not None:
|
||||||
|
s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
|
||||||
|
msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
|
||||||
|
else:
|
||||||
|
if s.get('downloaded_bytes') is not None:
|
||||||
|
s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
|
||||||
|
if s.get('elapsed'):
|
||||||
|
s['_elapsed_str'] = self.format_seconds(s['elapsed'])
|
||||||
|
msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
|
||||||
|
else:
|
||||||
|
msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
|
||||||
|
else:
|
||||||
|
msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s'
|
||||||
|
|
||||||
|
self._report_progress_status(msg_template % s)
|
||||||
|
|
||||||
def report_resuming_byte(self, resume_len):
|
def report_resuming_byte(self, resume_len):
|
||||||
"""Report attempt to resume at given byte."""
|
"""Report attempt to resume at given byte."""
|
||||||
@ -288,14 +311,14 @@ class FileDownloader(object):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
nooverwrites_and_exists = (
|
nooverwrites_and_exists = (
|
||||||
self.params.get('nooverwrites', False)
|
self.params.get('nooverwrites', False) and
|
||||||
and os.path.exists(encodeFilename(filename))
|
os.path.exists(encodeFilename(filename))
|
||||||
)
|
)
|
||||||
|
|
||||||
continuedl_and_exists = (
|
continuedl_and_exists = (
|
||||||
self.params.get('continuedl', False)
|
self.params.get('continuedl', False) and
|
||||||
and os.path.isfile(encodeFilename(filename))
|
os.path.isfile(encodeFilename(filename)) and
|
||||||
and not self.params.get('nopart', False)
|
not self.params.get('nopart', False)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Check file already present
|
# Check file already present
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import division, unicode_literals
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
import io
|
import io
|
||||||
@ -15,7 +15,6 @@ from ..compat import (
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
struct_pack,
|
struct_pack,
|
||||||
struct_unpack,
|
struct_unpack,
|
||||||
format_bytes,
|
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
sanitize_open,
|
sanitize_open,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
@ -252,17 +251,6 @@ class F4mFD(FileDownloader):
|
|||||||
requested_bitrate = info_dict.get('tbr')
|
requested_bitrate = info_dict.get('tbr')
|
||||||
self.to_screen('[download] Downloading f4m manifest')
|
self.to_screen('[download] Downloading f4m manifest')
|
||||||
manifest = self.ydl.urlopen(man_url).read()
|
manifest = self.ydl.urlopen(man_url).read()
|
||||||
self.report_destination(filename)
|
|
||||||
http_dl = HttpQuietDownloader(
|
|
||||||
self.ydl,
|
|
||||||
{
|
|
||||||
'continuedl': True,
|
|
||||||
'quiet': True,
|
|
||||||
'noprogress': True,
|
|
||||||
'ratelimit': self.params.get('ratelimit', None),
|
|
||||||
'test': self.params.get('test', False),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
doc = etree.fromstring(manifest)
|
doc = etree.fromstring(manifest)
|
||||||
formats = [(int(f.attrib.get('bitrate', -1)), f)
|
formats = [(int(f.attrib.get('bitrate', -1)), f)
|
||||||
@ -298,39 +286,65 @@ class F4mFD(FileDownloader):
|
|||||||
# For some akamai manifests we'll need to add a query to the fragment url
|
# For some akamai manifests we'll need to add a query to the fragment url
|
||||||
akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
|
akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
|
||||||
|
|
||||||
|
self.report_destination(filename)
|
||||||
|
http_dl = HttpQuietDownloader(
|
||||||
|
self.ydl,
|
||||||
|
{
|
||||||
|
'continuedl': True,
|
||||||
|
'quiet': True,
|
||||||
|
'noprogress': True,
|
||||||
|
'ratelimit': self.params.get('ratelimit', None),
|
||||||
|
'test': self.params.get('test', False),
|
||||||
|
}
|
||||||
|
)
|
||||||
tmpfilename = self.temp_name(filename)
|
tmpfilename = self.temp_name(filename)
|
||||||
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
|
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
|
||||||
|
|
||||||
write_flv_header(dest_stream)
|
write_flv_header(dest_stream)
|
||||||
write_metadata_tag(dest_stream, metadata)
|
write_metadata_tag(dest_stream, metadata)
|
||||||
|
|
||||||
# This dict stores the download progress, it's updated by the progress
|
# This dict stores the download progress, it's updated by the progress
|
||||||
# hook
|
# hook
|
||||||
state = {
|
state = {
|
||||||
|
'status': 'downloading',
|
||||||
'downloaded_bytes': 0,
|
'downloaded_bytes': 0,
|
||||||
'frag_counter': 0,
|
'frag_index': 0,
|
||||||
|
'frag_count': total_frags,
|
||||||
|
'filename': filename,
|
||||||
|
'tmpfilename': tmpfilename,
|
||||||
}
|
}
|
||||||
start = time.time()
|
start = time.time()
|
||||||
|
|
||||||
def frag_progress_hook(status):
|
def frag_progress_hook(s):
|
||||||
frag_total_bytes = status.get('total_bytes', 0)
|
if s['status'] not in ('downloading', 'finished'):
|
||||||
estimated_size = (state['downloaded_bytes'] +
|
return
|
||||||
(total_frags - state['frag_counter']) * frag_total_bytes)
|
|
||||||
if status['status'] == 'finished':
|
frag_total_bytes = s.get('total_bytes', 0)
|
||||||
|
if s['status'] == 'finished':
|
||||||
state['downloaded_bytes'] += frag_total_bytes
|
state['downloaded_bytes'] += frag_total_bytes
|
||||||
state['frag_counter'] += 1
|
state['frag_index'] += 1
|
||||||
progress = self.calc_percent(state['frag_counter'], total_frags)
|
|
||||||
byte_counter = state['downloaded_bytes']
|
estimated_size = (
|
||||||
|
(state['downloaded_bytes'] + frag_total_bytes) /
|
||||||
|
(state['frag_index'] + 1) * total_frags)
|
||||||
|
time_now = time.time()
|
||||||
|
state['total_bytes_estimate'] = estimated_size
|
||||||
|
state['elapsed'] = time_now - start
|
||||||
|
|
||||||
|
if s['status'] == 'finished':
|
||||||
|
progress = self.calc_percent(state['frag_index'], total_frags)
|
||||||
else:
|
else:
|
||||||
frag_downloaded_bytes = status['downloaded_bytes']
|
frag_downloaded_bytes = s['downloaded_bytes']
|
||||||
byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes
|
|
||||||
frag_progress = self.calc_percent(frag_downloaded_bytes,
|
frag_progress = self.calc_percent(frag_downloaded_bytes,
|
||||||
frag_total_bytes)
|
frag_total_bytes)
|
||||||
progress = self.calc_percent(state['frag_counter'], total_frags)
|
progress = self.calc_percent(state['frag_index'], total_frags)
|
||||||
progress += frag_progress / float(total_frags)
|
progress += frag_progress / float(total_frags)
|
||||||
|
|
||||||
eta = self.calc_eta(start, time.time(), estimated_size, byte_counter)
|
state['eta'] = self.calc_eta(
|
||||||
self.report_progress(progress, format_bytes(estimated_size),
|
start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes)
|
||||||
status.get('speed'), eta)
|
state['speed'] = s.get('speed')
|
||||||
|
self._hook_progress(state)
|
||||||
|
|
||||||
http_dl.add_progress_hook(frag_progress_hook)
|
http_dl.add_progress_hook(frag_progress_hook)
|
||||||
|
|
||||||
frags_filenames = []
|
frags_filenames = []
|
||||||
@ -354,8 +368,8 @@ class F4mFD(FileDownloader):
|
|||||||
frags_filenames.append(frag_filename)
|
frags_filenames.append(frag_filename)
|
||||||
|
|
||||||
dest_stream.close()
|
dest_stream.close()
|
||||||
self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)
|
|
||||||
|
|
||||||
|
elapsed = time.time() - start
|
||||||
self.try_rename(tmpfilename, filename)
|
self.try_rename(tmpfilename, filename)
|
||||||
for frag_file in frags_filenames:
|
for frag_file in frags_filenames:
|
||||||
os.remove(frag_file)
|
os.remove(frag_file)
|
||||||
@ -366,6 +380,7 @@ class F4mFD(FileDownloader):
|
|||||||
'total_bytes': fsize,
|
'total_bytes': fsize,
|
||||||
'filename': filename,
|
'filename': filename,
|
||||||
'status': 'finished',
|
'status': 'finished',
|
||||||
|
'elapsed': elapsed,
|
||||||
})
|
})
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
@ -1,10 +1,9 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import os
|
|
||||||
import time
|
|
||||||
|
|
||||||
from socket import error as SocketError
|
|
||||||
import errno
|
import errno
|
||||||
|
import os
|
||||||
|
import socket
|
||||||
|
import time
|
||||||
|
|
||||||
from .common import FileDownloader
|
from .common import FileDownloader
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
@ -15,7 +14,6 @@ from ..utils import (
|
|||||||
ContentTooShortError,
|
ContentTooShortError,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
sanitize_open,
|
sanitize_open,
|
||||||
format_bytes,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -102,7 +100,7 @@ class HttpFD(FileDownloader):
|
|||||||
resume_len = 0
|
resume_len = 0
|
||||||
open_mode = 'wb'
|
open_mode = 'wb'
|
||||||
break
|
break
|
||||||
except SocketError as e:
|
except socket.error as e:
|
||||||
if e.errno != errno.ECONNRESET:
|
if e.errno != errno.ECONNRESET:
|
||||||
# Connection reset is no problem, just retry
|
# Connection reset is no problem, just retry
|
||||||
raise
|
raise
|
||||||
@ -137,7 +135,6 @@ class HttpFD(FileDownloader):
|
|||||||
self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
data_len_str = format_bytes(data_len)
|
|
||||||
byte_counter = 0 + resume_len
|
byte_counter = 0 + resume_len
|
||||||
block_size = self.params.get('buffersize', 1024)
|
block_size = self.params.get('buffersize', 1024)
|
||||||
start = time.time()
|
start = time.time()
|
||||||
@ -196,20 +193,19 @@ class HttpFD(FileDownloader):
|
|||||||
# Progress message
|
# Progress message
|
||||||
speed = self.calc_speed(start, now, byte_counter - resume_len)
|
speed = self.calc_speed(start, now, byte_counter - resume_len)
|
||||||
if data_len is None:
|
if data_len is None:
|
||||||
eta = percent = None
|
eta = None
|
||||||
else:
|
else:
|
||||||
percent = self.calc_percent(byte_counter, data_len)
|
|
||||||
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
|
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
|
||||||
self.report_progress(percent, data_len_str, speed, eta)
|
|
||||||
|
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
|
'status': 'downloading',
|
||||||
'downloaded_bytes': byte_counter,
|
'downloaded_bytes': byte_counter,
|
||||||
'total_bytes': data_len,
|
'total_bytes': data_len,
|
||||||
'tmpfilename': tmpfilename,
|
'tmpfilename': tmpfilename,
|
||||||
'filename': filename,
|
'filename': filename,
|
||||||
'status': 'downloading',
|
|
||||||
'eta': eta,
|
'eta': eta,
|
||||||
'speed': speed,
|
'speed': speed,
|
||||||
|
'elapsed': now - start,
|
||||||
})
|
})
|
||||||
|
|
||||||
if is_test and byte_counter == data_len:
|
if is_test and byte_counter == data_len:
|
||||||
@ -221,7 +217,13 @@ class HttpFD(FileDownloader):
|
|||||||
return False
|
return False
|
||||||
if tmpfilename != '-':
|
if tmpfilename != '-':
|
||||||
stream.close()
|
stream.close()
|
||||||
self.report_finish(data_len_str, (time.time() - start))
|
|
||||||
|
self._hook_progress({
|
||||||
|
'downloaded_bytes': byte_counter,
|
||||||
|
'total_bytes': data_len,
|
||||||
|
'tmpfilename': tmpfilename,
|
||||||
|
'status': 'error',
|
||||||
|
})
|
||||||
if data_len is not None and byte_counter != data_len:
|
if data_len is not None and byte_counter != data_len:
|
||||||
raise ContentTooShortError(byte_counter, int(data_len))
|
raise ContentTooShortError(byte_counter, int(data_len))
|
||||||
self.try_rename(tmpfilename, filename)
|
self.try_rename(tmpfilename, filename)
|
||||||
@ -235,6 +237,7 @@ class HttpFD(FileDownloader):
|
|||||||
'total_bytes': byte_counter,
|
'total_bytes': byte_counter,
|
||||||
'filename': filename,
|
'filename': filename,
|
||||||
'status': 'finished',
|
'status': 'finished',
|
||||||
|
'elapsed': time.time() - start,
|
||||||
})
|
})
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
@ -11,7 +11,6 @@ from ..compat import compat_str
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
check_executable,
|
check_executable,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
format_bytes,
|
|
||||||
get_exe_version,
|
get_exe_version,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -51,23 +50,23 @@ class RtmpFD(FileDownloader):
|
|||||||
if not resume_percent:
|
if not resume_percent:
|
||||||
resume_percent = percent
|
resume_percent = percent
|
||||||
resume_downloaded_data_len = downloaded_data_len
|
resume_downloaded_data_len = downloaded_data_len
|
||||||
eta = self.calc_eta(start, time.time(), 100 - resume_percent, percent - resume_percent)
|
time_now = time.time()
|
||||||
speed = self.calc_speed(start, time.time(), downloaded_data_len - resume_downloaded_data_len)
|
eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
|
||||||
|
speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
|
||||||
data_len = None
|
data_len = None
|
||||||
if percent > 0:
|
if percent > 0:
|
||||||
data_len = int(downloaded_data_len * 100 / percent)
|
data_len = int(downloaded_data_len * 100 / percent)
|
||||||
data_len_str = '~' + format_bytes(data_len)
|
|
||||||
self.report_progress(percent, data_len_str, speed, eta)
|
|
||||||
cursor_in_new_line = False
|
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
|
'status': 'downloading',
|
||||||
'downloaded_bytes': downloaded_data_len,
|
'downloaded_bytes': downloaded_data_len,
|
||||||
'total_bytes': data_len,
|
'total_bytes_estimate': data_len,
|
||||||
'tmpfilename': tmpfilename,
|
'tmpfilename': tmpfilename,
|
||||||
'filename': filename,
|
'filename': filename,
|
||||||
'status': 'downloading',
|
|
||||||
'eta': eta,
|
'eta': eta,
|
||||||
|
'elapsed': time_now - start,
|
||||||
'speed': speed,
|
'speed': speed,
|
||||||
})
|
})
|
||||||
|
cursor_in_new_line = False
|
||||||
else:
|
else:
|
||||||
# no percent for live streams
|
# no percent for live streams
|
||||||
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
|
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
|
||||||
@ -75,15 +74,15 @@ class RtmpFD(FileDownloader):
|
|||||||
downloaded_data_len = int(float(mobj.group(1)) * 1024)
|
downloaded_data_len = int(float(mobj.group(1)) * 1024)
|
||||||
time_now = time.time()
|
time_now = time.time()
|
||||||
speed = self.calc_speed(start, time_now, downloaded_data_len)
|
speed = self.calc_speed(start, time_now, downloaded_data_len)
|
||||||
self.report_progress_live_stream(downloaded_data_len, speed, time_now - start)
|
|
||||||
cursor_in_new_line = False
|
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
'downloaded_bytes': downloaded_data_len,
|
'downloaded_bytes': downloaded_data_len,
|
||||||
'tmpfilename': tmpfilename,
|
'tmpfilename': tmpfilename,
|
||||||
'filename': filename,
|
'filename': filename,
|
||||||
'status': 'downloading',
|
'status': 'downloading',
|
||||||
|
'elapsed': time_now - start,
|
||||||
'speed': speed,
|
'speed': speed,
|
||||||
})
|
})
|
||||||
|
cursor_in_new_line = False
|
||||||
elif self.params.get('verbose', False):
|
elif self.params.get('verbose', False):
|
||||||
if not cursor_in_new_line:
|
if not cursor_in_new_line:
|
||||||
self.to_screen('')
|
self.to_screen('')
|
||||||
|
@ -58,10 +58,15 @@ from .canalplus import CanalplusIE
|
|||||||
from .canalc2 import Canalc2IE
|
from .canalc2 import Canalc2IE
|
||||||
from .cbs import CBSIE
|
from .cbs import CBSIE
|
||||||
from .cbsnews import CBSNewsIE
|
from .cbsnews import CBSNewsIE
|
||||||
|
from .cbssports import CBSSportsIE
|
||||||
from .ccc import CCCIE
|
from .ccc import CCCIE
|
||||||
from .ceskatelevize import CeskaTelevizeIE
|
from .ceskatelevize import CeskaTelevizeIE
|
||||||
from .channel9 import Channel9IE
|
from .channel9 import Channel9IE
|
||||||
from .chilloutzone import ChilloutzoneIE
|
from .chilloutzone import ChilloutzoneIE
|
||||||
|
from .chirbit import (
|
||||||
|
ChirbitIE,
|
||||||
|
ChirbitProfileIE,
|
||||||
|
)
|
||||||
from .cinchcast import CinchcastIE
|
from .cinchcast import CinchcastIE
|
||||||
from .clipfish import ClipfishIE
|
from .clipfish import ClipfishIE
|
||||||
from .cliphunter import CliphunterIE
|
from .cliphunter import CliphunterIE
|
||||||
@ -121,6 +126,7 @@ from .ellentv import (
|
|||||||
EllenTVClipsIE,
|
EllenTVClipsIE,
|
||||||
)
|
)
|
||||||
from .elpais import ElPaisIE
|
from .elpais import ElPaisIE
|
||||||
|
from .embedly import EmbedlyIE
|
||||||
from .empflix import EMPFlixIE
|
from .empflix import EMPFlixIE
|
||||||
from .engadget import EngadgetIE
|
from .engadget import EngadgetIE
|
||||||
from .eporner import EpornerIE
|
from .eporner import EpornerIE
|
||||||
@ -204,6 +210,7 @@ from .imdb import (
|
|||||||
ImdbIE,
|
ImdbIE,
|
||||||
ImdbListIE
|
ImdbListIE
|
||||||
)
|
)
|
||||||
|
from .imgur import ImgurIE
|
||||||
from .ina import InaIE
|
from .ina import InaIE
|
||||||
from .infoq import InfoQIE
|
from .infoq import InfoQIE
|
||||||
from .instagram import InstagramIE, InstagramUserIE
|
from .instagram import InstagramIE, InstagramUserIE
|
||||||
@ -282,6 +289,7 @@ from .myspace import MySpaceIE, MySpaceAlbumIE
|
|||||||
from .myspass import MySpassIE
|
from .myspass import MySpassIE
|
||||||
from .myvideo import MyVideoIE
|
from .myvideo import MyVideoIE
|
||||||
from .myvidster import MyVidsterIE
|
from .myvidster import MyVidsterIE
|
||||||
|
from .nationalgeographic import NationalGeographicIE
|
||||||
from .naver import NaverIE
|
from .naver import NaverIE
|
||||||
from .nba import NBAIE
|
from .nba import NBAIE
|
||||||
from .nbc import (
|
from .nbc import (
|
||||||
@ -350,13 +358,17 @@ from .playfm import PlayFMIE
|
|||||||
from .playvid import PlayvidIE
|
from .playvid import PlayvidIE
|
||||||
from .podomatic import PodomaticIE
|
from .podomatic import PodomaticIE
|
||||||
from .pornhd import PornHdIE
|
from .pornhd import PornHdIE
|
||||||
from .pornhub import PornHubIE
|
from .pornhub import (
|
||||||
|
PornHubIE,
|
||||||
|
PornHubPlaylistIE,
|
||||||
|
)
|
||||||
from .pornotube import PornotubeIE
|
from .pornotube import PornotubeIE
|
||||||
from .pornoxo import PornoXOIE
|
from .pornoxo import PornoXOIE
|
||||||
from .promptfile import PromptFileIE
|
from .promptfile import PromptFileIE
|
||||||
from .prosiebensat1 import ProSiebenSat1IE
|
from .prosiebensat1 import ProSiebenSat1IE
|
||||||
from .pyvideo import PyvideoIE
|
from .pyvideo import PyvideoIE
|
||||||
from .quickvid import QuickVidIE
|
from .quickvid import QuickVidIE
|
||||||
|
from .r7 import R7IE
|
||||||
from .radiode import RadioDeIE
|
from .radiode import RadioDeIE
|
||||||
from .radiobremen import RadioBremenIE
|
from .radiobremen import RadioBremenIE
|
||||||
from .radiofrance import RadioFranceIE
|
from .radiofrance import RadioFranceIE
|
||||||
@ -386,6 +398,7 @@ from .rutube import (
|
|||||||
RutubePersonIE,
|
RutubePersonIE,
|
||||||
)
|
)
|
||||||
from .rutv import RUTVIE
|
from .rutv import RUTVIE
|
||||||
|
from .sandia import SandiaIE
|
||||||
from .sapo import SapoIE
|
from .sapo import SapoIE
|
||||||
from .savefrom import SaveFromIE
|
from .savefrom import SaveFromIE
|
||||||
from .sbs import SBSIE
|
from .sbs import SBSIE
|
||||||
@ -416,7 +429,10 @@ from .soundcloud import (
|
|||||||
SoundcloudUserIE,
|
SoundcloudUserIE,
|
||||||
SoundcloudPlaylistIE
|
SoundcloudPlaylistIE
|
||||||
)
|
)
|
||||||
from .soundgasm import SoundgasmIE
|
from .soundgasm import (
|
||||||
|
SoundgasmIE,
|
||||||
|
SoundgasmProfileIE
|
||||||
|
)
|
||||||
from .southpark import (
|
from .southpark import (
|
||||||
SouthParkIE,
|
SouthParkIE,
|
||||||
SouthparkDeIE,
|
SouthparkDeIE,
|
||||||
@ -482,6 +498,7 @@ from .tumblr import TumblrIE
|
|||||||
from .tunein import TuneInIE
|
from .tunein import TuneInIE
|
||||||
from .turbo import TurboIE
|
from .turbo import TurboIE
|
||||||
from .tutv import TutvIE
|
from .tutv import TutvIE
|
||||||
|
from .tv4 import TV4IE
|
||||||
from .tvigle import TvigleIE
|
from .tvigle import TvigleIE
|
||||||
from .tvp import TvpIE, TvpSeriesIE
|
from .tvp import TvpIE, TvpSeriesIE
|
||||||
from .tvplay import TVPlayIE
|
from .tvplay import TVPlayIE
|
||||||
@ -603,6 +620,7 @@ from .youtube import (
|
|||||||
YoutubeUserIE,
|
YoutubeUserIE,
|
||||||
YoutubeWatchLaterIE,
|
YoutubeWatchLaterIE,
|
||||||
)
|
)
|
||||||
|
from .zapiks import ZapiksIE
|
||||||
from .zdf import ZDFIE, ZDFChannelIE
|
from .zdf import ZDFIE, ZDFChannelIE
|
||||||
from .zingmp3 import (
|
from .zingmp3 import (
|
||||||
ZingMp3SongIE,
|
ZingMp3SongIE,
|
||||||
|
@ -28,7 +28,6 @@ class AdobeTVIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
player = self._parse_json(
|
player = self._parse_json(
|
||||||
@ -44,8 +43,10 @@ class AdobeTVIE(InfoExtractor):
|
|||||||
self._html_search_meta('datepublished', webpage, 'upload date'))
|
self._html_search_meta('datepublished', webpage, 'upload date'))
|
||||||
|
|
||||||
duration = parse_duration(
|
duration = parse_duration(
|
||||||
self._html_search_meta('duration', webpage, 'duration')
|
self._html_search_meta('duration', webpage, 'duration') or
|
||||||
or self._search_regex(r'Runtime:\s*(\d{2}:\d{2}:\d{2})', webpage, 'duration'))
|
self._search_regex(
|
||||||
|
r'Runtime:\s*(\d{2}:\d{2}:\d{2})',
|
||||||
|
webpage, 'duration', fatal=False))
|
||||||
|
|
||||||
view_count = str_to_int(self._search_regex(
|
view_count = str_to_int(self._search_regex(
|
||||||
r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>',
|
r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>',
|
||||||
|
@ -38,6 +38,7 @@ class AdultSwimIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
],
|
],
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'rQxZvXQ4ROaSOqq-or2Mow',
|
||||||
'title': 'Rick and Morty - Pilot',
|
'title': 'Rick and Morty - Pilot',
|
||||||
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
|
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
|
||||||
}
|
}
|
||||||
@ -55,6 +56,7 @@ class AdultSwimIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '-t8CamQlQ2aYZ49ItZCFog',
|
||||||
'title': 'American Dad - Putting Francine Out of Business',
|
'title': 'American Dad - Putting Francine Out of Business',
|
||||||
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
|
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
|
||||||
},
|
},
|
||||||
|
@ -11,9 +11,12 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class AppleTrailersIE(InfoExtractor):
|
class AppleTrailersIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
|
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
"url": "http://trailers.apple.com/trailers/wb/manofsteel/",
|
"url": "http://trailers.apple.com/trailers/wb/manofsteel/",
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'manofsteel',
|
||||||
|
},
|
||||||
"playlist": [
|
"playlist": [
|
||||||
{
|
{
|
||||||
"md5": "d97a8e575432dbcb81b7c3acb741f8a8",
|
"md5": "d97a8e575432dbcb81b7c3acb741f8a8",
|
||||||
@ -60,7 +63,10 @@ class AppleTrailersIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://trailers.apple.com/ca/metropole/autrui/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
_JSON_RE = r'iTunes.playURL\((.*?)\);'
|
_JSON_RE = r'iTunes.playURL\((.*?)\);'
|
||||||
|
|
||||||
|
@ -109,7 +109,7 @@ class BandcampIE(InfoExtractor):
|
|||||||
|
|
||||||
class BandcampAlbumIE(InfoExtractor):
|
class BandcampAlbumIE(InfoExtractor):
|
||||||
IE_NAME = 'Bandcamp:album'
|
IE_NAME = 'Bandcamp:album'
|
||||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+)|/?(?:$|[?#]))'
|
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^?#]+)|/?(?:$|[?#]))'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
||||||
@ -133,31 +133,37 @@ class BandcampAlbumIE(InfoExtractor):
|
|||||||
],
|
],
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'Jazz Format Mixtape vol.1',
|
'title': 'Jazz Format Mixtape vol.1',
|
||||||
|
'id': 'jazz-format-mixtape-vol-1',
|
||||||
|
'uploader_id': 'blazo',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'playlistend': 2
|
'playlistend': 2
|
||||||
},
|
},
|
||||||
'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
|
'skip': 'Bandcamp imposes download limits.'
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
|
'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'Hierophany of the Open Grave',
|
'title': 'Hierophany of the Open Grave',
|
||||||
|
'uploader_id': 'nightbringer',
|
||||||
|
'id': 'hierophany-of-the-open-grave',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 9,
|
'playlist_mincount': 9,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://dotscale.bandcamp.com',
|
'url': 'http://dotscale.bandcamp.com',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'Loom',
|
'title': 'Loom',
|
||||||
|
'id': 'dotscale',
|
||||||
|
'uploader_id': 'dotscale',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 7,
|
'playlist_mincount': 7,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
playlist_id = mobj.group('subdomain')
|
uploader_id = mobj.group('subdomain')
|
||||||
title = mobj.group('title')
|
album_id = mobj.group('album_id')
|
||||||
display_id = title or playlist_id
|
playlist_id = album_id or uploader_id
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
|
tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
|
||||||
if not tracks_paths:
|
if not tracks_paths:
|
||||||
raise ExtractorError('The page doesn\'t contain any tracks')
|
raise ExtractorError('The page doesn\'t contain any tracks')
|
||||||
@ -168,8 +174,8 @@ class BandcampAlbumIE(InfoExtractor):
|
|||||||
r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False)
|
r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False)
|
||||||
return {
|
return {
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
|
'uploader_id': uploader_id,
|
||||||
'id': playlist_id,
|
'id': playlist_id,
|
||||||
'display_id': display_id,
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'entries': entries,
|
'entries': entries,
|
||||||
}
|
}
|
||||||
|
@ -1,40 +1,35 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import remove_start
|
from ..utils import (
|
||||||
|
remove_start,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class BlinkxIE(InfoExtractor):
|
class BlinkxIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
|
_VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
|
||||||
IE_NAME = 'blinkx'
|
IE_NAME = 'blinkx'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB',
|
'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ',
|
||||||
'md5': '2e9a07364af40163a908edbf10bb2492',
|
'md5': '337cf7a344663ec79bf93a526a2e06c7',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '8aQUy7GV',
|
'id': 'Da0Gw3xc',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Police Car Rolls Away',
|
'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News',
|
||||||
'uploader': 'stupidvideos.com',
|
'uploader': 'IGN News',
|
||||||
'upload_date': '20131215',
|
'upload_date': '20150217',
|
||||||
'timestamp': 1387068000,
|
'timestamp': 1424215740,
|
||||||
'description': 'A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!',
|
'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.',
|
||||||
'duration': 14.886,
|
'duration': 47.743333,
|
||||||
'thumbnails': [{
|
|
||||||
'width': 100,
|
|
||||||
'height': 76,
|
|
||||||
'resolution': '100x76',
|
|
||||||
'url': 'http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg',
|
|
||||||
}],
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, rl):
|
def _real_extract(self, url):
|
||||||
m = re.match(self._VALID_URL, rl)
|
video_id = self._match_id(url)
|
||||||
video_id = m.group('id')
|
|
||||||
display_id = video_id[:8]
|
display_id = video_id[:8]
|
||||||
|
|
||||||
api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' +
|
api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' +
|
||||||
@ -60,18 +55,20 @@ class BlinkxIE(InfoExtractor):
|
|||||||
elif m['type'] in ('flv', 'mp4'):
|
elif m['type'] in ('flv', 'mp4'):
|
||||||
vcodec = remove_start(m['vcodec'], 'ff')
|
vcodec = remove_start(m['vcodec'], 'ff')
|
||||||
acodec = remove_start(m['acodec'], 'ff')
|
acodec = remove_start(m['acodec'], 'ff')
|
||||||
tbr = (int(m['vbr']) + int(m['abr'])) // 1000
|
vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000)
|
||||||
|
abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000)
|
||||||
|
tbr = vbr + abr if vbr and abr else None
|
||||||
format_id = '%s-%sk-%s' % (vcodec, tbr, m['w'])
|
format_id = '%s-%sk-%s' % (vcodec, tbr, m['w'])
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'url': m['link'],
|
'url': m['link'],
|
||||||
'vcodec': vcodec,
|
'vcodec': vcodec,
|
||||||
'acodec': acodec,
|
'acodec': acodec,
|
||||||
'abr': int(m['abr']) // 1000,
|
'abr': abr,
|
||||||
'vbr': int(m['vbr']) // 1000,
|
'vbr': vbr,
|
||||||
'tbr': tbr,
|
'tbr': tbr,
|
||||||
'width': int(m['w']),
|
'width': int_or_none(m.get('w')),
|
||||||
'height': int(m['h']),
|
'height': int_or_none(m.get('h')),
|
||||||
})
|
})
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
@ -95,6 +95,7 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL',
|
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'Sealife',
|
'title': 'Sealife',
|
||||||
|
'id': '3550319591001',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 7,
|
'playlist_mincount': 7,
|
||||||
},
|
},
|
||||||
@ -247,7 +248,7 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
playlist_info = json_data['videoList']
|
playlist_info = json_data['videoList']
|
||||||
videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']]
|
videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']]
|
||||||
|
|
||||||
return self.playlist_result(videos, playlist_id=playlist_info['id'],
|
return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'],
|
||||||
playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
|
playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
|
||||||
|
|
||||||
def _extract_video_info(self, video_info):
|
def _extract_video_info(self, video_info):
|
||||||
|
@ -33,6 +33,7 @@ class BuzzFeedIE(InfoExtractor):
|
|||||||
'skip_download': True, # Got enough YouTube download tests
|
'skip_download': True, # Got enough YouTube download tests
|
||||||
},
|
},
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'look-at-this-cute-dog-omg',
|
||||||
'description': 're:Munchkin the Teddy Bear is back ?!',
|
'description': 're:Munchkin the Teddy Bear is back ?!',
|
||||||
'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill',
|
'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill',
|
||||||
},
|
},
|
||||||
@ -42,8 +43,8 @@ class BuzzFeedIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'upload_date': '20141124',
|
'upload_date': '20141124',
|
||||||
'uploader_id': 'CindysMunchkin',
|
'uploader_id': 'CindysMunchkin',
|
||||||
'description': 're:© 2014 Munchkin the Shih Tzu',
|
'description': 're:© 2014 Munchkin the',
|
||||||
'uploader': 'Munchkin the Shih Tzu',
|
'uploader': 're:^Munchkin the',
|
||||||
'title': 're:Munchkin the Teddy Bear gets her exercise',
|
'title': 're:Munchkin the Teddy Bear gets her exercise',
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
@ -1,7 +1,5 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
@ -39,8 +37,7 @@ class CBSIE(InfoExtractor):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
real_id = self._search_regex(
|
real_id = self._search_regex(
|
||||||
r"video\.settings\.pid\s*=\s*'([^']+)';",
|
r"video\.settings\.pid\s*=\s*'([^']+)';",
|
||||||
|
30
youtube_dl/extractor/cbssports.py
Normal file
30
youtube_dl/extractor/cbssports.py
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class CBSSportsIE(InfoExtractor):
    """Extractor for cbssports.com video player pages.

    The page URL carries a section name and a video id; the section's JSON
    listing is used to map that id to a ThePlatform pid, and extraction is
    then delegated to the ThePlatform extractor.
    """

    _VALID_URL = r'http://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)'

    _TEST = {
        'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s',
        'info_dict': {
            'id': '_d5_GbO8p1sT',
            'ext': 'flv',
            'title': 'US Open flashbacks: 1990s',
            'description': 'Bill Macatee relives the best moments in US Open history from the 1990s.',
        },
    }

    def _real_extract(self, url):
        """Resolve the page URL to a ``theplatform:<pid>`` URL result."""
        section, video_id = re.match(self._VALID_URL, url).group('section', 'id')

        # The JSON document lists every video of the section, not just ours.
        listing_url = 'http://www.cbssports.com/data/video/player/getVideos/%s?as=json' % section
        section_videos = self._download_json(listing_url, video_id)

        # Pick the first entry whose 'pcid' equals the id taken from the URL.
        candidates = (entry for entry in section_videos if entry['pcid'] == video_id)
        video_info = next(candidates)

        return self.url_result('theplatform:%s' % video_info['pid'], 'ThePlatform')
|
84
youtube_dl/extractor/chirbit.py
Normal file
84
youtube_dl/extractor/chirbit.py
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
parse_duration,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ChirbitIE(InfoExtractor):
    """Extractor for single chirb.it audio pages (including the SWF player URL form)."""

    IE_NAME = 'chirbit'
    _VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)'
    _TESTS = [{
        'url': 'http://chirb.it/PrIPv5',
        'md5': '9847b0dad6ac3e074568bf2cfb197de8',
        'info_dict': {
            'id': 'PrIPv5',
            'ext': 'mp3',
            'title': 'Фасадстрой',
            'duration': 52,
            'view_count': int,
            'comment_count': int,
        }
    }, {
        'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Scrape the canonical chirb.it page for the audio URL and metadata."""
        audio_id = self._match_id(url)

        # Always fetch the canonical page, whatever URL variant was given.
        page = self._download_webpage(
            'http://chirb.it/%s' % audio_id, audio_id)

        # The fields are filled in the same order the page is searched, so the
        # mandatory lookups (audio URL, title) run before the optional ones.
        info = {'id': audio_id}
        info['url'] = self._search_regex(
            r'"setFile"\s*,\s*"([^"]+)"', page, 'audio url')
        info['title'] = self._search_regex(
            r'itemprop="name">([^<]+)', page, 'title')

        raw_duration = self._html_search_meta(
            'duration', page, 'duration', fatal=False)
        info['duration'] = parse_duration(raw_duration)

        raw_views = self._search_regex(
            r'itemprop="playCount"\s*>(\d+)', page,
            'listen count', fatal=False)
        info['view_count'] = int_or_none(raw_views)

        raw_comments = self._search_regex(
            r'>(\d+) Comments?:', page,
            'comment count', fatal=False)
        info['comment_count'] = int_or_none(raw_comments)

        return info
|
||||||
|
|
||||||
|
|
||||||
|
class ChirbitProfileIE(InfoExtractor):
    """Extractor for chirbit.com profile pages, returned as a playlist.

    The profile's RSS feed is downloaded and every ``<item><link>`` in it is
    handed to the Chirbit extractor as a playlist entry.
    """

    IE_NAME = 'chirbit:profile'
    # The dot in "chirbit.com" is escaped so that it only matches a literal
    # dot and not an arbitrary character.
    _VALID_URL = r'https?://(?:www\.)?chirbit\.com/(?:rss/)?(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://chirbit.com/ScarletBeauty',
        'info_dict': {
            'id': 'ScarletBeauty',
            'title': 'Chirbits by ScarletBeauty',
        },
        'playlist_mincount': 3,
    }

    def _real_extract(self, url):
        """Build a playlist from the profile's RSS feed."""
        profile_id = self._match_id(url)

        rss = self._download_xml(
            'http://chirbit.com/rss/%s' % profile_id, profile_id)

        entries = [
            self.url_result(audio_url.text, 'Chirbit')
            for audio_url in rss.findall('./channel/item/link')]

        # A feed without a channel title should still yield the playlist
        # instead of failing with an AttributeError on None.
        title_el = rss.find('./channel/title')
        title = title_el.text if title_el is not None else None

        return self.playlist_result(entries, profile_id, title)
|
@ -27,7 +27,6 @@ from ..utils import (
|
|||||||
compiled_regex_type,
|
compiled_regex_type,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
HEADRequest,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
RegexNotFoundError,
|
RegexNotFoundError,
|
||||||
sanitize_filename,
|
sanitize_filename,
|
||||||
@ -392,6 +391,16 @@ class InfoExtractor(object):
|
|||||||
if blocked_iframe:
|
if blocked_iframe:
|
||||||
msg += ' Visit %s for more details' % blocked_iframe
|
msg += ' Visit %s for more details' % blocked_iframe
|
||||||
raise ExtractorError(msg, expected=True)
|
raise ExtractorError(msg, expected=True)
|
||||||
|
if '<title>The URL you requested has been blocked</title>' in content[:512]:
|
||||||
|
msg = (
|
||||||
|
'Access to this webpage has been blocked by Indian censorship. '
|
||||||
|
'Use a VPN or proxy server (with --proxy) to route around it.')
|
||||||
|
block_msg = self._html_search_regex(
|
||||||
|
r'</h1><p>(.*?)</p>',
|
||||||
|
content, 'block message', default=None)
|
||||||
|
if block_msg:
|
||||||
|
msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
|
||||||
|
raise ExtractorError(msg, expected=True)
|
||||||
|
|
||||||
return content
|
return content
|
||||||
|
|
||||||
@ -799,8 +808,8 @@ class InfoExtractor(object):
|
|||||||
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
|
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
|
||||||
for i, media_el in enumerate(media_nodes):
|
for i, media_el in enumerate(media_nodes):
|
||||||
if manifest_version == '2.0':
|
if manifest_version == '2.0':
|
||||||
manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/'
|
manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' +
|
||||||
+ (media_el.attrib.get('href') or media_el.attrib.get('url')))
|
(media_el.attrib.get('href') or media_el.attrib.get('url')))
|
||||||
tbr = int_or_none(media_el.attrib.get('bitrate'))
|
tbr = int_or_none(media_el.attrib.get('bitrate'))
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])),
|
'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])),
|
||||||
@ -824,7 +833,7 @@ class InfoExtractor(object):
|
|||||||
'url': m3u8_url,
|
'url': m3u8_url,
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
'protocol': 'm3u8',
|
'protocol': 'm3u8',
|
||||||
'preference': -1,
|
'preference': preference - 1 if preference else -1,
|
||||||
'resolution': 'multiple',
|
'resolution': 'multiple',
|
||||||
'format_note': 'Quality selection URL',
|
'format_note': 'Quality selection URL',
|
||||||
}]
|
}]
|
||||||
@ -839,6 +848,7 @@ class InfoExtractor(object):
|
|||||||
note='Downloading m3u8 information',
|
note='Downloading m3u8 information',
|
||||||
errnote='Failed to download m3u8 information')
|
errnote='Failed to download m3u8 information')
|
||||||
last_info = None
|
last_info = None
|
||||||
|
last_media = None
|
||||||
kv_rex = re.compile(
|
kv_rex = re.compile(
|
||||||
r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')
|
r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')
|
||||||
for line in m3u8_doc.splitlines():
|
for line in m3u8_doc.splitlines():
|
||||||
@ -849,6 +859,13 @@ class InfoExtractor(object):
|
|||||||
if v.startswith('"'):
|
if v.startswith('"'):
|
||||||
v = v[1:-1]
|
v = v[1:-1]
|
||||||
last_info[m.group('key')] = v
|
last_info[m.group('key')] = v
|
||||||
|
elif line.startswith('#EXT-X-MEDIA:'):
|
||||||
|
last_media = {}
|
||||||
|
for m in kv_rex.finditer(line):
|
||||||
|
v = m.group('val')
|
||||||
|
if v.startswith('"'):
|
||||||
|
v = v[1:-1]
|
||||||
|
last_media[m.group('key')] = v
|
||||||
elif line.startswith('#') or not line.strip():
|
elif line.startswith('#') or not line.strip():
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
@ -877,6 +894,9 @@ class InfoExtractor(object):
|
|||||||
width_str, height_str = resolution.split('x')
|
width_str, height_str = resolution.split('x')
|
||||||
f['width'] = int(width_str)
|
f['width'] = int(width_str)
|
||||||
f['height'] = int(height_str)
|
f['height'] = int(height_str)
|
||||||
|
if last_media is not None:
|
||||||
|
f['m3u8_media'] = last_media
|
||||||
|
last_media = None
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
last_info = {}
|
last_info = {}
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
@ -194,6 +194,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
|||||||
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
|
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'SPORT',
|
'title': 'SPORT',
|
||||||
|
'id': 'xv4bw_nqtv_sport',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 20,
|
'playlist_mincount': 20,
|
||||||
}]
|
}]
|
||||||
|
@ -25,8 +25,9 @@ class DefenseGouvFrIE(InfoExtractor):
|
|||||||
r"flashvars.pvg_id=\"(\d+)\";",
|
r"flashvars.pvg_id=\"(\d+)\";",
|
||||||
webpage, 'ID')
|
webpage, 'ID')
|
||||||
|
|
||||||
json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
|
json_url = (
|
||||||
+ video_id)
|
'http://static.videos.gouv.fr/brightcovehub/export/json/%s' %
|
||||||
|
video_id)
|
||||||
info = self._download_json(json_url, title, 'Downloading JSON config')
|
info = self._download_json(json_url, title, 'Downloading JSON config')
|
||||||
video_url = info['renditions'][0]['url']
|
video_url = info['renditions'][0]['url']
|
||||||
|
|
||||||
|
16
youtube_dl/extractor/embedly.py
Normal file
16
youtube_dl/extractor/embedly.py
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_urllib_parse_unquote
|
||||||
|
|
||||||
|
|
||||||
|
class EmbedlyIE(InfoExtractor):
    """Extractor for embedly.com media widget URLs.

    The widget URL carries the real media page in its percent-encoded ``url``
    query parameter; that value is decoded and passed on as a URL result.
    """

    # The optional host prefix is "www." or "cdn."; the dot belongs to both
    # alternatives, so it sits outside the inner group.
    _VALID_URL = r'https?://(?:(?:www|cdn)\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?url=(?P<id>[^#&]+)'
    _TESTS = [{
        'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a URL result for the decoded ``url`` query parameter."""
        return self.url_result(compat_urllib_parse_unquote(self._match_id(url)))
|
@ -22,6 +22,7 @@ class EscapistIE(InfoExtractor):
|
|||||||
'uploader_id': 'the-escapist-presents',
|
'uploader_id': 'the-escapist-presents',
|
||||||
'uploader': 'The Escapist Presents',
|
'uploader': 'The Escapist Presents',
|
||||||
'title': "Breaking Down Baldur's Gate",
|
'title': "Breaking Down Baldur's Gate",
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -30,19 +31,18 @@ class EscapistIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
uploader_id = self._html_search_regex(
|
uploader_id = self._html_search_regex(
|
||||||
r"<h1 class='headline'><a href='/videos/view/(.*?)'",
|
r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'",
|
||||||
webpage, 'uploader ID', fatal=False)
|
webpage, 'uploader ID', fatal=False)
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
r"<h1 class='headline'>(.*?)</a>",
|
r"<h1\s+class='headline'>(.*?)</a>",
|
||||||
webpage, 'uploader', fatal=False)
|
webpage, 'uploader', fatal=False)
|
||||||
description = self._html_search_meta('description', webpage)
|
description = self._html_search_meta('description', webpage)
|
||||||
|
|
||||||
raw_title = self._html_search_meta('title', webpage, fatal=True)
|
raw_title = self._html_search_meta('title', webpage, fatal=True)
|
||||||
title = raw_title.partition(' : ')[2]
|
title = raw_title.partition(' : ')[2]
|
||||||
|
|
||||||
player_url = self._og_search_video_url(webpage, name='player URL')
|
config_url = compat_urllib_parse.unquote(self._html_search_regex(
|
||||||
config_url = compat_urllib_parse.unquote(self._search_regex(
|
r'<param\s+name="flashvars"\s+value="config=([^"&]+)', webpage, 'config URL'))
|
||||||
r'config=(.*)$', player_url, 'config URL'))
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
@ -81,5 +81,4 @@ class EscapistIE(InfoExtractor):
|
|||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'description': description,
|
'description': description,
|
||||||
'player_url': player_url,
|
|
||||||
}
|
}
|
||||||
|
@ -14,6 +14,7 @@ class FiveMinIE(InfoExtractor):
|
|||||||
IE_NAME = '5min'
|
IE_NAME = '5min'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
(?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
|
(?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
|
||||||
|
https?://(?:(?:massively|www)\.)?joystiq\.com/video/|
|
||||||
5min:)
|
5min:)
|
||||||
(?P<id>\d+)
|
(?P<id>\d+)
|
||||||
'''
|
'''
|
||||||
|
@ -7,6 +7,7 @@ from ..compat import (
|
|||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
)
|
)
|
||||||
|
from ..utils import remove_end
|
||||||
|
|
||||||
|
|
||||||
class GDCVaultIE(InfoExtractor):
|
class GDCVaultIE(InfoExtractor):
|
||||||
@ -68,7 +69,9 @@ class GDCVaultIE(InfoExtractor):
|
|||||||
akami_url = xml_description.find('./metadata/akamaiHost').text
|
akami_url = xml_description.find('./metadata/akamaiHost').text
|
||||||
slide_video_path = xml_description.find('./metadata/slideVideo').text
|
slide_video_path = xml_description.find('./metadata/slideVideo').text
|
||||||
video_formats.append({
|
video_formats.append({
|
||||||
'url': 'rtmp://' + akami_url + '/' + slide_video_path,
|
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % 'fms.digitallyspeaking.com/cfx/st',
|
||||||
|
'play_path': remove_end(slide_video_path, '.flv'),
|
||||||
|
'ext': 'flv',
|
||||||
'format_note': 'slide deck video',
|
'format_note': 'slide deck video',
|
||||||
'quality': -2,
|
'quality': -2,
|
||||||
'preference': -2,
|
'preference': -2,
|
||||||
@ -76,7 +79,9 @@ class GDCVaultIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
speaker_video_path = xml_description.find('./metadata/speakerVideo').text
|
speaker_video_path = xml_description.find('./metadata/speakerVideo').text
|
||||||
video_formats.append({
|
video_formats.append({
|
||||||
'url': 'rtmp://' + akami_url + '/' + speaker_video_path,
|
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % 'fms.digitallyspeaking.com/cfx/st',
|
||||||
|
'play_path': remove_end(speaker_video_path, '.flv'),
|
||||||
|
'ext': 'flv',
|
||||||
'format_note': 'speaker video',
|
'format_note': 'speaker video',
|
||||||
'quality': -1,
|
'quality': -1,
|
||||||
'preference': -1,
|
'preference': -1,
|
||||||
|
@ -473,6 +473,7 @@ class GenericIE(InfoExtractor):
|
|||||||
{
|
{
|
||||||
'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
|
'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '1986',
|
||||||
'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
|
'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 2,
|
'playlist_mincount': 2,
|
||||||
@ -531,7 +532,7 @@ class GenericIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'Mrj4DVp2zeA',
|
'id': 'Mrj4DVp2zeA',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'upload_date': '20150204',
|
'upload_date': '20150212',
|
||||||
'uploader': 'The National Archives UK',
|
'uploader': 'The National Archives UK',
|
||||||
'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
|
'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
|
||||||
'uploader_id': 'NationalArchives08',
|
'uploader_id': 'NationalArchives08',
|
||||||
@ -546,7 +547,16 @@ class GenericIE(InfoExtractor):
|
|||||||
'id': 'aanslagen-kopenhagen',
|
'id': 'aanslagen-kopenhagen',
|
||||||
'title': 'Aanslagen Kopenhagen | RTL Nieuws',
|
'title': 'Aanslagen Kopenhagen | RTL Nieuws',
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
# Zapiks embed
|
||||||
|
{
|
||||||
|
'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '118046',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
|
||||||
}
|
}
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def report_following_redirect(self, new_url):
|
def report_following_redirect(self, new_url):
|
||||||
@ -1097,6 +1107,12 @@ class GenericIE(InfoExtractor):
|
|||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return self.url_result(mobj.group('url'), 'Livestream')
|
return self.url_result(mobj.group('url'), 'Livestream')
|
||||||
|
|
||||||
|
# Look for Zapiks embed
|
||||||
|
mobj = re.search(
|
||||||
|
r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result(mobj.group('url'), 'Zapiks')
|
||||||
|
|
||||||
def check_video(vurl):
|
def check_video(vurl):
|
||||||
if YoutubeIE.suitable(vurl):
|
if YoutubeIE.suitable(vurl):
|
||||||
return True
|
return True
|
||||||
|
@ -34,6 +34,9 @@ class IGNIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
|
'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '100-little-things-in-gta-5-that-will-blow-your-mind',
|
||||||
|
},
|
||||||
'playlist': [
|
'playlist': [
|
||||||
{
|
{
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
97
youtube_dl/extractor/imgur.py
Normal file
97
youtube_dl/extractor/imgur.py
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
mimetype2ext,
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ImgurIE(InfoExtractor):
    """Extractor for imgur.com animated items (gifv/mp4 pages).

    Formats are collected from the ``<source>`` tags of the page's
    "video-elements" block, plus the original GIF when the page exposes a
    ``videoItem`` object.
    """

    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)(?:\.mp4|\.gifv)?'

    _TESTS = [{
        'url': 'https://i.imgur.com/A61SaA1.gifv',
        'info_dict': {
            'id': 'A61SaA1',
            'ext': 'mp4',
            'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
            'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',
        },
    }, {
        'url': 'https://imgur.com/A61SaA1',
        'info_dict': {
            'id': 'A61SaA1',
            'ext': 'mp4',
            'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
            'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',
        },
    }]

    def _real_extract(self, url):
        """Download the page and assemble the info dict with all found formats.

        Raises ExtractorError (expected) when the page has no
        "video-elements" block.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        def page_dimension(attr):
            # Optional <param name="width|height" value="N"> lookup.
            return int_or_none(self._search_regex(
                r'<param name="%s" value="([0-9]+)"' % attr,
                webpage, attr, fatal=False))

        width = page_dimension('width')
        height = page_dimension('height')

        sources_html = self._search_regex(
            r'(?s)<div class="video-elements">(.*?)</div>',
            webpage, 'video elements', default=None)
        if not sources_html:
            raise ExtractorError(
                'No sources found for video %s. Maybe an image?' % video_id,
                expected=True)

        user_agent = 'youtube-dl (like wget)'

        # One format per <source> tag; each gets its own headers dict.
        formats = [{
            'format_id': source.group('type').partition('/')[2],
            'url': self._proto_relative_url(source.group('src')),
            'ext': mimetype2ext(source.group('type')),
            'acodec': 'none',
            'width': width,
            'height': height,
            'http_headers': {
                'User-Agent': user_agent,
            },
        } for source in re.finditer(
            r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"',
            sources_html)]

        # The original GIF, when described by the page, is offered as a
        # low-preference extra format.
        gif_json = self._search_regex(
            r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
            webpage, 'GIF code', fatal=False)
        if gif_json:
            gif_info = self._parse_json(
                gif_json, video_id, transform_source=js_to_json)
            gif_format = {
                'format_id': 'gif',
                'preference': -10,
                'width': width,
                'height': height,
                'ext': 'gif',
                'acodec': 'none',
                'vcodec': 'gif',
                'container': 'gif',
                'url': self._proto_relative_url(gif_info['gifUrl']),
                'filesize': gif_info.get('size'),
                'http_headers': {
                    'User-Agent': user_agent,
                },
            }
            formats.append(gif_format)

        self._sort_formats(formats)

        # Description is looked up before title, as in the result dict order.
        description = self._og_search_description(webpage)
        title = self._og_search_title(webpage)

        return {
            'id': video_id,
            'formats': formats,
            'description': description,
            'title': title,
        }
|
@ -37,6 +37,7 @@ class LivestreamIE(InfoExtractor):
|
|||||||
'url': 'http://new.livestream.com/tedx/cityenglish',
|
'url': 'http://new.livestream.com/tedx/cityenglish',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'TEDCity2.0 (English)',
|
'title': 'TEDCity2.0 (English)',
|
||||||
|
'id': '2245590',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 4,
|
'playlist_mincount': 4,
|
||||||
}, {
|
}, {
|
||||||
@ -148,7 +149,8 @@ class LivestreamIE(InfoExtractor):
|
|||||||
if is_relevant(video_data, video_id)]
|
if is_relevant(video_data, video_id)]
|
||||||
if video_id is None:
|
if video_id is None:
|
||||||
# This is an event page:
|
# This is an event page:
|
||||||
return self.playlist_result(videos, info['id'], info['full_name'])
|
return self.playlist_result(
|
||||||
|
videos, '%s' % info['id'], info['full_name'])
|
||||||
else:
|
else:
|
||||||
if not videos:
|
if not videos:
|
||||||
raise ExtractorError('Cannot find video %s' % video_id)
|
raise ExtractorError('Cannot find video %s' % video_id)
|
||||||
|
38
youtube_dl/extractor/nationalgeographic.py
Normal file
38
youtube_dl/extractor/nationalgeographic.py
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
smuggle_url,
|
||||||
|
url_basename,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class NationalGeographicIE(InfoExtractor):
    """Extractor for video.nationalgeographic.com video pages.

    The page names a feed URL and a video GUID; the feed entry for that GUID
    points at a ThePlatform media URL, whose basename is used to build a
    link.theplatform.com URL that is passed on as a URL result.
    """

    _VALID_URL = r'http://video\.nationalgeographic\.com/video/.*?'

    _TEST = {
        'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo',
        'info_dict': {
            'id': '4DmDACA6Qtk_',
            'ext': 'flv',
            'title': 'Mating Crabs Busted by Sharks',
            'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3',
        },
        'add_ie': ['ThePlatform'],
    }

    def _real_extract(self, url):
        """Follow page -> feed -> media URL and return a smuggled URL result."""
        name = url_basename(url)
        page = self._download_webpage(url, name)

        feed_url = self._search_regex(
            r'data-feed-url="([^"]+)"', page, 'feed url')
        guid = self._search_regex(
            r'data-video-guid="([^"]+)"', page, 'guid')

        feed_query = '%s?byGuid=%s' % (feed_url, guid)
        feed = self._download_xml(feed_query, name)

        media_content = feed.find('.//{http://search.yahoo.com/mrss/}content')
        theplatform_id = url_basename(media_content.attrib.get('url'))

        # For some reason, the normal links don't work and we must force the
        # use of f4m
        smil_url = (
            'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m'
            % theplatform_id)
        smuggled = smuggle_url(smil_url, {'force_smil_url': True})
        return self.url_result(smuggled)
|
@ -18,13 +18,13 @@ class NBCIE(InfoExtractor):
|
|||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
|
'url': 'http://www.nbc.com/the-tonight-show/segments/112966',
|
||||||
# md5 checksum is not stable
|
# md5 checksum is not stable
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'bTmnLCvIbaaH',
|
'id': 'c9xnCo0YPOPH',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'I Am a Firefighter',
|
'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
|
||||||
'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.',
|
'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -29,6 +29,9 @@ class NetzkinoIE(InfoExtractor):
|
|||||||
'timestamp': 1344858571,
|
'timestamp': 1344858571,
|
||||||
'age_limit': 12,
|
'age_limit': 12,
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': 'Download only works from Germany',
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -1,9 +1,6 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
js_to_json,
|
js_to_json,
|
||||||
@ -11,7 +8,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class PatreonIE(InfoExtractor):
|
class PatreonIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(.+)'
|
_VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(?P<id>[^&#]+)'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.patreon.com/creation?hid=743933',
|
'url': 'http://www.patreon.com/creation?hid=743933',
|
||||||
@ -35,6 +32,23 @@ class PatreonIE(InfoExtractor):
|
|||||||
'thumbnail': 're:^https?://.*$',
|
'thumbnail': 're:^https?://.*$',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://www.patreon.com/creation?hid=1682498',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'SU4fj_aEMVw',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'I\'m on Patreon!',
|
||||||
|
'uploader': 'TraciJHines',
|
||||||
|
'thumbnail': 're:^https?://.*$',
|
||||||
|
'upload_date': '20150211',
|
||||||
|
'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
|
||||||
|
'uploader_id': 'TraciJHines',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'noplaylist': True,
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
# Currently Patreon exposes download URL via hidden CSS, so login is not
|
# Currently Patreon exposes download URL via hidden CSS, so login is not
|
||||||
@ -65,26 +79,29 @@ class PatreonIE(InfoExtractor):
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group(1)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
title = self._og_search_title(webpage).strip()
|
title = self._og_search_title(webpage).strip()
|
||||||
|
|
||||||
attach_fn = self._html_search_regex(
|
attach_fn = self._html_search_regex(
|
||||||
r'<div class="attach"><a target="_blank" href="([^"]+)">',
|
r'<div class="attach"><a target="_blank" href="([^"]+)">',
|
||||||
webpage, 'attachment URL', default=None)
|
webpage, 'attachment URL', default=None)
|
||||||
|
embed = self._html_search_regex(
|
||||||
|
r'<div id="watchCreation">\s*<iframe class="embedly-embed" src="([^"]+)"',
|
||||||
|
webpage, 'embedded URL', default=None)
|
||||||
|
|
||||||
if attach_fn is not None:
|
if attach_fn is not None:
|
||||||
video_url = 'http://www.patreon.com' + attach_fn
|
video_url = 'http://www.patreon.com' + attach_fn
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
r'<strong>(.*?)</strong> is creating', webpage, 'uploader')
|
r'<strong>(.*?)</strong> is creating', webpage, 'uploader')
|
||||||
|
elif embed is not None:
|
||||||
|
return self.url_result(embed)
|
||||||
else:
|
else:
|
||||||
playlist_js = self._search_regex(
|
playlist = self._parse_json(self._search_regex(
|
||||||
r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])',
|
r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])',
|
||||||
webpage, 'playlist JSON')
|
webpage, 'playlist JSON'),
|
||||||
playlist_json = js_to_json(playlist_js)
|
video_id, transform_source=js_to_json)
|
||||||
playlist = json.loads(playlist_json)
|
|
||||||
data = playlist[0]
|
data = playlist[0]
|
||||||
video_url = self._proto_relative_url(data['mp3'])
|
video_url = self._proto_relative_url(data['mp3'])
|
||||||
thumbnail = self._proto_relative_url(data.get('cover'))
|
thumbnail = self._proto_relative_url(data.get('cover'))
|
||||||
|
@ -56,7 +56,7 @@ class PornHubIE(InfoExtractor):
|
|||||||
|
|
||||||
video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
|
video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
|
||||||
video_uploader = self._html_search_regex(
|
video_uploader = self._html_search_regex(
|
||||||
r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|<span class="username)[^>]+>(.+?)<',
|
r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<',
|
||||||
webpage, 'uploader', fatal=False)
|
webpage, 'uploader', fatal=False)
|
||||||
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
|
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
|
||||||
if thumbnail:
|
if thumbnail:
|
||||||
@ -110,3 +110,33 @@ class PornHubIE(InfoExtractor):
|
|||||||
'formats': formats,
|
'formats': formats,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class PornHubPlaylistIE(InfoExtractor):
    """Extractor for pornhub.com playlist pages.

    Every distinct ``view_video.php?viewkey=...`` link on the page becomes a
    playlist entry handled by the PornHub extractor; title and description
    come from the page's ``playlistObject`` JSON.
    """

    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.pornhub.com/playlist/6201671',
        'info_dict': {
            'id': '6201671',
            'title': 'P0p4',
        },
        'playlist_mincount': 35,
    }]

    def _real_extract(self, url):
        """Return a playlist result with entries in page order."""
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        # De-duplicate the links while keeping the order in which they appear
        # on the page; a plain set() would yield the entries in arbitrary
        # order.
        seen = set()
        video_paths = []
        for video_path in re.findall(
                r'href="/?(view_video\.php\?viewkey=\d+[^"]*)"', webpage):
            if video_path not in seen:
                seen.add(video_path)
                video_paths.append(video_path)

        entries = [
            self.url_result('http://www.pornhub.com/%s' % video_path, 'PornHub')
            for video_path in video_paths
        ]

        playlist = self._parse_json(
            self._search_regex(
                r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'),
            playlist_id)

        return self.playlist_result(
            entries, playlist_id, playlist.get('title'), playlist.get('description'))
|
||||||
|
88
youtube_dl/extractor/r7.py
Normal file
88
youtube_dl/extractor/r7.py
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
js_to_json,
|
||||||
|
unescapeHTML,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class R7IE(InfoExtractor):
    """Extractor for r7.com videos identified by a 24-digit hex media id."""

    _VALID_URL = r'''(?x)https?://
                        (?:
                            (?:[a-zA-Z]+)\.r7\.com(?:/[^/]+)+/idmedia/|
                            noticias\.r7\.com(?:/[^/]+)+/[^/]+-|
                            player\.r7\.com/video/i/
                        )
                        (?P<id>[\da-f]{24})
                    '''
    _TESTS = [{
        'url': 'http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html',
        'md5': '403c4e393617e8e8ddc748978ee8efde',
        'info_dict': {
            'id': '54e7050b0cf2ff57e0279389',
            'ext': 'mp4',
            'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 98,
            'like_count': int,
            'view_count': int,
        },
    }, {
        'url': 'http://esportes.r7.com/videos/cigano-manda-recado-aos-fas/idmedia/4e176727b51a048ee6646a1b.html',
        'only_matching': True,
    }, {
        'url': 'http://noticias.r7.com/record-news/video/representante-do-instituto-sou-da-paz-fala-sobre-fim-do-estatuto-do-desarmamento-5480fc580cf2285b117f438d/',
        'only_matching': True,
    }, {
        'url': 'http://player.r7.com/video/i/54e7050b0cf2ff57e0279389?play=true&video=http://vsh.r7.com/54e7050b0cf2ff57e0279389/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-ATOS_copy.mp4&linkCallback=http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html&thumbnail=http://vtb.r7.com/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-thumb.jpg&idCategory=192&share=true&layout=full&full=true',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict from the ``item`` object of the player page."""
        video_id = self._match_id(url)

        # Whatever URL form was matched, the metadata is always read from the
        # player page for this media id.
        player_url = 'http://player.r7.com/video/i/%s' % video_id
        webpage = self._download_webpage(player_url, video_id)

        # ``var item = {...};`` is a JavaScript object literal, so it is
        # converted to JSON before being parsed.
        item_js = self._search_regex(
            r'(?s)var\s+item\s*=\s*({.+?});', webpage, 'player')
        item = self._parse_json(js_to_json(item_js), video_id)

        title = unescapeHTML(item['title'])
        thumbnail = item.get('init', {}).get('thumbUri')

        stats = item.get('statistics', {})
        like_count = int_or_none(stats.get('likes'))
        view_count = int_or_none(stats.get('views'))

        duration = None
        formats = []
        for format_key, format_dict in item['playlist'][0].items():
            src = format_dict.get('src')
            if src:
                # The duration is taken from the first entry that has a
                # source; later entries are only consulted while it is None.
                if duration is None:
                    duration = format_dict.get('duration')
                if '.f4m' in src:
                    formats.extend(self._extract_f4m_formats(src, video_id, preference=-1))
                elif src.endswith('.m3u8'):
                    formats.extend(self._extract_m3u8_formats(src, video_id, 'mp4', preference=-2))
                else:
                    formats.append({
                        'url': src,
                        'format_id': format_dict.get('format') or format_key,
                    })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'like_count': like_count,
            'view_count': view_count,
            'formats': formats,
        }
|
@ -1,7 +1,5 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
@ -10,13 +8,13 @@ class RadioDeIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)'
|
_VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://ndr2.radio.de/',
|
'url': 'http://ndr2.radio.de/',
|
||||||
'md5': '3b4cdd011bc59174596b6145cda474a4',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'ndr2',
|
'id': 'ndr2',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
'description': 'md5:591c49c702db1a33751625ebfb67f273',
|
'description': 'md5:591c49c702db1a33751625ebfb67f273',
|
||||||
'thumbnail': 're:^https?://.*\.png',
|
'thumbnail': 're:^https?://.*\.png',
|
||||||
|
'is_live': True,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@ -25,16 +23,15 @@ class RadioDeIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
radio_id = self._match_id(url)
|
radio_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, radio_id)
|
webpage = self._download_webpage(url, radio_id)
|
||||||
|
jscode = self._search_regex(
|
||||||
|
r"'components/station/stationService':\s*\{\s*'?station'?:\s*(\{.*?\s*\}),\n",
|
||||||
|
webpage, 'broadcast')
|
||||||
|
|
||||||
broadcast = json.loads(self._search_regex(
|
broadcast = self._parse_json(jscode, radio_id)
|
||||||
r'_getBroadcast\s*=\s*function\(\s*\)\s*{\s*return\s+({.+?})\s*;\s*}',
|
|
||||||
webpage, 'broadcast'))
|
|
||||||
|
|
||||||
title = self._live_title(broadcast['name'])
|
title = self._live_title(broadcast['name'])
|
||||||
description = broadcast.get('description') or broadcast.get('shortDescription')
|
description = broadcast.get('description') or broadcast.get('shortDescription')
|
||||||
thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl')
|
thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl') or broadcast.get('logo100x100')
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'url': stream['streamUrl'],
|
'url': stream['streamUrl'],
|
||||||
|
@ -6,6 +6,7 @@ import re
|
|||||||
import time
|
import time
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_urlparse
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
struct_unpack,
|
struct_unpack,
|
||||||
remove_end,
|
remove_end,
|
||||||
@ -96,12 +97,10 @@ class RTVEALaCartaIE(InfoExtractor):
|
|||||||
).replace('.net.rtve', '.multimedia.cdn.rtve')
|
).replace('.net.rtve', '.multimedia.cdn.rtve')
|
||||||
video_path = self._download_webpage(
|
video_path = self._download_webpage(
|
||||||
auth_url, video_id, 'Getting video url')
|
auth_url, video_id, 'Getting video url')
|
||||||
# Use mvod.akcdn instead of flash.akamaihd.multimedia.cdn to get
|
# Use mvod1.akcdn instead of flash.akamaihd.multimedia.cdn to get
|
||||||
# the right Content-Length header and the mp4 format
|
# the right Content-Length header and the mp4 format
|
||||||
video_url = (
|
video_url = compat_urlparse.urljoin(
|
||||||
'http://mvod.akcdn.rtve.es/{0}&v=2.6.8'
|
'http://mvod1.akcdn.rtve.es/', video_path)
|
||||||
'&fp=MAC%2016,0,0,296&r=MRUGG&g=OEOJWFXNFGCP'.format(video_path)
|
|
||||||
)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
117
youtube_dl/extractor/sandia.py
Normal file
117
youtube_dl/extractor/sandia.py
Normal file
@ -0,0 +1,117 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import itertools
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_urllib_request,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
mimetype2ext,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class SandiaIE(InfoExtractor):
    """Extractor for Mediasite presentations on digitalops.sandia.gov.

    The metadata is read from the presentation's player JavaScript, which
    consists of ``Mediasite.PlaybackManifest.<key> = <value>;`` assignments.
    """

    IE_DESC = 'Sandia National Laboratories'
    _VALID_URL = r'https?://digitalops\.sandia\.gov/Mediasite/Play/(?P<id>[0-9a-f]+)'
    _TEST = {
        'url': 'http://digitalops.sandia.gov/Mediasite/Play/24aace4429fc450fb5b38cdbf424a66e1d',
        'md5': '9422edc9b9a60151727e4b6d8bef393d',
        'info_dict': {
            'id': '24aace4429fc450fb5b38cdbf424a66e1d',
            'ext': 'mp4',
            'title': 'Xyce Software Training - Section 1',
            'description': 're:(?s)SAND Number: SAND 2013-7800.{200,}',
            'upload_date': '20120904',
            'duration': 7794,
        }
    }

    def _real_extract(self, url):
        """Return the info dict (video formats plus a slideshow pseudo-format)."""
        video_id = self._match_id(url)

        # The page is requested with a MediasitePlayerCaps cookie set.
        req = compat_urllib_request.Request(url)
        req.add_header('Cookie', 'MediasitePlayerCaps=ClientPlugins=4')
        webpage = self._download_webpage(req, video_id)

        js_path = self._search_regex(
            r'<script type="text/javascript" src="(/Mediasite/FileServer/Presentation/[^"]+)"',
            webpage, 'JS code URL')
        js_url = compat_urlparse.urljoin(url, js_path)

        js_code = self._download_webpage(
            js_url, video_id, note='Downloading player')

        def extract_str(key, **args):
            # Raw right-hand side of ``Mediasite.PlaybackManifest.<key> = ...;``
            # Extra keyword arguments (fatal, default) go to _search_regex.
            return self._search_regex(
                r'Mediasite\.PlaybackManifest\.%s\s*=\s*(.+);\s*?\n' % re.escape(key),
                js_code, key, **args)

        def extract_data(key, **args):
            # Same as extract_str, but the value is parsed as a JavaScript
            # literal.  None (missing key) is passed through unchanged.
            data_json = extract_str(key, **args)
            if data_json is None:
                return data_json
            return self._parse_json(
                data_json, video_id, transform_source=js_to_json)

        formats = []
        # VideoUrls[0], VideoUrls[1], ... until the first missing index.
        for i in itertools.count():
            fd = extract_data('VideoUrls[%d]' % i, default=None)
            if fd is None:
                break
            formats.append({
                'format_id': '%s' % i,
                'format_note': fd['MimeType'].partition('/')[2],
                'ext': mimetype2ext(fd['MimeType']),
                'url': fd['Location'],
                'protocol': 'f4m' if fd['MimeType'] == 'video/x-mp4-fragmented' else None,
            })
        self._sort_formats(formats)

        slide_baseurl = compat_urlparse.urljoin(
            url, extract_data('SlideBaseUrl'))
        # Turn a "{0:D4}"-style placeholder into the printf form "%04d".
        # The width may have several digits, hence ``[0-9]+``.
        slide_template = slide_baseurl + re.sub(
            r'\{0:D?([0-9]+)\}', r'%0\1d', extract_data('SlideImageFileNameTemplate'))
        slides = []
        last_slide_time = 0
        # Slides are numbered from 1.
        for i in itertools.count(1):
            sd = extract_str('Slides[%d]' % i, default=None)
            if sd is None:
                break
            timestamp = int_or_none(self._search_regex(
                r'^Mediasite\.PlaybackManifest\.CreateSlide\("[^"]*"\s*,\s*([0-9]+),',
                sd, 'slide %s timestamp' % i, fatal=False))
            # The timestamp lookup is non-fatal, so it may be None; in that
            # case the duration is unknown and the reference time is kept
            # (subtracting from None would raise TypeError).
            if timestamp is None:
                slide_duration = None
            else:
                slide_duration = timestamp - last_slide_time
                last_slide_time = timestamp
            slides.append({
                'url': slide_template % i,
                'duration': slide_duration,
            })
        formats.append({
            'format_id': 'slides',
            'protocol': 'slideshow',
            'url': json.dumps(slides),
            'preference': -10000,  # Downloader not yet written
        })
        self._sort_formats(formats)

        title = extract_data('Title')
        description = extract_data('Description', fatal=False)
        duration = int_or_none(extract_data(
            'Duration', fatal=False), scale=1000)
        upload_date = unified_strdate(extract_data('AirDate', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'upload_date': upload_date,
            'duration': duration,
        }
|
@ -25,7 +25,6 @@ class SockshareIE(InfoExtractor):
|
|||||||
'id': '437BE28B89D799D7',
|
'id': '437BE28B89D799D7',
|
||||||
'title': 'big_buck_bunny_720p_surround.avi',
|
'title': 'big_buck_bunny_720p_surround.avi',
|
||||||
'ext': 'avi',
|
'ext': 'avi',
|
||||||
'thumbnail': 're:^http://.*\.jpg$',
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -45,7 +44,7 @@ class SockshareIE(InfoExtractor):
|
|||||||
''', webpage, 'hash')
|
''', webpage, 'hash')
|
||||||
|
|
||||||
fields = {
|
fields = {
|
||||||
"hash": confirm_hash,
|
"hash": confirm_hash.encode('utf-8'),
|
||||||
"confirm": "Continue as Free User"
|
"confirm": "Continue as Free User"
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -68,7 +67,7 @@ class SockshareIE(InfoExtractor):
|
|||||||
webpage, 'title', default=None)
|
webpage, 'title', default=None)
|
||||||
thumbnail = self._html_search_regex(
|
thumbnail = self._html_search_regex(
|
||||||
r'<img\s+src="([^"]*)".+?name="bg"',
|
r'<img\s+src="([^"]*)".+?name="bg"',
|
||||||
webpage, 'thumbnail')
|
webpage, 'thumbnail', default=None)
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'format_id': 'sd',
|
'format_id': 'sd',
|
||||||
|
@ -7,6 +7,7 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class SoundgasmIE(InfoExtractor):
|
class SoundgasmIE(InfoExtractor):
|
||||||
|
IE_NAME = 'soundgasm'
|
||||||
_VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)'
|
_VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://soundgasm.net/u/ytdl/Piano-sample',
|
'url': 'http://soundgasm.net/u/ytdl/Piano-sample',
|
||||||
@ -38,3 +39,25 @@ class SoundgasmIE(InfoExtractor):
|
|||||||
'title': audio_title,
|
'title': audio_title,
|
||||||
'description': description
|
'description': description
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class SoundgasmProfileIE(InfoExtractor):
    """Extractor for soundgasm.net user profiles (``/u/<user>``).

    Every link on the profile page that points below ``/u/<user>/`` becomes a
    playlist entry handled by the ``Soundgasm`` extractor.
    """

    IE_NAME = 'soundgasm:profile'
    _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<id>[^/]+)/?(?:\#.*)?$'
    _TEST = {
        'url': 'http://soundgasm.net/u/ytdl',
        'info_dict': {
            'id': 'ytdl',
        },
        'playlist_count': 1,
    }

    def _real_extract(self, url):
        """Return a playlist of the audio pages linked from the profile."""
        profile_id = self._match_id(url)

        webpage = self._download_webpage(url, profile_id)

        # _VALID_URL accepts any non-slash characters as the profile id, so
        # it must be escaped before being interpolated into the pattern;
        # otherwise characters such as "." or "+" act as regex syntax.
        entries = [
            self.url_result(audio_url, 'Soundgasm')
            for audio_url in re.findall(
                r'href="([^"]+/u/%s/[^"]+)' % re.escape(profile_id), webpage)]

        return self.playlist_result(entries, profile_id)
|
||||||
|
@ -1,8 +1,10 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import base64
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import qualities
|
||||||
|
|
||||||
|
|
||||||
class TeamcocoIE(InfoExtractor):
|
class TeamcocoIE(InfoExtractor):
|
||||||
@ -24,8 +26,8 @@ class TeamcocoIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '19705',
|
'id': '19705',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
"description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
|
'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
|
||||||
"title": "Louis C.K. Interview Pt. 1 11/3/11",
|
'title': 'Louis C.K. Interview Pt. 1 11/3/11',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -42,42 +44,39 @@ class TeamcocoIE(InfoExtractor):
|
|||||||
display_id = mobj.group('display_id')
|
display_id = mobj.group('display_id')
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
video_id = mobj.group("video_id")
|
video_id = mobj.group('video_id')
|
||||||
if not video_id:
|
if not video_id:
|
||||||
video_id = self._html_search_regex(
|
video_id = self._html_search_regex(
|
||||||
self._VIDEO_ID_REGEXES, webpage, 'video id')
|
self._VIDEO_ID_REGEXES, webpage, 'video id')
|
||||||
|
|
||||||
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
|
embed_url = 'http://teamcoco.com/embed/v/%s' % video_id
|
||||||
data = self._download_xml(
|
embed = self._download_webpage(
|
||||||
data_url, display_id, 'Downloading data webpage')
|
embed_url, video_id, 'Downloading embed page')
|
||||||
|
|
||||||
|
encoded_data = self._search_regex(
|
||||||
|
r'"preload"\s*:\s*"([^"]+)"', embed, 'encoded data')
|
||||||
|
data = self._parse_json(
|
||||||
|
base64.b64decode(encoded_data.encode('ascii')).decode('utf-8'), video_id)
|
||||||
|
|
||||||
qualities = ['500k', '480p', '1000k', '720p', '1080p']
|
|
||||||
formats = []
|
formats = []
|
||||||
for filed in data.findall('files/file'):
|
get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
|
||||||
if filed.attrib.get('playmode') == 'all':
|
for filed in data['files']:
|
||||||
# it just duplicates one of the entries
|
m_format = re.search(r'(\d+(k|p))\.mp4', filed['url'])
|
||||||
break
|
|
||||||
file_url = filed.text
|
|
||||||
m_format = re.search(r'(\d+(k|p))\.mp4', file_url)
|
|
||||||
if m_format is not None:
|
if m_format is not None:
|
||||||
format_id = m_format.group(1)
|
format_id = m_format.group(1)
|
||||||
else:
|
else:
|
||||||
format_id = filed.attrib['bitrate']
|
format_id = filed['bitrate']
|
||||||
tbr = (
|
tbr = (
|
||||||
int(filed.attrib['bitrate'])
|
int(filed['bitrate'])
|
||||||
if filed.attrib['bitrate'].isdigit()
|
if filed['bitrate'].isdigit()
|
||||||
else None)
|
else None)
|
||||||
|
|
||||||
try:
|
|
||||||
quality = qualities.index(format_id)
|
|
||||||
except ValueError:
|
|
||||||
quality = -1
|
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': file_url,
|
'url': filed['url'],
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'tbr': tbr,
|
'tbr': tbr,
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'quality': quality,
|
'quality': get_quality(format_id),
|
||||||
})
|
})
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
@ -86,8 +85,8 @@ class TeamcocoIE(InfoExtractor):
|
|||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'title': self._og_search_title(webpage),
|
'title': data['title'],
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': data.get('thumb', {}).get('href'),
|
||||||
'description': self._og_search_description(webpage),
|
'description': data.get('teaser'),
|
||||||
'age_limit': self._family_friendly_search(webpage),
|
'age_limit': self._family_friendly_search(webpage),
|
||||||
}
|
}
|
||||||
|
@ -83,6 +83,22 @@ class TEDIE(SubtitlesInfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# YouTube video
|
||||||
|
'url': 'http://www.ted.com/talks/jeffrey_kluger_the_sibling_bond',
|
||||||
|
'add_ie': ['Youtube'],
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'aFBIPO-P7LM',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'The hidden power of siblings: Jeff Kluger at TEDxAsheville',
|
||||||
|
'description': 'md5:3d7a4f50d95ca5dd67104e2a20f43fe1',
|
||||||
|
'uploader': 'TEDx Talks',
|
||||||
|
'uploader_id': 'TEDxTalks',
|
||||||
|
'upload_date': '20111216',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_NATIVE_FORMATS = {
|
_NATIVE_FORMATS = {
|
||||||
@ -132,11 +148,16 @@ class TEDIE(SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
talk_info = self._extract_info(webpage)['talks'][0]
|
talk_info = self._extract_info(webpage)['talks'][0]
|
||||||
|
|
||||||
if talk_info.get('external') is not None:
|
external = talk_info.get('external')
|
||||||
self.to_screen('Found video from %s' % talk_info['external']['service'])
|
if external:
|
||||||
|
service = external['service']
|
||||||
|
self.to_screen('Found video from %s' % service)
|
||||||
|
ext_url = None
|
||||||
|
if service.lower() == 'youtube':
|
||||||
|
ext_url = external.get('code')
|
||||||
return {
|
return {
|
||||||
'_type': 'url',
|
'_type': 'url',
|
||||||
'url': talk_info['external']['uri'],
|
'url': ext_url or external['uri'],
|
||||||
}
|
}
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
|
@ -4,11 +4,10 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import ExtractorError
|
|
||||||
|
|
||||||
|
|
||||||
class TheOnionIE(InfoExtractor):
|
class TheOnionIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?x)https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<article_id>[0-9]+)/?'
|
_VALID_URL = r'https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<id>[0-9]+)/?'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/',
|
'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/',
|
||||||
'md5': '19eaa9a39cf9b9804d982e654dc791ee',
|
'md5': '19eaa9a39cf9b9804d982e654dc791ee',
|
||||||
@ -22,10 +21,8 @@ class TheOnionIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
display_id = self._match_id(url)
|
||||||
article_id = mobj.group('article_id')
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, article_id)
|
|
||||||
|
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
r'"videoId":\s(\d+),', webpage, 'video ID')
|
r'"videoId":\s(\d+),', webpage, 'video ID')
|
||||||
@ -34,10 +31,6 @@ class TheOnionIE(InfoExtractor):
|
|||||||
thumbnail = self._og_search_thumbnail(webpage)
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage)
|
sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage)
|
||||||
if not sources:
|
|
||||||
raise ExtractorError(
|
|
||||||
'No sources found for video %s' % video_id, expected=True)
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for src, type_ in sources:
|
for src, type_ in sources:
|
||||||
if type_ == 'video/mp4':
|
if type_ == 'video/mp4':
|
||||||
@ -54,15 +47,15 @@ class TheOnionIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
elif type_ == 'application/x-mpegURL':
|
elif type_ == 'application/x-mpegURL':
|
||||||
formats.extend(
|
formats.extend(
|
||||||
self._extract_m3u8_formats(src, video_id, preference=-1))
|
self._extract_m3u8_formats(src, display_id, preference=-1))
|
||||||
else:
|
else:
|
||||||
self.report_warning(
|
self.report_warning(
|
||||||
'Encountered unexpected format: %s' % type_)
|
'Encountered unexpected format: %s' % type_)
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
|
@ -71,7 +71,9 @@ class ThePlatformIE(SubtitlesInfoExtractor):
|
|||||||
if not provider_id:
|
if not provider_id:
|
||||||
provider_id = 'dJ5BDC'
|
provider_id = 'dJ5BDC'
|
||||||
|
|
||||||
if mobj.group('config'):
|
if smuggled_data.get('force_smil_url', False):
|
||||||
|
smil_url = url
|
||||||
|
elif mobj.group('config'):
|
||||||
config_url = url + '&form=json'
|
config_url = url + '&form=json'
|
||||||
config_url = config_url.replace('swf/', 'config/')
|
config_url = config_url.replace('swf/', 'config/')
|
||||||
config_url = config_url.replace('onsite/', 'onsite/config/')
|
config_url = config_url.replace('onsite/', 'onsite/config/')
|
||||||
|
100
youtube_dl/extractor/tv4.py
Normal file
100
youtube_dl/extractor/tv4.py
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
parse_iso8601,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TV4IE(InfoExtractor):
    """Extractor for tv4.se clips and tv4play.se videos with a numeric id."""

    IE_DESC = 'tv4.se and tv4play.se'
    _VALID_URL = r'''(?x)https?://(?:www\.)?
        (?:
            tv4\.se/(?:[^/]+)/klipp/(?:.*)-|
            tv4play\.se/
            (?:
                (?:program|barn)/(?:[^\?]+)\?video_id=|
                iframe/video/|
                film/|
                sport/|
            )
        )(?P<id>[0-9]+)'''
    _TESTS = [
        {
            'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650',
            'md5': '909d6454b87b10a25aa04c4bdd416a9b',
            'info_dict': {
                'id': '2491650',
                'ext': 'mp4',
                'title': 'Kalla Fakta 5 (english subtitles)',
                'thumbnail': r're:^https?://.*\.jpg$',
                'timestamp': int,
                'upload_date': '20131125',
            },
        },
        {
            'url': 'http://www.tv4play.se/iframe/video/3054113',
            'md5': '77f851c55139ffe0ebd41b6a5552489b',
            'info_dict': {
                'id': '3054113',
                'ext': 'mp4',
                'title': 'Så här jobbar ficktjuvarna - se avslöjande bilder',
                'thumbnail': r're:^https?://.*\.jpg$',
                'description': 'Unika bilder avslöjar hur turisternas fickor vittjas mitt på Stockholms central. Två experter på ficktjuvarna avslöjar knepen du ska se upp för.',
                'timestamp': int,
                'upload_date': '20150130',
            },
        },
        {
            'url': 'http://www.tv4play.se/sport/3060959',
            'only_matching': True,
        },
        {
            'url': 'http://www.tv4play.se/film/2378136',
            'only_matching': True,
        },
        {
            'url': 'http://www.tv4play.se/barn/looney-tunes?video_id=3062412',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return the info dict built from the asset and playback JSON.

        Raises ExtractorError (expected) when the asset requires a
        subscription.
        """
        video_id = self._match_id(url)

        info = self._download_json(
            'http://www.tv4play.se/player/assets/%s.json' % video_id, video_id, 'Downloading video info JSON')

        # The two flags are read with .get() so that an asset description
        # lacking them is treated as unrestricted instead of raising KeyError.
        # If is_geo_restricted is true, it doesn't necessarily mean we can't download it
        if info.get('is_geo_restricted'):
            self.report_warning('This content might not be available in your country due to licensing restrictions.')
        if info.get('requires_subscription'):
            raise ExtractorError('This content requires subscription.', expected=True)

        sources_data = self._download_json(
            'https://prima.tv4play.se/api/web/asset/%s/play.json?protocol=http&videoFormat=MP4' % video_id, video_id, 'Downloading sources JSON')
        sources = sources_data['playback']

        formats = []
        for item in sources.get('items', {}).get('item', []):
            ext, bitrate = item['mediaFormat'], item['bitrate']
            formats.append({
                'format_id': '%s_%s' % (ext, bitrate),
                'tbr': bitrate,
                'ext': ext,
                'url': item['url'],
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': info['title'],
            'formats': formats,
            'description': info.get('description'),
            'timestamp': parse_iso8601(info.get('broadcast_date_time')),
            'duration': info.get('duration'),
            'thumbnail': info.get('image'),
            'is_live': sources.get('live'),
        }
|
@ -349,6 +349,13 @@ class TwitchStreamIE(TwitchBaseIE):
|
|||||||
% (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query).encode('utf-8')),
|
% (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query).encode('utf-8')),
|
||||||
channel_id, 'mp4')
|
channel_id, 'mp4')
|
||||||
|
|
||||||
|
# prefer the 'source' stream, the others are limited to 30 fps
|
||||||
|
def _sort_source(f):
|
||||||
|
if f.get('m3u8_media') is not None and f['m3u8_media'].get('NAME') == 'Source':
|
||||||
|
return 1
|
||||||
|
return 0
|
||||||
|
formats = sorted(formats, key=_sort_source)
|
||||||
|
|
||||||
view_count = stream.get('viewers')
|
view_count = stream.get('viewers')
|
||||||
timestamp = parse_iso8601(stream.get('created_at'))
|
timestamp = parse_iso8601(stream.get('created_at'))
|
||||||
|
|
||||||
|
@ -52,7 +52,7 @@ class VideoLecturesNetIE(InfoExtractor):
|
|||||||
formats = []
|
formats = []
|
||||||
for v in switch.findall('./video'):
|
for v in switch.findall('./video'):
|
||||||
proto = v.attrib.get('proto')
|
proto = v.attrib.get('proto')
|
||||||
if not proto in ['http', 'rtmp']:
|
if proto not in ['http', 'rtmp']:
|
||||||
continue
|
continue
|
||||||
f = {
|
f = {
|
||||||
'width': int_or_none(v.attrib.get('width')),
|
'width': int_or_none(v.attrib.get('width')),
|
||||||
@ -70,6 +70,7 @@ class VideoLecturesNetIE(InfoExtractor):
|
|||||||
f.update({
|
f.update({
|
||||||
'url': v.attrib['streamer'],
|
'url': v.attrib['streamer'],
|
||||||
'play_path': src,
|
'play_path': src,
|
||||||
|
'rtmp_real_time': True,
|
||||||
})
|
})
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
|||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import itertools
|
import itertools
|
||||||
|
import hashlib
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .subtitles import SubtitlesInfoExtractor
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
@ -18,6 +19,7 @@ from ..utils import (
|
|||||||
InAdvancePagedList,
|
InAdvancePagedList,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
RegexNotFoundError,
|
RegexNotFoundError,
|
||||||
|
smuggle_url,
|
||||||
std_headers,
|
std_headers,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
@ -174,7 +176,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
def _verify_video_password(self, url, video_id, webpage):
|
def _verify_video_password(self, url, video_id, webpage):
|
||||||
password = self._downloader.params.get('videopassword', None)
|
password = self._downloader.params.get('videopassword', None)
|
||||||
if password is None:
|
if password is None:
|
||||||
raise ExtractorError('This video is protected by a password, use the --video-password option')
|
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
|
||||||
token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
|
token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
|
||||||
data = compat_urllib_parse.urlencode({
|
data = compat_urllib_parse.urlencode({
|
||||||
'password': password,
|
'password': password,
|
||||||
@ -224,6 +226,11 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
if mobj.group('pro') or mobj.group('player'):
|
if mobj.group('pro') or mobj.group('player'):
|
||||||
url = 'http://player.vimeo.com/video/' + video_id
|
url = 'http://player.vimeo.com/video/' + video_id
|
||||||
|
|
||||||
|
password = self._downloader.params.get('videopassword', None)
|
||||||
|
if password:
|
||||||
|
headers['Cookie'] = '%s_password=%s' % (
|
||||||
|
video_id, hashlib.md5(password.encode('utf-8')).hexdigest())
|
||||||
|
|
||||||
# Retrieve video webpage to extract further information
|
# Retrieve video webpage to extract further information
|
||||||
request = compat_urllib_request.Request(url, None, headers)
|
request = compat_urllib_request.Request(url, None, headers)
|
||||||
try:
|
try:
|
||||||
@ -267,8 +274,11 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option')
|
raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option')
|
||||||
|
|
||||||
if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
|
if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
|
||||||
|
if data and '_video_password_verified' in data:
|
||||||
|
raise ExtractorError('video password verification failed!')
|
||||||
self._verify_video_password(url, video_id, webpage)
|
self._verify_video_password(url, video_id, webpage)
|
||||||
return self._real_extract(url)
|
return self._real_extract(
|
||||||
|
smuggle_url(url, {'_video_password_verified': 'verified'}))
|
||||||
else:
|
else:
|
||||||
raise ExtractorError('Unable to extract info section',
|
raise ExtractorError('Unable to extract info section',
|
||||||
cause=e)
|
cause=e)
|
||||||
@ -401,6 +411,7 @@ class VimeoChannelIE(InfoExtractor):
|
|||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://vimeo.com/channels/tributes',
|
'url': 'http://vimeo.com/channels/tributes',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'tributes',
|
||||||
'title': 'Vimeo Tributes',
|
'title': 'Vimeo Tributes',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 25,
|
'playlist_mincount': 25,
|
||||||
@ -479,6 +490,7 @@ class VimeoUserIE(VimeoChannelIE):
|
|||||||
'url': 'http://vimeo.com/nkistudio/videos',
|
'url': 'http://vimeo.com/nkistudio/videos',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'Nki',
|
'title': 'Nki',
|
||||||
|
'id': 'nkistudio',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 66,
|
'playlist_mincount': 66,
|
||||||
}]
|
}]
|
||||||
@ -496,6 +508,7 @@ class VimeoAlbumIE(VimeoChannelIE):
|
|||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://vimeo.com/album/2632481',
|
'url': 'http://vimeo.com/album/2632481',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '2632481',
|
||||||
'title': 'Staff Favorites: November 2013',
|
'title': 'Staff Favorites: November 2013',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 13,
|
'playlist_mincount': 13,
|
||||||
@ -526,6 +539,7 @@ class VimeoGroupsIE(VimeoAlbumIE):
|
|||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://vimeo.com/groups/rolexawards',
|
'url': 'http://vimeo.com/groups/rolexawards',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'rolexawards',
|
||||||
'title': 'Rolex Awards for Enterprise',
|
'title': 'Rolex Awards for Enterprise',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 73,
|
'playlist_mincount': 73,
|
||||||
@ -608,6 +622,7 @@ class VimeoLikesIE(InfoExtractor):
|
|||||||
'url': 'https://vimeo.com/user755559/likes/',
|
'url': 'https://vimeo.com/user755559/likes/',
|
||||||
'playlist_mincount': 293,
|
'playlist_mincount': 293,
|
||||||
"info_dict": {
|
"info_dict": {
|
||||||
|
'id': 'user755559_likes',
|
||||||
"description": "See all the videos urza likes",
|
"description": "See all the videos urza likes",
|
||||||
"title": 'Videos urza likes',
|
"title": 'Videos urza likes',
|
||||||
},
|
},
|
||||||
|
@ -217,6 +217,9 @@ class VKUserVideosIE(InfoExtractor):
|
|||||||
_TEMPLATE_URL = 'https://vk.com/videos'
|
_TEMPLATE_URL = 'https://vk.com/videos'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://vk.com/videos205387401',
|
'url': 'http://vk.com/videos205387401',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '205387401',
|
||||||
|
},
|
||||||
'playlist_mincount': 4,
|
'playlist_mincount': 4,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -45,19 +45,17 @@ class WebOfStoriesIE(InfoExtractor):
|
|||||||
description = self._html_search_meta('description', webpage)
|
description = self._html_search_meta('description', webpage)
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
story_filename = self._search_regex(
|
embed_params = [s.strip(" \r\n\t'") for s in self._search_regex(
|
||||||
r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename')
|
r'(?s)\$\("#embedCode"\).html\(getEmbedCode\((.*?)\)',
|
||||||
speaker_id = self._search_regex(
|
webpage, 'embed params').split(',')]
|
||||||
r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID')
|
|
||||||
story_id = self._search_regex(
|
(
|
||||||
r'\.storyId\((\d+)\)', webpage, 'story ID')
|
_, speaker_id, story_id, story_duration,
|
||||||
speaker_type = self._search_regex(
|
speaker_type, great_life, _thumbnail, _has_subtitles,
|
||||||
r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type')
|
story_filename, _story_order) = embed_params
|
||||||
great_life = self._search_regex(
|
|
||||||
r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story')
|
|
||||||
is_great_life_series = great_life == 'true'
|
is_great_life_series = great_life == 'true'
|
||||||
duration = int_or_none(self._search_regex(
|
duration = int_or_none(story_duration)
|
||||||
r'\.duration\((\d+)\)', webpage, 'duration', fatal=False))
|
|
||||||
|
|
||||||
# URL building, see: http://www.webofstories.com/scripts/player.js
|
# URL building, see: http://www.webofstories.com/scripts/player.js
|
||||||
ms_prefix = ''
|
ms_prefix = ''
|
||||||
|
@ -18,8 +18,8 @@ class WSJIE(InfoExtractor):
|
|||||||
'id': '1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
|
'id': '1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'upload_date': '20150202',
|
'upload_date': '20150202',
|
||||||
'uploader_id': 'bbright',
|
'uploader_id': 'jdesai',
|
||||||
'creator': 'bbright',
|
'creator': 'jdesai',
|
||||||
'categories': list, # a long list
|
'categories': list, # a long list
|
||||||
'duration': 90,
|
'duration': 90,
|
||||||
'title': 'Bills Coach Rex Ryan Updates His Old Jets Tattoo',
|
'title': 'Bills Coach Rex Ryan Updates His Old Jets Tattoo',
|
||||||
|
@ -22,7 +22,7 @@ class XTubeIE(InfoExtractor):
|
|||||||
'id': 'kVTUy_G222_',
|
'id': 'kVTUy_G222_',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'strange erotica',
|
'title': 'strange erotica',
|
||||||
'description': 'http://www.xtube.com an ET kind of thing',
|
'description': 'contains:an ET kind of thing',
|
||||||
'uploader': 'greenshowers',
|
'uploader': 'greenshowers',
|
||||||
'duration': 450,
|
'duration': 450,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
|
@ -24,7 +24,6 @@ class YahooIE(InfoExtractor):
|
|||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
|
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
|
||||||
'md5': '4962b075c08be8690a922ee026d05e69',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2d25e626-2378-391f-ada0-ddaf1417e588',
|
'id': '2d25e626-2378-391f-ada0-ddaf1417e588',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
@ -541,23 +541,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
if cache_spec is not None:
|
if cache_spec is not None:
|
||||||
return lambda s: ''.join(s[i] for i in cache_spec)
|
return lambda s: ''.join(s[i] for i in cache_spec)
|
||||||
|
|
||||||
|
download_note = (
|
||||||
|
'Downloading player %s' % player_url
|
||||||
|
if self._downloader.params.get('verbose') else
|
||||||
|
'Downloading %s player %s' % (player_type, player_id)
|
||||||
|
)
|
||||||
if player_type == 'js':
|
if player_type == 'js':
|
||||||
code = self._download_webpage(
|
code = self._download_webpage(
|
||||||
player_url, video_id,
|
player_url, video_id,
|
||||||
note='Downloading %s player %s' % (player_type, player_id),
|
note=download_note,
|
||||||
errnote='Download of %s failed' % player_url)
|
errnote='Download of %s failed' % player_url)
|
||||||
res = self._parse_sig_js(code)
|
res = self._parse_sig_js(code)
|
||||||
elif player_type == 'swf':
|
elif player_type == 'swf':
|
||||||
urlh = self._request_webpage(
|
urlh = self._request_webpage(
|
||||||
player_url, video_id,
|
player_url, video_id,
|
||||||
note='Downloading %s player %s' % (player_type, player_id),
|
note=download_note,
|
||||||
errnote='Download of %s failed' % player_url)
|
errnote='Download of %s failed' % player_url)
|
||||||
code = urlh.read()
|
code = urlh.read()
|
||||||
res = self._parse_sig_swf(code)
|
res = self._parse_sig_swf(code)
|
||||||
else:
|
else:
|
||||||
assert False, 'Invalid player type %r' % player_type
|
assert False, 'Invalid player type %r' % player_type
|
||||||
|
|
||||||
if cache_spec is None:
|
|
||||||
test_string = ''.join(map(compat_chr, range(len(example_sig))))
|
test_string = ''.join(map(compat_chr, range(len(example_sig))))
|
||||||
cache_res = res(test_string)
|
cache_res = res(test_string)
|
||||||
cache_spec = [ord(c) for c in cache_res]
|
cache_spec = [ord(c) for c in cache_res]
|
||||||
|
110
youtube_dl/extractor/zapiks.py
Normal file
110
youtube_dl/extractor/zapiks.py
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
parse_duration,
|
||||||
|
parse_iso8601,
|
||||||
|
xpath_with_ns,
|
||||||
|
xpath_text,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ZapiksIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?zapiks\.(?:fr|com)/(?:(?:[a-z]{2}/)?(?P<display_id>.+?)\.html|index\.php\?.*\bmedia_id=(?P<id>\d+))'
|
||||||
|
_TESTS = [
|
||||||
|
{
|
||||||
|
'url': 'http://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html',
|
||||||
|
'md5': 'aeb3c473b2d564b2d46d664d28d5f050',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '80798',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'EP2S3 - Bon Appétit - Eh bé viva les pyrénées con!',
|
||||||
|
'description': 'md5:7054d6f6f620c6519be1fe710d4da847',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'duration': 528,
|
||||||
|
'timestamp': 1359044972,
|
||||||
|
'upload_date': '20130124',
|
||||||
|
'view_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.zapiks.com/ep3s5-bon-appetit-baqueira-m-1.html',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.zapiks.com/nl/ep3s5-bon-appetit-baqueira-m-1.html',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.zapiks.fr/index.php?action=playerIframe&media_id=118046&width=640&height=360&autoStart=false&language=fr',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
display_id = mobj.group('display_id') or video_id
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
if not video_id:
|
||||||
|
video_id = self._search_regex(
|
||||||
|
r'data-media-id="(\d+)"', webpage, 'video id')
|
||||||
|
|
||||||
|
playlist = self._download_xml(
|
||||||
|
'http://www.zapiks.fr/view/index.php?action=playlist&media_id=%s&lang=en' % video_id,
|
||||||
|
display_id)
|
||||||
|
|
||||||
|
NS_MAP = {
|
||||||
|
'jwplayer': 'http://rss.jwpcdn.com/'
|
||||||
|
}
|
||||||
|
|
||||||
|
def ns(path):
|
||||||
|
return xpath_with_ns(path, NS_MAP)
|
||||||
|
|
||||||
|
item = playlist.find('./channel/item')
|
||||||
|
|
||||||
|
title = xpath_text(item, 'title', 'title') or self._og_search_title(webpage)
|
||||||
|
description = self._og_search_description(webpage, default=None)
|
||||||
|
thumbnail = xpath_text(
|
||||||
|
item, ns('./jwplayer:image'), 'thumbnail') or self._og_search_thumbnail(webpage, default=None)
|
||||||
|
duration = parse_duration(self._html_search_meta(
|
||||||
|
'duration', webpage, 'duration', default=None))
|
||||||
|
timestamp = parse_iso8601(self._html_search_meta(
|
||||||
|
'uploadDate', webpage, 'upload date', default=None), ' ')
|
||||||
|
|
||||||
|
view_count = int_or_none(self._search_regex(
|
||||||
|
r'UserPlays:(\d+)', webpage, 'view count', default=None))
|
||||||
|
comment_count = int_or_none(self._search_regex(
|
||||||
|
r'UserComments:(\d+)', webpage, 'comment count', default=None))
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for source in item.findall(ns('./jwplayer:source')):
|
||||||
|
format_id = source.attrib['label']
|
||||||
|
f = {
|
||||||
|
'url': source.attrib['file'],
|
||||||
|
'format_id': format_id,
|
||||||
|
}
|
||||||
|
m = re.search(r'^(?P<height>\d+)[pP]', format_id)
|
||||||
|
if m:
|
||||||
|
f['height'] = int(m.group('height'))
|
||||||
|
formats.append(f)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'duration': duration,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'view_count': view_count,
|
||||||
|
'comment_count': comment_count,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@ -30,13 +30,10 @@ class JSInterpreter(object):
|
|||||||
def __init__(self, code, objects=None):
|
def __init__(self, code, objects=None):
|
||||||
if objects is None:
|
if objects is None:
|
||||||
objects = {}
|
objects = {}
|
||||||
self.code = self._remove_comments(code)
|
self.code = code
|
||||||
self._functions = {}
|
self._functions = {}
|
||||||
self._objects = objects
|
self._objects = objects
|
||||||
|
|
||||||
def _remove_comments(self, code):
|
|
||||||
return re.sub(r'(?s)/\*.*?\*/', '', code)
|
|
||||||
|
|
||||||
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
|
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
|
||||||
if allow_recursion < 0:
|
if allow_recursion < 0:
|
||||||
raise ExtractorError('Recursion limit reached')
|
raise ExtractorError('Recursion limit reached')
|
||||||
|
@ -900,8 +900,8 @@ def _windows_write_string(s, out):
|
|||||||
def not_a_console(handle):
|
def not_a_console(handle):
|
||||||
if handle == INVALID_HANDLE_VALUE or handle is None:
|
if handle == INVALID_HANDLE_VALUE or handle is None:
|
||||||
return True
|
return True
|
||||||
return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
|
return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
|
||||||
or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
|
GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
|
||||||
|
|
||||||
if not_a_console(h):
|
if not_a_console(h):
|
||||||
return False
|
return False
|
||||||
@ -1560,8 +1560,8 @@ def js_to_json(code):
|
|||||||
return '"%s"' % v
|
return '"%s"' % v
|
||||||
|
|
||||||
res = re.sub(r'''(?x)
|
res = re.sub(r'''(?x)
|
||||||
"(?:[^"\\]*(?:\\\\|\\")?)*"|
|
"(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
|
||||||
'(?:[^'\\]*(?:\\\\|\\')?)*'|
|
'(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
|
||||||
[a-zA-Z_][.a-zA-Z_0-9]*
|
[a-zA-Z_][.a-zA-Z_0-9]*
|
||||||
''', fix_kv, code)
|
''', fix_kv, code)
|
||||||
res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
|
res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
|
||||||
@ -1616,6 +1616,15 @@ def args_to_str(args):
|
|||||||
return ' '.join(shlex_quote(a) for a in args)
|
return ' '.join(shlex_quote(a) for a in args)
|
||||||
|
|
||||||
|
|
||||||
|
def mimetype2ext(mt):
|
||||||
|
_, _, res = mt.rpartition('/')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'x-ms-wmv': 'wmv',
|
||||||
|
'x-mp4-fragmented': 'mp4',
|
||||||
|
}.get(res, res)
|
||||||
|
|
||||||
|
|
||||||
def urlhandle_detect_ext(url_handle):
|
def urlhandle_detect_ext(url_handle):
|
||||||
try:
|
try:
|
||||||
url_handle.headers
|
url_handle.headers
|
||||||
@ -1631,7 +1640,7 @@ def urlhandle_detect_ext(url_handle):
|
|||||||
if e:
|
if e:
|
||||||
return e
|
return e
|
||||||
|
|
||||||
return getheader('Content-Type').split("/")[1]
|
return mimetype2ext(getheader('Content-Type'))
|
||||||
|
|
||||||
|
|
||||||
def age_restricted(content_limit, age_limit):
|
def age_restricted(content_limit, age_limit):
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2015.02.17.2'
|
__version__ = '2015.02.23'
|
||||||
|
Reference in New Issue
Block a user