Compare commits
143 commits between versions 2014.02.21 and 2014.03.04.

Commit SHA1s:
d63516e9cd, e477dcf649, 9d3f7781f3, c7095dada3, 607dbbad76, 17b75c0de1, ab24f4f3be, e1a52d9e10, d0ff838433, b37b94501c,
cb3bb2cfef, e2cc7983e9, c9ae7b9565, 86fb4347f7, 2fcec131f5, 9f62eaf4ef, f92259c026, 0afef30b23, dcdfd1c711, 2acc1f8f50,
2c39b0c695, e77c5b4f63, 409a16cb72, 94d5e90b4f, 2d73b45805, 271a2dbfa2, bf4adcac66, fb8b8fdd62, 5a0b26252e, 7d78f0cc48,
f00fc78674, 392017874c, c3cb92d1ab, aa5590fa07, 8cfb5bbf92, 69bb54ebf9, ca97a56e4b, fc26f3b4c2, f604c93c64, dc3727b65c,
aba3231de1, 9193bab91d, fbcf3e416d, c0e5d85631, ca7fa3dcb3, 4ccfba28d9, abb82f1ddc, cda008cff1, 1877a14049, 546582ec3e,
4534485586, a9ab8855e4, 8a44ef6868, 0c7214c404, 4cf9654693, 50a138d95c, 91346358b0, f3783d4b77, 89ef304bed, 83cebb8b7a,
9e68f9fdf1, 2acea5c03d, 978177527e, 2648c436f3, 33f1f2c455, 995befe0e9, 1bb92aff55, b8e1471d3a, 60daf7f0bb, a83a3139d1,
fdb7ca3b8d, 0d7caf5cdf, a339d7ba91, 7216de55d6, 2437fbca64, 7d75d06b78, 13ef5648c4, 5b2478e2ba, 8b286571c3, f3ac523794,
020cf5ebfd, 54ab193970, 8f563f32ab, 151bae3566, 76df418cba, d0a72674c6, 1d430674c7, 70cb73922b, 344400951c, ea5a0be811,
3c7fd0bdb2, 6cadf8c858, 27579b9e4c, 4d756a9cc0, 3e668e05be, 60d3a2e0f8, cc3a3b6b47, eda1d49a62, 62e609ab77, 2bfe4ead4b,
b1c6c32f78, f6acbdecf4, f1c9dfcc01, ce78943ae1, d6f0d86649, 5bb67dbfea, 47610c4d3e, b732f3581f, 9e57ce716f, cd7ee7aa44,
3cfe791473, 973f2532f5, bc3be21d59, 0bf5cf9886, 919052d094, a2dafe2887, 92661c994b, ffe8fe356a, bc2f773b4f, f919201ecc,
7ff5d5c2e2, 9b77f951c7, a25f2f990a, 78b373975d, 2fcc873c4c, 23c2baadb3, 521ee82334, 1df96e59ce, 3e123c1e28, f38da66731,
06aabfc422, 1052d2bfec, 5e0b652344, 0f8f097183, 491ed3dda2, af284c6d1b, 41d3ec5fba, 0568c352f3, 2e7b4cb714, 9767726b66,
9ddfd84e41, 7928024f57, 3eb38acb43
@@ -124,8 +124,12 @@ which means you can modify it, redistribute it or use it however you like.
                                  video id, %(playlist)s for the playlist the
                                  video is in, %(playlist_index)s for the
                                  position in the playlist and %% for a
-                                 literal percent. Use - to output to stdout.
-                                 Can also be used to download to a different
+                                 literal percent. %(height)s and %(width)s
+                                 for the width and height of the video
+                                 format. %(resolution)s for a textual
+                                 description of the resolution of the video
+                                 format. Use - to output to stdout. Can also
+                                 be used to download to a different
                                  directory, for example with -o '/my/downloa
                                  ds/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
 --autonumber-size NUMBER         Specifies the number of digits in
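The new %(height)s, %(width)s and %(resolution)s fields can be exercised either through -o on the command line or through the embedded API. A minimal sketch, assuming the 'outtmpl' parameter of the YoutubeDL class mirrors the -o option (the URL below is only a placeholder):

from youtube_dl import YoutubeDL

# Name files by title, id and resolution, e.g. "Some clip-abc123-1280x720.mp4"
# once the extractor reports width and height.
ydl = YoutubeDL({
    'outtmpl': u'%(title)s-%(id)s-%(resolution)s.%(ext)s',
})
ydl.add_default_info_extractors()
ydl.download(['http://example.com/some-video'])  # placeholder URL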
@@ -14,9 +14,9 @@
 
 set -e
 
-skip_tests=false
-if [ "$1" = '--skip-test' ]; then
-    skip_tests=true
+skip_tests=true
+if [ "$1" = '--run-tests' ]; then
+    skip_tests=false
     shift
 fi
 
@@ -71,6 +71,10 @@ class TestAllURLsMatching(unittest.TestCase):
     def test_youtube_truncated(self):
         self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url'])
 
+    def test_youtube_search_matching(self):
+        self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
+        self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
+
     def test_justin_tv_channelid_matching(self):
         self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
         self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
@@ -18,6 +18,7 @@ from test.helper import (
 import hashlib
 import io
 import json
+import re
 import socket
 
 import youtube_dl.YoutubeDL

@@ -72,9 +73,7 @@ def generator(test_case):
     if 'playlist' not in test_case:
         info_dict = test_case.get('info_dict', {})
         if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
-            print_skipping('The output file cannot be know, the "file" '
-                'key is missing or the info_dict is incomplete')
-            return
+            raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')
     if 'skip' in test_case:
         print_skipping(test_case['skip'])
         return

@@ -137,12 +136,21 @@ def generator(test_case):
             with io.open(info_json_fn, encoding='utf-8') as infof:
                 info_dict = json.load(infof)
             for (info_field, expected) in tc.get('info_dict', {}).items():
-                if isinstance(expected, compat_str) and expected.startswith('md5:'):
-                    got = 'md5:' + md5(info_dict.get(info_field))
-                else:
-                    got = info_dict.get(info_field)
-                self.assertEqual(expected, got,
-                    u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
+                if isinstance(expected, compat_str) and expected.startswith('re:'):
+                    got = info_dict.get(info_field)
+                    match_str = expected[len('re:'):]
+                    match_rex = re.compile(match_str)
+
+                    self.assertTrue(
+                        isinstance(got, compat_str) and match_rex.match(got),
+                        u'field %s (value: %r) should match %r' % (info_field, got, match_str))
+                else:
+                    if isinstance(expected, compat_str) and expected.startswith('md5:'):
+                        got = 'md5:' + md5(info_dict.get(info_field))
+                    else:
+                        got = info_dict.get(info_field)
+                    self.assertEqual(expected, got,
+                        u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
 
             # If checkable fields are missing from the test case, print the info_dict
             test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
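The new branch above lets a test definition give a regular expression instead of a literal value for an info_dict field by prefixing it with 're:'. A hypothetical test entry illustrating both modes (URL, id and values are made up):

_TEST = {
    'url': 'http://example.com/watch/1234',  # placeholder
    'info_dict': {
        'id': '1234',
        'ext': 'mp4',
        # literal comparison, as before:
        'uploader': 'Example TV',
        # new: any title matching the pattern after 're:' passes
        'title': 're:^Episode [0-9]+: .+$',
    },
}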
@ -170,12 +170,12 @@ class TestPlaylists(unittest.TestCase):
|
|||||||
def test_AcademicEarthCourse(self):
|
def test_AcademicEarthCourse(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = AcademicEarthCourseIE(dl)
|
ie = AcademicEarthCourseIE(dl)
|
||||||
result = ie.extract('http://academicearth.org/courses/building-dynamic-websites/')
|
result = ie.extract('http://academicearth.org/playlists/laws-of-nature/')
|
||||||
self.assertIsPlaylist(result)
|
self.assertIsPlaylist(result)
|
||||||
self.assertEqual(result['id'], 'building-dynamic-websites')
|
self.assertEqual(result['id'], 'laws-of-nature')
|
||||||
self.assertEqual(result['title'], 'Building Dynamic Websites')
|
self.assertEqual(result['title'], 'Laws of Nature')
|
||||||
self.assertEqual(result['description'], u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.")
|
self.assertEqual(result['description'],u'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.')# u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.")
|
||||||
self.assertEqual(len(result['entries']), 10)
|
self.assertEqual(len(result['entries']), 4)
|
||||||
|
|
||||||
def test_ivi_compilation(self):
|
def test_ivi_compilation(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
|
@@ -9,6 +9,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 
 # Various small unit tests
+import io
 import xml.etree.ElementTree
 
 #from youtube_dl.utils import htmlentity_transform

@@ -21,6 +22,7 @@ from youtube_dl.utils import (
     orderedSet,
     PagedList,
     parse_duration,
+    read_batch_urls,
     sanitize_filename,
     shell_quote,
     smuggle_url,

@@ -250,5 +252,14 @@ class TestUtil(unittest.TestCase):
     def test_struct_unpack(self):
         self.assertEqual(struct_unpack(u'!B', b'\x00'), (0,))
 
+    def test_read_batch_urls(self):
+        f = io.StringIO(u'''\xef\xbb\xbf foo
+            bar\r
+            baz
+            # More after this line\r
+            ; or after this
+            bam''')
+        self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam'])
+
 if __name__ == '__main__':
     unittest.main()
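The test above pins down the expected behaviour of read_batch_urls: strip a UTF-8 byte-order mark and surrounding whitespace, skip blank lines and lines starting with a comment character, and return the remaining URLs in order. A minimal sketch of a helper with that behaviour, written against the test rather than against the actual youtube_dl.utils implementation:

def read_batch_urls(batch_fd):
    """Return the URLs in a batch file, skipping blanks and comment lines."""
    def fixup(url):
        # The test feeds u'\xef\xbb\xbf' (a UTF-8 BOM read character by
        # character), so strip that sequence as well as a real BOM.
        for bom in (u'\xef\xbb\xbf', u'\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.strip()
        # Lines starting with '#' or ';' are treated as comments.
        if not url or url.startswith(('#', ';')):
            return None
        return url

    urls = (fixup(line) for line in batch_fd)
    return [u for u in urls if u is not None]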
@ -16,6 +16,7 @@ from youtube_dl.extractor import (
|
|||||||
YoutubeChannelIE,
|
YoutubeChannelIE,
|
||||||
YoutubeShowIE,
|
YoutubeShowIE,
|
||||||
YoutubeTopListIE,
|
YoutubeTopListIE,
|
||||||
|
YoutubeSearchURLIE,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -118,6 +119,8 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
|
self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
|
||||||
|
|
||||||
def test_youtube_toptracks(self):
|
def test_youtube_toptracks(self):
|
||||||
|
print('Skipping: The playlist page gives error 500')
|
||||||
|
return
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
result = ie.extract('https://www.youtube.com/playlist?list=MCUS')
|
result = ie.extract('https://www.youtube.com/playlist?list=MCUS')
|
||||||
@ -131,5 +134,14 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
entries = result['entries']
|
entries = result['entries']
|
||||||
self.assertTrue(len(entries) >= 5)
|
self.assertTrue(len(entries) >= 5)
|
||||||
|
|
||||||
|
def test_youtube_search_url(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = YoutubeSearchURLIE(dl)
|
||||||
|
result = ie.extract('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video')
|
||||||
|
entries = result['entries']
|
||||||
|
self.assertIsPlaylist(result)
|
||||||
|
self.assertEqual(result['title'], 'youtube-dl test video')
|
||||||
|
self.assertTrue(len(entries) >= 5)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@@ -409,6 +409,13 @@ class YoutubeDL(object):
             template_dict['autonumber'] = autonumber_templ % self._num_downloads
             if template_dict.get('playlist_index') is not None:
                 template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
+            if template_dict.get('resolution') is None:
+                if template_dict.get('width') and template_dict.get('height'):
+                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
+                elif template_dict.get('height'):
+                    res = '%sp' % template_dict['height']
+                elif template_dict.get('width'):
+                    res = '?x%d' % template_dict['width']
 
             sanitize = lambda k, v: sanitize_filename(
                 compat_str(v),

@@ -675,6 +682,9 @@ class YoutubeDL(object):
         info_dict['playlist'] = None
         info_dict['playlist_index'] = None
 
+        if 'display_id' not in info_dict and 'id' in info_dict:
+            info_dict['display_id'] = info_dict['id']
+
         # This extractors handle format selection themselves
         if info_dict['extractor'] in ['Youku']:
             if download:
|
@ -46,12 +46,17 @@ __authors__ = (
|
|||||||
'Andreas Schmitz',
|
'Andreas Schmitz',
|
||||||
'Michael Kaiser',
|
'Michael Kaiser',
|
||||||
'Niklas Laxström',
|
'Niklas Laxström',
|
||||||
|
'David Triendl',
|
||||||
|
'Anthony Weems',
|
||||||
|
'David Wagner',
|
||||||
|
'Juan C. Olivares',
|
||||||
)
|
)
|
||||||
|
|
||||||
__license__ = 'Public Domain'
|
__license__ = 'Public Domain'
|
||||||
|
|
||||||
import codecs
|
import codecs
|
||||||
import getpass
|
import getpass
|
||||||
|
import io
|
||||||
import locale
|
import locale
|
||||||
import optparse
|
import optparse
|
||||||
import os
|
import os
|
||||||
@ -70,6 +75,7 @@ from .utils import (
|
|||||||
get_cachedir,
|
get_cachedir,
|
||||||
MaxDownloadsReached,
|
MaxDownloadsReached,
|
||||||
preferredencoding,
|
preferredencoding,
|
||||||
|
read_batch_urls,
|
||||||
SameFileError,
|
SameFileError,
|
||||||
setproctitle,
|
setproctitle,
|
||||||
std_headers,
|
std_headers,
|
||||||
@ -424,6 +430,8 @@ def parseOpts(overrideArguments=None):
|
|||||||
'%(extractor)s for the provider (youtube, metacafe, etc), '
|
'%(extractor)s for the provider (youtube, metacafe, etc), '
|
||||||
'%(id)s for the video id, %(playlist)s for the playlist the video is in, '
|
'%(id)s for the video id, %(playlist)s for the playlist the video is in, '
|
||||||
'%(playlist_index)s for the position in the playlist and %% for a literal percent. '
|
'%(playlist_index)s for the position in the playlist and %% for a literal percent. '
|
||||||
|
'%(height)s and %(width)s for the width and height of the video format. '
|
||||||
|
'%(resolution)s for a textual description of the resolution of the video format. '
|
||||||
'Use - to output to stdout. Can also be used to download to a different directory, '
|
'Use - to output to stdout. Can also be used to download to a different directory, '
|
||||||
'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
|
'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
|
||||||
filesystem.add_option('--autonumber-size',
|
filesystem.add_option('--autonumber-size',
|
||||||
@@ -551,21 +559,19 @@ def _real_main(argv=None):
         sys.exit(0)
 
     # Batch file verification
-    batchurls = []
+    batch_urls = []
     if opts.batchfile is not None:
         try:
             if opts.batchfile == '-':
                 batchfd = sys.stdin
             else:
-                batchfd = open(opts.batchfile, 'r')
-            batchurls = batchfd.readlines()
-            batchurls = [x.strip() for x in batchurls]
-            batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
+                batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore')
+            batch_urls = read_batch_urls(batchfd)
             if opts.verbose:
-                write_string(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
+                write_string(u'[debug] Batch file urls: ' + repr(batch_urls) + u'\n')
         except IOError:
             sys.exit(u'ERROR: batch file could not be read')
-    all_urls = batchurls + args
+    all_urls = batch_urls + args
     all_urls = [url.strip() for url in all_urls]
     _enc = preferredencoding()
     all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
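For completeness, the helper the rewritten --batch-file path now relies on can also be called directly. A small sketch (the file name and its contents are placeholders):

import io

from youtube_dl.utils import read_batch_urls

# 'urls.txt' holds one URL per line; blank lines and lines starting with
# '#' or ';' are ignored, matching the batch-file behaviour shown above.
with io.open('urls.txt', 'r', encoding='utf-8', errors='ignore') as batchfd:
    batch_urls = read_batch_urls(batchfd)
print(batch_urls)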
@ -12,7 +12,6 @@ from .http import HttpFD
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
struct_pack,
|
struct_pack,
|
||||||
struct_unpack,
|
struct_unpack,
|
||||||
compat_urllib_request,
|
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
format_bytes,
|
format_bytes,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
@ -117,8 +116,8 @@ class FlvReader(io.BytesIO):
|
|||||||
self.read_unsigned_char()
|
self.read_unsigned_char()
|
||||||
# flags
|
# flags
|
||||||
self.read(3)
|
self.read(3)
|
||||||
# BootstrapinfoVersion
|
|
||||||
bootstrap_info_version = self.read_unsigned_int()
|
self.read_unsigned_int() # BootstrapinfoVersion
|
||||||
# Profile,Live,Update,Reserved
|
# Profile,Live,Update,Reserved
|
||||||
self.read(1)
|
self.read(1)
|
||||||
# time scale
|
# time scale
|
||||||
@ -127,15 +126,15 @@ class FlvReader(io.BytesIO):
|
|||||||
self.read_unsigned_long_long()
|
self.read_unsigned_long_long()
|
||||||
# SmpteTimeCodeOffset
|
# SmpteTimeCodeOffset
|
||||||
self.read_unsigned_long_long()
|
self.read_unsigned_long_long()
|
||||||
# MovieIdentifier
|
|
||||||
movie_identifier = self.read_string()
|
self.read_string() # MovieIdentifier
|
||||||
server_count = self.read_unsigned_char()
|
server_count = self.read_unsigned_char()
|
||||||
# ServerEntryTable
|
# ServerEntryTable
|
||||||
for i in range(server_count):
|
for i in range(server_count):
|
||||||
self.read_string()
|
self.read_string()
|
||||||
quality_count = self.read_unsigned_char()
|
quality_count = self.read_unsigned_char()
|
||||||
# QualityEntryTable
|
# QualityEntryTable
|
||||||
for i in range(server_count):
|
for i in range(quality_count):
|
||||||
self.read_string()
|
self.read_string()
|
||||||
# DrmData
|
# DrmData
|
||||||
self.read_string()
|
self.read_string()
|
||||||
|
@@ -85,6 +85,7 @@ class HttpFD(FileDownloader):
                 else:
                     # The length does not match, we start the download over
                     self.report_unable_to_resume()
+                    resume_len = 0
                     open_mode = 'wb'
                     break
             # Retry
|
@ -19,12 +19,15 @@ from .bbccouk import BBCCoUkIE
|
|||||||
from .blinkx import BlinkxIE
|
from .blinkx import BlinkxIE
|
||||||
from .bliptv import BlipTVIE, BlipTVUserIE
|
from .bliptv import BlipTVIE, BlipTVUserIE
|
||||||
from .bloomberg import BloombergIE
|
from .bloomberg import BloombergIE
|
||||||
|
from .br import BRIE
|
||||||
from .breakcom import BreakIE
|
from .breakcom import BreakIE
|
||||||
from .brightcove import BrightcoveIE
|
from .brightcove import BrightcoveIE
|
||||||
from .c56 import C56IE
|
from .c56 import C56IE
|
||||||
|
from .canal13cl import Canal13clIE
|
||||||
from .canalplus import CanalplusIE
|
from .canalplus import CanalplusIE
|
||||||
from .canalc2 import Canalc2IE
|
from .canalc2 import Canalc2IE
|
||||||
from .cbs import CBSIE
|
from .cbs import CBSIE
|
||||||
|
from .ceskatelevize import CeskaTelevizeIE
|
||||||
from .channel9 import Channel9IE
|
from .channel9 import Channel9IE
|
||||||
from .chilloutzone import ChilloutzoneIE
|
from .chilloutzone import ChilloutzoneIE
|
||||||
from .cinemassacre import CinemassacreIE
|
from .cinemassacre import CinemassacreIE
|
||||||
@ -88,6 +91,7 @@ from .funnyordie import FunnyOrDieIE
|
|||||||
from .gamekings import GamekingsIE
|
from .gamekings import GamekingsIE
|
||||||
from .gamespot import GameSpotIE
|
from .gamespot import GameSpotIE
|
||||||
from .gametrailers import GametrailersIE
|
from .gametrailers import GametrailersIE
|
||||||
|
from .gdcvault import GDCVaultIE
|
||||||
from .generic import GenericIE
|
from .generic import GenericIE
|
||||||
from .googleplus import GooglePlusIE
|
from .googleplus import GooglePlusIE
|
||||||
from .googlesearch import GoogleSearchIE
|
from .googlesearch import GoogleSearchIE
|
||||||
@ -132,11 +136,12 @@ from .lynda import (
|
|||||||
)
|
)
|
||||||
from .m6 import M6IE
|
from .m6 import M6IE
|
||||||
from .macgamestore import MacGameStoreIE
|
from .macgamestore import MacGameStoreIE
|
||||||
|
from .mailru import MailRuIE
|
||||||
from .malemotion import MalemotionIE
|
from .malemotion import MalemotionIE
|
||||||
from .mdr import MDRIE
|
from .mdr import MDRIE
|
||||||
from .metacafe import MetacafeIE
|
from .metacafe import MetacafeIE
|
||||||
from .metacritic import MetacriticIE
|
from .metacritic import MetacriticIE
|
||||||
from .mit import TechTVMITIE, MITIE
|
from .mit import TechTVMITIE, MITIE, OCWMITIE
|
||||||
from .mixcloud import MixcloudIE
|
from .mixcloud import MixcloudIE
|
||||||
from .mpora import MporaIE
|
from .mpora import MporaIE
|
||||||
from .mofosex import MofosexIE
|
from .mofosex import MofosexIE
|
||||||
@ -151,7 +156,10 @@ from .myspass import MySpassIE
|
|||||||
from .myvideo import MyVideoIE
|
from .myvideo import MyVideoIE
|
||||||
from .naver import NaverIE
|
from .naver import NaverIE
|
||||||
from .nba import NBAIE
|
from .nba import NBAIE
|
||||||
from .nbc import NBCNewsIE
|
from .nbc import (
|
||||||
|
NBCIE,
|
||||||
|
NBCNewsIE,
|
||||||
|
)
|
||||||
from .ndr import NDRIE
|
from .ndr import NDRIE
|
||||||
from .ndtv import NDTVIE
|
from .ndtv import NDTVIE
|
||||||
from .newgrounds import NewgroundsIE
|
from .newgrounds import NewgroundsIE
|
||||||
@ -160,7 +168,7 @@ from .nhl import NHLIE, NHLVideocenterIE
|
|||||||
from .niconico import NiconicoIE
|
from .niconico import NiconicoIE
|
||||||
from .ninegag import NineGagIE
|
from .ninegag import NineGagIE
|
||||||
from .normalboots import NormalbootsIE
|
from .normalboots import NormalbootsIE
|
||||||
from .novamov import NovamovIE
|
from .novamov import NovaMovIE
|
||||||
from .nowness import NownessIE
|
from .nowness import NownessIE
|
||||||
from .nowvideo import NowVideoIE
|
from .nowvideo import NowVideoIE
|
||||||
from .ooyala import OoyalaIE
|
from .ooyala import OoyalaIE
|
||||||
@ -171,6 +179,7 @@ from .podomatic import PodomaticIE
|
|||||||
from .pornhd import PornHdIE
|
from .pornhd import PornHdIE
|
||||||
from .pornhub import PornHubIE
|
from .pornhub import PornHubIE
|
||||||
from .pornotube import PornotubeIE
|
from .pornotube import PornotubeIE
|
||||||
|
from .prosiebensat1 import ProSiebenSat1IE
|
||||||
from .pyvideo import PyvideoIE
|
from .pyvideo import PyvideoIE
|
||||||
from .radiofrance import RadioFranceIE
|
from .radiofrance import RadioFranceIE
|
||||||
from .rbmaradio import RBMARadioIE
|
from .rbmaradio import RBMARadioIE
|
||||||
@ -186,6 +195,7 @@ from .rutube import (
|
|||||||
RutubeMovieIE,
|
RutubeMovieIE,
|
||||||
RutubePersonIE,
|
RutubePersonIE,
|
||||||
)
|
)
|
||||||
|
from .savefrom import SaveFromIE
|
||||||
from .servingsys import ServingSysIE
|
from .servingsys import ServingSysIE
|
||||||
from .sina import SinaIE
|
from .sina import SinaIE
|
||||||
from .slashdot import SlashdotIE
|
from .slashdot import SlashdotIE
|
||||||
@ -224,10 +234,12 @@ from .tinypic import TinyPicIE
|
|||||||
from .toutv import TouTvIE
|
from .toutv import TouTvIE
|
||||||
from .traileraddict import TrailerAddictIE
|
from .traileraddict import TrailerAddictIE
|
||||||
from .trilulilu import TriluliluIE
|
from .trilulilu import TriluliluIE
|
||||||
|
from .trutube import TruTubeIE
|
||||||
from .tube8 import Tube8IE
|
from .tube8 import Tube8IE
|
||||||
from .tudou import TudouIE
|
from .tudou import TudouIE
|
||||||
from .tumblr import TumblrIE
|
from .tumblr import TumblrIE
|
||||||
from .tutv import TutvIE
|
from .tutv import TutvIE
|
||||||
|
from .tvigle import TvigleIE
|
||||||
from .tvp import TvpIE
|
from .tvp import TvpIE
|
||||||
from .unistra import UnistraIE
|
from .unistra import UnistraIE
|
||||||
from .ustream import UstreamIE, UstreamChannelIE
|
from .ustream import UstreamIE, UstreamChannelIE
|
||||||
@ -238,6 +250,7 @@ from .vesti import VestiIE
|
|||||||
from .vevo import VevoIE
|
from .vevo import VevoIE
|
||||||
from .vice import ViceIE
|
from .vice import ViceIE
|
||||||
from .viddler import ViddlerIE
|
from .viddler import ViddlerIE
|
||||||
|
from .videobam import VideoBamIE
|
||||||
from .videodetective import VideoDetectiveIE
|
from .videodetective import VideoDetectiveIE
|
||||||
from .videofyme import VideofyMeIE
|
from .videofyme import VideofyMeIE
|
||||||
from .videopremium import VideoPremiumIE
|
from .videopremium import VideoPremiumIE
|
||||||
@ -272,19 +285,20 @@ from .youku import YoukuIE
|
|||||||
from .youporn import YouPornIE
|
from .youporn import YouPornIE
|
||||||
from .youtube import (
|
from .youtube import (
|
||||||
YoutubeIE,
|
YoutubeIE,
|
||||||
YoutubePlaylistIE,
|
|
||||||
YoutubeSearchIE,
|
|
||||||
YoutubeSearchDateIE,
|
|
||||||
YoutubeUserIE,
|
|
||||||
YoutubeChannelIE,
|
YoutubeChannelIE,
|
||||||
YoutubeShowIE,
|
|
||||||
YoutubeSubscriptionsIE,
|
|
||||||
YoutubeRecommendedIE,
|
|
||||||
YoutubeTruncatedURLIE,
|
|
||||||
YoutubeWatchLaterIE,
|
|
||||||
YoutubeFavouritesIE,
|
YoutubeFavouritesIE,
|
||||||
YoutubeHistoryIE,
|
YoutubeHistoryIE,
|
||||||
|
YoutubePlaylistIE,
|
||||||
|
YoutubeRecommendedIE,
|
||||||
|
YoutubeSearchDateIE,
|
||||||
|
YoutubeSearchIE,
|
||||||
|
YoutubeSearchURLIE,
|
||||||
|
YoutubeShowIE,
|
||||||
|
YoutubeSubscriptionsIE,
|
||||||
YoutubeTopListIE,
|
YoutubeTopListIE,
|
||||||
|
YoutubeTruncatedURLIE,
|
||||||
|
YoutubeUserIE,
|
||||||
|
YoutubeWatchLaterIE,
|
||||||
)
|
)
|
||||||
from .zdf import ZDFIE
|
from .zdf import ZDFIE
|
||||||
|
|
||||||
|
@ -5,7 +5,7 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class AcademicEarthCourseIE(InfoExtractor):
|
class AcademicEarthCourseIE(InfoExtractor):
|
||||||
_VALID_URL = r'^https?://(?:www\.)?academicearth\.org/(?:courses|playlists)/(?P<id>[^?#/]+)'
|
_VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
|
||||||
IE_NAME = 'AcademicEarth:Course'
|
IE_NAME = 'AcademicEarth:Course'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -14,12 +14,12 @@ class AcademicEarthCourseIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<h1 class="playlist-name">(.*?)</h1>', webpage, u'title')
|
r'<h1 class="playlist-name"[^>]*?>(.*?)</h1>', webpage, u'title')
|
||||||
description = self._html_search_regex(
|
description = self._html_search_regex(
|
||||||
r'<p class="excerpt">(.*?)</p>',
|
r'<p class="excerpt"[^>]*?>(.*?)</p>',
|
||||||
webpage, u'description', fatal=False)
|
webpage, u'description', fatal=False)
|
||||||
urls = re.findall(
|
urls = re.findall(
|
||||||
r'<h3 class="lecture-title"><a target="_blank" href="([^"]+)">',
|
r'<li class="lecture-preview">\s*?<a target="_blank" href="([^"]+)">',
|
||||||
webpage)
|
webpage)
|
||||||
entries = [self.url_result(u) for u in urls]
|
entries = [self.url_result(u) for u in urls]
|
||||||
|
|
||||||
|
80
youtube_dl/extractor/br.py
Normal file
80
youtube_dl/extractor/br.py
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class BRIE(InfoExtractor):
|
||||||
|
IE_DESC = "Bayerischer Rundfunk Mediathek"
|
||||||
|
_VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?P<id>[a-z0-9\-]+)\.html$"
|
||||||
|
_BASE_URL = "http://www.br.de"
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
"url": "http://www.br.de/mediathek/video/anselm-gruen-114.html",
|
||||||
|
"md5": "c4f83cf0f023ba5875aba0bf46860df2",
|
||||||
|
"info_dict": {
|
||||||
|
"id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532",
|
||||||
|
"ext": "mp4",
|
||||||
|
"title": "Feiern und Verzichten",
|
||||||
|
"description": "Anselm Grün: Feiern und Verzichten",
|
||||||
|
"uploader": "BR/Birgit Baier",
|
||||||
|
"upload_date": "20140301"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
display_id = mobj.group('id')
|
||||||
|
page = self._download_webpage(url, display_id)
|
||||||
|
xml_url = self._search_regex(
|
||||||
|
r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL")
|
||||||
|
xml = self._download_xml(self._BASE_URL + xml_url, None)
|
||||||
|
|
||||||
|
videos = [{
|
||||||
|
"id": xml_video.get("externalId"),
|
||||||
|
"title": xml_video.find("title").text,
|
||||||
|
"formats": self._extract_formats(xml_video.find("assets")),
|
||||||
|
"thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")),
|
||||||
|
"description": " ".join(xml_video.find("shareTitle").text.splitlines()),
|
||||||
|
"uploader": xml_video.find("author").text,
|
||||||
|
"upload_date": "".join(reversed(xml_video.find("broadcastDate").text.split("."))),
|
||||||
|
"webpage_url": xml_video.find("permalink").text,
|
||||||
|
} for xml_video in xml.findall("video")]
|
||||||
|
|
||||||
|
if len(videos) > 1:
|
||||||
|
self._downloader.report_warning(
|
||||||
|
'found multiple videos; please '
|
||||||
|
'report this with the video URL to http://yt-dl.org/bug')
|
||||||
|
if not videos:
|
||||||
|
raise ExtractorError('No video entries found')
|
||||||
|
return videos[0]
|
||||||
|
|
||||||
|
def _extract_formats(self, assets):
|
||||||
|
formats = [{
|
||||||
|
"url": asset.find("downloadUrl").text,
|
||||||
|
"ext": asset.find("mediaType").text,
|
||||||
|
"format_id": asset.get("type"),
|
||||||
|
"width": int(asset.find("frameWidth").text),
|
||||||
|
"height": int(asset.find("frameHeight").text),
|
||||||
|
"tbr": int(asset.find("bitrateVideo").text),
|
||||||
|
"abr": int(asset.find("bitrateAudio").text),
|
||||||
|
"vcodec": asset.find("codecVideo").text,
|
||||||
|
"container": asset.find("mediaType").text,
|
||||||
|
"filesize": int(asset.find("size").text),
|
||||||
|
} for asset in assets.findall("asset")
|
||||||
|
if asset.find("downloadUrl") is not None]
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
return formats
|
||||||
|
|
||||||
|
def _extract_thumbnails(self, variants):
|
||||||
|
thumbnails = [{
|
||||||
|
"url": self._BASE_URL + variant.find("url").text,
|
||||||
|
"width": int(variant.find("width").text),
|
||||||
|
"height": int(variant.find("height").text),
|
||||||
|
} for variant in variants.findall("variant")]
|
||||||
|
thumbnails.sort(key=lambda x: x["width"] * x["height"], reverse=True)
|
||||||
|
return thumbnails
|
@ -23,8 +23,8 @@ class BreakIE(InfoExtractor):
|
|||||||
video_id = mobj.group(1).split("-")[-1]
|
video_id = mobj.group(1).split("-")[-1]
|
||||||
embed_url = 'http://www.break.com/embed/%s' % video_id
|
embed_url = 'http://www.break.com/embed/%s' % video_id
|
||||||
webpage = self._download_webpage(embed_url, video_id)
|
webpage = self._download_webpage(embed_url, video_id)
|
||||||
info_json = self._search_regex(r'var embedVars = ({.*?});', webpage,
|
info_json = self._search_regex(r'var embedVars = ({.*})\s*?</script>',
|
||||||
'info json', flags=re.DOTALL)
|
webpage, 'info json', flags=re.DOTALL)
|
||||||
info = json.loads(info_json)
|
info = json.loads(info_json)
|
||||||
video_url = info['videoUri']
|
video_url = info['videoUri']
|
||||||
m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url)
|
m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url)
|
||||||
|
48
youtube_dl/extractor/canal13cl.py
Normal file
48
youtube_dl/extractor/canal13cl.py
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class Canal13clIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'^http://(?:www\.)?13\.cl/(?:[^/?#]+/)*(?P<id>[^/?#]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.13.cl/t13/nacional/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
|
||||||
|
'md5': '4cb1fa38adcad8fea88487a078831755',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1403022125',
|
||||||
|
'display_id': 'el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'El "círculo de hierro" de Michelle Bachelet en su regreso a La Moneda',
|
||||||
|
'description': '(Foto: Agencia Uno) En nueve días más, Michelle Bachelet va a asumir por segunda vez como presidenta de la República. Entre aquellos que la acompañarán hay caras que se repiten y otras que se consolidan en su entorno de colaboradores más cercanos.',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
display_id = mobj.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
title = self._html_search_meta(
|
||||||
|
'twitter:title', webpage, 'title', fatal=True)
|
||||||
|
description = self._html_search_meta(
|
||||||
|
'twitter:description', webpage, 'description')
|
||||||
|
url = self._html_search_regex(
|
||||||
|
r'articuloVideo = \"(.*?)\"', webpage, 'url')
|
||||||
|
real_id = self._search_regex(
|
||||||
|
r'[^0-9]([0-9]{7,})[^0-9]', url, 'id', default=display_id)
|
||||||
|
thumbnail = self._html_search_regex(
|
||||||
|
r'articuloImagen = \"(.*?)\"', webpage, 'thumbnail')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': real_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'url': url,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'ext': 'mp4',
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
}
|
@ -1,4 +1,6 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -9,11 +11,12 @@ class Canalc2IE(InfoExtractor):
|
|||||||
_VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P<id>\d+)'
|
_VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
|
'url': 'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
|
||||||
u'file': u'12163.mp4',
|
'md5': '060158428b650f896c542dfbb3d6487f',
|
||||||
u'md5': u'060158428b650f896c542dfbb3d6487f',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '12163',
|
||||||
u'title': u'Terrasses du Numérique'
|
'ext': 'mp4',
|
||||||
|
'title': 'Terrasses du Numérique'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -28,10 +31,11 @@ class Canalc2IE(InfoExtractor):
|
|||||||
video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name
|
video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'class="evenement8">(.*?)</a>', webpage, u'title')
|
r'class="evenement8">(.*?)</a>', webpage, 'title')
|
||||||
|
|
||||||
return {'id': video_id,
|
return {
|
||||||
'ext': 'mp4',
|
'id': video_id,
|
||||||
'url': video_url,
|
'ext': 'mp4',
|
||||||
'title': title,
|
'url': video_url,
|
||||||
}
|
'title': title,
|
||||||
|
}
|
||||||
|
126
youtube_dl/extractor/ceskatelevize.py
Normal file
126
youtube_dl/extractor/ceskatelevize.py
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_request,
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CeskaTelevizeIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'
|
||||||
|
|
||||||
|
_TESTS = [
|
||||||
|
{
|
||||||
|
'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/213512120230004-spanelska-chripka',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '213512120230004',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'První republika: Španělská chřipka',
|
||||||
|
'duration': 3107.4,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True, # requires rtmpdump
|
||||||
|
},
|
||||||
|
'skip': 'Works only from Czech Republic.',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.ceskatelevize.cz/ivysilani/1030584952-tsatsiki-maminka-a-policajt',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '20138143440',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Tsatsiki, maminka a policajt',
|
||||||
|
'duration': 6754.1,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True, # requires rtmpdump
|
||||||
|
},
|
||||||
|
'skip': 'Works only from Czech Republic.',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '14716',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'První republika: Zpěvačka z Dupárny Bobina',
|
||||||
|
'duration': 90,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True, # requires rtmpdump
|
||||||
|
},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
url = url.replace('/porady/', '/ivysilani/').replace('/video/', '')
|
||||||
|
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
|
||||||
|
if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
|
||||||
|
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
|
||||||
|
|
||||||
|
typ = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type')
|
||||||
|
episode_id = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id')
|
||||||
|
|
||||||
|
data = {
|
||||||
|
'playlist[0][type]': typ,
|
||||||
|
'playlist[0][id]': episode_id,
|
||||||
|
'requestUrl': compat_urllib_parse_urlparse(url).path,
|
||||||
|
'requestSource': 'iVysilani',
|
||||||
|
}
|
||||||
|
|
||||||
|
req = compat_urllib_request.Request('http://www.ceskatelevize.cz/ivysilani/ajax/get-playlist-url',
|
||||||
|
data=compat_urllib_parse.urlencode(data))
|
||||||
|
|
||||||
|
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||||
|
req.add_header('x-addr', '127.0.0.1')
|
||||||
|
req.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||||
|
req.add_header('Referer', url)
|
||||||
|
|
||||||
|
playlistpage = self._download_json(req, video_id)
|
||||||
|
|
||||||
|
req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlistpage['url']))
|
||||||
|
req.add_header('Referer', url)
|
||||||
|
|
||||||
|
playlist = self._download_xml(req, video_id)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for i in playlist.find('smilRoot/body'):
|
||||||
|
if 'AD' not in i.attrib['id']:
|
||||||
|
base_url = i.attrib['base']
|
||||||
|
parsedurl = compat_urllib_parse_urlparse(base_url)
|
||||||
|
duration = i.attrib['duration']
|
||||||
|
|
||||||
|
for video in i.findall('video'):
|
||||||
|
if video.attrib['label'] != 'AD':
|
||||||
|
format_id = video.attrib['label']
|
||||||
|
play_path = video.attrib['src']
|
||||||
|
vbr = int(video.attrib['system-bitrate'])
|
||||||
|
|
||||||
|
formats.append({
|
||||||
|
'format_id': format_id,
|
||||||
|
'url': base_url,
|
||||||
|
'vbr': vbr,
|
||||||
|
'play_path': play_path,
|
||||||
|
'app': parsedurl.path[1:] + '?' + parsedurl.query,
|
||||||
|
'rtmp_live': True,
|
||||||
|
'ext': 'flv',
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': episode_id,
|
||||||
|
'title': self._html_search_regex(r'<title>(.+?) — iVysílání — Česká televize</title>', webpage, 'title'),
|
||||||
|
'duration': float(duration),
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@ -1,4 +1,5 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -8,73 +9,63 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class CinemassacreIE(InfoExtractor):
|
class CinemassacreIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?(?P<url>cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/.+?)(?:[/?].*)?'
|
_VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/.+?'
|
||||||
_TESTS = [{
|
_TESTS = [
|
||||||
u'url': u'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
|
{
|
||||||
u'file': u'19911.flv',
|
'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
|
||||||
u'info_dict': {
|
'file': '19911.mp4',
|
||||||
u'upload_date': u'20121110',
|
'md5': 'fde81fbafaee331785f58cd6c0d46190',
|
||||||
u'title': u'“Angry Video Game Nerd: The Movie” – Trailer',
|
'info_dict': {
|
||||||
u'description': u'md5:fb87405fcb42a331742a0dce2708560b',
|
'upload_date': '20121110',
|
||||||
|
'title': '“Angry Video Game Nerd: The Movie” – Trailer',
|
||||||
|
'description': 'md5:fb87405fcb42a331742a0dce2708560b',
|
||||||
|
},
|
||||||
},
|
},
|
||||||
u'params': {
|
{
|
||||||
# rtmp download
|
'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
|
||||||
u'skip_download': True,
|
'file': '521be8ef82b16.mp4',
|
||||||
},
|
'md5': 'd72f10cd39eac4215048f62ab477a511',
|
||||||
},
|
'info_dict': {
|
||||||
{
|
'upload_date': '20131002',
|
||||||
u'url': u'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
|
'title': 'The Mummy’s Hand (1940)',
|
||||||
u'file': u'521be8ef82b16.flv',
|
},
|
||||||
u'info_dict': {
|
}
|
||||||
u'upload_date': u'20131002',
|
]
|
||||||
u'title': u'The Mummy’s Hand (1940)',
|
|
||||||
},
|
|
||||||
u'params': {
|
|
||||||
# rtmp download
|
|
||||||
u'skip_download': True,
|
|
||||||
},
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
|
||||||
webpage_url = u'http://' + mobj.group('url')
|
webpage = self._download_webpage(url, None) # Don't know video id yet
|
||||||
webpage = self._download_webpage(webpage_url, None) # Don't know video id yet
|
|
||||||
video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
|
video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
|
||||||
mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
|
mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage)
|
||||||
if not mobj:
|
if not mobj:
|
||||||
raise ExtractorError(u'Can\'t extract embed url and video id')
|
raise ExtractorError('Can\'t extract embed url and video id')
|
||||||
playerdata_url = mobj.group(u'embed_url')
|
playerdata_url = mobj.group('embed_url')
|
||||||
video_id = mobj.group(u'video_id')
|
video_id = mobj.group('video_id')
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<title>(?P<title>.+?)\|',
|
video_title = self._html_search_regex(r'<title>(?P<title>.+?)\|',
|
||||||
webpage, u'title')
|
webpage, 'title')
|
||||||
video_description = self._html_search_regex(r'<div class="entry-content">(?P<description>.+?)</div>',
|
video_description = self._html_search_regex(r'<div class="entry-content">(?P<description>.+?)</div>',
|
||||||
webpage, u'description', flags=re.DOTALL, fatal=False)
|
webpage, 'description', flags=re.DOTALL, fatal=False)
|
||||||
if len(video_description) == 0:
|
if len(video_description) == 0:
|
||||||
video_description = None
|
video_description = None
|
||||||
|
|
||||||
playerdata = self._download_webpage(playerdata_url, video_id)
|
playerdata = self._download_webpage(playerdata_url, video_id)
|
||||||
url = self._html_search_regex(r'\'streamer\': \'(?P<url>[^\']+)\'', playerdata, u'url')
|
|
||||||
|
|
||||||
sd_file = self._html_search_regex(r'\'file\': \'(?P<sd_file>[^\']+)\'', playerdata, u'sd_file')
|
sd_url = self._html_search_regex(r'file: \'(?P<sd_file>[^\']+)\', label: \'SD\'', playerdata, 'sd_file')
|
||||||
hd_file = self._html_search_regex(r'\'?file\'?: "(?P<hd_file>[^"]+)"', playerdata, u'hd_file')
|
hd_url = self._html_search_regex(r'file: \'(?P<hd_file>[^\']+)\', label: \'HD\'', playerdata, 'hd_file')
|
||||||
video_thumbnail = self._html_search_regex(r'\'image\': \'(?P<thumbnail>[^\']+)\'', playerdata, u'thumbnail', fatal=False)
|
video_thumbnail = self._html_search_regex(r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
formats = [
|
formats = [
|
||||||
{
|
{
|
||||||
'url': url,
|
'url': sd_url,
|
||||||
'play_path': 'mp4:' + sd_file,
|
'ext': 'mp4',
|
||||||
'rtmp_live': True, # workaround
|
|
||||||
'ext': 'flv',
|
|
||||||
'format': 'sd',
|
'format': 'sd',
|
||||||
'format_id': 'sd',
|
'format_id': 'sd',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': url,
|
'url': hd_url,
|
||||||
'play_path': 'mp4:' + hd_file,
|
'ext': 'mp4',
|
||||||
'rtmp_live': True, # workaround
|
|
||||||
'ext': 'flv',
|
|
||||||
'format': 'hd',
|
'format': 'hd',
|
||||||
'format_id': 'hd',
|
'format_id': 'hd',
|
||||||
},
|
},
|
||||||
|
@ -40,7 +40,7 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
'id': 'W5gMp3ZjYg4',
|
'id': 'W5gMp3ZjYg4',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
|
'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
|
||||||
'uploader': 'Funnyplox TV',
|
'uploader': 'FunnyPlox TV',
|
||||||
'uploader_id': 'funnyploxtv',
|
'uploader_id': 'funnyploxtv',
|
||||||
'description': 'md5:7ded37421526d54afdf005e25bc2b7a3',
|
'description': 'md5:7ded37421526d54afdf005e25bc2b7a3',
|
||||||
'upload_date': '20140128',
|
'upload_date': '20140128',
|
||||||
|
@@ -88,6 +88,10 @@ class InfoExtractor(object):
 
     The following fields are optional:
 
+    display_id      An alternative identifier for the video, not necessarily
+                    unique, but available before title. Typically, id is
+                    something like "4234987", title "Dancing naked mole rats",
+                    and display_id "dancing-naked-mole-rats"
     thumbnails:     A list of dictionaries (with the entries "resolution" and
                     "url") for the varying thumbnails
     thumbnail:      Full URL to a video thumbnail image.

@@ -432,14 +436,14 @@ class InfoExtractor(object):
         if secure: regexes = self._og_regexes('video:secure_url') + regexes
         return self._html_search_regex(regexes, html, name, **kargs)
 
-    def _html_search_meta(self, name, html, display_name=None):
+    def _html_search_meta(self, name, html, display_name=None, fatal=False):
         if display_name is None:
             display_name = name
         return self._html_search_regex(
             r'''(?ix)<meta
             (?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
             [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
-            html, display_name, fatal=False)
+            html, display_name, fatal=fatal)
 
     def _dc_search_uploader(self, html):
         return self._html_search_meta('dc.creator', html, 'uploader')
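With the added fatal parameter, an extractor can make a missing <meta> tag a hard error instead of silently getting None back. A short usage sketch inside an extractor, mirroring how the Canal13cl extractor added in this range calls it (the meta names are Twitter Card fields):

# inside an InfoExtractor subclass, after `webpage` has been downloaded:
title = self._html_search_meta('twitter:title', webpage, 'title', fatal=True)
# description stays optional; extraction continues if the tag is missing
description = self._html_search_meta('twitter:description', webpage, 'description')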
@ -1,7 +1,11 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re, base64, zlib
|
import re
|
||||||
|
import json
|
||||||
|
import base64
|
||||||
|
import zlib
|
||||||
|
|
||||||
from hashlib import sha1
|
from hashlib import sha1
|
||||||
from math import pow, sqrt, floor
|
from math import pow, sqrt, floor
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -19,13 +23,15 @@ from ..aes import (
|
|||||||
inc,
|
inc,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class CrunchyrollIE(InfoExtractor):
|
class CrunchyrollIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:https?://)?(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||||
_TESTS = [{
|
_TEST = {
|
||||||
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||||
'file': '645513.flv',
|
|
||||||
#'md5': 'b1639fd6ddfaa43788c85f6d1dddd412',
|
#'md5': 'b1639fd6ddfaa43788c85f6d1dddd412',
|
||||||
'info_dict': {
|
         'info_dict': {
+            'id': '645513',
+            'ext': 'flv',
             'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
             'description': 'md5:2d17137920c64f2f49981a7797d275ef',
             'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
@@ -36,7 +42,7 @@ class CrunchyrollIE(InfoExtractor):
             # rtmp
             'skip_download': True,
         },
-    }]
+    }

     _FORMAT_IDS = {
         '360': ('60', '106'),
@@ -80,9 +86,8 @@ class CrunchyrollIE(InfoExtractor):
         return zlib.decompress(decrypted_data)

     def _convert_subtitles_to_srt(self, subtitles):
-        i=1
         output = ''
-        for start, end, text in re.findall(r'<event [^>]*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles):
+        for i, (start, end, text) in enumerate(re.findall(r'<event [^>]*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles), 1):
             start = start.replace('.', ',')
             end = end.replace('.', ',')
             text = clean_html(text)
@@ -90,7 +95,6 @@ class CrunchyrollIE(InfoExtractor):
             if not text:
                 continue
             output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
-            i+=1
         return output

     def _real_extract(self,url):
@@ -108,6 +112,12 @@ class CrunchyrollIE(InfoExtractor):
         if note_m:
             raise ExtractorError(note_m)

+        mobj = re.search(r'Page\.messaging_box_controller\.addItems\(\[(?P<msg>{.+?})\]\)', webpage)
+        if mobj:
+            msg = json.loads(mobj.group('msg'))
+            if msg.get('type') == 'error':
+                raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True)
+
         video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, 'video_title', flags=re.DOTALL)
         video_title = re.sub(r' {2,}', ' ', video_title)
         video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='')
@@ -161,7 +171,7 @@ class CrunchyrollIE(InfoExtractor):
             data = base64.b64decode(data)

             subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
-            lang_code = self._search_regex(r'lang_code=\'([^\']+)', subtitle, 'subtitle_lang_code', fatal=False)
+            lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
             if not lang_code:
                 continue
             subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle)
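The SRT-numbering change above replaces a hand-maintained counter with enumerate() started at 1. A minimal standalone sketch of the same pattern, using made-up event tuples rather than real Crunchyroll data:

# Sketch: number SRT blocks with enumerate(..., 1) instead of a manual counter (sample data only).
events = [('1.500', '3.000', 'Hello'), ('3.200', '5.000', 'World')]

output = ''
for i, (start, end, text) in enumerate(events, 1):
    start = start.replace('.', ',')  # SRT timestamps use a comma as the decimal separator
    end = end.replace('.', ',')
    output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text)

print(output)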
youtube_dl/extractor/facebook.py

@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import json
 import re
 import socket
@@ -26,20 +28,21 @@ class FacebookIE(InfoExtractor):
     _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
     _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
     _NETRC_MACHINE = 'facebook'
-    IE_NAME = u'facebook'
+    IE_NAME = 'facebook'
     _TEST = {
-        u'url': u'https://www.facebook.com/photo.php?v=120708114770723',
-        u'file': u'120708114770723.mp4',
-        u'md5': u'48975a41ccc4b7a581abd68651c1a5a8',
-        u'info_dict': {
-            u"duration": 279,
-            u"title": u"PEOPLE ARE AWESOME 2013"
+        'url': 'https://www.facebook.com/photo.php?v=120708114770723',
+        'md5': '48975a41ccc4b7a581abd68651c1a5a8',
+        'info_dict': {
+            'id': '120708114770723',
+            'ext': 'mp4',
+            'duration': 279,
+            'title': 'PEOPLE ARE AWESOME 2013'
         }
     }

     def report_login(self):
         """Report attempt to log in."""
-        self.to_screen(u'Logging in')
+        self.to_screen('Logging in')

     def _login(self):
         (useremail, password) = self._get_login_info()
@@ -50,9 +53,11 @@ class FacebookIE(InfoExtractor):
         login_page_req.add_header('Cookie', 'locale=en_US')
         self.report_login()
         login_page = self._download_webpage(login_page_req, None, note=False,
-            errnote=u'Unable to download login page')
-        lsd = self._search_regex(r'"lsd":"(\w*?)"', login_page, u'lsd')
-        lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, u'lgnrnd')
+            errnote='Unable to download login page')
+        lsd = self._search_regex(
+            r'<input type="hidden" name="lsd" value="([^"]*)"',
+            login_page, 'lsd')
+        lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, 'lgnrnd')

         login_form = {
             'email': useremail,
@@ -70,22 +75,22 @@ class FacebookIE(InfoExtractor):
         try:
             login_results = compat_urllib_request.urlopen(request).read()
             if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
-                self._downloader.report_warning(u'unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
+                self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
                 return

             check_form = {
-                'fb_dtsg': self._search_regex(r'"fb_dtsg":"(.*?)"', login_results, u'fb_dtsg'),
-                'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, u'nh'),
+                'fb_dtsg': self._search_regex(r'"fb_dtsg":"(.*?)"', login_results, 'fb_dtsg'),
+                'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, 'nh'),
                 'name_action_selected': 'dont_save',
-                'submit[Continue]': self._search_regex(r'<input value="(.*?)" name="submit\[Continue\]"', login_results, u'continue'),
+                'submit[Continue]': self._search_regex(r'<input value="(.*?)" name="submit\[Continue\]"', login_results, 'continue'),
             }
             check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, compat_urllib_parse.urlencode(check_form))
             check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
             check_response = compat_urllib_request.urlopen(check_req).read()
             if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
-                self._downloader.report_warning(u'Unable to confirm login, you have to login in your brower and authorize the login.')
+                self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.')
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
+            self._downloader.report_warning('unable to log in: %s' % compat_str(err))
             return

     def _real_initialize(self):
@@ -94,7 +99,7 @@ class FacebookIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
+            raise ExtractorError('Invalid URL: %s' % url)
         video_id = mobj.group('id')

         url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
@@ -107,10 +112,10 @@ class FacebookIE(InfoExtractor):
             m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
             if m_msg is not None:
                 raise ExtractorError(
-                    u'The video is not available, Facebook said: "%s"' % m_msg.group(1),
+                    'The video is not available, Facebook said: "%s"' % m_msg.group(1),
                     expected=True)
             else:
-                raise ExtractorError(u'Cannot parse data')
+                raise ExtractorError('Cannot parse data')
         data = dict(json.loads(m.group(1)))
         params_raw = compat_urllib_parse.unquote(data['params'])
         params = json.loads(params_raw)
@@ -119,12 +124,12 @@ class FacebookIE(InfoExtractor):
         if not video_url:
             video_url = video_data['sd_src']
         if not video_url:
-            raise ExtractorError(u'Cannot find video URL')
+            raise ExtractorError('Cannot find video URL')
         video_duration = int(video_data['video_duration'])
         thumbnail = video_data['thumbnail_src']

         video_title = self._html_search_regex(
-            r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, u'title')
+            r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title')

         info = {
             'id': video_id,
youtube_dl/extractor/fourtube.py

@@ -8,8 +8,8 @@ from ..utils import (
     unified_strdate,
     str_to_int,
     parse_duration,
+    clean_html,
 )
-from youtube_dl.utils import clean_html


 class FourTubeIE(InfoExtractor):
youtube_dl/extractor/gdcvault.py (new file, 134 lines)

@@ -0,0 +1,134 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
    compat_urllib_request,
)


class GDCVaultIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)'
    _TESTS = [
        {
            'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple',
            'md5': '7ce8388f544c88b7ac11c7ab1b593704',
            'info_dict': {
                'id': '1019721',
                'ext': 'mp4',
                'title': 'Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)'
            }
        },
        {
            'url': 'http://www.gdcvault.com/play/1015683/Embracing-the-Dark-Art-of',
            'info_dict': {
                'id': '1015683',
                'ext': 'flv',
                'title': 'Embracing the Dark Art of Mathematical Modeling in AI'
            },
            'params': {
                'skip_download': True, # Requires rtmpdump
            }
        },
    ]

    def _parse_mp4(self, xml_description):
        video_formats = []
        mp4_video = xml_description.find('./metadata/mp4video')
        if mp4_video is None:
            return None

        mobj = re.match(r'(?P<root>https?://.*?/).*', mp4_video.text)
        video_root = mobj.group('root')
        formats = xml_description.findall('./metadata/MBRVideos/MBRVideo')
        for format in formats:
            mobj = re.match(r'mp4\:(?P<path>.*)', format.find('streamName').text)
            url = video_root + mobj.group('path')
            vbr = format.find('bitrate').text
            video_formats.append({
                'url': url,
                'vbr': int(vbr),
            })
        return video_formats

    def _parse_flv(self, xml_description):
        video_formats = []
        akami_url = xml_description.find('./metadata/akamaiHost').text
        slide_video_path = xml_description.find('./metadata/slideVideo').text
        video_formats.append({
            'url': 'rtmp://' + akami_url + '/' + slide_video_path,
            'format_note': 'slide deck video',
            'quality': -2,
            'preference': -2,
            'format_id': 'slides',
        })
        speaker_video_path = xml_description.find('./metadata/speakerVideo').text
        video_formats.append({
            'url': 'rtmp://' + akami_url + '/' + speaker_video_path,
            'format_note': 'speaker video',
            'quality': -1,
            'preference': -1,
            'format_id': 'speaker',
        })
        return video_formats

    def _login(self, webpage_url, video_id):
        (username, password) = self._get_login_info()
        if username is None or password is None:
            self.report_warning('It looks like ' + webpage_url + ' requires a login. Try specifying a username and password and try again.')
            return None

        mobj = re.match(r'(?P<root_url>https?://.*?/).*', webpage_url)
        login_url = mobj.group('root_url') + 'api/login.php'
        logout_url = mobj.group('root_url') + 'logout'

        login_form = {
            'email': username,
            'password': password,
        }

        request = compat_urllib_request.Request(login_url, compat_urllib_parse.urlencode(login_form))
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        self._download_webpage(request, video_id, 'Logging in')
        start_page = self._download_webpage(webpage_url, video_id, 'Getting authenticated video page')
        self._download_webpage(logout_url, video_id, 'Logging out')

        return start_page

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)

        video_id = mobj.group('id')
        webpage_url = 'http://www.gdcvault.com/play/' + video_id
        start_page = self._download_webpage(webpage_url, video_id)

        xml_root = self._html_search_regex(r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page, 'xml root', None, False)

        if xml_root is None:
            # Probably need to authenticate
            start_page = self._login(webpage_url, video_id)
            if start_page is None:
                self.report_warning('Could not login.')
            else:
                # Grab the url from the authenticated page
                xml_root = self._html_search_regex(r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page, 'xml root')

        xml_name = self._html_search_regex(r'<iframe src=".*?\?xml=(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename', None, False)
        if xml_name is None:
            # Fallback to the older format
            xml_name = self._html_search_regex(r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename')

        xml_decription_url = xml_root + 'xml/' + xml_name
        xml_description = self._download_xml(xml_decription_url, video_id)

        video_title = xml_description.find('./metadata/title').text
        video_formats = self._parse_mp4(xml_description)
        if video_formats is None:
            video_formats = self._parse_flv(xml_description)

        return {
            'id': video_id,
            'title': video_title,
            'formats': video_formats,
        }
youtube_dl/extractor/generic.py

@@ -83,10 +83,10 @@ class GenericIE(InfoExtractor):
         # Direct link to a video
         {
             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
-            'file': 'trailer.mp4',
             'md5': '67d406c2bcb6af27fa886f31aa934bbe',
             'info_dict': {
                 'id': 'trailer',
+                'ext': 'mp4',
                 'title': 'trailer',
                 'upload_date': '20100513',
             }
@@ -94,7 +94,6 @@ class GenericIE(InfoExtractor):
         # ooyala video
         {
             'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
-            'file': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ.mp4',
             'md5': '5644c6ca5d5782c1d0d350dad9bd840c',
             'info_dict': {
                 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
@@ -102,6 +101,22 @@ class GenericIE(InfoExtractor):
                 'title': '2cc213299525360.mov', # that's what we get
             },
         },
+        # google redirect
+        {
+            'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
+            'info_dict': {
+                'id': 'cmQHVoWB5FY',
+                'ext': 'mp4',
+                'upload_date': '20130224',
+                'uploader_id': 'TheVerge',
+                'description': 'Chris Ziegler takes a look at the Alcatel OneTouch Fire and the ZTE Open; two of the first Firefox OS handsets to be officially announced.',
+                'uploader': 'The Verge',
+                'title': 'First Firefox OS phones side-by-side',
+            },
+            'params': {
+                'skip_download': False,
+            }
+        }
     ]

     def report_download_webpage(self, video_id):
@@ -363,11 +378,17 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result(mobj.group(1), 'Mpora')

-        # Look for embedded Novamov player
+        # Look for embedded NovaMov player
         mobj = re.search(
             r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?novamov\.com/embed\.php.+?)\1', webpage)
         if mobj is not None:
-            return self.url_result(mobj.group('url'), 'Novamov')
+            return self.url_result(mobj.group('url'), 'NovaMov')
+
+        # Look for embedded NowVideo player
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?nowvideo\.(?:ch|sx|eu)/embed\.php.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'NowVideo')

         # Look for embedded Facebook player
         mobj = re.search(
@@ -375,6 +396,11 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'Facebook')

+        # Look for embedded VK player
+        mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'VK')
+
         # Look for embedded Huffington Post player
         mobj = re.search(
             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
@@ -405,6 +431,18 @@ class GenericIE(InfoExtractor):
         if mobj is None:
             # HTML5 video
             mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL)
+        if mobj is None:
+            mobj = re.search(
+                r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
+                r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'([^\']+)\'"',
+                webpage)
+            if mobj:
+                new_url = mobj.group(1)
+                self.report_following_redirect(new_url)
+                return {
+                    '_type': 'url',
+                    'url': new_url,
+                }
         if mobj is None:
             raise ExtractorError('Unsupported URL: %s' % url)

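The new fallback above follows meta-refresh redirects declared in otherwise unsupported pages. A rough, self-contained illustration of what the added pattern matches; the page snippet below is invented for the example:

import re

webpage = '<meta http-equiv="refresh" content="0;url=\'http://example.com/video\'">'  # sample HTML, not a real page
mobj = re.search(
    r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
    r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'([^\']+)\'"',
    webpage)
if mobj:
    print(mobj.group(1))  # prints http://example.com/video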
youtube_dl/extractor/iprima.py

@@ -10,7 +10,7 @@ from ..utils import compat_urllib_request


 class IPrimaIE(InfoExtractor):
-    _VALID_URL = r'https?://play\.iprima\.cz/(?P<videogroup>.+)/(?P<videoid>.+)'
+    _VALID_URL = r'https?://play\.iprima\.cz/[^?#]+/(?P<id>[^?#]+)'

     _TESTS = [{
         'url': 'http://play.iprima.cz/particka/particka-92',
@@ -22,20 +22,32 @@ class IPrimaIE(InfoExtractor):
             'thumbnail': 'http://play.iprima.cz/sites/default/files/image_crops/image_620x349/3/491483_particka-92_image_620x349.jpg',
         },
         'params': {
-            'skip_download': True,
+            'skip_download': True, # requires rtmpdump
         },
-    },
-    ]
+    }, {
+        'url': 'http://play.iprima.cz/particka/tchibo-particka-jarni-moda',
+        'info_dict': {
+            'id': '9718337',
+            'ext': 'flv',
+            'title': 'Tchibo Partička - Jarní móda',
+            'description': 'md5:589f8f59f414220621ff8882eb3ce7be',
+            'thumbnail': 're:^http:.*\.jpg$',
+        },
+        'params': {
+            'skip_download': True, # requires rtmpdump
+        },
+    }]

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
+        video_id = mobj.group('id')

         webpage = self._download_webpage(url, video_id)

-        player_url = 'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' % (
-            floor(random()*1073741824),
-            floor(random()*1073741824))
+        player_url = (
+            'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' %
+            (floor(random()*1073741824), floor(random()*1073741824))
+        )

         req = compat_urllib_request.Request(player_url)
         req.add_header('Referer', url)
@@ -44,18 +56,20 @@ class IPrimaIE(InfoExtractor):
         base_url = ''.join(re.findall(r"embed\['stream'\] = '(.+?)'.+'(\?auth=)'.+'(.+?)';", playerpage)[1])

         zoneGEO = self._html_search_regex(r'"zoneGEO":(.+?),', webpage, 'zoneGEO')

         if zoneGEO != '0':
-            base_url = base_url.replace('token', 'token_'+zoneGEO)
+            base_url = base_url.replace('token', 'token_' + zoneGEO)

         formats = []
         for format_id in ['lq', 'hq', 'hd']:
-            filename = self._html_search_regex(r'"%s_id":(.+?),' % format_id, webpage, 'filename')
+            filename = self._html_search_regex(
+                r'"%s_id":(.+?),' % format_id, webpage, 'filename')

             if filename == 'null':
                 continue

-            real_id = self._search_regex(r'Prima-[0-9]{10}-([0-9]+)_', filename, 'real video id')
+            real_id = self._search_regex(
+                r'Prima-(?:[0-9]{10}|WEB)-([0-9]+)[-_]',
+                filename, 'real video id')

             if format_id == 'lq':
                 quality = 0
@@ -63,13 +77,13 @@ class IPrimaIE(InfoExtractor):
                 quality = 1
             elif format_id == 'hd':
                 quality = 2
-                filename = 'hq/'+filename
+                filename = 'hq/' + filename

             formats.append({
                 'format_id': format_id,
                 'url': base_url,
                 'quality': quality,
-                'play_path': 'mp4:'+filename.replace('"', '')[:-4],
+                'play_path': 'mp4:' + filename.replace('"', '')[:-4],
                 'rtmp_live': True,
                 'ext': 'flv',
             })
youtube_dl/extractor/lifenews.py

@@ -6,7 +6,8 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
-    unified_strdate
+    unified_strdate,
+    ExtractorError,
 )


@@ -32,13 +33,11 @@ class LifeNewsIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')

-        webpage = self._download_webpage('http://lifenews.ru/mobile/news/%s' % video_id, video_id, 'Downloading page')
+        webpage = self._download_webpage('http://lifenews.ru/news/%s' % video_id, video_id, 'Downloading page')

-        video_url = self._html_search_regex(
-            r'<video.*?src="([^"]+)".*?></video>', webpage, 'video URL')
-        thumbnail = self._html_search_regex(
-            r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail')
+        videos = re.findall(r'<video.*?poster="(?P<poster>[^"]+)".*?src="(?P<video>[^"]+)".*?></video>', webpage)
+        if not videos:
+            raise ExtractorError('No media links available for %s' % video_id)

         title = self._og_search_title(webpage)
         TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS'
@@ -50,20 +49,26 @@ class LifeNewsIE(InfoExtractor):
         view_count = self._html_search_regex(
             r'<div class=\'views\'>(\d+)</div>', webpage, 'view count', fatal=False)
         comment_count = self._html_search_regex(
-            r'<div class=\'comments\'>(\d+)</div>', webpage, 'comment count', fatal=False)
+            r'<div class=\'comments\'>\s*<span class=\'counter\'>(\d+)</span>', webpage, 'comment count', fatal=False)

         upload_date = self._html_search_regex(
             r'<time datetime=\'([^\']+)\'>', webpage, 'upload date',fatal=False)
         if upload_date is not None:
             upload_date = unified_strdate(upload_date)

-        return {
-            'id': video_id,
-            'url': video_url,
-            'thumbnail': thumbnail,
-            'title': title,
-            'description': description,
-            'view_count': int_or_none(view_count),
-            'comment_count': int_or_none(comment_count),
-            'upload_date': upload_date,
-        }
+        def make_entry(video_id, media, video_number=None):
+            return {
+                'id': video_id,
+                'url': media[1],
+                'thumbnail': media[0],
+                'title': title if video_number is None else '%s-video%s' % (title, video_number),
+                'description': description,
+                'view_count': int_or_none(view_count),
+                'comment_count': int_or_none(comment_count),
+                'upload_date': upload_date,
+            }
+
+        if len(videos) == 1:
+            return make_entry(video_id, videos[0])
+        else:
+            return [make_entry(video_id, media, video_number+1) for video_number, media in enumerate(videos)]
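The rewritten return path above yields one entry per <video> tag that the findall() picks up. A toy sketch of that single-vs-multiple dispatch, with invented (poster, src) tuples standing in for the real matches:

def make_entry(video_id, media, video_number=None):
    # media mirrors one findall() result: (poster URL, video URL)
    return {
        'id': video_id,
        'url': media[1],
        'thumbnail': media[0],
        'title': 'clip' if video_number is None else 'clip-video%s' % video_number,
    }

videos = [('poster1.jpg', 'a.mp4'), ('poster2.jpg', 'b.mp4')]  # sample data
if len(videos) == 1:
    result = make_entry('12345', videos[0])
else:
    result = [make_entry('12345', media, n + 1) for n, media in enumerate(videos)]
print(result)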
youtube_dl/extractor/mailru.py (new file, 66 lines)

@@ -0,0 +1,66 @@
# encoding: utf-8
from __future__ import unicode_literals

import re
import datetime

from .common import InfoExtractor


class MailRuIE(InfoExtractor):
    IE_NAME = 'mailru'
    IE_DESC = 'Видео@Mail.Ru'
    _VALID_URL = r'http://(?:www\.)?my\.mail\.ru/video/.*#video=/?(?P<id>[^/]+/[^/]+/[^/]+/\d+)'

    _TEST = {
        'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76',
        'md5': 'dea205f03120046894db4ebb6159879a',
        'info_dict': {
            'id': '46301138',
            'ext': 'mp4',
            'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро',
            'upload_date': '20140224',
            'uploader': 'sonypicturesrus',
            'uploader_id': 'sonypicturesrus@mail.ru',
            'duration': 184,
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        video_data = self._download_json(
            'http://videoapi.my.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON')

        author = video_data['author']
        uploader = author['name']
        uploader_id = author['id']

        movie = video_data['movie']
        content_id = str(movie['contentId'])
        title = movie['title']
        thumbnail = movie['poster']
        duration = movie['duration']

        upload_date = datetime.datetime.fromtimestamp(video_data['timestamp']).strftime('%Y%m%d')
        view_count = video_data['views_count']

        formats = [
            {
                'url': video['url'],
                'format_id': video['name'],
            } for video in video_data['videos']
        ]

        return {
            'id': content_id,
            'title': title,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
        }
youtube_dl/extractor/metacafe.py

@@ -166,6 +166,7 @@ class MetacafeIE(InfoExtractor):

         video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, u'title')
         description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
         video_uploader = self._html_search_regex(
             r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
             webpage, u'uploader nickname', fatal=False)
@@ -183,6 +184,7 @@ class MetacafeIE(InfoExtractor):
             'uploader': video_uploader,
             'upload_date': None,
             'title': video_title,
+            'thumbnail':thumbnail,
             'ext': video_ext,
             'age_limit': age_limit,
         }
youtube_dl/extractor/mit.py

@@ -1,24 +1,30 @@
+from __future__ import unicode_literals
+
 import re
 import json

 from .common import InfoExtractor
+from .youtube import YoutubeIE
 from ..utils import (
+    compat_urlparse,
     clean_html,
+    ExtractorError,
     get_element_by_id,
 )


 class TechTVMITIE(InfoExtractor):
-    IE_NAME = u'techtv.mit.edu'
+    IE_NAME = 'techtv.mit.edu'
     _VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)'

     _TEST = {
-        u'url': u'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
-        u'file': u'25418.mp4',
-        u'md5': u'1f8cb3e170d41fd74add04d3c9330e5f',
-        u'info_dict': {
-            u'title': u'MIT DNA Learning Center Set',
-            u'description': u'md5:82313335e8a8a3f243351ba55bc1b474',
+        'url': 'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
+        'md5': '1f8cb3e170d41fd74add04d3c9330e5f',
+        'info_dict': {
+            'id': '25418',
+            'ext': 'mp4',
+            'title': 'MIT DNA Learning Center Set',
+            'description': 'md5:82313335e8a8a3f243351ba55bc1b474',
         },
     }

@@ -27,12 +33,12 @@ class TechTVMITIE(InfoExtractor):
         video_id = mobj.group('id')
         raw_page = self._download_webpage(
             'http://techtv.mit.edu/videos/%s' % video_id, video_id)
-        clean_page = re.compile(u'<!--.*?-->', re.S).sub(u'', raw_page)
+        clean_page = re.compile(r'<!--.*?-->', re.S).sub('', raw_page)

-        base_url = self._search_regex(r'ipadUrl: \'(.+?cloudfront.net/)',
-            raw_page, u'base url')
-        formats_json = self._search_regex(r'bitrates: (\[.+?\])', raw_page,
-            u'video formats')
+        base_url = self._search_regex(
+            r'ipadUrl: \'(.+?cloudfront.net/)', raw_page, 'base url')
+        formats_json = self._search_regex(
+            r'bitrates: (\[.+?\])', raw_page, 'video formats')
         formats_mit = json.loads(formats_json)
         formats = [
             {
@@ -48,28 +54,31 @@ class TechTVMITIE(InfoExtractor):

         title = get_element_by_id('edit-title', clean_page)
         description = clean_html(get_element_by_id('edit-description', clean_page))
-        thumbnail = self._search_regex(r'playlist:.*?url: \'(.+?)\'',
-            raw_page, u'thumbnail', flags=re.DOTALL)
+        thumbnail = self._search_regex(
+            r'playlist:.*?url: \'(.+?)\'',
+            raw_page, 'thumbnail', flags=re.DOTALL)

-        return {'id': video_id,
-                'title': title,
-                'formats': formats,
-                'description': description,
-                'thumbnail': thumbnail,
-                }
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'description': description,
+            'thumbnail': thumbnail,
+        }


 class MITIE(TechTVMITIE):
-    IE_NAME = u'video.mit.edu'
+    IE_NAME = 'video.mit.edu'
     _VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)'

     _TEST = {
-        u'url': u'http://video.mit.edu/watch/the-government-is-profiling-you-13222/',
-        u'file': u'21783.mp4',
-        u'md5': u'7db01d5ccc1895fc5010e9c9e13648da',
-        u'info_dict': {
-            u'title': u'The Government is Profiling You',
-            u'description': u'md5:ad5795fe1e1623b73620dbfd47df9afd',
+        'url': 'http://video.mit.edu/watch/the-government-is-profiling-you-13222/',
+        'md5': '7db01d5ccc1895fc5010e9c9e13648da',
+        'info_dict': {
+            'id': '21783',
+            'ext': 'mp4',
+            'title': 'The Government is Profiling You',
+            'description': 'md5:ad5795fe1e1623b73620dbfd47df9afd',
         },
     }

@@ -77,7 +86,73 @@ class MITIE(TechTVMITIE):
         mobj = re.match(self._VALID_URL, url)
         page_title = mobj.group('title')
         webpage = self._download_webpage(url, page_title)
-        self.to_screen('%s: Extracting %s url' % (page_title, TechTVMITIE.IE_NAME))
-        embed_url = self._search_regex(r'<iframe .*?src="(.+?)"', webpage,
-            u'embed url')
+        embed_url = self._search_regex(
+            r'<iframe .*?src="(.+?)"', webpage, 'embed url')
         return self.url_result(embed_url, ie='TechTVMIT')
+
+
+class OCWMITIE(InfoExtractor):
+    IE_NAME = 'ocw.mit.edu'
+    _VALID_URL = r'^http://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
+    _BASE_URL = 'http://ocw.mit.edu/'
+
+    _TESTS = [
+        {
+            'url': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/',
+            'info_dict': {
+                'id': 'EObHWIEKGjA',
+                'ext': 'mp4',
+                'title': 'Lecture 7: Multiple Discrete Random Variables: Expectations, Conditioning, Independence',
+                'description': 'In this lecture, the professor discussed multiple random variables, expectations, and binomial distribution.',
+                #'subtitles': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/MIT6_041F11_lec07_300k.mp4.srt'
+            }
+        },
+        {
+            'url': 'http://ocw.mit.edu/courses/mathematics/18-01sc-single-variable-calculus-fall-2010/1.-differentiation/part-a-definition-and-basic-rules/session-1-introduction-to-derivatives/',
+            'info_dict': {
+                'id': '7K1sB05pE0A',
+                'ext': 'mp4',
+                'title': 'Session 1: Introduction to Derivatives',
+                'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.',
+                #'subtitles': 'http://ocw.mit.edu//courses/mathematics/18-01sc-single-variable-calculus-fall-2010/ocw-18.01-f07-lec01_300k.SRT'
+            }
+        }
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        topic = mobj.group('topic')
+
+        webpage = self._download_webpage(url, topic)
+        title = self._html_search_meta('WT.cg_s', webpage)
+        description = self._html_search_meta('Description', webpage)
+
+        # search for call to ocw_embed_chapter_media(container_id, media_url, provider, page_url, image_url, start, stop, captions_file)
+        embed_chapter_media = re.search(r'ocw_embed_chapter_media\((.+?)\)', webpage)
+        if embed_chapter_media:
+            metadata = re.sub(r'[\'"]', '', embed_chapter_media.group(1))
+            metadata = re.split(r', ?', metadata)
+            yt = metadata[1]
+            subs = compat_urlparse.urljoin(self._BASE_URL, metadata[7])
+        else:
+            # search for call to ocw_embed_chapter_media(container_id, media_url, provider, page_url, image_url, captions_file)
+            embed_media = re.search(r'ocw_embed_media\((.+?)\)', webpage)
+            if embed_media:
+                metadata = re.sub(r'[\'"]', '', embed_media.group(1))
+                metadata = re.split(r', ?', metadata)
+                yt = metadata[1]
+                subs = compat_urlparse.urljoin(self._BASE_URL, metadata[5])
+            else:
+                raise ExtractorError('Unable to find embedded YouTube video.')
+        video_id = YoutubeIE.extract_id(yt)
+
+        return {
+            '_type': 'url_transparent',
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'url': yt,
+            'url_transparent'
+            'subtitles': subs,
+            'ie_key': 'Youtube',
+        }
youtube_dl/extractor/mixcloud.py

@@ -5,18 +5,20 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     unified_strdate,
+    compat_urllib_parse,
     ExtractorError,
 )


 class MixcloudIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/([^/]+)'
     IE_NAME = 'mixcloud'

     _TEST = {
         'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
-        'file': 'dholbach-cryptkeeper.mp3',
         'info_dict': {
+            'id': 'dholbach-cryptkeeper',
+            'ext': 'mp3',
             'title': 'Cryptkeeper',
             'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
             'uploader': 'Daniel Holbach',
@@ -45,7 +47,7 @@ class MixcloudIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         uploader = mobj.group(1)
         cloudcast_name = mobj.group(2)
-        track_id = '-'.join((uploader, cloudcast_name))
+        track_id = compat_urllib_parse.unquote('-'.join((uploader, cloudcast_name)))

         webpage = self._download_webpage(url, track_id)

youtube_dl/extractor/nbc.py

@@ -1,19 +1,46 @@
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
 from ..utils import find_xpath_attr, compat_str


+class NBCIE(InfoExtractor):
+    _VALID_URL = r'http://www\.nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+)'
+
+    _TEST = {
+        'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
+        'md5': '54d0fbc33e0b853a65d7b4de5c06d64e',
+        'info_dict': {
+            'id': 'u1RInQZRN7QJ',
+            'ext': 'flv',
+            'title': 'I Am a Firefighter',
+            'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+        theplatform_url = self._search_regex('class="video-player video-player-full" data-mpx-url="(.*?)"', webpage, 'theplatform url')
+        if theplatform_url.startswith('//'):
+            theplatform_url = 'http:' + theplatform_url
+        return self.url_result(theplatform_url)
+
+
 class NBCNewsIE(InfoExtractor):
     _VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)'

     _TEST = {
-        u'url': u'http://www.nbcnews.com/video/nbc-news/52753292',
-        u'file': u'52753292.flv',
-        u'md5': u'47abaac93c6eaf9ad37ee6c4463a5179',
-        u'info_dict': {
-            u'title': u'Crew emerges after four-month Mars food study',
-            u'description': u'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
+        'url': 'http://www.nbcnews.com/video/nbc-news/52753292',
+        'md5': '47abaac93c6eaf9ad37ee6c4463a5179',
+        'info_dict': {
+            'id': '52753292',
+            'ext': 'flv',
+            'title': 'Crew emerges after four-month Mars food study',
+            'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
         },
     }

@@ -23,10 +50,11 @@ class NBCNewsIE(InfoExtractor):
         all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
         info = all_info.find('video')

-        return {'id': video_id,
-                'title': info.find('headline').text,
-                'ext': 'flv',
-                'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
-                'description': compat_str(info.find('caption').text),
-                'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
-                }
+        return {
+            'id': video_id,
+            'title': info.find('headline').text,
+            'ext': 'flv',
+            'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
+            'description': compat_str(info.find('caption').text),
+            'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
+        }
youtube_dl/extractor/normalboots.py

@@ -1,61 +1,51 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor

 from ..utils import (
-    ExtractorError,
     unified_strdate,
 )


 class NormalbootsIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$'
+    _VALID_URL = r'http://(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$'
     _TEST = {
-        u'url': u'http://normalboots.com/video/home-alone-games-jontron/',
-        u'file': u'home-alone-games-jontron.mp4',
-        u'md5': u'8bf6de238915dd501105b44ef5f1e0f6',
-        u'info_dict': {
-            u'title': u'Home Alone Games - JonTron - NormalBoots',
-            u'description': u'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for \u2018Tense Battle Theme\u2019:\xa0http://www.youtube.com/Kiamet/',
-            u'uploader': u'JonTron',
-            u'upload_date': u'20140125',
+        'url': 'http://normalboots.com/video/home-alone-games-jontron/',
+        'md5': '8bf6de238915dd501105b44ef5f1e0f6',
+        'info_dict': {
+            'id': 'home-alone-games-jontron',
+            'ext': 'mp4',
+            'title': 'Home Alone Games - JonTron - NormalBoots',
+            'description': 'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for ‘Tense Battle Theme’:\xa0http://www.youtube.com/Kiamet/',
+            'uploader': 'JonTron',
+            'upload_date': '20140125',
         }
     }

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
         video_id = mobj.group('videoid')

-        info = {
-            'id': video_id,
-            'uploader': None,
-            'upload_date': None,
-        }
-
-        if url[:4] != 'http':
-            url = 'http://' + url
-
         webpage = self._download_webpage(url, video_id)
-        video_title = self._og_search_title(webpage)
-        video_description = self._og_search_description(webpage)
-        video_thumbnail = self._og_search_thumbnail(webpage)
         video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
             webpage, 'uploader')
         raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
             webpage, 'date')
         video_upload_date = unified_strdate(raw_upload_date)
-        video_upload_date = unified_strdate(raw_upload_date)

         player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url')
         player_page = self._download_webpage(player_url, video_id)
-        video_url = u'http://player.screenwavemedia.com/' + self._html_search_regex(r"'file':\s'(?P<file>[0-9A-Za-z-_\.]+)'", player_page, 'file')
+        video_url = self._html_search_regex(r"file:\s'(?P<file>[^']+\.mp4)'", player_page, 'file')

-        info['url'] = video_url
-        info['title'] = video_title
-        info['description'] = video_description
-        info['thumbnail'] = video_thumbnail
-        info['uploader'] = video_uploader
-        info['upload_date'] = video_upload_date
-
-        return info
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'uploader': video_uploader,
+            'upload_date': video_upload_date,
+        }
youtube_dl/extractor/novamov.py

@@ -9,14 +9,25 @@ from ..utils import (
 )


-class NovamovIE(InfoExtractor):
-    _VALID_URL = r'http://(?:(?:www\.)?novamov\.com/video/|(?:(?:embed|www)\.)novamov\.com/embed\.php\?v=)(?P<videoid>[a-z\d]{13})'
+class NovaMovIE(InfoExtractor):
+    IE_NAME = 'novamov'
+    IE_DESC = 'NovaMov'
+
+    _VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'novamov\.com'}
+
+    _HOST = 'www.novamov.com'
+
+    _FILE_DELETED_REGEX = r'This file no longer exists on our servers!</h2>'
+    _FILEKEY_REGEX = r'flashvars\.filekey="(?P<filekey>[^"]+)";'
+    _TITLE_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>([^<]+)</h3>'
+    _DESCRIPTION_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>[^<]+</h3><p>([^<]+)</p>'

     _TEST = {
         'url': 'http://www.novamov.com/video/4rurhn9x446jj',
-        'file': '4rurhn9x446jj.flv',
         'md5': '7205f346a52bbeba427603ba10d4b935',
         'info_dict': {
+            'id': '4rurhn9x446jj',
+            'ext': 'flv',
             'title': 'search engine optimization',
             'description': 'search engine optimization is used to rank the web page in the google search engine'
         },
@@ -27,31 +38,26 @@ class NovamovIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('videoid')

-        page = self._download_webpage('http://www.novamov.com/video/%s' % video_id,
-            video_id, 'Downloading video page')
+        page = self._download_webpage(
+            'http://%s/video/%s' % (self._HOST, video_id), video_id, 'Downloading video page')

-        if re.search(r'This file no longer exists on our servers!</h2>', page) is not None:
+        if re.search(self._FILE_DELETED_REGEX, page) is not None:
             raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)

-        filekey = self._search_regex(
-            r'flashvars\.filekey="(?P<filekey>[^"]+)";', page, 'filekey')
+        filekey = self._search_regex(self._FILEKEY_REGEX, page, 'filekey')

-        title = self._html_search_regex(
-            r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>([^<]+)</h3>',
-            page, 'title', fatal=False)
+        title = self._html_search_regex(self._TITLE_REGEX, page, 'title', fatal=False)

-        description = self._html_search_regex(
-            r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>[^<]+</h3><p>([^<]+)</p>',
-            page, 'description', fatal=False)
+        description = self._html_search_regex(self._DESCRIPTION_REGEX, page, 'description', default='', fatal=False)

         api_response = self._download_webpage(
-            'http://www.novamov.com/api/player.api.php?key=%s&file=%s' % (filekey, video_id),
-            video_id, 'Downloading video api response')
+            'http://%s/api/player.api.php?key=%s&file=%s' % (self._HOST, filekey, video_id), video_id,
+            'Downloading video api response')

         response = compat_urlparse.parse_qs(api_response)

         if 'error_msg' in response:
-            raise ExtractorError('novamov returned error: %s' % response['error_msg'][0], expected=True)
+            raise ExtractorError('%s returned error: %s' % (self.IE_NAME, response['error_msg'][0]), expected=True)

         video_url = response['url'][0]

@@ -1,46 +1,28 @@
-import re
+from __future__ import unicode_literals

-from .common import InfoExtractor
-from ..utils import compat_urlparse
+from .novamov import NovaMovIE


-class NowVideoIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)?(?:www\.)?nowvideo\.(?:ch|sx)/video/(?P<id>\w+)'
+class NowVideoIE(NovaMovIE):
+    IE_NAME = 'nowvideo'
+    IE_DESC = 'NowVideo'
+
+    _VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'nowvideo\.(?:ch|sx|eu)'}
+
+    _HOST = 'www.nowvideo.ch'
+
+    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
+    _FILEKEY_REGEX = r'var fkzd="([^"]+)";'
+    _TITLE_REGEX = r'<h4>([^<]+)</h4>'
+    _DESCRIPTION_REGEX = r'</h4>\s*<p>([^<]+)</p>'

     _TEST = {
-        u'url': u'http://www.nowvideo.ch/video/0mw0yow7b6dxa',
-        u'file': u'0mw0yow7b6dxa.flv',
-        u'md5': u'f8fbbc8add72bd95b7850c6a02fc8817',
-        u'info_dict': {
-            u"title": u"youtubedl test video _BaW_jenozKc.mp4"
+        'url': 'http://www.nowvideo.ch/video/0mw0yow7b6dxa',
+        'md5': 'f8fbbc8add72bd95b7850c6a02fc8817',
+        'info_dict': {
+            'id': '0mw0yow7b6dxa',
+            'ext': 'flv',
+            'title': 'youtubedl test video _BaW_jenozKc.mp4',
+            'description': 'Description',
         }
     }
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-
-        video_id = mobj.group('id')
-        webpage_url = 'http://www.nowvideo.ch/video/' + video_id
-        embed_url = 'http://embed.nowvideo.ch/embed.php?v=' + video_id
-        webpage = self._download_webpage(webpage_url, video_id)
-        embed_page = self._download_webpage(embed_url, video_id,
-            u'Downloading embed page')
-
-        self.report_extraction(video_id)
-
-        video_title = self._html_search_regex(r'<h4>(.*)</h4>',
-            webpage, u'video title')
-
-        video_key = self._search_regex(r'var fkzd="(.*)";',
-            embed_page, u'video key')
-
-        api_call = "http://www.nowvideo.ch/api/player.api.php?file={0}&numOfErrors=0&cid=1&key={1}".format(video_id, video_key)
-        api_response = self._download_webpage(api_call, video_id,
-            u'Downloading API page')
-        video_url = compat_urlparse.parse_qs(api_response)[u'url'][0]
-
-        return [{
-            'id': video_id,
-            'url': video_url,
-            'ext': 'flv',
-            'title': video_title,
-        }]
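Note: the NowVideo rewrite above works because the NovaMov extractor (first hunk in this comparison) now exposes the host and the site-specific regexes as class attributes. A minimal sketch of that pattern, with hypothetical class and host names, not the exact youtube-dl code:

    class NovaMovStyleBaseIE(object):
        # Attributes a subclass overrides; the request logic stays in the base class.
        _HOST = None
        _FILEKEY_REGEX = None
        _TITLE_REGEX = None

        def _api_url(self, filekey, video_id):
            # Every mirror exposes the same player.api.php endpoint, only the host differs.
            return 'http://%s/api/player.api.php?key=%s&file=%s' % (self._HOST, filekey, video_id)

    class HypotheticalMirrorIE(NovaMovStyleBaseIE):
        _HOST = 'www.example-mirror.net'          # hypothetical host, for illustration only
        _FILEKEY_REGEX = r'var fkzd="([^"]+)";'
        _TITLE_REGEX = r'<h4>([^<]+)</h4>'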
@@ -8,6 +8,7 @@ from .common import InfoExtractor
 from ..utils import (
     HEADRequest,
     unified_strdate,
+    ExtractorError,
 )


@@ -35,7 +36,15 @@ class ORFIE(InfoExtractor):
         data_json = self._search_regex(
             r'initializeAdworx\((.+?)\);\n', webpage, 'video info')
         all_data = json.loads(data_json)
-        sdata = all_data[0]['values']['segments']
+
+        def get_segments(all_data):
+            for data in all_data:
+                if data['name'] == 'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM':
+                    return data['values']['segments']
+
+        sdata = get_segments(all_data)
+        if not sdata:
+            raise ExtractorError('Unable to extract segments')

         def quality_to_int(s):
             m = re.search('([0-9]+)', s)
@@ -1,7 +1,10 @@
+from __future__ import unicode_literals
+
 import json
 import re

 from .common import InfoExtractor
+from ..utils import int_or_none


 class PodomaticIE(InfoExtractor):
@@ -9,14 +12,14 @@ class PodomaticIE(InfoExtractor):
     _VALID_URL = r'^(?P<proto>https?)://(?P<channel>[^.]+)\.podomatic\.com/entry/(?P<id>[^?]+)'

     _TEST = {
-        u"url": u"http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00",
-        u"file": u"2009-01-02T16_03_35-08_00.mp3",
-        u"md5": u"84bb855fcf3429e6bf72460e1eed782d",
-        u"info_dict": {
-            u"uploader": u"Science Teaching Tips",
-            u"uploader_id": u"scienceteachingtips",
-            u"title": u"64. When the Moon Hits Your Eye",
-            u"duration": 446,
+        "url": "http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00",
+        "file": "2009-01-02T16_03_35-08_00.mp3",
+        "md5": "84bb855fcf3429e6bf72460e1eed782d",
+        "info_dict": {
+            "uploader": "Science Teaching Tips",
+            "uploader_id": "scienceteachingtips",
+            "title": "64. When the Moon Hits Your Eye",
+            "duration": 446,
         }
     }

@@ -36,7 +39,7 @@ class PodomaticIE(InfoExtractor):
         uploader = data['podcast']
         title = data['title']
         thumbnail = data['imageLocation']
-        duration = int(data['length'] / 1000.0)
+        duration = int_or_none(data.get('length'), 1000)

         return {
             'id': video_id,
youtube_dl/extractor/prosiebensat1.py (new file)
@@ -0,0 +1,297 @@
# encoding: utf-8
from __future__ import unicode_literals

import re

from hashlib import sha1
from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
    unified_strdate,
    clean_html,
    RegexNotFoundError,
)


class ProSiebenSat1IE(InfoExtractor):
    IE_NAME = 'prosiebensat1'
    IE_DESC = 'ProSiebenSat.1 Digital'
    _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|ran|the-voice-of-germany)\.de|fem\.com)/(?P<id>.+)'

    _TESTS = [
        {
            'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge',
            'info_dict': {
                'id': '2104602',
                'ext': 'mp4',
                'title': 'Staffel 2, Episode 18 - Jahresrückblick',
                'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
                'upload_date': '20131231',
                'duration': 5845.04,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.prosieben.de/videokatalog/Gesellschaft/Leben/Trends/video-Lady-Umstyling-f%C3%BCr-Audrina-Rebekka-Audrina-Fergen-billig-aussehen-Battal-Modica-700544.html',
            'info_dict': {
                'id': '2570327',
                'ext': 'mp4',
                'title': 'Lady-Umstyling für Audrina',
                'description': 'md5:4c16d0c17a3461a0d43ea4084e96319d',
                'upload_date': '20131014',
                'duration': 606.76,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'Seems to be broken',
        },
        {
            'url': 'http://www.prosiebenmaxx.de/yep/one-piece/video/148-folge-48-gold-rogers-heimat-ganze-folge',
            'info_dict': {
                'id': '2437108',
                'ext': 'mp4',
                'title': 'Folge 48: Gold Rogers Heimat',
                'description': 'Ruffy erreicht die Insel, auf der der berühmte Gold Roger lebte und hingerichtet wurde.',
                'upload_date': '20140226',
                'duration': 1401.48,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.sixx.de/stars-style/video/sexy-laufen-in-ugg-boots-clip',
            'info_dict': {
                'id': '2904997',
                'ext': 'mp4',
                'title': 'Sexy laufen in Ugg Boots',
                'description': 'md5:edf42b8bd5bc4e5da4db4222c5acb7d6',
                'upload_date': '20140122',
                'duration': 245.32,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.sat1.de/film/der-ruecktritt/video/im-interview-kai-wiesinger-clip',
            'info_dict': {
                'id': '2906572',
                'ext': 'mp4',
                'title': 'Im Interview: Kai Wiesinger',
                'description': 'md5:e4e5370652ec63b95023e914190b4eb9',
                'upload_date': '20140225',
                'duration': 522.56,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.kabeleins.de/tv/rosins-restaurants/videos/jagd-auf-fertigkost-im-elsthal-teil-2-ganze-folge',
            'info_dict': {
                'id': '2992323',
                'ext': 'mp4',
                'title': 'Jagd auf Fertigkost im Elsthal - Teil 2',
                'description': 'md5:2669cde3febe9bce13904f701e774eb6',
                'upload_date': '20140225',
                'duration': 2410.44,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ran.de/fussball/bundesliga/video/schalke-toennies-moechte-raul-zurueck-ganze-folge',
            'info_dict': {
                'id': '3004256',
                'ext': 'mp4',
                'title': 'Schalke: Tönnies möchte Raul zurück',
                'description': 'md5:4b5b271d9bcde223b54390754c8ece3f',
                'upload_date': '20140226',
                'duration': 228.96,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip',
            'info_dict': {
                'id': '2572814',
                'ext': 'mp4',
                'title': 'Andreas Kümmert: Rocket Man',
                'description': 'md5:6ddb02b0781c6adf778afea606652e38',
                'upload_date': '20131017',
                'duration': 469.88,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.fem.com/wellness/videos/wellness-video-clip-kurztripps-zum-valentinstag.html',
            'info_dict': {
                'id': '2156342',
                'ext': 'mp4',
                'title': 'Kurztrips zum Valentinstag',
                'description': 'md5:8ba6301e70351ae0bedf8da00f7ba528',
                'upload_date': '20130206',
                'duration': 307.24,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
    ]

    _CLIPID_REGEXES = [
        r'"clip_id"\s*:\s+"(\d+)"',
        r'clipid: "(\d+)"',
    ]
    _TITLE_REGEXES = [
        r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
        r'<header class="clearfix">\s*<h3>(.+?)</h3>',
        r'<!-- start video -->\s*<h1>(.+?)</h1>',
        r'<div class="ep-femvideos-pi4-video-txt">\s*<h2>(.+?)</h2>',
    ]
    _DESCRIPTION_REGEXES = [
        r'<p itemprop="description">\s*(.+?)</p>',
        r'<div class="videoDecription">\s*<p><strong>Beschreibung</strong>: (.+?)</p>',
        r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>',
        r'<p>(.+?)</p>\s*<div class="ep-femvideos-pi4-video-footer">',
    ]
    _UPLOAD_DATE_REGEXES = [
        r'<meta property="og:published_time" content="(.+?)">',
        r'<span>\s*(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}) \|\s*<span itemprop="duration"',
        r'<footer>\s*(\d{2}\.\d{2}\.\d{4}) \d{2}:\d{2} Uhr',
        r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',
        r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>',
    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        page = self._download_webpage(url, video_id, 'Downloading page')

        def extract(patterns, name, page, fatal=False):
            for pattern in patterns:
                mobj = re.search(pattern, page)
                if mobj:
                    return clean_html(mobj.group(1))
            if fatal:
                raise RegexNotFoundError(u'Unable to extract %s' % name)
            return None

        clip_id = extract(self._CLIPID_REGEXES, 'clip id', page, fatal=True)

        access_token = 'testclient'
        client_name = 'kolibri-1.2.5'
        client_location = url

        videos_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos?%s' % compat_urllib_parse.urlencode({
            'access_token': access_token,
            'client_location': client_location,
            'client_name': client_name,
            'ids': clip_id,
        })

        videos = self._download_json(videos_api_url, clip_id, 'Downloading videos JSON')

        duration = float(videos[0]['duration'])
        source_ids = [source['id'] for source in videos[0]['sources']]
        source_ids_str = ','.join(map(str, source_ids))

        g = '01!8d8F_)r9]4s[qeuXfP%'

        client_id = g[:2] + sha1(''.join([clip_id, g, access_token, client_location, g, client_name])
                                 .encode('utf-8')).hexdigest()

        sources_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources?%s' % (clip_id, compat_urllib_parse.urlencode({
            'access_token': access_token,
            'client_id': client_id,
            'client_location': client_location,
            'client_name': client_name,
        }))

        sources = self._download_json(sources_api_url, clip_id, 'Downloading sources JSON')
        server_id = sources['server_id']

        client_id = g[:2] + sha1(''.join([g, clip_id, access_token, server_id,
                                          client_location, source_ids_str, g, client_name])
                                 .encode('utf-8')).hexdigest()

        url_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url?%s' % (clip_id, compat_urllib_parse.urlencode({
            'access_token': access_token,
            'client_id': client_id,
            'client_location': client_location,
            'client_name': client_name,
            'server_id': server_id,
            'source_ids': source_ids_str,
        }))

        urls = self._download_json(url_api_url, clip_id, 'Downloading urls JSON')

        title = extract(self._TITLE_REGEXES, 'title', page, fatal=True)
        description = extract(self._DESCRIPTION_REGEXES, 'description', page)
        thumbnail = self._og_search_thumbnail(page)

        upload_date = extract(self._UPLOAD_DATE_REGEXES, 'upload date', page)
        if upload_date:
            upload_date = unified_strdate(upload_date)

        formats = []

        urls_sources = urls['sources']
        if isinstance(urls_sources, dict):
            urls_sources = urls_sources.values()

        def fix_bitrate(bitrate):
            return bitrate / 1000 if bitrate % 1000 == 0 else bitrate

        for source in urls_sources:
            protocol = source['protocol']
            if protocol == 'rtmp' or protocol == 'rtmpe':
                mobj = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', source['url'])
                if not mobj:
                    continue
                formats.append({
                    'url': mobj.group('url'),
                    'app': mobj.group('app'),
                    'play_path': mobj.group('playpath'),
                    'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
                    'page_url': 'http://www.prosieben.de',
                    'vbr': fix_bitrate(source['bitrate']),
                    'ext': 'mp4',
                    'format_id': '%s_%s' % (source['cdn'], source['bitrate']),
                })
            else:
                formats.append({
                    'url': source['url'],
                    'vbr': fix_bitrate(source['bitrate']),
                })

        self._sort_formats(formats)

        return {
            'id': clip_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'duration': duration,
            'formats': formats,
        }
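Note: the new ProSiebenSat.1 extractor above authenticates its API calls by deriving a client_id from the clip id, a static salt and the client parameters. A condensed sketch of that derivation (salt, token and field order are copied from the file above; treat the concrete values as whatever the site currently accepts):

    from hashlib import sha1

    def make_client_id(clip_id, access_token, client_location, client_name,
                       salt='01!8d8F_)r9]4s[qeuXfP%'):
        # First two characters of the salt, then a SHA1 over the concatenated fields,
        # mirroring the videos-API request in the new extractor.
        digest = sha1(''.join(
            [clip_id, salt, access_token, client_location, salt, client_name]
        ).encode('utf-8')).hexdigest()
        return salt[:2] + digest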
@@ -1,148 +1,165 @@
 # encoding: utf-8
 from __future__ import unicode_literals

 import re

 from .common import InfoExtractor
 from ..utils import (
-    clean_html,
     ExtractorError,
+    clean_html,
+    unified_strdate,
+    int_or_none,
 )


 class RTLnowIE(InfoExtractor):
     """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW"""
-    _VALID_URL = r'(?:http://)?(?P<url>(?P<domain>rtl-now\.rtl\.de|rtl2now\.rtl2\.de|(?:www\.)?voxnow\.de|(?:www\.)?rtlnitronow\.de|(?:www\.)?superrtlnow\.de|(?:www\.)?n-tvnow\.de)/+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
-    _TESTS = [{
-        'url': 'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
-        'file': '90419.flv',
-        'info_dict': {
-            'upload_date': '20070416',
-            'title': 'Ahornallee - Folge 1 - Der Einzug',
-            'description': 'Folge 1 - Der Einzug',
-        },
-        'params': {
-            'skip_download': True,
-        },
-        'skip': 'Only works from Germany',
-    },
-    {
-        'url': 'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5',
-        'file': '69756.flv',
-        'info_dict': {
-            'upload_date': '20120519',
-            'title': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...',
-            'description': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.',
-            'thumbnail': 'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg',
-        },
-        'params': {
-            'skip_download': True,
-        },
-        'skip': 'Only works from Germany',
-    },
-    {
-        'url': 'http://www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17',
-        'file': '13883.flv',
-        'info_dict': {
-            'upload_date': '20090627',
-            'title': 'Voxtours - Südafrika-Reporter II',
-            'description': 'Südafrika-Reporter II',
-        },
-        'params': {
-            'skip_download': True,
-        },
-    },
-    {
-        'url': 'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1',
-        'file': '99205.flv',
-        'info_dict': {
-            'upload_date': '20080928',
-            'title': 'Medicopter 117 - Angst!',
-            'description': 'Angst!',
-            'thumbnail': 'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg'
-        },
-        'params': {
-            'skip_download': True,
-        },
-    },
-    {
-        'url': 'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10',
-        'file': '124903.flv',
-        'info_dict': {
-            'upload_date': '20130101',
-            'title': 'Top Gear vom 01.01.2013',
-            'description': 'Episode 1',
-        },
-        'params': {
-            'skip_download': True,
-        },
-        'skip': 'Only works from Germany',
-    }]
+    _VALID_URL = r'''(?x)
+                        (?:https?://)?
+                        (?P<url>
+                            (?P<domain>
+                                rtl-now\.rtl\.de|
+                                rtl2now\.rtl2\.de|
+                                (?:www\.)?voxnow\.de|
+                                (?:www\.)?rtlnitronow\.de|
+                                (?:www\.)?superrtlnow\.de|
+                                (?:www\.)?n-tvnow\.de)
+                            /+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?
+                            (?:container_id|film_id)=(?P<video_id>[0-9]+)&
+                            player=1(?:&season=[0-9]+)?(?:&.*)?
+                    )'''
+
+    _TESTS = [
+        {
+            'url': 'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
+            'info_dict': {
+                'id': '90419',
+                'ext': 'flv',
+                'title': 'Ahornallee - Folge 1 - Der Einzug',
+                'description': 'md5:ce843b6b5901d9a7f7d04d1bbcdb12de',
+                'upload_date': '20070416',
+                'duration': 1685,
+            },
+            'params': {
+                'skip_download': True,
+            },
+            'skip': 'Only works from Germany',
+        },
+        {
+            'url': 'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5',
+            'info_dict': {
+                'id': '69756',
+                'ext': 'flv',
+                'title': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.',
+                'description': 'md5:3fb247005ed21a935ffc82b7dfa70cf0',
+                'thumbnail': 'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg',
+                'upload_date': '20120519',
+                'duration': 1245,
+            },
+            'params': {
+                'skip_download': True,
+            },
+            'skip': 'Only works from Germany',
+        },
+        {
+            'url': 'http://www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17',
+            'info_dict': {
+                'id': '13883',
+                'ext': 'flv',
+                'title': 'Voxtours - Südafrika-Reporter II',
+                'description': 'md5:de7f8d56be6fd4fed10f10f57786db00',
+                'upload_date': '20090627',
+                'duration': 1800,
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1',
+            'info_dict': {
+                'id': '99205',
+                'ext': 'flv',
+                'title': 'Medicopter 117 - Angst!',
+                'description': 'md5:895b1df01639b5f61a04fc305a5cb94d',
+                'thumbnail': 'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg',
+                'upload_date': '20080928',
+                'duration': 2691,
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://www.n-tvnow.de/deluxe-alles-was-spass-macht/thema-ua-luxushotel-fuer-vierbeiner.php?container_id=153819&player=1&season=0',
+            'info_dict': {
+                'id': '153819',
+                'ext': 'flv',
+                'title': 'Deluxe - Alles was Spaß macht - Thema u.a.: Luxushotel für Vierbeiner',
+                'description': 'md5:c3705e1bb32e1a5b2bcd634fc065c631',
+                'thumbnail': 'http://autoimg.static-fra.de/ntvnow/383157/1500x1500/image2.jpg',
+                'upload_date': '20140221',
+                'duration': 2429,
+            },
+            'skip': 'Only works from Germany',
+        },
+    ]

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        webpage_url = 'http://' + mobj.group('url')
-        video_page_url = 'http://' + mobj.group('domain') + '/'
+        video_page_url = 'http://%s/' % mobj.group('domain')
         video_id = mobj.group('video_id')

-        webpage = self._download_webpage(webpage_url, video_id)
+        webpage = self._download_webpage('http://' + mobj.group('url'), video_id)

-        note_m = re.search(r'''(?sx)
-            <div[ ]style="margin-left:[ ]20px;[ ]font-size:[ ]13px;">(.*?)
-            <div[ ]id="playerteaser">''', webpage)
-        if note_m:
-            msg = clean_html(note_m.group(1))
-            raise ExtractorError(msg)
+        mobj = re.search(r'(?s)<div style="margin-left: 20px; font-size: 13px;">(.*?)<div id="playerteaser">', webpage)
+        if mobj:
+            raise ExtractorError(clean_html(mobj.group(1)), expected=True)

-        video_title = self._html_search_regex(
-            r'<title>(?P<title>[^<]+?)( \| [^<]*)?</title>',
-            webpage, 'title')
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(webpage, default=None)
+
+        upload_date = unified_strdate(self._html_search_meta('uploadDate', webpage, 'upload date'))
+
+        mobj = re.search(r'<meta itemprop="duration" content="PT(?P<seconds>\d+)S" />', webpage)
+        duration = int(mobj.group('seconds')) if mobj else None
+
         playerdata_url = self._html_search_regex(
-            r'\'playerdata\': \'(?P<playerdata_url>[^\']+)\'',
-            webpage, 'playerdata_url')
+            r"'playerdata': '(?P<playerdata_url>[^']+)'", webpage, 'playerdata_url')

-        playerdata = self._download_webpage(playerdata_url, video_id)
-        mobj = re.search(r'<title><!\[CDATA\[(?P<description>.+?)(?:\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr)?\]\]></title>', playerdata)
-        if mobj:
-            video_description = mobj.group('description')
-            if mobj.group('upload_date_Y'):
-                video_upload_date = mobj.group('upload_date_Y')
-            elif mobj.group('upload_date_y'):
-                video_upload_date = '20' + mobj.group('upload_date_y')
-            else:
-                video_upload_date = None
-            if video_upload_date:
-                video_upload_date += mobj.group('upload_date_m') + mobj.group('upload_date_d')
-        else:
-            video_description = None
-            video_upload_date = None
-            self._downloader.report_warning('Unable to extract description and upload date')
-
-        # Thumbnail: not every video has an thumbnail
-        mobj = re.search(r'<meta property="og:image" content="(?P<thumbnail>[^"]+)">', webpage)
-        if mobj:
-            video_thumbnail = mobj.group('thumbnail')
-        else:
-            video_thumbnail = None
-
-        mobj = re.search(r'<filename [^>]+><!\[CDATA\[(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>[^\]]+)\]\]></filename>', playerdata)
-        if mobj is None:
-            raise ExtractorError('Unable to extract media URL')
-        video_url = mobj.group('url')
-        video_play_path = 'mp4:' + mobj.group('play_path')
-        video_player_url = video_page_url + 'includes/vodplayer.swf'
+        playerdata = self._download_xml(playerdata_url, video_id, 'Downloading player data XML')
+
+        videoinfo = playerdata.find('./playlist/videoinfo')
+
+        formats = []
+        for filename in videoinfo.findall('filename'):
+            mobj = re.search(r'(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>.+)', filename.text)
+            if mobj:
+                fmt = {
+                    'url': mobj.group('url'),
+                    'play_path': 'mp4:' + mobj.group('play_path'),
+                    'page_url': video_page_url,
+                    'player_url': video_page_url + 'includes/vodplayer.swf',
+                }
+            else:
+                fmt = {
+                    'url': filename.text,
+                }
+            fmt.update({
+                'width': int_or_none(filename.get('width')),
+                'height': int_or_none(filename.get('height')),
+                'vbr': int_or_none(filename.get('bitrate')),
+                'ext': 'flv',
+            })
+            formats.append(fmt)

         return {
             'id': video_id,
-            'url': video_url,
-            'play_path': video_play_path,
-            'page_url': video_page_url,
-            'player_url': video_player_url,
-            'ext': 'flv',
-            'title': video_title,
-            'description': video_description,
-            'upload_date': video_upload_date,
-            'thumbnail': video_thumbnail,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+            'duration': duration,
+            'formats': formats,
         }
youtube_dl/extractor/savefrom.py (new file)
@@ -0,0 +1,37 @@
# coding: utf-8
from __future__ import unicode_literals

import os.path
import re

from .common import InfoExtractor


class SaveFromIE(InfoExtractor):
    IE_NAME = 'savefrom.net'
    _VALID_URL = r'https?://[^.]+\.savefrom\.net/\#url=(?P<url>.*)$'

    _TEST = {
        'url': 'http://en.savefrom.net/#url=http://youtube.com/watch?v=UlVRAPW2WJY&utm_source=youtube.com&utm_medium=short_domains&utm_campaign=ssyoutube.com',
        'info_dict': {
            'id': 'UlVRAPW2WJY',
            'ext': 'mp4',
            'title': 'About Team Radical MMA | MMA Fighting',
            'upload_date': '20120816',
            'uploader': 'Howcast',
            'uploader_id': 'Howcast',
            'description': 'md5:4f0aac94361a12e1ce57d74f85265175',
        },
        'params': {
            'skip_download': True
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = os.path.splitext(url.split('/')[-1])[0]
        return {
            '_type': 'url',
            'id': video_id,
            'url': mobj.group('url'),
        }
@@ -217,7 +217,7 @@ class SoundcloudIE(InfoExtractor):
         return self._extract_info_dict(info, full_title, secret_token=token)

 class SoundcloudSetIE(SoundcloudIE):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
+    _VALID_URL = r'https?://(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
     IE_NAME = 'soundcloud:set'
     # it's in tests/test_playlists.py
     _TESTS = []
@@ -1,6 +1,5 @@
 from __future__ import unicode_literals

-import os
 import re

 from .common import InfoExtractor
@@ -8,23 +7,27 @@ from ..utils import (
     compat_urllib_parse_urlparse,
     compat_urllib_request,
     compat_urllib_parse,
+    unified_strdate,
+    str_to_int,
+    int_or_none,
 )
-from ..aes import (
-    aes_decrypt_text
-)
+from ..aes import aes_decrypt_text


 class SpankwireIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
     _TEST = {
         'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
-        'file': '103545.mp4',
-        'md5': '1b3f55e345500552dbc252a3e9c1af43',
+        'md5': '8bbfde12b101204b39e4b9fe7eb67095',
         'info_dict': {
-            "uploader": "oreusz",
-            "title": "Buckcherry`s X Rated Music Video Crazy Bitch",
-            "description": "Crazy Bitch X rated music video.",
-            "age_limit": 18,
+            'id': '103545',
+            'ext': 'mp4',
+            'title': 'Buckcherry`s X Rated Music Video Crazy Bitch',
+            'description': 'Crazy Bitch X rated music video.',
+            'uploader': 'oreusz',
+            'uploader_id': '124697',
+            'upload_date': '20070508',
+            'age_limit': 18,
         }
     }

@@ -37,13 +40,26 @@ class SpankwireIE(InfoExtractor):
         req.add_header('Cookie', 'age_verified=1')
         webpage = self._download_webpage(req, video_id)

-        video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
-        video_uploader = self._html_search_regex(
-            r'by:\s*<a [^>]*>(.+?)</a>', webpage, 'uploader', fatal=False)
-        thumbnail = self._html_search_regex(
-            r'flashvars\.image_url = "([^"]+)', webpage, 'thumbnail', fatal=False)
+        title = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
         description = self._html_search_regex(
             r'<div\s+id="descriptionContent">([^<]+)<', webpage, 'description', fatal=False)
+        thumbnail = self._html_search_regex(
+            r'flashvars\.image_url = "([^"]+)', webpage, 'thumbnail', fatal=False)
+
+        uploader = self._html_search_regex(
+            r'by:\s*<a [^>]*>(.+?)</a>', webpage, 'uploader', fatal=False)
+        uploader_id = self._html_search_regex(
+            r'by:\s*<a href="/Profile\.aspx\?.*?UserId=(\d+).*?"', webpage, 'uploader id', fatal=False)
+        upload_date = self._html_search_regex(r'</a> on (.+?) at \d+:\d+', webpage, 'upload date', fatal=False)
+        if upload_date:
+            upload_date = unified_strdate(upload_date)
+
+        view_count = self._html_search_regex(
+            r'<div id="viewsCounter"><span>([^<]+)</span> views</div>', webpage, 'view count', fatal=False)
+        if view_count:
+            view_count = str_to_int(view_count)
+        comment_count = int_or_none(self._html_search_regex(
+            r'<span id="spCommentCount">\s*(\d+)</span> Comments</div>', webpage, 'comment count', fatal=False))

         video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage)))
         if webpage.find('flashvars\.encrypted = "true"') != -1:
@@ -53,16 +69,13 @@ class SpankwireIE(InfoExtractor):
         formats = []
         for video_url in video_urls:
             path = compat_urllib_parse_urlparse(video_url).path
-            extension = os.path.splitext(path)[1][1:]
             format = path.split('/')[4].split('_')[:2]
             resolution, bitrate_str = format
             format = "-".join(format)
-            height = int(resolution.rstrip('P'))
-            tbr = int(bitrate_str.rstrip('K'))
+            height = int(resolution.rstrip('Pp'))
+            tbr = int(bitrate_str.rstrip('Kk'))

             formats.append({
                 'url': video_url,
-                'ext': extension,
                 'resolution': resolution,
                 'format': format,
                 'tbr': tbr,
@@ -75,10 +88,14 @@ class SpankwireIE(InfoExtractor):

         return {
             'id': video_id,
-            'uploader': video_uploader,
-            'title': video_title,
-            'thumbnail': thumbnail,
+            'title': title,
             'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'upload_date': upload_date,
+            'view_count': view_count,
+            'comment_count': comment_count,
             'formats': formats,
             'age_limit': age_limit,
         }
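Note: in the Spankwire hunks above, resolution and bitrate come straight out of the video URL path (a segment such as 720P_1500K_...), and the widened rstrip('Pp') / rstrip('Kk') calls are what make lowercase suffixes parse too. A small stand-alone illustration with a made-up URL; the real CDN paths may be shaped differently:

    from urllib.parse import urlparse  # youtube-dl uses compat_urllib_parse_urlparse

    def parse_spankwire_format(video_url):
        # Hypothetical path: /videos/2007/05/720P_1500K_103545.mp4
        path = urlparse(video_url).path
        resolution, bitrate_str = path.split('/')[4].split('_')[:2]
        height = int(resolution.rstrip('Pp'))   # '720P' or '720p' -> 720
        tbr = int(bitrate_str.rstrip('Kk'))     # '1500K' or '1500k' -> 1500
        return height, tbr

    print(parse_spankwire_format('http://cdn.example.com/videos/2007/05/720P_1500K_103545.mp4'))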
@@ -39,6 +39,8 @@ class TestURLIE(InfoExtractor):
                 ('Found multiple matching extractors: %s' %
                  ' '.join(ie.IE_NAME for ie in matching_extractors)),
                 expected=True)
+        else:
+            extractor = matching_extractors[0]

         num_str = mobj.group('num')
         num = int(num_str) if num_str else 0
@@ -13,7 +13,7 @@ _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language
 class ThePlatformIE(InfoExtractor):
     _VALID_URL = r'''(?x)
         (?:https?://(?:link|player)\.theplatform\.com/[sp]/[^/]+/
-           (?P<config>[^/\?]+/(?:swf|config)/select/)?
+           (?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
          |theplatform:)(?P<id>[^/\?&]+)'''

     _TEST = {
@@ -54,10 +54,15 @@ class ThePlatformIE(InfoExtractor):

         f4m_node = body.find(_x('smil:seq/smil:video'))
         if f4m_node is not None:
+            f4m_url = f4m_node.attrib['src']
+            if 'manifest.f4m?' not in f4m_url:
+                f4m_url += '?'
+            # the parameters are from syfy.com, other sites may use others,
+            # they also work for nbc.com
+            f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3'
             formats = [{
                 'ext': 'flv',
-                # the parameters are from syfy.com, other sites may use others
-                'url': f4m_node.attrib['src'] + '?g=UXWGVKRWHFSP&hdcore=3.0.3',
+                'url': f4m_url,
             }]
         else:
             base_url = head.find(_x('smil:meta')).attrib['base']
@@ -95,9 +100,10 @@ class ThePlatformIE(InfoExtractor):
         if mobj.group('config'):
             config_url = url+ '&form=json'
             config_url = config_url.replace('swf/', 'config/')
+            config_url = config_url.replace('onsite/', 'onsite/config/')
             config_json = self._download_webpage(config_url, video_id, u'Downloading config')
             config = json.loads(config_json)
-            smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4'
+            smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
         else:
             smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
                 'format=smil&mbr=true'.format(video_id))
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from youtube_dl.utils import ExtractorError
+from ..utils import ExtractorError


 class TinyPicIE(InfoExtractor):
youtube_dl/extractor/trutube.py (new file)
@@ -0,0 +1,44 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor


class TruTubeIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?trutube\.tv/video/(?P<id>[0-9]+)/.*'
    _TEST = {
        'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-',
        'md5': 'c5b6e301b0a2040b074746cbeaa26ca1',
        'info_dict': {
            'id': '14880',
            'ext': 'flv',
            'title': 'Ramses II - Proven To Be A Red Headed Caucasoid',
            'thumbnail': 're:^http:.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        video_title = self._og_search_title(webpage).strip()
        thumbnail = self._search_regex(
            r"var splash_img = '([^']+)';", webpage, 'thumbnail', fatal=False)

        all_formats = re.finditer(
            r"var (?P<key>[a-z]+)_video_file\s*=\s*'(?P<url>[^']+)';", webpage)
        formats = [{
            'format_id': m.group('key'),
            'quality': -i,
            'url': m.group('url'),
        } for i, m in enumerate(all_formats)]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_title,
            'formats': formats,
            'thumbnail': thumbnail,
        }
youtube_dl/extractor/tvigle.py (new file)
@@ -0,0 +1,84 @@
# encoding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    unified_strdate,
    clean_html,
    int_or_none,
)


class TvigleIE(InfoExtractor):
    IE_NAME = 'tvigle'
    IE_DESC = 'Интернет-телевидение Tvigle.ru'
    _VALID_URL = r'http://(?:www\.)?tvigle\.ru/category/.+?[\?&]v(?:ideo)?=(?P<id>\d+)'

    _TESTS = [
        {
            'url': 'http://www.tvigle.ru/category/cinema/1608/?video=503081',
            'md5': '09afba4616666249f087efc6dcf83cb3',
            'info_dict': {
                'id': '503081',
                'ext': 'flv',
                'title': 'Брат 2 ',
                'description': 'md5:f5a42970f50648cee3d7ad740f3ae769',
                'upload_date': '20110919',
            },
        },
        {
            'url': 'http://www.tvigle.ru/category/men/vysotskiy_vospominaniya02/?flt=196&v=676433',
            'md5': 'e7efe5350dd5011d0de6550b53c3ba7b',
            'info_dict': {
                'id': '676433',
                'ext': 'flv',
                'title': 'Ведущий телепрограммы «60 минут» (США) о Владимире Высоцком',
                'description': 'md5:027f7dc872948f14c96d19b4178428a4',
                'upload_date': '20121218',
            },
        },
    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        video_data = self._download_xml(
            'http://www.tvigle.ru/xml/single.php?obj=%s' % video_id, video_id, 'Downloading video XML')

        video = video_data.find('./video')

        title = video.get('name')
        description = video.get('anons')
        if description:
            description = clean_html(description)
        thumbnail = video_data.get('img')
        upload_date = unified_strdate(video.get('date'))
        like_count = int_or_none(video.get('vtp'))

        formats = []
        for num, (format_id, format_note) in enumerate([['low_file', 'SQ'], ['file', 'HQ'], ['hd', 'HD 720']]):
            video_url = video.get(format_id)
            if not video_url:
                continue
            formats.append({
                'url': video_url,
                'format_id': format_id,
                'format_note': format_note,
                'quality': num,
            })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'like_count': like_count,
            'age_limit': 18,
            'formats': formats,
        }
@@ -4,6 +4,7 @@ import re
 import json

 from .common import InfoExtractor
+from ..utils import compat_urllib_request


 class VeohIE(InfoExtractor):
@@ -24,6 +25,13 @@ class VeohIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)
+        age_limit = 0
+        if 'class="adultwarning-container"' in webpage:
+            self.report_age_confirmation()
+            age_limit = 18
+            request = compat_urllib_request.Request(url)
+            request.add_header('Cookie', 'confirmedAdult=true')
+            webpage = self._download_webpage(request, video_id)

         m_youtube = re.search(r'http://www\.youtube\.com/v/(.*?)(\&|")', webpage)
         if m_youtube is not None:
@@ -44,4 +52,5 @@ class VeohIE(InfoExtractor):
             'thumbnail': info.get('highResImage') or info.get('medResImage'),
             'description': info['description'],
             'view_count': info['views'],
+            'age_limit': age_limit,
         }
@ -113,8 +113,8 @@ class VestiIE(InfoExtractor):
|
|||||||
priority_transport = playlist['priority_transport']
|
priority_transport = playlist['priority_transport']
|
||||||
|
|
||||||
thumbnail = media['picture']
|
thumbnail = media['picture']
|
||||||
width = media['width']
|
width = int_or_none(media['width'])
|
||||||
height = media['height']
|
height = int_or_none(media['height'])
|
||||||
description = media['anons']
|
description = media['anons']
|
||||||
title = media['title']
|
title = media['title']
|
||||||
duration = int_or_none(media.get('duration'))
|
duration = int_or_none(media.get('duration'))
|
||||||
|
@ -24,9 +24,10 @@ class VevoIE(InfoExtractor):
|
|||||||
(?P<id>[^&?#]+)'''
|
(?P<id>[^&?#]+)'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
||||||
'file': 'GB1101300280.mp4',
|
|
||||||
"md5": "06bea460acb744eab74a9d7dcb4bfd61",
|
"md5": "06bea460acb744eab74a9d7dcb4bfd61",
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'GB1101300280',
|
||||||
|
'ext': 'mp4',
|
||||||
"upload_date": "20130624",
|
"upload_date": "20130624",
|
||||||
"uploader": "Hurts",
|
"uploader": "Hurts",
|
||||||
"title": "Somebody to Die For",
|
"title": "Somebody to Die For",
|
||||||
@ -34,6 +35,33 @@ class VevoIE(InfoExtractor):
|
|||||||
"width": 1920,
|
"width": 1920,
|
||||||
"height": 1080,
|
"height": 1080,
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'note': 'v3 SMIL format',
|
||||||
|
'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
|
||||||
|
'md5': '893ec0e0d4426a1d96c01de8f2bdff58',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'USUV71302923',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20140219',
|
||||||
|
'uploader': 'Cassadee Pope',
|
||||||
|
'title': 'I Wish I Could Break Your Heart',
|
||||||
|
'duration': 226.101,
|
||||||
|
'age_limit': 0,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'note': 'Age-limited video',
|
||||||
|
'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'USRV81300282',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'age_limit': 18,
|
||||||
|
'title': 'Tunnel Vision (Explicit)',
|
||||||
|
'uploader': 'Justin Timberlake',
|
||||||
|
'upload_date': '20130704',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': 'true',
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
|
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
|
||||||
|
|
||||||
@ -105,9 +133,31 @@ class VevoIE(InfoExtractor):
|
|||||||
video_info = self._download_json(json_url, video_id)['video']
|
video_info = self._download_json(json_url, video_id)['video']
|
||||||
|
|
||||||
formats = self._formats_from_json(video_info)
|
formats = self._formats_from_json(video_info)
|
||||||
|
|
||||||
|
is_explicit = video_info.get('isExplicit')
|
||||||
|
if is_explicit is True:
|
||||||
|
age_limit = 18
|
||||||
|
elif is_explicit is False:
|
||||||
|
age_limit = 0
|
||||||
|
else:
|
||||||
|
age_limit = None
|
||||||
|
|
||||||
|
# Download SMIL
|
||||||
|
smil_blocks = sorted((
|
||||||
|
f for f in video_info['videoVersions']
|
||||||
|
if f['sourceType'] == 13),
|
||||||
|
key=lambda f: f['version'])
|
||||||
|
|
||||||
|
smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
|
||||||
|
self._SMIL_BASE_URL, video_id, video_id.lower())
|
||||||
|
if smil_blocks:
|
||||||
|
smil_url_m = self._search_regex(
|
||||||
|
r'url="([^"]+)"', smil_blocks[-1]['data'], 'SMIL URL',
|
||||||
|
fatal=False)
|
||||||
|
if smil_url_m is not None:
|
||||||
|
smil_url = smil_url_m
|
||||||
|
|
||||||
try:
|
try:
|
||||||
smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
|
|
||||||
self._SMIL_BASE_URL, video_id, video_id.lower())
|
|
||||||
smil_xml = self._download_webpage(smil_url, video_id,
|
smil_xml = self._download_webpage(smil_url, video_id,
|
||||||
'Downloading SMIL info')
|
'Downloading SMIL info')
|
||||||
formats.extend(self._formats_from_smil(smil_xml))
|
formats.extend(self._formats_from_smil(smil_xml))
|
||||||
@ -128,4 +178,5 @@ class VevoIE(InfoExtractor):
|
|||||||
'upload_date': upload_date.strftime('%Y%m%d'),
|
'upload_date': upload_date.strftime('%Y%m%d'),
|
||||||
'uploader': video_info['mainArtists'][0]['artistName'],
|
'uploader': video_info['mainArtists'][0]['artistName'],
|
||||||
'duration': video_info['duration'],
|
'duration': video_info['duration'],
|
||||||
|
'age_limit': age_limit,
|
||||||
}
|
}
|
||||||
|
80
youtube_dl/extractor/videobam.py
Normal file
80
youtube_dl/extractor/videobam.py
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
|
class VideoBamIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http://(?:www\.)?videobam\.com/(?:videos/download/)?(?P<id>[a-zA-Z]+)'
|
||||||
|
|
||||||
|
_TESTS = [
|
||||||
|
{
|
||||||
|
'url': 'http://videobam.com/OiJQM',
|
||||||
|
'md5': 'db471f27763a531f10416a0c58b5a1e0',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'OiJQM',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Is Alcohol Worse Than Ecstasy?',
|
||||||
|
'description': 'md5:d25b96151515c91debc42bfbb3eb2683',
|
||||||
|
'uploader': 'frihetsvinge',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://videobam.com/pqLvq',
|
||||||
|
'md5': 'd9a565b5379a99126ef94e1d7f9a383e',
|
||||||
|
'note': 'HD video',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'pqLvq',
|
||||||
|
'ext': 'mp4',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
page = self._download_webpage('http://videobam.com/%s' % video_id, video_id, 'Downloading page')
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
|
||||||
|
for preference, format_id in enumerate(['low', 'high']):
|
||||||
|
mobj = re.search(r"%s: '(?P<url>[^']+)'" % format_id, page)
|
||||||
|
if not mobj:
|
||||||
|
continue
|
||||||
|
formats.append({
|
||||||
|
'url': mobj.group('url'),
|
||||||
|
'ext': 'mp4',
|
||||||
|
'format_id': format_id,
|
||||||
|
'preference': preference,
|
||||||
|
})
|
||||||
|
|
||||||
|
if not formats:
|
||||||
|
player_config = json.loads(self._html_search_regex(r'var player_config = ({.+?});', page, 'player config'))
|
||||||
|
formats = [{
|
||||||
|
'url': item['url'],
|
||||||
|
'ext': 'mp4',
|
||||||
|
} for item in player_config['playlist'] if 'autoPlay' in item]
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
title = self._og_search_title(page, default='VideoBam', fatal=False)
|
||||||
|
description = self._og_search_description(page, default=None)
|
||||||
|
thumbnail = self._og_search_thumbnail(page)
|
||||||
|
uploader = self._html_search_regex(r'Upload by ([^<]+)</a>', page, 'uploader', fatal=False, default=None)
|
||||||
|
view_count = int_or_none(
|
||||||
|
self._html_search_regex(r'<strong>Views:</strong> (\d+) ', page, 'view count', fatal=False))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'uploader': uploader,
|
||||||
|
'view_count': view_count,
|
||||||
|
'formats': formats,
|
||||||
|
'age_limit': 18,
|
||||||
|
}
|
@ -221,7 +221,9 @@ class VimeoIE(SubtitlesInfoExtractor):
|
|||||||
# Extract video thumbnail
|
# Extract video thumbnail
|
||||||
video_thumbnail = config["video"].get("thumbnail")
|
video_thumbnail = config["video"].get("thumbnail")
|
||||||
if video_thumbnail is None:
|
if video_thumbnail is None:
|
||||||
_, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in config["video"]["thumbs"].items())[-1]
|
video_thumbs = config["video"].get("thumbs")
|
||||||
|
if video_thumbs and isinstance(video_thumbs, dict):
|
||||||
|
_, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in video_thumbs.items())[-1]
|
||||||
|
|
||||||
# Extract video description
|
# Extract video description
|
||||||
video_description = None
|
video_description = None
|
||||||
|
@ -1,8 +1,10 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import unified_strdate
|
||||||
|
|
||||||
|
|
||||||
class VineIE(InfoExtractor):
|
class VineIE(InfoExtractor):
|
||||||
@ -13,31 +15,46 @@ class VineIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'b9KOOWX7HUx',
|
'id': 'b9KOOWX7HUx',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'uploader': 'Jack Dorsey',
|
|
||||||
'title': 'Chicken.',
|
'title': 'Chicken.',
|
||||||
|
'description': 'Chicken.',
|
||||||
|
'upload_date': '20130519',
|
||||||
|
'uploader': 'Jack Dorsey',
|
||||||
|
'uploader_id': '76',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
webpage_url = 'https://vine.co/v/' + video_id
|
|
||||||
webpage = self._download_webpage(webpage_url, video_id)
|
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id)
|
||||||
|
|
||||||
video_url = self._html_search_meta('twitter:player:stream', webpage,
|
data = json.loads(self._html_search_regex(
|
||||||
'video URL')
|
r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data'))
|
||||||
|
|
||||||
uploader = self._html_search_regex(r'<p class="username">(.*?)</p>',
|
formats = [
|
||||||
webpage, 'uploader', fatal=False, flags=re.DOTALL)
|
{
|
||||||
|
'url': data['videoLowURL'],
|
||||||
|
'ext': 'mp4',
|
||||||
|
'format_id': 'low',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': data['videoUrl'],
|
||||||
|
'ext': 'mp4',
|
||||||
|
'format_id': 'standard',
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': self._og_search_title(webpage),
|
'title': self._og_search_title(webpage),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'description': data['description'],
|
||||||
'uploader': uploader,
|
'thumbnail': data['thumbnailUrl'],
|
||||||
|
'upload_date': unified_strdate(data['created']),
|
||||||
|
'uploader': data['username'],
|
||||||
|
'uploader_id': data['userIdStr'],
|
||||||
|
'like_count': data['likes']['count'],
|
||||||
|
'comment_count': data['comments']['count'],
|
||||||
|
'repost_count': data['reposts']['count'],
|
||||||
|
'formats': formats,
|
||||||
}
|
}
|

youtube_dl/extractor/vk.py:

@@ -16,7 +16,7 @@ from ..utils import (

 class VKIE(InfoExtractor):
     IE_NAME = 'vk.com'
-    _VALID_URL = r'https?://vk\.com/(?:videos.*?\?.*?z=)?video(?P<id>.*?)(?:\?|%2F|$)'
+    _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>\d+).*?\bid=(?P<id>\d+)|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
     _NETRC_MACHINE = 'vk'

     _TESTS = [
@@ -42,6 +42,18 @@ class VKIE(InfoExtractor):
                 'duration': 558,
             }
         },
+        {
+            'note': 'Embedded video',
+            'url': 'http://vk.com/video_ext.php?oid=32194266&id=162925554&hash=7d8c2e0d5e05aeaa&hd=1',
+            'md5': 'c7ce8f1f87bec05b3de07fdeafe21a0a',
+            'info_dict': {
+                'id': '162925554',
+                'ext': 'mp4',
+                'uploader': 'Vladimir Gavrin',
+                'title': 'Lin Dan',
+                'duration': 101,
+            }
+        },
         {
             'url': 'http://vk.com/video-8871596_164049491',
             'md5': 'a590bcaf3d543576c9bd162812387666',
@@ -54,7 +66,7 @@ class VKIE(InfoExtractor):
                 'duration': 8352,
             },
             'skip': 'Requires vk account credentials',
-        }
+        },
     ]

     def _login(self):
@@ -82,7 +94,10 @@ class VKIE(InfoExtractor):

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = mobj.group('videoid')
+
+        if not video_id:
+            video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))

         info_url = 'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id
         info_page = self._download_webpage(info_url, video_id)
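The widened _VALID_URL now also accepts embedded-player URLs (video_ext.php), which carry the owner and video ids as separate oid/id parameters; _real_extract then rebuilds the canonical 'oid_id' value. A quick standalone check of both branches with the standard re module, using the URLs from the tests above:

    import re

    VALID_URL = (r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>\d+).*?\bid=(?P<id>\d+)|'
                 r'(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))')

    for url in ('http://vk.com/video-8871596_164049491',
                'http://vk.com/video_ext.php?oid=32194266&id=162925554&hash=7d8c2e0d5e05aeaa&hd=1'):
        mobj = re.match(VALID_URL, url)
        video_id = mobj.group('videoid')
        if not video_id:
            # Embedded URLs only carry oid/id, so the id is rebuilt as 'oid_id'.
            video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))
        print('%s -> %s' % (url, video_id))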

youtube_dl/extractor/worldstarhiphop.py:

@@ -22,7 +22,7 @@ class WorldStarHipHopIE(InfoExtractor):
         webpage_src = self._download_webpage(url, video_id)

         m_vevo_id = re.search(r'videoId=(.*?)&?',
                               webpage_src)

         if m_vevo_id is not None:
             self.to_screen(u'Vevo video detected:')

youtube_dl/extractor/xhamster.py:

@@ -103,6 +103,7 @@ class XHamsterIE(InfoExtractor):
         }]

         if not hd:
+            mrss_url = self._search_regex(r'<link rel="canonical" href="([^"]+)', webpage, 'mrss_url')
             webpage = self._download_webpage(mrss_url + '?hd', video_id, note='Downloading HD webpage')
             if is_hd(webpage):
                 video_url = extract_video_url(webpage)
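The added line re-reads the page's own canonical URL before requesting the '?hd' variant, instead of reusing whatever URL the user passed in. The regex itself is straightforward; here it is on a fabricated snippet:

    import re

    # Fabricated page head; only the canonical link matters for this step.
    webpage = '<head><link rel="canonical" href="http://xhamster.com/movies/1509445/some_title.html"/></head>'

    mrss_url = re.search(r'<link rel="canonical" href="([^"]+)', webpage).group(1)
    print(mrss_url + '?hd')  # the extractor downloads this and re-checks for an HD stream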

youtube_dl/extractor/xtube.py:

@@ -7,19 +7,24 @@ from .common import InfoExtractor
 from ..utils import (
     compat_urllib_parse_urlparse,
     compat_urllib_request,
+    parse_duration,
+    str_to_int,
 )


 class XTubeIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<videoid>[^/?&]+))'
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<videoid>[^/?&]+))'
     _TEST = {
         'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_',
-        'file': 'kVTUy_G222_.mp4',
         'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab',
         'info_dict': {
-            "title": "strange erotica",
-            "description": "surreal gay themed erotica...almost an ET kind of thing",
-            "uploader": "greenshowers",
-            "age_limit": 18,
+            'id': 'kVTUy_G222_',
+            'ext': 'mp4',
+            'title': 'strange erotica',
+            'description': 'surreal gay themed erotica...almost an ET kind of thing',
+            'uploader': 'greenshowers',
+            'duration': 450,
+            'age_limit': 18,
         }
     }

@@ -32,10 +37,23 @@ class XTubeIE(InfoExtractor):
         req.add_header('Cookie', 'age_verified=1')
         webpage = self._download_webpage(req, video_id)

-        video_title = self._html_search_regex(r'<div class="p_5px[^>]*>([^<]+)', webpage, 'title')
-        video_uploader = self._html_search_regex(r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, 'uploader', fatal=False)
-        video_description = self._html_search_regex(r'<p class="video_description">([^<]+)', webpage, 'description', fatal=False)
-        video_url= self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, 'video_url').replace('\\/', '/')
+        video_title = self._html_search_regex(r'<p class="title">([^<]+)', webpage, 'title')
+        video_uploader = self._html_search_regex(
+            r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, 'uploader', fatal=False)
+        video_description = self._html_search_regex(
+            r'<p class="fieldsDesc">([^<]+)', webpage, 'description', fatal=False)
+        video_url = self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, 'video_url').replace('\\/', '/')
+        duration = parse_duration(self._html_search_regex(
+            r'<span class="bold">Runtime:</span> ([^<]+)</p>', webpage, 'duration', fatal=False))
+        view_count = self._html_search_regex(
+            r'<span class="bold">Views:</span> ([\d,\.]+)</p>', webpage, 'view count', fatal=False)
+        if view_count:
+            view_count = str_to_int(view_count)
+        comment_count = self._html_search_regex(
+            r'<div id="commentBar">([\d,\.]+) Comments</div>', webpage, 'comment count', fatal=False)
+        if comment_count:
+            comment_count = str_to_int(comment_count)

         path = compat_urllib_parse_urlparse(video_url).path
         extension = os.path.splitext(path)[1][1:]
         format = path.split('/')[5].split('_')[:2]
@@ -48,6 +66,9 @@ class XTubeIE(InfoExtractor):
             'title': video_title,
             'uploader': video_uploader,
             'description': video_description,
+            'duration': duration,
+            'view_count': view_count,
+            'comment_count': comment_count,
             'url': video_url,
             'ext': extension,
             'format': format,
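The new duration, view_count and comment_count fields lean on two small youtube-dl helpers: parse_duration turns a human-readable runtime into seconds, and str_to_int drops thousands separators. Rough standalone stand-ins for what those calls do with the scraped strings (simplified approximations, not the library implementations):

    import re

    def parse_duration_approx(value):
        # 'SS', 'MM:SS' or 'HH:MM:SS' -> seconds
        result = 0
        for part in value.strip().split(':'):
            result = result * 60 + int(part)
        return result

    def str_to_int_approx(value):
        # '12,345' or '12.345' -> 12345
        return int(re.sub(r'[,\.]', '', value))

    print(parse_duration_approx('7:30'))   # 450, the duration in the test above
    print(str_to_int_approx('12,345'))     # 12345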

youtube_dl/extractor/youtube.py:

@@ -29,7 +29,6 @@ from ..utils import (
     ExtractorError,
     int_or_none,
     PagedList,
-    RegexNotFoundError,
     unescapeHTML,
     unified_strdate,
     orderedSet,
@@ -200,9 +199,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         '135': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'DASH video', 'preference': -40},
         '136': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'DASH video', 'preference': -40},
         '137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40},
-        '138': {'ext': 'mp4', 'height': 1081, 'resolution': '>1080p', 'format_note': 'DASH video', 'preference': -40},
+        '138': {'ext': 'mp4', 'height': 2160, 'resolution': '2160p', 'format_note': 'DASH video', 'preference': -40},
         '160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'preference': -40},
-        '264': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40},
+        '264': {'ext': 'mp4', 'height': 1440, 'resolution': '1440p', 'format_note': 'DASH video', 'preference': -40},

         # Dash mp4 audio
         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
@@ -1489,11 +1488,15 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         # the id of the playlist is just 'RD' + video_id
         url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
         webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
-        title_span = (get_element_by_attribute('class', 'title long-title', webpage) or
-            get_element_by_attribute('class', 'title ', webpage))
+        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
+        title_span = (search_title('playlist-title') or
+            search_title('title long-title') or search_title('title'))
         title = clean_html(title_span)
-        video_re = r'data-index="\d+".*?href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s' % re.escape(playlist_id)
-        ids = orderedSet(re.findall(video_re, webpage))
+        video_re = r'''(?x)data-video-username="(.*?)".*?
+                       href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id)
+        matches = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
+        # Some of the videos may have been deleted, their username field is empty
+        ids = [video_id for (username, video_id) in matches if username]
         url_results = self._ids_to_results(ids)

         return self.playlist_result(url_results, playlist_id, title)
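The mix regex now also captures the uploader next to each video id, because entries for deleted videos are still rendered but with an empty data-video-username attribute; filtering on the username drops them. A toy run of the same idea over a fabricated page fragment:

    import re

    playlist_id = 'RDEMCIvzz5UiVmaz0xvtUUh9HQ'
    webpage = (
        '<li data-video-username="Artist A" ... href="/watch?v=aaaaaaaaaaa&list=%s">'
        '<li data-video-username="" ... href="/watch?v=bbbbbbbbbbb&list=%s">'
        '<li data-video-username="Artist C" ... href="/watch?v=ccccccccccc&list=%s">'
    ) % (playlist_id, playlist_id, playlist_id)

    video_re = r'''(?x)data-video-username="(.*?)".*?
                   href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id)
    matches = re.findall(video_re, webpage, flags=re.DOTALL)

    # Deleted videos have an empty username, so only the first and third id survive.
    ids = [video_id for (username, video_id) in matches if username]
    print(ids)  # ['aaaaaaaaaaa', 'ccccccccccc']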
@@ -1642,7 +1645,7 @@ class YoutubeChannelIE(InfoExtractor):

 class YoutubeUserIE(InfoExtractor):
     IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
-    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
+    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
     _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
     _GDATA_PAGE_SIZE = 50
     _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
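The only change to YoutubeUserIE is the extra 'results' in the negative lookahead, so search-results URLs are no longer claimed as user pages and can fall through to the search-URL extractor added below. A quick before/after check:

    import re

    OLD = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
    NEW = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'

    url = 'https://www.youtube.com/results?search_query=python'
    print(bool(re.match(OLD, url)))  # True  - wrongly treated as the user "results"
    print(bool(re.match(NEW, url)))  # False - left for YoutubeSearchURLIE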
@@ -1741,12 +1744,50 @@ class YoutubeSearchIE(SearchInfoExtractor):
                   for video_id in video_ids]
         return self.playlist_result(videos, query)


 class YoutubeSearchDateIE(YoutubeSearchIE):
     IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
     _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
     _SEARCH_KEY = 'ytsearchdate'
     IE_DESC = u'YouTube.com searches, newest videos first'
+
+
+class YoutubeSearchURLIE(InfoExtractor):
+    IE_DESC = u'YouTube.com search URLs'
+    IE_NAME = u'youtube:search_url'
+    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        query = compat_urllib_parse.unquote_plus(mobj.group('query'))
+
+        webpage = self._download_webpage(url, query)
+        result_code = self._search_regex(
+            r'(?s)<ol id="search-results"(.*?)</ol>', webpage, u'result HTML')
+
+        part_codes = re.findall(
+            r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
+        entries = []
+        for part_code in part_codes:
+            part_title = self._html_search_regex(
+                r'(?s)title="([^"]+)"', part_code, 'item title', fatal=False)
+            part_url_snippet = self._html_search_regex(
+                r'(?s)href="([^"]+)"', part_code, 'item URL')
+            part_url = compat_urlparse.urljoin(
+                'https://www.youtube.com/', part_url_snippet)
+            entries.append({
+                '_type': 'url',
+                'url': part_url,
+                'title': part_title,
+            })
+
+        return {
+            '_type': 'playlist',
+            'entries': entries,
+            'title': query,
+        }


 class YoutubeShowIE(InfoExtractor):
     IE_DESC = u'YouTube.com (multi-season) shows'
     _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
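YoutubeSearchURLIE scrapes the ordinary results page rather than the gdata API: it isolates the <ol id="search-results"> block, then reads one title/href pair per yt-lockup-title heading and returns them as url entries. The same two-stage regex pass, shown standalone on a fabricated results snippet (plain re and urljoin instead of the extractor helpers):

    import re
    try:
        from urlparse import urljoin            # Python 2
    except ImportError:
        from urllib.parse import urljoin         # Python 3

    webpage = '''
    <ol id="search-results">
      <li><h3 class="yt-lockup-title">
        <a href="/watch?v=BaW_jenozKc" title="youtube-dl test video"></a></h3></li>
      <li><h3 class="yt-lockup-title">
        <a href="/watch?v=a9LDPn-MO4I" title="some other result"></a></h3></li>
    </ol>'''

    result_code = re.search(r'(?s)<ol id="search-results"(.*?)</ol>', webpage).group(1)
    part_codes = re.findall(r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)

    entries = []
    for part_code in part_codes:
        title = re.search(r'(?s)title="([^"]+)"', part_code).group(1)
        href = re.search(r'(?s)href="([^"]+)"', part_code).group(1)
        entries.append({'title': title, 'url': urljoin('https://www.youtube.com/', href)})

    for entry in entries:
        print('%s - %s' % (entry['url'], entry['title']))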

youtube_dl/extractor/zdf.py:

@@ -1,4 +1,5 @@
 # coding: utf-8
+from __future__ import unicode_literals

 import re

@@ -13,52 +14,42 @@ class ZDFIE(InfoExtractor):
     _VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<video_id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'

     _TEST = {
-        u"url": u"http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt",
-        u"file": u"2037704.webm",
-        u"info_dict": {
-            u"upload_date": u"20131127",
-            u"description": u"Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial \"Ende des Machtpokers - Große Koalition für Deutschland\".",
-            u"uploader": u"spezial",
-            u"title": u"ZDFspezial - Ende des Machtpokers"
+        'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt',
+        'info_dict': {
+            'id': '2037704',
+            'ext': 'webm',
+            'title': 'ZDFspezial - Ende des Machtpokers',
+            'description': 'Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial "Ende des Machtpokers - Große Koalition für Deutschland".',
+            'duration': 1022,
+            'uploader': 'spezial',
+            'uploader_id': '225948',
+            'upload_date': '20131127',
         },
-        u"skip": u"Videos on ZDF.de are depublicised in short order",
+        'skip': 'Videos on ZDF.de are depublicised in short order',
     }

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('video_id')

-        xml_url = u'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
+        xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
         doc = self._download_xml(
             xml_url, video_id,
-            note=u'Downloading video info',
-            errnote=u'Failed to download video info')
+            note='Downloading video info',
+            errnote='Failed to download video info')

         title = doc.find('.//information/title').text
         description = doc.find('.//information/detail').text
+        duration = int(doc.find('.//details/lengthSec').text)
         uploader_node = doc.find('.//details/originChannelTitle')
         uploader = None if uploader_node is None else uploader_node.text
-        duration_str = doc.find('.//details/length').text
-        duration_m = re.match(r'''(?x)^
-            (?P<hours>[0-9]{2})
-            :(?P<minutes>[0-9]{2})
-            :(?P<seconds>[0-9]{2})
-            (?:\.(?P<ms>[0-9]+)?)
-            ''', duration_str)
-        duration = (
-            (
-                (int(duration_m.group('hours')) * 60 * 60) +
-                (int(duration_m.group('minutes')) * 60) +
-                int(duration_m.group('seconds'))
-            )
-            if duration_m
-            else None
-        )
+        uploader_id_node = doc.find('.//details/originChannelId')
+        uploader_id = None if uploader_id_node is None else uploader_id_node.text
         upload_date = unified_strdate(doc.find('.//details/airtime').text)

         def xml_to_format(fnode):
             video_url = fnode.find('url').text
-            is_available = u'http://www.metafilegenerator' not in video_url
+            is_available = 'http://www.metafilegenerator' not in video_url

             format_id = fnode.attrib['basetype']
             format_m = re.match(r'''(?x)
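Beyond the unicode_literals cleanup, the main simplification is the duration: the old code parsed the formatted <length> value (HH:MM:SS.ms) with a verbose regex and summed the pieces, while the new code reads the <lengthSec> node the same XML service already provides. A standalone comparison over a trimmed, fabricated response document:

    import re
    import xml.etree.ElementTree as ET

    doc = ET.fromstring(
        '<response><video><details>'
        '<length>00:17:02.000</length><lengthSec>1022</lengthSec>'
        '</details></video></response>')

    # Old approach: parse the human-readable string and add the parts up.
    m = re.match(r'(?P<h>\d{2}):(?P<m>\d{2}):(?P<s>\d{2})', doc.find('.//details/length').text)
    old_duration = int(m.group('h')) * 3600 + int(m.group('m')) * 60 + int(m.group('s'))

    # New approach: the service exposes the value in seconds directly.
    new_duration = int(doc.find('.//details/lengthSec').text)

    print('%s %s' % (old_duration, new_duration))  # both 1022, matching the test above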
@@ -71,22 +62,28 @@ class ZDFIE(InfoExtractor):

             quality = fnode.find('./quality').text
             abr = int(fnode.find('./audioBitrate').text) // 1000
-            vbr = int(fnode.find('./videoBitrate').text) // 1000
+            vbr_node = fnode.find('./videoBitrate')
+            vbr = None if vbr_node is None else int(vbr_node.text) // 1000

-            format_note = u''
+            width_node = fnode.find('./width')
+            width = None if width_node is None else int_or_none(width_node.text)
+            height_node = fnode.find('./height')
+            height = None if height_node is None else int_or_none(height_node.text)
+
+            format_note = ''
             if not format_note:
                 format_note = None

             return {
-                'format_id': format_id + u'-' + quality,
+                'format_id': format_id + '-' + quality,
                 'url': video_url,
                 'ext': ext,
                 'acodec': format_m.group('acodec'),
                 'vcodec': format_m.group('vcodec'),
                 'abr': abr,
                 'vbr': vbr,
-                'width': int_or_none(fnode.find('./width').text),
-                'height': int_or_none(fnode.find('./height').text),
+                'width': width,
+                'height': height,
                 'filesize': int_or_none(fnode.find('./filesize').text),
                 'format_note': format_note,
                 'protocol': proto,
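xml_to_format now checks for the videoBitrate, width and height nodes before converting them, so format entries that omit them (audio-only variants, for instance) no longer crash on a missing .text. The same None-safe pattern in isolation, with a fabricated format node and a simplified int_or_none:

    import xml.etree.ElementTree as ET

    def int_or_none(value):
        # simplified stand-in for the youtube-dl helper of the same name
        return int(value) if value is not None else None

    # Fabricated audio-only format node: no videoBitrate, width or height children.
    fnode = ET.fromstring(
        '<formitaet basetype="mp3_http"><audioBitrate>128000</audioBitrate></formitaet>')

    vbr_node = fnode.find('./videoBitrate')
    vbr = None if vbr_node is None else int(vbr_node.text) // 1000

    width_node = fnode.find('./width')
    width = None if width_node is None else int_or_none(width_node.text)

    print('%s %s' % (vbr, width))  # both None instead of an exception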
@@ -103,9 +100,10 @@ class ZDFIE(InfoExtractor):
         return {
             'id': video_id,
             'title': title,
-            'formats': formats,
             'description': description,
-            'uploader': uploader,
             'duration': duration,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
             'upload_date': upload_date,
+            'formats': formats,
         }

youtube_dl/utils.py:

@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-

+import contextlib
 import ctypes
 import datetime
 import email.utils
@@ -771,6 +772,7 @@ def unified_strdate(date_str):
         '%B %d %Y',
         '%b %d %Y',
         '%Y-%m-%d',
+        '%d.%m.%Y',
         '%d/%m/%Y',
         '%Y/%m/%d %H:%M:%S',
         '%Y-%m-%d %H:%M:%S',
@@ -779,6 +781,7 @@ def unified_strdate(date_str):
         '%Y-%m-%dT%H:%M:%S.%fZ',
         '%Y-%m-%dT%H:%M:%S.%f0Z',
         '%Y-%m-%dT%H:%M:%S',
+        '%Y-%m-%dT%H:%M:%S.%f',
         '%Y-%m-%dT%H:%M',
     ]
     for expression in format_expressions:
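unified_strdate essentially walks this list and returns the first layout that datetime.strptime accepts, normalized to YYYYMMDD; the two new entries cover dotted day.month.year dates (as used on German sites) and ISO timestamps with fractional seconds but no trailing Z. A compressed sketch of that loop:

    import datetime

    format_expressions = ['%Y-%m-%d', '%d.%m.%Y', '%Y-%m-%dT%H:%M:%S.%f']

    def unified_strdate_sketch(date_str):
        # try each known layout until one parses, as the real helper does
        for expression in format_expressions:
            try:
                return datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
            except ValueError:
                pass
        return None

    print(unified_strdate_sketch('27.11.2013'))                  # '20131127'
    print(unified_strdate_sketch('2014-03-04T12:30:45.123456'))  # '20140304'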
@@ -1244,3 +1247,19 @@ except TypeError:
 else:
     struct_pack = struct.pack
     struct_unpack = struct.unpack
+
+
+def read_batch_urls(batch_fd):
+    def fixup(url):
+        if not isinstance(url, compat_str):
+            url = url.decode('utf-8', 'replace')
+        BOM_UTF8 = u'\xef\xbb\xbf'
+        if url.startswith(BOM_UTF8):
+            url = url[len(BOM_UTF8):]
+        url = url.strip()
+        if url.startswith(('#', ';', ']')):
+            return False
+        return url
+
+    with contextlib.closing(batch_fd) as fd:
+        return [url for url in map(fixup, fd) if url]
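read_batch_urls is the helper behind batch-file input: it strips a UTF-8 BOM, trims whitespace, and skips lines starting with '#', ';' or ']' so batch files can carry comments. A rough standalone equivalent fed from an in-memory file (simplified: no compat_str/bytes handling):

    import contextlib
    import io

    def read_batch_urls_sketch(batch_fd):
        def fixup(url):
            url = url.lstrip(u'\ufeff')   # drop a leading BOM if the file was saved with one
            url = url.strip()
            if url.startswith(('#', ';', ']')):
                return False              # comment line
            return url

        with contextlib.closing(batch_fd) as fd:
            return [url for url in map(fixup, fd) if url]

    batch = io.StringIO(u'\ufeffhttp://example.com/a\n# a comment\n\nhttp://example.com/b\n')
    print(read_batch_urls_sketch(batch))  # ['http://example.com/a', 'http://example.com/b']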

youtube_dl/version.py:

@@ -1,2 +1,2 @@

-__version__ = '2014.02.21.1'
+__version__ = '2014.03.04.2'