Compare commits
110 Commits
2014.08.22
...
2014.08.27
Author | SHA1 | Date | |
---|---|---|---|
|
a7680bf330 | ||
|
6d3d3fc083 | ||
|
aff216edf4 | ||
|
1cb6dcdbbe | ||
|
3f514a353e | ||
|
da9ec3b932 | ||
|
191b7cbba9 | ||
|
e8c59b9642 | ||
|
6abb066128 | ||
|
8f1ea7cbb6 | ||
|
a204c85408 | ||
|
15a1f4b8fe | ||
|
c7bee2a725 | ||
|
dbc1366b50 | ||
|
704df56da7 | ||
|
33ac271ba7 | ||
|
0963f92f23 | ||
|
9a66c1079c | ||
|
f971dcbba0 | ||
|
0990305d2a | ||
|
bcc069a937 | ||
|
34708e1bb6 | ||
|
829476b80a | ||
|
1dd70fe330 | ||
|
067e922295 | ||
|
c28df2478f | ||
|
241f7a8ade | ||
|
b252735910 | ||
|
7adcbe7594 | ||
|
8d31fa3cce | ||
|
1f06864e9a | ||
|
348ae0a79e | ||
|
528d455632 | ||
|
ba5d51b340 | ||
|
7833d941bb | ||
|
a2360a4c80 | ||
|
a7cacbca2b | ||
|
c6b4132a0a | ||
|
ad260c90ab | ||
|
b8313f07bc | ||
|
92a17d28ac | ||
|
5f90042bd6 | ||
|
9480d1a566 | ||
|
36b0079f23 | ||
|
28028629b9 | ||
|
11f75cac3d | ||
|
e673db0194 | ||
|
ebab4520ff | ||
|
a71d1414eb | ||
|
423817c468 | ||
|
51ed9fce09 | ||
|
d43aeb1d00 | ||
|
4d805e063c | ||
|
24e5e24166 | ||
|
4d54ef20a2 | ||
|
54036b3991 | ||
|
e5402ac120 | ||
|
f56f8399c7 | ||
|
cf0c5fa3a1 | ||
|
8c2ccefae6 | ||
|
1f8b6af773 | ||
|
8f9b683eeb | ||
|
b5f4775b38 | ||
|
01d906ffe9 | ||
|
614582bcc4 | ||
|
e1ab5000b2 | ||
|
a5ed3e571e | ||
|
10eaeb20c5 | ||
|
fa8deaf38b | ||
|
6857590059 | ||
|
a3db22ebdf | ||
|
c8e9a235d9 | ||
|
30b871b0ca | ||
|
eb9da9b732 | ||
|
d769be6c96 | ||
|
a54bda3ae2 | ||
|
00558d9414 | ||
|
49f3c16543 | ||
|
2ef6fcb5d8 | ||
|
38fc045253 | ||
|
af1fd929c6 | ||
|
b7b04c9234 | ||
|
bc0bb6fd30 | ||
|
430826c9d4 | ||
|
68909f0c4e | ||
|
9d048a17d8 | ||
|
492641d10a | ||
|
2b9faf5542 | ||
|
ed2d6a1960 | ||
|
be843678b1 | ||
|
c71dfccc98 | ||
|
1a9ccac7c1 | ||
|
e330d59abb | ||
|
394df6d7d0 | ||
|
218f754940 | ||
|
a053c3493a | ||
|
50b294aab8 | ||
|
756b046f3e | ||
|
165250ff5e | ||
|
83317f6938 | ||
|
8c778adc39 | ||
|
71b6065009 | ||
|
c065fd35ae | ||
|
37e3cbe22e | ||
|
610134730a | ||
|
212a5e28ba | ||
|
ee1a7032d5 | ||
|
7ed806d241 | ||
|
dd06c95e43 | ||
|
3442b30ab2 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -26,5 +26,6 @@ updates_key.pem
|
|||||||
*.m4a
|
*.m4a
|
||||||
*.m4v
|
*.m4v
|
||||||
*.part
|
*.part
|
||||||
|
*.swp
|
||||||
test/testdata
|
test/testdata
|
||||||
.tox
|
.tox
|
||||||
|
@@ -255,6 +255,7 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
## Authentication Options:
|
## Authentication Options:
|
||||||
-u, --username USERNAME account username
|
-u, --username USERNAME account username
|
||||||
-p, --password PASSWORD account password
|
-p, --password PASSWORD account password
|
||||||
|
-2, --twofactor TWOFACTOR two-factor auth code
|
||||||
-n, --netrc use .netrc authentication data
|
-n, --netrc use .netrc authentication data
|
||||||
--video-password PASSWORD video password (vimeo, smotri)
|
--video-password PASSWORD video password (vimeo, smotri)
|
||||||
|
|
||||||
@@ -287,6 +288,10 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
postprocessors (default)
|
postprocessors (default)
|
||||||
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
|
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
|
||||||
postprocessors
|
postprocessors
|
||||||
|
--exec CMD Execute a command on the file after
|
||||||
|
downloading, similar to find's -exec
|
||||||
|
syntax. Example: --exec 'adb push {}
|
||||||
|
/sdcard/Music/ && rm {}'
|
||||||
|
|
||||||
# CONFIGURATION
|
# CONFIGURATION
|
||||||
|
|
||||||
@@ -429,6 +434,7 @@ If you want to add support for a new site, you can follow this quick list (assum
|
|||||||
'id': '42',
|
'id': '42',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Video title goes here',
|
'title': 'Video title goes here',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
# TODO more properties, either as:
|
# TODO more properties, either as:
|
||||||
# * A value
|
# * A value
|
||||||
# * MD5 checksum; start the string with md5:
|
# * MD5 checksum; start the string with md5:
|
||||||
|
@@ -102,7 +102,10 @@ def expect_info_dict(self, expected_dict, got_dict):
|
|||||||
match_rex = re.compile(match_str)
|
match_rex = re.compile(match_str)
|
||||||
|
|
||||||
self.assertTrue(
|
self.assertTrue(
|
||||||
isinstance(got, compat_str) and match_rex.match(got),
|
isinstance(got, compat_str),
|
||||||
|
'Expected a %r object, but got %r' % (compat_str, type(got)))
|
||||||
|
self.assertTrue(
|
||||||
|
match_rex.match(got),
|
||||||
u'field %s (value: %r) should match %r' % (info_field, got, match_str))
|
u'field %s (value: %r) should match %r' % (info_field, got, match_str))
|
||||||
elif isinstance(expected, type):
|
elif isinstance(expected, type):
|
||||||
got = got_dict.get(info_field)
|
got = got_dict.get(info_field)
|
||||||
|
@@ -221,7 +221,7 @@ class TestFormatSelection(unittest.TestCase):
|
|||||||
'138', '137', '248', '136', '247', '135', '246',
|
'138', '137', '248', '136', '247', '135', '246',
|
||||||
'245', '244', '134', '243', '133', '242', '160',
|
'245', '244', '134', '243', '133', '242', '160',
|
||||||
# Dash audio
|
# Dash audio
|
||||||
'141', '172', '140', '139', '171',
|
'141', '172', '140', '171', '139',
|
||||||
]
|
]
|
||||||
|
|
||||||
for f1id, f2id in zip(order, order[1:]):
|
for f1id, f2id in zip(order, order[1:]):
|
||||||
|
@@ -7,6 +7,7 @@ import unittest
|
|||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import (
|
from test.helper import (
|
||||||
|
assertGreaterEqual,
|
||||||
get_params,
|
get_params,
|
||||||
gettestcases,
|
gettestcases,
|
||||||
expect_info_dict,
|
expect_info_dict,
|
||||||
@@ -136,12 +137,18 @@ def generator(test_case):
|
|||||||
self.assertEqual(res_dict['_type'], 'playlist')
|
self.assertEqual(res_dict['_type'], 'playlist')
|
||||||
expect_info_dict(self, test_case.get('info_dict', {}), res_dict)
|
expect_info_dict(self, test_case.get('info_dict', {}), res_dict)
|
||||||
if 'playlist_mincount' in test_case:
|
if 'playlist_mincount' in test_case:
|
||||||
self.assertGreaterEqual(
|
assertGreaterEqual(
|
||||||
|
self,
|
||||||
len(res_dict['entries']),
|
len(res_dict['entries']),
|
||||||
test_case['playlist_mincount'],
|
test_case['playlist_mincount'],
|
||||||
'Expected at least %d in playlist %s, but got only %d' % (
|
'Expected at least %d in playlist %s, but got only %d' % (
|
||||||
test_case['playlist_mincount'], test_case['url'],
|
test_case['playlist_mincount'], test_case['url'],
|
||||||
len(res_dict['entries'])))
|
len(res_dict['entries'])))
|
||||||
|
if 'playlist_count' in test_case:
|
||||||
|
self.assertEqual(
|
||||||
|
len(res_dict['entries']),
|
||||||
|
test_case['playlist_count'],
|
||||||
|
'Expected at %d in playlist %s, but got %d.')
|
||||||
|
|
||||||
for tc in test_cases:
|
for tc in test_cases:
|
||||||
tc_filename = get_tc_filename(tc)
|
tc_filename = get_tc_filename(tc)
|
||||||
|
@@ -62,6 +62,7 @@ from youtube_dl.extractor import (
|
|||||||
InstagramUserIE,
|
InstagramUserIE,
|
||||||
CSpanIE,
|
CSpanIE,
|
||||||
AolIE,
|
AolIE,
|
||||||
|
GameOnePlaylistIE,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -309,24 +310,6 @@ class TestPlaylists(unittest.TestCase):
|
|||||||
self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker')
|
self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker')
|
||||||
self.assertEqual(len(result['entries']), 3)
|
self.assertEqual(len(result['entries']), 3)
|
||||||
|
|
||||||
def test_GoogleSearch(self):
|
|
||||||
dl = FakeYDL()
|
|
||||||
ie = GoogleSearchIE(dl)
|
|
||||||
result = ie.extract('gvsearch15:python language')
|
|
||||||
self.assertIsPlaylist(result)
|
|
||||||
self.assertEqual(result['id'], 'python language')
|
|
||||||
self.assertEqual(result['title'], 'python language')
|
|
||||||
self.assertEqual(len(result['entries']), 15)
|
|
||||||
|
|
||||||
def test_generic_rss_feed(self):
|
|
||||||
dl = FakeYDL()
|
|
||||||
ie = GenericIE(dl)
|
|
||||||
result = ie.extract('http://phihag.de/2014/youtube-dl/rss.xml')
|
|
||||||
self.assertIsPlaylist(result)
|
|
||||||
self.assertEqual(result['id'], 'http://phihag.de/2014/youtube-dl/rss.xml')
|
|
||||||
self.assertEqual(result['title'], 'Zero Punctuation')
|
|
||||||
self.assertTrue(len(result['entries']) > 10)
|
|
||||||
|
|
||||||
def test_ted_playlist(self):
|
def test_ted_playlist(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = TEDIE(dl)
|
ie = TEDIE(dl)
|
||||||
@@ -407,5 +390,6 @@ class TestPlaylists(unittest.TestCase):
|
|||||||
self.assertEqual(result['id'], 'rbhagwati2')
|
self.assertEqual(result['id'], 'rbhagwati2')
|
||||||
assertGreaterEqual(self, len(result['entries']), 179)
|
assertGreaterEqual(self, len(result['entries']), 179)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@@ -219,6 +219,7 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(parse_duration('0h0m0s'), 0)
|
self.assertEqual(parse_duration('0h0m0s'), 0)
|
||||||
self.assertEqual(parse_duration('0m0s'), 0)
|
self.assertEqual(parse_duration('0m0s'), 0)
|
||||||
self.assertEqual(parse_duration('0s'), 0)
|
self.assertEqual(parse_duration('0s'), 0)
|
||||||
|
self.assertEqual(parse_duration('01:02:03.05'), 3723.05)
|
||||||
|
|
||||||
def test_fix_xml_ampersands(self):
|
def test_fix_xml_ampersands(self):
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
|
@@ -172,6 +172,7 @@ class YoutubeDL(object):
|
|||||||
The following options are used by the post processors:
|
The following options are used by the post processors:
|
||||||
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
|
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
|
||||||
otherwise prefer avconv.
|
otherwise prefer avconv.
|
||||||
|
exec_cmd: Arbitrary command to run after downloading
|
||||||
"""
|
"""
|
||||||
|
|
||||||
params = None
|
params = None
|
||||||
@@ -424,7 +425,7 @@ class YoutubeDL(object):
|
|||||||
autonumber_templ = '%0' + str(autonumber_size) + 'd'
|
autonumber_templ = '%0' + str(autonumber_size) + 'd'
|
||||||
template_dict['autonumber'] = autonumber_templ % self._num_downloads
|
template_dict['autonumber'] = autonumber_templ % self._num_downloads
|
||||||
if template_dict.get('playlist_index') is not None:
|
if template_dict.get('playlist_index') is not None:
|
||||||
template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
|
template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
|
||||||
if template_dict.get('resolution') is None:
|
if template_dict.get('resolution') is None:
|
||||||
if template_dict.get('width') and template_dict.get('height'):
|
if template_dict.get('width') and template_dict.get('height'):
|
||||||
template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
|
template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
|
||||||
@@ -480,7 +481,10 @@ class YoutubeDL(object):
|
|||||||
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
|
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
|
||||||
age_limit = self.params.get('age_limit')
|
age_limit = self.params.get('age_limit')
|
||||||
if age_limit is not None:
|
if age_limit is not None:
|
||||||
if age_limit < info_dict.get('age_limit', 0):
|
actual_age_limit = info_dict.get('age_limit')
|
||||||
|
if actual_age_limit is None:
|
||||||
|
actual_age_limit = 0
|
||||||
|
if age_limit < actual_age_limit:
|
||||||
return 'Skipping "' + title + '" because it is age restricted'
|
return 'Skipping "' + title + '" because it is age restricted'
|
||||||
if self.in_download_archive(info_dict):
|
if self.in_download_archive(info_dict):
|
||||||
return '%s has already been recorded in archive' % video_title
|
return '%s has already been recorded in archive' % video_title
|
||||||
@@ -633,6 +637,7 @@ class YoutubeDL(object):
|
|||||||
for i, entry in enumerate(entries, 1):
|
for i, entry in enumerate(entries, 1):
|
||||||
self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
|
self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
|
||||||
extra = {
|
extra = {
|
||||||
|
'n_entries': n_entries,
|
||||||
'playlist': playlist,
|
'playlist': playlist,
|
||||||
'playlist_index': i + playliststart,
|
'playlist_index': i + playliststart,
|
||||||
'extractor': ie_result['extractor'],
|
'extractor': ie_result['extractor'],
|
||||||
|
@@ -71,6 +71,9 @@ __authors__ = (
|
|||||||
'Sebastian Haas',
|
'Sebastian Haas',
|
||||||
'Alexander Kirk',
|
'Alexander Kirk',
|
||||||
'Erik Johnson',
|
'Erik Johnson',
|
||||||
|
'Keith Beckman',
|
||||||
|
'Ole Ernst',
|
||||||
|
'Aaron McDaniel (mcd1992)',
|
||||||
)
|
)
|
||||||
|
|
||||||
__license__ = 'Public Domain'
|
__license__ = 'Public Domain'
|
||||||
@@ -117,6 +120,7 @@ from .postprocessor import (
|
|||||||
FFmpegExtractAudioPP,
|
FFmpegExtractAudioPP,
|
||||||
FFmpegEmbedSubtitlePP,
|
FFmpegEmbedSubtitlePP,
|
||||||
XAttrMetadataPP,
|
XAttrMetadataPP,
|
||||||
|
ExecAfterDownloadPP,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -316,6 +320,8 @@ def parseOpts(overrideArguments=None):
|
|||||||
dest='username', metavar='USERNAME', help='account username')
|
dest='username', metavar='USERNAME', help='account username')
|
||||||
authentication.add_option('-p', '--password',
|
authentication.add_option('-p', '--password',
|
||||||
dest='password', metavar='PASSWORD', help='account password')
|
dest='password', metavar='PASSWORD', help='account password')
|
||||||
|
authentication.add_option('-2', '--twofactor',
|
||||||
|
dest='twofactor', metavar='TWOFACTOR', help='two-factor auth code')
|
||||||
authentication.add_option('-n', '--netrc',
|
authentication.add_option('-n', '--netrc',
|
||||||
action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
|
action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
|
||||||
authentication.add_option('--video-password',
|
authentication.add_option('--video-password',
|
||||||
@@ -546,7 +552,9 @@ def parseOpts(overrideArguments=None):
|
|||||||
help='Prefer avconv over ffmpeg for running the postprocessors (default)')
|
help='Prefer avconv over ffmpeg for running the postprocessors (default)')
|
||||||
postproc.add_option('--prefer-ffmpeg', action='store_true', dest='prefer_ffmpeg',
|
postproc.add_option('--prefer-ffmpeg', action='store_true', dest='prefer_ffmpeg',
|
||||||
help='Prefer ffmpeg over avconv for running the postprocessors')
|
help='Prefer ffmpeg over avconv for running the postprocessors')
|
||||||
|
postproc.add_option(
|
||||||
|
'--exec', metavar='CMD', dest='exec_cmd',
|
||||||
|
help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'' )
|
||||||
|
|
||||||
parser.add_option_group(general)
|
parser.add_option_group(general)
|
||||||
parser.add_option_group(selection)
|
parser.add_option_group(selection)
|
||||||
@@ -750,6 +758,7 @@ def _real_main(argv=None):
|
|||||||
'usenetrc': opts.usenetrc,
|
'usenetrc': opts.usenetrc,
|
||||||
'username': opts.username,
|
'username': opts.username,
|
||||||
'password': opts.password,
|
'password': opts.password,
|
||||||
|
'twofactor': opts.twofactor,
|
||||||
'videopassword': opts.videopassword,
|
'videopassword': opts.videopassword,
|
||||||
'quiet': (opts.quiet or any_printing),
|
'quiet': (opts.quiet or any_printing),
|
||||||
'no_warnings': opts.no_warnings,
|
'no_warnings': opts.no_warnings,
|
||||||
@@ -826,6 +835,7 @@ def _real_main(argv=None):
|
|||||||
'default_search': opts.default_search,
|
'default_search': opts.default_search,
|
||||||
'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
|
'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
|
||||||
'encoding': opts.encoding,
|
'encoding': opts.encoding,
|
||||||
|
'exec_cmd': opts.exec_cmd,
|
||||||
}
|
}
|
||||||
|
|
||||||
with YoutubeDL(ydl_opts) as ydl:
|
with YoutubeDL(ydl_opts) as ydl:
|
||||||
@@ -849,6 +859,13 @@ def _real_main(argv=None):
|
|||||||
ydl.add_post_processor(FFmpegAudioFixPP())
|
ydl.add_post_processor(FFmpegAudioFixPP())
|
||||||
ydl.add_post_processor(AtomicParsleyPP())
|
ydl.add_post_processor(AtomicParsleyPP())
|
||||||
|
|
||||||
|
|
||||||
|
# Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
|
||||||
|
# So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
|
||||||
|
if opts.exec_cmd:
|
||||||
|
ydl.add_post_processor(ExecAfterDownloadPP(
|
||||||
|
verboseOutput=opts.verbose, exec_cmd=opts.exec_cmd))
|
||||||
|
|
||||||
# Update version
|
# Update version
|
||||||
if opts.update_self:
|
if opts.update_self:
|
||||||
update_self(ydl.to_screen, opts.verbose)
|
update_self(ydl.to_screen, opts.verbose)
|
||||||
|
@@ -27,8 +27,16 @@ class HttpFD(FileDownloader):
|
|||||||
headers['Youtubedl-user-agent'] = info_dict['user_agent']
|
headers['Youtubedl-user-agent'] = info_dict['user_agent']
|
||||||
if 'http_referer' in info_dict:
|
if 'http_referer' in info_dict:
|
||||||
headers['Referer'] = info_dict['http_referer']
|
headers['Referer'] = info_dict['http_referer']
|
||||||
basic_request = compat_urllib_request.Request(url, None, headers)
|
add_headers = info_dict.get('http_headers')
|
||||||
request = compat_urllib_request.Request(url, None, headers)
|
if add_headers:
|
||||||
|
headers.update(add_headers)
|
||||||
|
data = info_dict.get('http_post_data')
|
||||||
|
http_method = info_dict.get('http_method')
|
||||||
|
basic_request = compat_urllib_request.Request(url, data, headers)
|
||||||
|
request = compat_urllib_request.Request(url, data, headers)
|
||||||
|
if http_method is not None:
|
||||||
|
basic_request.get_method = lambda: http_method
|
||||||
|
request.get_method = lambda: http_method
|
||||||
|
|
||||||
is_test = self.params.get('test', False)
|
is_test = self.params.get('test', False)
|
||||||
|
|
||||||
|
@@ -9,7 +9,7 @@ from .allocine import AllocineIE
|
|||||||
from .aparat import AparatIE
|
from .aparat import AparatIE
|
||||||
from .appletrailers import AppleTrailersIE
|
from .appletrailers import AppleTrailersIE
|
||||||
from .archiveorg import ArchiveOrgIE
|
from .archiveorg import ArchiveOrgIE
|
||||||
from .ard import ARDIE
|
from .ard import ARDIE, ARDMediathekIE
|
||||||
from .arte import (
|
from .arte import (
|
||||||
ArteTvIE,
|
ArteTvIE,
|
||||||
ArteTVPlus7IE,
|
ArteTVPlus7IE,
|
||||||
@@ -88,6 +88,7 @@ from .engadget import EngadgetIE
|
|||||||
from .escapist import EscapistIE
|
from .escapist import EscapistIE
|
||||||
from .everyonesmixtape import EveryonesMixtapeIE
|
from .everyonesmixtape import EveryonesMixtapeIE
|
||||||
from .exfm import ExfmIE
|
from .exfm import ExfmIE
|
||||||
|
from .expotv import ExpoTVIE
|
||||||
from .extremetube import ExtremeTubeIE
|
from .extremetube import ExtremeTubeIE
|
||||||
from .facebook import FacebookIE
|
from .facebook import FacebookIE
|
||||||
from .faz import FazIE
|
from .faz import FazIE
|
||||||
@@ -115,7 +116,10 @@ from .freesound import FreesoundIE
|
|||||||
from .freespeech import FreespeechIE
|
from .freespeech import FreespeechIE
|
||||||
from .funnyordie import FunnyOrDieIE
|
from .funnyordie import FunnyOrDieIE
|
||||||
from .gamekings import GamekingsIE
|
from .gamekings import GamekingsIE
|
||||||
from .gameone import GameOneIE
|
from .gameone import (
|
||||||
|
GameOneIE,
|
||||||
|
GameOnePlaylistIE,
|
||||||
|
)
|
||||||
from .gamespot import GameSpotIE
|
from .gamespot import GameSpotIE
|
||||||
from .gamestar import GameStarIE
|
from .gamestar import GameStarIE
|
||||||
from .gametrailers import GametrailersIE
|
from .gametrailers import GametrailersIE
|
||||||
@@ -126,6 +130,7 @@ from .googleplus import GooglePlusIE
|
|||||||
from .googlesearch import GoogleSearchIE
|
from .googlesearch import GoogleSearchIE
|
||||||
from .gorillavid import GorillaVidIE
|
from .gorillavid import GorillaVidIE
|
||||||
from .goshgay import GoshgayIE
|
from .goshgay import GoshgayIE
|
||||||
|
from .grooveshark import GroovesharkIE
|
||||||
from .hark import HarkIE
|
from .hark import HarkIE
|
||||||
from .helsinki import HelsinkiIE
|
from .helsinki import HelsinkiIE
|
||||||
from .hentaistigma import HentaiStigmaIE
|
from .hentaistigma import HentaiStigmaIE
|
||||||
@@ -183,6 +188,7 @@ from .malemotion import MalemotionIE
|
|||||||
from .mdr import MDRIE
|
from .mdr import MDRIE
|
||||||
from .metacafe import MetacafeIE
|
from .metacafe import MetacafeIE
|
||||||
from .metacritic import MetacriticIE
|
from .metacritic import MetacriticIE
|
||||||
|
from .ministrygrid import MinistryGridIE
|
||||||
from .mit import TechTVMITIE, MITIE, OCWMITIE
|
from .mit import TechTVMITIE, MITIE, OCWMITIE
|
||||||
from .mitele import MiTeleIE
|
from .mitele import MiTeleIE
|
||||||
from .mixcloud import MixcloudIE
|
from .mixcloud import MixcloudIE
|
||||||
@@ -194,6 +200,7 @@ from .mooshare import MooshareIE
|
|||||||
from .morningstar import MorningstarIE
|
from .morningstar import MorningstarIE
|
||||||
from .motherless import MotherlessIE
|
from .motherless import MotherlessIE
|
||||||
from .motorsport import MotorsportIE
|
from .motorsport import MotorsportIE
|
||||||
|
from .movieclips import MovieClipsIE
|
||||||
from .moviezine import MoviezineIE
|
from .moviezine import MoviezineIE
|
||||||
from .movshare import MovShareIE
|
from .movshare import MovShareIE
|
||||||
from .mtv import (
|
from .mtv import (
|
||||||
@@ -202,6 +209,7 @@ from .mtv import (
|
|||||||
MTVIggyIE,
|
MTVIggyIE,
|
||||||
)
|
)
|
||||||
from .musicplayon import MusicPlayOnIE
|
from .musicplayon import MusicPlayOnIE
|
||||||
|
from .musicvault import MusicVaultIE
|
||||||
from .muzu import MuzuTVIE
|
from .muzu import MuzuTVIE
|
||||||
from .myspace import MySpaceIE
|
from .myspace import MySpaceIE
|
||||||
from .myspass import MySpassIE
|
from .myspass import MySpassIE
|
||||||
@@ -243,6 +251,7 @@ from .parliamentliveuk import ParliamentLiveUKIE
|
|||||||
from .patreon import PatreonIE
|
from .patreon import PatreonIE
|
||||||
from .pbs import PBSIE
|
from .pbs import PBSIE
|
||||||
from .photobucket import PhotobucketIE
|
from .photobucket import PhotobucketIE
|
||||||
|
from .playfm import PlayFMIE
|
||||||
from .playvid import PlayvidIE
|
from .playvid import PlayvidIE
|
||||||
from .podomatic import PodomaticIE
|
from .podomatic import PodomaticIE
|
||||||
from .pornhd import PornHdIE
|
from .pornhd import PornHdIE
|
||||||
@@ -263,7 +272,7 @@ from .rtbf import RTBFIE
|
|||||||
from .rtlnl import RtlXlIE
|
from .rtlnl import RtlXlIE
|
||||||
from .rtlnow import RTLnowIE
|
from .rtlnow import RTLnowIE
|
||||||
from .rts import RTSIE
|
from .rts import RTSIE
|
||||||
from .rtve import RTVEALaCartaIE
|
from .rtve import RTVEALaCartaIE, RTVELiveIE
|
||||||
from .ruhd import RUHDIE
|
from .ruhd import RUHDIE
|
||||||
from .rutube import (
|
from .rutube import (
|
||||||
RutubeIE,
|
RutubeIE,
|
||||||
@@ -274,6 +283,7 @@ from .rutube import (
|
|||||||
from .rutv import RUTVIE
|
from .rutv import RUTVIE
|
||||||
from .sapo import SapoIE
|
from .sapo import SapoIE
|
||||||
from .savefrom import SaveFromIE
|
from .savefrom import SaveFromIE
|
||||||
|
from .sbs import SBSIE
|
||||||
from .scivee import SciVeeIE
|
from .scivee import SciVeeIE
|
||||||
from .screencast import ScreencastIE
|
from .screencast import ScreencastIE
|
||||||
from .servingsys import ServingSysIE
|
from .servingsys import ServingSysIE
|
||||||
@@ -306,6 +316,7 @@ from .spankwire import SpankwireIE
|
|||||||
from .spiegel import SpiegelIE
|
from .spiegel import SpiegelIE
|
||||||
from .spiegeltv import SpiegeltvIE
|
from .spiegeltv import SpiegeltvIE
|
||||||
from .spike import SpikeIE
|
from .spike import SpikeIE
|
||||||
|
from .sportdeutschland import SportDeutschlandIE
|
||||||
from .stanfordoc import StanfordOpenClassroomIE
|
from .stanfordoc import StanfordOpenClassroomIE
|
||||||
from .steam import SteamIE
|
from .steam import SteamIE
|
||||||
from .streamcloud import StreamcloudIE
|
from .streamcloud import StreamcloudIE
|
||||||
@@ -386,6 +397,7 @@ from .vuclip import VuClipIE
|
|||||||
from .vulture import VultureIE
|
from .vulture import VultureIE
|
||||||
from .washingtonpost import WashingtonPostIE
|
from .washingtonpost import WashingtonPostIE
|
||||||
from .wat import WatIE
|
from .wat import WatIE
|
||||||
|
from .wayofthemaster import WayOfTheMasterIE
|
||||||
from .wdr import (
|
from .wdr import (
|
||||||
WDRIE,
|
WDRIE,
|
||||||
WDRMobileIE,
|
WDRMobileIE,
|
||||||
|
@@ -10,10 +10,14 @@ from ..utils import (
|
|||||||
qualities,
|
qualities,
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
unified_strdate,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class ARDIE(InfoExtractor):
|
class ARDMediathekIE(InfoExtractor):
|
||||||
|
IE_NAME = 'ARD:mediathek'
|
||||||
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@@ -128,3 +132,60 @@ class ARDIE(InfoExtractor):
|
|||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class ARDIE(InfoExtractor):
|
||||||
|
_VALID_URL = '(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
|
||||||
|
'md5': 'd216c3a86493f9322545e045ddc3eb35',
|
||||||
|
'info_dict': {
|
||||||
|
'display_id': 'die-story-im-ersten-mission-unter-falscher-flagge',
|
||||||
|
'id': '100',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'duration': 2600,
|
||||||
|
'title': 'Die Story im Ersten: Mission unter falscher Flagge',
|
||||||
|
'upload_date': '20140804',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
display_id = mobj.group('display_id')
|
||||||
|
|
||||||
|
player_url = mobj.group('mainurl') + '~playerXml.xml'
|
||||||
|
doc = self._download_xml(player_url, display_id)
|
||||||
|
video_node = doc.find('./video')
|
||||||
|
upload_date = unified_strdate(video_node.find('./broadcastDate').text)
|
||||||
|
thumbnail = video_node.find('.//teaserImage//variant/url').text
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for a in video_node.findall('.//asset'):
|
||||||
|
f = {
|
||||||
|
'format_id': a.attrib['type'],
|
||||||
|
'width': int_or_none(a.find('./frameWidth').text),
|
||||||
|
'height': int_or_none(a.find('./frameHeight').text),
|
||||||
|
'vbr': int_or_none(a.find('./bitrateVideo').text),
|
||||||
|
'abr': int_or_none(a.find('./bitrateAudio').text),
|
||||||
|
'vcodec': a.find('./codecVideo').text,
|
||||||
|
'tbr': int_or_none(a.find('./totalBitrate').text),
|
||||||
|
}
|
||||||
|
if a.find('./serverPrefix').text:
|
||||||
|
f['url'] = a.find('./serverPrefix').text
|
||||||
|
f['playpath'] = a.find('./fileName').text
|
||||||
|
else:
|
||||||
|
f['url'] = a.find('./fileName').text
|
||||||
|
formats.append(f)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': mobj.group('id'),
|
||||||
|
'formats': formats,
|
||||||
|
'display_id': display_id,
|
||||||
|
'title': video_node.find('./title').text,
|
||||||
|
'duration': parse_duration(video_node.find('./duration').text),
|
||||||
|
'upload_date': upload_date,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -177,16 +177,26 @@ class ArteTVPlus7IE(InfoExtractor):
|
|||||||
# It also uses the arte_vp_url url from the webpage to extract the information
|
# It also uses the arte_vp_url url from the webpage to extract the information
|
||||||
class ArteTVCreativeIE(ArteTVPlus7IE):
|
class ArteTVCreativeIE(ArteTVPlus7IE):
|
||||||
IE_NAME = 'arte.tv:creative'
|
IE_NAME = 'arte.tv:creative'
|
||||||
_VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de)/magazine?/(?P<id>.+)'
|
_VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de)/(?:magazine?/)?(?P<id>[^?#]+)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
|
'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '050489-002',
|
'id': '72176',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Agentur Amateur / Agence Amateur #2 : Corporate Design',
|
'title': 'Folge 2 - Corporate Design',
|
||||||
|
'upload_date': '20131004',
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://creative.arte.tv/fr/Monty-Python-Reunion',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '160676',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Monty Python live (mostly)',
|
||||||
|
'description': 'Événement ! Quarante-cinq ans après leurs premiers succès, les légendaires Monty Python remontent sur scène.\n',
|
||||||
|
'upload_date': '20140805',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
|
||||||
class ArteTVFutureIE(ArteTVPlus7IE):
|
class ArteTVFutureIE(ArteTVPlus7IE):
|
||||||
|
@@ -15,7 +15,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class BlipTVIE(SubtitlesInfoExtractor):
|
class BlipTVIE(SubtitlesInfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+]+)))'
|
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
@@ -49,6 +49,21 @@ class BlipTVIE(SubtitlesInfoExtractor):
|
|||||||
'uploader_id': '792887',
|
'uploader_id': '792887',
|
||||||
'duration': 279,
|
'duration': 279,
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# https://bugzilla.redhat.com/show_bug.cgi?id=967465
|
||||||
|
'url': 'http://a.blip.tv/api.swf#h6Uag5KbVwI',
|
||||||
|
'md5': '314e87b1ebe7a48fcbfdd51b791ce5a6',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6573122',
|
||||||
|
'ext': 'mov',
|
||||||
|
'upload_date': '20130520',
|
||||||
|
'description': 'Two hapless space marines argue over what to do when they realize they have an astronomically huge problem on their hands.',
|
||||||
|
'title': 'Red vs. Blue Season 11 Trailer',
|
||||||
|
'timestamp': 1369029609,
|
||||||
|
'uploader': 'redvsblue',
|
||||||
|
'uploader_id': '792887',
|
||||||
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -150,7 +165,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class BlipTVUserIE(InfoExtractor):
|
class BlipTVUserIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$'
|
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'
|
||||||
_PAGE_SIZE = 12
|
_PAGE_SIZE = 12
|
||||||
IE_NAME = 'blip.tv:user'
|
IE_NAME = 'blip.tv:user'
|
||||||
|
|
||||||
|
@@ -154,12 +154,14 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
def _extract_brightcove_urls(cls, webpage):
|
def _extract_brightcove_urls(cls, webpage):
|
||||||
"""Return a list of all Brightcove URLs from the webpage """
|
"""Return a list of all Brightcove URLs from the webpage """
|
||||||
|
|
||||||
url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
|
url_m = re.search(
|
||||||
|
r'<meta\s+property="og:video"\s+content="(https?://(?:secure|c)\.brightcove.com/[^"]+)"',
|
||||||
|
webpage)
|
||||||
if url_m:
|
if url_m:
|
||||||
url = unescapeHTML(url_m.group(1))
|
url = unescapeHTML(url_m.group(1))
|
||||||
# Some sites don't add it, we can't download with this url, for example:
|
# Some sites don't add it, we can't download with this url, for example:
|
||||||
# http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
|
# http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
|
||||||
if 'playerKey' in url:
|
if 'playerKey' in url or 'videoId' in url:
|
||||||
return [url]
|
return [url]
|
||||||
|
|
||||||
matches = re.findall(
|
matches = re.findall(
|
||||||
@@ -188,9 +190,13 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
referer = smuggled_data.get('Referer', url)
|
referer = smuggled_data.get('Referer', url)
|
||||||
return self._get_video_info(
|
return self._get_video_info(
|
||||||
videoPlayer[0], query_str, query, referer=referer)
|
videoPlayer[0], query_str, query, referer=referer)
|
||||||
else:
|
elif 'playerKey' in query:
|
||||||
player_key = query['playerKey']
|
player_key = query['playerKey']
|
||||||
return self._get_playlist_info(player_key[0])
|
return self._get_playlist_info(player_key[0])
|
||||||
|
else:
|
||||||
|
raise ExtractorError(
|
||||||
|
'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
|
||||||
|
expected=True)
|
||||||
|
|
||||||
def _get_video_info(self, video_id, query_str, query, referer=None):
|
def _get_video_info(self, video_id, query_str, query, referer=None):
|
||||||
request_url = self._FEDERATED_URL_TEMPLATE % query_str
|
request_url = self._FEDERATED_URL_TEMPLATE % query_str
|
||||||
@@ -202,6 +208,13 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
req.add_header('Referer', referer)
|
req.add_header('Referer', referer)
|
||||||
webpage = self._download_webpage(req, video_id)
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
|
error_msg = self._html_search_regex(
|
||||||
|
r"<h1>We're sorry.</h1>\s*<p>(.*?)</p>", webpage,
|
||||||
|
'error message', default=None)
|
||||||
|
if error_msg is not None:
|
||||||
|
raise ExtractorError(
|
||||||
|
'brightcove said: %s' % error_msg, expected=True)
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
info = self._search_regex(r'var experienceJSON = ({.*});', webpage, 'json')
|
info = self._search_regex(r'var experienceJSON = ({.*});', webpage, 'json')
|
||||||
info = json.loads(info)['data']
|
info = json.loads(info)['data']
|
||||||
|
@@ -84,6 +84,12 @@ class InfoExtractor(object):
|
|||||||
format, irrespective of the file format.
|
format, irrespective of the file format.
|
||||||
-1 for default (order by other properties),
|
-1 for default (order by other properties),
|
||||||
-2 or smaller for less than default.
|
-2 or smaller for less than default.
|
||||||
|
* http_referer HTTP Referer header value to set.
|
||||||
|
* http_method HTTP method to use for the download.
|
||||||
|
* http_headers A dictionary of additional HTTP headers
|
||||||
|
to add to the request.
|
||||||
|
* http_post_data Additional data to send with a POST
|
||||||
|
request.
|
||||||
url: Final video URL.
|
url: Final video URL.
|
||||||
ext: Video filename extension.
|
ext: Video filename extension.
|
||||||
format: The video format, defaults to ext (used for --get-format)
|
format: The video format, defaults to ext (used for --get-format)
|
||||||
@@ -108,7 +114,7 @@ class InfoExtractor(object):
|
|||||||
upload_date: Video upload date (YYYYMMDD).
|
upload_date: Video upload date (YYYYMMDD).
|
||||||
If not explicitly set, calculated from timestamp.
|
If not explicitly set, calculated from timestamp.
|
||||||
uploader_id: Nickname or id of the video uploader.
|
uploader_id: Nickname or id of the video uploader.
|
||||||
location: Physical location of the video.
|
location: Physical location where the video was filmed.
|
||||||
subtitles: The subtitle file contents as a dictionary in the format
|
subtitles: The subtitle file contents as a dictionary in the format
|
||||||
{language: subtitles}.
|
{language: subtitles}.
|
||||||
duration: Length of the video in seconds, as an integer.
|
duration: Length of the video in seconds, as an integer.
|
||||||
@@ -434,6 +440,22 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
return (username, password)
|
return (username, password)
|
||||||
|
|
||||||
|
def _get_tfa_info(self):
|
||||||
|
"""
|
||||||
|
Get the two-factor authentication info
|
||||||
|
TODO - asking the user will be required for sms/phone verify
|
||||||
|
currently just uses the command line option
|
||||||
|
If there's no info available, return None
|
||||||
|
"""
|
||||||
|
if self._downloader is None:
|
||||||
|
return None
|
||||||
|
downloader_params = self._downloader.params
|
||||||
|
|
||||||
|
if downloader_params.get('twofactor', None) is not None:
|
||||||
|
return downloader_params['twofactor']
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
# Helper functions for extracting OpenGraph info
|
# Helper functions for extracting OpenGraph info
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _og_regexes(prop):
|
def _og_regexes(prop):
|
||||||
@@ -598,11 +620,15 @@ class InfoExtractor(object):
|
|||||||
'Unable to download f4m manifest')
|
'Unable to download f4m manifest')
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for media_el in manifest.findall('{http://ns.adobe.com/f4m/1.0}media'):
|
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media')
|
||||||
|
for i, media_el in enumerate(media_nodes):
|
||||||
|
tbr = int_or_none(media_el.attrib.get('bitrate'))
|
||||||
|
format_id = 'f4m-%d' % (i if tbr is None else tbr)
|
||||||
formats.append({
|
formats.append({
|
||||||
|
'format_id': format_id,
|
||||||
'url': manifest_url,
|
'url': manifest_url,
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'tbr': int_or_none(media_el.attrib.get('bitrate')),
|
'tbr': tbr,
|
||||||
'width': int_or_none(media_el.attrib.get('width')),
|
'width': int_or_none(media_el.attrib.get('width')),
|
||||||
'height': int_or_none(media_el.attrib.get('height')),
|
'height': int_or_none(media_el.attrib.get('height')),
|
||||||
})
|
})
|
||||||
@@ -610,6 +636,55 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
|
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None):
|
||||||
|
formats = [{
|
||||||
|
'format_id': 'm3u8-meta',
|
||||||
|
'url': m3u8_url,
|
||||||
|
'ext': ext,
|
||||||
|
'protocol': 'm3u8',
|
||||||
|
'preference': -1,
|
||||||
|
'resolution': 'multiple',
|
||||||
|
'format_note': 'Quality selection URL',
|
||||||
|
}]
|
||||||
|
|
||||||
|
m3u8_doc = self._download_webpage(m3u8_url, video_id)
|
||||||
|
last_info = None
|
||||||
|
kv_rex = re.compile(
|
||||||
|
r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')
|
||||||
|
for line in m3u8_doc.splitlines():
|
||||||
|
if line.startswith('#EXT-X-STREAM-INF:'):
|
||||||
|
last_info = {}
|
||||||
|
for m in kv_rex.finditer(line):
|
||||||
|
v = m.group('val')
|
||||||
|
if v.startswith('"'):
|
||||||
|
v = v[1:-1]
|
||||||
|
last_info[m.group('key')] = v
|
||||||
|
elif line.startswith('#') or not line.strip():
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
|
||||||
|
|
||||||
|
f = {
|
||||||
|
'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)),
|
||||||
|
'url': line.strip(),
|
||||||
|
'tbr': tbr,
|
||||||
|
'ext': ext,
|
||||||
|
}
|
||||||
|
codecs = last_info.get('CODECS')
|
||||||
|
if codecs:
|
||||||
|
video, audio = codecs.split(',')
|
||||||
|
f['vcodec'] = video.partition('.')[0]
|
||||||
|
f['acodec'] = audio.partition('.')[0]
|
||||||
|
resolution = last_info.get('RESOLUTION')
|
||||||
|
if resolution:
|
||||||
|
width_str, height_str = resolution.split('x')
|
||||||
|
f['width'] = int(width_str)
|
||||||
|
f['height'] = int(height_str)
|
||||||
|
formats.append(f)
|
||||||
|
last_info = {}
|
||||||
|
self._sort_formats(formats)
|
||||||
|
return formats
|
||||||
|
|
||||||
|
|
||||||
class SearchInfoExtractor(InfoExtractor):
|
class SearchInfoExtractor(InfoExtractor):
|
||||||
"""
|
"""
|
||||||
|
@@ -1,19 +1,21 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import determine_ext
|
|
||||||
|
|
||||||
|
|
||||||
class EbaumsWorldIE(InfoExtractor):
|
class EbaumsWorldIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.ebaumsworld\.com/video/watch/(?P<id>\d+)'
|
_VALID_URL = r'https?://www\.ebaumsworld\.com/video/watch/(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.ebaumsworld.com/video/watch/83367677/',
|
'url': 'http://www.ebaumsworld.com/video/watch/83367677/',
|
||||||
u'file': u'83367677.mp4',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '83367677',
|
||||||
u'title': u'A Giant Python Opens The Door',
|
'ext': 'mp4',
|
||||||
u'description': u'This is how nightmares start...',
|
'title': 'A Giant Python Opens The Door',
|
||||||
u'uploader': u'jihadpizza',
|
'description': 'This is how nightmares start...',
|
||||||
|
'uploader': 'jihadpizza',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -28,7 +30,6 @@ class EbaumsWorldIE(InfoExtractor):
|
|||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': config.find('title').text,
|
'title': config.find('title').text,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': determine_ext(video_url),
|
|
||||||
'description': config.find('description').text,
|
'description': config.find('description').text,
|
||||||
'thumbnail': config.find('image').text,
|
'thumbnail': config.find('image').text,
|
||||||
'uploader': config.find('username').text,
|
'uploader': config.find('username').text,
|
||||||
|
@@ -1,10 +1,13 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
compat_str,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -12,86 +15,98 @@ class EightTracksIE(InfoExtractor):
|
|||||||
IE_NAME = '8tracks'
|
IE_NAME = '8tracks'
|
||||||
_VALID_URL = r'https?://8tracks\.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
|
_VALID_URL = r'https?://8tracks\.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u"name": u"EightTracks",
|
"name": "EightTracks",
|
||||||
u"url": u"http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
|
"url": "http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
|
||||||
u"playlist": [
|
"info_dict": {
|
||||||
|
'id': '1336550',
|
||||||
|
'display_id': 'youtube-dl-test-tracks-a',
|
||||||
|
"description": "test chars: \"'/\\ä↭",
|
||||||
|
"title": "youtube-dl test tracks \"'/\\ä↭<>",
|
||||||
|
},
|
||||||
|
"playlist": [
|
||||||
{
|
{
|
||||||
u"file": u"11885610.m4a",
|
"md5": "96ce57f24389fc8734ce47f4c1abcc55",
|
||||||
u"md5": u"96ce57f24389fc8734ce47f4c1abcc55",
|
"info_dict": {
|
||||||
u"info_dict": {
|
"id": "11885610",
|
||||||
u"title": u"youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
|
"ext": "m4a",
|
||||||
u"uploader_id": u"ytdl"
|
"title": "youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
|
||||||
|
"uploader_id": "ytdl"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
u"file": u"11885608.m4a",
|
"md5": "4ab26f05c1f7291ea460a3920be8021f",
|
||||||
u"md5": u"4ab26f05c1f7291ea460a3920be8021f",
|
"info_dict": {
|
||||||
u"info_dict": {
|
"id": "11885608",
|
||||||
u"title": u"youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
|
"ext": "m4a",
|
||||||
u"uploader_id": u"ytdl"
|
"title": "youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
|
||||||
|
"uploader_id": "ytdl"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
u"file": u"11885679.m4a",
|
"md5": "d30b5b5f74217410f4689605c35d1fd7",
|
||||||
u"md5": u"d30b5b5f74217410f4689605c35d1fd7",
|
"info_dict": {
|
||||||
u"info_dict": {
|
"id": "11885679",
|
||||||
u"title": u"youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad",
|
"ext": "m4a",
|
||||||
u"uploader_id": u"ytdl"
|
"title": "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad",
|
||||||
|
"uploader_id": "ytdl"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
u"file": u"11885680.m4a",
|
"md5": "4eb0a669317cd725f6bbd336a29f923a",
|
||||||
u"md5": u"4eb0a669317cd725f6bbd336a29f923a",
|
"info_dict": {
|
||||||
u"info_dict": {
|
"id": "11885680",
|
||||||
u"title": u"youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad",
|
"ext": "m4a",
|
||||||
u"uploader_id": u"ytdl"
|
"title": "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad",
|
||||||
|
"uploader_id": "ytdl"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
u"file": u"11885682.m4a",
|
"md5": "1893e872e263a2705558d1d319ad19e8",
|
||||||
u"md5": u"1893e872e263a2705558d1d319ad19e8",
|
"info_dict": {
|
||||||
u"info_dict": {
|
"id": "11885682",
|
||||||
u"title": u"PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad",
|
"ext": "m4a",
|
||||||
u"uploader_id": u"ytdl"
|
"title": "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad",
|
||||||
|
"uploader_id": "ytdl"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
u"file": u"11885683.m4a",
|
"md5": "b673c46f47a216ab1741ae8836af5899",
|
||||||
u"md5": u"b673c46f47a216ab1741ae8836af5899",
|
"info_dict": {
|
||||||
u"info_dict": {
|
"id": "11885683",
|
||||||
u"title": u"PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad",
|
"ext": "m4a",
|
||||||
u"uploader_id": u"ytdl"
|
"title": "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad",
|
||||||
|
"uploader_id": "ytdl"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
u"file": u"11885684.m4a",
|
"md5": "1d74534e95df54986da7f5abf7d842b7",
|
||||||
u"md5": u"1d74534e95df54986da7f5abf7d842b7",
|
"info_dict": {
|
||||||
u"info_dict": {
|
"id": "11885684",
|
||||||
u"title": u"phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad",
|
"ext": "m4a",
|
||||||
u"uploader_id": u"ytdl"
|
"title": "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad",
|
||||||
|
"uploader_id": "ytdl"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
u"file": u"11885685.m4a",
|
"md5": "f081f47af8f6ae782ed131d38b9cd1c0",
|
||||||
u"md5": u"f081f47af8f6ae782ed131d38b9cd1c0",
|
"info_dict": {
|
||||||
u"info_dict": {
|
"id": "11885685",
|
||||||
u"title": u"phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad",
|
"ext": "m4a",
|
||||||
u"uploader_id": u"ytdl"
|
"title": "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad",
|
||||||
|
"uploader_id": "ytdl"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
|
||||||
playlist_id = mobj.group('id')
|
playlist_id = mobj.group('id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
json_like = self._search_regex(r"PAGE.mix = (.*?);\n", webpage, u'trax information', flags=re.DOTALL)
|
json_like = self._search_regex(
|
||||||
|
r"(?s)PAGE.mix = (.*?);\n", webpage, 'trax information')
|
||||||
data = json.loads(json_like)
|
data = json.loads(json_like)
|
||||||
|
|
||||||
session = str(random.randint(0, 1000000000))
|
session = str(random.randint(0, 1000000000))
|
||||||
@@ -99,21 +114,30 @@ class EightTracksIE(InfoExtractor):
|
|||||||
track_count = data['tracks_count']
|
track_count = data['tracks_count']
|
||||||
first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
|
first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
|
||||||
next_url = first_url
|
next_url = first_url
|
||||||
res = []
|
entries = []
|
||||||
for i in range(track_count):
|
for i in range(track_count):
|
||||||
api_json = self._download_webpage(next_url, playlist_id,
|
api_json = self._download_webpage(
|
||||||
note=u'Downloading song information %s/%s' % (str(i+1), track_count),
|
next_url, playlist_id,
|
||||||
errnote=u'Failed to download song information')
|
note='Downloading song information %d/%d' % (i + 1, track_count),
|
||||||
|
errnote='Failed to download song information')
|
||||||
api_data = json.loads(api_json)
|
api_data = json.loads(api_json)
|
||||||
track_data = api_data[u'set']['track']
|
track_data = api_data['set']['track']
|
||||||
info = {
|
info = {
|
||||||
'id': track_data['id'],
|
'id': compat_str(track_data['id']),
|
||||||
'url': track_data['track_file_stream_url'],
|
'url': track_data['track_file_stream_url'],
|
||||||
'title': track_data['performer'] + u' - ' + track_data['name'],
|
'title': track_data['performer'] + u' - ' + track_data['name'],
|
||||||
'raw_title': track_data['name'],
|
'raw_title': track_data['name'],
|
||||||
'uploader_id': data['user']['login'],
|
'uploader_id': data['user']['login'],
|
||||||
'ext': 'm4a',
|
'ext': 'm4a',
|
||||||
}
|
}
|
||||||
res.append(info)
|
entries.append(info)
|
||||||
next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
|
next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (
|
||||||
return res
|
session, mix_id, track_data['id'])
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'entries': entries,
|
||||||
|
'id': compat_str(mix_id),
|
||||||
|
'display_id': playlist_id,
|
||||||
|
'title': data.get('name'),
|
||||||
|
'description': data.get('description'),
|
||||||
|
}
|
||||||
|
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import fix_xml_ampersands
|
||||||
|
|
||||||
|
|
||||||
class EmpflixIE(InfoExtractor):
|
class EmpflixIE(InfoExtractor):
|
||||||
@@ -36,7 +37,8 @@ class EmpflixIE(InfoExtractor):
|
|||||||
webpage, 'flashvars.config')
|
webpage, 'flashvars.config')
|
||||||
|
|
||||||
cfg_xml = self._download_xml(
|
cfg_xml = self._download_xml(
|
||||||
cfg_url, video_id, note='Downloading metadata')
|
cfg_url, video_id, note='Downloading metadata',
|
||||||
|
transform_source=fix_xml_ampersands)
|
||||||
|
|
||||||
formats = [
|
formats = [
|
||||||
{
|
{
|
||||||
@@ -44,11 +46,13 @@ class EmpflixIE(InfoExtractor):
|
|||||||
'format_id': item.find('res').text,
|
'format_id': item.find('res').text,
|
||||||
} for item in cfg_xml.findall('./quality/item')
|
} for item in cfg_xml.findall('./quality/item')
|
||||||
]
|
]
|
||||||
|
thumbnail = cfg_xml.find('./startThumb').text
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'description': video_description,
|
'description': video_description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
}
|
}
|
||||||
|
73
youtube_dl/extractor/expotv.py
Normal file
73
youtube_dl/extractor/expotv.py
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ExpoTVIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://www\.expotv\.com/videos/[^?#]*/(?P<id>[0-9]+)($|[?#])'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.expotv.com/videos/reviews/1/24/LinneCardscom/17561',
|
||||||
|
'md5': '2985e6d7a392b2f7a05e0ca350fe41d0',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '17561',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20060212',
|
||||||
|
'title': 'My Favorite Online Scrapbook Store',
|
||||||
|
'view_count': int,
|
||||||
|
'description': 'You\'ll find most everything you need at this virtual store front.',
|
||||||
|
'uploader': 'Anna T.',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
player_key = self._search_regex(
|
||||||
|
r'<param name="playerKey" value="([^"]+)"', webpage, 'player key')
|
||||||
|
config_url = 'http://client.expotv.com/video/config/%s/%s' % (
|
||||||
|
video_id, player_key)
|
||||||
|
config = self._download_json(
|
||||||
|
config_url, video_id,
|
||||||
|
note='Downloading video configuration')
|
||||||
|
|
||||||
|
formats = [{
|
||||||
|
'url': fcfg['file'],
|
||||||
|
'height': int_or_none(fcfg.get('height')),
|
||||||
|
'format_note': fcfg.get('label'),
|
||||||
|
'ext': self._search_regex(
|
||||||
|
r'filename=.*\.([a-z0-9_A-Z]+)&', fcfg['file'],
|
||||||
|
'file extension', default=None),
|
||||||
|
} for fcfg in config['sources']]
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
title = self._og_search_title(webpage)
|
||||||
|
description = self._og_search_description(webpage)
|
||||||
|
thumbnail = config.get('image')
|
||||||
|
view_count = int_or_none(self._search_regex(
|
||||||
|
r'<h5>Plays: ([0-9]+)</h5>', webpage, 'view counts'))
|
||||||
|
uploader = self._search_regex(
|
||||||
|
r'<div class="reviewer">\s*<img alt="([^"]+)"', webpage, 'uploader',
|
||||||
|
fatal=False)
|
||||||
|
upload_date = unified_strdate(self._search_regex(
|
||||||
|
r'<h5>Reviewed on ([0-9/.]+)</h5>', webpage, 'upload date',
|
||||||
|
fatal=False))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'view_count': view_count,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'uploader': uploader,
|
||||||
|
'upload_date': upload_date,
|
||||||
|
}
|
@@ -88,3 +88,28 @@ class GameOneIE(InfoExtractor):
|
|||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class GameOnePlaylistIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?gameone\.de(?:/tv)?/?$'
|
||||||
|
IE_NAME = 'gameone:playlist'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.gameone.de/tv',
|
||||||
|
'info_dict': {
|
||||||
|
'title': 'GameOne',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 294,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
webpage = self._download_webpage('http://www.gameone.de/tv', 'TV')
|
||||||
|
max_id = max(map(int, re.findall(r'<a href="/tv/(\d+)"', webpage)))
|
||||||
|
entries = [
|
||||||
|
self.url_result('http://www.gameone.de/tv/%d' % video_id, 'GameOne')
|
||||||
|
for video_id in range(max_id, 0, -1)]
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'title': 'GameOne',
|
||||||
|
'entries': entries,
|
||||||
|
}
|
||||||
|
@@ -8,18 +8,19 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .youtube import YoutubeIE
|
from .youtube import YoutubeIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_error,
|
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_request,
|
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
compat_xml_parse_error,
|
compat_xml_parse_error,
|
||||||
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
HEADRequest,
|
HEADRequest,
|
||||||
|
orderedSet,
|
||||||
parse_xml,
|
parse_xml,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
unsmuggle_url,
|
||||||
url_basename,
|
url_basename,
|
||||||
)
|
)
|
||||||
from .brightcove import BrightcoveIE
|
from .brightcove import BrightcoveIE
|
||||||
@@ -289,6 +290,68 @@ class GenericIE(InfoExtractor):
|
|||||||
'description': 'Mario\'s life in the fast lane has never looked so good.',
|
'description': 'Mario\'s life in the fast lane has never looked so good.',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# YouTube embed via <data-embed-url="">
|
||||||
|
{
|
||||||
|
'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'jpSGZsgga_I',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Asphalt 8: Airborne - Launch Trailer',
|
||||||
|
'uploader': 'Gameloft',
|
||||||
|
'uploader_id': 'gameloft',
|
||||||
|
'upload_date': '20130821',
|
||||||
|
'description': 'md5:87bd95f13d8be3e7da87a5f2c443106a',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
# Camtasia studio
|
||||||
|
{
|
||||||
|
'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
|
||||||
|
'playlist': [{
|
||||||
|
'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
|
||||||
|
'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
|
||||||
|
'ext': 'flv',
|
||||||
|
'duration': 2235.90,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
|
||||||
|
'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
|
||||||
|
'ext': 'flv',
|
||||||
|
'duration': 2235.93,
|
||||||
|
}
|
||||||
|
}],
|
||||||
|
'info_dict': {
|
||||||
|
'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
# Flowplayer
|
||||||
|
{
|
||||||
|
'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
|
||||||
|
'md5': '9d65602bf31c6e20014319c7d07fba27',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5123ea6d5e5a7',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'age_limit': 18,
|
||||||
|
'uploader': 'www.handjobhub.com',
|
||||||
|
'title': 'Busty Blonde Siri Tit Fuck While Wank at Handjob Hub',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
# RSS feed
|
||||||
|
{
|
||||||
|
'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
|
||||||
|
'title': 'Zero Punctuation',
|
||||||
|
'description': 're:'
|
||||||
|
},
|
||||||
|
'playlist_mincount': 11,
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def report_download_webpage(self, video_id):
|
def report_download_webpage(self, video_id):
|
||||||
@@ -301,58 +364,6 @@ class GenericIE(InfoExtractor):
|
|||||||
"""Report information extraction."""
|
"""Report information extraction."""
|
||||||
self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
|
self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
|
||||||
|
|
||||||
def _send_head(self, url):
|
|
||||||
"""Check if it is a redirect, like url shorteners, in case return the new url."""
|
|
||||||
|
|
||||||
class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
|
|
||||||
"""
|
|
||||||
Subclass the HTTPRedirectHandler to make it use our
|
|
||||||
HEADRequest also on the redirected URL
|
|
||||||
"""
|
|
||||||
def redirect_request(self, req, fp, code, msg, headers, newurl):
|
|
||||||
if code in (301, 302, 303, 307):
|
|
||||||
newurl = newurl.replace(' ', '%20')
|
|
||||||
newheaders = dict((k,v) for k,v in req.headers.items()
|
|
||||||
if k.lower() not in ("content-length", "content-type"))
|
|
||||||
try:
|
|
||||||
# This function was deprecated in python 3.3 and removed in 3.4
|
|
||||||
origin_req_host = req.get_origin_req_host()
|
|
||||||
except AttributeError:
|
|
||||||
origin_req_host = req.origin_req_host
|
|
||||||
return HEADRequest(newurl,
|
|
||||||
headers=newheaders,
|
|
||||||
origin_req_host=origin_req_host,
|
|
||||||
unverifiable=True)
|
|
||||||
else:
|
|
||||||
raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp)
|
|
||||||
|
|
||||||
class HTTPMethodFallback(compat_urllib_request.BaseHandler):
|
|
||||||
"""
|
|
||||||
Fallback to GET if HEAD is not allowed (405 HTTP error)
|
|
||||||
"""
|
|
||||||
def http_error_405(self, req, fp, code, msg, headers):
|
|
||||||
fp.read()
|
|
||||||
fp.close()
|
|
||||||
|
|
||||||
newheaders = dict((k,v) for k,v in req.headers.items()
|
|
||||||
if k.lower() not in ("content-length", "content-type"))
|
|
||||||
return self.parent.open(compat_urllib_request.Request(req.get_full_url(),
|
|
||||||
headers=newheaders,
|
|
||||||
origin_req_host=req.get_origin_req_host(),
|
|
||||||
unverifiable=True))
|
|
||||||
|
|
||||||
# Build our opener
|
|
||||||
opener = compat_urllib_request.OpenerDirector()
|
|
||||||
for handler in [compat_urllib_request.HTTPHandler, compat_urllib_request.HTTPDefaultErrorHandler,
|
|
||||||
HTTPMethodFallback, HEADRedirectHandler,
|
|
||||||
compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
|
|
||||||
opener.add_handler(handler())
|
|
||||||
|
|
||||||
response = opener.open(HEADRequest(url))
|
|
||||||
if response is None:
|
|
||||||
raise ExtractorError('Invalid URL protocol')
|
|
||||||
return response
|
|
||||||
|
|
||||||
def _extract_rss(self, url, video_id, doc):
|
def _extract_rss(self, url, video_id, doc):
|
||||||
playlist_title = doc.find('./channel/title').text
|
playlist_title = doc.find('./channel/title').text
|
||||||
playlist_desc_el = doc.find('./channel/description')
|
playlist_desc_el = doc.find('./channel/description')
|
||||||
@@ -372,6 +383,43 @@ class GenericIE(InfoExtractor):
|
|||||||
'entries': entries,
|
'entries': entries,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _extract_camtasia(self, url, video_id, webpage):
|
||||||
|
""" Returns None if no camtasia video can be found. """
|
||||||
|
|
||||||
|
camtasia_cfg = self._search_regex(
|
||||||
|
r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
|
||||||
|
webpage, 'camtasia configuration file', default=None)
|
||||||
|
if camtasia_cfg is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
title = self._html_search_meta('DC.title', webpage, fatal=True)
|
||||||
|
|
||||||
|
camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
|
||||||
|
camtasia_cfg = self._download_xml(
|
||||||
|
camtasia_url, video_id,
|
||||||
|
note='Downloading camtasia configuration',
|
||||||
|
errnote='Failed to download camtasia configuration')
|
||||||
|
fileset_node = camtasia_cfg.find('./playlist/array/fileset')
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for n in fileset_node.getchildren():
|
||||||
|
url_n = n.find('./uri')
|
||||||
|
if url_n is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
entries.append({
|
||||||
|
'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
|
||||||
|
'title': '%s - %s' % (title, n.tag),
|
||||||
|
'url': compat_urlparse.urljoin(url, url_n.text),
|
||||||
|
'duration': float_or_none(n.find('./duration').text),
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'entries': entries,
|
||||||
|
'title': title,
|
||||||
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
if url.startswith('//'):
|
if url.startswith('//'):
|
||||||
return {
|
return {
|
||||||
@@ -408,17 +456,31 @@ class GenericIE(InfoExtractor):
|
|||||||
else:
|
else:
|
||||||
assert ':' in default_search
|
assert ':' in default_search
|
||||||
return self.url_result(default_search + url)
|
return self.url_result(default_search + url)
|
||||||
video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
|
|
||||||
|
url, smuggled_data = unsmuggle_url(url)
|
||||||
|
force_videoid = None
|
||||||
|
if smuggled_data and 'force_videoid' in smuggled_data:
|
||||||
|
force_videoid = smuggled_data['force_videoid']
|
||||||
|
video_id = force_videoid
|
||||||
|
else:
|
||||||
|
video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
|
||||||
|
|
||||||
self.to_screen('%s: Requesting header' % video_id)
|
self.to_screen('%s: Requesting header' % video_id)
|
||||||
|
|
||||||
try:
|
head_req = HEADRequest(url)
|
||||||
response = self._send_head(url)
|
response = self._request_webpage(
|
||||||
|
head_req, video_id,
|
||||||
|
note=False, errnote='Could not send HEAD request to %s' % url,
|
||||||
|
fatal=False)
|
||||||
|
|
||||||
|
if response is not False:
|
||||||
# Check for redirect
|
# Check for redirect
|
||||||
new_url = response.geturl()
|
new_url = response.geturl()
|
||||||
if url != new_url:
|
if url != new_url:
|
||||||
self.report_following_redirect(new_url)
|
self.report_following_redirect(new_url)
|
||||||
|
if force_videoid:
|
||||||
|
new_url = smuggle_url(
|
||||||
|
new_url, {'force_videoid': force_videoid})
|
||||||
return self.url_result(new_url)
|
return self.url_result(new_url)
|
||||||
|
|
||||||
# Check for direct link to a video
|
# Check for direct link to a video
|
||||||
@@ -439,10 +501,6 @@ class GenericIE(InfoExtractor):
|
|||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
}
|
}
|
||||||
|
|
||||||
except compat_urllib_error.HTTPError:
|
|
||||||
# This may be a stupid server that doesn't like HEAD, our UA, or so
|
|
||||||
pass
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
@@ -460,6 +518,11 @@ class GenericIE(InfoExtractor):
|
|||||||
except compat_xml_parse_error:
|
except compat_xml_parse_error:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# Is it a Camtasia project?
|
||||||
|
camtasia_res = self._extract_camtasia(url, video_id, webpage)
|
||||||
|
if camtasia_res is not None:
|
||||||
|
return camtasia_res
|
||||||
|
|
||||||
# Sometimes embedded video player is hidden behind percent encoding
|
# Sometimes embedded video player is hidden behind percent encoding
|
||||||
# (e.g. https://github.com/rg3/youtube-dl/issues/2448)
|
# (e.g. https://github.com/rg3/youtube-dl/issues/2448)
|
||||||
# Unescaping the whole page allows to handle those cases in a generic way
|
# Unescaping the whole page allows to handle those cases in a generic way
|
||||||
@@ -475,10 +538,26 @@ class GenericIE(InfoExtractor):
|
|||||||
r'(?s)<title>(.*?)</title>', webpage, 'video title',
|
r'(?s)<title>(.*?)</title>', webpage, 'video title',
|
||||||
default='video')
|
default='video')
|
||||||
|
|
||||||
|
# Try to detect age limit automatically
|
||||||
|
age_limit = self._rta_search(webpage)
|
||||||
|
# And then there are the jokers who advertise that they use RTA,
|
||||||
|
# but actually don't.
|
||||||
|
AGE_LIMIT_MARKERS = [
|
||||||
|
r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
|
||||||
|
]
|
||||||
|
if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
|
||||||
|
age_limit = 18
|
||||||
|
|
||||||
# video uploader is domain name
|
# video uploader is domain name
|
||||||
video_uploader = self._search_regex(
|
video_uploader = self._search_regex(
|
||||||
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
|
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
|
||||||
|
|
||||||
|
# Helper method
|
||||||
|
def _playlist_from_matches(matches, getter, ie=None):
|
||||||
|
urlrs = orderedSet(self.url_result(getter(m), ie) for m in matches)
|
||||||
|
return self.playlist_result(
|
||||||
|
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||||
|
|
||||||
# Look for BrightCove:
|
# Look for BrightCove:
|
||||||
bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
|
bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
|
||||||
if bc_urls:
|
if bc_urls:
|
||||||
@@ -514,6 +593,7 @@ class GenericIE(InfoExtractor):
|
|||||||
matches = re.findall(r'''(?x)
|
matches = re.findall(r'''(?x)
|
||||||
(?:
|
(?:
|
||||||
<iframe[^>]+?src=|
|
<iframe[^>]+?src=|
|
||||||
|
data-video-url=|
|
||||||
<embed[^>]+?src=|
|
<embed[^>]+?src=|
|
||||||
embedSWF\(?:\s*
|
embedSWF\(?:\s*
|
||||||
)
|
)
|
||||||
@@ -522,19 +602,15 @@ class GenericIE(InfoExtractor):
|
|||||||
(?:embed|v)/.+?)
|
(?:embed|v)/.+?)
|
||||||
\1''', webpage)
|
\1''', webpage)
|
||||||
if matches:
|
if matches:
|
||||||
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
|
return _playlist_from_matches(
|
||||||
for tuppl in matches]
|
matches, lambda m: unescapeHTML(m[1]), ie='Youtube')
|
||||||
return self.playlist_result(
|
|
||||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
|
||||||
|
|
||||||
# Look for embedded Dailymotion player
|
# Look for embedded Dailymotion player
|
||||||
matches = re.findall(
|
matches = re.findall(
|
||||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
|
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
|
||||||
if matches:
|
if matches:
|
||||||
urlrs = [self.url_result(unescapeHTML(tuppl[1]))
|
return _playlist_from_matches(
|
||||||
for tuppl in matches]
|
matches, lambda m: unescapeHTML(m[1]))
|
||||||
return self.playlist_result(
|
|
||||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
|
||||||
|
|
||||||
# Look for embedded Wistia player
|
# Look for embedded Wistia player
|
||||||
match = re.search(
|
match = re.search(
|
||||||
@@ -553,7 +629,7 @@ class GenericIE(InfoExtractor):
|
|||||||
mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
|
mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
|
||||||
if mobj:
|
if mobj:
|
||||||
return self.url_result('http://blip.tv/a/a-'+mobj.group(1), 'BlipTV')
|
return self.url_result('http://blip.tv/a/a-'+mobj.group(1), 'BlipTV')
|
||||||
mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9]+)', webpage)
|
mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
|
||||||
if mobj:
|
if mobj:
|
||||||
return self.url_result(mobj.group(1), 'BlipTV')
|
return self.url_result(mobj.group(1), 'BlipTV')
|
||||||
|
|
||||||
@@ -648,10 +724,8 @@ class GenericIE(InfoExtractor):
|
|||||||
# Look for funnyordie embed
|
# Look for funnyordie embed
|
||||||
matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
|
matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
|
||||||
if matches:
|
if matches:
|
||||||
urlrs = [self.url_result(unescapeHTML(eurl), 'FunnyOrDie')
|
return _playlist_from_matches(
|
||||||
for eurl in matches]
|
matches, getter=unescapeHTML, ie='FunnyOrDie')
|
||||||
return self.playlist_result(
|
|
||||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
|
||||||
|
|
||||||
# Look for embedded RUTV player
|
# Look for embedded RUTV player
|
||||||
rutv_url = RUTVIE._extract_url(webpage)
|
rutv_url = RUTVIE._extract_url(webpage)
|
||||||
@@ -713,6 +787,13 @@ class GenericIE(InfoExtractor):
|
|||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return self.url_result(mobj.group('url'), 'Yahoo')
|
return self.url_result(mobj.group('url'), 'Yahoo')
|
||||||
|
|
||||||
|
# Look for embedded sbs.com.au player
|
||||||
|
mobj = re.search(
|
||||||
|
r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)sbs\.com\.au/ondemand/video/single/.+?)\1',
|
||||||
|
webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result(mobj.group('url'), 'SBS')
|
||||||
|
|
||||||
# Start with something easy: JW Player in SWFObject
|
# Start with something easy: JW Player in SWFObject
|
||||||
found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||||
if not found:
|
if not found:
|
||||||
@@ -730,6 +811,14 @@ class GenericIE(InfoExtractor):
|
|||||||
if not found:
|
if not found:
|
||||||
# Broaden the findall a little bit: JWPlayer JS loader
|
# Broaden the findall a little bit: JWPlayer JS loader
|
||||||
found = re.findall(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
|
found = re.findall(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
|
||||||
|
if not found:
|
||||||
|
# Flow player
|
||||||
|
found = re.findall(r'''(?xs)
|
||||||
|
flowplayer\("[^"]+",\s*
|
||||||
|
\{[^}]+?\}\s*,
|
||||||
|
\s*{[^}]+? ["']?clip["']?\s*:\s*\{\s*
|
||||||
|
["']?url["']?\s*:\s*["']([^"']+)["']
|
||||||
|
''', webpage)
|
||||||
if not found:
|
if not found:
|
||||||
# Try to find twitter cards info
|
# Try to find twitter cards info
|
||||||
found = re.findall(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
|
found = re.findall(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
|
||||||
@@ -739,10 +828,16 @@ class GenericIE(InfoExtractor):
|
|||||||
m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
|
m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
|
||||||
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
|
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
|
||||||
if m_video_type is not None:
|
if m_video_type is not None:
|
||||||
found = re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
|
def check_video(vurl):
|
||||||
|
vpath = compat_urlparse.urlparse(vurl).path
|
||||||
|
vext = determine_ext(vpath)
|
||||||
|
return '.' in vpath and vext not in ('swf', 'png', 'jpg')
|
||||||
|
found = list(filter(
|
||||||
|
check_video,
|
||||||
|
re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)))
|
||||||
if not found:
|
if not found:
|
||||||
# HTML5 video
|
# HTML5 video
|
||||||
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage)
|
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]+)? src="([^"]+)"', webpage)
|
||||||
if not found:
|
if not found:
|
||||||
found = re.search(
|
found = re.search(
|
||||||
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
|
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
|
||||||
@@ -776,6 +871,7 @@ class GenericIE(InfoExtractor):
|
|||||||
'url': video_url,
|
'url': video_url,
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
|
'age_limit': age_limit,
|
||||||
})
|
})
|
||||||
|
|
||||||
if len(entries) == 1:
|
if len(entries) == 1:
|
||||||
|
@@ -14,6 +14,14 @@ class GoogleSearchIE(SearchInfoExtractor):
|
|||||||
_MAX_RESULTS = 1000
|
_MAX_RESULTS = 1000
|
||||||
IE_NAME = 'video.google:search'
|
IE_NAME = 'video.google:search'
|
||||||
_SEARCH_KEY = 'gvsearch'
|
_SEARCH_KEY = 'gvsearch'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'gvsearch15:python language',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'python language',
|
||||||
|
'title': 'python language',
|
||||||
|
},
|
||||||
|
'playlist_count': 15,
|
||||||
|
}
|
||||||
|
|
||||||
def _get_n_results(self, query, n):
|
def _get_n_results(self, query, n):
|
||||||
"""Get a specified number of results for a query"""
|
"""Get a specified number of results for a query"""
|
||||||
|
190
youtube_dl/extractor/grooveshark.py
Normal file
190
youtube_dl/extractor/grooveshark.py
Normal file
@@ -0,0 +1,190 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import time
|
||||||
|
import math
|
||||||
|
import os.path
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import ExtractorError, compat_urllib_request, compat_html_parser
|
||||||
|
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class GroovesharkHtmlParser(compat_html_parser.HTMLParser):
    """HTML parser that collects ``<object>`` tags and their ``<param>``s.

    After a document has been fed, ``objects`` holds one dict per closed
    ``<object>`` element, of the form
    ``{'attrs': {name: value}, 'params': [{name: value}, ...]}``.
    """

    def __init__(self):
        # The <object> currently being parsed, or None when outside one
        self._current_object = None
        self.objects = []
        compat_html_parser.HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if tag == 'object':
            self._current_object = {'attrs': attrs, 'params': []}
        elif tag == 'param' and self._current_object is not None:
            # A <param> outside of any <object> is ignored
            self._current_object['params'].append(attrs)

    def handle_endtag(self, tag):
        # Ignore a stray </object> that has no matching start tag, so that
        # ``objects`` never contains None.
        if tag == 'object' and self._current_object is not None:
            self.objects.append(self._current_object)
            self._current_object = None

    @classmethod
    def extract_object_tags(cls, html):
        """Parse *html* and return the list of collected object dicts."""
        p = cls()
        p.feed(html)
        p.close()
        return p.objects
|
||||||
|
|
||||||
|
|
||||||
|
class GroovesharkIE(InfoExtractor):
    """Information extractor for single Grooveshark songs.

    The extraction mimics the web application: it optionally requests the
    player page and the player bootstrap data, then asks ``preload.php``
    for the stream key and returns a POST download description.
    """
    _VALID_URL = r'https?://(www\.)?grooveshark\.com/#!/s/([^/]+)/([^/]+)'
    _TEST = {
        'url': 'http://grooveshark.com/#!/s/Jolene+Tenth+Key+Remix+Ft+Will+Sessions/6SS1DW?src=5',
        'md5': '7ecf8aefa59d6b2098517e1baa530023',
        'info_dict': {
            'id': '6SS1DW',
            'title': 'Jolene (Tenth Key Remix ft. Will Sessions)',
            'ext': 'mp3',
            'duration': 227,
        }
    }

    # Switches for the two optional preparatory requests in _real_extract
    do_playerpage_request = True
    do_bootstrap_request = True

    def _parse_target(self, target):
        """Return ``(parsed_url, fragment_path, token)`` for *target*.

        ``fragment_path`` is the URL fragment without its first character
        and without anything after a ``?``; ``token`` is its last path
        component.
        """
        uri = compat_urlparse.urlparse(target)
        fragment_path = uri.fragment[1:].split('?')[0]
        token = os.path.basename(fragment_path.rstrip('/'))
        return (uri, fragment_path, token)

    def _build_preload_url(self, target, query_prefix=''):
        """Return ``(preload.php URL, token)``; shared by both URL builders."""
        (uri, fragment_path, token) = self._parse_target(target)
        query = '%shash=%s&%d' % (
            query_prefix,
            compat_urllib_parse.quote(fragment_path, safe=''),
            self.ts)
        preload_url = compat_urlparse.urlunparse(
            (uri.scheme, uri.netloc, '/preload.php', None, query, None))
        return (preload_url, token)

    def _build_bootstrap_url(self, target):
        """Return ``(url, token)`` for the player bootstrap request."""
        return self._build_preload_url(target, 'getCommunicationToken=1&')

    def _build_meta_url(self, target):
        """Return ``(url, token)`` for the track metadata request."""
        return self._build_preload_url(target)

    def _build_stream_url(self, meta):
        """Return the stream.php URL on the host named in *meta*."""
        return compat_urlparse.urlunparse(
            ('http', meta['streamKey']['ip'], '/stream.php', None, None, None))

    def _build_swf_referer(self, target, obj):
        """Return the URL of the player named by *obj*'s ``data`` attribute."""
        (uri, _, _) = self._parse_target(target)
        return compat_urlparse.urlunparse(
            (uri.scheme, uri.netloc, obj['attrs']['data'], None, None, None))

    def _transform_bootstrap(self, js):
        """Cut the value of the first assignment out of the bootstrap JS."""
        before_try = re.split(r'(?m)^\s*try\s*{', js)[0]
        return before_try.split(' = ', 1)[1].strip().rstrip(';')

    def _transform_meta(self, js):
        """Cut the value assigned on the first line of the metadata JS."""
        first_line = js.split('\n')[0]
        # Split on the first '=' only: the assigned value may itself
        # contain '=' characters.
        return first_line.split('=', 1)[1].rstrip(';')

    def _get_meta(self, target):
        """Download and return the ``getStreamKeyWithSong`` metadata.

        Raises ExtractorError if the key is absent or its value is None.
        """
        (meta_url, token) = self._build_meta_url(target)
        self.to_screen('Metadata URL: %s' % meta_url)

        headers = {'Referer': compat_urlparse.urldefrag(target)[0]}
        req = compat_urllib_request.Request(meta_url, headers=headers)
        res = self._download_json(
            req, token, transform_source=self._transform_meta)

        if 'getStreamKeyWithSong' not in res:
            raise ExtractorError(
                'Metadata not found. URL may be malformed, or Grooveshark API may have changed.')

        if res['getStreamKeyWithSong'] is None:
            raise ExtractorError(
                'Metadata download failed, probably due to Grooveshark anti-abuse throttling. Wait at least an hour before retrying from this IP.',
                expected=True)

        return res['getStreamKeyWithSong']

    def _get_bootstrap(self, target):
        """Download the player bootstrap data (non-fatal on failure)."""
        (bootstrap_url, token) = self._build_bootstrap_url(target)

        headers = {'Referer': compat_urlparse.urldefrag(target)[0]}
        req = compat_urllib_request.Request(bootstrap_url, headers=headers)
        res = self._download_json(
            req, token, fatal=False,
            note='Downloading player bootstrap data',
            errnote='Unable to download player bootstrap data',
            transform_source=self._transform_bootstrap)
        return res

    def _get_playerpage(self, target):
        """Download the player page and pick out the player objects.

        Returns ``(webpage, objects)``, where ``objects`` is the list of
        parsed ``<object>`` tags whose id is ``jsPlayerEmbed``, or None
        when the page could not be downloaded.

        Raises ExtractorError if the page shows an error message.
        """
        (_, _, token) = self._parse_target(target)

        webpage = self._download_webpage(
            target, token,
            note='Downloading player page',
            errnote='Unable to download player page',
            fatal=False)

        if not webpage:
            # The download is non-fatal; a falsy result (None or False)
            # means there is no page to inspect.
            return (webpage, None)

        # Search (for example German) error message
        error_msg = self._html_search_regex(
            r'<div id="content">\s*<h2>(.*?)</h2>', webpage,
            'error message', default=None)
        if error_msg is not None:
            error_msg = error_msg.replace('\n', ' ')
            raise ExtractorError('Grooveshark said: %s' % error_msg)

        objects = GroovesharkHtmlParser.extract_object_tags(webpage)
        # Objects without an id attribute simply do not match
        player_objs = [
            o for o in objects
            if o is not None and o['attrs'].get('id') == 'jsPlayerEmbed']
        return (webpage, player_objs)

    def _real_initialize(self):
        self.ts = int(time.time() * 1000)  # timestamp in millis

    def _real_extract(self, url):
        (target_uri, _, token) = self._parse_target(url)

        # 1. Fill cookiejar by making a request to the player page
        swf_referer = None
        if self.do_playerpage_request:
            (_, player_objs) = self._get_playerpage(url)
            # player_objs is None (no page) or a possibly empty list
            if player_objs:
                swf_referer = self._build_swf_referer(url, player_objs[0])
                self.to_screen('SWF Referer: %s' % swf_referer)

        # 2. Ask preload.php for swf bootstrap data to better mimic webapp
        if self.do_bootstrap_request:
            bootstrap = self._get_bootstrap(url)
            # The bootstrap download is non-fatal and may have failed
            if bootstrap and 'getCommunicationToken' in bootstrap:
                self.to_screen('CommunicationToken: %s' % bootstrap['getCommunicationToken'])

        # 3. Ask preload.php for track metadata.
        meta = self._get_meta(url)

        # 4. Construct stream request for track.
        stream_url = self._build_stream_url(meta)
        # uSecs is the track length in microseconds; round up to seconds
        duration = int(math.ceil(float(meta['streamKey']['uSecs']) / 1000000))
        post_dict = {'streamKey': meta['streamKey']['streamKey']}
        post_data = compat_urllib_parse.urlencode(post_dict).encode('utf-8')
        headers = {
            'Content-Length': len(post_data),
            'Content-Type': 'application/x-www-form-urlencoded'
        }
        if swf_referer is not None:
            headers['Referer'] = swf_referer

        return {
            'id': token,
            'title': meta['song']['Name'],
            'http_method': 'POST',
            'url': stream_url,
            'ext': 'mp3',
            'format': 'mp3 audio',
            'duration': duration,
            'http_post_data': post_data,
            'http_headers': headers,
        }
|
@@ -18,6 +18,7 @@ class IGNIE(InfoExtractor):
|
|||||||
_DESCRIPTION_RE = [
|
_DESCRIPTION_RE = [
|
||||||
r'<span class="page-object-description">(.+?)</span>',
|
r'<span class="page-object-description">(.+?)</span>',
|
||||||
r'id="my_show_video">.*?<p>(.*?)</p>',
|
r'id="my_show_video">.*?<p>(.*?)</p>',
|
||||||
|
r'<meta name="description" content="(.*?)"',
|
||||||
]
|
]
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
@@ -55,6 +56,17 @@ class IGNIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
|
||||||
|
'md5': '4e9a0bda1e5eebd31ddcf86ec0b9b3c7',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '078fdd005f6d3c02f63d795faa1b984f',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
|
||||||
|
'description': 'Giant skeletons, bloody hunts, and captivating'
|
||||||
|
' natural beauty take our breath away.',
|
||||||
|
},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _find_video_id(self, webpage):
|
def _find_video_id(self, webpage):
|
||||||
@@ -62,6 +74,7 @@ class IGNIE(InfoExtractor):
|
|||||||
r'data-video-id="(.+?)"',
|
r'data-video-id="(.+?)"',
|
||||||
r'<object id="vid_(.+?)"',
|
r'<object id="vid_(.+?)"',
|
||||||
r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
|
r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
|
||||||
|
r'class="hero-poster[^"]*?"[^>]*id="(.+?)"',
|
||||||
]
|
]
|
||||||
return self._search_regex(res_id, webpage, 'video id')
|
return self._search_regex(res_id, webpage, 'video id')
|
||||||
|
|
||||||
@@ -70,10 +83,7 @@ class IGNIE(InfoExtractor):
|
|||||||
name_or_id = mobj.group('name_or_id')
|
name_or_id = mobj.group('name_or_id')
|
||||||
page_type = mobj.group('type')
|
page_type = mobj.group('type')
|
||||||
webpage = self._download_webpage(url, name_or_id)
|
webpage = self._download_webpage(url, name_or_id)
|
||||||
if page_type == 'articles':
|
if page_type != 'video':
|
||||||
video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, 'video url')
|
|
||||||
return self.url_result(video_url, ie='IGN')
|
|
||||||
elif page_type != 'video':
|
|
||||||
multiple_urls = re.findall(
|
multiple_urls = re.findall(
|
||||||
'<param name="flashvars" value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]',
|
'<param name="flashvars" value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]',
|
||||||
webpage)
|
webpage)
|
||||||
|
57
youtube_dl/extractor/ministrygrid.py
Normal file
57
youtube_dl/extractor/ministrygrid.py
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
smuggle_url,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MinistryGridIE(InfoExtractor):
    """Information extractor for ministrygrid.com pages.

    The page lists Liferay portlets; each one is rendered in turn until
    one contains an iframe, whose URL is then handed on with the video id
    smuggled in as ``force_videoid``.
    """
    # The dot before "com" is escaped so that only the real host matches
    _VALID_URL = r'https?://www\.ministrygrid\.com/([^/?#]*/)*(?P<id>[^/#?]+)/?(?:$|[?#])'

    _TEST = {
        'url': 'http://www.ministrygrid.com/training-viewer/-/training/t4g-2014-conference/the-gospel-by-numbers-4/the-gospel-by-numbers',
        'md5': '844be0d2a1340422759c2a9101bab017',
        'info_dict': {
            'id': '3453494717001',
            'ext': 'mp4',
            'title': 'The Gospel by Numbers',
            'description': 'Coming soon from T4G 2014!',
            'uploader': 'LifeWay Christian Resources (MG)',
        },
    }

    def _real_extract(self, url):
        """Return a ``url`` result for the first portlet iframe found.

        Raises ExtractorError if no portlet contains an iframe.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        portlets_json = self._search_regex(
            r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list')
        portlets = json.loads(portlets_json)
        pl_id = self._search_regex(
            r'<!--\s*p_l_id - ([0-9]+)<br>', webpage, 'p_l_id')

        for i, portlet in enumerate(portlets):
            portlet_url = 'http://www.ministrygrid.com/c/portal/render_portlet?p_l_id=%s&p_p_id=%s' % (pl_id, portlet)
            portlet_code = self._download_webpage(
                portlet_url, video_id,
                note='Looking in portlet %s (%d/%d)' % (portlet, i + 1, len(portlets)),
                fatal=False)
            if not portlet_code:
                # The download is non-fatal: skip portlets that could not
                # be fetched instead of searching a non-string result.
                continue
            video_iframe_url = self._search_regex(
                r'<iframe.*?src="([^"]+)"', portlet_code, 'video iframe',
                default=None)
            if video_iframe_url:
                surl = smuggle_url(
                    video_iframe_url, {'force_videoid': video_id})
                return {
                    '_type': 'url',
                    'id': video_id,
                    'url': surl,
                }

        raise ExtractorError('Could not find video iframe in any portlets')
|
@@ -6,6 +6,7 @@ from .common import InfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
HEADRequest,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
)
|
)
|
||||||
@@ -38,7 +39,7 @@ class MixcloudIE(InfoExtractor):
|
|||||||
try:
|
try:
|
||||||
# We only want to know if the request succeed
|
# We only want to know if the request succeed
|
||||||
# don't download the whole file
|
# don't download the whole file
|
||||||
self._request_webpage(url, None, False)
|
self._request_webpage(HEADRequest(url), None, False)
|
||||||
return url
|
return url
|
||||||
except ExtractorError:
|
except ExtractorError:
|
||||||
url = None
|
url = None
|
||||||
|
@@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
|
||||||
@@ -8,15 +10,17 @@ from ..utils import (
|
|||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class MofosexIE(InfoExtractor):
|
class MofosexIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>mofosex\.com/videos/(?P<videoid>[0-9]+)/.*?\.html)'
|
_VALID_URL = r'^https?://(?:www\.)?(?P<url>mofosex\.com/videos/(?P<videoid>[0-9]+)/.*?\.html)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
|
'url': 'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
|
||||||
u'file': u'5018.mp4',
|
'md5': '1b2eb47ac33cc75d4a80e3026b613c5a',
|
||||||
u'md5': u'1b2eb47ac33cc75d4a80e3026b613c5a',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '5018',
|
||||||
u"title": u"Japanese Teen Music Video",
|
'ext': 'mp4',
|
||||||
u"age_limit": 18,
|
'title': 'Japanese Teen Music Video',
|
||||||
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -29,8 +33,8 @@ class MofosexIE(InfoExtractor):
|
|||||||
req.add_header('Cookie', 'age_verified=1')
|
req.add_header('Cookie', 'age_verified=1')
|
||||||
webpage = self._download_webpage(req, video_id)
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, u'title')
|
video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, 'title')
|
||||||
video_url = compat_urllib_parse.unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, u'video_url'))
|
video_url = compat_urllib_parse.unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, 'video_url'))
|
||||||
path = compat_urllib_parse_urlparse(video_url).path
|
path = compat_urllib_parse_urlparse(video_url).path
|
||||||
extension = os.path.splitext(path)[1][1:]
|
extension = os.path.splitext(path)[1][1:]
|
||||||
format = path.split('/')[5].split('_')[:2]
|
format = path.split('/')[5].split('_')[:2]
|
||||||
|
78
youtube_dl/extractor/movieclips.py
Normal file
78
youtube_dl/extractor/movieclips.py
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
compat_str,
|
||||||
|
clean_html,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MovieClipsIE(InfoExtractor):
    """Information extractor for movieclips.com clips.

    Reads the XML player configuration for the clip, follows its SMIL
    file and builds one format per ``<video>`` element found there.
    """
    _VALID_URL = r'https?://movieclips\.com/(?P<id>[\da-zA-Z]+)(?:-(?P<display_id>[\da-z-]+))?'
    _TEST = {
        'url': 'http://movieclips.com/Wy7ZU-my-week-with-marilyn-movie-do-you-love-me/',
        'info_dict': {
            'id': 'Wy7ZU',
            'display_id': 'my-week-with-marilyn-movie-do-you-love-me',
            'ext': 'mp4',
            'title': 'My Week with Marilyn - Do You Love Me?',
            'description': 'md5:e86795bd332fe3cff461e7c8dc542acb',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the clip at *url*.

        Raises ExtractorError (expected) when the configuration marks the
        clip as unavailable in the current region.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')
        # Prefer the readable slug in progress messages
        show_id = display_id or video_id

        config = self._download_xml(
            'http://config.movieclips.com/player/config/%s' % video_id,
            show_id, 'Downloading player config')

        # A configuration without the region element is not treated as
        # region-blocked.
        region_el = config.find('./country-region')
        if region_el is not None and region_el.text == 'false':
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, config.find('./region_alert').text), expected=True)

        properties = config.find('./video/properties')
        smil_file = properties.attrib['smil_file']

        smil = self._download_xml(smil_file, show_id, 'Downloading SMIL')
        base_url = smil.find('./head/meta').attrib['base']

        formats = []
        for video in smil.findall('./body/switch/video'):
            # Floor division keeps the bitrate an integer on Python 2 and 3
            vbr = int(video.attrib['system-bitrate']) // 1000
            src = video.attrib['src']
            formats.append({
                'url': base_url,
                'play_path': src,
                # src is of the form "<ext>:<path>"
                'ext': src.split(':')[0],
                'vbr': vbr,
                'format_id': '%dk' % vbr,
            })

        self._sort_formats(formats)

        title = '%s - %s' % (properties.attrib['clip_movie_title'], properties.attrib['clip_title'])
        description = clean_html(compat_str(properties.attrib['clip_description']))
        thumbnail = properties.attrib['image']
        categories = properties.attrib['clip_categories'].split(',')

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'categories': categories,
            'formats': formats,
        }
|
78
youtube_dl/extractor/musicvault.py
Normal file
78
youtube_dl/extractor/musicvault.py
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
strip_jsonp,
|
||||||
|
parse_duration,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MusicVaultIE(InfoExtractor):
    """Extractor for video pages on musicvault.com.

    Metadata is scraped from the page HTML; the media URL is assembled
    from the Kaltura identifiers embedded in the page.
    """

    _VALID_URL = r'https?://www\.musicvault\.com/(?P<uploader_id>[^/?#]*)/video/(?P<display_id>[^/?#]*)_(?P<id>[0-9]+)\.html'
    _TEST = {
        'url': 'http://www.musicvault.com/the-allman-brothers-band/video/straight-from-the-heart_1010863.html',
        'md5': '2cdbb3ae75f7fb3519821507d2fb3c15',
        'info_dict': {
            'id': '1010863',
            'ext': 'mp4',
            'uploader_id': 'the-allman-brothers-band',
            'title': 'Straight from the Heart',
            'duration': 244,
            'uploader': 'The Allman Brothers Band',
            'thumbnail': 're:^https?://.*/thumbnail/.*',
            'upload_date': '19811216',
            'location': 'Capitol Theatre (Passaic, NJ)',
            'description': 'Listen to The Allman Brothers Band perform Straight from the Heart at Capitol Theatre (Passaic, NJ) on Dec 16, 1981',
        }
    }

    def _real_extract(self, url):
        """Download the page at ``url`` and return its info dict.

        Only the data ``<div>``, the title, and the Kaltura identifiers are
        mandatory; every other field is looked up non-fatally.
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('display_id')
        webpage = self._download_webpage(url, display_id)

        thumbnail = self._search_regex(
            r'<meta itemprop="thumbnail" content="([^"]+)"',
            webpage, 'thumbnail', fatal=False)

        # All textual metadata lives in headings inside <div class="data">.
        data_div = self._search_regex(
            r'(?s)<div class="data">(.*?)</div>', webpage, 'data fields')
        uploader = self._html_search_regex(
            r'<h1.*?>(.*?)</h1>', data_div, 'uploader', fatal=False)
        title = self._html_search_regex(
            r'<h2.*?>(.*?)</h2>', data_div, 'title')
        # The field label used to read 'uploader' (copy-paste slip), which
        # made the "unable to extract" warning point at the wrong field.
        upload_date = self._html_search_regex(
            r'<h3.*?>(.*?)</h3>', data_div, 'upload date', fatal=False)
        if upload_date:
            # Only normalise when a date was actually found, so that a
            # missing optional field stays non-fatal.
            upload_date = unified_strdate(upload_date)
        location = self._html_search_regex(
            r'<h4.*?>(.*?)</h4>', data_div, 'location', fatal=False)

        duration = parse_duration(self._html_search_meta('duration', webpage))

        VIDEO_URL_TEMPLATE = 'http://cdnapi.kaltura.com/p/%(uid)s/sp/%(wid)s/playManifest/entryId/%(entry_id)s/format/url/protocol/http'
        kaltura_id = self._search_regex(
            r'<div id="video-detail-player" data-kaltura-id="([^"]+)"',
            webpage, 'kaltura ID')
        video_url = VIDEO_URL_TEMPLATE % {
            'entry_id': kaltura_id,
            'wid': self._search_regex(r'/wid/_([0-9]+)/', webpage, 'wid'),
            'uid': self._search_regex(r'uiconf_id/([0-9]+)/', webpage, 'uid'),
        }

        return {
            'id': mobj.group('id'),
            'url': video_url,
            'ext': 'mp4',
            'display_id': display_id,
            'uploader_id': mobj.group('uploader_id'),
            'thumbnail': thumbnail,
            'description': self._html_search_meta('description', webpage),
            'upload_date': upload_date,
            'location': location,
            'title': title,
            'uploader': uploader,
            'duration': duration,
        }
|
@@ -38,7 +38,7 @@ class NuvidIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
request, video_id, 'Downloading %s page' % format_id)
|
request, video_id, 'Downloading %s page' % format_id)
|
||||||
video_url = self._html_search_regex(
|
video_url = self._html_search_regex(
|
||||||
r'<a href="([^"]+)"\s*>Continue to watch video', webpage, '%s video URL' % format_id, fatal=False)
|
r'<a\s+href="([^"]+)"\s+class="b_link">', webpage, '%s video URL' % format_id, fatal=False)
|
||||||
if not video_url:
|
if not video_url:
|
||||||
continue
|
continue
|
||||||
formats.append({
|
formats.append({
|
||||||
@@ -49,19 +49,24 @@ class NuvidIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'http://m.nuvid.com/video/%s' % video_id, video_id, 'Downloading video page')
|
'http://m.nuvid.com/video/%s' % video_id, video_id, 'Downloading video page')
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>', webpage, 'title').strip()
|
[r'<span title="([^"]+)">',
|
||||||
thumbnail = self._html_search_regex(
|
r'<div class="thumb-holder video">\s*<h5[^>]*>([^<]+)</h5>'], webpage, 'title').strip()
|
||||||
r'href="(/thumbs/[^"]+)"[^>]*data-link_type="thumbs"',
|
thumbnails = [
|
||||||
webpage, 'thumbnail URL', fatal=False)
|
{
|
||||||
|
'url': thumb_url,
|
||||||
|
} for thumb_url in re.findall(r'<img src="([^"]+)" alt="" />', webpage)
|
||||||
|
]
|
||||||
|
thumbnail = thumbnails[0]['url'] if thumbnails else None
|
||||||
duration = parse_duration(self._html_search_regex(
|
duration = parse_duration(self._html_search_regex(
|
||||||
r'Length:\s*<span>(\d{2}:\d{2})</span>',webpage, 'duration', fatal=False))
|
r'<i class="fa fa-clock-o"></i>\s*(\d{2}:\d{2})', webpage, 'duration', fatal=False))
|
||||||
upload_date = unified_strdate(self._html_search_regex(
|
upload_date = unified_strdate(self._html_search_regex(
|
||||||
r'Added:\s*<span>(\d{4}-\d{2}-\d{2})</span>', webpage, 'upload date', fatal=False))
|
r'<i class="fa fa-user"></i>\s*(\d{4}-\d{2}-\d{2})', webpage, 'upload date', fatal=False))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': 'http://m.nuvid.com%s' % thumbnail,
|
'thumbnails': thumbnails,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
|
@@ -6,7 +6,6 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urlparse,
|
|
||||||
js_to_json,
|
js_to_json,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@@ -54,6 +54,18 @@ class PBSIE(InfoExtractor):
|
|||||||
'duration': 801,
|
'duration': 801,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.pbs.org/wnet/gperf/dudamel-conducts-verdi-requiem-hollywood-bowl-full-episode/3374/',
|
||||||
|
'md5': 'c62859342be2a0358d6c9eb306595978',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2365297708',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'description': 'md5:68d87ef760660eb564455eb30ca464fe',
|
||||||
|
'title': 'Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
|
||||||
|
'duration': 6559,
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
}
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _extract_ids(self, url):
|
def _extract_ids(self, url):
|
||||||
@@ -75,7 +87,7 @@ class PBSIE(InfoExtractor):
|
|||||||
return media_id, presumptive_id
|
return media_id, presumptive_id
|
||||||
|
|
||||||
url = self._search_regex(
|
url = self._search_regex(
|
||||||
r'<iframe\s+id=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
|
r'<iframe\s+(?:class|id)=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
|
||||||
webpage, 'player URL')
|
webpage, 'player URL')
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
|
||||||
|
82
youtube_dl/extractor/playfm.py
Normal file
82
youtube_dl/extractor/playfm.py
Normal file
@@ -0,0 +1,82 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urllib_request,
|
||||||
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PlayFMIE(InfoExtractor):
    """Extractor for recordings on play.fm.

    The recording id and upload date are both taken from the page URL; all
    other metadata comes from an XML document returned by a form POST.
    """

    IE_NAME = 'play.fm'
    _VALID_URL = r'https?://(?:www\.)?play\.fm/[^?#]*(?P<upload_date>[0-9]{8})(?P<id>[0-9]{6})(?:$|[?#])'

    _TEST = {
        'url': 'http://www.play.fm/recording/leipzigelectronicmusicbatofarparis_fr20140712137220',
        'md5': 'c505f8307825a245d0c7ad1850001f22',
        'info_dict': {
            'id': '137220',
            'ext': 'mp3',
            'title': 'LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12',
            'uploader': 'Sven Tasnadi',
            'uploader_id': 'sventasnadi',
            'duration': 5627.428,
            'upload_date': '20140712',
            'view_count': int,
            'thumbnail': 're:^https?://.*\.jpg$',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the recording at ``url``.

        Raises ExtractorError when the XML response contains an ``<error>``
        node.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        upload_date = mobj.group('upload_date')

        rec_data = compat_urllib_parse.urlencode({'rec_id': video_id})
        # POST bodies must be bytes on Python 3; urlencode() returns text.
        req = compat_urllib_request.Request(
            'http://www.play.fm/flexRead/recording',
            data=rec_data.encode('utf-8'))
        req.add_header('Content-Type', 'application/x-www-form-urlencoded')
        rec_doc = self._download_xml(req, video_id)

        error_node = rec_doc.find('./error')
        if error_node is not None:
            raise ExtractorError('An error occurred: %s (code %s)' % (
                error_node.text, rec_doc.find('./status').text))

        recording = rec_doc.find('./recording')
        title = recording.find('./title').text
        view_count = int_or_none(recording.find('./stats/playcount').text)
        # float_or_none divides by ``scale``; the _TEST duration (5627.428)
        # suggests the XML value is in milliseconds -- TODO confirm.
        duration = float_or_none(recording.find('./duration').text, scale=1000)
        thumbnail = recording.find('./image').text

        artist = recording.find('./artists/artist')
        uploader = artist.find('./name').text
        uploader_id = artist.find('./slug').text

        # The stream URL is built from several fields of the XML response;
        # the trailing 'loc' component is the URL-encoded player address.
        video_url = '%s//%s/%s/%s/offset/0/sh/%s/rec/%s/jingle/%s/loc/%s' % (
            'http:', recording.find('./url').text,
            recording.find('./_class').text, recording.find('./file_id').text,
            rec_doc.find('./uuid').text, video_id,
            rec_doc.find('./jingle/file_id').text,
            'http%3A%2F%2Fwww.play.fm%2Fplayer',
        )

        return {
            'id': video_id,
            'url': video_url,
            'ext': 'mp3',
            'filesize': int_or_none(recording.find('./size').text),
            'title': title,
            'upload_date': upload_date,
            'view_count': view_count,
            'duration': duration,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': uploader_id,
        }
|
@@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@@ -9,15 +11,16 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class PornotubeIE(InfoExtractor):
|
class PornotubeIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$'
|
_VALID_URL = r'https?://(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing',
|
'url': 'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing',
|
||||||
u'file': u'1689755.flv',
|
'md5': '374dd6dcedd24234453b295209aa69b6',
|
||||||
u'md5': u'374dd6dcedd24234453b295209aa69b6',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '1689755',
|
||||||
u"upload_date": u"20090708",
|
'ext': 'flv',
|
||||||
u"title": u"Marilyn-Monroe-Bathing",
|
'upload_date': '20090708',
|
||||||
u"age_limit": 18
|
'title': 'Marilyn-Monroe-Bathing',
|
||||||
|
'age_limit': 18
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -32,22 +35,22 @@ class PornotubeIE(InfoExtractor):
|
|||||||
|
|
||||||
# Get the video URL
|
# Get the video URL
|
||||||
VIDEO_URL_RE = r'url: "(?P<url>http://video[0-9].pornotube.com/.+\.flv)",'
|
VIDEO_URL_RE = r'url: "(?P<url>http://video[0-9].pornotube.com/.+\.flv)",'
|
||||||
video_url = self._search_regex(VIDEO_URL_RE, webpage, u'video url')
|
video_url = self._search_regex(VIDEO_URL_RE, webpage, 'video url')
|
||||||
video_url = compat_urllib_parse.unquote(video_url)
|
video_url = compat_urllib_parse.unquote(video_url)
|
||||||
|
|
||||||
#Get the uploaded date
|
#Get the uploaded date
|
||||||
VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
|
VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
|
||||||
upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False)
|
upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, 'upload date', fatal=False)
|
||||||
if upload_date: upload_date = unified_strdate(upload_date)
|
if upload_date:
|
||||||
|
upload_date = unified_strdate(upload_date)
|
||||||
age_limit = self._rta_search(webpage)
|
age_limit = self._rta_search(webpage)
|
||||||
|
|
||||||
info = {'id': video_id,
|
return {
|
||||||
'url': video_url,
|
'id': video_id,
|
||||||
'uploader': None,
|
'url': video_url,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'format': 'flv',
|
'format': 'flv',
|
||||||
'age_limit': age_limit}
|
'age_limit': age_limit,
|
||||||
|
}
|
||||||
return [info]
|
|
||||||
|
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import parse_duration
|
||||||
|
|
||||||
|
|
||||||
class RtlXlIE(InfoExtractor):
|
class RtlXlIE(InfoExtractor):
|
||||||
@@ -20,6 +21,7 @@ class RtlXlIE(InfoExtractor):
|
|||||||
'onze mobiele apps.',
|
'onze mobiele apps.',
|
||||||
'timestamp': 1408051800,
|
'timestamp': 1408051800,
|
||||||
'upload_date': '20140814',
|
'upload_date': '20140814',
|
||||||
|
'duration': 576.880,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# We download the first bytes of the first fragment, it can't be
|
# We download the first bytes of the first fragment, it can't be
|
||||||
@@ -35,7 +37,7 @@ class RtlXlIE(InfoExtractor):
|
|||||||
info = self._download_json(
|
info = self._download_json(
|
||||||
'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
|
'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
|
||||||
uuid)
|
uuid)
|
||||||
meta = info['meta']
|
|
||||||
material = info['material'][0]
|
material = info['material'][0]
|
||||||
episode_info = info['episodes'][0]
|
episode_info = info['episodes'][0]
|
||||||
|
|
||||||
@@ -45,8 +47,9 @@ class RtlXlIE(InfoExtractor):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
'id': uuid,
|
'id': uuid,
|
||||||
'title': '%s - %s' % (progname, subtitle),
|
'title': '%s - %s' % (progname, subtitle),
|
||||||
'formats': self._extract_f4m_formats(f4m_url, uuid),
|
'formats': self._extract_f4m_formats(f4m_url, uuid),
|
||||||
'timestamp': material['original_date'],
|
'timestamp': material['original_date'],
|
||||||
'description': episode_info['synopsis'],
|
'description': episode_info['synopsis'],
|
||||||
|
'duration': parse_duration(material.get('duration')),
|
||||||
}
|
}
|
||||||
|
@@ -1,21 +1,66 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
import base64
|
import base64
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
struct_unpack,
|
struct_unpack,
|
||||||
|
remove_end,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _decrypt_url(png):
    """Decode the URL hidden in the ``tEXt`` chunk of a base64-encoded PNG.

    The chunk payload (with NUL bytes dropped) has the form
    ``<alphabet data>#<url data>``. The alphabet is recovered by keeping
    one character and then skipping a run whose length cycles 1, 2, 3, 0.
    Each URL character is a two-digit decimal index into that alphabet,
    with filler characters between the two digits.

    ``png`` is the base64 text of the image; the decoded URL is returned
    as a string.
    """
    raw = base64.b64decode(png)
    # The 4-byte big-endian length field sits immediately before the tag.
    tag_pos = raw.find(b'tEXt')
    chunk = raw[tag_pos - 4:]
    payload_len = struct_unpack('!I', chunk[:4])[0]
    # Use bytearray to get integers when iterating in both python 2.x and 3.x
    payload = bytearray(chunk[8:8 + payload_len])
    chars = [chr(byte) for byte in payload if byte != 0]
    separator = chars.index('#')
    alphabet_chars = chars[:separator]
    encoded_url = chars[separator + 1:]

    # Rebuild the alphabet: keep a character, then skip `run` characters,
    # where `run` cycles through 1, 2, 3, 0.
    alphabet = []
    run = 0
    to_skip = 0
    for char in alphabet_chars:
        if to_skip != 0:
            to_skip -= 1
            continue
        alphabet.append(char)
        run = (run + 1) % 4
        to_skip = run

    # Decode the URL: read a tens digit, skip `to_skip` fillers, read the
    # units digit, emit alphabet[index], then pick the next filler count.
    pieces = []
    index = 0
    want_tens = True
    to_skip = 3
    emitted = 1
    for char in encoded_url:
        if want_tens:
            index = int(char) * 10
            want_tens = False
        elif to_skip != 0:
            to_skip -= 1
        else:
            index += int(char)
            pieces.append(alphabet[index])
            to_skip = (emitted + 3) % 4
            want_tens = True
            emitted += 1

    return ''.join(pieces)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class RTVEALaCartaIE(InfoExtractor):
|
class RTVEALaCartaIE(InfoExtractor):
|
||||||
IE_NAME = 'rtve.es:alacarta'
|
IE_NAME = 'rtve.es:alacarta'
|
||||||
IE_DESC = 'RTVE a la carta'
|
IE_DESC = 'RTVE a la carta'
|
||||||
_VALID_URL = r'http://www\.rtve\.es/alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'
|
_VALID_URL = r'http://www\.rtve\.es/alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
|
'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
|
||||||
'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43',
|
'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -23,48 +68,15 @@ class RTVEALaCartaIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
|
'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'note': 'Live stream',
|
||||||
def _decrypt_url(self, png):
|
'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
|
||||||
encrypted_data = base64.b64decode(png)
|
'info_dict': {
|
||||||
text_index = encrypted_data.find(b'tEXt')
|
'id': '1694255',
|
||||||
text_chunk = encrypted_data[text_index-4:]
|
'ext': 'flv',
|
||||||
length = struct_unpack('!I', text_chunk[:4])[0]
|
'title': 'TODO',
|
||||||
# Use bytearray to get integers when iterating in both python 2.x and 3.x
|
}
|
||||||
data = bytearray(text_chunk[8:8+length])
|
}]
|
||||||
data = [chr(b) for b in data if b != 0]
|
|
||||||
hash_index = data.index('#')
|
|
||||||
alphabet_data = data[:hash_index]
|
|
||||||
url_data = data[hash_index+1:]
|
|
||||||
|
|
||||||
alphabet = []
|
|
||||||
e = 0
|
|
||||||
d = 0
|
|
||||||
for l in alphabet_data:
|
|
||||||
if d == 0:
|
|
||||||
alphabet.append(l)
|
|
||||||
d = e = (e + 1) % 4
|
|
||||||
else:
|
|
||||||
d -= 1
|
|
||||||
url = ''
|
|
||||||
f = 0
|
|
||||||
e = 3
|
|
||||||
b = 1
|
|
||||||
for letter in url_data:
|
|
||||||
if f == 0:
|
|
||||||
l = int(letter)*10
|
|
||||||
f = 1
|
|
||||||
else:
|
|
||||||
if e == 0:
|
|
||||||
l += int(letter)
|
|
||||||
url += alphabet[l]
|
|
||||||
e = (b + 3) % 4
|
|
||||||
f = 0
|
|
||||||
b += 1
|
|
||||||
else:
|
|
||||||
e -= 1
|
|
||||||
|
|
||||||
return url
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
@@ -74,11 +86,57 @@ class RTVEALaCartaIE(InfoExtractor):
|
|||||||
video_id)['page']['items'][0]
|
video_id)['page']['items'][0]
|
||||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
|
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
|
||||||
png = self._download_webpage(png_url, video_id, 'Downloading url information')
|
png = self._download_webpage(png_url, video_id, 'Downloading url information')
|
||||||
video_url = self._decrypt_url(png)
|
video_url = _decrypt_url(png)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': info['title'],
|
'title': info['title'],
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'thumbnail': info['image'],
|
'thumbnail': info.get('image'),
|
||||||
|
'page_url': url,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class RTVELiveIE(InfoExtractor):
    """Extractor for RTVE.es live stream pages.

    The stream URL is recovered by passing a downloaded PNG to the
    module-level ``_decrypt_url`` helper.
    """

    IE_NAME = 'rtve.es:live'
    IE_DESC = 'RTVE.es live streams'
    _VALID_URL = r'http://www\.rtve\.es/(?:deportes/directo|noticias|television)/(?P<id>[a-zA-Z0-9-]+)'

    _TESTS = [{
        'url': 'http://www.rtve.es/noticias/directo-la-1/',
        'info_dict': {
            'id': 'directo-la-1',
            'ext': 'flv',
            'title': 're:^La 1 de TVE [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$',
        },
        'params': {
            'skip_download': 'live stream',
        }
    }]

    def _real_extract(self, url):
        """Return the info dict for the live stream page at ``url``."""
        match = re.match(self._VALID_URL, url)
        # Captured before any download; this timestamp is appended to the
        # title below.
        started_at = time.gmtime()
        video_id = match.group('id')

        webpage = self._download_webpage(url, video_id)
        player_url = self._search_regex(
            r'<param name="movie" value="([^"]+)"/>', webpage, 'player URL')
        page_title = remove_end(self._og_search_title(webpage), ' en directo')
        timestamp = time.strftime('%Y-%m-%dZ%H%M%S', started_at)
        title = page_title + ' ' + timestamp

        # The PNG is addressed by the numeric player id, not the URL slug.
        vidplayer_id = self._search_regex(
            r' id="vidplayer([0-9]+)"', webpage, 'internal video ID')
        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id
        png = self._download_webpage(png_url, video_id, 'Downloading url information')

        info = {
            'id': video_id,
            'ext': 'flv',
            'title': title,
            'url': _decrypt_url(png),
            'app': 'rtve-live-live?ovpfv=2.1.2',
            'player_url': player_url,
            'rtmp_live': True,
        }
        return info
|
||||||
|
56
youtube_dl/extractor/sbs.py
Normal file
56
youtube_dl/extractor/sbs.py
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
js_to_json,
|
||||||
|
remove_end,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class SBSIE(InfoExtractor):
    """Extractor for sbs.com.au on-demand single-video pages.

    The page only supplies metadata and a release URL; the result is a
    ``url_transparent`` entry, so the actual media extraction is delegated
    to whichever extractor handles that release URL.
    """

    IE_DESC = 'sbs.com.au'
    _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/ondemand/video/single/(?P<id>[0-9]+)/'

    _TESTS = [{
        # Original URL is handled by the generic IE which finds the iframe:
        # http://www.sbs.com.au/thefeed/blog/2014/08/21/dingo-conservation
        'url': 'http://www.sbs.com.au/ondemand/video/single/320403011771/?source=drupal&vertical=thefeed',
        'md5': '3150cf278965eeabb5b4cea1c963fe0a',
        'info_dict': {
            'id': '320403011771',
            'ext': 'flv',
            'title': 'Dingo Conservation',
            'description': 'Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. This family run a dingo conservation park to prevent their extinction',
            'thumbnail': 're:http://.*\.jpg',
        },
        'add_ies': ['generic'],
    }]

    def _real_extract(self, url):
        """Return a ``url_transparent`` info dict for the page at ``url``."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        # The release URLs are a JavaScript object literal in the page; the
        # field name is given so a failed match reports what was missing
        # (it used to be an empty string).
        release_urls_json = js_to_json(self._search_regex(
            r'(?s)playerParams\.releaseUrls\s*=\s*(\{.*?\n\});\n',
            webpage, 'release URLs'))
        release_urls = json.loads(release_urls_json)
        # Prefer the 'progressive' entry, falling back to 'standard'.
        theplatform_url = (
            release_urls.get('progressive') or release_urls.get('standard'))

        title = remove_end(self._og_search_title(webpage), ' (The Feed)')
        description = self._html_search_meta('description', webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': theplatform_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }
|
@@ -61,7 +61,10 @@ class SockshareIE(InfoExtractor):
|
|||||||
r'<a href="([^"]*)".+class="download_file_link"',
|
r'<a href="([^"]*)".+class="download_file_link"',
|
||||||
webpage, 'file url')
|
webpage, 'file url')
|
||||||
video_url = "http://www.sockshare.com" + video_url
|
video_url = "http://www.sockshare.com" + video_url
|
||||||
title = self._html_search_regex(r'<h1>(.+)<strong>', webpage, 'title')
|
title = self._html_search_regex((
|
||||||
|
r'<h1>(.+)<strong>',
|
||||||
|
r'var name = "([^"]+)";'),
|
||||||
|
webpage, 'title', default=None)
|
||||||
thumbnail = self._html_search_regex(
|
thumbnail = self._html_search_regex(
|
||||||
r'<img\s+src="([^"]*)".+?name="bg"',
|
r'<img\s+src="([^"]*)".+?name="bg"',
|
||||||
webpage, 'thumbnail')
|
webpage, 'thumbnail')
|
||||||
|
77
youtube_dl/extractor/sportdeutschland.py
Normal file
77
youtube_dl/extractor/sportdeutschland.py
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_request,
|
||||||
|
parse_iso8601,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class SportDeutschlandIE(InfoExtractor):
    """Extractor for sportdeutschland.tv video pages.

    Metadata comes from a JSON API on splink.tv. Formats are collected from
    an m3u8 playlist and from the entries of the asset's SMIL document.
    """

    _VALID_URL = r'https?://sportdeutschland\.tv/(?P<sport>[^/?#]+)/(?P<id>[^?#/]+)(?:$|[?#])'
    _TEST = {
        'url': 'http://sportdeutschland.tv/badminton/live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen',
        'info_dict': {
            'id': 'live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen',
            'ext': 'mp4',
            'title': 'LIVE: Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen',
            'categories': ['Badminton'],
            'view_count': int,
            'thumbnail': 're:^https?://.*\.jpg',
            'description': 're:^Die Badminton-WM 2014 aus Kopenhagen LIVE',
            'timestamp': 1409043600,
            'upload_date': '20140826',
        },
        'params': {
            'skip_download': 'Live stream',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video page at ``url``."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        sport_id = mobj.group('sport')

        api_url = 'http://splink.tv/api/permalinks/%s/%s' % (
            sport_id, video_id)
        # The request sends a vendor-specific Accept type and the page URL
        # as Referer.
        req = compat_urllib_request.Request(api_url, headers={
            'Accept': 'application/vnd.vidibus.v2.html+json',
            'Referer': url,
        })
        data = self._download_json(req, video_id)

        categories = list(data.get('section', {}).get('tags', {}).values())
        asset = data['asset']

        # The m3u8 URL is derived from the SMIL URL by swapping the
        # extension.
        smil_url = asset['video']
        m3u8_url = smil_url.replace('.smil', '.m3u8')
        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')

        smil_doc = self._download_xml(
            smil_url, video_id, note='Downloading SMIL metadata')
        base_url = smil_doc.find('./head/meta').attrib['base']
        formats.extend([{
            # Was misspelled 'rmtp'.
            'format_id': 'rtmp',
            'url': base_url,
            'play_path': n.attrib['src'],
            'ext': 'flv',
            'preference': -100,
            'format_note': 'Seems to fail at example stream',
        } for n in smil_doc.findall('./body/video')])
        self._sort_formats(formats)

        return {
            'id': video_id,
            'formats': formats,
            'title': asset['title'],
            'thumbnail': asset.get('image'),
            'description': asset.get('teaser'),
            'categories': categories,
            'view_count': asset.get('views'),
            # Optional like the other asset fields; a missing key must not
            # abort the extraction.
            'rtmp_live': asset.get('live'),
            'timestamp': parse_iso8601(asset.get('date')),
        }
|
||||||
|
|
@@ -151,6 +151,19 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
'duration': 62,
|
'duration': 62,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'note': 'video player needs Referer',
|
||||||
|
'url': 'http://vimeo.com/user22258446/review/91613211/13f927e053',
|
||||||
|
'md5': '6295fdab8f4bf6a002d058b2c6dce276',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '91613211',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Death by dogma versus assembling agile - Sander Hoogendoorn',
|
||||||
|
'uploader': 'DevWeek Events',
|
||||||
|
'duration': 2773,
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
}
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -205,6 +218,8 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
if data is not None:
|
if data is not None:
|
||||||
headers = headers.copy()
|
headers = headers.copy()
|
||||||
headers.update(data)
|
headers.update(data)
|
||||||
|
if 'Referer' not in headers:
|
||||||
|
headers['Referer'] = url
|
||||||
|
|
||||||
# Extract ID from URL
|
# Extract ID from URL
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
@@ -44,7 +44,7 @@ class VodlockerIE(InfoExtractor):
|
|||||||
req, video_id, 'Downloading video page')
|
req, video_id, 'Downloading video page')
|
||||||
|
|
||||||
title = self._search_regex(
|
title = self._search_regex(
|
||||||
r'id="file_title".*?>\s*(.*?)\s*<span', webpage, 'title')
|
r'id="file_title".*?>\s*(.*?)\s*<(?:br|span)', webpage, 'title')
|
||||||
thumbnail = self._search_regex(
|
thumbnail = self._search_regex(
|
||||||
r'image:\s*"(http[^\"]+)",', webpage, 'thumbnail')
|
r'image:\s*"(http[^\"]+)",', webpage, 'thumbnail')
|
||||||
url = self._search_regex(
|
url = self._search_regex(
|
||||||
|
@@ -2,29 +2,43 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import hashlib
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import unified_strdate
|
||||||
unified_strdate,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class WatIE(InfoExtractor):
|
class WatIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'
|
_VALID_URL = r'http://www\.wat\.tv/video/(?P<display_id>.*)-(?P<short_id>.*?)_.*?\.html'
|
||||||
IE_NAME = 'wat.tv'
|
IE_NAME = 'wat.tv'
|
||||||
_TEST = {
|
_TESTS = [
|
||||||
'url': 'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
|
{
|
||||||
'info_dict': {
|
'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
|
||||||
'id': '10631273',
|
'md5': 'ce70e9223945ed26a8056d413ca55dc9',
|
||||||
'ext': 'mp4',
|
'info_dict': {
|
||||||
'title': 'World War Z - Philadelphia VOST',
|
'id': '11713067',
|
||||||
'description': 'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr',
|
'display_id': 'soupe-figues-l-orange-aux-epices',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Soupe de figues à l\'orange et aux épices',
|
||||||
|
'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.',
|
||||||
|
'upload_date': '20140819',
|
||||||
|
'duration': 120,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
'params': {
|
{
|
||||||
# Sometimes wat serves the whole file with the --test option
|
'url': 'http://www.wat.tv/video/gregory-lemarchal-voix-ange-6z1v7_6ygkj_.html',
|
||||||
'skip_download': True,
|
'md5': 'fbc84e4378165278e743956d9c1bf16b',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '11713075',
|
||||||
|
'display_id': 'gregory-lemarchal-voix-ange',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)',
|
||||||
|
'description': 'md5:b7a849cf16a2b733d9cd10c52906dee3',
|
||||||
|
'upload_date': '20140816',
|
||||||
|
'duration': 2910,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
}
|
]
|
||||||
|
|
||||||
def download_video_info(self, real_id):
|
def download_video_info(self, real_id):
|
||||||
# 'contentv4' is used in the website, but it also returns the related
|
# 'contentv4' is used in the website, but it also returns the related
|
||||||
@@ -36,13 +50,20 @@ class WatIE(InfoExtractor):
|
|||||||
def real_id_for_chapter(chapter):
|
def real_id_for_chapter(chapter):
|
||||||
return chapter['tc_start'].split('-')[0]
|
return chapter['tc_start'].split('-')[0]
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
short_id = mobj.group('shortID')
|
short_id = mobj.group('short_id')
|
||||||
webpage = self._download_webpage(url, short_id)
|
display_id = mobj.group('display_id')
|
||||||
|
webpage = self._download_webpage(url, display_id or short_id)
|
||||||
real_id = self._search_regex(r'xtpage = ".*-(.*?)";', webpage, 'real id')
|
real_id = self._search_regex(r'xtpage = ".*-(.*?)";', webpage, 'real id')
|
||||||
|
|
||||||
video_info = self.download_video_info(real_id)
|
video_info = self.download_video_info(real_id)
|
||||||
|
|
||||||
|
geo_list = video_info.get('geoList')
|
||||||
|
country = geo_list[0] if geo_list else ''
|
||||||
|
|
||||||
chapters = video_info['chapters']
|
chapters = video_info['chapters']
|
||||||
first_chapter = chapters[0]
|
first_chapter = chapters[0]
|
||||||
|
files = video_info['files']
|
||||||
|
first_file = files[0]
|
||||||
|
|
||||||
if real_id_for_chapter(first_chapter) != real_id:
|
if real_id_for_chapter(first_chapter) != real_id:
|
||||||
self.to_screen('Multipart video detected')
|
self.to_screen('Multipart video detected')
|
||||||
@@ -61,12 +82,47 @@ class WatIE(InfoExtractor):
|
|||||||
upload_date = unified_strdate(first_chapter['date_diffusion'])
|
upload_date = unified_strdate(first_chapter['date_diffusion'])
|
||||||
# Otherwise we can continue and extract just one part, we have to use
|
# Otherwise we can continue and extract just one part, we have to use
|
||||||
# the short id for getting the video url
|
# the short id for getting the video url
|
||||||
|
|
||||||
|
formats = [{
|
||||||
|
'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
|
||||||
|
'format_id': 'Mobile',
|
||||||
|
}]
|
||||||
|
|
||||||
|
fmts = [('SD', 'web')]
|
||||||
|
if first_file.get('hasHD'):
|
||||||
|
fmts.append(('HD', 'webhd'))
|
||||||
|
|
||||||
|
def compute_token(param):
|
||||||
|
timestamp = '%08x' % int(self._download_webpage(
|
||||||
|
'http://www.wat.tv/servertime', real_id,
|
||||||
|
'Downloading server time').split('|')[0])
|
||||||
|
magic = '9b673b13fa4682ed14c3cfa5af5310274b514c4133e9b3a81e6e3aba009l2564'
|
||||||
|
return '%s/%s' % (hashlib.md5((magic + param + timestamp).encode('ascii')).hexdigest(), timestamp)
|
||||||
|
|
||||||
|
for fmt in fmts:
|
||||||
|
webid = '/%s/%s' % (fmt[1], real_id)
|
||||||
|
video_url = self._download_webpage(
|
||||||
|
'http://www.wat.tv/get%s?token=%s&getURL=1&country=%s' % (webid, compute_token(webid), country),
|
||||||
|
real_id,
|
||||||
|
'Downloding %s video URL' % fmt[0],
|
||||||
|
'Failed to download %s video URL' % fmt[0],
|
||||||
|
False)
|
||||||
|
if not video_url:
|
||||||
|
continue
|
||||||
|
formats.append({
|
||||||
|
'url': video_url,
|
||||||
|
'ext': 'mp4',
|
||||||
|
'format_id': fmt[0],
|
||||||
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': real_id,
|
'id': real_id,
|
||||||
'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
|
'display_id': display_id,
|
||||||
'title': first_chapter['title'],
|
'title': first_chapter['title'],
|
||||||
'thumbnail': first_chapter['preview'],
|
'thumbnail': first_chapter['preview'],
|
||||||
'description': first_chapter['description'],
|
'description': first_chapter['description'],
|
||||||
'view_count': video_info['views'],
|
'view_count': video_info['views'],
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
|
'duration': first_file['duration'],
|
||||||
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
52
youtube_dl/extractor/wayofthemaster.py
Normal file
52
youtube_dl/extractor/wayofthemaster.py
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class WayOfTheMasterIE(InfoExtractor):
    """Extractor for video pages on www.wayofthemaster.com."""

    _VALID_URL = r'https?://www\.wayofthemaster\.com/([^/?#]*/)*(?P<id>[^/?#]+)\.s?html(?:$|[?#])'

    _TEST = {
        'url': 'http://www.wayofthemaster.com/hbks.shtml',
        'md5': '5316b57487ada8480606a93cb3d18d24',
        'info_dict': {
            'id': 'hbks',
            'ext': 'mp4',
            'title': 'Intelligent Design vs. Evolution',
        },
    }

    def _real_extract(self, url):
        """Return the info dict (id, title, formats) for the page at url."""
        # The page's base name (without .html/.shtml) serves as the video id.
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        # Prefer the alt text of the title image; fall back to the <title> tag.
        title = self._search_regex(
            r'<img src="images/title_[^"]+".*?alt="([^"]+)"',
            webpage, 'title', default=None)
        if title is None:
            title = self._html_search_regex(
                r'<title>(.*?)</title>', webpage, 'page title')

        # The flash player's "vid" query parameter is the common prefix of the
        # low and high quality MP4 URLs.
        url_base = self._search_regex(
            r'<param\s+name="?movie"?\s+value=".*?/wotm_videoplayer_highlow[0-9]*\.swf\?vid=([^"]+)"',
            webpage, 'URL base')

        formats = [
            {
                'format_id': name,
                'quality': quality,
                'url': '%s_%s.mp4' % (url_base, name),
            }
            for quality, name in enumerate(('low', 'high'), 1)
        ]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
        }
|
@@ -14,7 +14,7 @@ from ..utils import (
|
|||||||
|
|
||||||
class XHamsterIE(InfoExtractor):
|
class XHamsterIE(InfoExtractor):
|
||||||
"""Information Extractor for xHamster"""
|
"""Information Extractor for xHamster"""
|
||||||
_VALID_URL = r'http://(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
|
_VALID_URL = r'http://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
|
'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
|
||||||
|
@@ -37,6 +37,7 @@ from ..utils import (
|
|||||||
class YoutubeBaseInfoExtractor(InfoExtractor):
|
class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
"""Provide base functions for Youtube extractors"""
|
"""Provide base functions for Youtube extractors"""
|
||||||
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
|
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
|
||||||
|
_TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor'
|
||||||
_LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
|
_LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
|
||||||
_AGE_URL = 'https://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
|
_AGE_URL = 'https://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
|
||||||
_NETRC_MACHINE = 'youtube'
|
_NETRC_MACHINE = 'youtube'
|
||||||
@@ -50,12 +51,19 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
fatal=False))
|
fatal=False))
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
|
"""
|
||||||
|
Attempt to log in to YouTube.
|
||||||
|
True is returned if successful or skipped.
|
||||||
|
False is returned if login failed.
|
||||||
|
|
||||||
|
If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
|
||||||
|
"""
|
||||||
(username, password) = self._get_login_info()
|
(username, password) = self._get_login_info()
|
||||||
# No authentication to be performed
|
# No authentication to be performed
|
||||||
if username is None:
|
if username is None:
|
||||||
if self._LOGIN_REQUIRED:
|
if self._LOGIN_REQUIRED:
|
||||||
raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
|
raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
|
||||||
return False
|
return True
|
||||||
|
|
||||||
login_page = self._download_webpage(
|
login_page = self._download_webpage(
|
||||||
self._LOGIN_URL, None,
|
self._LOGIN_URL, None,
|
||||||
@@ -73,6 +81,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
u'Email': username,
|
u'Email': username,
|
||||||
u'GALX': galx,
|
u'GALX': galx,
|
||||||
u'Passwd': password,
|
u'Passwd': password,
|
||||||
|
|
||||||
u'PersistentCookie': u'yes',
|
u'PersistentCookie': u'yes',
|
||||||
u'_utf8': u'霱',
|
u'_utf8': u'霱',
|
||||||
u'bgresponse': u'js_disabled',
|
u'bgresponse': u'js_disabled',
|
||||||
@@ -88,6 +97,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
u'uilel': u'3',
|
u'uilel': u'3',
|
||||||
u'hl': u'en_US',
|
u'hl': u'en_US',
|
||||||
}
|
}
|
||||||
|
|
||||||
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
|
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
|
||||||
# chokes on unicode
|
# chokes on unicode
|
||||||
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
|
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
|
||||||
@@ -99,6 +109,68 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
note=u'Logging in', errnote=u'unable to log in', fatal=False)
|
note=u'Logging in', errnote=u'unable to log in', fatal=False)
|
||||||
if login_results is False:
|
if login_results is False:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
|
||||||
|
raise ExtractorError(u'Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
|
||||||
|
|
||||||
|
# Two-Factor
|
||||||
|
# TODO add SMS and phone call support - these require making a request and then prompting the user
|
||||||
|
|
||||||
|
if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', login_results) is not None:
|
||||||
|
tfa_code = self._get_tfa_info()
|
||||||
|
|
||||||
|
if tfa_code is None:
|
||||||
|
self._downloader.report_warning(u'Two-factor authentication required. Provide it with --twofactor <code>')
|
||||||
|
self._downloader.report_warning(u'(Note that only TOTP (Google Authenticator App) codes work at this time.)')
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Unlike the first login form, secTok and timeStmp are both required for the TFA form
|
||||||
|
|
||||||
|
match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
|
||||||
|
if match is None:
|
||||||
|
self._downloader.report_warning(u'Failed to get secTok - did the page structure change?')
|
||||||
|
secTok = match.group(1)
|
||||||
|
match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
|
||||||
|
if match is None:
|
||||||
|
self._downloader.report_warning(u'Failed to get timeStmp - did the page structure change?')
|
||||||
|
timeStmp = match.group(1)
|
||||||
|
|
||||||
|
tfa_form_strs = {
|
||||||
|
u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
|
||||||
|
u'smsToken': u'',
|
||||||
|
u'smsUserPin': tfa_code,
|
||||||
|
u'smsVerifyPin': u'Verify',
|
||||||
|
|
||||||
|
u'PersistentCookie': u'yes',
|
||||||
|
u'checkConnection': u'',
|
||||||
|
u'checkedDomains': u'youtube',
|
||||||
|
u'pstMsg': u'1',
|
||||||
|
u'secTok': secTok,
|
||||||
|
u'timeStmp': timeStmp,
|
||||||
|
u'service': u'youtube',
|
||||||
|
u'hl': u'en_US',
|
||||||
|
}
|
||||||
|
tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in tfa_form_strs.items())
|
||||||
|
tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')
|
||||||
|
|
||||||
|
tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
|
||||||
|
tfa_results = self._download_webpage(
|
||||||
|
tfa_req, None,
|
||||||
|
note=u'Submitting TFA code', errnote=u'unable to submit tfa', fatal=False)
|
||||||
|
|
||||||
|
if tfa_results is False:
|
||||||
|
return False
|
||||||
|
|
||||||
|
if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None:
|
||||||
|
self._downloader.report_warning(u'Two-factor code expired. Please try again, or use a one-use backup code instead.')
|
||||||
|
return False
|
||||||
|
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
|
||||||
|
self._downloader.report_warning(u'unable to log in - did the page structure change?')
|
||||||
|
return False
|
||||||
|
if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
|
||||||
|
self._downloader.report_warning(u'Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
|
||||||
|
return False
|
||||||
|
|
||||||
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
|
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
|
||||||
self._downloader.report_warning(u'unable to log in: bad username or password')
|
self._downloader.report_warning(u'unable to log in: bad username or password')
|
||||||
return False
|
return False
|
||||||
|
@@ -9,6 +9,7 @@ from .ffmpeg import (
|
|||||||
FFmpegEmbedSubtitlePP,
|
FFmpegEmbedSubtitlePP,
|
||||||
)
|
)
|
||||||
from .xattrpp import XAttrMetadataPP
|
from .xattrpp import XAttrMetadataPP
|
||||||
|
from .execafterdownload import ExecAfterDownloadPP
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
'AtomicParsleyPP',
|
'AtomicParsleyPP',
|
||||||
@@ -19,4 +20,5 @@ __all__ = [
|
|||||||
'FFmpegExtractAudioPP',
|
'FFmpegExtractAudioPP',
|
||||||
'FFmpegEmbedSubtitlePP',
|
'FFmpegEmbedSubtitlePP',
|
||||||
'XAttrMetadataPP',
|
'XAttrMetadataPP',
|
||||||
|
'ExecAfterDownloadPP',
|
||||||
]
|
]
|
||||||
|
31
youtube_dl/postprocessor/execafterdownload.py
Normal file
31
youtube_dl/postprocessor/execafterdownload.py
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
from .common import PostProcessor
|
||||||
|
from ..utils import (
|
||||||
|
shlex_quote,
|
||||||
|
PostProcessingError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ExecAfterDownloadPP(PostProcessor):
    """Post-processor that runs a shell command on the downloaded file.

    exec_cmd is a shell command line. Every '{}' in it is replaced with the
    shell-quoted path of the downloaded file; if it contains no '{}', the
    path is appended as the last argument.
    """

    def __init__(self, downloader=None, verboseOutput=None, exec_cmd=None):
        # Forward the downloader to the base class instead of silently
        # dropping it: run() reports through self._downloader.
        # NOTE(review): assumes PostProcessor.__init__ accepts the downloader
        # and stores it as self._downloader - confirm against common.py.
        super(ExecAfterDownloadPP, self).__init__(downloader)
        self.verboseOutput = verboseOutput
        self.exec_cmd = exec_cmd

    def run(self, information):
        """Execute the configured command for information['filepath'].

        Returns the tuple (None, information).
        Raises PostProcessingError if the command exits with a non-zero
        status.
        """
        cmd = self.exec_cmd
        if '{}' not in cmd:
            cmd += ' {}'

        # Only the file path is quoted; the rest of the command line is
        # handed to the shell verbatim.
        cmd = cmd.replace('{}', shlex_quote(information['filepath']))

        self._downloader.to_screen("[exec] Executing command: %s" % cmd)
        # shell=True is deliberate here: exec_cmd is a complete shell command
        # line, not an argument vector.
        retCode = subprocess.call(cmd, shell=True)
        if retCode != 0:
            raise PostProcessingError(
                'Command returned error code %d' % retCode)

        return None, information  # by default, keep file and do nothing
|
||||||
|
|
@@ -192,6 +192,13 @@ try:
|
|||||||
except ImportError: # Python 2.6
|
except ImportError: # Python 2.6
|
||||||
from xml.parsers.expat import ExpatError as compat_xml_parse_error
|
from xml.parsers.expat import ExpatError as compat_xml_parse_error
|
||||||
|
|
||||||
|
try:
|
||||||
|
from shlex import quote as shlex_quote
|
||||||
|
except ImportError: # Python < 3.3
|
||||||
|
def shlex_quote(s):
|
||||||
|
return "'" + s.replace("'", "'\"'\"'") + "'"
|
||||||
|
|
||||||
|
|
||||||
def compat_ord(c):
|
def compat_ord(c):
|
||||||
if type(c) is int: return c
|
if type(c) is int: return c
|
||||||
else: return ord(c)
|
else: return ord(c)
|
||||||
@@ -759,10 +766,9 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
|||||||
return ret
|
return ret
|
||||||
|
|
||||||
def http_request(self, req):
|
def http_request(self, req):
|
||||||
for h,v in std_headers.items():
|
for h, v in std_headers.items():
|
||||||
if h in req.headers:
|
if h not in req.headers:
|
||||||
del req.headers[h]
|
req.add_header(h, v)
|
||||||
req.add_header(h, v)
|
|
||||||
if 'Youtubedl-no-compression' in req.headers:
|
if 'Youtubedl-no-compression' in req.headers:
|
||||||
if 'Accept-encoding' in req.headers:
|
if 'Accept-encoding' in req.headers:
|
||||||
del req.headers['Accept-encoding']
|
del req.headers['Accept-encoding']
|
||||||
@@ -855,6 +861,7 @@ def unified_strdate(date_str):
|
|||||||
'%Y/%m/%d',
|
'%Y/%m/%d',
|
||||||
'%d.%m.%Y',
|
'%d.%m.%Y',
|
||||||
'%d/%m/%Y',
|
'%d/%m/%Y',
|
||||||
|
'%d/%m/%y',
|
||||||
'%Y/%m/%d %H:%M:%S',
|
'%Y/%m/%d %H:%M:%S',
|
||||||
'%Y-%m-%d %H:%M:%S',
|
'%Y-%m-%d %H:%M:%S',
|
||||||
'%d.%m.%Y %H:%M',
|
'%d.%m.%Y %H:%M',
|
||||||
@@ -1285,6 +1292,12 @@ def remove_start(s, start):
|
|||||||
return s
|
return s
|
||||||
|
|
||||||
|
|
||||||
|
def remove_end(s, end):
    """Return s with the trailing suffix end removed.

    s is returned unchanged when it does not end with end, or when end is
    empty.
    """
    # Every string "ends with" the empty string, and s[:-0] is s[:0], so an
    # empty suffix must be rejected explicitly or the result would be ''.
    if end and s.endswith(end):
        return s[:-len(end)]
    return s
|
||||||
|
|
||||||
|
|
||||||
def url_basename(url):
|
def url_basename(url):
|
||||||
path = compat_urlparse.urlparse(url).path
|
path = compat_urlparse.urlparse(url).path
|
||||||
return path.strip(u'/').split(u'/')[-1]
|
return path.strip(u'/').split(u'/')[-1]
|
||||||
@@ -1324,7 +1337,7 @@ def parse_duration(s):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
m = re.match(
|
m = re.match(
|
||||||
r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?(?::[0-9]+)?$', s)
|
r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?(?::[0-9]+)?(?P<ms>\.[0-9]+)?$', s)
|
||||||
if not m:
|
if not m:
|
||||||
return None
|
return None
|
||||||
res = int(m.group('secs'))
|
res = int(m.group('secs'))
|
||||||
@@ -1332,6 +1345,8 @@ def parse_duration(s):
|
|||||||
res += int(m.group('mins')) * 60
|
res += int(m.group('mins')) * 60
|
||||||
if m.group('hours'):
|
if m.group('hours'):
|
||||||
res += int(m.group('hours')) * 60 * 60
|
res += int(m.group('hours')) * 60 * 60
|
||||||
|
if m.group('ms'):
|
||||||
|
res += float(m.group('ms'))
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
@@ -1442,6 +1457,12 @@ def urlencode_postdata(*args, **kargs):
|
|||||||
return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
|
return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
|
||||||
|
|
||||||
|
|
||||||
|
# etree_iter(node) iterates over node and all of its descendants in document
# order.
try:
    etree_iter = xml.etree.ElementTree.Element.iter
except AttributeError:  # Python <=2.6
    def etree_iter(n):
        # getiterator() includes n itself, like Element.iter does;
        # findall('.//*') would yield only the descendants and skip the root.
        return n.getiterator()
|
||||||
|
|
||||||
|
|
||||||
def parse_xml(s):
|
def parse_xml(s):
|
||||||
class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
|
class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
|
||||||
def doctype(self, name, pubid, system):
|
def doctype(self, name, pubid, system):
|
||||||
@@ -1449,7 +1470,14 @@ def parse_xml(s):
|
|||||||
|
|
||||||
parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
|
parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
|
||||||
kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
|
kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
|
||||||
return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
|
tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
|
||||||
|
# Fix up XML parser in Python 2.x
|
||||||
|
if sys.version_info < (3, 0):
|
||||||
|
for n in etree_iter(tree):
|
||||||
|
if n.text is not None:
|
||||||
|
if not isinstance(n.text, compat_str):
|
||||||
|
n.text = n.text.decode('utf-8')
|
||||||
|
return tree
|
||||||
|
|
||||||
|
|
||||||
if sys.version_info < (3, 0) and sys.platform == 'win32':
|
if sys.version_info < (3, 0) and sys.platform == 'win32':
|
||||||
|
@@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2014.08.22.1'
|
__version__ = '2014.08.27.1'
|
||||||
|
Reference in New Issue
Block a user