Compare commits
109 Commits
2014.03.24
...
2014.04.03
Author | SHA1 | Date | |
---|---|---|---|
|
a9f304031b | ||
|
9271bc8355 | ||
|
968ed2a777 | ||
|
24de5d2556 | ||
|
d26e981df4 | ||
|
e45d40b171 | ||
|
4a419b8851 | ||
|
bec1fad223 | ||
|
177fed41bc | ||
|
b900e7cba4 | ||
|
14cb4979f0 | ||
|
69e61e30fe | ||
|
cce929eaac | ||
|
b6cfde99b7 | ||
|
1be99f052d | ||
|
2410c43d83 | ||
|
aea6e7fc3c | ||
|
91a76c40c0 | ||
|
d2b194607c | ||
|
f6177462db | ||
|
9ddaf4ef8c | ||
|
97b5573848 | ||
|
18c95c1ab0 | ||
|
0479c625a4 | ||
|
f659951e22 | ||
|
5853a7316e | ||
|
a612753db9 | ||
|
c8fc3fb524 | ||
|
5912c639df | ||
|
017e4dd58c | ||
|
651486621d | ||
|
28d9032c88 | ||
|
16f4eb723a | ||
|
1cbd410620 | ||
|
d41ac5f5dc | ||
|
9c1fc022ae | ||
|
83d548ef0f | ||
|
c72477bd32 | ||
|
9a7b072e38 | ||
|
cbc4a6cc7e | ||
|
cd7481a39e | ||
|
acd213ed6d | ||
|
77ffa95701 | ||
|
2b25cb5d76 | ||
|
62fec3b2ff | ||
|
e79162558e | ||
|
2da67107ee | ||
|
2ff7f8975e | ||
|
87a2566048 | ||
|
986f56736b | ||
|
2583a0308b | ||
|
40c716d2a2 | ||
|
79bfd01001 | ||
|
f2bcdd8e02 | ||
|
8c5850eeb4 | ||
|
bd3e077a2d | ||
|
7e70ac36b3 | ||
|
2cc0082dc0 | ||
|
056b56688a | ||
|
b17418313f | ||
|
e9a6fd6a68 | ||
|
bf30f3bd9d | ||
|
330edf2d84 | ||
|
43f775e4ca | ||
|
8f6562448c | ||
|
263f4b514b | ||
|
f0da3f1ef9 | ||
|
cb3ac1c610 | ||
|
8efd15f477 | ||
|
d26ebe990f | ||
|
28acf5500a | ||
|
214c22c704 | ||
|
8cdafb47b9 | ||
|
0dae5083f1 | ||
|
4c89bbd22c | ||
|
e2b06e76c1 | ||
|
e9c076c317 | ||
|
6c072e7d25 | ||
|
ac6c104871 | ||
|
69c01a9f68 | ||
|
e55213ce35 | ||
|
24a2aac445 | ||
|
98acdc895b | ||
|
bd3b5b8b10 | ||
|
9a90636805 | ||
|
6a66ae96ed | ||
|
2c8a4ba6b5 | ||
|
ad8915b729 | ||
|
34cbc7ee8d | ||
|
a59e40a1ea | ||
|
ad0a75db6b | ||
|
1d0e49e1c7 | ||
|
b4461b6ebe | ||
|
80959224fe | ||
|
865cbf4fc5 | ||
|
196f061cac | ||
|
99b380c33b | ||
|
02e4482e22 | ||
|
b8a792de80 | ||
|
fac55558ad | ||
|
b2799ff96d | ||
|
7a249480b4 | ||
|
f605128d13 | ||
|
ba40a74666 | ||
|
fb8ae2d438 | ||
|
893f8832b5 | ||
|
878d11ec29 | ||
|
515bbe4b5b | ||
|
75f2e25ba9 |
@@ -3,5 +3,4 @@ include test/*.py
|
|||||||
include test/*.json
|
include test/*.json
|
||||||
include youtube-dl.bash-completion
|
include youtube-dl.bash-completion
|
||||||
include youtube-dl.1
|
include youtube-dl.1
|
||||||
recursive-include docs *
|
recursive-include docs Makefile conf.py *.rst
|
||||||
prune docs/_build
|
|
||||||
|
3
Makefile
3
Makefile
@@ -72,8 +72,9 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
|
|||||||
--exclude '__pycache' \
|
--exclude '__pycache' \
|
||||||
--exclude '.git' \
|
--exclude '.git' \
|
||||||
--exclude 'testdata' \
|
--exclude 'testdata' \
|
||||||
|
--exclude 'docs/_build' \
|
||||||
-- \
|
-- \
|
||||||
bin devscripts test youtube_dl \
|
bin devscripts test youtube_dl docs \
|
||||||
CHANGELOG LICENSE README.md README.txt \
|
CHANGELOG LICENSE README.md README.txt \
|
||||||
Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \
|
Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \
|
||||||
youtube-dl
|
youtube-dl
|
||||||
|
@@ -65,6 +65,7 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
configuration in ~/.config/youtube-dl.conf
|
configuration in ~/.config/youtube-dl.conf
|
||||||
(%APPDATA%/youtube-dl/config.txt on
|
(%APPDATA%/youtube-dl/config.txt on
|
||||||
Windows)
|
Windows)
|
||||||
|
--encoding ENCODING Force the specified encoding (experimental)
|
||||||
|
|
||||||
## Video Selection:
|
## Video Selection:
|
||||||
--playlist-start NUMBER playlist video to start at (default is 1)
|
--playlist-start NUMBER playlist video to start at (default is 1)
|
||||||
@@ -169,6 +170,7 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
|
|
||||||
## Verbosity / Simulation Options:
|
## Verbosity / Simulation Options:
|
||||||
-q, --quiet activates quiet mode
|
-q, --quiet activates quiet mode
|
||||||
|
--no-warnings Ignore warnings
|
||||||
-s, --simulate do not download the video and do not write
|
-s, --simulate do not download the video and do not write
|
||||||
anything to disk
|
anything to disk
|
||||||
--skip-download do not download the video
|
--skip-download do not download the video
|
||||||
@@ -180,7 +182,9 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
--get-duration simulate, quiet but print video length
|
--get-duration simulate, quiet but print video length
|
||||||
--get-filename simulate, quiet but print output filename
|
--get-filename simulate, quiet but print output filename
|
||||||
--get-format simulate, quiet but print output format
|
--get-format simulate, quiet but print output format
|
||||||
-j, --dump-json simulate, quiet but print JSON information
|
-j, --dump-json simulate, quiet but print JSON information.
|
||||||
|
See --output for a description of available
|
||||||
|
keys.
|
||||||
--newline output progress bar as new lines
|
--newline output progress bar as new lines
|
||||||
--no-progress do not print progress bar
|
--no-progress do not print progress bar
|
||||||
--console-title display progress in console titlebar
|
--console-title display progress in console titlebar
|
||||||
|
@@ -143,5 +143,19 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
|
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
|
||||||
self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])
|
self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])
|
||||||
|
|
||||||
|
def test_ComedyCentralShows(self):
|
||||||
|
self.assertMatch(
|
||||||
|
'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
|
||||||
|
['ComedyCentralShows'])
|
||||||
|
self.assertMatch(
|
||||||
|
'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news',
|
||||||
|
['ComedyCentralShows'])
|
||||||
|
self.assertMatch(
|
||||||
|
'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
|
||||||
|
['ComedyCentralShows'])
|
||||||
|
self.assertMatch(
|
||||||
|
'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
|
||||||
|
['ComedyCentralShows'])
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@@ -42,6 +42,7 @@ from youtube_dl.extractor import (
|
|||||||
ToypicsUserIE,
|
ToypicsUserIE,
|
||||||
XTubeUserIE,
|
XTubeUserIE,
|
||||||
InstagramUserIE,
|
InstagramUserIE,
|
||||||
|
CSpanIE,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -314,6 +315,19 @@ class TestPlaylists(unittest.TestCase):
|
|||||||
}
|
}
|
||||||
expect_info_dict(self, EXPECTED, test_video)
|
expect_info_dict(self, EXPECTED, test_video)
|
||||||
|
|
||||||
|
def test_CSpan_playlist(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = CSpanIE(dl)
|
||||||
|
result = ie.extract(
|
||||||
|
'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall')
|
||||||
|
self.assertIsPlaylist(result)
|
||||||
|
self.assertEqual(result['id'], '342759')
|
||||||
|
self.assertEqual(
|
||||||
|
result['title'], 'General Motors Ignition Switch Recall')
|
||||||
|
self.assertEqual(len(result['entries']), 9)
|
||||||
|
whole_duration = sum(e['duration'] for e in result['entries'])
|
||||||
|
self.assertEqual(whole_duration, 14855)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@@ -10,6 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|||||||
|
|
||||||
# Various small unit tests
|
# Various small unit tests
|
||||||
import io
|
import io
|
||||||
|
import json
|
||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
#from youtube_dl.utils import htmlentity_transform
|
#from youtube_dl.utils import htmlentity_transform
|
||||||
@@ -36,6 +37,7 @@ from youtube_dl.utils import (
|
|||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
strip_jsonp,
|
||||||
)
|
)
|
||||||
|
|
||||||
if sys.version_info < (3, 0):
|
if sys.version_info < (3, 0):
|
||||||
@@ -272,5 +274,11 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266)
|
self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266)
|
||||||
self.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266)
|
self.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266)
|
||||||
|
|
||||||
|
def test_strip_jsonp(self):
|
||||||
|
stripped = strip_jsonp('cb ([ {"id":"532cb",\n\n\n"x":\n3}\n]\n);')
|
||||||
|
d = json.loads(stripped)
|
||||||
|
self.assertEqual(d, [{"id": "532cb", "x": 3}])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@@ -8,6 +8,7 @@ import datetime
|
|||||||
import errno
|
import errno
|
||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
|
import locale
|
||||||
import os
|
import os
|
||||||
import platform
|
import platform
|
||||||
import re
|
import re
|
||||||
@@ -94,6 +95,7 @@ class YoutubeDL(object):
|
|||||||
usenetrc: Use netrc for authentication instead.
|
usenetrc: Use netrc for authentication instead.
|
||||||
verbose: Print additional info to stdout.
|
verbose: Print additional info to stdout.
|
||||||
quiet: Do not print messages to stdout.
|
quiet: Do not print messages to stdout.
|
||||||
|
no_warnings: Do not print out anything for warnings.
|
||||||
forceurl: Force printing final URL.
|
forceurl: Force printing final URL.
|
||||||
forcetitle: Force printing title.
|
forcetitle: Force printing title.
|
||||||
forceid: Force printing ID.
|
forceid: Force printing ID.
|
||||||
@@ -158,6 +160,7 @@ class YoutubeDL(object):
|
|||||||
include_ads: Download ads as well
|
include_ads: Download ads as well
|
||||||
default_search: Prepend this string if an input url is not valid.
|
default_search: Prepend this string if an input url is not valid.
|
||||||
'auto' for elaborate guessing
|
'auto' for elaborate guessing
|
||||||
|
encoding: Use this encoding instead of the system-specified.
|
||||||
|
|
||||||
The following parameters are not used by YoutubeDL itself, they are used by
|
The following parameters are not used by YoutubeDL itself, they are used by
|
||||||
the FileDownloader:
|
the FileDownloader:
|
||||||
@@ -376,6 +379,8 @@ class YoutubeDL(object):
|
|||||||
if self.params.get('logger') is not None:
|
if self.params.get('logger') is not None:
|
||||||
self.params['logger'].warning(message)
|
self.params['logger'].warning(message)
|
||||||
else:
|
else:
|
||||||
|
if self.params.get('no_warnings'):
|
||||||
|
return
|
||||||
if self._err_file.isatty() and os.name != 'nt':
|
if self._err_file.isatty() and os.name != 'nt':
|
||||||
_msg_header = '\033[0;33mWARNING:\033[0m'
|
_msg_header = '\033[0;33mWARNING:\033[0m'
|
||||||
else:
|
else:
|
||||||
@@ -697,6 +702,11 @@ class YoutubeDL(object):
|
|||||||
def process_video_result(self, info_dict, download=True):
|
def process_video_result(self, info_dict, download=True):
|
||||||
assert info_dict.get('_type', 'video') == 'video'
|
assert info_dict.get('_type', 'video') == 'video'
|
||||||
|
|
||||||
|
if 'id' not in info_dict:
|
||||||
|
raise ExtractorError('Missing "id" field in extractor result')
|
||||||
|
if 'title' not in info_dict:
|
||||||
|
raise ExtractorError('Missing "title" field in extractor result')
|
||||||
|
|
||||||
if 'playlist' not in info_dict:
|
if 'playlist' not in info_dict:
|
||||||
# It isn't part of a playlist
|
# It isn't part of a playlist
|
||||||
info_dict['playlist'] = None
|
info_dict['playlist'] = None
|
||||||
@@ -728,6 +738,9 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
# We check that all the formats have the format and format_id fields
|
# We check that all the formats have the format and format_id fields
|
||||||
for i, format in enumerate(formats):
|
for i, format in enumerate(formats):
|
||||||
|
if 'url' not in format:
|
||||||
|
raise ExtractorError('Missing "url" key in result (index %d)' % i)
|
||||||
|
|
||||||
if format.get('format_id') is None:
|
if format.get('format_id') is None:
|
||||||
format['format_id'] = compat_str(i)
|
format['format_id'] = compat_str(i)
|
||||||
if format.get('format') is None:
|
if format.get('format') is None:
|
||||||
@@ -738,7 +751,7 @@ class YoutubeDL(object):
|
|||||||
)
|
)
|
||||||
# Automatically determine file extension if missing
|
# Automatically determine file extension if missing
|
||||||
if 'ext' not in format:
|
if 'ext' not in format:
|
||||||
format['ext'] = determine_ext(format['url'])
|
format['ext'] = determine_ext(format['url']).lower()
|
||||||
|
|
||||||
format_limit = self.params.get('format_limit', None)
|
format_limit = self.params.get('format_limit', None)
|
||||||
if format_limit:
|
if format_limit:
|
||||||
@@ -863,7 +876,7 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
dn = os.path.dirname(encodeFilename(filename))
|
dn = os.path.dirname(encodeFilename(filename))
|
||||||
if dn != '' and not os.path.exists(dn):
|
if dn and not os.path.exists(dn):
|
||||||
os.makedirs(dn)
|
os.makedirs(dn)
|
||||||
except (OSError, IOError) as err:
|
except (OSError, IOError) as err:
|
||||||
self.report_error('unable to create directory ' + compat_str(err))
|
self.report_error('unable to create directory ' + compat_str(err))
|
||||||
@@ -1197,6 +1210,9 @@ class YoutubeDL(object):
|
|||||||
def print_debug_header(self):
|
def print_debug_header(self):
|
||||||
if not self.params.get('verbose'):
|
if not self.params.get('verbose'):
|
||||||
return
|
return
|
||||||
|
|
||||||
|
write_string('[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' %
|
||||||
|
(locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, self.get_encoding()))
|
||||||
write_string('[debug] youtube-dl version ' + __version__ + '\n')
|
write_string('[debug] youtube-dl version ' + __version__ + '\n')
|
||||||
try:
|
try:
|
||||||
sp = subprocess.Popen(
|
sp = subprocess.Popen(
|
||||||
@@ -1261,3 +1277,19 @@ class YoutubeDL(object):
|
|||||||
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
|
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
|
||||||
opener.addheaders = []
|
opener.addheaders = []
|
||||||
self._opener = opener
|
self._opener = opener
|
||||||
|
|
||||||
|
def encode(self, s):
|
||||||
|
if isinstance(s, bytes):
|
||||||
|
return s # Already encoded
|
||||||
|
|
||||||
|
try:
|
||||||
|
return s.encode(self.get_encoding())
|
||||||
|
except UnicodeEncodeError as err:
|
||||||
|
err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
|
||||||
|
raise
|
||||||
|
|
||||||
|
def get_encoding(self):
|
||||||
|
encoding = self.params.get('encoding')
|
||||||
|
if encoding is None:
|
||||||
|
encoding = preferredencoding()
|
||||||
|
return encoding
|
||||||
|
@@ -51,6 +51,7 @@ __authors__ = (
|
|||||||
'David Wagner',
|
'David Wagner',
|
||||||
'Juan C. Olivares',
|
'Juan C. Olivares',
|
||||||
'Mattias Harrysson',
|
'Mattias Harrysson',
|
||||||
|
'phaer',
|
||||||
)
|
)
|
||||||
|
|
||||||
__license__ = 'Public Domain'
|
__license__ = 'Public Domain'
|
||||||
@@ -255,13 +256,17 @@ def parseOpts(overrideArguments=None):
|
|||||||
general.add_option(
|
general.add_option(
|
||||||
'--bidi-workaround', dest='bidi_workaround', action='store_true',
|
'--bidi-workaround', dest='bidi_workaround', action='store_true',
|
||||||
help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
|
help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
|
||||||
general.add_option('--default-search',
|
general.add_option(
|
||||||
dest='default_search', metavar='PREFIX',
|
'--default-search',
|
||||||
help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
|
dest='default_search', metavar='PREFIX',
|
||||||
|
help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
|
||||||
general.add_option(
|
general.add_option(
|
||||||
'--ignore-config',
|
'--ignore-config',
|
||||||
action='store_true',
|
action='store_true',
|
||||||
help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
|
help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
|
||||||
|
general.add_option(
|
||||||
|
'--encoding', dest='encoding', metavar='ENCODING',
|
||||||
|
help='Force the specified encoding (experimental)')
|
||||||
|
|
||||||
selection.add_option(
|
selection.add_option(
|
||||||
'--playlist-start',
|
'--playlist-start',
|
||||||
@@ -364,6 +369,10 @@ def parseOpts(overrideArguments=None):
|
|||||||
|
|
||||||
verbosity.add_option('-q', '--quiet',
|
verbosity.add_option('-q', '--quiet',
|
||||||
action='store_true', dest='quiet', help='activates quiet mode', default=False)
|
action='store_true', dest='quiet', help='activates quiet mode', default=False)
|
||||||
|
verbosity.add_option(
|
||||||
|
'--no-warnings',
|
||||||
|
dest='no_warnings', action='store_true', default=False,
|
||||||
|
help='Ignore warnings')
|
||||||
verbosity.add_option('-s', '--simulate',
|
verbosity.add_option('-s', '--simulate',
|
||||||
action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
|
action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
|
||||||
verbosity.add_option('--skip-download',
|
verbosity.add_option('--skip-download',
|
||||||
@@ -391,7 +400,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
help='simulate, quiet but print output format', default=False)
|
help='simulate, quiet but print output format', default=False)
|
||||||
verbosity.add_option('-j', '--dump-json',
|
verbosity.add_option('-j', '--dump-json',
|
||||||
action='store_true', dest='dumpjson',
|
action='store_true', dest='dumpjson',
|
||||||
help='simulate, quiet but print JSON information', default=False)
|
help='simulate, quiet but print JSON information. See --output for a description of available keys.', default=False)
|
||||||
verbosity.add_option('--newline',
|
verbosity.add_option('--newline',
|
||||||
action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False)
|
action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False)
|
||||||
verbosity.add_option('--no-progress',
|
verbosity.add_option('--no-progress',
|
||||||
@@ -535,8 +544,6 @@ def parseOpts(overrideArguments=None):
|
|||||||
write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
|
write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
|
||||||
write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
|
write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
|
||||||
write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
|
write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
|
||||||
write_string(u'[debug] Encodings: locale %r, fs %r, out %r, pref: %r\n' %
|
|
||||||
(locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, preferredencoding()))
|
|
||||||
|
|
||||||
return parser, opts, args
|
return parser, opts, args
|
||||||
|
|
||||||
@@ -670,7 +677,7 @@ def _real_main(argv=None):
|
|||||||
date = DateRange.day(opts.date)
|
date = DateRange.day(opts.date)
|
||||||
else:
|
else:
|
||||||
date = DateRange(opts.dateafter, opts.datebefore)
|
date = DateRange(opts.dateafter, opts.datebefore)
|
||||||
if opts.default_search not in ('auto', None) and ':' not in opts.default_search:
|
if opts.default_search not in ('auto', 'auto_warning', None) and ':' not in opts.default_search:
|
||||||
parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')
|
parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')
|
||||||
|
|
||||||
# Do not download videos when there are audio-only formats
|
# Do not download videos when there are audio-only formats
|
||||||
@@ -708,6 +715,7 @@ def _real_main(argv=None):
|
|||||||
'password': opts.password,
|
'password': opts.password,
|
||||||
'videopassword': opts.videopassword,
|
'videopassword': opts.videopassword,
|
||||||
'quiet': (opts.quiet or any_printing),
|
'quiet': (opts.quiet or any_printing),
|
||||||
|
'no_warnings': opts.no_warnings,
|
||||||
'forceurl': opts.geturl,
|
'forceurl': opts.geturl,
|
||||||
'forcetitle': opts.gettitle,
|
'forcetitle': opts.gettitle,
|
||||||
'forceid': opts.getid,
|
'forceid': opts.getid,
|
||||||
@@ -780,6 +788,7 @@ def _real_main(argv=None):
|
|||||||
'include_ads': opts.include_ads,
|
'include_ads': opts.include_ads,
|
||||||
'default_search': opts.default_search,
|
'default_search': opts.default_search,
|
||||||
'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
|
'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
|
||||||
|
'encoding': opts.encoding,
|
||||||
}
|
}
|
||||||
|
|
||||||
with YoutubeDL(ydl_opts) as ydl:
|
with YoutubeDL(ydl_opts) as ydl:
|
||||||
|
@@ -297,6 +297,7 @@ class F4mFD(FileDownloader):
|
|||||||
break
|
break
|
||||||
frags_filenames.append(frag_filename)
|
frags_filenames.append(frag_filename)
|
||||||
|
|
||||||
|
dest_stream.close()
|
||||||
self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)
|
self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)
|
||||||
|
|
||||||
self.try_rename(tmpfilename, filename)
|
self.try_rename(tmpfilename, filename)
|
||||||
|
@@ -13,8 +13,10 @@ class HlsFD(FileDownloader):
|
|||||||
self.report_destination(filename)
|
self.report_destination(filename)
|
||||||
tmpfilename = self.temp_name(filename)
|
tmpfilename = self.temp_name(filename)
|
||||||
|
|
||||||
args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy',
|
args = [
|
||||||
'-bsf:a', 'aac_adtstoasc', tmpfilename]
|
'-y', '-i', url, '-f', 'mp4', '-c', 'copy',
|
||||||
|
'-bsf:a', 'aac_adtstoasc',
|
||||||
|
encodeFilename(tmpfilename, for_subprocess=True)]
|
||||||
|
|
||||||
for program in ['avconv', 'ffmpeg']:
|
for program in ['avconv', 'ffmpeg']:
|
||||||
try:
|
try:
|
||||||
|
@@ -23,6 +23,8 @@ class HttpFD(FileDownloader):
|
|||||||
headers = {'Youtubedl-no-compression': 'True'}
|
headers = {'Youtubedl-no-compression': 'True'}
|
||||||
if 'user_agent' in info_dict:
|
if 'user_agent' in info_dict:
|
||||||
headers['Youtubedl-user-agent'] = info_dict['user_agent']
|
headers['Youtubedl-user-agent'] = info_dict['user_agent']
|
||||||
|
if 'http_referer' in info_dict:
|
||||||
|
headers['Referer'] = info_dict['http_referer']
|
||||||
basic_request = compat_urllib_request.Request(url, None, headers)
|
basic_request = compat_urllib_request.Request(url, None, headers)
|
||||||
request = compat_urllib_request.Request(url, None, headers)
|
request = compat_urllib_request.Request(url, None, headers)
|
||||||
|
|
||||||
|
@@ -14,6 +14,7 @@ from .arte import (
|
|||||||
ArteTVConcertIE,
|
ArteTVConcertIE,
|
||||||
ArteTVFutureIE,
|
ArteTVFutureIE,
|
||||||
ArteTVDDCIE,
|
ArteTVDDCIE,
|
||||||
|
ArteTVEmbedIE,
|
||||||
)
|
)
|
||||||
from .auengine import AUEngineIE
|
from .auengine import AUEngineIE
|
||||||
from .bambuser import BambuserIE, BambuserChannelIE
|
from .bambuser import BambuserIE, BambuserChannelIE
|
||||||
@@ -39,6 +40,7 @@ from .clipfish import ClipfishIE
|
|||||||
from .cliphunter import CliphunterIE
|
from .cliphunter import CliphunterIE
|
||||||
from .clipsyndicate import ClipsyndicateIE
|
from .clipsyndicate import ClipsyndicateIE
|
||||||
from .cmt import CMTIE
|
from .cmt import CMTIE
|
||||||
|
from .cnet import CNETIE
|
||||||
from .cnn import (
|
from .cnn import (
|
||||||
CNNIE,
|
CNNIE,
|
||||||
CNNBlogsIE,
|
CNNBlogsIE,
|
||||||
@@ -82,6 +84,7 @@ from .fktv import (
|
|||||||
)
|
)
|
||||||
from .flickr import FlickrIE
|
from .flickr import FlickrIE
|
||||||
from .fourtube import FourTubeIE
|
from .fourtube import FourTubeIE
|
||||||
|
from .franceculture import FranceCultureIE
|
||||||
from .franceinter import FranceInterIE
|
from .franceinter import FranceInterIE
|
||||||
from .francetv import (
|
from .francetv import (
|
||||||
PluzzIE,
|
PluzzIE,
|
||||||
@@ -155,6 +158,7 @@ from .mtv import (
|
|||||||
MTVIE,
|
MTVIE,
|
||||||
MTVIggyIE,
|
MTVIggyIE,
|
||||||
)
|
)
|
||||||
|
from .musicplayon import MusicPlayOnIE
|
||||||
from .muzu import MuzuTVIE
|
from .muzu import MuzuTVIE
|
||||||
from .myspace import MySpaceIE
|
from .myspace import MySpaceIE
|
||||||
from .myspass import MySpassIE
|
from .myspass import MySpassIE
|
||||||
@@ -176,6 +180,8 @@ from .normalboots import NormalbootsIE
|
|||||||
from .novamov import NovaMovIE
|
from .novamov import NovaMovIE
|
||||||
from .nowness import NownessIE
|
from .nowness import NownessIE
|
||||||
from .nowvideo import NowVideoIE
|
from .nowvideo import NowVideoIE
|
||||||
|
from .ntv import NTVIE
|
||||||
|
from .oe1 import OE1IE
|
||||||
from .ooyala import OoyalaIE
|
from .ooyala import OoyalaIE
|
||||||
from .orf import ORFIE
|
from .orf import ORFIE
|
||||||
from .parliamentliveuk import ParliamentLiveUKIE
|
from .parliamentliveuk import ParliamentLiveUKIE
|
||||||
@@ -207,7 +213,6 @@ from .rutv import RUTVIE
|
|||||||
from .savefrom import SaveFromIE
|
from .savefrom import SaveFromIE
|
||||||
from .servingsys import ServingSysIE
|
from .servingsys import ServingSysIE
|
||||||
from .sina import SinaIE
|
from .sina import SinaIE
|
||||||
from .slashdot import SlashdotIE
|
|
||||||
from .slideshare import SlideshareIE
|
from .slideshare import SlideshareIE
|
||||||
from .smotri import (
|
from .smotri import (
|
||||||
SmotriIE,
|
SmotriIE,
|
||||||
@@ -256,13 +261,13 @@ from .udemy import (
|
|||||||
UdemyCourseIE
|
UdemyCourseIE
|
||||||
)
|
)
|
||||||
from .unistra import UnistraIE
|
from .unistra import UnistraIE
|
||||||
|
from .urort import UrortIE
|
||||||
from .ustream import UstreamIE, UstreamChannelIE
|
from .ustream import UstreamIE, UstreamChannelIE
|
||||||
from .vbox7 import Vbox7IE
|
from .vbox7 import Vbox7IE
|
||||||
from .veehd import VeeHDIE
|
from .veehd import VeeHDIE
|
||||||
from .veoh import VeohIE
|
from .veoh import VeohIE
|
||||||
from .vesti import VestiIE
|
from .vesti import VestiIE
|
||||||
from .vevo import VevoIE
|
from .vevo import VevoIE
|
||||||
from .vice import ViceIE
|
|
||||||
from .viddler import ViddlerIE
|
from .viddler import ViddlerIE
|
||||||
from .videobam import VideoBamIE
|
from .videobam import VideoBamIE
|
||||||
from .videodetective import VideoDetectiveIE
|
from .videodetective import VideoDetectiveIE
|
||||||
@@ -281,8 +286,12 @@ from .vine import VineIE
|
|||||||
from .viki import VikiIE
|
from .viki import VikiIE
|
||||||
from .vk import VKIE
|
from .vk import VKIE
|
||||||
from .vube import VubeIE
|
from .vube import VubeIE
|
||||||
|
from .washingtonpost import WashingtonPostIE
|
||||||
from .wat import WatIE
|
from .wat import WatIE
|
||||||
from .wdr import WDRIE
|
from .wdr import (
|
||||||
|
WDRIE,
|
||||||
|
WDRMausIE,
|
||||||
|
)
|
||||||
from .weibo import WeiboIE
|
from .weibo import WeiboIE
|
||||||
from .wimp import WimpIE
|
from .wimp import WimpIE
|
||||||
from .wistia import WistiaIE
|
from .wistia import WistiaIE
|
||||||
|
@@ -6,7 +6,6 @@ import json
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
determine_ext,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -16,9 +15,10 @@ class AppleTrailersIE(InfoExtractor):
|
|||||||
"url": "http://trailers.apple.com/trailers/wb/manofsteel/",
|
"url": "http://trailers.apple.com/trailers/wb/manofsteel/",
|
||||||
"playlist": [
|
"playlist": [
|
||||||
{
|
{
|
||||||
"file": "manofsteel-trailer4.mov",
|
|
||||||
"md5": "d97a8e575432dbcb81b7c3acb741f8a8",
|
"md5": "d97a8e575432dbcb81b7c3acb741f8a8",
|
||||||
"info_dict": {
|
"info_dict": {
|
||||||
|
"id": "manofsteel-trailer4",
|
||||||
|
"ext": "mov",
|
||||||
"duration": 111,
|
"duration": 111,
|
||||||
"title": "Trailer 4",
|
"title": "Trailer 4",
|
||||||
"upload_date": "20130523",
|
"upload_date": "20130523",
|
||||||
@@ -26,9 +26,10 @@ class AppleTrailersIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "manofsteel-trailer3.mov",
|
|
||||||
"md5": "b8017b7131b721fb4e8d6f49e1df908c",
|
"md5": "b8017b7131b721fb4e8d6f49e1df908c",
|
||||||
"info_dict": {
|
"info_dict": {
|
||||||
|
"id": "manofsteel-trailer3",
|
||||||
|
"ext": "mov",
|
||||||
"duration": 182,
|
"duration": 182,
|
||||||
"title": "Trailer 3",
|
"title": "Trailer 3",
|
||||||
"upload_date": "20130417",
|
"upload_date": "20130417",
|
||||||
@@ -36,9 +37,10 @@ class AppleTrailersIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "manofsteel-trailer.mov",
|
|
||||||
"md5": "d0f1e1150989b9924679b441f3404d48",
|
"md5": "d0f1e1150989b9924679b441f3404d48",
|
||||||
"info_dict": {
|
"info_dict": {
|
||||||
|
"id": "manofsteel-trailer",
|
||||||
|
"ext": "mov",
|
||||||
"duration": 148,
|
"duration": 148,
|
||||||
"title": "Trailer",
|
"title": "Trailer",
|
||||||
"upload_date": "20121212",
|
"upload_date": "20121212",
|
||||||
@@ -46,15 +48,16 @@ class AppleTrailersIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"file": "manofsteel-teaser.mov",
|
|
||||||
"md5": "5fe08795b943eb2e757fa95cb6def1cb",
|
"md5": "5fe08795b943eb2e757fa95cb6def1cb",
|
||||||
"info_dict": {
|
"info_dict": {
|
||||||
|
"id": "manofsteel-teaser",
|
||||||
|
"ext": "mov",
|
||||||
"duration": 93,
|
"duration": 93,
|
||||||
"title": "Teaser",
|
"title": "Teaser",
|
||||||
"upload_date": "20120721",
|
"upload_date": "20120721",
|
||||||
"uploader_id": "wb",
|
"uploader_id": "wb",
|
||||||
},
|
},
|
||||||
}
|
},
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -65,16 +68,16 @@ class AppleTrailersIE(InfoExtractor):
|
|||||||
movie = mobj.group('movie')
|
movie = mobj.group('movie')
|
||||||
uploader_id = mobj.group('company')
|
uploader_id = mobj.group('company')
|
||||||
|
|
||||||
playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc')
|
playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
|
||||||
def fix_html(s):
|
def fix_html(s):
|
||||||
s = re.sub(r'(?s)<script[^<]*?>.*?</script>', u'', s)
|
s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
|
||||||
s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)
|
s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)
|
||||||
# The ' in the onClick attributes are not escaped, it couldn't be parsed
|
# The ' in the onClick attributes are not escaped, it couldn't be parsed
|
||||||
# like: http://trailers.apple.com/trailers/wb/gravity/
|
# like: http://trailers.apple.com/trailers/wb/gravity/
|
||||||
def _clean_json(m):
|
def _clean_json(m):
|
||||||
return u'iTunes.playURL(%s);' % m.group(1).replace('\'', ''')
|
return 'iTunes.playURL(%s);' % m.group(1).replace('\'', ''')
|
||||||
s = re.sub(self._JSON_RE, _clean_json, s)
|
s = re.sub(self._JSON_RE, _clean_json, s)
|
||||||
s = u'<html>' + s + u'</html>'
|
s = '<html>' + s + u'</html>'
|
||||||
return s
|
return s
|
||||||
doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
|
doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
|
||||||
|
|
||||||
@@ -82,7 +85,7 @@ class AppleTrailersIE(InfoExtractor):
|
|||||||
for li in doc.findall('./div/ul/li'):
|
for li in doc.findall('./div/ul/li'):
|
||||||
on_click = li.find('.//a').attrib['onClick']
|
on_click = li.find('.//a').attrib['onClick']
|
||||||
trailer_info_json = self._search_regex(self._JSON_RE,
|
trailer_info_json = self._search_regex(self._JSON_RE,
|
||||||
on_click, u'trailer info')
|
on_click, 'trailer info')
|
||||||
trailer_info = json.loads(trailer_info_json)
|
trailer_info = json.loads(trailer_info_json)
|
||||||
title = trailer_info['title']
|
title = trailer_info['title']
|
||||||
video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
|
video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
|
||||||
@@ -98,8 +101,7 @@ class AppleTrailersIE(InfoExtractor):
|
|||||||
first_url = trailer_info['url']
|
first_url = trailer_info['url']
|
||||||
trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
|
trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
|
||||||
settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
|
settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
|
||||||
settings_json = self._download_webpage(settings_json_url, trailer_id, u'Downloading settings json')
|
settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json')
|
||||||
settings = json.loads(settings_json)
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format in settings['metadata']['sizes']:
|
for format in settings['metadata']['sizes']:
|
||||||
@@ -107,7 +109,6 @@ class AppleTrailersIE(InfoExtractor):
|
|||||||
format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src'])
|
format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src'])
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
'ext': determine_ext(format_url),
|
|
||||||
'format': format['type'],
|
'format': format['type'],
|
||||||
'width': format['width'],
|
'width': format['width'],
|
||||||
'height': int(format['height']),
|
'height': int(format['height']),
|
||||||
|
@@ -2,7 +2,6 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -19,114 +18,41 @@ from ..utils import (
|
|||||||
# is different for each one. The videos usually expire in 7 days, so we can't
|
# is different for each one. The videos usually expire in 7 days, so we can't
|
||||||
# add tests.
|
# add tests.
|
||||||
|
|
||||||
class ArteTvIE(InfoExtractor):
|
|
||||||
_VIDEOS_URL = r'(?:http://)?videos\.arte\.tv/(?P<lang>fr|de)/.*-(?P<id>.*?)\.html'
|
|
||||||
_LIVEWEB_URL = r'(?:http://)?liveweb\.arte\.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'
|
|
||||||
_LIVE_URL = r'index-[0-9]+\.html$'
|
|
||||||
|
|
||||||
|
class ArteTvIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http://videos\.arte\.tv/(?P<lang>fr|de)/.*-(?P<id>.*?)\.html'
|
||||||
IE_NAME = 'arte.tv'
|
IE_NAME = 'arte.tv'
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def suitable(cls, url):
|
|
||||||
return any(re.match(regex, url) for regex in (cls._VIDEOS_URL, cls._LIVEWEB_URL))
|
|
||||||
|
|
||||||
# TODO implement Live Stream
|
|
||||||
# from ..utils import compat_urllib_parse
|
|
||||||
# def extractLiveStream(self, url):
|
|
||||||
# video_lang = url.split('/')[-4]
|
|
||||||
# info = self.grep_webpage(
|
|
||||||
# url,
|
|
||||||
# r'src="(.*?/videothek_js.*?\.js)',
|
|
||||||
# 0,
|
|
||||||
# [
|
|
||||||
# (1, 'url', 'Invalid URL: %s' % url)
|
|
||||||
# ]
|
|
||||||
# )
|
|
||||||
# http_host = url.split('/')[2]
|
|
||||||
# next_url = 'http://%s%s' % (http_host, compat_urllib_parse.unquote(info.get('url')))
|
|
||||||
# info = self.grep_webpage(
|
|
||||||
# next_url,
|
|
||||||
# r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' +
|
|
||||||
# '(http://.*?\.swf).*?' +
|
|
||||||
# '(rtmp://.*?)\'',
|
|
||||||
# re.DOTALL,
|
|
||||||
# [
|
|
||||||
# (1, 'path', 'could not extract video path: %s' % url),
|
|
||||||
# (2, 'player', 'could not extract video player: %s' % url),
|
|
||||||
# (3, 'url', 'could not extract video url: %s' % url)
|
|
||||||
# ]
|
|
||||||
# )
|
|
||||||
# video_url = '%s/%s' % (info.get('url'), info.get('path'))
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VIDEOS_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is not None:
|
lang = mobj.group('lang')
|
||||||
id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
lang = mobj.group('lang')
|
|
||||||
return self._extract_video(url, id, lang)
|
|
||||||
|
|
||||||
mobj = re.match(self._LIVEWEB_URL, url)
|
|
||||||
if mobj is not None:
|
|
||||||
name = mobj.group('name')
|
|
||||||
lang = mobj.group('lang')
|
|
||||||
return self._extract_liveweb(url, name, lang)
|
|
||||||
|
|
||||||
if re.search(self._LIVE_URL, url) is not None:
|
|
||||||
raise ExtractorError('Arte live streams are not yet supported, sorry')
|
|
||||||
# self.extractLiveStream(url)
|
|
||||||
# return
|
|
||||||
|
|
||||||
raise ExtractorError('No video found')
|
|
||||||
|
|
||||||
def _extract_video(self, url, video_id, lang):
|
|
||||||
"""Extract from videos.arte.tv"""
|
|
||||||
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
|
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
|
||||||
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
|
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
|
||||||
ref_xml_doc = self._download_xml(
|
ref_xml_doc = self._download_xml(
|
||||||
ref_xml_url, video_id, note='Downloading metadata')
|
ref_xml_url, video_id, note='Downloading metadata')
|
||||||
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
|
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
|
||||||
config_xml_url = config_node.attrib['ref']
|
config_xml_url = config_node.attrib['ref']
|
||||||
config_xml = self._download_webpage(
|
config = self._download_xml(
|
||||||
config_xml_url, video_id, note='Downloading configuration')
|
config_xml_url, video_id, note='Downloading configuration')
|
||||||
|
|
||||||
video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml))
|
formats = [{
|
||||||
def _key(m):
|
'forma_id': q.attrib['quality'],
|
||||||
quality = m.group('quality')
|
'url': q.text,
|
||||||
if quality == 'hd':
|
'ext': 'flv',
|
||||||
return 2
|
'quality': 2 if q.attrib['quality'] == 'hd' else 1,
|
||||||
else:
|
} for q in config.findall('./urls/url')]
|
||||||
return 1
|
self._sort_formats(formats)
|
||||||
# We pick the best quality
|
|
||||||
video_urls = sorted(video_urls, key=_key)
|
|
||||||
video_url = list(video_urls)[-1].group('url')
|
|
||||||
|
|
||||||
title = self._html_search_regex(r'<name>(.*?)</name>', config_xml, 'title')
|
|
||||||
thumbnail = self._html_search_regex(r'<firstThumbnailUrl>(.*?)</firstThumbnailUrl>',
|
|
||||||
config_xml, 'thumbnail')
|
|
||||||
return {'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'url': video_url,
|
|
||||||
'ext': 'flv',
|
|
||||||
}
|
|
||||||
|
|
||||||
def _extract_liveweb(self, url, name, lang):
|
title = config.find('.//name').text
|
||||||
"""Extract form http://liveweb.arte.tv/"""
|
thumbnail = config.find('.//firstThumbnailUrl').text
|
||||||
webpage = self._download_webpage(url, name)
|
return {
|
||||||
video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, 'event id')
|
'id': video_id,
|
||||||
config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
|
'title': title,
|
||||||
video_id, 'Downloading information')
|
'thumbnail': thumbnail,
|
||||||
event_doc = config_doc.find('event')
|
'formats': formats,
|
||||||
url_node = event_doc.find('video').find('urlHd')
|
}
|
||||||
if url_node is None:
|
|
||||||
url_node = event_doc.find('urlSd')
|
|
||||||
|
|
||||||
return {'id': video_id,
|
|
||||||
'title': event_doc.find('name%s' % lang.capitalize()).text,
|
|
||||||
'url': url_node.text.replace('MP4', 'mp4'),
|
|
||||||
'ext': 'flv',
|
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class ArteTVPlus7IE(InfoExtractor):
|
class ArteTVPlus7IE(InfoExtractor):
|
||||||
@@ -152,9 +78,7 @@ class ArteTVPlus7IE(InfoExtractor):
|
|||||||
return self._extract_from_json_url(json_url, video_id, lang)
|
return self._extract_from_json_url(json_url, video_id, lang)
|
||||||
|
|
||||||
def _extract_from_json_url(self, json_url, video_id, lang):
|
def _extract_from_json_url(self, json_url, video_id, lang):
|
||||||
json_info = self._download_webpage(json_url, video_id, 'Downloading info json')
|
info = self._download_json(json_url, video_id)
|
||||||
self.report_extraction(video_id)
|
|
||||||
info = json.loads(json_info)
|
|
||||||
player_info = info['videoJsonPlayer']
|
player_info = info['videoJsonPlayer']
|
||||||
|
|
||||||
info_dict = {
|
info_dict = {
|
||||||
@@ -176,6 +100,8 @@ class ArteTVPlus7IE(InfoExtractor):
|
|||||||
l = 'F'
|
l = 'F'
|
||||||
elif lang == 'de':
|
elif lang == 'de':
|
||||||
l = 'A'
|
l = 'A'
|
||||||
|
else:
|
||||||
|
l = lang
|
||||||
regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
|
regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
|
||||||
return any(re.match(r, f['versionCode']) for r in regexes)
|
return any(re.match(r, f['versionCode']) for r in regexes)
|
||||||
# Some formats may not be in the same language as the url
|
# Some formats may not be in the same language as the url
|
||||||
@@ -305,3 +231,22 @@ class ArteTVConcertIE(ArteTVPlus7IE):
|
|||||||
'description': 'md5:486eb08f991552ade77439fe6d82c305',
|
'description': 'md5:486eb08f991552ade77439fe6d82c305',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class ArteTVEmbedIE(ArteTVPlus7IE):
|
||||||
|
IE_NAME = 'arte.tv:embed'
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
http://www\.arte\.tv
|
||||||
|
/playerv2/embed\.php\?json_url=
|
||||||
|
(?P<json_url>
|
||||||
|
http://arte\.tv/papi/tvguide/videos/stream/player/
|
||||||
|
(?P<lang>[^/]+)/(?P<id>[^/]+)[^&]*
|
||||||
|
)
|
||||||
|
'''
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
lang = mobj.group('lang')
|
||||||
|
json_url = mobj.group('json_url')
|
||||||
|
return self._extract_from_json_url(json_url, video_id, lang)
|
||||||
|
@@ -11,22 +11,24 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class AUEngineIE(InfoExtractor):
|
class AUEngineIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http://(?:www\.)?auengine\.com/embed\.php\?.*?file=(?P<id>[^&]+).*?'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370',
|
'url': 'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370',
|
||||||
'file': 'lfvlytY6.mp4',
|
|
||||||
'md5': '48972bdbcf1a3a2f5533e62425b41d4f',
|
'md5': '48972bdbcf1a3a2f5533e62425b41d4f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'lfvlytY6',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': '[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]'
|
'title': '[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed\.php\?.*?file=([^&]+).*?'
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group(1)
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
title = self._html_search_regex(r'<title>(?P<title>.+?)</title>',
|
title = self._html_search_regex(r'<title>(?P<title>.+?)</title>', webpage, 'title')
|
||||||
webpage, 'title')
|
|
||||||
title = title.strip()
|
title = title.strip()
|
||||||
links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage)
|
links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage)
|
||||||
links = map(compat_urllib_parse.unquote, links)
|
links = map(compat_urllib_parse.unquote, links)
|
||||||
@@ -39,14 +41,15 @@ class AUEngineIE(InfoExtractor):
|
|||||||
elif '/videos/' in link:
|
elif '/videos/' in link:
|
||||||
video_url = link
|
video_url = link
|
||||||
if not video_url:
|
if not video_url:
|
||||||
raise ExtractorError(u'Could not find video URL')
|
raise ExtractorError('Could not find video URL')
|
||||||
ext = '.' + determine_ext(video_url)
|
ext = '.' + determine_ext(video_url)
|
||||||
if ext == title[-len(ext):]:
|
if ext == title[-len(ext):]:
|
||||||
title = title[:-len(ext)]
|
title = title[:-len(ext)]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
|
'http_referer': 'http://www.auengine.com/flowplayer/flowplayer.commercial-3.2.14.swf',
|
||||||
}
|
}
|
||||||
|
@@ -1,22 +1,21 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .ooyala import OoyalaIE
|
|
||||||
|
|
||||||
|
|
||||||
class BloombergIE(InfoExtractor):
|
class BloombergIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'
|
_VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
|
'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
|
||||||
u'file': u'12bzhqZTqQHmmlA8I-i0NpzJgcG5NNYX.mp4',
|
'md5': '7bf08858ff7c203c870e8a6190e221e5',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'title': u'Shah\'s Presentation on Foreign-Exchange Strategies',
|
'id': 'qurhIVlJSB6hzkVi229d8g',
|
||||||
u'description': u'md5:abc86e5236f9f0e4866c59ad36736686',
|
'ext': 'flv',
|
||||||
},
|
'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
|
||||||
u'params': {
|
'description': 'md5:0681e0d30dcdfc6abf34594961d8ea88',
|
||||||
# Requires ffmpeg (m3u8 manifest)
|
|
||||||
u'skip_download': True,
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -24,7 +23,16 @@ class BloombergIE(InfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
name = mobj.group('name')
|
name = mobj.group('name')
|
||||||
webpage = self._download_webpage(url, name)
|
webpage = self._download_webpage(url, name)
|
||||||
embed_code = self._search_regex(
|
f4m_url = self._search_regex(
|
||||||
r'<source src="https?://[^/]+/[^/]+/[^/]+/([^/]+)', webpage,
|
r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
|
||||||
'embed code')
|
'f4m url')
|
||||||
return OoyalaIE._build_url_result(embed_code)
|
title = re.sub(': Video$', '', self._og_search_title(webpage))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': name.split('-')[-1],
|
||||||
|
'title': title,
|
||||||
|
'url': f4m_url,
|
||||||
|
'ext': 'flv',
|
||||||
|
'description': self._og_search_description(webpage),
|
||||||
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
|
}
|
||||||
|
@@ -87,7 +87,7 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
object_str = object_str.replace('<--', '<!--')
|
object_str = object_str.replace('<--', '<!--')
|
||||||
object_str = fix_xml_ampersands(object_str)
|
object_str = fix_xml_ampersands(object_str)
|
||||||
|
|
||||||
object_doc = xml.etree.ElementTree.fromstring(object_str)
|
object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
|
||||||
|
|
||||||
fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
|
fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
|
||||||
if fv_el is not None:
|
if fv_el is not None:
|
||||||
|
@@ -2,39 +2,46 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class C56IE(InfoExtractor):
|
class C56IE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://((www|player)\.)?56\.com/(.+?/)?(v_|(play_album.+-))(?P<textid>.+?)\.(html|swf)'
|
_VALID_URL = r'https?://(?:(?:www|player)\.)?56\.com/(?:.+?/)?(?:v_|(?:play_album.+-))(?P<textid>.+?)\.(?:html|swf)'
|
||||||
IE_NAME = '56.com'
|
IE_NAME = '56.com'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html',
|
'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html',
|
||||||
'file': '93440716.flv',
|
|
||||||
'md5': 'e59995ac63d0457783ea05f93f12a866',
|
'md5': 'e59995ac63d0457783ea05f93f12a866',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '93440716',
|
||||||
|
'ext': 'flv',
|
||||||
'title': '网事知多少 第32期:车怒',
|
'title': '网事知多少 第32期:车怒',
|
||||||
|
'duration': 283.813,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
|
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
|
||||||
text_id = mobj.group('textid')
|
text_id = mobj.group('textid')
|
||||||
info_page = self._download_webpage('http://vxml.56.com/json/%s/' % text_id,
|
|
||||||
text_id, 'Downloading video info')
|
page = self._download_json(
|
||||||
info = json.loads(info_page)['info']
|
'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info')
|
||||||
formats = [{
|
|
||||||
'format_id': f['type'],
|
info = page['info']
|
||||||
'filesize': int(f['filesize']),
|
|
||||||
'url': f['url']
|
formats = [
|
||||||
} for f in info['rfiles']]
|
{
|
||||||
|
'format_id': f['type'],
|
||||||
|
'filesize': int(f['filesize']),
|
||||||
|
'url': f['url']
|
||||||
|
} for f in info['rfiles']
|
||||||
|
]
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': info['vid'],
|
'id': info['vid'],
|
||||||
'title': info['Subject'],
|
'title': info['Subject'],
|
||||||
|
'duration': int(info['duration']) / 1000.0,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnail': info.get('bimg') or info.get('img'),
|
'thumbnail': info.get('bimg') or info.get('img'),
|
||||||
}
|
}
|
||||||
|
@@ -28,7 +28,7 @@ class CanalplusIE(InfoExtractor):
|
|||||||
video_id = mobj.groupdict().get('id')
|
video_id = mobj.groupdict().get('id')
|
||||||
if video_id is None:
|
if video_id is None:
|
||||||
webpage = self._download_webpage(url, mobj.group('path'))
|
webpage = self._download_webpage(url, mobj.group('path'))
|
||||||
video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
|
video_id = self._search_regex(r'<canal:player videoId="(\d+)"', webpage, u'video id')
|
||||||
info_url = self._VIDEO_INFO_TEMPLATE % video_id
|
info_url = self._VIDEO_INFO_TEMPLATE % video_id
|
||||||
doc = self._download_xml(info_url,video_id,
|
doc = self._download_xml(info_url,video_id,
|
||||||
u'Downloading video info')
|
u'Downloading video info')
|
||||||
|
@@ -1,22 +1,28 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import ExtractorError
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
parse_duration,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class ClipfishIE(InfoExtractor):
|
class ClipfishIE(InfoExtractor):
|
||||||
IE_NAME = u'clipfish'
|
IE_NAME = 'clipfish'
|
||||||
|
|
||||||
_VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
|
_VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
|
'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
|
||||||
u'file': u'3966754.mp4',
|
'md5': '2521cd644e862936cf2e698206e47385',
|
||||||
u'md5': u'2521cd644e862936cf2e698206e47385',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '3966754',
|
||||||
u'title': u'FIFA 14 - E3 2013 Trailer',
|
'ext': 'mp4',
|
||||||
u'duration': 82,
|
'title': 'FIFA 14 - E3 2013 Trailer',
|
||||||
|
'duration': 82,
|
||||||
},
|
},
|
||||||
u'skip': 'Blocked in the US'
|
u'skip': 'Blocked in the US'
|
||||||
}
|
}
|
||||||
@@ -33,21 +39,10 @@ class ClipfishIE(InfoExtractor):
|
|||||||
video_url = doc.find('filename').text
|
video_url = doc.find('filename').text
|
||||||
if video_url is None:
|
if video_url is None:
|
||||||
xml_bytes = xml.etree.ElementTree.tostring(doc)
|
xml_bytes = xml.etree.ElementTree.tostring(doc)
|
||||||
raise ExtractorError(u'Cannot find video URL in document %r' %
|
raise ExtractorError('Cannot find video URL in document %r' %
|
||||||
xml_bytes)
|
xml_bytes)
|
||||||
thumbnail = doc.find('imageurl').text
|
thumbnail = doc.find('imageurl').text
|
||||||
duration_str = doc.find('duration').text
|
duration = parse_duration(doc.find('duration').text)
|
||||||
m = re.match(
|
|
||||||
r'^(?P<hours>[0-9]+):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9]{2}):(?P<ms>[0-9]*)$',
|
|
||||||
duration_str)
|
|
||||||
if m:
|
|
||||||
duration = (
|
|
||||||
(int(m.group('hours')) * 60 * 60) +
|
|
||||||
(int(m.group('minutes')) * 60) +
|
|
||||||
(int(m.group('seconds')))
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
duration = None
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@@ -11,13 +13,14 @@ class ClipsyndicateIE(InfoExtractor):
|
|||||||
_VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
|
_VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
|
'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',
|
||||||
u'md5': u'4d7d549451bad625e0ff3d7bd56d776c',
|
'md5': '4d7d549451bad625e0ff3d7bd56d776c',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'id': u'4629301',
|
'id': '4629301',
|
||||||
u'ext': u'mp4',
|
'ext': 'mp4',
|
||||||
u'title': u'Brick Briscoe',
|
'title': 'Brick Briscoe',
|
||||||
u'duration': 612,
|
'duration': 612,
|
||||||
|
'thumbnail': 're:^https?://.+\.jpg',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -26,13 +29,13 @@ class ClipsyndicateIE(InfoExtractor):
|
|||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
js_player = self._download_webpage(
|
js_player = self._download_webpage(
|
||||||
'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
|
'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,
|
||||||
video_id, u'Downlaoding player')
|
video_id, 'Downlaoding player')
|
||||||
# it includes a required token
|
# it includes a required token
|
||||||
flvars = self._search_regex(r'flvars: "(.*?)"', js_player, u'flvars')
|
flvars = self._search_regex(r'flvars: "(.*?)"', js_player, 'flvars')
|
||||||
|
|
||||||
pdoc = self._download_xml(
|
pdoc = self._download_xml(
|
||||||
'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
|
'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars,
|
||||||
video_id, u'Downloading video info',
|
video_id, 'Downloading video info',
|
||||||
transform_source=fix_xml_ampersands)
|
transform_source=fix_xml_ampersands)
|
||||||
|
|
||||||
track_doc = pdoc.find('trackList/track')
|
track_doc = pdoc.find('trackList/track')
|
||||||
|
70
youtube_dl/extractor/cnet.py
Normal file
70
youtube_dl/extractor/cnet.py
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CNETIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
|
||||||
|
'md5': '041233212a0d06b179c87cbcca1577b8',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Hands-on with Microsoft Windows 8.1 Update',
|
||||||
|
'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
|
||||||
|
'thumbnail': 're:^http://.*/flmswindows8.jpg$',
|
||||||
|
'uploader_id': 'sarah.mitroff@cbsinteractive.com',
|
||||||
|
'uploader': 'Sarah Mitroff',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
display_id = mobj.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
data_json = self._html_search_regex(
|
||||||
|
r"<div class=\"cnetVideoPlayer\" data-cnet-video-options='([^']+)'",
|
||||||
|
webpage, 'data json')
|
||||||
|
data = json.loads(data_json)
|
||||||
|
vdata = data['video']
|
||||||
|
|
||||||
|
video_id = vdata['id']
|
||||||
|
title = vdata['headline']
|
||||||
|
description = vdata.get('dek')
|
||||||
|
thumbnail = vdata.get('image', {}).get('path')
|
||||||
|
author = vdata.get('author')
|
||||||
|
if author:
|
||||||
|
uploader = '%s %s' % (author['firstName'], author['lastName'])
|
||||||
|
uploader_id = author.get('email')
|
||||||
|
else:
|
||||||
|
uploader = None
|
||||||
|
uploader_id = None
|
||||||
|
|
||||||
|
formats = [{
|
||||||
|
'format_id': '%s-%s-%s' % (
|
||||||
|
f['type'], f['format'],
|
||||||
|
int_or_none(f.get('bitrate'), 1000, default='')),
|
||||||
|
'url': f['uri'],
|
||||||
|
'tbr': int_or_none(f.get('bitrate'), 1000),
|
||||||
|
} for f in vdata['files']['data']]
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'description': description,
|
||||||
|
'uploader': uploader,
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
}
|
@@ -7,8 +7,8 @@ from .mtv import MTVServicesInfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -32,31 +32,34 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class ComedyCentralShowsIE(InfoExtractor):
|
class ComedyCentralShowsIE(InfoExtractor):
|
||||||
IE_DESC = 'The Daily Show / Colbert Report'
|
IE_DESC = 'The Daily Show / The Colbert Report'
|
||||||
# urls can be abbreviations like :thedailyshow or :colbert
|
# urls can be abbreviations like :thedailyshow or :colbert
|
||||||
# urls for episodes like:
|
# urls for episodes like:
|
||||||
# or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
|
# or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
|
||||||
# or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
|
# or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
|
||||||
# or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
|
# or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
|
||||||
_VALID_URL = r"""^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
|
_VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
|
||||||
|(https?://)?(www\.)?
|
|https?://(:www\.)?
|
||||||
(?P<showname>thedailyshow|colbertnation)\.com/
|
(?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
|
||||||
(full-episodes/(?P<episode>.*)|
|
(full-episodes/(?P<episode>.*)|
|
||||||
(?P<clip>
|
(?P<clip>
|
||||||
(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|
(?:(?:guests/[^/]+|videos)/[^/]+/(?P<videotitle>[^/?#]+))
|
||||||
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))|
|
|(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|
||||||
|
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
|
||||||
|
)|
|
||||||
(?P<interview>
|
(?P<interview>
|
||||||
extended-interviews/(?P<interID>[0-9]+)/playlist_tds_extended_(?P<interview_title>.*?)/.*?)))
|
extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?)))
|
||||||
$"""
|
(?:[?#].*|$)'''
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart',
|
'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
|
||||||
'file': '422212.mp4',
|
|
||||||
'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
|
'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
"upload_date": "20121214",
|
'id': 'ab9ab3e7-5a98-4dbe-8b21-551dc0523d55',
|
||||||
"description": "Kristen Stewart",
|
'ext': 'mp4',
|
||||||
"uploader": "thedailyshow",
|
'upload_date': '20121213',
|
||||||
"title": "thedailyshow-kristen-stewart part 1"
|
'description': 'Kristen Stewart learns to let loose in "On the Road."',
|
||||||
|
'uploader': 'thedailyshow',
|
||||||
|
'title': 'thedailyshow kristen-stewart part 1',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -79,11 +82,6 @@ class ComedyCentralShowsIE(InfoExtractor):
|
|||||||
'400': (384, 216),
|
'400': (384, 216),
|
||||||
}
|
}
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def suitable(cls, url):
|
|
||||||
"""Receives a URL and returns True if suitable for this IE."""
|
|
||||||
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _transform_rtmp_url(rtmp_video_url):
|
def _transform_rtmp_url(rtmp_video_url):
|
||||||
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\.comedystor/.*)$', rtmp_video_url)
|
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\.comedystor/.*)$', rtmp_video_url)
|
||||||
@@ -99,14 +97,16 @@ class ComedyCentralShowsIE(InfoExtractor):
|
|||||||
|
|
||||||
if mobj.group('shortname'):
|
if mobj.group('shortname'):
|
||||||
if mobj.group('shortname') in ('tds', 'thedailyshow'):
|
if mobj.group('shortname') in ('tds', 'thedailyshow'):
|
||||||
url = 'http://www.thedailyshow.com/full-episodes/'
|
url = 'http://thedailyshow.cc.com/full-episodes/'
|
||||||
else:
|
else:
|
||||||
url = 'http://www.colbertnation.com/full-episodes/'
|
url = 'http://thecolbertreport.cc.com/full-episodes/'
|
||||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||||
assert mobj is not None
|
assert mobj is not None
|
||||||
|
|
||||||
if mobj.group('clip'):
|
if mobj.group('clip'):
|
||||||
if mobj.group('showname') == 'thedailyshow':
|
if mobj.group('videotitle'):
|
||||||
|
epTitle = mobj.group('videotitle')
|
||||||
|
elif mobj.group('showname') == 'thedailyshow':
|
||||||
epTitle = mobj.group('tdstitle')
|
epTitle = mobj.group('tdstitle')
|
||||||
else:
|
else:
|
||||||
epTitle = mobj.group('cntitle')
|
epTitle = mobj.group('cntitle')
|
||||||
@@ -120,9 +120,9 @@ class ComedyCentralShowsIE(InfoExtractor):
|
|||||||
epTitle = mobj.group('showname')
|
epTitle = mobj.group('showname')
|
||||||
else:
|
else:
|
||||||
epTitle = mobj.group('episode')
|
epTitle = mobj.group('episode')
|
||||||
|
show_name = mobj.group('showname')
|
||||||
|
|
||||||
self.report_extraction(epTitle)
|
webpage, htmlHandle = self._download_webpage_handle(url, epTitle)
|
||||||
webpage,htmlHandle = self._download_webpage_handle(url, epTitle)
|
|
||||||
if dlNewest:
|
if dlNewest:
|
||||||
url = htmlHandle.geturl()
|
url = htmlHandle.geturl()
|
||||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||||
@@ -130,71 +130,86 @@ class ComedyCentralShowsIE(InfoExtractor):
|
|||||||
raise ExtractorError('Invalid redirected URL: ' + url)
|
raise ExtractorError('Invalid redirected URL: ' + url)
|
||||||
if mobj.group('episode') == '':
|
if mobj.group('episode') == '':
|
||||||
raise ExtractorError('Redirected URL is still not specific: ' + url)
|
raise ExtractorError('Redirected URL is still not specific: ' + url)
|
||||||
epTitle = mobj.group('episode')
|
epTitle = mobj.group('episode').rpartition('/')[-1]
|
||||||
|
|
||||||
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
|
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
|
||||||
|
|
||||||
if len(mMovieParams) == 0:
|
if len(mMovieParams) == 0:
|
||||||
# The Colbert Report embeds the information in a without
|
# The Colbert Report embeds the information in a without
|
||||||
# a URL prefix; so extract the alternate reference
|
# a URL prefix; so extract the alternate reference
|
||||||
# and then add the URL prefix manually.
|
# and then add the URL prefix manually.
|
||||||
|
|
||||||
altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', webpage)
|
altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video|playlist).*?:.*?)"', webpage)
|
||||||
if len(altMovieParams) == 0:
|
if len(altMovieParams) == 0:
|
||||||
raise ExtractorError('unable to find Flash URL in webpage ' + url)
|
raise ExtractorError('unable to find Flash URL in webpage ' + url)
|
||||||
else:
|
else:
|
||||||
mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
|
mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
|
||||||
|
|
||||||
uri = mMovieParams[0][1]
|
uri = mMovieParams[0][1]
|
||||||
indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
|
# Correct cc.com in uri
|
||||||
idoc = self._download_xml(indexUrl, epTitle,
|
uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.cc.com', uri)
|
||||||
'Downloading show index',
|
|
||||||
'unable to download episode index')
|
|
||||||
|
|
||||||
results = []
|
index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse.urlencode({'uri': uri}))
|
||||||
|
idoc = self._download_xml(
|
||||||
|
index_url, epTitle,
|
||||||
|
'Downloading show index', 'Unable to download episode index')
|
||||||
|
|
||||||
itemEls = idoc.findall('.//item')
|
title = idoc.find('./channel/title').text
|
||||||
for partNum,itemEl in enumerate(itemEls):
|
description = idoc.find('./channel/description').text
|
||||||
mediaId = itemEl.findall('./guid')[0].text
|
|
||||||
shortMediaId = mediaId.split(':')[-1]
|
|
||||||
showId = mediaId.split(':')[-2].replace('.com', '')
|
|
||||||
officialTitle = itemEl.findall('./title')[0].text
|
|
||||||
officialDate = unified_strdate(itemEl.findall('./pubDate')[0].text)
|
|
||||||
|
|
||||||
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
|
entries = []
|
||||||
compat_urllib_parse.urlencode({'uri': mediaId}))
|
item_els = idoc.findall('.//item')
|
||||||
cdoc = self._download_xml(configUrl, epTitle,
|
for part_num, itemEl in enumerate(item_els):
|
||||||
'Downloading configuration for %s' % shortMediaId)
|
upload_date = unified_strdate(itemEl.findall('./pubDate')[0].text)
|
||||||
|
thumbnail = itemEl.find('.//{http://search.yahoo.com/mrss/}thumbnail').attrib.get('url')
|
||||||
|
|
||||||
|
content = itemEl.find('.//{http://search.yahoo.com/mrss/}content')
|
||||||
|
duration = float_or_none(content.attrib.get('duration'))
|
||||||
|
mediagen_url = content.attrib['url']
|
||||||
|
guid = itemEl.find('./guid').text.rpartition(':')[-1]
|
||||||
|
|
||||||
|
cdoc = self._download_xml(
|
||||||
|
mediagen_url, epTitle,
|
||||||
|
'Downloading configuration for segment %d / %d' % (part_num + 1, len(item_els)))
|
||||||
|
|
||||||
turls = []
|
turls = []
|
||||||
for rendition in cdoc.findall('.//rendition'):
|
for rendition in cdoc.findall('.//rendition'):
|
||||||
finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
|
finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
|
||||||
turls.append(finfo)
|
turls.append(finfo)
|
||||||
|
|
||||||
if len(turls) == 0:
|
|
||||||
self._downloader.report_error('unable to download ' + mediaId + ': No videos found')
|
|
||||||
continue
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format, rtmp_video_url in turls:
|
for format, rtmp_video_url in turls:
|
||||||
w, h = self._video_dimensions.get(format, (None, None))
|
w, h = self._video_dimensions.get(format, (None, None))
|
||||||
formats.append({
|
formats.append({
|
||||||
|
'format_id': 'vhttp-%s' % format,
|
||||||
'url': self._transform_rtmp_url(rtmp_video_url),
|
'url': self._transform_rtmp_url(rtmp_video_url),
|
||||||
'ext': self._video_extensions.get(format, 'mp4'),
|
'ext': self._video_extensions.get(format, 'mp4'),
|
||||||
'format_id': format,
|
|
||||||
'height': h,
|
'height': h,
|
||||||
'width': w,
|
'width': w,
|
||||||
})
|
})
|
||||||
|
formats.append({
|
||||||
|
'format_id': 'rtmp-%s' % format,
|
||||||
|
'url': rtmp_video_url,
|
||||||
|
'ext': self._video_extensions.get(format, 'mp4'),
|
||||||
|
'height': h,
|
||||||
|
'width': w,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
effTitle = showId + '-' + epTitle + ' part ' + compat_str(partNum+1)
|
virtual_id = show_name + ' ' + epTitle + ' part ' + compat_str(part_num + 1)
|
||||||
results.append({
|
entries.append({
|
||||||
'id': shortMediaId,
|
'id': guid,
|
||||||
|
'title': virtual_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'uploader': showId,
|
'uploader': show_name,
|
||||||
'upload_date': officialDate,
|
'upload_date': upload_date,
|
||||||
'title': effTitle,
|
'duration': duration,
|
||||||
'thumbnail': None,
|
'thumbnail': thumbnail,
|
||||||
'description': compat_str(officialTitle),
|
'description': description,
|
||||||
})
|
})
|
||||||
|
|
||||||
return results
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'entries': entries,
|
||||||
|
'title': show_name + ' ' + title,
|
||||||
|
'description': description,
|
||||||
|
}
|
||||||
|
@@ -252,6 +252,17 @@ class InfoExtractor(object):
|
|||||||
outf.write(webpage_bytes)
|
outf.write(webpage_bytes)
|
||||||
|
|
||||||
content = webpage_bytes.decode(encoding, 'replace')
|
content = webpage_bytes.decode(encoding, 'replace')
|
||||||
|
|
||||||
|
if (u'<title>Access to this site is blocked</title>' in content and
|
||||||
|
u'Websense' in content[:512]):
|
||||||
|
msg = u'Access to this webpage has been blocked by Websense filtering software in your network.'
|
||||||
|
blocked_iframe = self._html_search_regex(
|
||||||
|
r'<iframe src="([^"]+)"', content,
|
||||||
|
u'Websense information URL', default=None)
|
||||||
|
if blocked_iframe:
|
||||||
|
msg += u' Visit %s for more details' % blocked_iframe
|
||||||
|
raise ExtractorError(msg, expected=True)
|
||||||
|
|
||||||
return (content, urlh)
|
return (content, urlh)
|
||||||
|
|
||||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
|
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
|
||||||
|
@@ -4,6 +4,7 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
)
|
)
|
||||||
@@ -54,18 +55,29 @@ class CSpanIE(InfoExtractor):
|
|||||||
info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
|
info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
|
||||||
data = self._download_json(info_url, video_id)
|
data = self._download_json(info_url, video_id)
|
||||||
|
|
||||||
url = unescapeHTML(data['video']['files'][0]['path']['#text'])
|
doc = self._download_xml(
|
||||||
|
'http://www.c-span.org/common/services/flashXml.php?programid=' + video_id,
|
||||||
doc = self._download_xml('http://www.c-span.org/common/services/flashXml.php?programid=' + video_id,
|
|
||||||
video_id)
|
video_id)
|
||||||
|
|
||||||
def find_string(s):
|
title = find_xpath_attr(doc, './/string', 'name', 'title').text
|
||||||
return find_xpath_attr(doc, './/string', 'name', s).text
|
thumbnail = find_xpath_attr(doc, './/string', 'name', 'poster').text
|
||||||
|
|
||||||
|
files = data['video']['files']
|
||||||
|
|
||||||
|
entries = [{
|
||||||
|
'id': '%s_%d' % (video_id, partnum + 1),
|
||||||
|
'title': (
|
||||||
|
title if len(files) == 1 else
|
||||||
|
'%s part %d' % (title, partnum + 1)),
|
||||||
|
'url': unescapeHTML(f['path']['#text']),
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'duration': int_or_none(f.get('length', {}).get('#text')),
|
||||||
|
} for partnum, f in enumerate(files)]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'entries': entries,
|
||||||
|
'title': title,
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': find_string('title'),
|
|
||||||
'url': url,
|
|
||||||
'description': description,
|
|
||||||
'thumbnail': find_string('poster'),
|
|
||||||
}
|
}
|
||||||
|
@@ -10,9 +10,10 @@ class DiscoveryIE(InfoExtractor):
|
|||||||
_VALID_URL = r'http://dsc\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
|
_VALID_URL = r'http://dsc\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://dsc.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
|
'url': 'http://dsc.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
|
||||||
'file': '614784.mp4',
|
|
||||||
'md5': 'e12614f9ee303a6ccef415cb0793eba2',
|
'md5': 'e12614f9ee303a6ccef415cb0793eba2',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '614784',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'MythBusters: Mission Impossible Outtakes',
|
'title': 'MythBusters: Mission Impossible Outtakes',
|
||||||
'description': ('Watch Jamie Hyneman and Adam Savage practice being'
|
'description': ('Watch Jamie Hyneman and Adam Savage practice being'
|
||||||
' each other -- to the point of confusing Jamie\'s dog -- and '
|
' each other -- to the point of confusing Jamie\'s dog -- and '
|
||||||
@@ -34,7 +35,7 @@ class DiscoveryIE(InfoExtractor):
|
|||||||
formats = []
|
formats = []
|
||||||
for f in info['mp4']:
|
for f in info['mp4']:
|
||||||
formats.append(
|
formats.append(
|
||||||
{'url': f['src'], r'ext': r'mp4', 'tbr': int(f['bitrate'][:-1])})
|
{'url': f['src'], 'ext': 'mp4', 'tbr': int(f['bitrate'][:-1])})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': info['contentId'],
|
'id': info['contentId'],
|
||||||
|
@@ -1,23 +1,25 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
determine_ext
|
|
||||||
)
|
)
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class EHowIE(InfoExtractor):
|
class EHowIE(InfoExtractor):
|
||||||
IE_NAME = u'eHow'
|
IE_NAME = 'eHow'
|
||||||
_VALID_URL = r'(?:https?://)?(?:www\.)?ehow\.com/[^/_?]*_(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?ehow\.com/[^/_?]*_(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.ehow.com/video_12245069_hardwood-flooring-basics.html',
|
'url': 'http://www.ehow.com/video_12245069_hardwood-flooring-basics.html',
|
||||||
u'file': u'12245069.flv',
|
'md5': '9809b4e3f115ae2088440bcb4efbf371',
|
||||||
u'md5': u'9809b4e3f115ae2088440bcb4efbf371',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '12245069',
|
||||||
u"title": u"Hardwood Flooring Basics",
|
'ext': 'flv',
|
||||||
u"description": u"Hardwood flooring may be time consuming, but its ultimately a pretty straightforward concept. Learn about hardwood flooring basics with help from a hardware flooring business owner in this free video...",
|
'title': 'Hardwood Flooring Basics',
|
||||||
u"uploader": u"Erick Nathan"
|
'description': 'Hardwood flooring may be time consuming, but its ultimately a pretty straightforward concept. Learn about hardwood flooring basics with help from a hardware flooring business owner in this free video...',
|
||||||
|
'uploader': 'Erick Nathan',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -26,21 +28,16 @@ class EHowIE(InfoExtractor):
|
|||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
video_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)',
|
video_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)',
|
||||||
webpage, u'video URL')
|
webpage, 'video URL')
|
||||||
final_url = compat_urllib_parse.unquote(video_url)
|
final_url = compat_urllib_parse.unquote(video_url)
|
||||||
uploader = self._search_regex(r'<meta name="uploader" content="(.+?)" />',
|
uploader = self._html_search_meta('uploader', webpage)
|
||||||
webpage, u'uploader')
|
|
||||||
title = self._og_search_title(webpage).replace(' | eHow', '')
|
title = self._og_search_title(webpage).replace(' | eHow', '')
|
||||||
ext = determine_ext(final_url)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'video',
|
'id': video_id,
|
||||||
'id': video_id,
|
'url': final_url,
|
||||||
'url': final_url,
|
'title': title,
|
||||||
'ext': ext,
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'title': title,
|
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
|
||||||
'description': self._og_search_description(webpage),
|
'description': self._og_search_description(webpage),
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
77
youtube_dl/extractor/franceculture.py
Normal file
77
youtube_dl/extractor/franceculture.py
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_parse_qs,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class FranceCultureIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'(?P<baseurl>http://(?:www\.)?franceculture\.fr/)player/reecouter\?play=(?P<id>[0-9]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.franceculture.fr/player/reecouter?play=4795174',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '4795174',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Rendez-vous au pays des geeks',
|
||||||
|
'vcodec': 'none',
|
||||||
|
'uploader': 'Colette Fellous',
|
||||||
|
'upload_date': '20140301',
|
||||||
|
'duration': 3601,
|
||||||
|
'thumbnail': r're:^http://www\.franceculture\.fr/.*/images/player/Carnet-nomade\.jpg$',
|
||||||
|
'description': 'Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche des « geeks », une enquête menée aux Etats-Unis dans la S ...',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
baseurl = mobj.group('baseurl')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
params_code = self._search_regex(
|
||||||
|
r"<param name='movie' value='/sites/all/modules/rf/rf_player/swf/loader.swf\?([^']+)' />",
|
||||||
|
webpage, 'parameter code')
|
||||||
|
params = compat_parse_qs(params_code)
|
||||||
|
video_url = compat_urlparse.urljoin(baseurl, params['urlAOD'][0])
|
||||||
|
|
||||||
|
title = self._html_search_regex(
|
||||||
|
r'<h1 class="title[^"]+">(.+?)</h1>', webpage, 'title')
|
||||||
|
uploader = self._html_search_regex(
|
||||||
|
r'(?s)<div id="emission".*?<span class="author">(.*?)</span>',
|
||||||
|
webpage, 'uploader', fatal=False)
|
||||||
|
thumbnail_part = self._html_search_regex(
|
||||||
|
r'(?s)<div id="emission".*?<img src="([^"]+)"', webpage,
|
||||||
|
'thumbnail', fatal=False)
|
||||||
|
if thumbnail_part is None:
|
||||||
|
thumbnail = None
|
||||||
|
else:
|
||||||
|
thumbnail = compat_urlparse.urljoin(baseurl, thumbnail_part)
|
||||||
|
description = self._html_search_regex(
|
||||||
|
r'(?s)<p class="desc">(.*?)</p>', webpage, 'description')
|
||||||
|
|
||||||
|
info = json.loads(params['infoData'][0])[0]
|
||||||
|
duration = info.get('media_length')
|
||||||
|
upload_date_candidate = info.get('media_section5')
|
||||||
|
upload_date = (
|
||||||
|
upload_date_candidate
|
||||||
|
if (upload_date_candidate is not None and
|
||||||
|
re.match(r'[0-9]{8}$', upload_date_candidate))
|
||||||
|
else None)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'url': video_url,
|
||||||
|
'vcodec': 'none' if video_url.lower().endswith('.mp3') else None,
|
||||||
|
'duration': duration,
|
||||||
|
'uploader': uploader,
|
||||||
|
'upload_date': upload_date,
|
||||||
|
'title': title,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'description': description,
|
||||||
|
}
|
@@ -25,6 +25,7 @@ from ..utils import (
|
|||||||
from .brightcove import BrightcoveIE
|
from .brightcove import BrightcoveIE
|
||||||
from .ooyala import OoyalaIE
|
from .ooyala import OoyalaIE
|
||||||
from .rutv import RUTVIE
|
from .rutv import RUTVIE
|
||||||
|
from .smotri import SmotriIE
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
@@ -81,6 +82,17 @@ class GenericIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'add_ie': ['Brightcove'],
|
'add_ie': ['Brightcove'],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.championat.com/video/football/v/87/87499.html',
|
||||||
|
'md5': 'fb973ecf6e4a78a67453647444222983',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3414141473001',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Видео. Удаление Дзагоева (ЦСКА)',
|
||||||
|
'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
|
||||||
|
'uploader': 'Championat',
|
||||||
|
},
|
||||||
|
},
|
||||||
# Direct link to a video
|
# Direct link to a video
|
||||||
{
|
{
|
||||||
'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
|
'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
|
||||||
@@ -197,6 +209,36 @@ class GenericIE(InfoExtractor):
|
|||||||
'description': 'No description',
|
'description': 'No description',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# arte embed
|
||||||
|
{
|
||||||
|
'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
|
||||||
|
'md5': '7653032cbb25bf6c80d80f217055fa43',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '048195-004_PLUS7-F',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'X:enius',
|
||||||
|
'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
|
||||||
|
'upload_date': '20140320',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': 'Requires rtmpdump'
|
||||||
|
}
|
||||||
|
},
|
||||||
|
# smotri embed
|
||||||
|
{
|
||||||
|
'url': 'http://rbctv.rbc.ru/archive/news/562949990879132.shtml',
|
||||||
|
'md5': 'ec40048448e9284c9a1de77bb188108b',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'v27008541fad',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Крым и Севастополь вошли в состав России',
|
||||||
|
'description': 'md5:fae01b61f68984c7bd2fa741e11c3175',
|
||||||
|
'duration': 900,
|
||||||
|
'upload_date': '20140318',
|
||||||
|
'uploader': 'rbctv_2012_4',
|
||||||
|
'uploader_id': 'rbctv_2012_4',
|
||||||
|
},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def report_download_webpage(self, video_id):
|
def report_download_webpage(self, video_id):
|
||||||
@@ -285,13 +327,16 @@ class GenericIE(InfoExtractor):
|
|||||||
if not parsed_url.scheme:
|
if not parsed_url.scheme:
|
||||||
default_search = self._downloader.params.get('default_search')
|
default_search = self._downloader.params.get('default_search')
|
||||||
if default_search is None:
|
if default_search is None:
|
||||||
default_search = 'auto'
|
default_search = 'auto_warning'
|
||||||
|
|
||||||
if default_search == 'auto':
|
if default_search in ('auto', 'auto_warning'):
|
||||||
if '/' in url:
|
if '/' in url:
|
||||||
self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
|
self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
|
||||||
return self.url_result('http://' + url)
|
return self.url_result('http://' + url)
|
||||||
else:
|
else:
|
||||||
|
if default_search == 'auto_warning':
|
||||||
|
self._downloader.report_warning(
|
||||||
|
'Falling back to youtube search for %s . Set --default-search to "auto" to suppress this warning.' % url)
|
||||||
return self.url_result('ytsearch:' + url)
|
return self.url_result('ytsearch:' + url)
|
||||||
else:
|
else:
|
||||||
assert ':' in default_search
|
assert ':' in default_search
|
||||||
@@ -525,6 +570,18 @@ class GenericIE(InfoExtractor):
|
|||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return self.url_result(mobj.group('url'), 'TED')
|
return self.url_result(mobj.group('url'), 'TED')
|
||||||
|
|
||||||
|
# Look for embedded arte.tv player
|
||||||
|
mobj = re.search(
|
||||||
|
r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
|
||||||
|
webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result(mobj.group('url'), 'ArteTVEmbed')
|
||||||
|
|
||||||
|
# Look for embedded smotri.com player
|
||||||
|
smotri_url = SmotriIE._extract_url(webpage)
|
||||||
|
if smotri_url:
|
||||||
|
return self.url_result(smotri_url, 'Smotri')
|
||||||
|
|
||||||
# Start with something easy: JW Player in SWFObject
|
# Start with something easy: JW Player in SWFObject
|
||||||
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
|
@@ -21,9 +21,10 @@ class HuffPostIE(InfoExtractor):
|
|||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
|
'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
|
||||||
'file': '52dd3e4b02a7602131000677.mp4',
|
|
||||||
'md5': '55f5e8981c1c80a64706a44b74833de8',
|
'md5': '55f5e8981c1c80a64706a44b74833de8',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '52dd3e4b02a7602131000677',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Legalese It! with @MikeSacksHP',
|
'title': 'Legalese It! with @MikeSacksHP',
|
||||||
'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more. ',
|
'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more. ',
|
||||||
'duration': 1549,
|
'duration': 1549,
|
||||||
|
@@ -1,10 +1,8 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
|
||||||
determine_ext,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class IGNIE(InfoExtractor):
|
class IGNIE(InfoExtractor):
|
||||||
@@ -14,52 +12,57 @@ class IGNIE(InfoExtractor):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
_VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles|(?:[^/]*/feature))(/.+)?/(?P<name_or_id>.+)'
|
_VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles|(?:[^/]*/feature))(/.+)?/(?P<name_or_id>.+)'
|
||||||
IE_NAME = u'ign.com'
|
IE_NAME = 'ign.com'
|
||||||
|
|
||||||
_CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'
|
_CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'
|
||||||
_DESCRIPTION_RE = [r'<span class="page-object-description">(.+?)</span>',
|
_DESCRIPTION_RE = [
|
||||||
r'id="my_show_video">.*?<p>(.*?)</p>',
|
r'<span class="page-object-description">(.+?)</span>',
|
||||||
]
|
r'id="my_show_video">.*?<p>(.*?)</p>',
|
||||||
|
]
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
u'url': u'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
|
'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
|
||||||
u'file': u'8f862beef863986b2785559b9e1aa599.mp4',
|
'md5': 'eac8bdc1890980122c3b66f14bdd02e9',
|
||||||
u'md5': u'eac8bdc1890980122c3b66f14bdd02e9',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '8f862beef863986b2785559b9e1aa599',
|
||||||
u'title': u'The Last of Us Review',
|
'ext': 'mp4',
|
||||||
u'description': u'md5:c8946d4260a4d43a00d5ae8ed998870c',
|
'title': 'The Last of Us Review',
|
||||||
|
'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
u'url': u'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
|
'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
|
||||||
u'playlist': [
|
'playlist': [
|
||||||
{
|
{
|
||||||
u'file': u'5ebbd138523268b93c9141af17bec937.mp4',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '5ebbd138523268b93c9141af17bec937',
|
||||||
u'title': u'GTA 5 Video Review',
|
'ext': 'mp4',
|
||||||
u'description': u'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
|
'title': 'GTA 5 Video Review',
|
||||||
|
'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
u'file': u'638672ee848ae4ff108df2a296418ee2.mp4',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '638672ee848ae4ff108df2a296418ee2',
|
||||||
u'title': u'26 Twisted Moments from GTA 5 in Slow Motion',
|
'ext': 'mp4',
|
||||||
u'description': u'The twisted beauty of GTA 5 in stunning slow motion.',
|
'title': '26 Twisted Moments from GTA 5 in Slow Motion',
|
||||||
|
'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
u'params': {
|
'params': {
|
||||||
u'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _find_video_id(self, webpage):
|
def _find_video_id(self, webpage):
|
||||||
res_id = [r'data-video-id="(.+?)"',
|
res_id = [
|
||||||
r'<object id="vid_(.+?)"',
|
r'data-video-id="(.+?)"',
|
||||||
r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
|
r'<object id="vid_(.+?)"',
|
||||||
]
|
r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
|
||||||
|
]
|
||||||
return self._search_regex(res_id, webpage, 'video id')
|
return self._search_regex(res_id, webpage, 'video id')
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -68,7 +71,7 @@ class IGNIE(InfoExtractor):
|
|||||||
page_type = mobj.group('type')
|
page_type = mobj.group('type')
|
||||||
webpage = self._download_webpage(url, name_or_id)
|
webpage = self._download_webpage(url, name_or_id)
|
||||||
if page_type == 'articles':
|
if page_type == 'articles':
|
||||||
video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, u'video url')
|
video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, 'video url')
|
||||||
return self.url_result(video_url, ie='IGN')
|
return self.url_result(video_url, ie='IGN')
|
||||||
elif page_type != 'video':
|
elif page_type != 'video':
|
||||||
multiple_urls = re.findall(
|
multiple_urls = re.findall(
|
||||||
@@ -80,41 +83,37 @@ class IGNIE(InfoExtractor):
|
|||||||
video_id = self._find_video_id(webpage)
|
video_id = self._find_video_id(webpage)
|
||||||
result = self._get_video_info(video_id)
|
result = self._get_video_info(video_id)
|
||||||
description = self._html_search_regex(self._DESCRIPTION_RE,
|
description = self._html_search_regex(self._DESCRIPTION_RE,
|
||||||
webpage, 'video description',
|
webpage, 'video description', flags=re.DOTALL)
|
||||||
flags=re.DOTALL)
|
|
||||||
result['description'] = description
|
result['description'] = description
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def _get_video_info(self, video_id):
|
def _get_video_info(self, video_id):
|
||||||
config_url = self._CONFIG_URL_TEMPLATE % video_id
|
config_url = self._CONFIG_URL_TEMPLATE % video_id
|
||||||
config = json.loads(self._download_webpage(config_url, video_id,
|
config = self._download_json(config_url, video_id)
|
||||||
u'Downloading video info'))
|
|
||||||
media = config['playlist']['media']
|
media = config['playlist']['media']
|
||||||
video_url = media['url']
|
|
||||||
|
|
||||||
return {'id': media['metadata']['videoId'],
|
return {
|
||||||
'url': video_url,
|
'id': media['metadata']['videoId'],
|
||||||
'ext': determine_ext(video_url),
|
'url': media['url'],
|
||||||
'title': media['metadata']['title'],
|
'title': media['metadata']['title'],
|
||||||
'thumbnail': media['poster'][0]['url'].replace('{size}', 'grande'),
|
'thumbnail': media['poster'][0]['url'].replace('{size}', 'grande'),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class OneUPIE(IGNIE):
|
class OneUPIE(IGNIE):
|
||||||
"""Extractor for 1up.com, it uses the ign videos system."""
|
|
||||||
|
|
||||||
_VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)'
|
_VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)'
|
||||||
IE_NAME = '1up.com'
|
IE_NAME = '1up.com'
|
||||||
|
|
||||||
_DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
|
_DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://gamevideos.1up.com/video/id/34976',
|
'url': 'http://gamevideos.1up.com/video/id/34976',
|
||||||
u'file': u'34976.mp4',
|
'md5': '68a54ce4ebc772e4b71e3123d413163d',
|
||||||
u'md5': u'68a54ce4ebc772e4b71e3123d413163d',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '34976',
|
||||||
u'title': u'Sniper Elite V2 - Trailer',
|
'ext': 'mp4',
|
||||||
u'description': u'md5:5d289b722f5a6d940ca3136e9dae89cf',
|
'title': 'Sniper Elite V2 - Trailer',
|
||||||
|
'description': 'md5:5d289b722f5a6d940ca3136e9dae89cf',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -123,7 +122,6 @@ class OneUPIE(IGNIE):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
id = mobj.group('name_or_id')
|
|
||||||
result = super(OneUPIE, self)._real_extract(url)
|
result = super(OneUPIE, self)._real_extract(url)
|
||||||
result['id'] = id
|
result['id'] = mobj.group('name_or_id')
|
||||||
return result
|
return result
|
||||||
|
@@ -1,37 +1,39 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class KickStarterIE(InfoExtractor):
|
class KickStarterIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>\d*)/.*'
|
_VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>[^/]*)/.*'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u"url": u"https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location",
|
'url': 'https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location',
|
||||||
u"file": u"1404461844.mp4",
|
'md5': 'c81addca81327ffa66c642b5d8b08cab',
|
||||||
u"md5": u"c81addca81327ffa66c642b5d8b08cab",
|
'info_dict': {
|
||||||
u"info_dict": {
|
'id': '1404461844',
|
||||||
u"title": u"Intersection: The Story of Josh Grant by Kyle Cowling",
|
'ext': 'mp4',
|
||||||
|
'title': 'Intersection: The Story of Josh Grant by Kyle Cowling',
|
||||||
|
'description': 'A unique motocross documentary that examines the '
|
||||||
|
'life and mind of one of sports most elite athletes: Josh Grant.',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
m = re.match(self._VALID_URL, url)
|
m = re.match(self._VALID_URL, url)
|
||||||
video_id = m.group('id')
|
video_id = m.group('id')
|
||||||
webpage_src = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_url = self._search_regex(r'data-video="(.*?)">',
|
video_url = self._search_regex(r'data-video-url="(.*?)"',
|
||||||
webpage_src, u'video URL')
|
webpage, 'video URL')
|
||||||
if 'mp4' in video_url:
|
video_title = self._html_search_regex(r'<title>(.*?)</title>',
|
||||||
ext = 'mp4'
|
webpage, 'title').rpartition('— Kickstarter')[0].strip()
|
||||||
else:
|
|
||||||
ext = 'flv'
|
|
||||||
video_title = self._html_search_regex(r"<title>(.*?)</title>",
|
|
||||||
webpage_src, u'title').rpartition(u'\u2014 Kickstarter')[0].strip()
|
|
||||||
|
|
||||||
results = [{
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'ext': ext,
|
'description': self._og_search_description(webpage),
|
||||||
}]
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
return results
|
}
|
||||||
|
@@ -13,8 +13,9 @@ class MetacriticIE(InfoExtractor):
|
|||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
|
'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
|
||||||
'file': '3698222.mp4',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '3698222',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
|
'title': 'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
|
||||||
'description': 'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
|
'description': 'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
|
||||||
'duration': 221,
|
'duration': 221,
|
||||||
|
@@ -14,7 +14,7 @@ from ..utils import (
|
|||||||
class MooshareIE(InfoExtractor):
|
class MooshareIE(InfoExtractor):
|
||||||
IE_NAME = 'mooshare'
|
IE_NAME = 'mooshare'
|
||||||
IE_DESC = 'Mooshare.biz'
|
IE_DESC = 'Mooshare.biz'
|
||||||
_VALID_URL = r'http://mooshare\.biz/(?P<id>[\da-z]{12})'
|
_VALID_URL = r'http://(?:www\.)?mooshare\.biz/(?P<id>[\da-z]{12})'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
|
75
youtube_dl/extractor/musicplayon.py
Normal file
75
youtube_dl/extractor/musicplayon.py
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
|
class MusicPlayOnIE(InfoExtractor):
    """Information extractor for musicplayon.com video pages.

    Page metadata (title, description, thumbnail, duration, view count,
    uploader) is scraped from the watch page. Formats consist of one fixed
    "stream-mobile" MP4 URL plus every variant listed in the site's
    ``manifest.m3u8`` playlist.
    """

    _VALID_URL = r'https?://(?:.+?\.)?musicplayon\.com/play(?:-touch)?\?(?:v|pl=100&play)=(?P<id>\d+)'

    _TEST = {
        'url': 'http://en.musicplayon.com/play?v=433377',
        'info_dict': {
            'id': '433377',
            'ext': 'mp4',
            'title': 'Rick Ross - Interview On Chelsea Lately (2014)',
            'description': 'Rick Ross Interview On Chelsea Lately',
            'duration': 342,
            'uploader': 'ultrafish',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video referenced by ``url``.

        The video id is taken from the ``id`` group of ``_VALID_URL``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        page = self._download_webpage(url, video_id)

        title = self._og_search_title(page)
        description = self._og_search_description(page)
        thumbnail = self._og_search_thumbnail(page)
        # The remaining fields are optional: lookups are non-fatal and the
        # numeric ones are normalised with int_or_none() on return.
        duration = self._html_search_meta('video:duration', page, 'duration', fatal=False)
        view_count = self._og_search_property('count', page, fatal=False)
        uploader = self._html_search_regex(
            r'<div>by <a href="[^"]+" class="purple">([^<]+)</a></div>', page, 'uploader', fatal=False)

        # Always-present direct MP4 stream.
        formats = [
            {
                'url': 'http://media0-eu-nl.musicplayon.com/stream-mobile?id=%s&type=.mp4' % video_id,
                'ext': 'mp4',
            }
        ]

        manifest = self._download_webpage(
            'http://en.musicplayon.com/manifest.m3u8?v=%s' % video_id, video_id, 'Downloading manifest')

        # Each '#'-delimited chunk is one playlist tag followed by its URI line.
        for entry in manifest.split('#')[1:]:
            if entry.startswith('EXT-X-STREAM-INF:'):
                # Use a dedicated name so the ``url`` argument is not clobbered.
                meta, stream_url, _ = entry.split('\n')
                # Split on the first '=' only so attribute values that
                # themselves contain '=' do not break dict construction.
                params = dict(param.split('=', 1) for param in meta.split(',')[1:])
                # RESOLUTION is "<width>x<height>".
                resolution = params['RESOLUTION'].split('x')
                formats.append({
                    'url': stream_url,
                    'ext': 'mp4',
                    # NOTE(review): BANDWIDTH is passed through unscaled;
                    # confirm the unit expected for 'tbr'.
                    'tbr': int(params['BANDWIDTH']),
                    'width': int(resolution[0]),
                    'height': int(resolution[-1]),
                    'format_note': params['NAME'].replace('"', '').strip(),
                })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'duration': int_or_none(duration),
            'view_count': int_or_none(view_count),
            'formats': formats,
        }
|
@@ -6,12 +6,13 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class NBAIE(InfoExtractor):
|
class NBAIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
|
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
|
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
|
||||||
'file': u'0021200253-okc-bkn-recap.nba.mp4',
|
|
||||||
'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
|
'md5': u'c0edcfc37607344e2ff8f13c378c88a4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '0021200253-okc-bkn-recap.nba',
|
||||||
|
'ext': 'mp4',
|
||||||
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
|
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
|
||||||
'title': 'Thunder vs. Nets',
|
'title': 'Thunder vs. Nets',
|
||||||
},
|
},
|
||||||
@@ -19,7 +20,7 @@ class NBAIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group(1)
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
@@ -33,7 +34,6 @@ class NBAIE(InfoExtractor):
|
|||||||
return {
|
return {
|
||||||
'id': shortened_video_id,
|
'id': shortened_video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': 'mp4',
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
}
|
}
|
||||||
|
@@ -1,12 +1,10 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import socket
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_http_client,
|
|
||||||
compat_urllib_error,
|
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
@@ -18,57 +16,54 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class NiconicoIE(InfoExtractor):
|
class NiconicoIE(InfoExtractor):
|
||||||
IE_NAME = u'niconico'
|
IE_NAME = 'niconico'
|
||||||
IE_DESC = u'ニコニコ動画'
|
IE_DESC = 'ニコニコ動画'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.nicovideo.jp/watch/sm22312215',
|
'url': 'http://www.nicovideo.jp/watch/sm22312215',
|
||||||
u'file': u'sm22312215.mp4',
|
'md5': 'd1a75c0823e2f629128c43e1212760f9',
|
||||||
u'md5': u'd1a75c0823e2f629128c43e1212760f9',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': 'sm22312215',
|
||||||
u'title': u'Big Buck Bunny',
|
'ext': 'mp4',
|
||||||
u'uploader': u'takuya0301',
|
'title': 'Big Buck Bunny',
|
||||||
u'uploader_id': u'2698420',
|
'uploader': 'takuya0301',
|
||||||
u'upload_date': u'20131123',
|
'uploader_id': '2698420',
|
||||||
u'description': u'(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
|
'upload_date': '20131123',
|
||||||
|
'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
|
||||||
},
|
},
|
||||||
u'params': {
|
'params': {
|
||||||
u'username': u'ydl.niconico@gmail.com',
|
'username': 'ydl.niconico@gmail.com',
|
||||||
u'password': u'youtube-dl',
|
'password': 'youtube-dl',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
_VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$'
|
_VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$'
|
||||||
_NETRC_MACHINE = 'niconico'
|
_NETRC_MACHINE = 'niconico'
|
||||||
# If True it will raise an error if no login info is provided
|
|
||||||
_LOGIN_REQUIRED = True
|
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
(username, password) = self._get_login_info()
|
(username, password) = self._get_login_info()
|
||||||
# No authentication to be performed
|
|
||||||
if username is None:
|
if username is None:
|
||||||
if self._LOGIN_REQUIRED:
|
# Login is required
|
||||||
raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
|
raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
|
||||||
return False
|
|
||||||
|
|
||||||
# Log in
|
# Log in
|
||||||
login_form_strs = {
|
login_form_strs = {
|
||||||
u'mail': username,
|
'mail': username,
|
||||||
u'password': password,
|
'password': password,
|
||||||
}
|
}
|
||||||
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
|
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
|
||||||
# chokes on unicode
|
# chokes on unicode
|
||||||
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
|
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
|
||||||
login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
|
login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
|
||||||
request = compat_urllib_request.Request(
|
request = compat_urllib_request.Request(
|
||||||
u'https://secure.nicovideo.jp/secure/login', login_data)
|
'https://secure.nicovideo.jp/secure/login', login_data)
|
||||||
login_results = self._download_webpage(
|
login_results = self._download_webpage(
|
||||||
request, u'', note=u'Logging in', errnote=u'Unable to log in')
|
request, None, note='Logging in', errnote='Unable to log in')
|
||||||
if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
|
if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
|
||||||
self._downloader.report_warning(u'unable to log in: bad username or password')
|
self._downloader.report_warning('unable to log in: bad username or password')
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@@ -82,12 +77,12 @@ class NiconicoIE(InfoExtractor):
|
|||||||
|
|
||||||
video_info = self._download_xml(
|
video_info = self._download_xml(
|
||||||
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
|
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
|
||||||
note=u'Downloading video info page')
|
note='Downloading video info page')
|
||||||
|
|
||||||
# Get flv info
|
# Get flv info
|
||||||
flv_info_webpage = self._download_webpage(
|
flv_info_webpage = self._download_webpage(
|
||||||
u'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
|
'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
|
||||||
video_id, u'Downloading flv info')
|
video_id, 'Downloading flv info')
|
||||||
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
|
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
|
||||||
|
|
||||||
# Start extracting information
|
# Start extracting information
|
||||||
@@ -106,22 +101,22 @@ class NiconicoIE(InfoExtractor):
|
|||||||
url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
|
url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
|
||||||
try:
|
try:
|
||||||
user_info = self._download_xml(
|
user_info = self._download_xml(
|
||||||
url, video_id, note=u'Downloading user information')
|
url, video_id, note='Downloading user information')
|
||||||
video_uploader = user_info.find('.//nickname').text
|
video_uploader = user_info.find('.//nickname').text
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
except ExtractorError as err:
|
||||||
self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
|
self._downloader.report_warning('Unable to download user info webpage: %s' % compat_str(err))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_real_url,
|
'url': video_real_url,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'ext': video_extension,
|
'ext': video_extension,
|
||||||
'format': video_format,
|
'format': video_format,
|
||||||
'thumbnail': video_thumbnail,
|
'thumbnail': video_thumbnail,
|
||||||
'description': video_description,
|
'description': video_description,
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'upload_date': video_upload_date,
|
'upload_date': video_upload_date,
|
||||||
'uploader_id': video_uploader_id,
|
'uploader_id': video_uploader_id,
|
||||||
'view_count': video_view_count,
|
'view_count': video_view_count,
|
||||||
'webpage_url': video_webpage_url,
|
'webpage_url': video_webpage_url,
|
||||||
}
|
}
|
||||||
|
157
youtube_dl/extractor/ntv.py
Normal file
157
youtube_dl/extractor/ntv.py
Normal file
@@ -0,0 +1,157 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
unescapeHTML
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class NTVIE(InfoExtractor):
    """Information extractor for ntv.ru pages.

    The numeric video id is located in the downloaded page with
    ``_VIDEO_ID_REGEXES``; the player XML at ``http://www.ntv.ru/vi<id>/``
    then supplies the metadata and the RTMP play paths.
    """

    _VALID_URL = r'http://(?:www\.)?ntv\.ru/(?P<id>.+)'

    _TESTS = [
        {
            'url': 'http://www.ntv.ru/novosti/863142/',
            'info_dict': {
                'id': '746000',
                'ext': 'flv',
                'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
                'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
                'duration': 136,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ntv.ru/video/novosti/750370/',
            'info_dict': {
                'id': '750370',
                'ext': 'flv',
                'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
                'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
                'duration': 172,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
            'info_dict': {
                'id': '747480',
                'ext': 'flv',
                'title': '«Сегодня». 21 марта 2014 года. 16:00 ',
                'description': '«Сегодня». 21 марта 2014 года. 16:00 ',
                'duration': 1496,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ntv.ru/kino/Koma_film',
            'info_dict': {
                'id': '750783',
                'ext': 'flv',
                'title': 'Остросюжетный фильм «Кома» 4 апреля вечером на НТВ',
                'description': 'Остросюжетный фильм «Кома» 4 апреля вечером на НТВ',
                'duration': 28,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/',
            'info_dict': {
                'id': '751482',
                'ext': 'flv',
                'title': '«Дело врачей»: «Деревце жизни»',
                'description': '«Дело врачей»: «Деревце жизни»',
                'duration': 2590,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
    ]

    # Patterns tried in order against the page; group 1 is the video id.
    _VIDEO_ID_REGEXES = [
        r'<meta property="og:url" content="http://www\.ntv\.ru/video/(\d+)',
        r'<video embed=[^>]+><id>(\d+)</id>',
        r'<video restriction[^>]+><key>(\d+)</key>'
    ]

    def _real_extract(self, url):
        """Return the info dict for the ntv.ru page at ``url``.

        Raises ExtractorError when none of ``_VIDEO_ID_REGEXES`` matches
        the downloaded page.
        """
        mobj = re.match(self._VALID_URL, url)
        # Initially the URL path; replaced by the numeric id found in the page.
        video_id = mobj.group('id')

        page = self._download_webpage(url, video_id, 'Downloading page')

        for pattern in self._VIDEO_ID_REGEXES:
            mobj = re.search(pattern, page)
            if mobj:
                break

        if not mobj:
            raise ExtractorError('No media links available for %s' % video_id)

        video_id = mobj.group(1)

        player = self._download_xml('http://www.ntv.ru/vi%s/' % video_id, video_id, 'Downloading video XML')
        title = unescapeHTML(player.find('./data/title').text)
        description = unescapeHTML(player.find('./data/description').text)

        video = player.find('./data/video')
        # The id reported by the player XML is the one returned to the caller.
        video_id = video.find('./id').text
        thumbnail = video.find('./splash').text
        duration = int(video.find('./totaltime').text)
        view_count = int(video.find('./views').text)
        puid22 = video.find('./puid22').text

        # puid22 selects the RTMP application; unknown values use 'video1'.
        apps = {
            '4': 'video1',
            '7': 'video2',
        }
        app = apps.get(puid22, apps['4'])

        formats = []
        # Each variant is stored as <PREFIXfile>/<PREFIXsize> element pairs.
        for format_id in ['', 'hi', 'webm']:
            file_el = video.find('./%sfile' % format_id)
            if file_el is None:
                continue
            size_el = video.find('./%ssize' % format_id)
            formats.append({
                'url': 'rtmp://media.ntv.ru/%s' % app,
                'app': app,
                'play_path': file_el.text,
                'rtmp_conn': 'B:1',
                'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128',
                'page_url': 'http://www.ntv.ru',
                'flash_ver': 'LNX 11,2,202,341',
                'rtmp_live': True,
                'ext': 'flv',
                # A missing size element must not abort extraction of a
                # variant whose file element is present.
                'filesize': int(size_el.text) if size_el is not None else None,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
        }
|
40
youtube_dl/extractor/oe1.py
Normal file
40
youtube_dl/extractor/oe1.py
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import calendar
|
||||||
|
import datetime
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
# audios on oe1.orf.at are only available for 7 days, so we can't
|
||||||
|
# add tests.
|
||||||
|
|
||||||
|
|
||||||
|
class OE1IE(InfoExtractor):
    """Information extractor for programme pages on oe1.orf.at."""

    IE_DESC = 'oe1.orf.at'
    _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'

    def _real_extract(self, url):
        """Return the info dict for the programme referenced by ``url``.

        Metadata comes from the JSON document served at
        ``/programm/<id>/konsole``.
        """
        show_id = re.match(self._VALID_URL, url).group('id')

        konsole_url = 'http://oe1.orf.at/programm/%s/konsole' % show_id
        data = self._download_json(konsole_url, show_id)
        item = data['item']

        # Combine the day label and time into one parseable string.
        broadcast_str = '%s %s' % (item['day_label'], item['time'])
        broadcast_dt = datetime.datetime.strptime(broadcast_str, '%d.%m.%Y %H:%M')
        unix_timestamp = calendar.timegm(broadcast_dt.utctimetuple())

        info = {
            'id': show_id,
            'title': item['title'],
            'url': item['url_stream'],
            'ext': 'mp3',
            'description': item.get('info'),
            'timestamp': unix_timestamp,
        }
        return info
|
@@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import os
|
import os
|
||||||
|
|
||||||
@@ -5,45 +7,50 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class PyvideoIE(InfoExtractor):
|
class PyvideoIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
|
_VALID_URL = r'http://(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
|
||||||
_TESTS = [{
|
|
||||||
u'url': u'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
|
_TESTS = [
|
||||||
u'file': u'24_4WWkSmNo.mp4',
|
{
|
||||||
u'md5': u'de317418c8bc76b1fd8633e4f32acbc6',
|
'url': 'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
|
||||||
u'info_dict': {
|
'md5': 'de317418c8bc76b1fd8633e4f32acbc6',
|
||||||
u"title": u"Become a logging expert in 30 minutes",
|
'info_dict': {
|
||||||
u"description": u"md5:9665350d466c67fb5b1598de379021f7",
|
'id': '24_4WWkSmNo',
|
||||||
u"upload_date": u"20130320",
|
'ext': 'mp4',
|
||||||
u"uploader": u"NextDayVideo",
|
'title': 'Become a logging expert in 30 minutes',
|
||||||
u"uploader_id": u"NextDayVideo",
|
'description': 'md5:9665350d466c67fb5b1598de379021f7',
|
||||||
|
'upload_date': '20130320',
|
||||||
|
'uploader': 'NextDayVideo',
|
||||||
|
'uploader_id': 'NextDayVideo',
|
||||||
|
},
|
||||||
|
'add_ie': ['Youtube'],
|
||||||
},
|
},
|
||||||
u'add_ie': ['Youtube'],
|
{
|
||||||
},
|
'url': 'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
|
||||||
{
|
'md5': '5fe1c7e0a8aa5570330784c847ff6d12',
|
||||||
u'url': u'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
|
'info_dict': {
|
||||||
u'md5': u'5fe1c7e0a8aa5570330784c847ff6d12',
|
'id': '2542',
|
||||||
u'info_dict': {
|
'ext': 'm4v',
|
||||||
u'id': u'2542',
|
'title': 'Gloriajw-SpotifyWithErikBernhardsson182',
|
||||||
u'ext': u'm4v',
|
},
|
||||||
u'title': u'Gloriajw-SpotifyWithErikBernhardsson182',
|
|
||||||
},
|
},
|
||||||
},
|
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage)
|
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage)
|
||||||
if m_youtube is not None:
|
if m_youtube is not None:
|
||||||
return self.url_result(m_youtube.group(1), 'Youtube')
|
return self.url_result(m_youtube.group(1), 'Youtube')
|
||||||
|
|
||||||
title = self._html_search_regex(r'<div class="section">.*?<h3>([^>]+?)</h3>',
|
title = self._html_search_regex(
|
||||||
webpage, u'title', flags=re.DOTALL)
|
r'<div class="section">.*?<h3>([^>]+?)</h3>', webpage, 'title', flags=re.DOTALL)
|
||||||
video_url = self._search_regex([r'<source src="(.*?)"',
|
video_url = self._search_regex(
|
||||||
r'<dt>Download</dt>.*?<a href="(.+?)"'],
|
[r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'],
|
||||||
webpage, u'video url', flags=re.DOTALL)
|
webpage, 'video url', flags=re.DOTALL)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': os.path.splitext(title)[0],
|
'title': os.path.splitext(title)[0],
|
||||||
|
@@ -1,5 +1,6 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import unified_strdate, determine_ext
|
from ..utils import unified_strdate, determine_ext
|
||||||
@@ -9,41 +10,44 @@ class RoxwelIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://www\.roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)'
|
_VALID_URL = r'https?://www\.roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.roxwel.com/player/passionpittakeawalklive.html',
|
'url': 'http://www.roxwel.com/player/passionpittakeawalklive.html',
|
||||||
u'file': u'passionpittakeawalklive.flv',
|
'info_dict': {
|
||||||
u'md5': u'd9dea8360a1e7d485d2206db7fe13035',
|
'id': 'passionpittakeawalklive',
|
||||||
u'info_dict': {
|
'ext': 'flv',
|
||||||
u'title': u'Take A Walk (live)',
|
'title': 'Take A Walk (live)',
|
||||||
u'uploader': u'Passion Pit',
|
'uploader': 'Passion Pit',
|
||||||
u'description': u'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. ',
|
'uploader_id': 'passionpit',
|
||||||
|
'upload_date': '20120928',
|
||||||
|
'description': 'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. ',
|
||||||
},
|
},
|
||||||
u'skip': u'Requires rtmpdump',
|
'params': {
|
||||||
|
# rtmp download
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
filename = mobj.group('filename')
|
filename = mobj.group('filename')
|
||||||
info_url = 'http://www.roxwel.com/api/videos/%s' % filename
|
info_url = 'http://www.roxwel.com/api/videos/%s' % filename
|
||||||
info_page = self._download_webpage(info_url, filename,
|
info = self._download_json(info_url, filename)
|
||||||
u'Downloading video info')
|
|
||||||
|
|
||||||
self.report_extraction(filename)
|
|
||||||
info = json.loads(info_page)
|
|
||||||
rtmp_rates = sorted([int(r.replace('flv_', '')) for r in info['media_rates'] if r.startswith('flv_')])
|
rtmp_rates = sorted([int(r.replace('flv_', '')) for r in info['media_rates'] if r.startswith('flv_')])
|
||||||
best_rate = rtmp_rates[-1]
|
best_rate = rtmp_rates[-1]
|
||||||
url_page_url = 'http://roxwel.com/pl_one_time.php?filename=%s&quality=%s' % (filename, best_rate)
|
url_page_url = 'http://roxwel.com/pl_one_time.php?filename=%s&quality=%s' % (filename, best_rate)
|
||||||
rtmp_url = self._download_webpage(url_page_url, filename, u'Downloading video url')
|
rtmp_url = self._download_webpage(url_page_url, filename, 'Downloading video url')
|
||||||
ext = determine_ext(rtmp_url)
|
ext = determine_ext(rtmp_url)
|
||||||
if ext == 'f4v':
|
if ext == 'f4v':
|
||||||
rtmp_url = rtmp_url.replace(filename, 'mp4:%s' % filename)
|
rtmp_url = rtmp_url.replace(filename, 'mp4:%s' % filename)
|
||||||
|
|
||||||
return {'id': filename,
|
return {
|
||||||
'title': info['title'],
|
'id': filename,
|
||||||
'url': rtmp_url,
|
'title': info['title'],
|
||||||
'ext': 'flv',
|
'url': rtmp_url,
|
||||||
'description': info['description'],
|
'ext': 'flv',
|
||||||
'thumbnail': info.get('player_image_url') or info.get('image_url_large'),
|
'description': info['description'],
|
||||||
'uploader': info['artist'],
|
'thumbnail': info.get('player_image_url') or info.get('image_url_large'),
|
||||||
'uploader_id': info['artistname'],
|
'uploader': info['artist'],
|
||||||
'upload_date': unified_strdate(info['dbdate']),
|
'uploader_id': info['artistname'],
|
||||||
}
|
'upload_date': unified_strdate(info['dbdate']),
|
||||||
|
}
|
||||||
|
@@ -28,6 +28,7 @@ class RTSIE(InfoExtractor):
|
|||||||
'uploader': 'Divers',
|
'uploader': 'Divers',
|
||||||
'upload_date': '19680921',
|
'upload_date': '19680921',
|
||||||
'timestamp': -40280400,
|
'timestamp': -40280400,
|
||||||
|
'thumbnail': 're:^https?://.*\.image'
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -58,4 +59,5 @@ class RTSIE(InfoExtractor):
|
|||||||
'duration': duration,
|
'duration': duration,
|
||||||
'uploader': info.get('programName'),
|
'uploader': info.get('programName'),
|
||||||
'timestamp': upload_timestamp,
|
'timestamp': upload_timestamp,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
}
|
}
|
||||||
|
@@ -2,7 +2,6 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@@ -20,8 +19,9 @@ class RutubeIE(InfoExtractor):
|
|||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
|
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
|
||||||
'file': '3eac3b4561676c17df9132a9a1e62e3e.mp4',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '3eac3b4561676c17df9132a9a1e62e3e',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Раненный кенгуру забежал в аптеку',
|
'title': 'Раненный кенгуру забежал в аптеку',
|
||||||
'description': 'http://www.ntdtv.ru ',
|
'description': 'http://www.ntdtv.ru ',
|
||||||
'duration': 80,
|
'duration': 80,
|
||||||
@@ -38,15 +38,15 @@ class RutubeIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
api_response = self._download_webpage('http://rutube.ru/api/video/%s/?format=json' % video_id,
|
video = self._download_json(
|
||||||
video_id, 'Downloading video JSON')
|
'http://rutube.ru/api/video/%s/?format=json' % video_id,
|
||||||
video = json.loads(api_response)
|
video_id, 'Downloading video JSON')
|
||||||
|
|
||||||
api_response = self._download_webpage('http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id,
|
trackinfo = self._download_json(
|
||||||
video_id, 'Downloading trackinfo JSON')
|
'http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id,
|
||||||
trackinfo = json.loads(api_response)
|
video_id, 'Downloading trackinfo JSON')
|
||||||
|
|
||||||
# Some videos don't have the author field
|
# Some videos don't have the author field
|
||||||
author = trackinfo.get('author') or {}
|
author = trackinfo.get('author') or {}
|
||||||
m3u8_url = trackinfo['video_balancer'].get('m3u8')
|
m3u8_url = trackinfo['video_balancer'].get('m3u8')
|
||||||
@@ -79,10 +79,9 @@ class RutubeChannelIE(InfoExtractor):
|
|||||||
def _extract_videos(self, channel_id, channel_title=None):
|
def _extract_videos(self, channel_id, channel_title=None):
|
||||||
entries = []
|
entries = []
|
||||||
for pagenum in itertools.count(1):
|
for pagenum in itertools.count(1):
|
||||||
api_response = self._download_webpage(
|
page = self._download_json(
|
||||||
self._PAGE_TEMPLATE % (channel_id, pagenum),
|
self._PAGE_TEMPLATE % (channel_id, pagenum),
|
||||||
channel_id, 'Downloading page %s' % pagenum)
|
channel_id, 'Downloading page %s' % pagenum)
|
||||||
page = json.loads(api_response)
|
|
||||||
results = page['results']
|
results = page['results']
|
||||||
if not results:
|
if not results:
|
||||||
break
|
break
|
||||||
@@ -108,10 +107,9 @@ class RutubeMovieIE(RutubeChannelIE):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
movie_id = mobj.group('id')
|
movie_id = mobj.group('id')
|
||||||
api_response = self._download_webpage(
|
movie = self._download_json(
|
||||||
self._MOVIE_TEMPLATE % movie_id, movie_id,
|
self._MOVIE_TEMPLATE % movie_id, movie_id,
|
||||||
'Downloading movie JSON')
|
'Downloading movie JSON')
|
||||||
movie = json.loads(api_response)
|
|
||||||
movie_name = movie['name']
|
movie_name = movie['name']
|
||||||
return self._extract_videos(movie_id, movie_name)
|
return self._extract_videos(movie_id, movie_name)
|
||||||
|
|
||||||
|
@@ -1,24 +0,0 @@
|
|||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
|
|
||||||
|
|
||||||
class SlashdotIE(InfoExtractor):
    """Extractor for tv.slashdot.org embed URLs.

    The page itself is only scraped for a script URL, which is then
    returned via ``url_result`` tagged with the 'Ooyala' extractor name.
    """
    _VALID_URL = r'https?://tv\.slashdot\.org/video/\?embed=(?P<id>.*?)(&|$)'

    _TEST = {
        u'add_ie': ['Ooyala'],
        u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz',
        u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4',
        u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735',
        u'info_dict': {
            u'title': u' Meet the Stampede Supercomputing Cluster\'s Administrator',
        },
    }

    def _real_extract(self, url):
        """Return a url_result pointing at the script URL found in the page."""
        # The embed id from the URL is only used as the download label.
        embed_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, embed_id)
        script_src = self._search_regex(
            r'<script src="(.*?)"', page, 'ooyala url')
        return self.url_result(script_src, 'Ooyala')
|
|
@@ -13,22 +13,24 @@ from ..utils import (
|
|||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
url_basename,
|
url_basename,
|
||||||
|
int_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class SmotriIE(InfoExtractor):
|
class SmotriIE(InfoExtractor):
|
||||||
IE_DESC = 'Smotri.com'
|
IE_DESC = 'Smotri.com'
|
||||||
IE_NAME = 'smotri'
|
IE_NAME = 'smotri'
|
||||||
_VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/video/view/\?id=(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4}))'
|
_VALID_URL = r'^https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
|
||||||
_NETRC_MACHINE = 'smotri'
|
_NETRC_MACHINE = 'smotri'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
# real video id 2610366
|
# real video id 2610366
|
||||||
{
|
{
|
||||||
'url': 'http://smotri.com/video/view/?id=v261036632ab',
|
'url': 'http://smotri.com/video/view/?id=v261036632ab',
|
||||||
'file': 'v261036632ab.mp4',
|
|
||||||
'md5': '2a7b08249e6f5636557579c368040eb9',
|
'md5': '2a7b08249e6f5636557579c368040eb9',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'v261036632ab',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'катастрофа с камер видеонаблюдения',
|
'title': 'катастрофа с камер видеонаблюдения',
|
||||||
'uploader': 'rbc2008',
|
'uploader': 'rbc2008',
|
||||||
'uploader_id': 'rbc08',
|
'uploader_id': 'rbc08',
|
||||||
@@ -40,9 +42,10 @@ class SmotriIE(InfoExtractor):
|
|||||||
# real video id 57591
|
# real video id 57591
|
||||||
{
|
{
|
||||||
'url': 'http://smotri.com/video/view/?id=v57591cb20',
|
'url': 'http://smotri.com/video/view/?id=v57591cb20',
|
||||||
'file': 'v57591cb20.flv',
|
|
||||||
'md5': '830266dfc21f077eac5afd1883091bcd',
|
'md5': '830266dfc21f077eac5afd1883091bcd',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'v57591cb20',
|
||||||
|
'ext': 'flv',
|
||||||
'title': 'test',
|
'title': 'test',
|
||||||
'uploader': 'Support Photofile@photofile',
|
'uploader': 'Support Photofile@photofile',
|
||||||
'uploader_id': 'support-photofile',
|
'uploader_id': 'support-photofile',
|
||||||
@@ -54,9 +57,10 @@ class SmotriIE(InfoExtractor):
|
|||||||
# video-password
|
# video-password
|
||||||
{
|
{
|
||||||
'url': 'http://smotri.com/video/view/?id=v1390466a13c',
|
'url': 'http://smotri.com/video/view/?id=v1390466a13c',
|
||||||
'file': 'v1390466a13c.mp4',
|
|
||||||
'md5': 'f6331cef33cad65a0815ee482a54440b',
|
'md5': 'f6331cef33cad65a0815ee482a54440b',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'v1390466a13c',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
|
'title': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
|
||||||
'uploader': 'timoxa40',
|
'uploader': 'timoxa40',
|
||||||
'uploader_id': 'timoxa40',
|
'uploader_id': 'timoxa40',
|
||||||
@@ -71,9 +75,10 @@ class SmotriIE(InfoExtractor):
|
|||||||
# age limit + video-password
|
# age limit + video-password
|
||||||
{
|
{
|
||||||
'url': 'http://smotri.com/video/view/?id=v15408898bcf',
|
'url': 'http://smotri.com/video/view/?id=v15408898bcf',
|
||||||
'file': 'v15408898bcf.flv',
|
|
||||||
'md5': '91e909c9f0521adf5ee86fbe073aad70',
|
'md5': '91e909c9f0521adf5ee86fbe073aad70',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'v15408898bcf',
|
||||||
|
'ext': 'flv',
|
||||||
'title': 'этот ролик не покажут по ТВ',
|
'title': 'этот ролик не покажут по ТВ',
|
||||||
'uploader': 'zzxxx',
|
'uploader': 'zzxxx',
|
||||||
'uploader_id': 'ueggb',
|
'uploader_id': 'ueggb',
|
||||||
@@ -85,7 +90,22 @@ class SmotriIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'videopassword': '333'
|
'videopassword': '333'
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
# swf player
|
||||||
|
{
|
||||||
|
'url': 'http://pics.smotri.com/scrubber_custom8.swf?file=v9188090500',
|
||||||
|
'md5': '4d47034979d9390d14acdf59c4935bc2',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'v9188090500',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Shakira - Don\'t Bother',
|
||||||
|
'uploader': 'HannahL',
|
||||||
|
'uploader_id': 'lisaha95',
|
||||||
|
'upload_date': '20090331',
|
||||||
|
'description': 'Shakira - Don\'t Bother, видео Shakira - Don\'t Bother',
|
||||||
|
'thumbnail': 'http://frame8.loadup.ru/44/0b/918809.7.3.jpg',
|
||||||
|
},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
_SUCCESS = 0
|
_SUCCESS = 0
|
||||||
@@ -93,6 +113,21 @@ class SmotriIE(InfoExtractor):
|
|||||||
_PASSWORD_DETECTED = 2
|
_PASSWORD_DETECTED = 2
|
||||||
_VIDEO_NOT_FOUND = 3
|
_VIDEO_NOT_FOUND = 3
|
||||||
|
|
||||||
|
@classmethod
def _extract_url(cls, webpage):
    """Find a Smotri video reference embedded in ``webpage``.

    Two embedding styles are recognised:

    * an ``<embed>`` tag whose ``src`` is the pics.smotri.com flash player
      (``player.swf`` or ``scrubber_custom8.swf``) -- the player URL is
      returned as found in the attribute;
    * a ``video_file`` / ``video_image`` / ``video_id`` ``<div>`` triple --
      a canonical ``smotri.com/video/view/?id=...`` URL is built from the
      id.

    Returns the URL as a string, or None when neither form is present.
    """
    # The closing-quote back-reference stays OUTSIDE the ``url`` group so
    # the quote character is not returned as part of the URL.  ``[^>]+?``
    # lets other attributes precede ``src`` inside the tag.
    mobj = re.search(
        r'<embed[^>]+?src=(["\'])(?P<url>http://pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=v.+?)\1',
        webpage)
    if mobj is not None:
        return mobj.group('url')

    mobj = re.search(
        r'''(?x)<div\s+class="video_file">http://smotri\.com/video/download/file/[^<]+</div>\s*
            <div\s+class="video_image">[^<]+</div>\s*
            <div\s+class="video_id">(?P<id>[^<]+)</div>''', webpage)
    if mobj is not None:
        return 'http://smotri.com/video/view/?id=%s' % mobj.group('id')

    return None
|
||||||
|
|
||||||
def _search_meta(self, name, html, display_name=None):
|
def _search_meta(self, name, html, display_name=None):
|
||||||
if display_name is None:
|
if display_name is None:
|
||||||
display_name = name
|
display_name = name
|
||||||
@@ -134,7 +169,7 @@ class SmotriIE(InfoExtractor):
|
|||||||
|
|
||||||
# Video JSON does not provide enough meta data
|
# Video JSON does not provide enough meta data
|
||||||
# We will extract some from the video web page instead
|
# We will extract some from the video web page instead
|
||||||
video_page_url = 'http://' + mobj.group('url')
|
video_page_url = 'http://smotri.com/video/view/?id=%s' % video_id
|
||||||
video_page = self._download_webpage(video_page_url, video_id, 'Downloading video page')
|
video_page = self._download_webpage(video_page_url, video_id, 'Downloading video page')
|
||||||
|
|
||||||
# Warning if video is unavailable
|
# Warning if video is unavailable
|
||||||
@@ -222,7 +257,7 @@ class SmotriIE(InfoExtractor):
|
|||||||
'upload_date': video_upload_date,
|
'upload_date': video_upload_date,
|
||||||
'uploader_id': video_uploader_id,
|
'uploader_id': video_uploader_id,
|
||||||
'duration': video_duration,
|
'duration': video_duration,
|
||||||
'view_count': video_view_count,
|
'view_count': int_or_none(video_view_count),
|
||||||
'age_limit': 18 if adult_content else 0,
|
'age_limit': 18 if adult_content else 0,
|
||||||
'video_page_url': video_page_url
|
'video_page_url': video_page_url
|
||||||
}
|
}
|
||||||
|
@@ -18,12 +18,14 @@ class TEDIE(SubtitlesInfoExtractor):
|
|||||||
(?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
|
(?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
|
||||||
|
|
|
|
||||||
((?P<type_talk>talks)) # We have a simple talk
|
((?P<type_talk>talks)) # We have a simple talk
|
||||||
|
|
|
||||||
|
(?P<type_watch>watch)/[^/]+/[^/]+
|
||||||
)
|
)
|
||||||
(/lang/(.*?))? # The url may contain the language
|
(/lang/(.*?))? # The url may contain the language
|
||||||
/(?P<name>\w+) # Here goes the name and then ".html"
|
/(?P<name>[\w-]+) # Here goes the name and then ".html"
|
||||||
.*)$
|
.*)$
|
||||||
'''
|
'''
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
|
'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
|
||||||
'md5': '4ea1dada91e4174b53dac2bb8ace429d',
|
'md5': '4ea1dada91e4174b53dac2bb8ace429d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -36,7 +38,17 @@ class TEDIE(SubtitlesInfoExtractor):
|
|||||||
'actively fooling us.'),
|
'actively fooling us.'),
|
||||||
'uploader': 'Dan Dennett',
|
'uploader': 'Dan Dennett',
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
|
||||||
|
'md5': '226f4fb9c62380d11b7995efa4c87994',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'vishal-sikka-the-beauty-and-power-of-algorithms',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Vishal Sikka: The beauty and power of algorithms',
|
||||||
|
'thumbnail': 're:^https?://.+\.jpg',
|
||||||
|
'description': 'Adaptive, intelligent, and consistent, algorithms are emerging as the ultimate app for everything from matching consumers to products to assessing medical diagnoses. Vishal Sikka shares his appreciation for the algorithm, charting both its inherent beauty and its growing power.',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
_FORMATS_PREFERENCE = {
|
_FORMATS_PREFERENCE = {
|
||||||
'low': 1,
|
'low': 1,
|
||||||
@@ -57,6 +69,8 @@ class TEDIE(SubtitlesInfoExtractor):
|
|||||||
name = m.group('name')
|
name = m.group('name')
|
||||||
if m.group('type_talk'):
|
if m.group('type_talk'):
|
||||||
return self._talk_info(url, name)
|
return self._talk_info(url, name)
|
||||||
|
elif m.group('type_watch'):
|
||||||
|
return self._watch_info(url, name)
|
||||||
else:
|
else:
|
||||||
return self._playlist_videos_info(url, name)
|
return self._playlist_videos_info(url, name)
|
||||||
|
|
||||||
@@ -123,3 +137,26 @@ class TEDIE(SubtitlesInfoExtractor):
|
|||||||
else:
|
else:
|
||||||
self._downloader.report_warning(u'video doesn\'t have subtitles')
|
self._downloader.report_warning(u'video doesn\'t have subtitles')
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
def _watch_info(self, url, name):
    """Build the info dict for a ted.com ``/watch/`` page.

    Downloads ``url``, decodes the JSON stored in the page's
    ``data-config='...'`` attribute and combines it with the title and
    description scraped from the HTML.

    Returns a dict with the keys ``id`` (the ``name`` argument), ``url``,
    ``title``, ``thumbnail`` and ``description``.
    """
    page = self._download_webpage(url, name)

    player_config = json.loads(self._html_search_regex(
        r"data-config='([^']+)", page, 'config'))
    # 'video' -> 'url' is required (KeyError if absent); the image is optional.
    stream_url = player_config['video']['url']
    poster_url = player_config.get('image', {}).get('url')

    heading = self._html_search_regex(
        r"(?s)<h1(?:\s+class='[^']+')?>(.+?)</h1>", page, 'title')
    # fatal=False: presumably a missing description is tolerated rather
    # than aborting the extraction -- confirm against _html_search_regex.
    about_text = self._html_search_regex(
        r'(?s)<h4 class="[^"]+" id="h3--about-this-talk">.*?</h4>(.*?)</div>',
        page, 'description', fatal=False)

    info = {}
    info['id'] = name
    info['url'] = stream_url
    info['title'] = heading
    info['thumbnail'] = poster_url
    info['description'] = about_text
    return info
|
||||||
|
@@ -1,33 +1,37 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class TF1IE(InfoExtractor):
|
class TF1IE(InfoExtractor):
|
||||||
"""TF1 uses the wat.tv player."""
|
"""TF1 uses the wat.tv player."""
|
||||||
_VALID_URL = r'http://videos\.tf1\.fr/.*-(.*?)\.html'
|
_VALID_URL = r'http://videos\.tf1\.fr/.*-(?P<id>.*?)\.html'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
|
'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
|
||||||
u'file': u'10635995.mp4',
|
'info_dict': {
|
||||||
u'md5': u'2e378cc28b9957607d5e88f274e637d8',
|
'id': '10635995',
|
||||||
u'info_dict': {
|
'ext': 'mp4',
|
||||||
u'title': u'Citroën Grand C4 Picasso 2013 : présentation officielle',
|
'title': 'Citroën Grand C4 Picasso 2013 : présentation officielle',
|
||||||
u'description': u'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.',
|
'description': 'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# Sometimes wat serves the whole file with the --test option
|
||||||
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
u'skip': u'Sometimes wat serves the whole file with the --test option',
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
id = mobj.group(1)
|
video_id = mobj.group('id')
|
||||||
webpage = self._download_webpage(url, id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
embed_url = self._html_search_regex(r'"(https://www.wat.tv/embedframe/.*?)"',
|
embed_url = self._html_search_regex(
|
||||||
webpage, 'embed url')
|
r'"(https://www.wat.tv/embedframe/.*?)"', webpage, 'embed url')
|
||||||
embed_page = self._download_webpage(embed_url, id, u'Downloading embed player page')
|
embed_page = self._download_webpage(embed_url, video_id,
|
||||||
|
'Downloading embed player page')
|
||||||
wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
|
wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
|
||||||
wat_info = self._download_webpage('http://www.wat.tv/interface/contentv3/%s' % wat_id, id, u'Downloading Wat info')
|
wat_info = self._download_json(
|
||||||
wat_info = json.loads(wat_info)['media']
|
'http://www.wat.tv/interface/contentv3/%s' % wat_id, video_id)
|
||||||
wat_url = wat_info['url']
|
return self.url_result(wat_info['media']['url'], 'Wat')
|
||||||
return self.url_result(wat_url, 'Wat')
|
|
||||||
|
61
youtube_dl/extractor/urort.py
Normal file
61
youtube_dl/extractor/urort.py
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_parse,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class UrortIE(InfoExtractor):
    """Playlist extractor for band pages on urort.p3.no.

    The band name taken from the URL is used to filter the site's
    ``TrackDtos`` JSON endpoint; every track returned becomes one mp3
    entry of the resulting playlist.
    """
    IE_DESC = 'NRK P3 Urørt'
    _VALID_URL = r'https?://(?:www\.)?urort\.p3\.no/#!/Band/(?P<id>[^/]+)$'

    _TEST = {
        'url': 'https://urort.p3.no/#!/Band/Gerilja',
        'md5': '5ed31a924be8a05e47812678a86e127b',
        'info_dict': {
            'id': '33124-4',
            'ext': 'mp3',
            'title': 'The Bomb',
            'thumbnail': 're:^https?://.+\.jpg',
            'like_count': int,
            'uploader': 'Gerilja',
            'uploader_id': 'Gerilja',
            'upload_date': '20100323',
        },
        'params': {
            'matchtitle': '^The Bomb$',  # To test, we want just one video
        }
    }

    def _real_extract(self, url):
        """Return a playlist dict with one entry per track of the band.

        The playlist ``id`` and ``title`` are both the band name from the
        URL.  An empty track list yields a playlist with no entries.
        """
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')

        # The filter expression is percent-quoted before being appended to
        # the query string.
        fstr = compat_urllib_parse.quote("InternalBandUrl eq '%s'" % playlist_id)
        json_url = 'http://urort.p3.no/breeze/urort/TrackDtos?$filter=' + fstr
        songs = self._download_json(json_url, playlist_id)

        entries = [{
            # Entry id is '<BandId>-<$id>' as delivered by the API.
            'id': '%d-%s' % (s['BandId'], s['$id']),
            'title': s['Title'],
            'url': s['TrackUrl'],
            'ext': 'mp3',
            'uploader_id': playlist_id,
            # Fall back to the URL's band name when BandName is absent.
            'uploader': s.get('BandName', playlist_id),
            'like_count': s.get('LikeCount'),
            'thumbnail': 'http://urort.p3.no/cloud/images/%s' % s['Image'],
            'upload_date': unified_strdate(s.get('Released')),
        } for s in songs]

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist_id,
            'entries': entries,
        }
|
@@ -1,38 +0,0 @@
|
|||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from .ooyala import OoyalaIE
|
|
||||||
from ..utils import ExtractorError
|
|
||||||
|
|
||||||
|
|
||||||
class ViceIE(InfoExtractor):
    """Extractor for www.vice.com pages that embed an Ooyala player.

    The Ooyala URL is taken from the page's og:video metadata when
    available, otherwise rebuilt from the embed code passed to
    ``OO.Player.create``.
    """
    _VALID_URL = r'http://www\.vice\.com/.*?/(?P<name>.+)'

    _TEST = {
        u'url': u'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
        u'file': u'43cW1mYzpia9IlestBjVpd23Yu3afAfp.mp4',
        u'info_dict': {
            u'title': u'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
        },
        u'params': {
            # Requires ffmpeg (m3u8 manifest)
            u'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return a url_result for the Ooyala video found on the page.

        Raises ExtractorError (expected=True) when neither lookup succeeds.
        """
        page_name = re.match(self._VALID_URL, url).group('name')
        page = self._download_webpage(url, page_name)

        def locate_ooyala_url():
            # First choice: the og:video URL.  Only if that lookup fails is
            # the player embed code searched for in the page source.
            try:
                return self._og_search_video_url(page)
            except ExtractorError:
                try:
                    code = self._search_regex(
                        r'OO.Player.create\(\'ooyalaplayer\', \'(.+?)\'', page,
                        u'ooyala embed code')
                    return OoyalaIE._url_for_embed_code(code)
                except ExtractorError:
                    raise ExtractorError(u'The page doesn\'t contain a video', expected=True)

        return self.url_result(locate_ooyala_url(), ie='Ooyala')
|
|
||||||
|
|
@@ -16,7 +16,7 @@ from ..utils import (
|
|||||||
|
|
||||||
class VKIE(InfoExtractor):
|
class VKIE(InfoExtractor):
|
||||||
IE_NAME = 'vk.com'
|
IE_NAME = 'vk.com'
|
||||||
_VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>\d+).*?\bid=(?P<id>\d+)|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
|
_VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
|
||||||
_NETRC_MACHINE = 'vk'
|
_NETRC_MACHINE = 'vk'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
|
103
youtube_dl/extractor/washingtonpost.py
Normal file
103
youtube_dl/extractor/washingtonpost.py
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
strip_jsonp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class WashingtonPostIE(InfoExtractor):
    """Playlist extractor for washingtonpost.com article pages.

    Every ``data-video-uuid`` attribute found in the page becomes one
    playlist entry, described by the site's per-video JSONP document.
    """
    _VALID_URL = r'^https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
    _TEST = {
        'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
        'playlist': [{
            'md5': 'c3f4b4922ffa259243f68e928db2db8c',
            'info_dict': {
                'id': 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f',
                'ext': 'mp4',
                'title': 'Breaking Points: The Paper Mine',
                'duration': 1287,
                'description': 'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.',
                'uploader': 'The Washington Post',
                'timestamp': 1395527908,
                'upload_date': '20140322',
            },
        }, {
            'md5': 'f645a07652c2950cd9134bb852c5f5eb',
            'info_dict': {
                'id': '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f',
                'ext': 'mp4',
                'title': 'The town bureaucracy sustains',
                'description': 'Underneath the friendly town of Boyers is a sea of government paperwork. In a disused limestone mine, hundreds of locals now track, file and process retirement applications for the federal government. We set out to find out what it\'s like to do paperwork 230 feet underground.',
                'duration': 2217,
                'timestamp': 1395528005,
                'upload_date': '20140322',
                'uploader': 'The Washington Post',
            },
        }]
    }

    def _real_extract(self, url):
        """Return a playlist dict holding one entry per video on the page.

        The playlist ``id`` is the last path component of the URL and its
        ``title`` is the page's og:title.
        """
        page_id = re.match(self._VALID_URL, url).group('id')

        webpage = self._download_webpage(url, page_id)
        title = self._og_search_title(webpage)
        uuids = re.findall(r'data-video-uuid="([^"]+)"', webpage)
        total = len(uuids)
        # Stream 'type' -> download protocol; any other type yields None.
        protocols = {
            'MP4': 'http',
            'F4F': 'f4m',
        }

        entries = []
        for index, uuid in enumerate(uuids, start=1):
            info_url = 'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % uuid
            progress_note = 'Downloading information of video %d/%d' % (index, total)
            vinfo_all = self._download_json(
                info_url, page_id,
                transform_source=strip_jsonp, note=progress_note)
            vinfo = vinfo_all[0]['contentConfig']

            uploader = vinfo.get('credits', {}).get('source')
            # NOTE(review): the 1000 is presumably a scale divisor turning
            # milliseconds into seconds -- confirm against int_or_none.
            timestamp = int_or_none(
                vinfo.get('dateConfig', {}).get('dateFirstPublished'), 1000)

            formats = []
            for stream in vinfo.get('streams', []):
                kind = stream.get('type')
                width = stream.get('width')
                bitrate = stream.get('bitrate')
                # A width of exactly 0 marks the stream as carrying no video.
                has_video = width != 0
                if width:
                    format_id = '%s-%s-%s' % (kind, width, bitrate)
                else:
                    format_id = kind
                formats.append({
                    'format_id': format_id,
                    'vbr': bitrate if has_video else None,
                    'width': width,
                    'height': stream.get('height'),
                    'acodec': stream.get('audioCodec'),
                    'vcodec': stream.get('videoCodec') if has_video else 'none',
                    'filesize': stream.get('fileSize'),
                    'url': stream.get('url'),
                    'ext': 'mp4',
                    'protocol': protocols.get(kind),
                })

            source_media_url = vinfo.get('sourceMediaURL')
            if source_media_url:
                formats.append({
                    'format_id': 'source_media',
                    'url': source_media_url,
                })
            self._sort_formats(formats)

            entry = {
                'id': uuid,
                'title': vinfo['title'],
                'description': vinfo.get('blurb'),
                'uploader': uploader,
                'formats': formats,
                # NOTE(review): divisor 100 kept as-is; the unit of
                # 'videoDuration' is not visible here -- confirm.
                'duration': int_or_none(vinfo.get('videoDuration'), 100),
                'timestamp': timestamp,
            }
            entries.append(entry)

        return {
            '_type': 'playlist',
            'entries': entries,
            'id': page_id,
            'title': title,
        }
|
@@ -1,37 +1,37 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class WatIE(InfoExtractor):
|
class WatIE(InfoExtractor):
|
||||||
_VALID_URL=r'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'
|
_VALID_URL = r'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'
|
||||||
IE_NAME = 'wat.tv'
|
IE_NAME = 'wat.tv'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
|
'url': 'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
|
||||||
u'file': u'10631273.mp4',
|
'info_dict': {
|
||||||
u'md5': u'd8b2231e1e333acd12aad94b80937e19',
|
'id': '10631273',
|
||||||
u'info_dict': {
|
'ext': 'mp4',
|
||||||
u'title': u'World War Z - Philadelphia VOST',
|
'title': 'World War Z - Philadelphia VOST',
|
||||||
u'description': u'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr',
|
'description': 'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# Sometimes wat serves the whole file with the --test option
|
||||||
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
u'skip': u'Sometimes wat serves the whole file with the --test option',
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def download_video_info(self, real_id):
|
def download_video_info(self, real_id):
|
||||||
# 'contentv4' is used in the website, but it also returns the related
|
# 'contentv4' is used in the website, but it also returns the related
|
||||||
# videos, we don't need them
|
# videos, we don't need them
|
||||||
info = self._download_webpage('http://www.wat.tv/interface/contentv3/' + real_id, real_id, 'Downloading video info')
|
info = self._download_json('http://www.wat.tv/interface/contentv3/' + real_id, real_id)
|
||||||
info = json.loads(info)
|
|
||||||
return info['media']
|
return info['media']
|
||||||
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
def real_id_for_chapter(chapter):
|
def real_id_for_chapter(chapter):
|
||||||
return chapter['tc_start'].split('-')[0]
|
return chapter['tc_start'].split('-')[0]
|
||||||
@@ -56,17 +56,17 @@ class WatIE(InfoExtractor):
|
|||||||
entries = [self.url_result(chapter_url) for chapter_url in chapter_urls]
|
entries = [self.url_result(chapter_url) for chapter_url in chapter_urls]
|
||||||
return self.playlist_result(entries, real_id, video_info['title'])
|
return self.playlist_result(entries, real_id, video_info['title'])
|
||||||
|
|
||||||
|
upload_date = None
|
||||||
|
if 'date_diffusion' in first_chapter:
|
||||||
|
upload_date = unified_strdate(first_chapter['date_diffusion'])
|
||||||
# Otherwise we can continue and extract just one part, we have to use
|
# Otherwise we can continue and extract just one part, we have to use
|
||||||
# the short id for getting the video url
|
# the short id for getting the video url
|
||||||
info = {'id': real_id,
|
return {
|
||||||
'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
|
'id': real_id,
|
||||||
'ext': 'mp4',
|
'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
|
||||||
'title': first_chapter['title'],
|
'title': first_chapter['title'],
|
||||||
'thumbnail': first_chapter['preview'],
|
'thumbnail': first_chapter['preview'],
|
||||||
'description': first_chapter['description'],
|
'description': first_chapter['description'],
|
||||||
'view_count': video_info['views'],
|
'view_count': video_info['views'],
|
||||||
}
|
'upload_date': upload_date,
|
||||||
if 'date_diffusion' in first_chapter:
|
}
|
||||||
info['upload_date'] = unified_strdate(first_chapter['date_diffusion'])
|
|
||||||
|
|
||||||
return info
|
|
||||||
|
@@ -4,9 +4,10 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
unified_strdate,
|
compat_parse_qs,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
unified_strdate,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -111,4 +112,85 @@ class WDRIE(InfoExtractor):
|
|||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class WDRMausIE(InfoExtractor):
|
||||||
|
_VALID_URL = 'http://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)(?:/index\.php5|(?<!index)\.php5|/(?:$|[?#]))'
|
||||||
|
IE_DESC = 'Sendung mit der Maus'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'aktuelle-sendung',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'thumbnail': 're:^http://.+\.jpg',
|
||||||
|
'upload_date': 're:^[0-9]{8}$',
|
||||||
|
'title': 're:^[0-9.]{10} - Aktuelle Sendung$',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/40_jahre_maus.php5',
|
||||||
|
'md5': '3b1227ca3ed28d73ec5737c65743b2a3',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '40_jahre_maus',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'thumbnail': 're:^http://.+\.jpg',
|
||||||
|
'upload_date': '20131007',
|
||||||
|
'title': '12.03.2011 - 40 Jahre Maus',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
param_code = self._html_search_regex(
|
||||||
|
r'<a href="\?startVideo=1&([^"]+)"', webpage, 'parameters')
|
||||||
|
|
||||||
|
title_date = self._search_regex(
|
||||||
|
r'<div class="sendedatum"><p>Sendedatum:\s*([0-9\.]+)</p>',
|
||||||
|
webpage, 'air date')
|
||||||
|
title_str = self._html_search_regex(
|
||||||
|
r'<h1>(.*?)</h1>', webpage, 'title')
|
||||||
|
title = '%s - %s' % (title_date, title_str)
|
||||||
|
upload_date = unified_strdate(
|
||||||
|
self._html_search_meta('dc.date', webpage))
|
||||||
|
|
||||||
|
fields = compat_parse_qs(param_code)
|
||||||
|
video_url = fields['firstVideo'][0]
|
||||||
|
thumbnail = compat_urlparse.urljoin(url, fields['startPicture'][0])
|
||||||
|
|
||||||
|
formats = [{
|
||||||
|
'format_id': 'rtmp',
|
||||||
|
'url': video_url,
|
||||||
|
}]
|
||||||
|
|
||||||
|
jscode = self._download_webpage(
|
||||||
|
'http://www.wdrmaus.de/codebase/js/extended-medien.min.js',
|
||||||
|
video_id, fatal=False,
|
||||||
|
note='Downloading URL translation table',
|
||||||
|
errnote='Could not download URL translation table')
|
||||||
|
if jscode:
|
||||||
|
for m in re.finditer(
|
||||||
|
r"stream:\s*'dslSrc=(?P<stream>[^']+)',\s*download:\s*'(?P<dl>[^']+)'\s*\}",
|
||||||
|
jscode):
|
||||||
|
if video_url.startswith(m.group('stream')):
|
||||||
|
http_url = video_url.replace(
|
||||||
|
m.group('stream'), m.group('dl'))
|
||||||
|
formats.append({
|
||||||
|
'format_id': 'http',
|
||||||
|
'url': http_url,
|
||||||
|
})
|
||||||
|
break
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'upload_date': upload_date,
|
||||||
|
}
|
||||||
|
|
||||||
|
# TODO test _1
|
@@ -7,13 +7,13 @@ import itertools
|
|||||||
import json
|
import json
|
||||||
import os.path
|
import os.path
|
||||||
import re
|
import re
|
||||||
import string
|
|
||||||
import struct
|
import struct
|
||||||
import traceback
|
import traceback
|
||||||
import zlib
|
import zlib
|
||||||
|
|
||||||
from .common import InfoExtractor, SearchInfoExtractor
|
from .common import InfoExtractor, SearchInfoExtractor
|
||||||
from .subtitles import SubtitlesInfoExtractor
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
|
from ..jsinterp import JSInterpreter
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_chr,
|
compat_chr,
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
@@ -438,113 +438,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
def _parse_sig_js(self, jscode):
|
def _parse_sig_js(self, jscode):
|
||||||
funcname = self._search_regex(
|
funcname = self._search_regex(
|
||||||
r'signature=([a-zA-Z]+)', jscode,
|
r'signature=([a-zA-Z]+)', jscode,
|
||||||
u'Initial JS player signature function name')
|
u'Initial JS player signature function name')
|
||||||
|
|
||||||
functions = {}
|
jsi = JSInterpreter(jscode)
|
||||||
|
initial_function = jsi.extract_function(funcname)
|
||||||
def argidx(varname):
|
|
||||||
return string.lowercase.index(varname)
|
|
||||||
|
|
||||||
def interpret_statement(stmt, local_vars, allow_recursion=20):
|
|
||||||
if allow_recursion < 0:
|
|
||||||
raise ExtractorError(u'Recursion limit reached')
|
|
||||||
|
|
||||||
if stmt.startswith(u'var '):
|
|
||||||
stmt = stmt[len(u'var '):]
|
|
||||||
ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
|
|
||||||
r'=(?P<expr>.*)$', stmt)
|
|
||||||
if ass_m:
|
|
||||||
if ass_m.groupdict().get('index'):
|
|
||||||
def assign(val):
|
|
||||||
lvar = local_vars[ass_m.group('out')]
|
|
||||||
idx = interpret_expression(ass_m.group('index'),
|
|
||||||
local_vars, allow_recursion)
|
|
||||||
assert isinstance(idx, int)
|
|
||||||
lvar[idx] = val
|
|
||||||
return val
|
|
||||||
expr = ass_m.group('expr')
|
|
||||||
else:
|
|
||||||
def assign(val):
|
|
||||||
local_vars[ass_m.group('out')] = val
|
|
||||||
return val
|
|
||||||
expr = ass_m.group('expr')
|
|
||||||
elif stmt.startswith(u'return '):
|
|
||||||
assign = lambda v: v
|
|
||||||
expr = stmt[len(u'return '):]
|
|
||||||
else:
|
|
||||||
raise ExtractorError(
|
|
||||||
u'Cannot determine left side of statement in %r' % stmt)
|
|
||||||
|
|
||||||
v = interpret_expression(expr, local_vars, allow_recursion)
|
|
||||||
return assign(v)
|
|
||||||
|
|
||||||
def interpret_expression(expr, local_vars, allow_recursion):
|
|
||||||
if expr.isdigit():
|
|
||||||
return int(expr)
|
|
||||||
|
|
||||||
if expr.isalpha():
|
|
||||||
return local_vars[expr]
|
|
||||||
|
|
||||||
m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
|
|
||||||
if m:
|
|
||||||
member = m.group('member')
|
|
||||||
val = local_vars[m.group('in')]
|
|
||||||
if member == 'split("")':
|
|
||||||
return list(val)
|
|
||||||
if member == 'join("")':
|
|
||||||
return u''.join(val)
|
|
||||||
if member == 'length':
|
|
||||||
return len(val)
|
|
||||||
if member == 'reverse()':
|
|
||||||
return val[::-1]
|
|
||||||
slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
|
|
||||||
if slice_m:
|
|
||||||
idx = interpret_expression(
|
|
||||||
slice_m.group('idx'), local_vars, allow_recursion-1)
|
|
||||||
return val[idx:]
|
|
||||||
|
|
||||||
m = re.match(
|
|
||||||
r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
|
|
||||||
if m:
|
|
||||||
val = local_vars[m.group('in')]
|
|
||||||
idx = interpret_expression(m.group('idx'), local_vars,
|
|
||||||
allow_recursion-1)
|
|
||||||
return val[idx]
|
|
||||||
|
|
||||||
m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
|
|
||||||
if m:
|
|
||||||
a = interpret_expression(m.group('a'),
|
|
||||||
local_vars, allow_recursion)
|
|
||||||
b = interpret_expression(m.group('b'),
|
|
||||||
local_vars, allow_recursion)
|
|
||||||
return a % b
|
|
||||||
|
|
||||||
m = re.match(
|
|
||||||
r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
|
|
||||||
if m:
|
|
||||||
fname = m.group('func')
|
|
||||||
if fname not in functions:
|
|
||||||
functions[fname] = extract_function(fname)
|
|
||||||
argvals = [int(v) if v.isdigit() else local_vars[v]
|
|
||||||
for v in m.group('args').split(',')]
|
|
||||||
return functions[fname](argvals)
|
|
||||||
raise ExtractorError(u'Unsupported JS expression %r' % expr)
|
|
||||||
|
|
||||||
def extract_function(funcname):
|
|
||||||
func_m = re.search(
|
|
||||||
r'function ' + re.escape(funcname) +
|
|
||||||
r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
|
|
||||||
jscode)
|
|
||||||
argnames = func_m.group('args').split(',')
|
|
||||||
|
|
||||||
def resf(args):
|
|
||||||
local_vars = dict(zip(argnames, args))
|
|
||||||
for stmt in func_m.group('code').split(';'):
|
|
||||||
res = interpret_statement(stmt, local_vars)
|
|
||||||
return res
|
|
||||||
return resf
|
|
||||||
|
|
||||||
initial_function = extract_function(funcname)
|
|
||||||
return lambda s: initial_function([s])
|
return lambda s: initial_function([s])
|
||||||
|
|
||||||
def _parse_sig_swf(self, file_contents):
|
def _parse_sig_swf(self, file_contents):
|
||||||
@@ -1549,7 +1446,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
break
|
break
|
||||||
|
|
||||||
more = self._download_json(
|
more = self._download_json(
|
||||||
'https://youtube.com/%s' % mobj.group('more'), playlist_id, 'Downloading page #%s' % page_num)
|
'https://youtube.com/%s' % mobj.group('more'), playlist_id,
|
||||||
|
'Downloading page #%s' % page_num,
|
||||||
|
transform_source=uppercase_escape)
|
||||||
content_html = more['content_html']
|
content_html = more['content_html']
|
||||||
more_widget_html = more['load_more_widget_html']
|
more_widget_html = more['load_more_widget_html']
|
||||||
|
|
||||||
@@ -1712,7 +1611,7 @@ class YoutubeUserIE(InfoExtractor):
|
|||||||
|
|
||||||
class YoutubeSearchIE(SearchInfoExtractor):
|
class YoutubeSearchIE(SearchInfoExtractor):
|
||||||
IE_DESC = u'YouTube.com searches'
|
IE_DESC = u'YouTube.com searches'
|
||||||
_API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
|
_API_URL = u'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
|
||||||
_MAX_RESULTS = 1000
|
_MAX_RESULTS = 1000
|
||||||
IE_NAME = u'youtube:search'
|
IE_NAME = u'youtube:search'
|
||||||
_SEARCH_KEY = 'ytsearch'
|
_SEARCH_KEY = 'ytsearch'
|
||||||
@@ -1723,9 +1622,12 @@ class YoutubeSearchIE(SearchInfoExtractor):
|
|||||||
video_ids = []
|
video_ids = []
|
||||||
pagenum = 0
|
pagenum = 0
|
||||||
limit = n
|
limit = n
|
||||||
|
PAGE_SIZE = 50
|
||||||
|
|
||||||
while (50 * pagenum) < limit:
|
while (PAGE_SIZE * pagenum) < limit:
|
||||||
result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
|
result_url = self._API_URL % (
|
||||||
|
compat_urllib_parse.quote_plus(query.encode('utf-8')),
|
||||||
|
(PAGE_SIZE * pagenum) + 1)
|
||||||
data_json = self._download_webpage(
|
data_json = self._download_webpage(
|
||||||
result_url, video_id=u'query "%s"' % query,
|
result_url, video_id=u'query "%s"' % query,
|
||||||
note=u'Downloading page %s' % (pagenum + 1),
|
note=u'Downloading page %s' % (pagenum + 1),
|
||||||
@@ -1836,11 +1738,10 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
|||||||
feed_entries = []
|
feed_entries = []
|
||||||
paging = 0
|
paging = 0
|
||||||
for i in itertools.count(1):
|
for i in itertools.count(1):
|
||||||
info = self._download_webpage(self._FEED_TEMPLATE % paging,
|
info = self._download_json(self._FEED_TEMPLATE % paging,
|
||||||
u'%s feed' % self._FEED_NAME,
|
u'%s feed' % self._FEED_NAME,
|
||||||
u'Downloading page %s' % i)
|
u'Downloading page %s' % i)
|
||||||
info = json.loads(info)
|
feed_html = info.get('feed_html') or info.get('content_html')
|
||||||
feed_html = info['feed_html']
|
|
||||||
m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
|
m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
|
||||||
ids = orderedSet(m.group(1) for m in m_ids)
|
ids = orderedSet(m.group(1) for m in m_ids)
|
||||||
feed_entries.extend(
|
feed_entries.extend(
|
||||||
@@ -1852,7 +1753,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
|||||||
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
|
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
|
||||||
|
|
||||||
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
|
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
|
||||||
IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
|
IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
|
||||||
_VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
|
_VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
|
||||||
_FEED_NAME = 'subscriptions'
|
_FEED_NAME = 'subscriptions'
|
||||||
_PLAYLIST_TITLE = u'Youtube Subscriptions'
|
_PLAYLIST_TITLE = u'Youtube Subscriptions'
|
||||||
|
116
youtube_dl/jsinterp.py
Normal file
116
youtube_dl/jsinterp.py
Normal file
@@ -0,0 +1,116 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .utils import (
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class JSInterpreter(object):
|
||||||
|
def __init__(self, code):
|
||||||
|
self.code = code
|
||||||
|
self._functions = {}
|
||||||
|
|
||||||
|
def interpret_statement(self, stmt, local_vars, allow_recursion=20):
|
||||||
|
if allow_recursion < 0:
|
||||||
|
raise ExtractorError('Recursion limit reached')
|
||||||
|
|
||||||
|
if stmt.startswith('var '):
|
||||||
|
stmt = stmt[len('var '):]
|
||||||
|
ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
|
||||||
|
r'=(?P<expr>.*)$', stmt)
|
||||||
|
if ass_m:
|
||||||
|
if ass_m.groupdict().get('index'):
|
||||||
|
def assign(val):
|
||||||
|
lvar = local_vars[ass_m.group('out')]
|
||||||
|
idx = self.interpret_expression(
|
||||||
|
ass_m.group('index'), local_vars, allow_recursion)
|
||||||
|
assert isinstance(idx, int)
|
||||||
|
lvar[idx] = val
|
||||||
|
return val
|
||||||
|
expr = ass_m.group('expr')
|
||||||
|
else:
|
||||||
|
def assign(val):
|
||||||
|
local_vars[ass_m.group('out')] = val
|
||||||
|
return val
|
||||||
|
expr = ass_m.group('expr')
|
||||||
|
elif stmt.startswith('return '):
|
||||||
|
assign = lambda v: v
|
||||||
|
expr = stmt[len('return '):]
|
||||||
|
else:
|
||||||
|
raise ExtractorError(
|
||||||
|
'Cannot determine left side of statement in %r' % stmt)
|
||||||
|
|
||||||
|
v = self.interpret_expression(expr, local_vars, allow_recursion)
|
||||||
|
return assign(v)
|
||||||
|
|
||||||
|
def interpret_expression(self, expr, local_vars, allow_recursion):
|
||||||
|
if expr.isdigit():
|
||||||
|
return int(expr)
|
||||||
|
|
||||||
|
if expr.isalpha():
|
||||||
|
return local_vars[expr]
|
||||||
|
|
||||||
|
m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
|
||||||
|
if m:
|
||||||
|
member = m.group('member')
|
||||||
|
val = local_vars[m.group('in')]
|
||||||
|
if member == 'split("")':
|
||||||
|
return list(val)
|
||||||
|
if member == 'join("")':
|
||||||
|
return u''.join(val)
|
||||||
|
if member == 'length':
|
||||||
|
return len(val)
|
||||||
|
if member == 'reverse()':
|
||||||
|
return val[::-1]
|
||||||
|
slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
|
||||||
|
if slice_m:
|
||||||
|
idx = self.interpret_expression(
|
||||||
|
slice_m.group('idx'), local_vars, allow_recursion - 1)
|
||||||
|
return val[idx:]
|
||||||
|
|
||||||
|
m = re.match(
|
||||||
|
r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
|
||||||
|
if m:
|
||||||
|
val = local_vars[m.group('in')]
|
||||||
|
idx = self.interpret_expression(
|
||||||
|
m.group('idx'), local_vars, allow_recursion - 1)
|
||||||
|
return val[idx]
|
||||||
|
|
||||||
|
m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
|
||||||
|
if m:
|
||||||
|
a = self.interpret_expression(
|
||||||
|
m.group('a'), local_vars, allow_recursion)
|
||||||
|
b = self.interpret_expression(
|
||||||
|
m.group('b'), local_vars, allow_recursion)
|
||||||
|
return a % b
|
||||||
|
|
||||||
|
m = re.match(
|
||||||
|
r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
|
||||||
|
if m:
|
||||||
|
fname = m.group('func')
|
||||||
|
if fname not in self._functions:
|
||||||
|
self._functions[fname] = self.extract_function(fname)
|
||||||
|
argvals = [int(v) if v.isdigit() else local_vars[v]
|
||||||
|
for v in m.group('args').split(',')]
|
||||||
|
return self._functions[fname](argvals)
|
||||||
|
raise ExtractorError('Unsupported JS expression %r' % expr)
|
||||||
|
|
||||||
|
def extract_function(self, funcname):
|
||||||
|
func_m = re.search(
|
||||||
|
(r'(?:function %s|%s\s*=\s*function)' % (
|
||||||
|
re.escape(funcname), re.escape(funcname))) +
|
||||||
|
r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
|
||||||
|
self.code)
|
||||||
|
if func_m is None:
|
||||||
|
raise ExtractorError('Could not find JS function %r' % funcname)
|
||||||
|
argnames = func_m.group('args').split(',')
|
||||||
|
|
||||||
|
def resf(args):
|
||||||
|
local_vars = dict(zip(argnames, args))
|
||||||
|
for stmt in func_m.group('code').split(';'):
|
||||||
|
res = self.interpret_statement(stmt, local_vars)
|
||||||
|
return res
|
||||||
|
return resf
|
||||||
|
|
@@ -53,8 +53,9 @@ class FFmpegPostProcessor(PostProcessor):
|
|||||||
|
|
||||||
if self._downloader.params.get('verbose', False):
|
if self._downloader.params.get('verbose', False):
|
||||||
self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
|
self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
|
||||||
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
bcmd = [self._downloader.encode(c) for c in cmd]
|
||||||
stdout,stderr = p.communicate()
|
p = subprocess.Popen(bcmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
|
stdout, stderr = p.communicate()
|
||||||
if p.returncode != 0:
|
if p.returncode != 0:
|
||||||
stderr = stderr.decode('utf-8', 'replace')
|
stderr = stderr.decode('utf-8', 'replace')
|
||||||
msg = stderr.strip().split('\n')[-1]
|
msg = stderr.strip().split('\n')[-1]
|
||||||
|
@@ -539,7 +539,6 @@ def encodeFilename(s, for_subprocess=False):
|
|||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
return s.encode(encoding, 'ignore')
|
return s.encode(encoding, 'ignore')
|
||||||
|
|
||||||
|
|
||||||
def decodeOption(optval):
|
def decodeOption(optval):
|
||||||
if optval is None:
|
if optval is None:
|
||||||
return optval
|
return optval
|
||||||
@@ -1177,8 +1176,12 @@ class HEADRequest(compat_urllib_request.Request):
|
|||||||
return "HEAD"
|
return "HEAD"
|
||||||
|
|
||||||
|
|
||||||
def int_or_none(v, scale=1):
|
def int_or_none(v, scale=1, default=None):
|
||||||
return v if v is None else (int(v) // scale)
|
return default if v is None else (int(v) // scale)
|
||||||
|
|
||||||
|
|
||||||
|
def float_or_none(v, scale=1, default=None):
|
||||||
|
return default if v is None else (float(v) / scale)
|
||||||
|
|
||||||
|
|
||||||
def parse_duration(s):
|
def parse_duration(s):
|
||||||
@@ -1186,7 +1189,7 @@ def parse_duration(s):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
m = re.match(
|
m = re.match(
|
||||||
r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?$', s)
|
r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?(?::[0-9]+)?$', s)
|
||||||
if not m:
|
if not m:
|
||||||
return None
|
return None
|
||||||
res = int(m.group('secs'))
|
res = int(m.group('secs'))
|
||||||
@@ -1261,8 +1264,8 @@ class PagedList(object):
|
|||||||
|
|
||||||
def uppercase_escape(s):
|
def uppercase_escape(s):
|
||||||
return re.sub(
|
return re.sub(
|
||||||
r'\\U([0-9a-fA-F]{8})',
|
r'\\U[0-9a-fA-F]{8}',
|
||||||
lambda m: compat_chr(int(m.group(1), base=16)), s)
|
lambda m: m.group(0).decode('unicode-escape'), s)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
struct.pack(u'!I', 0)
|
struct.pack(u'!I', 0)
|
||||||
@@ -1328,3 +1331,7 @@ US_RATINGS = {
|
|||||||
'R': 16,
|
'R': 16,
|
||||||
'NC': 18,
|
'NC': 18,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def strip_jsonp(code):
|
||||||
|
return re.sub(r'(?s)^[a-zA-Z_]+\s*\(\s*(.*)\);\s*?\s*$', r'\1', code)
|
||||||
|
@@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2014.03.24.3'
|
__version__ = '2014.04.03.3'
|
||||||
|
Reference in New Issue
Block a user