Compare commits
64 Commits
2014.01.30
...
2014.02.04
Author | SHA1 | Date | |
---|---|---|---|
|
eef726c04b | ||
|
acf1555d76 | ||
|
22e7f1a6ec | ||
|
3c49325658 | ||
|
bb1cd2bea1 | ||
|
fdf1f8d4ce | ||
|
117c8c6b97 | ||
|
5cef4ff09b | ||
|
91264ce572 | ||
|
c79ef8e1ae | ||
|
58d915df51 | ||
|
7881a64499 | ||
|
90159f5561 | ||
|
99877772d0 | ||
|
b0268cb6ce | ||
|
4edff4cfa8 | ||
|
1eac553e7e | ||
|
9d3ac7444d | ||
|
588128d054 | ||
|
8e93b9b9aa | ||
|
b4bcffefa3 | ||
|
2b39af9b4f | ||
|
23fe495feb | ||
|
b5dbe89bba | ||
|
dbe80ca7ad | ||
|
009a3408f5 | ||
|
b58e3c8918 | ||
|
56b6faf91e | ||
|
7ac1f877a7 | ||
|
d55433bbfd | ||
|
f0ce2bc1c5 | ||
|
c3bc00b90e | ||
|
ff6b7b049b | ||
|
f46359121f | ||
|
37c1525c17 | ||
|
c85e4cf7b4 | ||
|
c66dcda287 | ||
|
6d845922ab | ||
|
2949cbe036 | ||
|
c3309a7774 | ||
|
7aed837595 | ||
|
0eb799bae9 | ||
|
4baff4a4ae | ||
|
45d7bc2f8b | ||
|
c0c2ddddcd | ||
|
a96ed91610 | ||
|
c1206423c4 | ||
|
659aa21ba1 | ||
|
efd02e858a | ||
|
3bf8bc7f37 | ||
|
8ccda826d5 | ||
|
b9381e43c2 | ||
|
fcdea2666d | ||
|
c4db377cbb | ||
|
90dc5e8693 | ||
|
c81a855b0f | ||
|
c8d8ec8567 | ||
|
4f879a5be0 | ||
|
1a0648b4a9 | ||
|
3c1b4669d0 | ||
|
24b3d5e538 | ||
|
ab083b08ab | ||
|
89acb96927 | ||
|
d1b30713fb |
12
README.md
12
README.md
@@ -53,6 +53,12 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
from google videos for youtube-dl "large
|
from google videos for youtube-dl "large
|
||||||
apple". By default (with value "auto")
|
apple". By default (with value "auto")
|
||||||
youtube-dl guesses.
|
youtube-dl guesses.
|
||||||
|
--ignore-config Do not read configuration files. When given
|
||||||
|
in the global configuration file /etc
|
||||||
|
/youtube-dl.conf: do not read the user
|
||||||
|
configuration in ~/.config/youtube-dl.conf
|
||||||
|
(%APPDATA%/youtube-dl/config.txt on
|
||||||
|
Windows)
|
||||||
|
|
||||||
## Video Selection:
|
## Video Selection:
|
||||||
--playlist-start NUMBER playlist video to start at (default is 1)
|
--playlist-start NUMBER playlist video to start at (default is 1)
|
||||||
@@ -325,7 +331,7 @@ Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unz
|
|||||||
|
|
||||||
To run the exe you first need to install the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29).
|
To run the exe you first need to install the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29).
|
||||||
|
|
||||||
# BUILD INSTRUCTIONS
|
# DEVELOPER INSTRUCTIONS
|
||||||
|
|
||||||
Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
|
Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
|
||||||
|
|
||||||
@@ -347,6 +353,10 @@ If you want to create a build of youtube-dl yourself, you'll need
|
|||||||
* zip
|
* zip
|
||||||
* nosetests
|
* nosetests
|
||||||
|
|
||||||
|
### Adding support for a new site
|
||||||
|
|
||||||
|
If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor` and add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py Test_Download.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/).
|
||||||
|
|
||||||
# BUGS
|
# BUGS
|
||||||
|
|
||||||
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted to do so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email.
|
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted to do so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email.
|
||||||
|
@@ -34,6 +34,7 @@ from youtube_dl.extractor import (
|
|||||||
KhanAcademyIE,
|
KhanAcademyIE,
|
||||||
EveryonesMixtapeIE,
|
EveryonesMixtapeIE,
|
||||||
RutubeChannelIE,
|
RutubeChannelIE,
|
||||||
|
GenericIE,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -229,6 +230,16 @@ class TestPlaylists(unittest.TestCase):
|
|||||||
self.assertEqual(result['id'], '1409')
|
self.assertEqual(result['id'], '1409')
|
||||||
self.assertTrue(len(result['entries']) >= 34)
|
self.assertTrue(len(result['entries']) >= 34)
|
||||||
|
|
||||||
|
def test_multiple_brightcove_videos(self):
|
||||||
|
# https://github.com/rg3/youtube-dl/issues/2283
|
||||||
|
dl = FakeYDL()
|
||||||
|
ie = GenericIE(dl)
|
||||||
|
result = ie.extract('http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html')
|
||||||
|
self.assertIsPlaylist(result)
|
||||||
|
self.assertEqual(result['id'], 'always-never-nuclear-command-and-control')
|
||||||
|
self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker')
|
||||||
|
self.assertEqual(len(result['entries']), 3)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
75
test/test_signatures.py
Normal file
75
test/test_signatures.py
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
# Allow direct execution
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import unittest
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
|
||||||
|
import io
|
||||||
|
import re
|
||||||
|
import string
|
||||||
|
|
||||||
|
from youtube_dl.extractor import YoutubeIE
|
||||||
|
from youtube_dl.utils import compat_str, compat_urlretrieve
|
||||||
|
|
||||||
|
_TESTS = [
|
||||||
|
(
|
||||||
|
u'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
|
||||||
|
u'js',
|
||||||
|
86,
|
||||||
|
u'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
u'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
|
||||||
|
u'js',
|
||||||
|
85,
|
||||||
|
u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class TestSignature(unittest.TestCase):
|
||||||
|
def setUp(self):
|
||||||
|
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata')
|
||||||
|
if not os.path.exists(self.TESTDATA_DIR):
|
||||||
|
os.mkdir(self.TESTDATA_DIR)
|
||||||
|
|
||||||
|
|
||||||
|
def make_tfunc(url, stype, sig_length, expected_sig):
|
||||||
|
basename = url.rpartition('/')[2]
|
||||||
|
m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename)
|
||||||
|
assert m, '%r should follow URL format' % basename
|
||||||
|
test_id = m.group(1)
|
||||||
|
|
||||||
|
def test_func(self):
|
||||||
|
fn = os.path.join(self.TESTDATA_DIR, basename)
|
||||||
|
|
||||||
|
if not os.path.exists(fn):
|
||||||
|
compat_urlretrieve(url, fn)
|
||||||
|
|
||||||
|
ie = YoutubeIE()
|
||||||
|
if stype == 'js':
|
||||||
|
with io.open(fn, encoding='utf-8') as testf:
|
||||||
|
jscode = testf.read()
|
||||||
|
func = ie._parse_sig_js(jscode)
|
||||||
|
else:
|
||||||
|
assert stype == 'swf'
|
||||||
|
with open(fn, 'rb') as testf:
|
||||||
|
swfcode = testf.read()
|
||||||
|
func = ie._parse_sig_swf(swfcode)
|
||||||
|
src_sig = compat_str(string.printable[:sig_length])
|
||||||
|
got_sig = func(src_sig)
|
||||||
|
self.assertEqual(got_sig, expected_sig)
|
||||||
|
|
||||||
|
test_func.__name__ = str('test_signature_' + stype + '_' + test_id)
|
||||||
|
setattr(TestSignature, test_func.__name__, test_func)
|
||||||
|
|
||||||
|
for test_spec in _TESTS:
|
||||||
|
make_tfunc(*test_spec)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
@@ -10,9 +10,11 @@ from test.helper import FakeYDL, md5
|
|||||||
|
|
||||||
|
|
||||||
from youtube_dl.extractor import (
|
from youtube_dl.extractor import (
|
||||||
|
BlipTVIE,
|
||||||
YoutubeIE,
|
YoutubeIE,
|
||||||
DailymotionIE,
|
DailymotionIE,
|
||||||
TEDIE,
|
TEDIE,
|
||||||
|
VimeoIE,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -202,5 +204,80 @@ class TestTedSubtitles(BaseTestSubtitles):
|
|||||||
for lang in langs:
|
for lang in langs:
|
||||||
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|
||||||
|
|
||||||
|
|
||||||
|
class TestBlipTVSubtitles(BaseTestSubtitles):
|
||||||
|
url = 'http://blip.tv/a/a-6603250'
|
||||||
|
IE = BlipTVIE
|
||||||
|
|
||||||
|
def test_list_subtitles(self):
|
||||||
|
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
||||||
|
self.DL.params['listsubtitles'] = True
|
||||||
|
info_dict = self.getInfoDict()
|
||||||
|
self.assertEqual(info_dict, None)
|
||||||
|
|
||||||
|
def test_allsubtitles(self):
|
||||||
|
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(set(subtitles.keys()), set(['en']))
|
||||||
|
self.assertEqual(md5(subtitles['en']), '5b75c300af65fe4476dff79478bb93e4')
|
||||||
|
|
||||||
|
|
||||||
|
class TestVimeoSubtitles(BaseTestSubtitles):
|
||||||
|
url = 'http://vimeo.com/76979871'
|
||||||
|
IE = VimeoIE
|
||||||
|
|
||||||
|
def test_no_writesubtitles(self):
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(subtitles, None)
|
||||||
|
|
||||||
|
def test_subtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
|
||||||
|
|
||||||
|
def test_subtitles_lang(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['subtitleslangs'] = ['fr']
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
|
||||||
|
|
||||||
|
def test_allsubtitles(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr']))
|
||||||
|
|
||||||
|
def test_list_subtitles(self):
|
||||||
|
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
||||||
|
self.DL.params['listsubtitles'] = True
|
||||||
|
info_dict = self.getInfoDict()
|
||||||
|
self.assertEqual(info_dict, None)
|
||||||
|
|
||||||
|
def test_automatic_captions(self):
|
||||||
|
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
||||||
|
self.DL.params['writeautomaticsub'] = True
|
||||||
|
self.DL.params['subtitleslang'] = ['en']
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertTrue(len(subtitles.keys()) == 0)
|
||||||
|
|
||||||
|
def test_nosubtitles(self):
|
||||||
|
self.DL.expect_warning(u'video doesn\'t have subtitles')
|
||||||
|
self.url = 'http://vimeo.com/56015672'
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(len(subtitles), 0)
|
||||||
|
|
||||||
|
def test_multiple_langs(self):
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
langs = ['es', 'fr', 'de']
|
||||||
|
self.DL.params['subtitleslangs'] = langs
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
for lang in langs:
|
||||||
|
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
24
youtube-dl.plugin.zsh
Normal file
24
youtube-dl.plugin.zsh
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
# This allows the youtube-dl command to be installed in ZSH using antigen.
|
||||||
|
# Antigen is a bundle manager. It allows you to enhance the functionality of
|
||||||
|
# your zsh session by installing bundles and themes easily.
|
||||||
|
|
||||||
|
# Antigen documentation:
|
||||||
|
# http://antigen.sharats.me/
|
||||||
|
# https://github.com/zsh-users/antigen
|
||||||
|
|
||||||
|
# Install youtube-dl:
|
||||||
|
# antigen bundle rg3/youtube-dl
|
||||||
|
# Bundles installed by antigen are available for use immediately.
|
||||||
|
|
||||||
|
# Update youtube-dl (and all other antigen bundles):
|
||||||
|
# antigen update
|
||||||
|
|
||||||
|
# The antigen command will download the git repository to a folder and then
|
||||||
|
# execute an enabling script (this file). The complete process for loading the
|
||||||
|
# code is documented here:
|
||||||
|
# https://github.com/zsh-users/antigen#notes-on-writing-plugins
|
||||||
|
|
||||||
|
# This specific script just aliases youtube-dl to the python script that this
|
||||||
|
# library provides. This requires updating the PYTHONPATH to ensure that the
|
||||||
|
# full set of code can be located.
|
||||||
|
alias youtube-dl="PYTHONPATH=$(dirname $0) $(dirname $0)/bin/youtube-dl"
|
@@ -100,6 +100,43 @@ def parseOpts(overrideArguments=None):
|
|||||||
optionf.close()
|
optionf.close()
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
def _readUserConf():
|
||||||
|
xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
|
||||||
|
if xdg_config_home:
|
||||||
|
userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
|
||||||
|
if not os.path.isfile(userConfFile):
|
||||||
|
userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
|
||||||
|
else:
|
||||||
|
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
|
||||||
|
if not os.path.isfile(userConfFile):
|
||||||
|
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
|
||||||
|
userConf = _readOptions(userConfFile, None)
|
||||||
|
|
||||||
|
if userConf is None:
|
||||||
|
appdata_dir = os.environ.get('appdata')
|
||||||
|
if appdata_dir:
|
||||||
|
userConf = _readOptions(
|
||||||
|
os.path.join(appdata_dir, 'youtube-dl', 'config'),
|
||||||
|
default=None)
|
||||||
|
if userConf is None:
|
||||||
|
userConf = _readOptions(
|
||||||
|
os.path.join(appdata_dir, 'youtube-dl', 'config.txt'),
|
||||||
|
default=None)
|
||||||
|
|
||||||
|
if userConf is None:
|
||||||
|
userConf = _readOptions(
|
||||||
|
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'),
|
||||||
|
default=None)
|
||||||
|
if userConf is None:
|
||||||
|
userConf = _readOptions(
|
||||||
|
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'),
|
||||||
|
default=None)
|
||||||
|
|
||||||
|
if userConf is None:
|
||||||
|
userConf = []
|
||||||
|
|
||||||
|
return userConf
|
||||||
|
|
||||||
def _format_option_string(option):
|
def _format_option_string(option):
|
||||||
''' ('-o', '--option') -> -o, --format METAVAR'''
|
''' ('-o', '--option') -> -o, --format METAVAR'''
|
||||||
|
|
||||||
@@ -203,6 +240,11 @@ def parseOpts(overrideArguments=None):
|
|||||||
general.add_option('--default-search',
|
general.add_option('--default-search',
|
||||||
dest='default_search', metavar='PREFIX',
|
dest='default_search', metavar='PREFIX',
|
||||||
help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
|
help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
|
||||||
|
general.add_option(
|
||||||
|
'--ignore-config',
|
||||||
|
action='store_true',
|
||||||
|
help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
|
||||||
|
|
||||||
|
|
||||||
selection.add_option(
|
selection.add_option(
|
||||||
'--playlist-start',
|
'--playlist-start',
|
||||||
@@ -457,44 +499,18 @@ def parseOpts(overrideArguments=None):
|
|||||||
if opts.verbose:
|
if opts.verbose:
|
||||||
write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
|
write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
|
||||||
else:
|
else:
|
||||||
systemConf = _readOptions('/etc/youtube-dl.conf')
|
|
||||||
|
|
||||||
xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
|
|
||||||
if xdg_config_home:
|
|
||||||
userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
|
|
||||||
if not os.path.isfile(userConfFile):
|
|
||||||
userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
|
|
||||||
else:
|
|
||||||
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
|
|
||||||
if not os.path.isfile(userConfFile):
|
|
||||||
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
|
|
||||||
userConf = _readOptions(userConfFile, None)
|
|
||||||
|
|
||||||
if userConf is None:
|
|
||||||
appdata_dir = os.environ.get('appdata')
|
|
||||||
if appdata_dir:
|
|
||||||
userConf = _readOptions(
|
|
||||||
os.path.join(appdata_dir, 'youtube-dl', 'config'),
|
|
||||||
default=None)
|
|
||||||
if userConf is None:
|
|
||||||
userConf = _readOptions(
|
|
||||||
os.path.join(appdata_dir, 'youtube-dl', 'config.txt'),
|
|
||||||
default=None)
|
|
||||||
|
|
||||||
if userConf is None:
|
|
||||||
userConf = _readOptions(
|
|
||||||
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'),
|
|
||||||
default=None)
|
|
||||||
if userConf is None:
|
|
||||||
userConf = _readOptions(
|
|
||||||
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'),
|
|
||||||
default=None)
|
|
||||||
|
|
||||||
if userConf is None:
|
|
||||||
userConf = []
|
|
||||||
|
|
||||||
commandLineConf = sys.argv[1:]
|
commandLineConf = sys.argv[1:]
|
||||||
|
if '--ignore-config' in commandLineConf:
|
||||||
|
systemConf = []
|
||||||
|
userConf = []
|
||||||
|
else:
|
||||||
|
systemConf = _readOptions('/etc/youtube-dl.conf')
|
||||||
|
if '--ignore-config' in systemConf:
|
||||||
|
userConf = []
|
||||||
|
else:
|
||||||
|
userConf = _readUserConf()
|
||||||
argv = systemConf + userConf + commandLineConf
|
argv = systemConf + userConf + commandLineConf
|
||||||
|
|
||||||
opts, args = parser.parse_args(argv)
|
opts, args = parser.parse_args(argv)
|
||||||
if opts.verbose:
|
if opts.verbose:
|
||||||
write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
|
write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
|
||||||
|
@@ -96,6 +96,7 @@ from .ina import InaIE
|
|||||||
from .infoq import InfoQIE
|
from .infoq import InfoQIE
|
||||||
from .instagram import InstagramIE
|
from .instagram import InstagramIE
|
||||||
from .internetvideoarchive import InternetVideoArchiveIE
|
from .internetvideoarchive import InternetVideoArchiveIE
|
||||||
|
from .iprima import IPrimaIE
|
||||||
from .ivi import (
|
from .ivi import (
|
||||||
IviIE,
|
IviIE,
|
||||||
IviCompilationIE
|
IviCompilationIE
|
||||||
@@ -110,6 +111,7 @@ from .khanacademy import KhanAcademyIE
|
|||||||
from .kickstarter import KickStarterIE
|
from .kickstarter import KickStarterIE
|
||||||
from .keek import KeekIE
|
from .keek import KeekIE
|
||||||
from .la7 import LA7IE
|
from .la7 import LA7IE
|
||||||
|
from .lifenews import LifeNewsIE
|
||||||
from .liveleak import LiveLeakIE
|
from .liveleak import LiveLeakIE
|
||||||
from .livestream import LivestreamIE, LivestreamOriginalIE
|
from .livestream import LivestreamIE, LivestreamOriginalIE
|
||||||
from .lynda import (
|
from .lynda import (
|
||||||
@@ -125,6 +127,7 @@ from .mit import TechTVMITIE, MITIE
|
|||||||
from .mixcloud import MixcloudIE
|
from .mixcloud import MixcloudIE
|
||||||
from .mpora import MporaIE
|
from .mpora import MporaIE
|
||||||
from .mofosex import MofosexIE
|
from .mofosex import MofosexIE
|
||||||
|
from .mooshare import MooshareIE
|
||||||
from .mtv import (
|
from .mtv import (
|
||||||
MTVIE,
|
MTVIE,
|
||||||
MTVIggyIE,
|
MTVIggyIE,
|
||||||
@@ -141,6 +144,7 @@ from .newgrounds import NewgroundsIE
|
|||||||
from .nhl import NHLIE, NHLVideocenterIE
|
from .nhl import NHLIE, NHLVideocenterIE
|
||||||
from .niconico import NiconicoIE
|
from .niconico import NiconicoIE
|
||||||
from .ninegag import NineGagIE
|
from .ninegag import NineGagIE
|
||||||
|
from .normalboots import NormalbootsIE
|
||||||
from .novamov import NovamovIE
|
from .novamov import NovamovIE
|
||||||
from .nowness import NownessIE
|
from .nowness import NownessIE
|
||||||
from .nowvideo import NowVideoIE
|
from .nowvideo import NowVideoIE
|
||||||
@@ -198,6 +202,7 @@ from .ted import TEDIE
|
|||||||
from .tf1 import TF1IE
|
from .tf1 import TF1IE
|
||||||
from .theplatform import ThePlatformIE
|
from .theplatform import ThePlatformIE
|
||||||
from .thisav import ThisAVIE
|
from .thisav import ThisAVIE
|
||||||
|
from .tinypic import TinyPicIE
|
||||||
from .toutv import TouTvIE
|
from .toutv import TouTvIE
|
||||||
from .traileraddict import TrailerAddictIE
|
from .traileraddict import TrailerAddictIE
|
||||||
from .trilulilu import TriluliluIE
|
from .trilulilu import TriluliluIE
|
||||||
@@ -228,6 +233,7 @@ from .vimeo import (
|
|||||||
from .vine import VineIE
|
from .vine import VineIE
|
||||||
from .viki import VikiIE
|
from .viki import VikiIE
|
||||||
from .vk import VKIE
|
from .vk import VKIE
|
||||||
|
from .vube import VubeIE
|
||||||
from .wat import WatIE
|
from .wat import WatIE
|
||||||
from .weibo import WeiboIE
|
from .weibo import WeiboIE
|
||||||
from .wimp import WimpIE
|
from .wimp import WimpIE
|
||||||
|
@@ -1,128 +1,137 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
import socket
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_http_client,
|
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_error,
|
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
|
|
||||||
ExtractorError,
|
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class BlipTVIE(InfoExtractor):
|
class BlipTVIE(SubtitlesInfoExtractor):
|
||||||
"""Information extractor for blip.tv"""
|
"""Information extractor for blip.tv"""
|
||||||
|
|
||||||
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$'
|
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(?P<presumptive_id>.+)$'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
|
'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
|
||||||
'file': '5779306.mov',
|
|
||||||
'md5': 'c6934ad0b6acf2bd920720ec888eb812',
|
'md5': 'c6934ad0b6acf2bd920720ec888eb812',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '5779306',
|
||||||
|
'ext': 'mov',
|
||||||
'upload_date': '20111205',
|
'upload_date': '20111205',
|
||||||
'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
|
'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
|
||||||
'uploader': 'Comic Book Resources - CBR TV',
|
'uploader': 'Comic Book Resources - CBR TV',
|
||||||
'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
|
'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
# https://github.com/rg3/youtube-dl/pull/2274
|
||||||
def report_direct_download(self, title):
|
'note': 'Video with subtitles',
|
||||||
"""Report information extraction."""
|
'url': 'http://blip.tv/play/h6Uag5OEVgI.html',
|
||||||
self.to_screen('%s: Direct download detected' % title)
|
'md5': '309f9d25b820b086ca163ffac8031806',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6586561',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'uploader': 'Red vs. Blue',
|
||||||
|
'description': 'One-Zero-One',
|
||||||
|
'upload_date': '20130614',
|
||||||
|
'title': 'Red vs. Blue Season 11 Episode 1',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
presumptive_id = mobj.group('presumptive_id')
|
||||||
raise ExtractorError('Invalid URL: %s' % url)
|
|
||||||
|
|
||||||
# See https://github.com/rg3/youtube-dl/issues/857
|
# See https://github.com/rg3/youtube-dl/issues/857
|
||||||
embed_mobj = re.search(r'^(?:https?://)?(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', url)
|
embed_mobj = re.match(r'https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', url)
|
||||||
if embed_mobj:
|
if embed_mobj:
|
||||||
info_url = 'http://blip.tv/play/%s.x?p=1' % embed_mobj.group(1)
|
info_url = 'http://blip.tv/play/%s.x?p=1' % embed_mobj.group(1)
|
||||||
info_page = self._download_webpage(info_url, embed_mobj.group(1))
|
info_page = self._download_webpage(info_url, embed_mobj.group(1))
|
||||||
video_id = self._search_regex(r'data-episode-id="(\d+)', info_page, 'video_id')
|
video_id = self._search_regex(
|
||||||
|
r'data-episode-id="([0-9]+)', info_page, 'video_id')
|
||||||
return self.url_result('http://blip.tv/a/a-' + video_id, 'BlipTV')
|
return self.url_result('http://blip.tv/a/a-' + video_id, 'BlipTV')
|
||||||
|
|
||||||
if '?' in url:
|
cchar = '&' if '?' in url else '?'
|
||||||
cchar = '&'
|
|
||||||
else:
|
|
||||||
cchar = '?'
|
|
||||||
json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
|
json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
|
||||||
request = compat_urllib_request.Request(json_url)
|
request = compat_urllib_request.Request(json_url)
|
||||||
request.add_header('User-Agent', 'iTunes/10.6.1')
|
request.add_header('User-Agent', 'iTunes/10.6.1')
|
||||||
self.report_extraction(mobj.group(1))
|
|
||||||
urlh = self._request_webpage(request, None, False,
|
|
||||||
'unable to download video info webpage')
|
|
||||||
|
|
||||||
try:
|
json_data = self._download_json(request, video_id=presumptive_id)
|
||||||
json_code_bytes = urlh.read()
|
|
||||||
json_code = json_code_bytes.decode('utf-8')
|
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
|
||||||
raise ExtractorError('Unable to read video info webpage: %s' % compat_str(err))
|
|
||||||
|
|
||||||
try:
|
if 'Post' in json_data:
|
||||||
json_data = json.loads(json_code)
|
data = json_data['Post']
|
||||||
if 'Post' in json_data:
|
else:
|
||||||
data = json_data['Post']
|
data = json_data
|
||||||
else:
|
|
||||||
data = json_data
|
|
||||||
|
|
||||||
upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
|
video_id = compat_str(data['item_id'])
|
||||||
formats = []
|
upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
|
||||||
if 'additionalMedia' in data:
|
subtitles = {}
|
||||||
for f in sorted(data['additionalMedia'], key=lambda f: int(f['media_height'])):
|
formats = []
|
||||||
if not int(f['media_width']): # filter m3u8
|
if 'additionalMedia' in data:
|
||||||
continue
|
for f in data['additionalMedia']:
|
||||||
formats.append({
|
if f.get('file_type_srt') == 1:
|
||||||
'url': f['url'],
|
LANGS = {
|
||||||
'format_id': f['role'],
|
'english': 'en',
|
||||||
'width': int(f['media_width']),
|
}
|
||||||
'height': int(f['media_height']),
|
lang = f['role'].rpartition('-')[-1].strip().lower()
|
||||||
})
|
langcode = LANGS.get(lang, lang)
|
||||||
else:
|
subtitles[langcode] = f['url']
|
||||||
|
continue
|
||||||
|
if not int(f['media_width']): # filter m3u8
|
||||||
|
continue
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': data['media']['url'],
|
'url': f['url'],
|
||||||
'width': int(data['media']['width']),
|
'format_id': f['role'],
|
||||||
'height': int(data['media']['height']),
|
'width': int(f['media_width']),
|
||||||
|
'height': int(f['media_height']),
|
||||||
})
|
})
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'url': data['media']['url'],
|
||||||
|
'width': int(data['media']['width']),
|
||||||
|
'height': int(data['media']['height']),
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
self._sort_formats(formats)
|
# subtitles
|
||||||
|
video_subtitles = self.extract_subtitles(video_id, subtitles)
|
||||||
|
if self._downloader.params.get('listsubtitles', False):
|
||||||
|
self._list_available_subtitles(video_id, subtitles)
|
||||||
|
return
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': compat_str(data['item_id']),
|
'id': video_id,
|
||||||
'uploader': data['display_name'],
|
'uploader': data['display_name'],
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
'title': data['title'],
|
'title': data['title'],
|
||||||
'thumbnail': data['thumbnailUrl'],
|
'thumbnail': data['thumbnailUrl'],
|
||||||
'description': data['description'],
|
'description': data['description'],
|
||||||
'user_agent': 'iTunes/10.6.1',
|
'user_agent': 'iTunes/10.6.1',
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
'subtitles': video_subtitles,
|
||||||
except (ValueError, KeyError) as err:
|
}
|
||||||
raise ExtractorError('Unable to parse video information: %s' % repr(err))
|
|
||||||
|
def _download_subtitle_url(self, sub_lang, url):
|
||||||
|
# For some weird reason, blip.tv serves a video instead of subtitles
|
||||||
|
# when we request with a common UA
|
||||||
|
req = compat_urllib_request.Request(url)
|
||||||
|
req.add_header('Youtubedl-user-agent', 'youtube-dl')
|
||||||
|
return self._download_webpage(req, None, note=False)
|
||||||
|
|
||||||
|
|
||||||
class BlipTVUserIE(InfoExtractor):
|
class BlipTVUserIE(InfoExtractor):
|
||||||
"""Information Extractor for blip.tv users."""
|
|
||||||
|
|
||||||
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$'
|
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$'
|
||||||
_PAGE_SIZE = 12
|
_PAGE_SIZE = 12
|
||||||
IE_NAME = 'blip.tv:user'
|
IE_NAME = 'blip.tv:user'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
# Extract username
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
|
||||||
raise ExtractorError('Invalid URL: %s' % url)
|
|
||||||
|
|
||||||
username = mobj.group(1)
|
username = mobj.group(1)
|
||||||
|
|
||||||
page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'
|
page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'
|
||||||
@@ -131,7 +140,6 @@ class BlipTVUserIE(InfoExtractor):
|
|||||||
mobj = re.search(r'data-users-id="([^"]+)"', page)
|
mobj = re.search(r'data-users-id="([^"]+)"', page)
|
||||||
page_base = page_base % mobj.group(1)
|
page_base = page_base % mobj.group(1)
|
||||||
|
|
||||||
|
|
||||||
# Download video ids using BlipTV Ajax calls. Result size per
|
# Download video ids using BlipTV Ajax calls. Result size per
|
||||||
# query is limited (currently to 12 videos) so we need to query
|
# query is limited (currently to 12 videos) so we need to query
|
||||||
# page by page until there are no video ids - it means we got
|
# page by page until there are no video ids - it means we got
|
||||||
@@ -142,8 +150,8 @@ class BlipTVUserIE(InfoExtractor):
|
|||||||
|
|
||||||
while True:
|
while True:
|
||||||
url = page_base + "&page=" + str(pagenum)
|
url = page_base + "&page=" + str(pagenum)
|
||||||
page = self._download_webpage(url, username,
|
page = self._download_webpage(
|
||||||
'Downloading video ids from page %d' % pagenum)
|
url, username, 'Downloading video ids from page %d' % pagenum)
|
||||||
|
|
||||||
# Extract video identifiers
|
# Extract video identifiers
|
||||||
ids_in_page = []
|
ids_in_page = []
|
||||||
@@ -167,4 +175,4 @@ class BlipTVUserIE(InfoExtractor):
|
|||||||
|
|
||||||
urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]
|
urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]
|
||||||
url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
|
url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
|
||||||
return [self.playlist_result(url_entries, playlist_title = username)]
|
return [self.playlist_result(url_entries, playlist_title=username)]
|
||||||
|
@@ -127,25 +127,28 @@ class BrightcoveIE(InfoExtractor):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _extract_brightcove_url(cls, webpage):
|
def _extract_brightcove_url(cls, webpage):
|
||||||
"""Try to extract the brightcove url from the wepbage, returns None
|
"""Try to extract the brightcove url from the webpage, returns None
|
||||||
if it can't be found
|
if it can't be found
|
||||||
"""
|
"""
|
||||||
|
urls = cls._extract_brightcove_urls(webpage)
|
||||||
|
return urls[0] if urls else None
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _extract_brightcove_urls(cls, webpage):
|
||||||
|
"""Return a list of all Brightcove URLs from the webpage """
|
||||||
|
|
||||||
url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
|
url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
|
||||||
if url_m:
|
if url_m:
|
||||||
return url_m.group(1)
|
return [url_m.group(1)]
|
||||||
|
|
||||||
m_brightcove = re.search(
|
matches = re.findall(
|
||||||
r'''(?sx)<object
|
r'''(?sx)<object
|
||||||
(?:
|
(?:
|
||||||
[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1 |
|
[^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
|
||||||
[^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
|
[^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
|
||||||
).+?</object>''',
|
).+?</object>''',
|
||||||
webpage)
|
webpage)
|
||||||
if m_brightcove is not None:
|
return [cls._build_brighcove_url(m) for m in matches]
|
||||||
return cls._build_brighcove_url(m_brightcove.group())
|
|
||||||
else:
|
|
||||||
return None
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
url, smuggled_data = unsmuggle_url(url, {})
|
url, smuggled_data = unsmuggle_url(url, {})
|
||||||
|
@@ -1,12 +1,9 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import string
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
)
|
|
||||||
|
|
||||||
translation_table = {
|
translation_table = {
|
||||||
'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',
|
'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',
|
||||||
|
@@ -28,7 +28,25 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
'description': 'This video wasn\'t long enough, so we made it double-spaced.',
|
'description': 'This video wasn\'t long enough, so we made it double-spaced.',
|
||||||
'age_limit': 10,
|
'age_limit': 10,
|
||||||
},
|
},
|
||||||
}]
|
},
|
||||||
|
# embedded youtube video
|
||||||
|
{
|
||||||
|
'url': 'http://www.collegehumor.com/embed/6950457',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'W5gMp3ZjYg4',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
|
||||||
|
'uploader': 'Funnyplox TV',
|
||||||
|
'uploader_id': 'funnyploxtv',
|
||||||
|
'description': 'md5:11812366244110c3523968aa74f02521',
|
||||||
|
'upload_date': '20140128',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'add_ie': ['Youtube'],
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
@@ -38,6 +56,12 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
data = json.loads(self._download_webpage(
|
data = json.loads(self._download_webpage(
|
||||||
jsonUrl, video_id, 'Downloading info JSON'))
|
jsonUrl, video_id, 'Downloading info JSON'))
|
||||||
vdata = data['video']
|
vdata = data['video']
|
||||||
|
if vdata.get('youtubeId') is not None:
|
||||||
|
return {
|
||||||
|
'_type': 'url',
|
||||||
|
'url': vdata['youtubeId'],
|
||||||
|
'ie_key': 'Youtube',
|
||||||
|
}
|
||||||
|
|
||||||
AGE_LIMITS = {'nc17': 18, 'r': 18, 'pg13': 13, 'pg': 10, 'g': 0}
|
AGE_LIMITS = {'nc17': 18, 'r': 18, 'pg13': 13, 'pg': 10, 'g': 0}
|
||||||
rating = vdata.get('rating')
|
rating = vdata.get('rating')
|
||||||
@@ -49,7 +73,7 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
PREFS = {'high_quality': 2, 'low_quality': 0}
|
PREFS = {'high_quality': 2, 'low_quality': 0}
|
||||||
formats = []
|
formats = []
|
||||||
for format_key in ('mp4', 'webm'):
|
for format_key in ('mp4', 'webm'):
|
||||||
for qname, qurl in vdata[format_key].items():
|
for qname, qurl in vdata.get(format_key, {}).items():
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': format_key + '_' + qname,
|
'format_id': format_key + '_' + qname,
|
||||||
'url': qurl,
|
'url': qurl,
|
||||||
|
@@ -399,7 +399,7 @@ class InfoExtractor(object):
|
|||||||
# Helper functions for extracting OpenGraph info
|
# Helper functions for extracting OpenGraph info
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _og_regexes(prop):
|
def _og_regexes(prop):
|
||||||
content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')'
|
content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\')'
|
||||||
property_re = r'(?:name|property)=[\'"]og:%s[\'"]' % re.escape(prop)
|
property_re = r'(?:name|property)=[\'"]og:%s[\'"]' % re.escape(prop)
|
||||||
template = r'<meta[^>]+?%s[^>]+?%s'
|
template = r'<meta[^>]+?%s[^>]+?%s'
|
||||||
return [
|
return [
|
||||||
|
@@ -1,49 +1,60 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
find_xpath_attr,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class CSpanIE(InfoExtractor):
|
class CSpanIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://(?:www\.)?c-spanvideo\.org/program/(?P<name>.*)'
|
_VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>\d+)'
|
||||||
IE_DESC = 'C-SPAN'
|
IE_DESC = 'C-SPAN'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.c-spanvideo.org/program/HolderonV',
|
'url': 'http://www.c-span.org/video/?313572-1/HolderonV',
|
||||||
'file': '315139.mp4',
|
|
||||||
'md5': '8e44ce11f0f725527daccc453f553eb0',
|
'md5': '8e44ce11f0f725527daccc453f553eb0',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '315139',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
|
'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
|
||||||
'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in [Shelby County v. Holder] in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
|
'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in Shelby County v. Holder in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
|
||||||
},
|
},
|
||||||
'skip': 'Regularly fails on travis, for unknown reasons',
|
'skip': 'Regularly fails on travis, for unknown reasons',
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
prog_name = mobj.group('name')
|
page_id = mobj.group('id')
|
||||||
webpage = self._download_webpage(url, prog_name)
|
webpage = self._download_webpage(url, page_id)
|
||||||
video_id = self._search_regex(r'prog(?:ram)?id=(.*?)&', webpage, 'video id')
|
video_id = self._search_regex(r'data-progid=\'(\d+)\'>', webpage, 'video id')
|
||||||
|
|
||||||
title = self._html_search_regex(
|
description = self._html_search_regex(
|
||||||
r'<!-- title -->\n\s*<h1[^>]*>(.*?)</h1>', webpage, 'title')
|
[
|
||||||
description = self._og_search_description(webpage)
|
# The full description
|
||||||
|
r'<div class=\'expandable\'>(.*?)<a href=\'#\'',
|
||||||
|
# If the description is small enough the other div is not
|
||||||
|
# present, otherwise this is a stripped version
|
||||||
|
r'<p class=\'initial\'>(.*?)</p>'
|
||||||
|
],
|
||||||
|
webpage, 'description', flags=re.DOTALL)
|
||||||
|
|
||||||
info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
|
info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
|
||||||
data_json = self._download_webpage(
|
data = self._download_json(info_url, video_id)
|
||||||
info_url, video_id, 'Downloading video info')
|
|
||||||
data = json.loads(data_json)
|
|
||||||
|
|
||||||
url = unescapeHTML(data['video']['files'][0]['path']['#text'])
|
url = unescapeHTML(data['video']['files'][0]['path']['#text'])
|
||||||
|
|
||||||
|
doc = self._download_xml('http://www.c-span.org/common/services/flashXml.php?programid=' + video_id,
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
def find_string(s):
|
||||||
|
return find_xpath_attr(doc, './/string', 'name', s).text
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': find_string('title'),
|
||||||
'url': url,
|
'url': url,
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': find_string('poster'),
|
||||||
}
|
}
|
||||||
|
@@ -1,4 +1,7 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
|
|
||||||
@@ -30,7 +33,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class PluzzIE(FranceTVBaseInfoExtractor):
|
class PluzzIE(FranceTVBaseInfoExtractor):
|
||||||
IE_NAME = u'pluzz.francetv.fr'
|
IE_NAME = 'pluzz.francetv.fr'
|
||||||
_VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html'
|
_VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html'
|
||||||
|
|
||||||
# Can't use tests, videos expire in 7 days
|
# Can't use tests, videos expire in 7 days
|
||||||
@@ -44,17 +47,17 @@ class PluzzIE(FranceTVBaseInfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||||
IE_NAME = u'francetvinfo.fr'
|
IE_NAME = 'francetvinfo.fr'
|
||||||
_VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+)\.html'
|
_VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+)\.html'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
|
'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
|
||||||
u'file': u'84981923.mp4',
|
'file': '84981923.mp4',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'title': u'Soir 3',
|
'title': 'Soir 3',
|
||||||
},
|
},
|
||||||
u'params': {
|
'params': {
|
||||||
u'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -62,13 +65,13 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
page_title = mobj.group('title')
|
page_title = mobj.group('title')
|
||||||
webpage = self._download_webpage(url, page_title)
|
webpage = self._download_webpage(url, page_title)
|
||||||
video_id = self._search_regex(r'id-video=(\d+?)"', webpage, u'video id')
|
video_id = self._search_regex(r'id-video=(\d+?)[@"]', webpage, 'video id')
|
||||||
return self._extract_video(video_id)
|
return self._extract_video(video_id)
|
||||||
|
|
||||||
|
|
||||||
class FranceTVIE(FranceTVBaseInfoExtractor):
|
class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||||
IE_NAME = u'francetv'
|
IE_NAME = 'francetv'
|
||||||
IE_DESC = u'France 2, 3, 4, 5 and Ô'
|
IE_DESC = 'France 2, 3, 4, 5 and Ô'
|
||||||
_VALID_URL = r'''(?x)https?://www\.france[2345o]\.fr/
|
_VALID_URL = r'''(?x)https?://www\.france[2345o]\.fr/
|
||||||
(?:
|
(?:
|
||||||
emissions/.*?/(videos|emissions)/(?P<id>[^/?]+)
|
emissions/.*?/(videos|emissions)/(?P<id>[^/?]+)
|
||||||
@@ -78,73 +81,73 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
|
|||||||
_TESTS = [
|
_TESTS = [
|
||||||
# france2
|
# france2
|
||||||
{
|
{
|
||||||
u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
|
'url': 'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
|
||||||
u'file': u'75540104.mp4',
|
'file': '75540104.mp4',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'title': u'13h15, le samedi...',
|
'title': '13h15, le samedi...',
|
||||||
u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d',
|
'description': 'md5:2e5b58ba7a2d3692b35c792be081a03d',
|
||||||
},
|
},
|
||||||
u'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
u'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# france3
|
# france3
|
||||||
{
|
{
|
||||||
u'url': u'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
|
'url': 'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'id': u'000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
|
'id': '000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
|
||||||
u'ext': u'flv',
|
'ext': 'flv',
|
||||||
u'title': u'Le scandale du prix des médicaments',
|
'title': 'Le scandale du prix des médicaments',
|
||||||
u'description': u'md5:1384089fbee2f04fc6c9de025ee2e9ce',
|
'description': 'md5:1384089fbee2f04fc6c9de025ee2e9ce',
|
||||||
},
|
},
|
||||||
u'params': {
|
'params': {
|
||||||
# rtmp download
|
# rtmp download
|
||||||
u'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# france4
|
# france4
|
||||||
{
|
{
|
||||||
u'url': u'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
|
'url': 'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'id': u'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
|
'id': 'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
|
||||||
u'ext': u'flv',
|
'ext': 'flv',
|
||||||
u'title': u'Hero Corp Making of - Extrait 1',
|
'title': 'Hero Corp Making of - Extrait 1',
|
||||||
u'description': u'md5:c87d54871b1790679aec1197e73d650a',
|
'description': 'md5:c87d54871b1790679aec1197e73d650a',
|
||||||
},
|
},
|
||||||
u'params': {
|
'params': {
|
||||||
# rtmp download
|
# rtmp download
|
||||||
u'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# france5
|
# france5
|
||||||
{
|
{
|
||||||
u'url': u'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
|
'url': 'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'id': u'92837968',
|
'id': '92837968',
|
||||||
u'ext': u'mp4',
|
'ext': 'mp4',
|
||||||
u'title': u'C à dire ?!',
|
'title': 'C à dire ?!',
|
||||||
u'description': u'md5:fb1db1cbad784dcce7c7a7bd177c8e2f',
|
'description': 'md5:fb1db1cbad784dcce7c7a7bd177c8e2f',
|
||||||
},
|
},
|
||||||
u'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
u'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# franceo
|
# franceo
|
||||||
{
|
{
|
||||||
u'url': u'http://www.franceo.fr/jt/info-afrique/04-12-2013',
|
'url': 'http://www.franceo.fr/jt/info-afrique/04-12-2013',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'id': u'92327925',
|
'id': '92327925',
|
||||||
u'ext': u'mp4',
|
'ext': 'mp4',
|
||||||
u'title': u'Infô-Afrique',
|
'title': 'Infô-Afrique',
|
||||||
u'description': u'md5:ebf346da789428841bee0fd2a935ea55',
|
'description': 'md5:ebf346da789428841bee0fd2a935ea55',
|
||||||
},
|
},
|
||||||
u'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
u'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
u'skip': u'The id changes frequently',
|
'skip': 'The id changes frequently',
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -160,26 +163,26 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
|
|||||||
'\.fr/\?id-video=([^"/&]+)'),
|
'\.fr/\?id-video=([^"/&]+)'),
|
||||||
(r'<a class="video" id="ftv_player_(.+?)"'),
|
(r'<a class="video" id="ftv_player_(.+?)"'),
|
||||||
]
|
]
|
||||||
video_id = self._html_search_regex(id_res, webpage, u'video ID')
|
video_id = self._html_search_regex(id_res, webpage, 'video ID')
|
||||||
else:
|
else:
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
return self._extract_video(video_id)
|
return self._extract_video(video_id)
|
||||||
|
|
||||||
|
|
||||||
class GenerationQuoiIE(InfoExtractor):
|
class GenerationQuoiIE(InfoExtractor):
|
||||||
IE_NAME = u'france2.fr:generation-quoi'
|
IE_NAME = 'france2.fr:generation-quoi'
|
||||||
_VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*)(\?|$)'
|
_VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*)(\?|$)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://generation-quoi.france2.fr/portrait/garde-a-vous',
|
'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous',
|
||||||
u'file': u'k7FJX8VBcvvLmX4wA5Q.mp4',
|
'file': 'k7FJX8VBcvvLmX4wA5Q.mp4',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'title': u'Génération Quoi - Garde à Vous',
|
'title': 'Génération Quoi - Garde à Vous',
|
||||||
u'uploader': u'Génération Quoi',
|
'uploader': 'Génération Quoi',
|
||||||
},
|
},
|
||||||
u'params': {
|
'params': {
|
||||||
# It uses Dailymotion
|
# It uses Dailymotion
|
||||||
u'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -194,20 +197,20 @@ class GenerationQuoiIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class CultureboxIE(FranceTVBaseInfoExtractor):
|
class CultureboxIE(FranceTVBaseInfoExtractor):
|
||||||
IE_NAME = u'culturebox.francetvinfo.fr'
|
IE_NAME = 'culturebox.francetvinfo.fr'
|
||||||
_VALID_URL = r'https?://culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
|
_VALID_URL = r'https?://culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813',
|
'url': 'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'id': u'EV_6785',
|
'id': 'EV_6785',
|
||||||
u'ext': u'mp4',
|
'ext': 'mp4',
|
||||||
u'title': u'Einstein on the beach au Théâtre du Châtelet',
|
'title': 'Einstein on the beach au Théâtre du Châtelet',
|
||||||
u'description': u'md5:9ce2888b1efefc617b5e58b3f6200eeb',
|
'description': 'md5:9ce2888b1efefc617b5e58b3f6200eeb',
|
||||||
},
|
},
|
||||||
u'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
u'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -215,5 +218,5 @@ class CultureboxIE(FranceTVBaseInfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
name = mobj.group('name')
|
name = mobj.group('name')
|
||||||
webpage = self._download_webpage(url, name)
|
webpage = self._download_webpage(url, name)
|
||||||
video_id = self._search_regex(r'"http://videos\.francetv\.fr/video/(.*?)"', webpage, u'video id')
|
video_id = self._search_regex(r'"http://videos\.francetv\.fr/video/(.*?)"', webpage, 'video id')
|
||||||
return self._extract_video(video_id)
|
return self._extract_video(video_id)
|
||||||
|
@@ -38,18 +38,6 @@ class GenericIE(InfoExtractor):
|
|||||||
'title': 'R\u00e9gis plante sa Jeep',
|
'title': 'R\u00e9gis plante sa Jeep',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
# embedded vimeo video
|
|
||||||
{
|
|
||||||
'add_ie': ['Vimeo'],
|
|
||||||
'url': 'http://skillsmatter.com/podcast/home/move-semanticsperfect-forwarding-and-rvalue-references',
|
|
||||||
'file': '22444065.mp4',
|
|
||||||
'md5': '2903896e23df39722c33f015af0666e2',
|
|
||||||
'info_dict': {
|
|
||||||
'title': 'ACCU 2011: Move Semantics,Perfect Forwarding, and Rvalue references- Scott Meyers- 13/04/2011',
|
|
||||||
'uploader_id': 'skillsmatter',
|
|
||||||
'uploader': 'Skills Matter',
|
|
||||||
}
|
|
||||||
},
|
|
||||||
# bandcamp page with custom domain
|
# bandcamp page with custom domain
|
||||||
{
|
{
|
||||||
'add_ie': ['Bandcamp'],
|
'add_ie': ['Bandcamp'],
|
||||||
@@ -246,11 +234,21 @@ class GenericIE(InfoExtractor):
|
|||||||
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
|
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
|
||||||
|
|
||||||
# Look for BrightCove:
|
# Look for BrightCove:
|
||||||
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
|
bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
|
||||||
if bc_url is not None:
|
if bc_urls:
|
||||||
self.to_screen('Brightcove video detected.')
|
self.to_screen('Brightcove video detected.')
|
||||||
surl = smuggle_url(bc_url, {'Referer': url})
|
entries = [{
|
||||||
return self.url_result(surl, 'Brightcove')
|
'_type': 'url',
|
||||||
|
'url': smuggle_url(bc_url, {'Referer': url}),
|
||||||
|
'ie_key': 'Brightcove'
|
||||||
|
} for bc_url in bc_urls]
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'title': video_title,
|
||||||
|
'id': video_id,
|
||||||
|
'entries': entries,
|
||||||
|
}
|
||||||
|
|
||||||
# Look for embedded (iframe) Vimeo player
|
# Look for embedded (iframe) Vimeo player
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
|
@@ -10,7 +10,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class InfoQIE(InfoExtractor):
|
class InfoQIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'
|
_VALID_URL = r'https?://(?:www\.)?infoq\.com/[^/]+/(?P<id>[^/]+)$'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
"name": "InfoQ",
|
"name": "InfoQ",
|
||||||
"url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
|
"url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
|
||||||
@@ -26,9 +26,9 @@ class InfoQIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id=url)
|
webpage = self._download_webpage(url, video_id)
|
||||||
self.report_extraction(url)
|
|
||||||
|
|
||||||
# Extract video URL
|
# Extract video URL
|
||||||
encoded_id = self._search_regex(r"jsclassref ?= ?'([^']*)'", webpage, 'encoded id')
|
encoded_id = self._search_regex(r"jsclassref ?= ?'([^']*)'", webpage, 'encoded id')
|
||||||
@@ -50,6 +50,6 @@ class InfoQIE(InfoExtractor):
|
|||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'ext': extension, # Extension is always(?) mp4, but seems to be flv
|
'ext': extension, # Extension is always(?) mp4, but seems to be flv
|
||||||
'description': video_description,
|
'description': video_description,
|
||||||
}
|
}
|
||||||
|
85
youtube_dl/extractor/iprima.py
Normal file
85
youtube_dl/extractor/iprima.py
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
from random import random
|
||||||
|
from math import floor
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import compat_urllib_request
|
||||||
|
|
||||||
|
|
||||||
|
class IPrimaIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://play\.iprima\.cz/(?P<videogroup>.+)/(?P<videoid>.+)'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://play.iprima.cz/particka/particka-92',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '39152',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Partička (92)',
|
||||||
|
'description': 'md5:3740fda51464da35a2d4d0670b8e4fd6',
|
||||||
|
'thumbnail': 'http://play.iprima.cz/sites/default/files/image_crops/image_620x349/3/491483_particka-92_image_620x349.jpg',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('videoid')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
player_url = 'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' % (
|
||||||
|
floor(random()*1073741824),
|
||||||
|
floor(random()*1073741824))
|
||||||
|
|
||||||
|
req = compat_urllib_request.Request(player_url)
|
||||||
|
req.add_header('Referer', url)
|
||||||
|
playerpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
|
base_url = ''.join(re.findall(r"embed\['stream'\] = '(.+?)'.+'(\?auth=)'.+'(.+?)';", playerpage)[1])
|
||||||
|
|
||||||
|
zoneGEO = self._html_search_regex(r'"zoneGEO":(.+?),', webpage, 'zoneGEO')
|
||||||
|
|
||||||
|
if zoneGEO != '0':
|
||||||
|
base_url = base_url.replace('token', 'token_'+zoneGEO)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for format_id in ['lq', 'hq', 'hd']:
|
||||||
|
filename = self._html_search_regex(r'"%s_id":(.+?),' % format_id, webpage, 'filename')
|
||||||
|
|
||||||
|
if filename == 'null':
|
||||||
|
continue
|
||||||
|
|
||||||
|
real_id = self._search_regex(r'Prima-[0-9]{10}-([0-9]+)_', filename, 'real video id')
|
||||||
|
|
||||||
|
if format_id == 'lq':
|
||||||
|
quality = 0
|
||||||
|
elif format_id == 'hq':
|
||||||
|
quality = 1
|
||||||
|
elif format_id == 'hd':
|
||||||
|
quality = 2
|
||||||
|
filename = 'hq/'+filename
|
||||||
|
|
||||||
|
formats.append({
|
||||||
|
'format_id': format_id,
|
||||||
|
'url': base_url,
|
||||||
|
'quality': quality,
|
||||||
|
'play_path': 'mp4:'+filename.replace('"', '')[:-4],
|
||||||
|
'rtmp_live': True,
|
||||||
|
'ext': 'flv',
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': real_id,
|
||||||
|
'title': self._og_search_title(webpage),
|
||||||
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
|
'formats': formats,
|
||||||
|
'description': self._og_search_description(webpage),
|
||||||
|
}
|
63
youtube_dl/extractor/lifenews.py
Normal file
63
youtube_dl/extractor/lifenews.py
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import unified_strdate
|
||||||
|
|
||||||
|
|
||||||
|
class LifeNewsIE(InfoExtractor):
|
||||||
|
IE_NAME = 'lifenews'
|
||||||
|
IE_DESC = 'LIFE | NEWS'
|
||||||
|
_VALID_URL = r'http://lifenews\.ru/(?:mobile/)?news/(?P<id>\d+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://lifenews.ru/news/126342',
|
||||||
|
'file': '126342.mp4',
|
||||||
|
'md5': 'e1b50a5c5fb98a6a544250f2e0db570a',
|
||||||
|
'info_dict': {
|
||||||
|
'title': 'МВД разыскивает мужчин, оставивших в IKEA сумку с автоматом',
|
||||||
|
'description': 'Камеры наблюдения гипермаркета зафиксировали троих мужчин, спрятавших оружейный арсенал в камере хранения.',
|
||||||
|
'thumbnail': 'http://lifenews.ru/static/posts/2014/1/126342/.video.jpg',
|
||||||
|
'upload_date': '20140130',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage('http://lifenews.ru/mobile/news/%s' % video_id, video_id, 'Downloading page')
|
||||||
|
|
||||||
|
video_url = self._html_search_regex(
|
||||||
|
r'<video.*?src="([^"]+)".*?></video>', webpage, 'video URL')
|
||||||
|
|
||||||
|
thumbnail = self._html_search_regex(
|
||||||
|
r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail')
|
||||||
|
|
||||||
|
title = self._og_search_title(webpage)
|
||||||
|
TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS'
|
||||||
|
if title.endswith(TITLE_SUFFIX):
|
||||||
|
title = title[:-len(TITLE_SUFFIX)]
|
||||||
|
|
||||||
|
description = self._og_search_description(webpage)
|
||||||
|
|
||||||
|
view_count = self._html_search_regex(
|
||||||
|
r'<div class=\'views\'>(\d+)</div>', webpage, 'view count')
|
||||||
|
comment_count = self._html_search_regex(
|
||||||
|
r'<div class=\'comments\'>(\d+)</div>', webpage, 'comment count')
|
||||||
|
|
||||||
|
upload_date = self._html_search_regex(
|
||||||
|
r'<time datetime=\'([^\']+)\'>', webpage, 'upload date')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'url': video_url,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'view_count': view_count,
|
||||||
|
'comment_count': comment_count,
|
||||||
|
'upload_date': unified_strdate(upload_date),
|
||||||
|
}
|
@@ -4,14 +4,11 @@ import json
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class LiveLeakIE(InfoExtractor):
|
class LiveLeakIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
|
_VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.liveleak.com/view?i=757_1364311680',
|
'url': 'http://www.liveleak.com/view?i=757_1364311680',
|
||||||
'file': '757_1364311680.mp4',
|
'file': '757_1364311680.mp4',
|
||||||
'md5': '0813c2430bea7a46bf13acf3406992f4',
|
'md5': '0813c2430bea7a46bf13acf3406992f4',
|
||||||
@@ -20,7 +17,17 @@ class LiveLeakIE(InfoExtractor):
|
|||||||
'uploader': 'ljfriel2',
|
'uploader': 'ljfriel2',
|
||||||
'title': 'Most unlucky car accident'
|
'title': 'Most unlucky car accident'
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.liveleak.com/view?i=f93_1390833151',
|
||||||
|
'file': 'f93_1390833151.mp4',
|
||||||
|
'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
|
||||||
|
'info_dict': {
|
||||||
|
'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
|
||||||
|
'uploader': 'ARD_Stinkt',
|
||||||
|
'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
@@ -28,7 +35,11 @@ class LiveLeakIE(InfoExtractor):
|
|||||||
video_id = mobj.group('video_id')
|
video_id = mobj.group('video_id')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
sources_raw = self._search_regex(
|
sources_raw = self._search_regex(
|
||||||
r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs')
|
r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs', default=None)
|
||||||
|
if sources_raw is None:
|
||||||
|
sources_raw = '[{ %s}]' % (
|
||||||
|
self._search_regex(r'(file: ".*?"),', webpage, 'video URL'))
|
||||||
|
|
||||||
sources_json = re.sub(r'\s([a-z]+):\s', r'"\1": ', sources_raw)
|
sources_json = re.sub(r'\s([a-z]+):\s', r'"\1": ', sources_raw)
|
||||||
sources = json.loads(sources_json)
|
sources = json.loads(sources_json)
|
||||||
|
|
||||||
|
@@ -16,7 +16,8 @@ class MalemotionIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
"title": "Bien dur",
|
"title": "Bien dur",
|
||||||
"age_limit": 18,
|
"age_limit": 18,
|
||||||
}
|
},
|
||||||
|
'skip': 'This video has been deleted.'
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
114
youtube_dl/extractor/mooshare.py
Normal file
114
youtube_dl/extractor/mooshare.py
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
compat_urllib_request,
|
||||||
|
compat_urllib_parse,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MooshareIE(InfoExtractor):
    """Information extractor for videos hosted on mooshare.biz.

    The site shows a landing page with a hidden ``hash`` form field; posting
    that form back (after a short wait) yields the player page, from which
    the SD, HD and RTMP variants are scraped.
    """
    IE_NAME = 'mooshare'
    IE_DESC = 'Mooshare.biz'
    _VALID_URL = r'http://mooshare\.biz/(?P<id>[\da-z]{12})'

    _TESTS = [
        {
            'url': 'http://mooshare.biz/8dqtk4bjbp8g',
            'md5': '4e14f9562928aecd2e42c6f341c8feba',
            'info_dict': {
                'id': '8dqtk4bjbp8g',
                'ext': 'mp4',
                'title': 'Comedy Football 2011 - (part 1-2)',
                'duration': 893,
            },
        },
        {
            'url': 'http://mooshare.biz/aipjtoc4g95j',
            'info_dict': {
                'id': 'aipjtoc4g95j',
                'ext': 'mp4',
                'title': 'Orange Caramel  Dashing Through the Snow',
                'duration': 212,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            }
        }
    ]

    def _real_extract(self, url):
        """Return the info dict (id, title, thumbnail, duration, formats).

        Raises ExtractorError (expected) when the landing page reports that
        the video was not found or has been deleted.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        page = self._download_webpage(url, video_id, 'Downloading page')

        if re.search(r'>Video Not Found or Deleted<', page) is not None:
            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        hash_key = self._html_search_regex(r'<input type="hidden" name="hash" value="([^"]+)">', page, 'hash')
        title = self._html_search_regex(r'(?m)<div class="blockTitle">\s*<h2>Watch ([^<]+)</h2>', page, 'title')

        download_form = {
            'op': 'download1',
            'id': video_id,
            'hash': hash_key,
        }

        # Post the form to the page of the video being extracted (not to a
        # fixed video), and send the body as bytes: urlencode() output is
        # pure ASCII and Python 3's urllib refuses text POST data.
        request = compat_urllib_request.Request(
            'http://mooshare.biz/%s' % video_id,
            compat_urllib_parse.urlencode(download_form).encode('ascii'))
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')

        # Pause before submitting the form (see the message below).
        self.to_screen('%s: Waiting for timeout' % video_id)
        time.sleep(5)

        video_page = self._download_webpage(request, video_id, 'Downloading video page')

        thumbnail = self._html_search_regex(r'image:\s*"([^"]+)",', video_page, 'thumbnail', fatal=False)
        duration_str = self._html_search_regex(r'duration:\s*"(\d+)",', video_page, 'duration', fatal=False)
        duration = int(duration_str) if duration_str is not None else None

        formats = []

        # SD video
        mobj = re.search(r'(?m)file:\s*"(?P<url>[^"]+)",\s*provider:', video_page)
        if mobj is not None:
            formats.append({
                'url': mobj.group('url'),
                'format_id': 'sd',
                'format': 'SD',
            })

        # HD video
        mobj = re.search(r'\'hd-2\': { file: \'(?P<url>[^\']+)\' },', video_page)
        if mobj is not None:
            formats.append({
                'url': mobj.group('url'),
                'format_id': 'hd',
                'format': 'HD',
            })

        # rtmp video
        mobj = re.search(r'(?m)file: "(?P<playpath>[^"]+)",\s*streamer: "(?P<rtmpurl>rtmp://[^"]+)",', video_page)
        if mobj is not None:
            formats.append({
                'url': mobj.group('rtmpurl'),
                'play_path': mobj.group('playpath'),
                'rtmp_live': False,
                'ext': 'mp4',
                'format_id': 'rtmp',
                'format': 'HD',
            })

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }
|
@@ -119,7 +119,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
if mgid.endswith('.swf'):
|
if mgid.endswith('.swf'):
|
||||||
mgid = mgid[:-4]
|
mgid = mgid[:-4]
|
||||||
except RegexNotFoundError:
|
except RegexNotFoundError:
|
||||||
mgid = self._search_regex(r'data-mgid="(.*?)"', webpage, u'mgid')
|
mgid = self._search_regex(
|
||||||
|
[r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
|
||||||
|
webpage, u'mgid')
|
||||||
return self._get_videos_info(mgid)
|
return self._get_videos_info(mgid)
|
||||||
|
|
||||||
|
|
||||||
|
@@ -4,18 +4,18 @@ import json
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import determine_ext
|
|
||||||
|
|
||||||
|
|
||||||
class NewgroundsIE(InfoExtractor):
|
class NewgroundsIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:https?://)?(?:www\.)?newgrounds\.com/audio/listen/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?newgrounds\.com/audio/listen/(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.newgrounds.com/audio/listen/549479',
|
'url': 'http://www.newgrounds.com/audio/listen/549479',
|
||||||
'file': '549479.mp3',
|
|
||||||
'md5': 'fe6033d297591288fa1c1f780386f07a',
|
'md5': 'fe6033d297591288fa1c1f780386f07a',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
"title": "B7 - BusMode",
|
'id': '549479',
|
||||||
"uploader": "Burn7",
|
'ext': 'mp3',
|
||||||
|
'title': 'B7 - BusMode',
|
||||||
|
'uploader': 'Burn7',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
61
youtube_dl/extractor/normalboots.py
Normal file
61
youtube_dl/extractor/normalboots.py
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
class NormalbootsIE(InfoExtractor):
    """Information extractor for videos published on normalboots.com.

    The article page supplies the metadata; the media file name is read
    from the embedded player page it links to through an iframe.
    """
    _VALID_URL = r'(?:http://)?(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$'
    _TEST = {
        u'url': u'http://normalboots.com/video/home-alone-games-jontron/',
        u'file': u'home-alone-games-jontron.mp4',
        u'md5': u'8bf6de238915dd501105b44ef5f1e0f6',
        u'info_dict': {
            u'title': u'Home Alone Games - JonTron - NormalBoots',
            u'description': u'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for \u2018Tense Battle Theme\u2019:\xa0http://www.youtube.com/Kiamet/',
            u'uploader': u'JonTron',
            u'upload_date': u'20140125',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video at ``url``.

        Raises ExtractorError when ``url`` does not match ``_VALID_URL``.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('videoid')

        # _VALID_URL accepts scheme-less URLs; add the scheme before fetching.
        if url[:4] != 'http':
            url = 'http://' + url

        webpage = self._download_webpage(url, video_id)
        video_title = self._og_search_title(webpage)
        video_description = self._og_search_description(webpage)
        video_thumbnail = self._og_search_thumbnail(webpage)
        video_uploader = self._html_search_regex(
            r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
            webpage, 'uploader')
        raw_upload_date = self._html_search_regex(
            r'<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
            webpage, 'date')
        # Converted exactly once (the conversion used to be repeated).
        video_upload_date = unified_strdate(raw_upload_date)

        # The media file name is only available on the embedded player page.
        player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url')
        player_page = self._download_webpage(player_url, video_id)
        video_url = u'http://player.screenwavemedia.com/' + self._html_search_regex(r"'file':\s'(?P<file>[0-9A-Za-z-_\.]+)'", player_page, 'file')

        return {
            'id': video_id,
            'url': video_url,
            'title': video_title,
            'description': video_description,
            'thumbnail': video_thumbnail,
            'uploader': video_uploader,
            'upload_date': video_upload_date,
        }
|
@@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
|
|
||||||
@@ -5,30 +7,63 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class PBSIE(InfoExtractor):
|
class PBSIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://video\.pbs\.org/video/(?P<id>\d+)/?'
|
_VALID_URL = r'''(?x)https?://
|
||||||
|
(?:
|
||||||
|
# Direct video URL
|
||||||
|
video\.pbs\.org/video/(?P<id>[0-9]+)/? |
|
||||||
|
# Article with embedded player
|
||||||
|
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
|
||||||
|
# Player
|
||||||
|
video\.pbs\.org/partnerplayer/(?P<player_id>[^/]+)/
|
||||||
|
)
|
||||||
|
'''
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://video.pbs.org/video/2365006249/',
|
'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
|
||||||
u'file': u'2365006249.mp4',
|
'md5': 'ce1888486f0908d555a8093cac9a7362',
|
||||||
u'md5': 'ce1888486f0908d555a8093cac9a7362',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '2365006249',
|
||||||
u'title': u'A More Perfect Union',
|
'ext': 'mp4',
|
||||||
u'description': u'md5:ba0c207295339c8d6eced00b7c363c6a',
|
'title': 'A More Perfect Union',
|
||||||
u'duration': 3190,
|
'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
|
||||||
|
'duration': 3190,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
presumptive_id = mobj.group('presumptive_id')
|
||||||
|
display_id = presumptive_id
|
||||||
|
if presumptive_id:
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
url = self._search_regex(
|
||||||
|
r'<iframe\s+id=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
|
||||||
|
webpage, 'player URL')
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
|
||||||
|
player_id = mobj.group('player_id')
|
||||||
|
if not display_id:
|
||||||
|
display_id = player_id
|
||||||
|
if player_id:
|
||||||
|
player_page = self._download_webpage(
|
||||||
|
url, display_id, note='Downloading player page',
|
||||||
|
errnote='Could not download player page')
|
||||||
|
video_id = self._search_regex(
|
||||||
|
r'<div\s+id="video_([0-9]+)"', player_page, 'video ID')
|
||||||
|
else:
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
display_id = video_id
|
||||||
|
|
||||||
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
|
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
|
||||||
info_page = self._download_webpage(info_url, video_id)
|
info = self._download_json(info_url, display_id)
|
||||||
info =json.loads(info_page)
|
|
||||||
return {'id': video_id,
|
return {
|
||||||
'title': info['title'],
|
'id': video_id,
|
||||||
'url': info['alternate_encoding']['url'],
|
'title': info['title'],
|
||||||
'ext': 'mp4',
|
'url': info['alternate_encoding']['url'],
|
||||||
'description': info['program'].get('description'),
|
'ext': 'mp4',
|
||||||
'thumbnail': info.get('image_url'),
|
'description': info['program'].get('description'),
|
||||||
'duration': info.get('duration'),
|
'thumbnail': info.get('image_url'),
|
||||||
}
|
'duration': info.get('duration'),
|
||||||
|
}
|
||||||
|
@@ -6,8 +6,6 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_parse_urlparse,
|
|
||||||
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -16,9 +14,10 @@ class RBMARadioIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'
|
_VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011',
|
'url': 'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011',
|
||||||
'file': 'ford-lopatin-live-at-primavera-sound-2011.mp3',
|
|
||||||
'md5': '6bc6f9bcb18994b4c983bc3bf4384d95',
|
'md5': '6bc6f9bcb18994b4c983bc3bf4384d95',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'ford-lopatin-live-at-primavera-sound-2011',
|
||||||
|
'ext': 'mp3',
|
||||||
"uploader_id": "ford-lopatin",
|
"uploader_id": "ford-lopatin",
|
||||||
"location": "Spain",
|
"location": "Spain",
|
||||||
"description": "Joel Ford and Daniel ’Oneohtrix Point Never’ Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.",
|
"description": "Joel Ford and Daniel ’Oneohtrix Point Never’ Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.",
|
||||||
@@ -42,7 +41,6 @@ class RBMARadioIE(InfoExtractor):
|
|||||||
raise ExtractorError('Invalid JSON: ' + str(e))
|
raise ExtractorError('Invalid JSON: ' + str(e))
|
||||||
|
|
||||||
video_url = data['akamai_url'] + '&cbr=256'
|
video_url = data['akamai_url'] + '&cbr=256'
|
||||||
url_parts = compat_urllib_parse_urlparse(video_url)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@@ -11,12 +13,12 @@ class Ro220IE(InfoExtractor):
|
|||||||
IE_NAME = '220.ro'
|
IE_NAME = '220.ro'
|
||||||
_VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)'
|
_VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
|
"url": "http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
|
||||||
u'file': u'LYV6doKo7f.mp4',
|
'file': 'LYV6doKo7f.mp4',
|
||||||
u'md5': u'03af18b73a07b4088753930db7a34add',
|
'md5': '03af18b73a07b4088753930db7a34add',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u"title": u"Luati-le Banii sez 4 ep 1",
|
"title": "Luati-le Banii sez 4 ep 1",
|
||||||
u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
|
"description": "Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -27,10 +29,10 @@ class Ro220IE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
flashVars_str = self._search_regex(
|
flashVars_str = self._search_regex(
|
||||||
r'<param name="flashVars" value="([^"]+)"',
|
r'<param name="flashVars" value="([^"]+)"',
|
||||||
webpage, u'flashVars')
|
webpage, 'flashVars')
|
||||||
flashVars = compat_parse_qs(flashVars_str)
|
flashVars = compat_parse_qs(flashVars_str)
|
||||||
|
|
||||||
info = {
|
return {
|
||||||
'_type': 'video',
|
'_type': 'video',
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -39,4 +41,3 @@ class Ro220IE(InfoExtractor):
|
|||||||
'description': clean_html(flashVars['desc'][0]),
|
'description': clean_html(flashVars['desc'][0]),
|
||||||
'thumbnail': flashVars['preview'][0],
|
'thumbnail': flashVars['preview'][0],
|
||||||
}
|
}
|
||||||
return info
|
|
||||||
|
@@ -1,34 +1,36 @@
|
|||||||
import re
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .mtv import MTVServicesInfoExtractor
|
from .mtv import MTVServicesInfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class SouthParkStudiosIE(MTVServicesInfoExtractor):
|
class SouthParkStudiosIE(MTVServicesInfoExtractor):
|
||||||
IE_NAME = u'southparkstudios.com'
|
IE_NAME = 'southparkstudios.com'
|
||||||
_VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
|
_VALID_URL = r'https?://(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
|
||||||
|
|
||||||
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
|
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
|
'url': 'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
|
||||||
u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': 'a7bff6c2-ed00-11e0-aca6-0026b9414f30',
|
||||||
u'title': u'Bat Daded',
|
'ext': 'mp4',
|
||||||
u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.',
|
'title': 'Bat Daded',
|
||||||
|
'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.',
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
|
||||||
class SouthparkDeIE(SouthParkStudiosIE):
|
class SouthparkDeIE(SouthParkStudiosIE):
|
||||||
IE_NAME = u'southpark.de'
|
IE_NAME = 'southpark.de'
|
||||||
_VALID_URL = r'(https?://)?(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
|
_VALID_URL = r'https?://(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
|
||||||
_FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'
|
_FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
u'url': u'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured',
|
'url': 'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured',
|
||||||
u'file': u'85487c96-b3b9-4e39-9127-ad88583d9bf2.mp4',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '85487c96-b3b9-4e39-9127-ad88583d9bf2',
|
||||||
u'title': u'The Government Won\'t Respect My Privacy',
|
'ext': 'mp4',
|
||||||
u'description': u'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
|
'title': 'The Government Won\'t Respect My Privacy',
|
||||||
|
'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
@@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@@ -6,20 +8,20 @@ from .common import InfoExtractor
|
|||||||
class SpiegelIE(InfoExtractor):
|
class SpiegelIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
|
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
|
'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
|
||||||
u'file': u'1259285.mp4',
|
'file': '1259285.mp4',
|
||||||
u'md5': u'2c2754212136f35fb4b19767d242f66e',
|
'md5': '2c2754212136f35fb4b19767d242f66e',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv"
|
'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv',
|
||||||
}
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
u'url': u'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
|
'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
|
||||||
u'file': u'1309159.mp4',
|
'file': '1309159.mp4',
|
||||||
u'md5': u'f2cdf638d7aa47654e251e1aee360af1',
|
'md5': 'f2cdf638d7aa47654e251e1aee360af1',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'title': u'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers'
|
'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers',
|
||||||
}
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -29,17 +31,17 @@ class SpiegelIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_title = self._html_search_regex(
|
video_title = self._html_search_regex(
|
||||||
r'<div class="module-title">(.*?)</div>', webpage, u'title')
|
r'<div class="module-title">(.*?)</div>', webpage, 'title')
|
||||||
|
|
||||||
xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
|
xml_url = 'http://video2.spiegel.de/flash/' + video_id + '.xml'
|
||||||
idoc = self._download_xml(
|
idoc = self._download_xml(
|
||||||
xml_url, video_id,
|
xml_url, video_id,
|
||||||
note=u'Downloading XML', errnote=u'Failed to download XML')
|
note='Downloading XML', errnote='Failed to download XML')
|
||||||
|
|
||||||
formats = [
|
formats = [
|
||||||
{
|
{
|
||||||
'format_id': n.tag.rpartition('type')[2],
|
'format_id': n.tag.rpartition('type')[2],
|
||||||
'url': u'http://video2.spiegel.de/flash/' + n.find('./filename').text,
|
'url': 'http://video2.spiegel.de/flash/' + n.find('./filename').text,
|
||||||
'width': int(n.find('./width').text),
|
'width': int(n.find('./width').text),
|
||||||
'height': int(n.find('./height').text),
|
'height': int(n.find('./height').text),
|
||||||
'abr': int(n.find('./audiobitrate').text),
|
'abr': int(n.find('./audiobitrate').text),
|
||||||
@@ -55,10 +57,9 @@ class SpiegelIE(InfoExtractor):
|
|||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
info = {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
return info
|
|
||||||
|
@@ -62,10 +62,13 @@ class SubtitlesInfoExtractor(InfoExtractor):
|
|||||||
subtitles[sub_lang] = subtitle
|
subtitles[sub_lang] = subtitle
|
||||||
return subtitles
|
return subtitles
|
||||||
|
|
||||||
|
def _download_subtitle_url(self, sub_lang, url):
    """Fetch the subtitle document at ``url`` and return its content.

    ``sub_lang`` is not used by this implementation. The download is
    delegated to ``_download_webpage`` with no video id and ``note=False``.
    """
    subtitle_data = self._download_webpage(url, None, note=False)
    return subtitle_data
|
||||||
|
|
||||||
def _request_subtitle_url(self, sub_lang, url):
|
def _request_subtitle_url(self, sub_lang, url):
|
||||||
""" makes the http request for the subtitle """
|
""" makes the http request for the subtitle """
|
||||||
try:
|
try:
|
||||||
sub = self._download_webpage(url, None, note=False)
|
sub = self._download_subtitle_url(sub_lang, url)
|
||||||
except ExtractorError as err:
|
except ExtractorError as err:
|
||||||
self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
|
self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
|
||||||
return
|
return
|
||||||
@@ -79,7 +82,11 @@ class SubtitlesInfoExtractor(InfoExtractor):
|
|||||||
returns {sub_lang: url} or {} if not available
|
returns {sub_lang: url} or {} if not available
|
||||||
Must be redefined by the subclasses
|
Must be redefined by the subclasses
|
||||||
"""
|
"""
|
||||||
pass
|
|
||||||
|
# By default, allow implementations to simply pass in the result
|
||||||
|
assert isinstance(webpage, dict), \
|
||||||
|
'_get_available_subtitles not implemented'
|
||||||
|
return webpage
|
||||||
|
|
||||||
def _get_available_automatic_caption(self, video_id, webpage):
|
def _get_available_automatic_caption(self, video_id, webpage):
|
||||||
"""
|
"""
|
||||||
|
50
youtube_dl/extractor/tinypic.py
Normal file
50
youtube_dl/extractor/tinypic.py
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from youtube_dl.utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class TinyPicIE(InfoExtractor):
    """Information extractor for videos hosted on tinypic.com.

    The player page embeds the file id and server id as flash variables;
    the media and thumbnail URLs are built from those two values.
    """
    IE_NAME = 'tinypic'
    IE_DESC = 'tinypic.com videos'
    _VALID_URL = r'http://tinypic\.com/player\.php\?v=(?P<id>[^&]+)&s=\d+'

    _TEST = {
        'url': 'http://tinypic.com/player.php?v=6xw7tc%3E&s=5#.UtqZmbRFCM8',
        'md5': '609b74432465364e72727ebc6203f044',
        'info_dict': {
            'id': '6xw7tc',
            'ext': 'flv',
            'title': 'shadow phenomenon weird',
        }
    }

    def _real_extract(self, url):
        """Return the info dict (id, url, thumbnail, title) for ``url``.

        Raises ExtractorError (expected) when the page does not contain the
        flash variables describing the video.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id, 'Downloading page')

        # Both fragments are raw strings so the regex escapes reach the
        # regex engine untouched instead of relying on Python passing
        # unknown string escapes through.
        mobj = re.search(
            r'(?m)fo\.addVariable\("file",\s"(?P<fileid>[\da-z]+)"\);\n'
            r'\s+fo\.addVariable\("s",\s"(?P<serverid>\d+)"\);', webpage)
        if mobj is None:
            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        file_id = mobj.group('fileid')
        server_id = mobj.group('serverid')

        # The title is the keywords meta tag minus a fixed site-wide suffix.
        KEYWORDS_SUFFIX = ', Video, images, photos, videos, myspace, ebay, video hosting, photo hosting'
        keywords = self._html_search_meta('keywords', webpage, 'title')
        # NOTE(review): guards against the meta lookup yielding None when the
        # tag is absent - confirm against _html_search_meta.
        if keywords is not None and keywords.endswith(KEYWORDS_SUFFIX):
            title = keywords[:-len(KEYWORDS_SUFFIX)]
        else:
            title = ''

        video_url = 'http://v%s.tinypic.com/%s.flv' % (server_id, file_id)
        thumbnail = 'http://v%s.tinypic.com/%s_th.jpg' % (server_id, file_id)

        return {
            'id': file_id,
            'url': video_url,
            'thumbnail': thumbnail,
            'title': title
        }
|
@@ -1,17 +1,21 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class TrailerAddictIE(InfoExtractor):
|
class TrailerAddictIE(InfoExtractor):
|
||||||
|
_WORKING = False
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'
|
_VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
|
'url': 'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
|
||||||
u'file': u'76184.mp4',
|
'md5': '41365557f3c8c397d091da510e73ceb4',
|
||||||
u'md5': u'57e39dbcf4142ceb8e1f242ff423fd71',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '76184',
|
||||||
u"title": u"Prince Avalanche Trailer",
|
'ext': 'mp4',
|
||||||
u"description": u"Trailer for Prince Avalanche.Two highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind."
|
'title': 'Prince Avalanche Trailer',
|
||||||
|
'description': 'Trailer for Prince Avalanche.\n\nTwo highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind.',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -22,9 +26,15 @@ class TrailerAddictIE(InfoExtractor):
|
|||||||
|
|
||||||
title = self._search_regex(r'<title>(.+?)</title>',
|
title = self._search_regex(r'<title>(.+?)</title>',
|
||||||
webpage, 'video title').replace(' - Trailer Addict','')
|
webpage, 'video title').replace(' - Trailer Addict','')
|
||||||
view_count = self._search_regex(r'Views: (.+?)<br />',
|
view_count_str = self._search_regex(
|
||||||
webpage, 'Views Count')
|
r'<span class="views_n">([0-9,.]+)</span>',
|
||||||
video_id = self._og_search_property('video', webpage, 'Video id').split('=')[1]
|
webpage, 'view count', fatal=False)
|
||||||
|
view_count = (
|
||||||
|
None if view_count_str is None
|
||||||
|
else int(view_count_str.replace(',', '')))
|
||||||
|
video_id = self._search_regex(
|
||||||
|
r'<param\s+name="movie"\s+value="/emb/([0-9]+)"\s*/>',
|
||||||
|
webpage, 'video id')
|
||||||
|
|
||||||
# Presence of (no)watchplus function indicates HD quality is available
|
# Presence of (no)watchplus function indicates HD quality is available
|
||||||
if re.search(r'function (no)?watchplus()', webpage):
|
if re.search(r'function (no)?watchplus()', webpage):
|
||||||
@@ -39,14 +49,16 @@ class TrailerAddictIE(InfoExtractor):
|
|||||||
info_webpage, 'Download url').replace('%3F','?')
|
info_webpage, 'Download url').replace('%3F','?')
|
||||||
thumbnail_url = self._search_regex(r'&image=(.+?)&',
|
thumbnail_url = self._search_regex(r'&image=(.+?)&',
|
||||||
info_webpage, 'thumbnail url')
|
info_webpage, 'thumbnail url')
|
||||||
ext = final_url.split('.')[-1].split('?')[0]
|
|
||||||
|
|
||||||
return [{
|
description = self._html_search_regex(
|
||||||
'id' : video_id,
|
r'(?s)<div class="synopsis">.*?<div class="movie_label_info"[^>]*>(.*?)</div>',
|
||||||
'url' : final_url,
|
webpage, 'description', fatal=False)
|
||||||
'ext' : ext,
|
|
||||||
'title' : title,
|
return {
|
||||||
'thumbnail' : thumbnail_url,
|
'id': video_id,
|
||||||
'description' : self._og_search_description(webpage),
|
'url': final_url,
|
||||||
'view_count' : view_count,
|
'title': title,
|
||||||
}]
|
'thumbnail': thumbnail_url,
|
||||||
|
'description': description,
|
||||||
|
'view_count': view_count,
|
||||||
|
}
|
||||||
|
@@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
@@ -10,48 +12,48 @@ from ..utils import (
|
|||||||
|
|
||||||
class UstreamIE(InfoExtractor):
|
class UstreamIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
|
_VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
|
||||||
IE_NAME = u'ustream'
|
IE_NAME = 'ustream'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.ustream.tv/recorded/20274954',
|
'url': 'http://www.ustream.tv/recorded/20274954',
|
||||||
u'file': u'20274954.flv',
|
'file': '20274954.flv',
|
||||||
u'md5': u'088f151799e8f572f84eb62f17d73e5c',
|
'md5': '088f151799e8f572f84eb62f17d73e5c',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u"uploader": u"Young Americans for Liberty",
|
"uploader": "Young Americans for Liberty",
|
||||||
u"title": u"Young Americans for Liberty February 7, 2012 2:28 AM"
|
"title": "Young Americans for Liberty February 7, 2012 2:28 AM",
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
m = re.match(self._VALID_URL, url)
|
m = re.match(self._VALID_URL, url)
|
||||||
video_id = m.group('videoID')
|
video_id = m.group('videoID')
|
||||||
|
|
||||||
video_url = u'http://tcdn.ustream.tv/video/%s' % video_id
|
video_url = 'http://tcdn.ustream.tv/video/%s' % video_id
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
|
video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
|
||||||
webpage, u'title')
|
webpage, 'title')
|
||||||
|
|
||||||
uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
|
uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
|
||||||
webpage, u'uploader', fatal=False, flags=re.DOTALL)
|
webpage, 'uploader', fatal=False, flags=re.DOTALL)
|
||||||
|
|
||||||
thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
|
thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
|
||||||
webpage, u'thumbnail', fatal=False)
|
webpage, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'url': video_url,
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': video_title,
|
||||||
|
'uploader': uploader,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
}
|
||||||
|
|
||||||
info = {
|
|
||||||
'id': video_id,
|
|
||||||
'url': video_url,
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': video_title,
|
|
||||||
'uploader': uploader,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
}
|
|
||||||
return info
|
|
||||||
|
|
||||||
class UstreamChannelIE(InfoExtractor):
|
class UstreamChannelIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
|
_VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
|
||||||
IE_NAME = u'ustream:channel'
|
IE_NAME = 'ustream:channel'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
m = re.match(self._VALID_URL, url)
|
m = re.match(self._VALID_URL, url)
|
||||||
|
@@ -1,5 +1,6 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
@@ -22,16 +23,16 @@ class VevoIE(InfoExtractor):
|
|||||||
vevo:)
|
vevo:)
|
||||||
(?P<id>[^&?#]+)'''
|
(?P<id>[^&?#]+)'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
||||||
u'file': u'GB1101300280.mp4',
|
'file': 'GB1101300280.mp4',
|
||||||
u"md5": u"06bea460acb744eab74a9d7dcb4bfd61",
|
"md5": "06bea460acb744eab74a9d7dcb4bfd61",
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u"upload_date": u"20130624",
|
"upload_date": "20130624",
|
||||||
u"uploader": u"Hurts",
|
"uploader": "Hurts",
|
||||||
u"title": u"Somebody to Die For",
|
"title": "Somebody to Die For",
|
||||||
u"duration": 230.12,
|
"duration": 230.12,
|
||||||
u"width": 1920,
|
"width": 1920,
|
||||||
u"height": 1080,
|
"height": 1080,
|
||||||
}
|
}
|
||||||
}]
|
}]
|
||||||
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
|
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
|
||||||
@@ -44,7 +45,7 @@ class VevoIE(InfoExtractor):
|
|||||||
if version['version'] > last_version['version']:
|
if version['version'] > last_version['version']:
|
||||||
last_version = version
|
last_version = version
|
||||||
if last_version['version'] == -1:
|
if last_version['version'] == -1:
|
||||||
raise ExtractorError(u'Unable to extract last version of the video')
|
raise ExtractorError('Unable to extract last version of the video')
|
||||||
|
|
||||||
renditions = xml.etree.ElementTree.fromstring(last_version['data'])
|
renditions = xml.etree.ElementTree.fromstring(last_version['data'])
|
||||||
formats = []
|
formats = []
|
||||||
@@ -85,7 +86,7 @@ class VevoIE(InfoExtractor):
|
|||||||
format_url = self._SMIL_BASE_URL + m.group('path')
|
format_url = self._SMIL_BASE_URL + m.group('path')
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
'format_id': u'SMIL_' + m.group('cbr'),
|
'format_id': 'SMIL_' + m.group('cbr'),
|
||||||
'vcodec': m.group('vcodec'),
|
'vcodec': m.group('vcodec'),
|
||||||
'acodec': m.group('acodec'),
|
'acodec': m.group('acodec'),
|
||||||
'vbr': int(m.group('vbr')),
|
'vbr': int(m.group('vbr')),
|
||||||
@@ -101,26 +102,25 @@ class VevoIE(InfoExtractor):
|
|||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
|
json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
|
||||||
info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
|
video_info = self._download_json(json_url, video_id)['video']
|
||||||
video_info = json.loads(info_json)['video']
|
|
||||||
|
|
||||||
formats = self._formats_from_json(video_info)
|
formats = self._formats_from_json(video_info)
|
||||||
try:
|
try:
|
||||||
smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
|
smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
|
||||||
self._SMIL_BASE_URL, video_id, video_id.lower())
|
self._SMIL_BASE_URL, video_id, video_id.lower())
|
||||||
smil_xml = self._download_webpage(smil_url, video_id,
|
smil_xml = self._download_webpage(smil_url, video_id,
|
||||||
u'Downloading SMIL info')
|
'Downloading SMIL info')
|
||||||
formats.extend(self._formats_from_smil(smil_xml))
|
formats.extend(self._formats_from_smil(smil_xml))
|
||||||
except ExtractorError as ee:
|
except ExtractorError as ee:
|
||||||
if not isinstance(ee.cause, compat_HTTPError):
|
if not isinstance(ee.cause, compat_HTTPError):
|
||||||
raise
|
raise
|
||||||
self._downloader.report_warning(
|
self._downloader.report_warning(
|
||||||
u'Cannot download SMIL information, falling back to JSON ..')
|
'Cannot download SMIL information, falling back to JSON ..')
|
||||||
|
|
||||||
timestamp_ms = int(self._search_regex(
|
timestamp_ms = int(self._search_regex(
|
||||||
r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))
|
r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date'))
|
||||||
upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000)
|
upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000)
|
||||||
info = {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_info['title'],
|
'title': video_info['title'],
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
@@ -129,5 +129,3 @@ class VevoIE(InfoExtractor):
|
|||||||
'uploader': video_info['mainArtists'][0]['artistName'],
|
'uploader': video_info['mainArtists'][0]['artistName'],
|
||||||
'duration': video_info['duration'],
|
'duration': video_info['duration'],
|
||||||
}
|
}
|
||||||
|
|
||||||
return info
|
|
||||||
|
@@ -6,10 +6,10 @@ import re
|
|||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
|
|
||||||
clean_html,
|
clean_html,
|
||||||
get_element_by_attribute,
|
get_element_by_attribute,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
@@ -19,7 +19,7 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class VimeoIE(InfoExtractor):
|
class VimeoIE(SubtitlesInfoExtractor):
|
||||||
"""Information extractor for vimeo.com."""
|
"""Information extractor for vimeo.com."""
|
||||||
|
|
||||||
# _VALID_URL matches Vimeo URLs
|
# _VALID_URL matches Vimeo URLs
|
||||||
@@ -84,6 +84,20 @@ class VimeoIE(InfoExtractor):
|
|||||||
'videopassword': 'youtube-dl',
|
'videopassword': 'youtube-dl',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://vimeo.com/76979871',
|
||||||
|
'md5': '3363dd6ffebe3784d56f4132317fd446',
|
||||||
|
'note': 'Video with subtitles',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '76979871',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'The New Vimeo Player (You Know, For Videos)',
|
||||||
|
'description': 'md5:2ec900bf97c3f389378a96aee11260ea',
|
||||||
|
'upload_date': '20131015',
|
||||||
|
'uploader_id': 'staff',
|
||||||
|
'uploader': 'Vimeo Staff',
|
||||||
|
}
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
@@ -273,19 +287,31 @@ class VimeoIE(InfoExtractor):
|
|||||||
if len(formats) == 0:
|
if len(formats) == 0:
|
||||||
raise ExtractorError('No known codec found')
|
raise ExtractorError('No known codec found')
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
text_tracks = config['request'].get('text_tracks')
|
||||||
|
if text_tracks:
|
||||||
|
for tt in text_tracks:
|
||||||
|
subtitles[tt['lang']] = 'http://vimeo.com' + tt['url']
|
||||||
|
|
||||||
|
video_subtitles = self.extract_subtitles(video_id, subtitles)
|
||||||
|
if self._downloader.params.get('listsubtitles', False):
|
||||||
|
self._list_available_subtitles(video_id, subtitles)
|
||||||
|
return
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'uploader_id': video_uploader_id,
|
'uploader_id': video_uploader_id,
|
||||||
'upload_date': video_upload_date,
|
'upload_date': video_upload_date,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'thumbnail': video_thumbnail,
|
'thumbnail': video_thumbnail,
|
||||||
'description': video_description,
|
'description': video_description,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'webpage_url': url,
|
'webpage_url': url,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'like_count': like_count,
|
'like_count': like_count,
|
||||||
'comment_count': comment_count,
|
'comment_count': comment_count,
|
||||||
|
'subtitles': video_subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
80
youtube_dl/extractor/vube.py
Normal file
80
youtube_dl/extractor/vube.py
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class VubeIE(InfoExtractor):
|
||||||
|
IE_NAME = 'vube'
|
||||||
|
IE_DESC = 'Vube.com'
|
||||||
|
_VALID_URL = r'http://vube\.com/[^/]+/(?P<id>[\da-zA-Z]{10})'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon',
|
||||||
|
'md5': 'f81dcf6d0448e3291f54380181695821',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'YL2qNPkqon',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Chiara Grispo - Price Tag by Jessie J',
|
||||||
|
'description': 'md5:8ea652a1f36818352428cb5134933313',
|
||||||
|
'thumbnail': 'http://frame.thestaticvube.com/snap/228x128/102e7e63057-5ebc-4f5c-4065-6ce4ebde131f.jpg',
|
||||||
|
'uploader': 'Chiara.Grispo',
|
||||||
|
'uploader_id': '1u3hX0znhP',
|
||||||
|
'upload_date': '20140103',
|
||||||
|
'duration': 170.56
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
video = self._download_json('http://vube.com/api/v2/video/%s' % video_id,
|
||||||
|
video_id, 'Downloading video JSON')
|
||||||
|
|
||||||
|
public_id = video['public_id']
|
||||||
|
|
||||||
|
formats = [{'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (fmt['media_resolution_id'], public_id),
|
||||||
|
'height': int(fmt['height']),
|
||||||
|
'abr': int(fmt['audio_bitrate']),
|
||||||
|
'vbr': int(fmt['video_bitrate']),
|
||||||
|
'format_id': fmt['media_resolution_id']
|
||||||
|
} for fmt in video['mtm'] if fmt['transcoding_status'] == 'processed']
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
title = video['title']
|
||||||
|
description = video.get('description')
|
||||||
|
thumbnail = video['thumbnail_src']
|
||||||
|
if thumbnail.startswith('//'):
|
||||||
|
thumbnail = 'http:' + thumbnail
|
||||||
|
uploader = video['user_alias']
|
||||||
|
uploader_id = video['user_url_id']
|
||||||
|
upload_date = datetime.datetime.fromtimestamp(int(video['upload_time'])).strftime('%Y%m%d')
|
||||||
|
duration = video['duration']
|
||||||
|
view_count = video['raw_view_count']
|
||||||
|
like_count = video['total_likes']
|
||||||
|
dislike_count= video['total_hates']
|
||||||
|
|
||||||
|
comment = self._download_json('http://vube.com/api/video/%s/comment' % video_id,
|
||||||
|
video_id, 'Downloading video comment JSON')
|
||||||
|
|
||||||
|
comment_count = comment['total']
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'uploader': uploader,
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
'upload_date': upload_date,
|
||||||
|
'duration': duration,
|
||||||
|
'view_count': view_count,
|
||||||
|
'like_count': like_count,
|
||||||
|
'dislike_count': dislike_count,
|
||||||
|
'comment_count': comment_count,
|
||||||
|
}
|
@@ -5,7 +5,6 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
unescapeHTML,
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2014.01.30.1'
|
__version__ = '2014.02.04'
|
||||||
|
Reference in New Issue
Block a user