Compare commits
35 Commits
2014.01.30
...
2014.02.03
Author | SHA1 | Date | |
---|---|---|---|
|
9d3ac7444d | ||
|
588128d054 | ||
|
8e93b9b9aa | ||
|
b4bcffefa3 | ||
|
2b39af9b4f | ||
|
23fe495feb | ||
|
b5dbe89bba | ||
|
dbe80ca7ad | ||
|
009a3408f5 | ||
|
b58e3c8918 | ||
|
56b6faf91e | ||
|
7ac1f877a7 | ||
|
d55433bbfd | ||
|
f0ce2bc1c5 | ||
|
c3bc00b90e | ||
|
ff6b7b049b | ||
|
f46359121f | ||
|
37c1525c17 | ||
|
c85e4cf7b4 | ||
|
c66dcda287 | ||
|
6d845922ab | ||
|
2949cbe036 | ||
|
c3309a7774 | ||
|
7aed837595 | ||
|
0eb799bae9 | ||
|
4baff4a4ae | ||
|
45d7bc2f8b | ||
|
c0c2ddddcd | ||
|
a96ed91610 | ||
|
c1206423c4 | ||
|
659aa21ba1 | ||
|
efd02e858a | ||
|
3bf8bc7f37 | ||
|
c81a855b0f | ||
|
4f879a5be0 |
12
README.md
12
README.md
@@ -53,6 +53,12 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
from google videos for youtube-dl "large
|
from google videos for youtube-dl "large
|
||||||
apple". By default (with value "auto")
|
apple". By default (with value "auto")
|
||||||
youtube-dl guesses.
|
youtube-dl guesses.
|
||||||
|
--ignore-config Do not read configuration files. When given
|
||||||
|
in the global configuration file /etc
|
||||||
|
/youtube-dl.conf: do not read the user
|
||||||
|
configuration in ~/.config/youtube-dl.conf
|
||||||
|
(%APPDATA%/youtube-dl/config.txt on
|
||||||
|
Windows)
|
||||||
|
|
||||||
## Video Selection:
|
## Video Selection:
|
||||||
--playlist-start NUMBER playlist video to start at (default is 1)
|
--playlist-start NUMBER playlist video to start at (default is 1)
|
||||||
@@ -325,7 +331,7 @@ Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unz
|
|||||||
|
|
||||||
To run the exe, you first need to install the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29).
|
To run the exe, you first need to install the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29).
|
||||||
|
|
||||||
# BUILD INSTRUCTIONS
|
# DEVELOPER INSTRUCTIONS
|
||||||
|
|
||||||
Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
|
Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
|
||||||
|
|
||||||
@@ -347,6 +353,10 @@ If you want to create a build of youtube-dl yourself, you'll need
|
|||||||
* zip
|
* zip
|
||||||
* nosetests
|
* nosetests
|
||||||
|
|
||||||
|
### Adding support for a new site
|
||||||
|
|
||||||
|
If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor` and add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py Test_Download.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/).
|
||||||
|
|
||||||
# BUGS
|
# BUGS
|
||||||
|
|
||||||
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted to do so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email.
|
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted to do so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email.
|
||||||
|
@@ -10,6 +10,7 @@ from test.helper import FakeYDL, md5
|
|||||||
|
|
||||||
|
|
||||||
from youtube_dl.extractor import (
|
from youtube_dl.extractor import (
|
||||||
|
BlipTVIE,
|
||||||
YoutubeIE,
|
YoutubeIE,
|
||||||
DailymotionIE,
|
DailymotionIE,
|
||||||
TEDIE,
|
TEDIE,
|
||||||
@@ -202,5 +203,25 @@ class TestTedSubtitles(BaseTestSubtitles):
|
|||||||
for lang in langs:
|
for lang in langs:
|
||||||
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|
||||||
|
|
||||||
|
|
||||||
|
class TestBlipTVSubtitles(BaseTestSubtitles):
|
||||||
|
url = 'http://blip.tv/a/a-6603250'
|
||||||
|
IE = BlipTVIE
|
||||||
|
|
||||||
|
def test_list_subtitles(self):
|
||||||
|
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
||||||
|
self.DL.params['listsubtitles'] = True
|
||||||
|
info_dict = self.getInfoDict()
|
||||||
|
self.assertEqual(info_dict, None)
|
||||||
|
|
||||||
|
def test_allsubtitles(self):
|
||||||
|
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(set(subtitles.keys()), set(['en']))
|
||||||
|
self.assertEqual(md5(subtitles['en']), '5b75c300af65fe4476dff79478bb93e4')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@@ -18,6 +18,7 @@
|
|||||||
# code is documented here:
|
# code is documented here:
|
||||||
# https://github.com/zsh-users/antigen#notes-on-writing-plugins
|
# https://github.com/zsh-users/antigen#notes-on-writing-plugins
|
||||||
|
|
||||||
# This specific script just adds the downloaded folder to the end of the $PATH,
|
# This specific script just aliases youtube-dl to the python script that this
|
||||||
# which allows the contained youtube-dl executable to be found.
|
# library provides. This requires updating the PYTHONPATH to ensure that the
|
||||||
export PATH=${PATH}:$(dirname $0)
|
# full set of code can be located.
|
||||||
|
alias youtube-dl="PYTHONPATH=$(dirname $0) $(dirname $0)/bin/youtube-dl"
|
||||||
|
@@ -100,6 +100,43 @@ def parseOpts(overrideArguments=None):
|
|||||||
optionf.close()
|
optionf.close()
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
def _readUserConf():
|
||||||
|
xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
|
||||||
|
if xdg_config_home:
|
||||||
|
userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
|
||||||
|
if not os.path.isfile(userConfFile):
|
||||||
|
userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
|
||||||
|
else:
|
||||||
|
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
|
||||||
|
if not os.path.isfile(userConfFile):
|
||||||
|
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
|
||||||
|
userConf = _readOptions(userConfFile, None)
|
||||||
|
|
||||||
|
if userConf is None:
|
||||||
|
appdata_dir = os.environ.get('appdata')
|
||||||
|
if appdata_dir:
|
||||||
|
userConf = _readOptions(
|
||||||
|
os.path.join(appdata_dir, 'youtube-dl', 'config'),
|
||||||
|
default=None)
|
||||||
|
if userConf is None:
|
||||||
|
userConf = _readOptions(
|
||||||
|
os.path.join(appdata_dir, 'youtube-dl', 'config.txt'),
|
||||||
|
default=None)
|
||||||
|
|
||||||
|
if userConf is None:
|
||||||
|
userConf = _readOptions(
|
||||||
|
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'),
|
||||||
|
default=None)
|
||||||
|
if userConf is None:
|
||||||
|
userConf = _readOptions(
|
||||||
|
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'),
|
||||||
|
default=None)
|
||||||
|
|
||||||
|
if userConf is None:
|
||||||
|
userConf = []
|
||||||
|
|
||||||
|
return userConf
|
||||||
|
|
||||||
def _format_option_string(option):
|
def _format_option_string(option):
|
||||||
''' ('-o', '--option') -> -o, --format METAVAR'''
|
''' ('-o', '--option') -> -o, --format METAVAR'''
|
||||||
|
|
||||||
@@ -203,6 +240,11 @@ def parseOpts(overrideArguments=None):
|
|||||||
general.add_option('--default-search',
|
general.add_option('--default-search',
|
||||||
dest='default_search', metavar='PREFIX',
|
dest='default_search', metavar='PREFIX',
|
||||||
help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
|
help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
|
||||||
|
general.add_option(
|
||||||
|
'--ignore-config',
|
||||||
|
action='store_true',
|
||||||
|
help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
|
||||||
|
|
||||||
|
|
||||||
selection.add_option(
|
selection.add_option(
|
||||||
'--playlist-start',
|
'--playlist-start',
|
||||||
@@ -457,44 +499,18 @@ def parseOpts(overrideArguments=None):
|
|||||||
if opts.verbose:
|
if opts.verbose:
|
||||||
write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
|
write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
|
||||||
else:
|
else:
|
||||||
systemConf = _readOptions('/etc/youtube-dl.conf')
|
|
||||||
|
|
||||||
xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
|
|
||||||
if xdg_config_home:
|
|
||||||
userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
|
|
||||||
if not os.path.isfile(userConfFile):
|
|
||||||
userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
|
|
||||||
else:
|
|
||||||
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
|
|
||||||
if not os.path.isfile(userConfFile):
|
|
||||||
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
|
|
||||||
userConf = _readOptions(userConfFile, None)
|
|
||||||
|
|
||||||
if userConf is None:
|
|
||||||
appdata_dir = os.environ.get('appdata')
|
|
||||||
if appdata_dir:
|
|
||||||
userConf = _readOptions(
|
|
||||||
os.path.join(appdata_dir, 'youtube-dl', 'config'),
|
|
||||||
default=None)
|
|
||||||
if userConf is None:
|
|
||||||
userConf = _readOptions(
|
|
||||||
os.path.join(appdata_dir, 'youtube-dl', 'config.txt'),
|
|
||||||
default=None)
|
|
||||||
|
|
||||||
if userConf is None:
|
|
||||||
userConf = _readOptions(
|
|
||||||
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'),
|
|
||||||
default=None)
|
|
||||||
if userConf is None:
|
|
||||||
userConf = _readOptions(
|
|
||||||
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'),
|
|
||||||
default=None)
|
|
||||||
|
|
||||||
if userConf is None:
|
|
||||||
userConf = []
|
|
||||||
|
|
||||||
commandLineConf = sys.argv[1:]
|
commandLineConf = sys.argv[1:]
|
||||||
|
if '--ignore-config' in commandLineConf:
|
||||||
|
systemConf = []
|
||||||
|
userConf = []
|
||||||
|
else:
|
||||||
|
systemConf = _readOptions('/etc/youtube-dl.conf')
|
||||||
|
if '--ignore-config' in systemConf:
|
||||||
|
userConf = []
|
||||||
|
else:
|
||||||
|
userConf = _readUserConf()
|
||||||
argv = systemConf + userConf + commandLineConf
|
argv = systemConf + userConf + commandLineConf
|
||||||
|
|
||||||
opts, args = parser.parse_args(argv)
|
opts, args = parser.parse_args(argv)
|
||||||
if opts.verbose:
|
if opts.verbose:
|
||||||
write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
|
write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
|
||||||
|
@@ -110,6 +110,7 @@ from .khanacademy import KhanAcademyIE
|
|||||||
from .kickstarter import KickStarterIE
|
from .kickstarter import KickStarterIE
|
||||||
from .keek import KeekIE
|
from .keek import KeekIE
|
||||||
from .la7 import LA7IE
|
from .la7 import LA7IE
|
||||||
|
from .lifenews import LifeNewsIE
|
||||||
from .liveleak import LiveLeakIE
|
from .liveleak import LiveLeakIE
|
||||||
from .livestream import LivestreamIE, LivestreamOriginalIE
|
from .livestream import LivestreamIE, LivestreamOriginalIE
|
||||||
from .lynda import (
|
from .lynda import (
|
||||||
@@ -141,6 +142,7 @@ from .newgrounds import NewgroundsIE
|
|||||||
from .nhl import NHLIE, NHLVideocenterIE
|
from .nhl import NHLIE, NHLVideocenterIE
|
||||||
from .niconico import NiconicoIE
|
from .niconico import NiconicoIE
|
||||||
from .ninegag import NineGagIE
|
from .ninegag import NineGagIE
|
||||||
|
from .normalboots import NormalbootsIE
|
||||||
from .novamov import NovamovIE
|
from .novamov import NovamovIE
|
||||||
from .nowness import NownessIE
|
from .nowness import NownessIE
|
||||||
from .nowvideo import NowVideoIE
|
from .nowvideo import NowVideoIE
|
||||||
@@ -198,6 +200,7 @@ from .ted import TEDIE
|
|||||||
from .tf1 import TF1IE
|
from .tf1 import TF1IE
|
||||||
from .theplatform import ThePlatformIE
|
from .theplatform import ThePlatformIE
|
||||||
from .thisav import ThisAVIE
|
from .thisav import ThisAVIE
|
||||||
|
from .tinypic import TinyPicIE
|
||||||
from .toutv import TouTvIE
|
from .toutv import TouTvIE
|
||||||
from .traileraddict import TrailerAddictIE
|
from .traileraddict import TrailerAddictIE
|
||||||
from .trilulilu import TriluliluIE
|
from .trilulilu import TriluliluIE
|
||||||
@@ -228,6 +231,7 @@ from .vimeo import (
|
|||||||
from .vine import VineIE
|
from .vine import VineIE
|
||||||
from .viki import VikiIE
|
from .viki import VikiIE
|
||||||
from .vk import VKIE
|
from .vk import VKIE
|
||||||
|
from .vube import VubeIE
|
||||||
from .wat import WatIE
|
from .wat import WatIE
|
||||||
from .weibo import WeiboIE
|
from .weibo import WeiboIE
|
||||||
from .wimp import WimpIE
|
from .wimp import WimpIE
|
||||||
|
@@ -6,6 +6,7 @@ import re
|
|||||||
import socket
|
import socket
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_http_client,
|
compat_http_client,
|
||||||
compat_str,
|
compat_str,
|
||||||
@@ -17,68 +18,76 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class BlipTVIE(InfoExtractor):
|
class BlipTVIE(SubtitlesInfoExtractor):
|
||||||
"""Information extractor for blip.tv"""
|
"""Information extractor for blip.tv"""
|
||||||
|
|
||||||
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$'
|
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(?P<presumptive_id>.+)$'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
|
'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
|
||||||
'file': '5779306.mov',
|
|
||||||
'md5': 'c6934ad0b6acf2bd920720ec888eb812',
|
'md5': 'c6934ad0b6acf2bd920720ec888eb812',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '5779306',
|
||||||
|
'ext': 'mov',
|
||||||
'upload_date': '20111205',
|
'upload_date': '20111205',
|
||||||
'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
|
'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
|
||||||
'uploader': 'Comic Book Resources - CBR TV',
|
'uploader': 'Comic Book Resources - CBR TV',
|
||||||
'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
|
'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# https://github.com/rg3/youtube-dl/pull/2274
|
||||||
|
'note': 'Video with subtitles',
|
||||||
|
'url': 'http://blip.tv/play/h6Uag5OEVgI.html',
|
||||||
|
'md5': '309f9d25b820b086ca163ffac8031806',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6586561',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'uploader': 'Red vs. Blue',
|
||||||
|
'description': 'One-Zero-One',
|
||||||
|
'upload_date': '20130614',
|
||||||
|
'title': 'Red vs. Blue Season 11 Episode 1',
|
||||||
}
|
}
|
||||||
|
}]
|
||||||
def report_direct_download(self, title):
|
|
||||||
"""Report information extraction."""
|
|
||||||
self.to_screen('%s: Direct download detected' % title)
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
presumptive_id = mobj.group('presumptive_id')
|
||||||
raise ExtractorError('Invalid URL: %s' % url)
|
|
||||||
|
|
||||||
# See https://github.com/rg3/youtube-dl/issues/857
|
# See https://github.com/rg3/youtube-dl/issues/857
|
||||||
embed_mobj = re.search(r'^(?:https?://)?(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', url)
|
embed_mobj = re.match(r'https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', url)
|
||||||
if embed_mobj:
|
if embed_mobj:
|
||||||
info_url = 'http://blip.tv/play/%s.x?p=1' % embed_mobj.group(1)
|
info_url = 'http://blip.tv/play/%s.x?p=1' % embed_mobj.group(1)
|
||||||
info_page = self._download_webpage(info_url, embed_mobj.group(1))
|
info_page = self._download_webpage(info_url, embed_mobj.group(1))
|
||||||
video_id = self._search_regex(r'data-episode-id="(\d+)', info_page, 'video_id')
|
video_id = self._search_regex(
|
||||||
|
r'data-episode-id="([0-9]+)', info_page, 'video_id')
|
||||||
return self.url_result('http://blip.tv/a/a-' + video_id, 'BlipTV')
|
return self.url_result('http://blip.tv/a/a-' + video_id, 'BlipTV')
|
||||||
|
|
||||||
if '?' in url:
|
cchar = '&' if '?' in url else '?'
|
||||||
cchar = '&'
|
|
||||||
else:
|
|
||||||
cchar = '?'
|
|
||||||
json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
|
json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
|
||||||
request = compat_urllib_request.Request(json_url)
|
request = compat_urllib_request.Request(json_url)
|
||||||
request.add_header('User-Agent', 'iTunes/10.6.1')
|
request.add_header('User-Agent', 'iTunes/10.6.1')
|
||||||
self.report_extraction(mobj.group(1))
|
|
||||||
urlh = self._request_webpage(request, None, False,
|
|
||||||
'unable to download video info webpage')
|
|
||||||
|
|
||||||
try:
|
json_data = self._download_json(request, video_id=presumptive_id)
|
||||||
json_code_bytes = urlh.read()
|
|
||||||
json_code = json_code_bytes.decode('utf-8')
|
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
|
||||||
raise ExtractorError('Unable to read video info webpage: %s' % compat_str(err))
|
|
||||||
|
|
||||||
try:
|
|
||||||
json_data = json.loads(json_code)
|
|
||||||
if 'Post' in json_data:
|
if 'Post' in json_data:
|
||||||
data = json_data['Post']
|
data = json_data['Post']
|
||||||
else:
|
else:
|
||||||
data = json_data
|
data = json_data
|
||||||
|
|
||||||
|
video_id = compat_str(data['item_id'])
|
||||||
upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
|
upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
|
||||||
|
subtitles = {}
|
||||||
formats = []
|
formats = []
|
||||||
if 'additionalMedia' in data:
|
if 'additionalMedia' in data:
|
||||||
for f in sorted(data['additionalMedia'], key=lambda f: int(f['media_height'])):
|
for f in data['additionalMedia']:
|
||||||
|
if f.get('file_type_srt') == 1:
|
||||||
|
LANGS = {
|
||||||
|
'english': 'en',
|
||||||
|
}
|
||||||
|
lang = f['role'].rpartition('-')[-1].strip().lower()
|
||||||
|
langcode = LANGS.get(lang, lang)
|
||||||
|
subtitles[langcode] = f['url']
|
||||||
|
continue
|
||||||
if not int(f['media_width']): # filter m3u8
|
if not int(f['media_width']): # filter m3u8
|
||||||
continue
|
continue
|
||||||
formats.append({
|
formats.append({
|
||||||
@@ -93,11 +102,16 @@ class BlipTVIE(InfoExtractor):
|
|||||||
'width': int(data['media']['width']),
|
'width': int(data['media']['width']),
|
||||||
'height': int(data['media']['height']),
|
'height': int(data['media']['height']),
|
||||||
})
|
})
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
# subtitles
|
||||||
|
video_subtitles = self.extract_subtitles(video_id, subtitles)
|
||||||
|
if self._downloader.params.get('listsubtitles', False):
|
||||||
|
self._list_available_subtitles(video_id, subtitles)
|
||||||
|
return
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': compat_str(data['item_id']),
|
'id': video_id,
|
||||||
'uploader': data['display_name'],
|
'uploader': data['display_name'],
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
'title': data['title'],
|
'title': data['title'],
|
||||||
@@ -105,24 +119,24 @@ class BlipTVIE(InfoExtractor):
|
|||||||
'description': data['description'],
|
'description': data['description'],
|
||||||
'user_agent': 'iTunes/10.6.1',
|
'user_agent': 'iTunes/10.6.1',
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'subtitles': video_subtitles,
|
||||||
}
|
}
|
||||||
except (ValueError, KeyError) as err:
|
|
||||||
raise ExtractorError('Unable to parse video information: %s' % repr(err))
|
def _download_subtitle_url(self, sub_lang, url):
|
||||||
|
# For some weird reason, blip.tv serves a video instead of subtitles
|
||||||
|
# when we request with a common UA
|
||||||
|
req = compat_urllib_request.Request(url)
|
||||||
|
req.add_header('Youtubedl-user-agent', 'youtube-dl')
|
||||||
|
return self._download_webpage(req, None, note=False)
|
||||||
|
|
||||||
|
|
||||||
class BlipTVUserIE(InfoExtractor):
|
class BlipTVUserIE(InfoExtractor):
|
||||||
"""Information Extractor for blip.tv users."""
|
|
||||||
|
|
||||||
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$'
|
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$'
|
||||||
_PAGE_SIZE = 12
|
_PAGE_SIZE = 12
|
||||||
IE_NAME = 'blip.tv:user'
|
IE_NAME = 'blip.tv:user'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
# Extract username
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
|
||||||
raise ExtractorError('Invalid URL: %s' % url)
|
|
||||||
|
|
||||||
username = mobj.group(1)
|
username = mobj.group(1)
|
||||||
|
|
||||||
page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'
|
page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'
|
||||||
@@ -131,7 +145,6 @@ class BlipTVUserIE(InfoExtractor):
|
|||||||
mobj = re.search(r'data-users-id="([^"]+)"', page)
|
mobj = re.search(r'data-users-id="([^"]+)"', page)
|
||||||
page_base = page_base % mobj.group(1)
|
page_base = page_base % mobj.group(1)
|
||||||
|
|
||||||
|
|
||||||
# Download video ids using BlipTV Ajax calls. Result size per
|
# Download video ids using BlipTV Ajax calls. Result size per
|
||||||
# query is limited (currently to 12 videos) so we need to query
|
# query is limited (currently to 12 videos) so we need to query
|
||||||
# page by page until there are no video ids - it means we got
|
# page by page until there are no video ids - it means we got
|
||||||
@@ -142,8 +155,8 @@ class BlipTVUserIE(InfoExtractor):
|
|||||||
|
|
||||||
while True:
|
while True:
|
||||||
url = page_base + "&page=" + str(pagenum)
|
url = page_base + "&page=" + str(pagenum)
|
||||||
page = self._download_webpage(url, username,
|
page = self._download_webpage(
|
||||||
'Downloading video ids from page %d' % pagenum)
|
url, username, 'Downloading video ids from page %d' % pagenum)
|
||||||
|
|
||||||
# Extract video identifiers
|
# Extract video identifiers
|
||||||
ids_in_page = []
|
ids_in_page = []
|
||||||
@@ -167,4 +180,4 @@ class BlipTVUserIE(InfoExtractor):
|
|||||||
|
|
||||||
urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]
|
urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]
|
||||||
url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
|
url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
|
||||||
return [self.playlist_result(url_entries, playlist_title = username)]
|
return [self.playlist_result(url_entries, playlist_title=username)]
|
||||||
|
@@ -1,12 +1,9 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import string
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
)
|
|
||||||
|
|
||||||
translation_table = {
|
translation_table = {
|
||||||
'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',
|
'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',
|
||||||
|
@@ -38,7 +38,7 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
|
'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
|
||||||
'uploader': 'Funnyplox TV',
|
'uploader': 'Funnyplox TV',
|
||||||
'uploader_id': 'funnyploxtv',
|
'uploader_id': 'funnyploxtv',
|
||||||
'description': 'md5:b20fc87608e2837596bbc8df85a3c34d',
|
'description': 'md5:506f69f7a297ed698ced3375f2363b0e',
|
||||||
'upload_date': '20140128',
|
'upload_date': '20140128',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
|
@@ -399,7 +399,7 @@ class InfoExtractor(object):
|
|||||||
# Helper functions for extracting OpenGraph info
|
# Helper functions for extracting OpenGraph info
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _og_regexes(prop):
|
def _og_regexes(prop):
|
||||||
content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')'
|
content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\')'
|
||||||
property_re = r'(?:name|property)=[\'"]og:%s[\'"]' % re.escape(prop)
|
property_re = r'(?:name|property)=[\'"]og:%s[\'"]' % re.escape(prop)
|
||||||
template = r'<meta[^>]+?%s[^>]+?%s'
|
template = r'<meta[^>]+?%s[^>]+?%s'
|
||||||
return [
|
return [
|
||||||
|
@@ -1,49 +1,60 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
find_xpath_attr,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class CSpanIE(InfoExtractor):
|
class CSpanIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://(?:www\.)?c-spanvideo\.org/program/(?P<name>.*)'
|
_VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>\d+)'
|
||||||
IE_DESC = 'C-SPAN'
|
IE_DESC = 'C-SPAN'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.c-spanvideo.org/program/HolderonV',
|
'url': 'http://www.c-span.org/video/?313572-1/HolderonV',
|
||||||
'file': '315139.mp4',
|
|
||||||
'md5': '8e44ce11f0f725527daccc453f553eb0',
|
'md5': '8e44ce11f0f725527daccc453f553eb0',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '315139',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
|
'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
|
||||||
'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in [Shelby County v. Holder] in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
|
'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in Shelby County v. Holder in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
|
||||||
},
|
},
|
||||||
'skip': 'Regularly fails on travis, for unknown reasons',
|
'skip': 'Regularly fails on travis, for unknown reasons',
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
prog_name = mobj.group('name')
|
page_id = mobj.group('id')
|
||||||
webpage = self._download_webpage(url, prog_name)
|
webpage = self._download_webpage(url, page_id)
|
||||||
video_id = self._search_regex(r'prog(?:ram)?id=(.*?)&', webpage, 'video id')
|
video_id = self._search_regex(r'data-progid=\'(\d+)\'>', webpage, 'video id')
|
||||||
|
|
||||||
title = self._html_search_regex(
|
description = self._html_search_regex(
|
||||||
r'<!-- title -->\n\s*<h1[^>]*>(.*?)</h1>', webpage, 'title')
|
[
|
||||||
description = self._og_search_description(webpage)
|
# The full description
|
||||||
|
r'<div class=\'expandable\'>(.*?)<a href=\'#\'',
|
||||||
|
# If the description is small enough the other div is not
|
||||||
|
# present, otherwise this is a stripped version
|
||||||
|
r'<p class=\'initial\'>(.*?)</p>'
|
||||||
|
],
|
||||||
|
webpage, 'description', flags=re.DOTALL)
|
||||||
|
|
||||||
info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
|
info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
|
||||||
data_json = self._download_webpage(
|
data = self._download_json(info_url, video_id)
|
||||||
info_url, video_id, 'Downloading video info')
|
|
||||||
data = json.loads(data_json)
|
|
||||||
|
|
||||||
url = unescapeHTML(data['video']['files'][0]['path']['#text'])
|
url = unescapeHTML(data['video']['files'][0]['path']['#text'])
|
||||||
|
|
||||||
|
doc = self._download_xml('http://www.c-span.org/common/services/flashXml.php?programid=' + video_id,
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
def find_string(s):
|
||||||
|
return find_xpath_attr(doc, './/string', 'name', s).text
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': find_string('title'),
|
||||||
'url': url,
|
'url': url,
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': find_string('poster'),
|
||||||
}
|
}
|
||||||
|
@@ -10,7 +10,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class InfoQIE(InfoExtractor):
|
class InfoQIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'
|
_VALID_URL = r'https?://(?:www\.)?infoq\.com/[^/]+/(?P<id>[^/]+)$'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
"name": "InfoQ",
|
"name": "InfoQ",
|
||||||
"url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
|
"url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
|
||||||
@@ -26,9 +26,9 @@ class InfoQIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id=url)
|
webpage = self._download_webpage(url, video_id)
|
||||||
self.report_extraction(url)
|
|
||||||
|
|
||||||
# Extract video URL
|
# Extract video URL
|
||||||
encoded_id = self._search_regex(r"jsclassref ?= ?'([^']*)'", webpage, 'encoded id')
|
encoded_id = self._search_regex(r"jsclassref ?= ?'([^']*)'", webpage, 'encoded id')
|
||||||
|
63
youtube_dl/extractor/lifenews.py
Normal file
63
youtube_dl/extractor/lifenews.py
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import unified_strdate
|
||||||
|
|
||||||
|
|
||||||
|
class LifeNewsIE(InfoExtractor):
    """Information extractor for news videos published on lifenews.ru."""

    IE_NAME = 'lifenews'
    IE_DESC = 'LIFE | NEWS'
    _VALID_URL = r'http://lifenews\.ru/(?:mobile/)?news/(?P<id>\d+)'

    _TEST = {
        'url': 'http://lifenews.ru/news/126342',
        'file': '126342.mp4',
        'md5': 'e1b50a5c5fb98a6a544250f2e0db570a',
        'info_dict': {
            'title': 'МВД разыскивает мужчин, оставивших в IKEA сумку с автоматом',
            'description': 'Камеры наблюдения гипермаркета зафиксировали троих мужчин, спрятавших оружейный арсенал в камере хранения.',
            'thumbnail': 'http://lifenews.ru/static/posts/2014/1/126342/.video.jpg',
            'upload_date': '20140130',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the news item addressed by ``url``.

        The numeric id is taken from the URL and the *mobile* variant of the
        page is always downloaded, regardless of which variant was given.
        The video URL, thumbnail, title, description and upload date are
        required; the view and comment counters are optional and are
        reported as integers (or None when they cannot be found).
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(
            'http://lifenews.ru/mobile/news/%s' % video_id,
            video_id, 'Downloading page')

        video_url = self._html_search_regex(
            r'<video.*?src="([^"]+)"></video>', webpage, 'video URL')

        thumbnail = self._html_search_regex(
            r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail')

        # The og:title carries a fixed site-wide suffix; strip it when present.
        title = self._og_search_title(webpage)
        TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS'
        if title.endswith(TITLE_SUFFIX):
            title = title[:-len(TITLE_SUFFIX)]

        description = self._og_search_description(webpage)

        # Counters are auxiliary metadata: a page without them must not abort
        # the extraction, and the captured digits are converted to integers.
        view_count_str = self._html_search_regex(
            r'<div class=\'views\'>(\d+)</div>', webpage, 'view count',
            fatal=False)
        view_count = (
            None if view_count_str is None
            else int(view_count_str))

        comment_count_str = self._html_search_regex(
            r'<div class=\'comments\'>(\d+)</div>', webpage, 'comment count',
            fatal=False)
        comment_count = (
            None if comment_count_str is None
            else int(comment_count_str))

        upload_date = self._html_search_regex(
            r'<time datetime=\'([^\']+)\'>', webpage, 'upload date')

        return {
            'id': video_id,
            'url': video_url,
            'thumbnail': thumbnail,
            'title': title,
            'description': description,
            'view_count': view_count,
            'comment_count': comment_count,
            'upload_date': unified_strdate(upload_date),
        }
|
@@ -4,9 +4,6 @@ import json
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class LiveLeakIE(InfoExtractor):
|
class LiveLeakIE(InfoExtractor):
|
||||||
|
@@ -4,18 +4,18 @@ import json
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import determine_ext
|
|
||||||
|
|
||||||
|
|
||||||
class NewgroundsIE(InfoExtractor):
|
class NewgroundsIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:https?://)?(?:www\.)?newgrounds\.com/audio/listen/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?newgrounds\.com/audio/listen/(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.newgrounds.com/audio/listen/549479',
|
'url': 'http://www.newgrounds.com/audio/listen/549479',
|
||||||
'file': '549479.mp3',
|
|
||||||
'md5': 'fe6033d297591288fa1c1f780386f07a',
|
'md5': 'fe6033d297591288fa1c1f780386f07a',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
"title": "B7 - BusMode",
|
'id': '549479',
|
||||||
"uploader": "Burn7",
|
'ext': 'mp3',
|
||||||
|
'title': 'B7 - BusMode',
|
||||||
|
'uploader': 'Burn7',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
61
youtube_dl/extractor/normalboots.py
Normal file
61
youtube_dl/extractor/normalboots.py
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
class NormalbootsIE(InfoExtractor):
    """Information extractor for videos on normalboots.com."""

    _VALID_URL = r'(?:http://)?(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$'
    _TEST = {
        u'url': u'http://normalboots.com/video/home-alone-games-jontron/',
        u'file': u'home-alone-games-jontron.mp4',
        u'md5': u'8bf6de238915dd501105b44ef5f1e0f6',
        u'info_dict': {
            u'title': u'Home Alone Games - JonTron - NormalBoots',
            u'description': u'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for \u2018Tense Battle Theme\u2019:\xa0http://www.youtube.com/Kiamet/',
            u'uploader': u'JonTron',
            u'upload_date': u'20140125',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video page addressed by ``url``.

        The article page supplies title, description, thumbnail, uploader
        and upload date; the media file name is read from the embedded
        player page, which is downloaded separately.

        Raises ExtractorError when ``url`` does not match ``_VALID_URL``.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('videoid')

        # _VALID_URL accepts scheme-less URLs, so add a scheme before fetching.
        if url[:4] != 'http':
            url = 'http://' + url

        webpage = self._download_webpage(url, video_id)
        video_title = self._og_search_title(webpage)
        video_description = self._og_search_description(webpage)
        video_thumbnail = self._og_search_thumbnail(webpage)
        video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
            webpage, 'uploader')
        raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
            webpage, 'date')
        video_upload_date = unified_strdate(raw_upload_date)

        # The page embeds the player in an iframe; the player page in turn
        # names the media file, which is served relative to the player host.
        player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url')
        player_page = self._download_webpage(player_url, video_id)
        video_url = u'http://player.screenwavemedia.com/' + self._html_search_regex(r"'file':\s'(?P<file>[0-9A-Za-z-_\.]+)'", player_page, 'file')

        return {
            'id': video_id,
            'url': video_url,
            'title': video_title,
            'description': video_description,
            'thumbnail': video_thumbnail,
            'uploader': video_uploader,
            'upload_date': video_upload_date,
        }
|
@@ -6,8 +6,6 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_parse_urlparse,
|
|
||||||
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -16,9 +14,10 @@ class RBMARadioIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'
|
_VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011',
|
'url': 'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011',
|
||||||
'file': 'ford-lopatin-live-at-primavera-sound-2011.mp3',
|
|
||||||
'md5': '6bc6f9bcb18994b4c983bc3bf4384d95',
|
'md5': '6bc6f9bcb18994b4c983bc3bf4384d95',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'ford-lopatin-live-at-primavera-sound-2011',
|
||||||
|
'ext': 'mp3',
|
||||||
"uploader_id": "ford-lopatin",
|
"uploader_id": "ford-lopatin",
|
||||||
"location": "Spain",
|
"location": "Spain",
|
||||||
"description": "Joel Ford and Daniel ’Oneohtrix Point Never’ Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.",
|
"description": "Joel Ford and Daniel ’Oneohtrix Point Never’ Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.",
|
||||||
@@ -42,7 +41,6 @@ class RBMARadioIE(InfoExtractor):
|
|||||||
raise ExtractorError('Invalid JSON: ' + str(e))
|
raise ExtractorError('Invalid JSON: ' + str(e))
|
||||||
|
|
||||||
video_url = data['akamai_url'] + '&cbr=256'
|
video_url = data['akamai_url'] + '&cbr=256'
|
||||||
url_parts = compat_urllib_parse_urlparse(video_url)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@@ -11,12 +13,12 @@ class Ro220IE(InfoExtractor):
|
|||||||
IE_NAME = '220.ro'
|
IE_NAME = '220.ro'
|
||||||
_VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)'
|
_VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
|
"url": "http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
|
||||||
u'file': u'LYV6doKo7f.mp4',
|
'file': 'LYV6doKo7f.mp4',
|
||||||
u'md5': u'03af18b73a07b4088753930db7a34add',
|
'md5': '03af18b73a07b4088753930db7a34add',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u"title": u"Luati-le Banii sez 4 ep 1",
|
"title": "Luati-le Banii sez 4 ep 1",
|
||||||
u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
|
"description": "Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -27,10 +29,10 @@ class Ro220IE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
flashVars_str = self._search_regex(
|
flashVars_str = self._search_regex(
|
||||||
r'<param name="flashVars" value="([^"]+)"',
|
r'<param name="flashVars" value="([^"]+)"',
|
||||||
webpage, u'flashVars')
|
webpage, 'flashVars')
|
||||||
flashVars = compat_parse_qs(flashVars_str)
|
flashVars = compat_parse_qs(flashVars_str)
|
||||||
|
|
||||||
info = {
|
return {
|
||||||
'_type': 'video',
|
'_type': 'video',
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -39,4 +41,3 @@ class Ro220IE(InfoExtractor):
|
|||||||
'description': clean_html(flashVars['desc'][0]),
|
'description': clean_html(flashVars['desc'][0]),
|
||||||
'thumbnail': flashVars['preview'][0],
|
'thumbnail': flashVars['preview'][0],
|
||||||
}
|
}
|
||||||
return info
|
|
||||||
|
@@ -1,34 +1,36 @@
|
|||||||
import re
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .mtv import MTVServicesInfoExtractor
|
from .mtv import MTVServicesInfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class SouthParkStudiosIE(MTVServicesInfoExtractor):
|
class SouthParkStudiosIE(MTVServicesInfoExtractor):
|
||||||
IE_NAME = u'southparkstudios.com'
|
IE_NAME = 'southparkstudios.com'
|
||||||
_VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
|
_VALID_URL = r'https?://(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
|
||||||
|
|
||||||
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
|
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
|
'url': 'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
|
||||||
u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': 'a7bff6c2-ed00-11e0-aca6-0026b9414f30',
|
||||||
u'title': u'Bat Daded',
|
'ext': 'mp4',
|
||||||
u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.',
|
'title': 'Bat Daded',
|
||||||
|
'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.',
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
|
||||||
class SouthparkDeIE(SouthParkStudiosIE):
|
class SouthparkDeIE(SouthParkStudiosIE):
|
||||||
IE_NAME = u'southpark.de'
|
IE_NAME = 'southpark.de'
|
||||||
_VALID_URL = r'(https?://)?(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
|
_VALID_URL = r'https?://(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
|
||||||
_FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'
|
_FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
u'url': u'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured',
|
'url': 'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured',
|
||||||
u'file': u'85487c96-b3b9-4e39-9127-ad88583d9bf2.mp4',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '85487c96-b3b9-4e39-9127-ad88583d9bf2',
|
||||||
u'title': u'The Government Won\'t Respect My Privacy',
|
'ext': 'mp4',
|
||||||
u'description': u'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
|
'title': 'The Government Won\'t Respect My Privacy',
|
||||||
|
'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
@@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@@ -6,20 +8,20 @@ from .common import InfoExtractor
|
|||||||
class SpiegelIE(InfoExtractor):
|
class SpiegelIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
|
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
|
'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
|
||||||
u'file': u'1259285.mp4',
|
'file': '1259285.mp4',
|
||||||
u'md5': u'2c2754212136f35fb4b19767d242f66e',
|
'md5': '2c2754212136f35fb4b19767d242f66e',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv"
|
'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv',
|
||||||
}
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
u'url': u'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
|
'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
|
||||||
u'file': u'1309159.mp4',
|
'file': '1309159.mp4',
|
||||||
u'md5': u'f2cdf638d7aa47654e251e1aee360af1',
|
'md5': 'f2cdf638d7aa47654e251e1aee360af1',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'title': u'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers'
|
'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers',
|
||||||
}
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -29,17 +31,17 @@ class SpiegelIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_title = self._html_search_regex(
|
video_title = self._html_search_regex(
|
||||||
r'<div class="module-title">(.*?)</div>', webpage, u'title')
|
r'<div class="module-title">(.*?)</div>', webpage, 'title')
|
||||||
|
|
||||||
xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
|
xml_url = 'http://video2.spiegel.de/flash/' + video_id + '.xml'
|
||||||
idoc = self._download_xml(
|
idoc = self._download_xml(
|
||||||
xml_url, video_id,
|
xml_url, video_id,
|
||||||
note=u'Downloading XML', errnote=u'Failed to download XML')
|
note='Downloading XML', errnote='Failed to download XML')
|
||||||
|
|
||||||
formats = [
|
formats = [
|
||||||
{
|
{
|
||||||
'format_id': n.tag.rpartition('type')[2],
|
'format_id': n.tag.rpartition('type')[2],
|
||||||
'url': u'http://video2.spiegel.de/flash/' + n.find('./filename').text,
|
'url': 'http://video2.spiegel.de/flash/' + n.find('./filename').text,
|
||||||
'width': int(n.find('./width').text),
|
'width': int(n.find('./width').text),
|
||||||
'height': int(n.find('./height').text),
|
'height': int(n.find('./height').text),
|
||||||
'abr': int(n.find('./audiobitrate').text),
|
'abr': int(n.find('./audiobitrate').text),
|
||||||
@@ -55,10 +57,9 @@ class SpiegelIE(InfoExtractor):
|
|||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
info = {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
return info
|
|
||||||
|
@@ -62,24 +62,30 @@ class SubtitlesInfoExtractor(InfoExtractor):
|
|||||||
subtitles[sub_lang] = subtitle
|
subtitles[sub_lang] = subtitle
|
||||||
return subtitles
|
return subtitles
|
||||||
|
|
||||||
|
def _download_subtitle_url(self, sub_lang, url):
|
||||||
|
return self._download_webpage(url, None, note=False)
|
||||||
|
|
||||||
def _request_subtitle_url(self, sub_lang, url):
|
def _request_subtitle_url(self, sub_lang, url):
|
||||||
""" makes the http request for the subtitle """
|
""" makes the http request for the subtitle """
|
||||||
try:
|
try:
|
||||||
sub = self._download_webpage(url, None, note=False)
|
return self._download_subtitle_url(sub_lang, url)
|
||||||
except ExtractorError as err:
|
except ExtractorError as err:
|
||||||
self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
|
self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
|
||||||
return
|
return
|
||||||
if not sub:
|
if not sub:
|
||||||
self._downloader.report_warning(u'Did not fetch video subtitles')
|
self._downloader.report_warning(u'Did not fetch video subtitles')
|
||||||
return
|
return
|
||||||
return sub
|
|
||||||
|
|
||||||
def _get_available_subtitles(self, video_id, webpage):
|
def _get_available_subtitles(self, video_id, webpage):
|
||||||
"""
|
"""
|
||||||
returns {sub_lang: url} or {} if not available
|
returns {sub_lang: url} or {} if not available
|
||||||
Must be redefined by the subclasses
|
Must be redefined by the subclasses
|
||||||
"""
|
"""
|
||||||
pass
|
|
||||||
|
# By default, allow implementations to simply pass in the result
|
||||||
|
assert isinstance(webpage, dict), \
|
||||||
|
'_get_available_subtitles not implemented'
|
||||||
|
return webpage
|
||||||
|
|
||||||
def _get_available_automatic_caption(self, video_id, webpage):
|
def _get_available_automatic_caption(self, video_id, webpage):
|
||||||
"""
|
"""
|
||||||
|
50
youtube_dl/extractor/tinypic.py
Normal file
50
youtube_dl/extractor/tinypic.py
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from youtube_dl.utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class TinyPicIE(InfoExtractor):
    """Information extractor for videos hosted on tinypic.com."""

    IE_NAME = 'tinypic'
    IE_DESC = 'tinypic.com videos'
    _VALID_URL = r'http://tinypic\.com/player\.php\?v=(?P<id>[^&]+)&s=\d+'

    _TEST = {
        'url': 'http://tinypic.com/player.php?v=6xw7tc%3E&s=5#.UtqZmbRFCM8',
        'md5': '609b74432465364e72727ebc6203f044',
        'info_dict': {
            'id': '6xw7tc',
            'ext': 'flv',
            'title': 'shadow phenomenon weird',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the player page addressed by ``url``.

        The file id and server id are read from the ``fo.addVariable`` calls
        in the page script and combined into the media and thumbnail URLs.
        The title is the "keywords" meta content with the site's fixed
        keyword suffix removed, or an empty string when that suffix is not
        present.

        Raises ExtractorError (marked as expected) when the page does not
        contain the player variables.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id, 'Downloading page')

        # Both fragments are raw strings so that the regex escapes (\s, \d,
        # \., \() reach the re module untouched instead of being parsed as
        # (invalid) string escape sequences.
        mobj = re.search(
            r'(?m)fo\.addVariable\("file",\s"(?P<fileid>[\da-z]+)"\);\n'
            r'\s+fo\.addVariable\("s",\s"(?P<serverid>\d+)"\);', webpage)
        if mobj is None:
            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        file_id = mobj.group('fileid')
        server_id = mobj.group('serverid')

        KEYWORDS_SUFFIX = ', Video, images, photos, videos, myspace, ebay, video hosting, photo hosting'
        keywords = self._html_search_meta('keywords', webpage, 'title')
        # NOTE(review): the meta lookup presumably yields None when the tag is
        # absent -- confirm against the helper; the truthiness guard keeps
        # that case from failing on .endswith().
        if keywords and keywords.endswith(KEYWORDS_SUFFIX):
            title = keywords[:-len(KEYWORDS_SUFFIX)]
        else:
            title = ''

        video_url = 'http://v%s.tinypic.com/%s.flv' % (server_id, file_id)
        thumbnail = 'http://v%s.tinypic.com/%s_th.jpg' % (server_id, file_id)

        return {
            'id': file_id,
            'url': video_url,
            'thumbnail': thumbnail,
            'title': title
        }
|
@@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@@ -6,12 +8,13 @@ from .common import InfoExtractor
|
|||||||
class TrailerAddictIE(InfoExtractor):
|
class TrailerAddictIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'
|
_VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
|
'url': 'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
|
||||||
u'file': u'76184.mp4',
|
'md5': '41365557f3c8c397d091da510e73ceb4',
|
||||||
u'md5': u'57e39dbcf4142ceb8e1f242ff423fd71',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '76184',
|
||||||
u"title": u"Prince Avalanche Trailer",
|
'ext': 'mp4',
|
||||||
u"description": u"Trailer for Prince Avalanche.Two highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind."
|
'title': 'Prince Avalanche Trailer',
|
||||||
|
'description': 'Trailer for Prince Avalanche.\n\nTwo highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind.',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -22,9 +25,15 @@ class TrailerAddictIE(InfoExtractor):
|
|||||||
|
|
||||||
title = self._search_regex(r'<title>(.+?)</title>',
|
title = self._search_regex(r'<title>(.+?)</title>',
|
||||||
webpage, 'video title').replace(' - Trailer Addict','')
|
webpage, 'video title').replace(' - Trailer Addict','')
|
||||||
view_count = self._search_regex(r'Views: (.+?)<br />',
|
view_count_str = self._search_regex(
|
||||||
webpage, 'Views Count')
|
r'<span class="views_n">([0-9,.]+)</span>',
|
||||||
video_id = self._og_search_property('video', webpage, 'Video id').split('=')[1]
|
webpage, 'view count', fatal=False)
|
||||||
|
view_count = (
|
||||||
|
None if view_count_str is None
|
||||||
|
else int(view_count_str.replace(',', '')))
|
||||||
|
video_id = self._search_regex(
|
||||||
|
r'<param\s+name="movie"\s+value="/emb/([0-9]+)"\s*/>',
|
||||||
|
webpage, 'video id')
|
||||||
|
|
||||||
# Presence of (no)watchplus function indicates HD quality is available
|
# Presence of (no)watchplus function indicates HD quality is available
|
||||||
if re.search(r'function (no)?watchplus()', webpage):
|
if re.search(r'function (no)?watchplus()', webpage):
|
||||||
@@ -39,14 +48,16 @@ class TrailerAddictIE(InfoExtractor):
|
|||||||
info_webpage, 'Download url').replace('%3F','?')
|
info_webpage, 'Download url').replace('%3F','?')
|
||||||
thumbnail_url = self._search_regex(r'&image=(.+?)&',
|
thumbnail_url = self._search_regex(r'&image=(.+?)&',
|
||||||
info_webpage, 'thumbnail url')
|
info_webpage, 'thumbnail url')
|
||||||
ext = final_url.split('.')[-1].split('?')[0]
|
|
||||||
|
|
||||||
return [{
|
description = self._html_search_regex(
|
||||||
'id' : video_id,
|
r'(?s)<div class="synopsis">.*?<div class="movie_label_info"[^>]*>(.*?)</div>',
|
||||||
'url' : final_url,
|
webpage, 'description', fatal=False)
|
||||||
'ext' : ext,
|
|
||||||
'title' : title,
|
return {
|
||||||
'thumbnail' : thumbnail_url,
|
'id': video_id,
|
||||||
'description' : self._og_search_description(webpage),
|
'url': final_url,
|
||||||
'view_count' : view_count,
|
'title': title,
|
||||||
}]
|
'thumbnail': thumbnail_url,
|
||||||
|
'description': description,
|
||||||
|
'view_count': view_count,
|
||||||
|
}
|
||||||
|
@@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
@@ -10,36 +12,36 @@ from ..utils import (
|
|||||||
|
|
||||||
class UstreamIE(InfoExtractor):
|
class UstreamIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
|
_VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
|
||||||
IE_NAME = u'ustream'
|
IE_NAME = 'ustream'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.ustream.tv/recorded/20274954',
|
'url': 'http://www.ustream.tv/recorded/20274954',
|
||||||
u'file': u'20274954.flv',
|
'file': '20274954.flv',
|
||||||
u'md5': u'088f151799e8f572f84eb62f17d73e5c',
|
'md5': '088f151799e8f572f84eb62f17d73e5c',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u"uploader": u"Young Americans for Liberty",
|
"uploader": "Young Americans for Liberty",
|
||||||
u"title": u"Young Americans for Liberty February 7, 2012 2:28 AM"
|
"title": "Young Americans for Liberty February 7, 2012 2:28 AM",
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
m = re.match(self._VALID_URL, url)
|
m = re.match(self._VALID_URL, url)
|
||||||
video_id = m.group('videoID')
|
video_id = m.group('videoID')
|
||||||
|
|
||||||
video_url = u'http://tcdn.ustream.tv/video/%s' % video_id
|
video_url = 'http://tcdn.ustream.tv/video/%s' % video_id
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
|
video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
|
||||||
webpage, u'title')
|
webpage, 'title')
|
||||||
|
|
||||||
uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
|
uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
|
||||||
webpage, u'uploader', fatal=False, flags=re.DOTALL)
|
webpage, 'uploader', fatal=False, flags=re.DOTALL)
|
||||||
|
|
||||||
thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
|
thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
|
||||||
webpage, u'thumbnail', fatal=False)
|
webpage, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
info = {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
@@ -47,11 +49,11 @@ class UstreamIE(InfoExtractor):
|
|||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
}
|
}
|
||||||
return info
|
|
||||||
|
|
||||||
class UstreamChannelIE(InfoExtractor):
|
class UstreamChannelIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
|
_VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
|
||||||
IE_NAME = u'ustream:channel'
|
IE_NAME = 'ustream:channel'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
m = re.match(self._VALID_URL, url)
|
m = re.match(self._VALID_URL, url)
|
||||||
|
@@ -1,5 +1,6 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
@@ -22,16 +23,16 @@ class VevoIE(InfoExtractor):
|
|||||||
vevo:)
|
vevo:)
|
||||||
(?P<id>[^&?#]+)'''
|
(?P<id>[^&?#]+)'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
||||||
u'file': u'GB1101300280.mp4',
|
'file': 'GB1101300280.mp4',
|
||||||
u"md5": u"06bea460acb744eab74a9d7dcb4bfd61",
|
"md5": "06bea460acb744eab74a9d7dcb4bfd61",
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u"upload_date": u"20130624",
|
"upload_date": "20130624",
|
||||||
u"uploader": u"Hurts",
|
"uploader": "Hurts",
|
||||||
u"title": u"Somebody to Die For",
|
"title": "Somebody to Die For",
|
||||||
u"duration": 230.12,
|
"duration": 230.12,
|
||||||
u"width": 1920,
|
"width": 1920,
|
||||||
u"height": 1080,
|
"height": 1080,
|
||||||
}
|
}
|
||||||
}]
|
}]
|
||||||
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
|
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
|
||||||
@@ -44,7 +45,7 @@ class VevoIE(InfoExtractor):
|
|||||||
if version['version'] > last_version['version']:
|
if version['version'] > last_version['version']:
|
||||||
last_version = version
|
last_version = version
|
||||||
if last_version['version'] == -1:
|
if last_version['version'] == -1:
|
||||||
raise ExtractorError(u'Unable to extract last version of the video')
|
raise ExtractorError('Unable to extract last version of the video')
|
||||||
|
|
||||||
renditions = xml.etree.ElementTree.fromstring(last_version['data'])
|
renditions = xml.etree.ElementTree.fromstring(last_version['data'])
|
||||||
formats = []
|
formats = []
|
||||||
@@ -85,7 +86,7 @@ class VevoIE(InfoExtractor):
|
|||||||
format_url = self._SMIL_BASE_URL + m.group('path')
|
format_url = self._SMIL_BASE_URL + m.group('path')
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
'format_id': u'SMIL_' + m.group('cbr'),
|
'format_id': 'SMIL_' + m.group('cbr'),
|
||||||
'vcodec': m.group('vcodec'),
|
'vcodec': m.group('vcodec'),
|
||||||
'acodec': m.group('acodec'),
|
'acodec': m.group('acodec'),
|
||||||
'vbr': int(m.group('vbr')),
|
'vbr': int(m.group('vbr')),
|
||||||
@@ -101,26 +102,25 @@ class VevoIE(InfoExtractor):
|
|||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
|
json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
|
||||||
info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
|
video_info = self._download_json(json_url, video_id)['video']
|
||||||
video_info = json.loads(info_json)['video']
|
|
||||||
|
|
||||||
formats = self._formats_from_json(video_info)
|
formats = self._formats_from_json(video_info)
|
||||||
try:
|
try:
|
||||||
smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
|
smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
|
||||||
self._SMIL_BASE_URL, video_id, video_id.lower())
|
self._SMIL_BASE_URL, video_id, video_id.lower())
|
||||||
smil_xml = self._download_webpage(smil_url, video_id,
|
smil_xml = self._download_webpage(smil_url, video_id,
|
||||||
u'Downloading SMIL info')
|
'Downloading SMIL info')
|
||||||
formats.extend(self._formats_from_smil(smil_xml))
|
formats.extend(self._formats_from_smil(smil_xml))
|
||||||
except ExtractorError as ee:
|
except ExtractorError as ee:
|
||||||
if not isinstance(ee.cause, compat_HTTPError):
|
if not isinstance(ee.cause, compat_HTTPError):
|
||||||
raise
|
raise
|
||||||
self._downloader.report_warning(
|
self._downloader.report_warning(
|
||||||
u'Cannot download SMIL information, falling back to JSON ..')
|
'Cannot download SMIL information, falling back to JSON ..')
|
||||||
|
|
||||||
timestamp_ms = int(self._search_regex(
|
timestamp_ms = int(self._search_regex(
|
||||||
r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))
|
r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date'))
|
||||||
upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000)
|
upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000)
|
||||||
info = {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_info['title'],
|
'title': video_info['title'],
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
@@ -129,5 +129,3 @@ class VevoIE(InfoExtractor):
|
|||||||
'uploader': video_info['mainArtists'][0]['artistName'],
|
'uploader': video_info['mainArtists'][0]['artistName'],
|
||||||
'duration': video_info['duration'],
|
'duration': video_info['duration'],
|
||||||
}
|
}
|
||||||
|
|
||||||
return info
|
|
||||||
|
80
youtube_dl/extractor/vube.py
Normal file
80
youtube_dl/extractor/vube.py
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class VubeIE(InfoExtractor):
    """Information extractor for videos hosted on vube.com.

    The video id is the 10-character alphanumeric token that follows the
    channel name in the URL (see ``_VALID_URL``).
    """

    IE_NAME = 'vube'
    IE_DESC = 'Vube.com'
    _VALID_URL = r'http://vube\.com/[^/]+/(?P<id>[\da-zA-Z]{10})'

    _TEST = {
        'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon',
        'md5': 'f81dcf6d0448e3291f54380181695821',
        'info_dict': {
            'id': 'YL2qNPkqon',
            'ext': 'mp4',
            'title': 'Chiara Grispo - Price Tag by Jessie J',
            'description': 'md5:8ea652a1f36818352428cb5134933313',
            'thumbnail': 'http://frame.thestaticvube.com/snap/228x128/102e7e63057-5ebc-4f5c-4065-6ce4ebde131f.jpg',
            'uploader': 'Chiara.Grispo',
            'uploader_id': '1u3hX0znhP',
            'upload_date': '20140103',
            'duration': 170.56
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the Vube video addressed by *url*.

        Downloads the video JSON and the comment JSON from the vube.com
        API and returns a dict with the id, the available formats and the
        descriptive metadata (title, uploader, dates and counters).
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        video = self._download_json(
            'http://vube.com/api/v2/video/%s' % video_id,
            video_id, 'Downloading video JSON')

        public_id = video['public_id']

        # Only renditions whose transcoding has finished are offered.
        formats = [
            {
                'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (
                    fmt['media_resolution_id'], public_id),
                'height': int(fmt['height']),
                'abr': int(fmt['audio_bitrate']),
                'vbr': int(fmt['video_bitrate']),
                'format_id': fmt['media_resolution_id'],
            }
            for fmt in video['mtm']
            if fmt['transcoding_status'] == 'processed'
        ]
        self._sort_formats(formats)

        title = video['title']
        description = video.get('description')

        # The thumbnail is treated as optional so that a missing value does
        # not abort the extraction. Protocol-relative URLs ('//host/...')
        # are given an explicit scheme.
        thumbnail = video.get('thumbnail_src')
        if thumbnail and thumbnail.startswith('//'):
            thumbnail = 'http:' + thumbnail

        uploader = video['user_alias']
        uploader_id = video['user_url_id']

        # Use UTC so the resulting date does not depend on the timezone of
        # the machine running the extractor.
        # NOTE(review): assumes 'upload_time' is a Unix timestamp in
        # seconds -- confirm against the API.
        upload_date = datetime.datetime.utcfromtimestamp(
            int(video['upload_time'])).strftime('%Y%m%d')

        duration = video['duration']
        view_count = video['raw_view_count']
        like_count = video['total_likes']
        dislike_count = video['total_hates']

        # The comment total comes from a separate API endpoint.
        comment = self._download_json(
            'http://vube.com/api/video/%s/comment' % video_id,
            video_id, 'Downloading video comment JSON')
        comment_count = comment['total']

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'upload_date': upload_date,
            'duration': duration,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'comment_count': comment_count,
        }
|
@@ -5,7 +5,6 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
unescapeHTML,
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2014.01.30.2'
|
__version__ = '2014.02.03'
|
||||||
|
Reference in New Issue
Block a user