Compare commits
52 Commits
2014.01.30
...
2014.02.03
Author | SHA1 | Date | |
---|---|---|---|
|
9d3ac7444d | ||
|
588128d054 | ||
|
8e93b9b9aa | ||
|
b4bcffefa3 | ||
|
2b39af9b4f | ||
|
23fe495feb | ||
|
b5dbe89bba | ||
|
dbe80ca7ad | ||
|
009a3408f5 | ||
|
b58e3c8918 | ||
|
56b6faf91e | ||
|
7ac1f877a7 | ||
|
d55433bbfd | ||
|
f0ce2bc1c5 | ||
|
c3bc00b90e | ||
|
ff6b7b049b | ||
|
f46359121f | ||
|
37c1525c17 | ||
|
c85e4cf7b4 | ||
|
c66dcda287 | ||
|
6d845922ab | ||
|
2949cbe036 | ||
|
c3309a7774 | ||
|
7aed837595 | ||
|
0eb799bae9 | ||
|
4baff4a4ae | ||
|
45d7bc2f8b | ||
|
c0c2ddddcd | ||
|
a96ed91610 | ||
|
c1206423c4 | ||
|
659aa21ba1 | ||
|
efd02e858a | ||
|
3bf8bc7f37 | ||
|
8ccda826d5 | ||
|
b9381e43c2 | ||
|
fcdea2666d | ||
|
c4db377cbb | ||
|
90dc5e8693 | ||
|
c81a855b0f | ||
|
c8d8ec8567 | ||
|
4f879a5be0 | ||
|
1a0648b4a9 | ||
|
3c1b4669d0 | ||
|
24b3d5e538 | ||
|
ab083b08ab | ||
|
89acb96927 | ||
|
79752e18b1 | ||
|
55b41c723c | ||
|
9f8928d032 | ||
|
3effa7ceaa | ||
|
38a40276ec | ||
|
d1b30713fb |
12
README.md
12
README.md
@@ -53,6 +53,12 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
from google videos for youtube-dl "large
|
from google videos for youtube-dl "large
|
||||||
apple". By default (with value "auto")
|
apple". By default (with value "auto")
|
||||||
youtube-dl guesses.
|
youtube-dl guesses.
|
||||||
|
--ignore-config Do not read configuration files. When given
|
||||||
|
in the global configuration file /etc
|
||||||
|
/youtube-dl.conf: do not read the user
|
||||||
|
configuration in ~/.config/youtube-dl.conf
|
||||||
|
(%APPDATA%/youtube-dl/config.txt on
|
||||||
|
Windows)
|
||||||
|
|
||||||
## Video Selection:
|
## Video Selection:
|
||||||
--playlist-start NUMBER playlist video to start at (default is 1)
|
--playlist-start NUMBER playlist video to start at (default is 1)
|
||||||
@@ -325,7 +331,7 @@ Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unz
|
|||||||
|
|
||||||
To run the exe you first need to install the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29).
|
To run the exe you first need to install the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29).
|
||||||
|
|
||||||
# BUILD INSTRUCTIONS
|
# DEVELOPER INSTRUCTIONS
|
||||||
|
|
||||||
Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
|
Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
|
||||||
|
|
||||||
@@ -347,6 +353,10 @@ If you want to create a build of youtube-dl yourself, you'll need
|
|||||||
* zip
|
* zip
|
||||||
* nosetests
|
* nosetests
|
||||||
|
|
||||||
|
### Adding support for a new site
|
||||||
|
|
||||||
|
If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor` and add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py Test_Download.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/).
|
||||||
|
|
||||||
# BUGS
|
# BUGS
|
||||||
|
|
||||||
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues> . Unless you were prompted to do so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email.
|
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues> . Unless you were prompted to do so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email.
|
||||||
|
@@ -10,6 +10,7 @@ from test.helper import FakeYDL, md5
|
|||||||
|
|
||||||
|
|
||||||
from youtube_dl.extractor import (
|
from youtube_dl.extractor import (
|
||||||
|
BlipTVIE,
|
||||||
YoutubeIE,
|
YoutubeIE,
|
||||||
DailymotionIE,
|
DailymotionIE,
|
||||||
TEDIE,
|
TEDIE,
|
||||||
@@ -202,5 +203,25 @@ class TestTedSubtitles(BaseTestSubtitles):
|
|||||||
for lang in langs:
|
for lang in langs:
|
||||||
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|
self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|
||||||
|
|
||||||
|
|
||||||
|
class TestBlipTVSubtitles(BaseTestSubtitles):
|
||||||
|
url = 'http://blip.tv/a/a-6603250'
|
||||||
|
IE = BlipTVIE
|
||||||
|
|
||||||
|
def test_list_subtitles(self):
|
||||||
|
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
||||||
|
self.DL.params['listsubtitles'] = True
|
||||||
|
info_dict = self.getInfoDict()
|
||||||
|
self.assertEqual(info_dict, None)
|
||||||
|
|
||||||
|
def test_allsubtitles(self):
|
||||||
|
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
||||||
|
self.DL.params['writesubtitles'] = True
|
||||||
|
self.DL.params['allsubtitles'] = True
|
||||||
|
subtitles = self.getSubtitles()
|
||||||
|
self.assertEqual(set(subtitles.keys()), set(['en']))
|
||||||
|
self.assertEqual(md5(subtitles['en']), '5b75c300af65fe4476dff79478bb93e4')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
24
youtube-dl.plugin.zsh
Normal file
24
youtube-dl.plugin.zsh
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
# This allows the youtube-dl command to be installed in ZSH using antigen.
|
||||||
|
# Antigen is a bundle manager. It allows you to enhance the functionality of
|
||||||
|
# your zsh session by installing bundles and themes easily.
|
||||||
|
|
||||||
|
# Antigen documentation:
|
||||||
|
# http://antigen.sharats.me/
|
||||||
|
# https://github.com/zsh-users/antigen
|
||||||
|
|
||||||
|
# Install youtube-dl:
|
||||||
|
# antigen bundle rg3/youtube-dl
|
||||||
|
# Bundles installed by antigen are available for use immediately.
|
||||||
|
|
||||||
|
# Update youtube-dl (and all other antigen bundles):
|
||||||
|
# antigen update
|
||||||
|
|
||||||
|
# The antigen command will download the git repository to a folder and then
|
||||||
|
# execute an enabling script (this file). The complete process for loading the
|
||||||
|
# code is documented here:
|
||||||
|
# https://github.com/zsh-users/antigen#notes-on-writing-plugins
|
||||||
|
|
||||||
|
# This specific script just aliases youtube-dl to the python script that this
|
||||||
|
# library provides. This requires updating the PYTHONPATH to ensure that the
|
||||||
|
# full set of code can be located.
|
||||||
|
alias youtube-dl="PYTHONPATH=$(dirname $0) $(dirname $0)/bin/youtube-dl"
|
@@ -100,6 +100,43 @@ def parseOpts(overrideArguments=None):
|
|||||||
optionf.close()
|
optionf.close()
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
def _readUserConf():
|
||||||
|
xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
|
||||||
|
if xdg_config_home:
|
||||||
|
userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
|
||||||
|
if not os.path.isfile(userConfFile):
|
||||||
|
userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
|
||||||
|
else:
|
||||||
|
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
|
||||||
|
if not os.path.isfile(userConfFile):
|
||||||
|
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
|
||||||
|
userConf = _readOptions(userConfFile, None)
|
||||||
|
|
||||||
|
if userConf is None:
|
||||||
|
appdata_dir = os.environ.get('appdata')
|
||||||
|
if appdata_dir:
|
||||||
|
userConf = _readOptions(
|
||||||
|
os.path.join(appdata_dir, 'youtube-dl', 'config'),
|
||||||
|
default=None)
|
||||||
|
if userConf is None:
|
||||||
|
userConf = _readOptions(
|
||||||
|
os.path.join(appdata_dir, 'youtube-dl', 'config.txt'),
|
||||||
|
default=None)
|
||||||
|
|
||||||
|
if userConf is None:
|
||||||
|
userConf = _readOptions(
|
||||||
|
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'),
|
||||||
|
default=None)
|
||||||
|
if userConf is None:
|
||||||
|
userConf = _readOptions(
|
||||||
|
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'),
|
||||||
|
default=None)
|
||||||
|
|
||||||
|
if userConf is None:
|
||||||
|
userConf = []
|
||||||
|
|
||||||
|
return userConf
|
||||||
|
|
||||||
def _format_option_string(option):
|
def _format_option_string(option):
|
||||||
''' ('-o', '--option') -> -o, --format METAVAR'''
|
''' ('-o', '--option') -> -o, --format METAVAR'''
|
||||||
|
|
||||||
@@ -203,6 +240,11 @@ def parseOpts(overrideArguments=None):
|
|||||||
general.add_option('--default-search',
|
general.add_option('--default-search',
|
||||||
dest='default_search', metavar='PREFIX',
|
dest='default_search', metavar='PREFIX',
|
||||||
help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
|
help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
|
||||||
|
general.add_option(
|
||||||
|
'--ignore-config',
|
||||||
|
action='store_true',
|
||||||
|
help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
|
||||||
|
|
||||||
|
|
||||||
selection.add_option(
|
selection.add_option(
|
||||||
'--playlist-start',
|
'--playlist-start',
|
||||||
@@ -457,44 +499,18 @@ def parseOpts(overrideArguments=None):
|
|||||||
if opts.verbose:
|
if opts.verbose:
|
||||||
write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
|
write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
|
||||||
else:
|
else:
|
||||||
systemConf = _readOptions('/etc/youtube-dl.conf')
|
|
||||||
|
|
||||||
xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
|
|
||||||
if xdg_config_home:
|
|
||||||
userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
|
|
||||||
if not os.path.isfile(userConfFile):
|
|
||||||
userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
|
|
||||||
else:
|
|
||||||
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
|
|
||||||
if not os.path.isfile(userConfFile):
|
|
||||||
userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
|
|
||||||
userConf = _readOptions(userConfFile, None)
|
|
||||||
|
|
||||||
if userConf is None:
|
|
||||||
appdata_dir = os.environ.get('appdata')
|
|
||||||
if appdata_dir:
|
|
||||||
userConf = _readOptions(
|
|
||||||
os.path.join(appdata_dir, 'youtube-dl', 'config'),
|
|
||||||
default=None)
|
|
||||||
if userConf is None:
|
|
||||||
userConf = _readOptions(
|
|
||||||
os.path.join(appdata_dir, 'youtube-dl', 'config.txt'),
|
|
||||||
default=None)
|
|
||||||
|
|
||||||
if userConf is None:
|
|
||||||
userConf = _readOptions(
|
|
||||||
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf'),
|
|
||||||
default=None)
|
|
||||||
if userConf is None:
|
|
||||||
userConf = _readOptions(
|
|
||||||
os.path.join(os.path.expanduser('~'), 'youtube-dl.conf.txt'),
|
|
||||||
default=None)
|
|
||||||
|
|
||||||
if userConf is None:
|
|
||||||
userConf = []
|
|
||||||
|
|
||||||
commandLineConf = sys.argv[1:]
|
commandLineConf = sys.argv[1:]
|
||||||
|
if '--ignore-config' in commandLineConf:
|
||||||
|
systemConf = []
|
||||||
|
userConf = []
|
||||||
|
else:
|
||||||
|
systemConf = _readOptions('/etc/youtube-dl.conf')
|
||||||
|
if '--ignore-config' in systemConf:
|
||||||
|
userConf = []
|
||||||
|
else:
|
||||||
|
userConf = _readUserConf()
|
||||||
argv = systemConf + userConf + commandLineConf
|
argv = systemConf + userConf + commandLineConf
|
||||||
|
|
||||||
opts, args = parser.parse_args(argv)
|
opts, args = parser.parse_args(argv)
|
||||||
if opts.verbose:
|
if opts.verbose:
|
||||||
write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
|
write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
|
||||||
|
@@ -110,6 +110,7 @@ from .khanacademy import KhanAcademyIE
|
|||||||
from .kickstarter import KickStarterIE
|
from .kickstarter import KickStarterIE
|
||||||
from .keek import KeekIE
|
from .keek import KeekIE
|
||||||
from .la7 import LA7IE
|
from .la7 import LA7IE
|
||||||
|
from .lifenews import LifeNewsIE
|
||||||
from .liveleak import LiveLeakIE
|
from .liveleak import LiveLeakIE
|
||||||
from .livestream import LivestreamIE, LivestreamOriginalIE
|
from .livestream import LivestreamIE, LivestreamOriginalIE
|
||||||
from .lynda import (
|
from .lynda import (
|
||||||
@@ -141,6 +142,7 @@ from .newgrounds import NewgroundsIE
|
|||||||
from .nhl import NHLIE, NHLVideocenterIE
|
from .nhl import NHLIE, NHLVideocenterIE
|
||||||
from .niconico import NiconicoIE
|
from .niconico import NiconicoIE
|
||||||
from .ninegag import NineGagIE
|
from .ninegag import NineGagIE
|
||||||
|
from .normalboots import NormalbootsIE
|
||||||
from .novamov import NovamovIE
|
from .novamov import NovamovIE
|
||||||
from .nowness import NownessIE
|
from .nowness import NownessIE
|
||||||
from .nowvideo import NowVideoIE
|
from .nowvideo import NowVideoIE
|
||||||
@@ -198,6 +200,7 @@ from .ted import TEDIE
|
|||||||
from .tf1 import TF1IE
|
from .tf1 import TF1IE
|
||||||
from .theplatform import ThePlatformIE
|
from .theplatform import ThePlatformIE
|
||||||
from .thisav import ThisAVIE
|
from .thisav import ThisAVIE
|
||||||
|
from .tinypic import TinyPicIE
|
||||||
from .toutv import TouTvIE
|
from .toutv import TouTvIE
|
||||||
from .traileraddict import TrailerAddictIE
|
from .traileraddict import TrailerAddictIE
|
||||||
from .trilulilu import TriluliluIE
|
from .trilulilu import TriluliluIE
|
||||||
@@ -228,6 +231,7 @@ from .vimeo import (
|
|||||||
from .vine import VineIE
|
from .vine import VineIE
|
||||||
from .viki import VikiIE
|
from .viki import VikiIE
|
||||||
from .vk import VKIE
|
from .vk import VKIE
|
||||||
|
from .vube import VubeIE
|
||||||
from .wat import WatIE
|
from .wat import WatIE
|
||||||
from .weibo import WeiboIE
|
from .weibo import WeiboIE
|
||||||
from .wimp import WimpIE
|
from .wimp import WimpIE
|
||||||
|
@@ -6,6 +6,7 @@ import re
|
|||||||
import socket
|
import socket
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_http_client,
|
compat_http_client,
|
||||||
compat_str,
|
compat_str,
|
||||||
@@ -17,112 +18,125 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class BlipTVIE(InfoExtractor):
|
class BlipTVIE(SubtitlesInfoExtractor):
|
||||||
"""Information extractor for blip.tv"""
|
"""Information extractor for blip.tv"""
|
||||||
|
|
||||||
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$'
|
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(?P<presumptive_id>.+)$'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
|
'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
|
||||||
'file': '5779306.mov',
|
|
||||||
'md5': 'c6934ad0b6acf2bd920720ec888eb812',
|
'md5': 'c6934ad0b6acf2bd920720ec888eb812',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '5779306',
|
||||||
|
'ext': 'mov',
|
||||||
'upload_date': '20111205',
|
'upload_date': '20111205',
|
||||||
'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
|
'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
|
||||||
'uploader': 'Comic Book Resources - CBR TV',
|
'uploader': 'Comic Book Resources - CBR TV',
|
||||||
'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
|
'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
# https://github.com/rg3/youtube-dl/pull/2274
|
||||||
def report_direct_download(self, title):
|
'note': 'Video with subtitles',
|
||||||
"""Report information extraction."""
|
'url': 'http://blip.tv/play/h6Uag5OEVgI.html',
|
||||||
self.to_screen('%s: Direct download detected' % title)
|
'md5': '309f9d25b820b086ca163ffac8031806',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6586561',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'uploader': 'Red vs. Blue',
|
||||||
|
'description': 'One-Zero-One',
|
||||||
|
'upload_date': '20130614',
|
||||||
|
'title': 'Red vs. Blue Season 11 Episode 1',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
presumptive_id = mobj.group('presumptive_id')
|
||||||
raise ExtractorError('Invalid URL: %s' % url)
|
|
||||||
|
|
||||||
# See https://github.com/rg3/youtube-dl/issues/857
|
# See https://github.com/rg3/youtube-dl/issues/857
|
||||||
embed_mobj = re.search(r'^(?:https?://)?(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', url)
|
embed_mobj = re.match(r'https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', url)
|
||||||
if embed_mobj:
|
if embed_mobj:
|
||||||
info_url = 'http://blip.tv/play/%s.x?p=1' % embed_mobj.group(1)
|
info_url = 'http://blip.tv/play/%s.x?p=1' % embed_mobj.group(1)
|
||||||
info_page = self._download_webpage(info_url, embed_mobj.group(1))
|
info_page = self._download_webpage(info_url, embed_mobj.group(1))
|
||||||
video_id = self._search_regex(r'data-episode-id="(\d+)', info_page, 'video_id')
|
video_id = self._search_regex(
|
||||||
|
r'data-episode-id="([0-9]+)', info_page, 'video_id')
|
||||||
return self.url_result('http://blip.tv/a/a-' + video_id, 'BlipTV')
|
return self.url_result('http://blip.tv/a/a-' + video_id, 'BlipTV')
|
||||||
|
|
||||||
if '?' in url:
|
cchar = '&' if '?' in url else '?'
|
||||||
cchar = '&'
|
|
||||||
else:
|
|
||||||
cchar = '?'
|
|
||||||
json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
|
json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
|
||||||
request = compat_urllib_request.Request(json_url)
|
request = compat_urllib_request.Request(json_url)
|
||||||
request.add_header('User-Agent', 'iTunes/10.6.1')
|
request.add_header('User-Agent', 'iTunes/10.6.1')
|
||||||
self.report_extraction(mobj.group(1))
|
|
||||||
urlh = self._request_webpage(request, None, False,
|
|
||||||
'unable to download video info webpage')
|
|
||||||
|
|
||||||
try:
|
json_data = self._download_json(request, video_id=presumptive_id)
|
||||||
json_code_bytes = urlh.read()
|
|
||||||
json_code = json_code_bytes.decode('utf-8')
|
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
|
||||||
raise ExtractorError('Unable to read video info webpage: %s' % compat_str(err))
|
|
||||||
|
|
||||||
try:
|
if 'Post' in json_data:
|
||||||
json_data = json.loads(json_code)
|
data = json_data['Post']
|
||||||
if 'Post' in json_data:
|
else:
|
||||||
data = json_data['Post']
|
data = json_data
|
||||||
else:
|
|
||||||
data = json_data
|
|
||||||
|
|
||||||
upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
|
video_id = compat_str(data['item_id'])
|
||||||
formats = []
|
upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
|
||||||
if 'additionalMedia' in data:
|
subtitles = {}
|
||||||
for f in sorted(data['additionalMedia'], key=lambda f: int(f['media_height'])):
|
formats = []
|
||||||
if not int(f['media_width']): # filter m3u8
|
if 'additionalMedia' in data:
|
||||||
continue
|
for f in data['additionalMedia']:
|
||||||
formats.append({
|
if f.get('file_type_srt') == 1:
|
||||||
'url': f['url'],
|
LANGS = {
|
||||||
'format_id': f['role'],
|
'english': 'en',
|
||||||
'width': int(f['media_width']),
|
}
|
||||||
'height': int(f['media_height']),
|
lang = f['role'].rpartition('-')[-1].strip().lower()
|
||||||
})
|
langcode = LANGS.get(lang, lang)
|
||||||
else:
|
subtitles[langcode] = f['url']
|
||||||
|
continue
|
||||||
|
if not int(f['media_width']): # filter m3u8
|
||||||
|
continue
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': data['media']['url'],
|
'url': f['url'],
|
||||||
'width': int(data['media']['width']),
|
'format_id': f['role'],
|
||||||
'height': int(data['media']['height']),
|
'width': int(f['media_width']),
|
||||||
|
'height': int(f['media_height']),
|
||||||
})
|
})
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'url': data['media']['url'],
|
||||||
|
'width': int(data['media']['width']),
|
||||||
|
'height': int(data['media']['height']),
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
self._sort_formats(formats)
|
# subtitles
|
||||||
|
video_subtitles = self.extract_subtitles(video_id, subtitles)
|
||||||
|
if self._downloader.params.get('listsubtitles', False):
|
||||||
|
self._list_available_subtitles(video_id, subtitles)
|
||||||
|
return
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': compat_str(data['item_id']),
|
'id': video_id,
|
||||||
'uploader': data['display_name'],
|
'uploader': data['display_name'],
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
'title': data['title'],
|
'title': data['title'],
|
||||||
'thumbnail': data['thumbnailUrl'],
|
'thumbnail': data['thumbnailUrl'],
|
||||||
'description': data['description'],
|
'description': data['description'],
|
||||||
'user_agent': 'iTunes/10.6.1',
|
'user_agent': 'iTunes/10.6.1',
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
'subtitles': video_subtitles,
|
||||||
except (ValueError, KeyError) as err:
|
}
|
||||||
raise ExtractorError('Unable to parse video information: %s' % repr(err))
|
|
||||||
|
def _download_subtitle_url(self, sub_lang, url):
|
||||||
|
# For some weird reason, blip.tv serves a video instead of subtitles
|
||||||
|
# when we request with a common UA
|
||||||
|
req = compat_urllib_request.Request(url)
|
||||||
|
req.add_header('Youtubedl-user-agent', 'youtube-dl')
|
||||||
|
return self._download_webpage(req, None, note=False)
|
||||||
|
|
||||||
|
|
||||||
class BlipTVUserIE(InfoExtractor):
|
class BlipTVUserIE(InfoExtractor):
|
||||||
"""Information Extractor for blip.tv users."""
|
|
||||||
|
|
||||||
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$'
|
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$'
|
||||||
_PAGE_SIZE = 12
|
_PAGE_SIZE = 12
|
||||||
IE_NAME = 'blip.tv:user'
|
IE_NAME = 'blip.tv:user'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
# Extract username
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
|
||||||
raise ExtractorError('Invalid URL: %s' % url)
|
|
||||||
|
|
||||||
username = mobj.group(1)
|
username = mobj.group(1)
|
||||||
|
|
||||||
page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'
|
page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'
|
||||||
@@ -131,7 +145,6 @@ class BlipTVUserIE(InfoExtractor):
|
|||||||
mobj = re.search(r'data-users-id="([^"]+)"', page)
|
mobj = re.search(r'data-users-id="([^"]+)"', page)
|
||||||
page_base = page_base % mobj.group(1)
|
page_base = page_base % mobj.group(1)
|
||||||
|
|
||||||
|
|
||||||
# Download video ids using BlipTV Ajax calls. Result size per
|
# Download video ids using BlipTV Ajax calls. Result size per
|
||||||
# query is limited (currently to 12 videos) so we need to query
|
# query is limited (currently to 12 videos) so we need to query
|
||||||
# page by page until there are no video ids - it means we got
|
# page by page until there are no video ids - it means we got
|
||||||
@@ -142,8 +155,8 @@ class BlipTVUserIE(InfoExtractor):
|
|||||||
|
|
||||||
while True:
|
while True:
|
||||||
url = page_base + "&page=" + str(pagenum)
|
url = page_base + "&page=" + str(pagenum)
|
||||||
page = self._download_webpage(url, username,
|
page = self._download_webpage(
|
||||||
'Downloading video ids from page %d' % pagenum)
|
url, username, 'Downloading video ids from page %d' % pagenum)
|
||||||
|
|
||||||
# Extract video identifiers
|
# Extract video identifiers
|
||||||
ids_in_page = []
|
ids_in_page = []
|
||||||
@@ -167,4 +180,4 @@ class BlipTVUserIE(InfoExtractor):
|
|||||||
|
|
||||||
urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]
|
urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]
|
||||||
url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
|
url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
|
||||||
return [self.playlist_result(url_entries, playlist_title = username)]
|
return [self.playlist_result(url_entries, playlist_title=username)]
|
||||||
|
@@ -1,12 +1,9 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import string
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
)
|
|
||||||
|
|
||||||
translation_table = {
|
translation_table = {
|
||||||
'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',
|
'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',
|
||||||
|
@@ -28,7 +28,25 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
'description': 'This video wasn\'t long enough, so we made it double-spaced.',
|
'description': 'This video wasn\'t long enough, so we made it double-spaced.',
|
||||||
'age_limit': 10,
|
'age_limit': 10,
|
||||||
},
|
},
|
||||||
}]
|
},
|
||||||
|
# embedded youtube video
|
||||||
|
{
|
||||||
|
'url': 'http://www.collegehumor.com/embed/6950457',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'W5gMp3ZjYg4',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
|
||||||
|
'uploader': 'Funnyplox TV',
|
||||||
|
'uploader_id': 'funnyploxtv',
|
||||||
|
'description': 'md5:506f69f7a297ed698ced3375f2363b0e',
|
||||||
|
'upload_date': '20140128',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'add_ie': ['Youtube'],
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
@@ -38,6 +56,12 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
data = json.loads(self._download_webpage(
|
data = json.loads(self._download_webpage(
|
||||||
jsonUrl, video_id, 'Downloading info JSON'))
|
jsonUrl, video_id, 'Downloading info JSON'))
|
||||||
vdata = data['video']
|
vdata = data['video']
|
||||||
|
if vdata.get('youtubeId') is not None:
|
||||||
|
return {
|
||||||
|
'_type': 'url',
|
||||||
|
'url': vdata['youtubeId'],
|
||||||
|
'ie_key': 'Youtube',
|
||||||
|
}
|
||||||
|
|
||||||
AGE_LIMITS = {'nc17': 18, 'r': 18, 'pg13': 13, 'pg': 10, 'g': 0}
|
AGE_LIMITS = {'nc17': 18, 'r': 18, 'pg13': 13, 'pg': 10, 'g': 0}
|
||||||
rating = vdata.get('rating')
|
rating = vdata.get('rating')
|
||||||
@@ -49,7 +73,7 @@ class CollegeHumorIE(InfoExtractor):
|
|||||||
PREFS = {'high_quality': 2, 'low_quality': 0}
|
PREFS = {'high_quality': 2, 'low_quality': 0}
|
||||||
formats = []
|
formats = []
|
||||||
for format_key in ('mp4', 'webm'):
|
for format_key in ('mp4', 'webm'):
|
||||||
for qname, qurl in vdata[format_key].items():
|
for qname, qurl in vdata.get(format_key, {}).items():
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': format_key + '_' + qname,
|
'format_id': format_key + '_' + qname,
|
||||||
'url': qurl,
|
'url': qurl,
|
||||||
|
@@ -399,7 +399,7 @@ class InfoExtractor(object):
|
|||||||
# Helper functions for extracting OpenGraph info
|
# Helper functions for extracting OpenGraph info
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _og_regexes(prop):
|
def _og_regexes(prop):
|
||||||
content_re = r'content=(?:"([^>]+?)"|\'(.+?)\')'
|
content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\')'
|
||||||
property_re = r'(?:name|property)=[\'"]og:%s[\'"]' % re.escape(prop)
|
property_re = r'(?:name|property)=[\'"]og:%s[\'"]' % re.escape(prop)
|
||||||
template = r'<meta[^>]+?%s[^>]+?%s'
|
template = r'<meta[^>]+?%s[^>]+?%s'
|
||||||
return [
|
return [
|
||||||
|
@@ -1,4 +1,6 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re, base64, zlib
|
import re, base64, zlib
|
||||||
from hashlib import sha1
|
from hashlib import sha1
|
||||||
from math import pow, sqrt, floor
|
from math import pow, sqrt, floor
|
||||||
@@ -18,29 +20,29 @@ from ..aes import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
class CrunchyrollIE(InfoExtractor):
|
class CrunchyrollIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:https?://)?(?:www\.)?(?P<url>crunchyroll\.com/[^/]*/[^/?&]*?(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
_VALID_URL = r'(?:https?://)?(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
u'url': u'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||||
u'file': u'645513.flv',
|
'file': '645513.flv',
|
||||||
#u'md5': u'b1639fd6ddfaa43788c85f6d1dddd412',
|
#'md5': 'b1639fd6ddfaa43788c85f6d1dddd412',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'title': u'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
|
'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
|
||||||
u'description': u'md5:2d17137920c64f2f49981a7797d275ef',
|
'description': 'md5:2d17137920c64f2f49981a7797d275ef',
|
||||||
u'thumbnail': u'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
|
'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
|
||||||
u'uploader': u'Yomiuri Telecasting Corporation (YTV)',
|
'uploader': 'Yomiuri Telecasting Corporation (YTV)',
|
||||||
u'upload_date': u'20131013',
|
'upload_date': '20131013',
|
||||||
},
|
},
|
||||||
u'params': {
|
'params': {
|
||||||
# rtmp
|
# rtmp
|
||||||
u'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_FORMAT_IDS = {
|
_FORMAT_IDS = {
|
||||||
u'360': (u'60', u'106'),
|
'360': ('60', '106'),
|
||||||
u'480': (u'61', u'106'),
|
'480': ('61', '106'),
|
||||||
u'720': (u'62', u'106'),
|
'720': ('62', '106'),
|
||||||
u'1080': (u'80', u'108'),
|
'1080': ('80', '108'),
|
||||||
}
|
}
|
||||||
|
|
||||||
def _decrypt_subtitles(self, data, iv, id):
|
def _decrypt_subtitles(self, data, iv, id):
|
||||||
@@ -63,7 +65,7 @@ class CrunchyrollIE(InfoExtractor):
|
|||||||
num3 = key ^ num1
|
num3 = key ^ num1
|
||||||
num4 = num3 ^ (num3 >> 3) ^ num2
|
num4 = num3 ^ (num3 >> 3) ^ num2
|
||||||
prefix = intlist_to_bytes(obfuscate_key_aux(20, 97, (1, 2)))
|
prefix = intlist_to_bytes(obfuscate_key_aux(20, 97, (1, 2)))
|
||||||
shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode(u'ascii')).digest())
|
shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode('ascii')).digest())
|
||||||
# Extend 160 Bit hash to 256 Bit
|
# Extend 160 Bit hash to 256 Bit
|
||||||
return shaHash + [0] * 12
|
return shaHash + [0] * 12
|
||||||
|
|
||||||
@@ -79,93 +81,98 @@ class CrunchyrollIE(InfoExtractor):
|
|||||||
|
|
||||||
def _convert_subtitles_to_srt(self, subtitles):
    """Render the <event .../> entries of a subtitle document as SRT text.

    Every ``<event>`` tag carrying ``start``, ``end`` and ``text``
    attributes (in that order) becomes one numbered SRT cue.  Cues whose
    text is empty after cleaning are dropped and do not consume a number.
    """
    event_re = r'<event [^>]*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>'
    cues = []
    for begin, finish, raw_text in re.findall(event_re, subtitles):
        # "\N" is the line-break marker used inside the text attribute.
        caption = clean_html(raw_text).replace('\\N', '\n')
        if not caption:
            continue
        cues.append('%d\n%s --> %s\n%s\n\n' % (
            len(cues) + 1,
            # SRT separates the fractional seconds with a comma.
            begin.replace('.', ','),
            finish.replace('.', ','),
            caption,
        ))
    return ''.join(cues)
|
||||||
|
|
||||||
def _real_extract(self, url):
    """Extract metadata, stream formats and subtitles for one video page.

    Mobile URLs (``m.`` prefix) are first resolved to the page named in
    their ``<link rel="canonical">`` tag; everything else is read from the
    ``www.`` page.

    Returns an info dict with the keys ``id``, ``title``, ``description``,
    ``thumbnail``, ``uploader``, ``upload_date``, ``subtitles`` and
    ``formats``.

    Raises ExtractorError with the site's own message when the page shows
    a "showmedia-trailer-notice" block.
    """
    mobj = re.match(self._VALID_URL, url)
    video_id = mobj.group('video_id')

    if mobj.group('prefix') == 'm':
        mobile_webpage = self._download_webpage(url, video_id, 'Downloading mobile webpage')
        webpage_url = self._search_regex(
            r'<link rel="canonical" href="([^"]+)" />', mobile_webpage, 'webpage_url')
    else:
        webpage_url = 'http://www.' + mobj.group('url')

    webpage = self._download_webpage(webpage_url, video_id, 'Downloading webpage')
    note_m = self._html_search_regex(
        r'<div class="showmedia-trailer-notice">(.+?)</div>', webpage,
        'trailer-notice', default='')
    if note_m:
        raise ExtractorError(note_m)

    video_title = self._html_search_regex(
        r'<h1[^>]*>(.+?)</h1>', webpage, 'video_title', flags=re.DOTALL)
    # Collapse runs of spaces left over from the multi-line <h1>.
    video_title = re.sub(r' {2,}', ' ', video_title)
    video_description = self._html_search_regex(
        r'"description":"([^"]+)', webpage, 'video_description', default='')
    if not video_description:
        video_description = None
    video_upload_date = self._html_search_regex(
        r'<div>Availability for free users:(.+?)</div>', webpage,
        'video_upload_date', fatal=False, flags=re.DOTALL)
    if video_upload_date:
        video_upload_date = unified_strdate(video_upload_date)
    video_uploader = self._html_search_regex(
        r'<div>\s*Publisher:(.+?)</div>', webpage, 'video_uploader',
        fatal=False, flags=re.DOTALL)

    playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(
        r'"config_url":"([^"]+)', webpage, 'playerdata_url'))
    playerdata_req = compat_urllib_request.Request(playerdata_url)
    playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url})
    playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
    playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info')

    stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, 'stream_id')
    video_thumbnail = self._search_regex(
        r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False)

    formats = []
    for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage):
        # The page may advertise a resolution that has no entry in
        # _FORMAT_IDS; skip it instead of aborting with a KeyError.
        format_ids = self._FORMAT_IDS.get(fmt)
        if format_ids is None:
            continue
        stream_quality, stream_format = format_ids
        video_format = fmt + 'p'
        streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/')
        # urlencode doesn't work!
        streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+'&media%5Fid='+stream_id+'&video%5Fformat='+stream_format
        streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
        streamdata_req.add_header('Content-Length', str(len(streamdata_req.data)))
        streamdata = self._download_webpage(
            streamdata_req, video_id, note='Downloading media info for ' + video_format)
        video_url = self._search_regex(r'<host>([^<]+)', streamdata, 'video_url')
        video_play_path = self._search_regex(r'<file>([^<]+)', streamdata, 'video_play_path')
        formats.append({
            'url': video_url,
            'play_path': video_play_path,
            'ext': 'flv',
            'format': video_format,
            'format_id': video_format,
        })

    subtitles = {}
    for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
        sub_page = self._download_webpage(
            'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
            video_id, note='Downloading subtitles for ' + sub_name)
        # Named subtitle_id rather than id so the builtin is not shadowed.
        subtitle_id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
        iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
        data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
        # A track missing any of the three pieces cannot be decrypted.
        if not subtitle_id or not iv or not data:
            continue
        subtitle_id = int(subtitle_id)
        iv = base64.b64decode(iv)
        data = base64.b64decode(data)

        subtitle = self._decrypt_subtitles(data, iv, subtitle_id).decode('utf-8')
        lang_code = self._search_regex(
            r'lang_code=\'([^\']+)', subtitle, 'subtitle_lang_code', fatal=False)
        if not lang_code:
            continue
        subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle)

    return {
        'id': video_id,
        'title': video_title,
        'description': video_description,
        'thumbnail': video_thumbnail,
        'uploader': video_uploader,
        'upload_date': video_upload_date,
        'subtitles': subtitles,
        'formats': formats,
    }
|
||||||
|
@@ -1,49 +1,60 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
find_xpath_attr,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class CSpanIE(InfoExtractor):
    """Extractor for c-span.org ``/video/?<id>`` pages."""

    _VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>\d+)'
    IE_DESC = 'C-SPAN'
    _TEST = {
        'url': 'http://www.c-span.org/video/?313572-1/HolderonV',
        'md5': '8e44ce11f0f725527daccc453f553eb0',
        'info_dict': {
            'id': '315139',
            'ext': 'mp4',
            'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
            'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in Shelby County v. Holder in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
        },
        'skip': 'Regularly fails on travis, for unknown reasons',
    }

    def _real_extract(self, url):
        """Return the info dict for the program embedded in the page at *url*.

        The page id from the URL is only used as the download label; the
        id that is returned is the program id read from the page's
        ``data-progid`` attribute.
        """
        page_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, page_id)
        video_id = self._search_regex(
            r'data-progid=\'(\d+)\'>', webpage, 'video id')

        description_patterns = [
            # The full description
            r'<div class=\'expandable\'>(.*?)<a href=\'#\'',
            # If the description is small enough the other div is not
            # present, otherwise this is a stripped version
            r'<p class=\'initial\'>(.*?)</p>',
        ]
        description = self._html_search_regex(
            description_patterns, webpage, 'description', flags=re.DOTALL)

        info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
        player_info = self._download_json(info_url, video_id)
        video_url = unescapeHTML(player_info['video']['files'][0]['path']['#text'])

        flash_doc = self._download_xml(
            'http://www.c-span.org/common/services/flashXml.php?programid=' + video_id,
            video_id)

        # Text of the <string name="..."> element with the given name.
        string_value = lambda name: find_xpath_attr(
            flash_doc, './/string', 'name', name).text

        title = string_value('title')
        thumbnail = string_value('poster')

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'description': description,
            'thumbnail': thumbnail,
        }
|
||||||
|
@@ -1,4 +1,7 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
|
|
||||||
@@ -30,7 +33,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class PluzzIE(FranceTVBaseInfoExtractor):
|
class PluzzIE(FranceTVBaseInfoExtractor):
|
||||||
IE_NAME = u'pluzz.francetv.fr'
|
IE_NAME = 'pluzz.francetv.fr'
|
||||||
_VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html'
|
_VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html'
|
||||||
|
|
||||||
# Can't use tests, videos expire in 7 days
|
# Can't use tests, videos expire in 7 days
|
||||||
@@ -44,17 +47,17 @@ class PluzzIE(FranceTVBaseInfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||||
IE_NAME = u'francetvinfo.fr'
|
IE_NAME = 'francetvinfo.fr'
|
||||||
_VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+)\.html'
|
_VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+)\.html'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
|
'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
|
||||||
u'file': u'84981923.mp4',
|
'file': '84981923.mp4',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'title': u'Soir 3',
|
'title': 'Soir 3',
|
||||||
},
|
},
|
||||||
u'params': {
|
'params': {
|
||||||
u'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -62,13 +65,13 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
page_title = mobj.group('title')
|
page_title = mobj.group('title')
|
||||||
webpage = self._download_webpage(url, page_title)
|
webpage = self._download_webpage(url, page_title)
|
||||||
video_id = self._search_regex(r'id-video=(\d+?)"', webpage, u'video id')
|
video_id = self._search_regex(r'id-video=(\d+?)[@"]', webpage, 'video id')
|
||||||
return self._extract_video(video_id)
|
return self._extract_video(video_id)
|
||||||
|
|
||||||
|
|
||||||
class FranceTVIE(FranceTVBaseInfoExtractor):
|
class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||||
IE_NAME = u'francetv'
|
IE_NAME = 'francetv'
|
||||||
IE_DESC = u'France 2, 3, 4, 5 and Ô'
|
IE_DESC = 'France 2, 3, 4, 5 and Ô'
|
||||||
_VALID_URL = r'''(?x)https?://www\.france[2345o]\.fr/
|
_VALID_URL = r'''(?x)https?://www\.france[2345o]\.fr/
|
||||||
(?:
|
(?:
|
||||||
emissions/.*?/(videos|emissions)/(?P<id>[^/?]+)
|
emissions/.*?/(videos|emissions)/(?P<id>[^/?]+)
|
||||||
@@ -78,73 +81,73 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
|
|||||||
_TESTS = [
|
_TESTS = [
|
||||||
# france2
|
# france2
|
||||||
{
|
{
|
||||||
u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
|
'url': 'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
|
||||||
u'file': u'75540104.mp4',
|
'file': '75540104.mp4',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'title': u'13h15, le samedi...',
|
'title': '13h15, le samedi...',
|
||||||
u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d',
|
'description': 'md5:2e5b58ba7a2d3692b35c792be081a03d',
|
||||||
},
|
},
|
||||||
u'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
u'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# france3
|
# france3
|
||||||
{
|
{
|
||||||
u'url': u'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
|
'url': 'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'id': u'000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
|
'id': '000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
|
||||||
u'ext': u'flv',
|
'ext': 'flv',
|
||||||
u'title': u'Le scandale du prix des médicaments',
|
'title': 'Le scandale du prix des médicaments',
|
||||||
u'description': u'md5:1384089fbee2f04fc6c9de025ee2e9ce',
|
'description': 'md5:1384089fbee2f04fc6c9de025ee2e9ce',
|
||||||
},
|
},
|
||||||
u'params': {
|
'params': {
|
||||||
# rtmp download
|
# rtmp download
|
||||||
u'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# france4
|
# france4
|
||||||
{
|
{
|
||||||
u'url': u'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
|
'url': 'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'id': u'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
|
'id': 'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
|
||||||
u'ext': u'flv',
|
'ext': 'flv',
|
||||||
u'title': u'Hero Corp Making of - Extrait 1',
|
'title': 'Hero Corp Making of - Extrait 1',
|
||||||
u'description': u'md5:c87d54871b1790679aec1197e73d650a',
|
'description': 'md5:c87d54871b1790679aec1197e73d650a',
|
||||||
},
|
},
|
||||||
u'params': {
|
'params': {
|
||||||
# rtmp download
|
# rtmp download
|
||||||
u'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# france5
|
# france5
|
||||||
{
|
{
|
||||||
u'url': u'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
|
'url': 'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'id': u'92837968',
|
'id': '92837968',
|
||||||
u'ext': u'mp4',
|
'ext': 'mp4',
|
||||||
u'title': u'C à dire ?!',
|
'title': 'C à dire ?!',
|
||||||
u'description': u'md5:fb1db1cbad784dcce7c7a7bd177c8e2f',
|
'description': 'md5:fb1db1cbad784dcce7c7a7bd177c8e2f',
|
||||||
},
|
},
|
||||||
u'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
u'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# franceo
|
# franceo
|
||||||
{
|
{
|
||||||
u'url': u'http://www.franceo.fr/jt/info-afrique/04-12-2013',
|
'url': 'http://www.franceo.fr/jt/info-afrique/04-12-2013',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'id': u'92327925',
|
'id': '92327925',
|
||||||
u'ext': u'mp4',
|
'ext': 'mp4',
|
||||||
u'title': u'Infô-Afrique',
|
'title': 'Infô-Afrique',
|
||||||
u'description': u'md5:ebf346da789428841bee0fd2a935ea55',
|
'description': 'md5:ebf346da789428841bee0fd2a935ea55',
|
||||||
},
|
},
|
||||||
u'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
u'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
u'skip': u'The id changes frequently',
|
'skip': 'The id changes frequently',
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -160,26 +163,26 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
|
|||||||
'\.fr/\?id-video=([^"/&]+)'),
|
'\.fr/\?id-video=([^"/&]+)'),
|
||||||
(r'<a class="video" id="ftv_player_(.+?)"'),
|
(r'<a class="video" id="ftv_player_(.+?)"'),
|
||||||
]
|
]
|
||||||
video_id = self._html_search_regex(id_res, webpage, u'video ID')
|
video_id = self._html_search_regex(id_res, webpage, 'video ID')
|
||||||
else:
|
else:
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
return self._extract_video(video_id)
|
return self._extract_video(video_id)
|
||||||
|
|
||||||
|
|
||||||
class GenerationQuoiIE(InfoExtractor):
|
class GenerationQuoiIE(InfoExtractor):
|
||||||
IE_NAME = u'france2.fr:generation-quoi'
|
IE_NAME = 'france2.fr:generation-quoi'
|
||||||
_VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*)(\?|$)'
|
_VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*)(\?|$)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://generation-quoi.france2.fr/portrait/garde-a-vous',
|
'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous',
|
||||||
u'file': u'k7FJX8VBcvvLmX4wA5Q.mp4',
|
'file': 'k7FJX8VBcvvLmX4wA5Q.mp4',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'title': u'Génération Quoi - Garde à Vous',
|
'title': 'Génération Quoi - Garde à Vous',
|
||||||
u'uploader': u'Génération Quoi',
|
'uploader': 'Génération Quoi',
|
||||||
},
|
},
|
||||||
u'params': {
|
'params': {
|
||||||
# It uses Dailymotion
|
# It uses Dailymotion
|
||||||
u'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -194,20 +197,20 @@ class GenerationQuoiIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class CultureboxIE(FranceTVBaseInfoExtractor):
|
class CultureboxIE(FranceTVBaseInfoExtractor):
|
||||||
IE_NAME = u'culturebox.francetvinfo.fr'
|
IE_NAME = 'culturebox.francetvinfo.fr'
|
||||||
_VALID_URL = r'https?://culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
|
_VALID_URL = r'https?://culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813',
|
'url': 'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'id': u'EV_6785',
|
'id': 'EV_6785',
|
||||||
u'ext': u'mp4',
|
'ext': 'mp4',
|
||||||
u'title': u'Einstein on the beach au Théâtre du Châtelet',
|
'title': 'Einstein on the beach au Théâtre du Châtelet',
|
||||||
u'description': u'md5:9ce2888b1efefc617b5e58b3f6200eeb',
|
'description': 'md5:9ce2888b1efefc617b5e58b3f6200eeb',
|
||||||
},
|
},
|
||||||
u'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
u'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -215,5 +218,5 @@ class CultureboxIE(FranceTVBaseInfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
name = mobj.group('name')
|
name = mobj.group('name')
|
||||||
webpage = self._download_webpage(url, name)
|
webpage = self._download_webpage(url, name)
|
||||||
video_id = self._search_regex(r'"http://videos\.francetv\.fr/video/(.*?)"', webpage, u'video id')
|
video_id = self._search_regex(r'"http://videos\.francetv\.fr/video/(.*?)"', webpage, 'video id')
|
||||||
return self._extract_video(video_id)
|
return self._extract_video(video_id)
|
||||||
|
@@ -38,18 +38,6 @@ class GenericIE(InfoExtractor):
|
|||||||
'title': 'R\u00e9gis plante sa Jeep',
|
'title': 'R\u00e9gis plante sa Jeep',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
# embedded vimeo video
|
|
||||||
{
|
|
||||||
'add_ie': ['Vimeo'],
|
|
||||||
'url': 'http://skillsmatter.com/podcast/home/move-semanticsperfect-forwarding-and-rvalue-references',
|
|
||||||
'file': '22444065.mp4',
|
|
||||||
'md5': '2903896e23df39722c33f015af0666e2',
|
|
||||||
'info_dict': {
|
|
||||||
'title': 'ACCU 2011: Move Semantics,Perfect Forwarding, and Rvalue references- Scott Meyers- 13/04/2011',
|
|
||||||
'uploader_id': 'skillsmatter',
|
|
||||||
'uploader': 'Skills Matter',
|
|
||||||
}
|
|
||||||
},
|
|
||||||
# bandcamp page with custom domain
|
# bandcamp page with custom domain
|
||||||
{
|
{
|
||||||
'add_ie': ['Bandcamp'],
|
'add_ie': ['Bandcamp'],
|
||||||
@@ -358,7 +346,7 @@ class GenericIE(InfoExtractor):
|
|||||||
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
# Look for gorilla-vid style embedding
|
# Look for gorilla-vid style embedding
|
||||||
mobj = re.search(r'(?s)jw_plugins.*?file:\s*["\'](.*?)["\']', webpage)
|
mobj = re.search(r'(?s)(?:jw_plugins|JWPlayerOptions).*?file\s*:\s*["\'](.*?)["\']', webpage)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
# Broaden the search a little bit
|
# Broaden the search a little bit
|
||||||
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
|
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
|
||||||
|
@@ -10,7 +10,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class InfoQIE(InfoExtractor):
|
class InfoQIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'
|
_VALID_URL = r'https?://(?:www\.)?infoq\.com/[^/]+/(?P<id>[^/]+)$'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
"name": "InfoQ",
|
"name": "InfoQ",
|
||||||
"url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
|
"url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",
|
||||||
@@ -26,9 +26,9 @@ class InfoQIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id=url)
|
webpage = self._download_webpage(url, video_id)
|
||||||
self.report_extraction(url)
|
|
||||||
|
|
||||||
# Extract video URL
|
# Extract video URL
|
||||||
encoded_id = self._search_regex(r"jsclassref ?= ?'([^']*)'", webpage, 'encoded id')
|
encoded_id = self._search_regex(r"jsclassref ?= ?'([^']*)'", webpage, 'encoded id')
|
||||||
@@ -50,6 +50,6 @@ class InfoQIE(InfoExtractor):
|
|||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'ext': extension, # Extension is always(?) mp4, but seems to be flv
|
'ext': extension, # Extension is always(?) mp4, but seems to be flv
|
||||||
'description': video_description,
|
'description': video_description,
|
||||||
}
|
}
|
||||||
|
63
youtube_dl/extractor/lifenews.py
Normal file
63
youtube_dl/extractor/lifenews.py
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import unified_strdate
|
||||||
|
|
||||||
|
|
||||||
|
class LifeNewsIE(InfoExtractor):
    """Extractor for lifenews.ru news pages (desktop and mobile URLs)."""

    IE_NAME = 'lifenews'
    IE_DESC = 'LIFE | NEWS'
    _VALID_URL = r'http://lifenews\.ru/(?:mobile/)?news/(?P<id>\d+)'

    _TEST = {
        'url': 'http://lifenews.ru/news/126342',
        'file': '126342.mp4',
        'md5': 'e1b50a5c5fb98a6a544250f2e0db570a',
        'info_dict': {
            'title': 'МВД разыскивает мужчин, оставивших в IKEA сумку с автоматом',
            'description': 'Камеры наблюдения гипермаркета зафиксировали троих мужчин, спрятавших оружейный арсенал в камере хранения.',
            'thumbnail': 'http://lifenews.ru/static/posts/2014/1/126342/.video.jpg',
            'upload_date': '20140130',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the news item addressed by *url*.

        The mobile rendition of the page is always the one downloaded,
        whichever URL form was given.  The video URL, title, description
        and upload date are required; thumbnail, view count and comment
        count are optional and come back as None when the page lacks them.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(
            'http://lifenews.ru/mobile/news/%s' % video_id, video_id, 'Downloading page')

        video_url = self._html_search_regex(
            r'<video.*?src="([^"]+)"></video>', webpage, 'video URL')

        # Optional metadata: do not abort the extraction when it is missing.
        thumbnail = self._html_search_regex(
            r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail',
            fatal=False)

        title = self._og_search_title(webpage)
        TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS'
        if title.endswith(TITLE_SUFFIX):
            title = title[:-len(TITLE_SUFFIX)]

        description = self._og_search_description(webpage)

        view_count = self._html_search_regex(
            r'<div class=\'views\'>(\d+)</div>', webpage, 'view count', fatal=False)
        comment_count = self._html_search_regex(
            r'<div class=\'comments\'>(\d+)</div>', webpage, 'comment count', fatal=False)

        upload_date = self._html_search_regex(
            r'<time datetime=\'([^\']+)\'>', webpage, 'upload date')

        return {
            'id': video_id,
            'url': video_url,
            'thumbnail': thumbnail,
            'title': title,
            'description': description,
            # The regexes capture digit strings; expose the counts as numbers.
            'view_count': int(view_count) if view_count is not None else None,
            'comment_count': int(comment_count) if comment_count is not None else None,
            'upload_date': unified_strdate(upload_date),
        }
|
@@ -4,14 +4,11 @@ import json
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class LiveLeakIE(InfoExtractor):
|
class LiveLeakIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
|
_VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.liveleak.com/view?i=757_1364311680',
|
'url': 'http://www.liveleak.com/view?i=757_1364311680',
|
||||||
'file': '757_1364311680.mp4',
|
'file': '757_1364311680.mp4',
|
||||||
'md5': '0813c2430bea7a46bf13acf3406992f4',
|
'md5': '0813c2430bea7a46bf13acf3406992f4',
|
||||||
@@ -20,7 +17,17 @@ class LiveLeakIE(InfoExtractor):
|
|||||||
'uploader': 'ljfriel2',
|
'uploader': 'ljfriel2',
|
||||||
'title': 'Most unlucky car accident'
|
'title': 'Most unlucky car accident'
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.liveleak.com/view?i=f93_1390833151',
|
||||||
|
'file': 'f93_1390833151.mp4',
|
||||||
|
'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
|
||||||
|
'info_dict': {
|
||||||
|
'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
|
||||||
|
'uploader': 'ARD_Stinkt',
|
||||||
|
'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
@@ -28,7 +35,11 @@ class LiveLeakIE(InfoExtractor):
|
|||||||
video_id = mobj.group('video_id')
|
video_id = mobj.group('video_id')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
sources_raw = self._search_regex(
|
sources_raw = self._search_regex(
|
||||||
r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs')
|
r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs', default=None)
|
||||||
|
if sources_raw is None:
|
||||||
|
sources_raw = '[{ %s}]' % (
|
||||||
|
self._search_regex(r'(file: ".*?"),', webpage, 'video URL'))
|
||||||
|
|
||||||
sources_json = re.sub(r'\s([a-z]+):\s', r'"\1": ', sources_raw)
|
sources_json = re.sub(r'\s([a-z]+):\s', r'"\1": ', sources_raw)
|
||||||
sources = json.loads(sources_json)
|
sources = json.loads(sources_json)
|
||||||
|
|
||||||
|
@@ -16,7 +16,8 @@ class MalemotionIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
"title": "Bien dur",
|
"title": "Bien dur",
|
||||||
"age_limit": 18,
|
"age_limit": 18,
|
||||||
}
|
},
|
||||||
|
'skip': 'This video has been deleted.'
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -119,7 +119,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
if mgid.endswith('.swf'):
|
if mgid.endswith('.swf'):
|
||||||
mgid = mgid[:-4]
|
mgid = mgid[:-4]
|
||||||
except RegexNotFoundError:
|
except RegexNotFoundError:
|
||||||
mgid = self._search_regex(r'data-mgid="(.*?)"', webpage, u'mgid')
|
mgid = self._search_regex(
|
||||||
|
[r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
|
||||||
|
webpage, u'mgid')
|
||||||
return self._get_videos_info(mgid)
|
return self._get_videos_info(mgid)
|
||||||
|
|
||||||
|
|
||||||
|
@@ -4,18 +4,18 @@ import json
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import determine_ext
|
|
||||||
|
|
||||||
|
|
||||||
class NewgroundsIE(InfoExtractor):
|
class NewgroundsIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:https?://)?(?:www\.)?newgrounds\.com/audio/listen/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?newgrounds\.com/audio/listen/(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.newgrounds.com/audio/listen/549479',
|
'url': 'http://www.newgrounds.com/audio/listen/549479',
|
||||||
'file': '549479.mp3',
|
|
||||||
'md5': 'fe6033d297591288fa1c1f780386f07a',
|
'md5': 'fe6033d297591288fa1c1f780386f07a',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
"title": "B7 - BusMode",
|
'id': '549479',
|
||||||
"uploader": "Burn7",
|
'ext': 'mp3',
|
||||||
|
'title': 'B7 - BusMode',
|
||||||
|
'uploader': 'Burn7',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
61
youtube_dl/extractor/normalboots.py
Normal file
61
youtube_dl/extractor/normalboots.py
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
class NormalbootsIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'(?:http://)?(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$'
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://normalboots.com/video/home-alone-games-jontron/',
|
||||||
|
u'file': u'home-alone-games-jontron.mp4',
|
||||||
|
u'md5': u'8bf6de238915dd501105b44ef5f1e0f6',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'Home Alone Games - JonTron - NormalBoots',
|
||||||
|
u'description': u'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for \u2018Tense Battle Theme\u2019:\xa0http://www.youtube.com/Kiamet/',
|
||||||
|
u'uploader': u'JonTron',
|
||||||
|
u'upload_date': u'20140125',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
if mobj is None:
|
||||||
|
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||||
|
video_id = mobj.group('videoid')
|
||||||
|
|
||||||
|
info = {
|
||||||
|
'id': video_id,
|
||||||
|
'uploader': None,
|
||||||
|
'upload_date': None,
|
||||||
|
}
|
||||||
|
|
||||||
|
if url[:4] != 'http':
|
||||||
|
url = 'http://' + url
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
video_title = self._og_search_title(webpage)
|
||||||
|
video_description = self._og_search_description(webpage)
|
||||||
|
video_thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
|
||||||
|
webpage, 'uploader')
|
||||||
|
raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
|
||||||
|
webpage, 'date')
|
||||||
|
video_upload_date = unified_strdate(raw_upload_date)
|
||||||
|
video_upload_date = unified_strdate(raw_upload_date)
|
||||||
|
|
||||||
|
player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url')
|
||||||
|
player_page = self._download_webpage(player_url, video_id)
|
||||||
|
video_url = u'http://player.screenwavemedia.com/' + self._html_search_regex(r"'file':\s'(?P<file>[0-9A-Za-z-_\.]+)'", player_page, 'file')
|
||||||
|
|
||||||
|
info['url'] = video_url
|
||||||
|
info['title'] = video_title
|
||||||
|
info['description'] = video_description
|
||||||
|
info['thumbnail'] = video_thumbnail
|
||||||
|
info['uploader'] = video_uploader
|
||||||
|
info['upload_date'] = video_upload_date
|
||||||
|
|
||||||
|
return info
|
@@ -6,8 +6,6 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_parse_urlparse,
|
|
||||||
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -16,9 +14,10 @@ class RBMARadioIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'
|
_VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011',
|
'url': 'http://www.rbmaradio.com/shows/ford-lopatin-live-at-primavera-sound-2011',
|
||||||
'file': 'ford-lopatin-live-at-primavera-sound-2011.mp3',
|
|
||||||
'md5': '6bc6f9bcb18994b4c983bc3bf4384d95',
|
'md5': '6bc6f9bcb18994b4c983bc3bf4384d95',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'ford-lopatin-live-at-primavera-sound-2011',
|
||||||
|
'ext': 'mp3',
|
||||||
"uploader_id": "ford-lopatin",
|
"uploader_id": "ford-lopatin",
|
||||||
"location": "Spain",
|
"location": "Spain",
|
||||||
"description": "Joel Ford and Daniel ’Oneohtrix Point Never’ Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.",
|
"description": "Joel Ford and Daniel ’Oneohtrix Point Never’ Lopatin fly their midified pop extravaganza to Spain. Live at Primavera Sound 2011.",
|
||||||
@@ -42,7 +41,6 @@ class RBMARadioIE(InfoExtractor):
|
|||||||
raise ExtractorError('Invalid JSON: ' + str(e))
|
raise ExtractorError('Invalid JSON: ' + str(e))
|
||||||
|
|
||||||
video_url = data['akamai_url'] + '&cbr=256'
|
video_url = data['akamai_url'] + '&cbr=256'
|
||||||
url_parts = compat_urllib_parse_urlparse(video_url)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@@ -11,12 +13,12 @@ class Ro220IE(InfoExtractor):
|
|||||||
IE_NAME = '220.ro'
|
IE_NAME = '220.ro'
|
||||||
_VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)'
|
_VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
|
"url": "http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
|
||||||
u'file': u'LYV6doKo7f.mp4',
|
'file': 'LYV6doKo7f.mp4',
|
||||||
u'md5': u'03af18b73a07b4088753930db7a34add',
|
'md5': '03af18b73a07b4088753930db7a34add',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u"title": u"Luati-le Banii sez 4 ep 1",
|
"title": "Luati-le Banii sez 4 ep 1",
|
||||||
u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
|
"description": "Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -27,10 +29,10 @@ class Ro220IE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
flashVars_str = self._search_regex(
|
flashVars_str = self._search_regex(
|
||||||
r'<param name="flashVars" value="([^"]+)"',
|
r'<param name="flashVars" value="([^"]+)"',
|
||||||
webpage, u'flashVars')
|
webpage, 'flashVars')
|
||||||
flashVars = compat_parse_qs(flashVars_str)
|
flashVars = compat_parse_qs(flashVars_str)
|
||||||
|
|
||||||
info = {
|
return {
|
||||||
'_type': 'video',
|
'_type': 'video',
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -39,4 +41,3 @@ class Ro220IE(InfoExtractor):
|
|||||||
'description': clean_html(flashVars['desc'][0]),
|
'description': clean_html(flashVars['desc'][0]),
|
||||||
'thumbnail': flashVars['preview'][0],
|
'thumbnail': flashVars['preview'][0],
|
||||||
}
|
}
|
||||||
return info
|
|
||||||
|
@@ -1,34 +1,36 @@
|
|||||||
import re
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .mtv import MTVServicesInfoExtractor
|
from .mtv import MTVServicesInfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class SouthParkStudiosIE(MTVServicesInfoExtractor):
|
class SouthParkStudiosIE(MTVServicesInfoExtractor):
|
||||||
IE_NAME = u'southparkstudios.com'
|
IE_NAME = 'southparkstudios.com'
|
||||||
_VALID_URL = r'(https?://)?(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
|
_VALID_URL = r'https?://(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
|
||||||
|
|
||||||
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
|
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
|
'url': 'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
|
||||||
u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': 'a7bff6c2-ed00-11e0-aca6-0026b9414f30',
|
||||||
u'title': u'Bat Daded',
|
'ext': 'mp4',
|
||||||
u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.',
|
'title': 'Bat Daded',
|
||||||
|
'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.',
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
|
||||||
class SouthparkDeIE(SouthParkStudiosIE):
|
class SouthparkDeIE(SouthParkStudiosIE):
|
||||||
IE_NAME = u'southpark.de'
|
IE_NAME = 'southpark.de'
|
||||||
_VALID_URL = r'(https?://)?(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
|
_VALID_URL = r'https?://(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
|
||||||
_FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'
|
_FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
u'url': u'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured',
|
'url': 'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured',
|
||||||
u'file': u'85487c96-b3b9-4e39-9127-ad88583d9bf2.mp4',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '85487c96-b3b9-4e39-9127-ad88583d9bf2',
|
||||||
u'title': u'The Government Won\'t Respect My Privacy',
|
'ext': 'mp4',
|
||||||
u'description': u'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
|
'title': 'The Government Won\'t Respect My Privacy',
|
||||||
|
'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.',
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
@@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@@ -6,20 +8,20 @@ from .common import InfoExtractor
|
|||||||
class SpiegelIE(InfoExtractor):
|
class SpiegelIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
|
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
|
'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
|
||||||
u'file': u'1259285.mp4',
|
'file': '1259285.mp4',
|
||||||
u'md5': u'2c2754212136f35fb4b19767d242f66e',
|
'md5': '2c2754212136f35fb4b19767d242f66e',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv"
|
'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv',
|
||||||
}
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
u'url': u'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
|
'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
|
||||||
u'file': u'1309159.mp4',
|
'file': '1309159.mp4',
|
||||||
u'md5': u'f2cdf638d7aa47654e251e1aee360af1',
|
'md5': 'f2cdf638d7aa47654e251e1aee360af1',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'title': u'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers'
|
'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers',
|
||||||
}
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@@ -29,17 +31,17 @@ class SpiegelIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_title = self._html_search_regex(
|
video_title = self._html_search_regex(
|
||||||
r'<div class="module-title">(.*?)</div>', webpage, u'title')
|
r'<div class="module-title">(.*?)</div>', webpage, 'title')
|
||||||
|
|
||||||
xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
|
xml_url = 'http://video2.spiegel.de/flash/' + video_id + '.xml'
|
||||||
idoc = self._download_xml(
|
idoc = self._download_xml(
|
||||||
xml_url, video_id,
|
xml_url, video_id,
|
||||||
note=u'Downloading XML', errnote=u'Failed to download XML')
|
note='Downloading XML', errnote='Failed to download XML')
|
||||||
|
|
||||||
formats = [
|
formats = [
|
||||||
{
|
{
|
||||||
'format_id': n.tag.rpartition('type')[2],
|
'format_id': n.tag.rpartition('type')[2],
|
||||||
'url': u'http://video2.spiegel.de/flash/' + n.find('./filename').text,
|
'url': 'http://video2.spiegel.de/flash/' + n.find('./filename').text,
|
||||||
'width': int(n.find('./width').text),
|
'width': int(n.find('./width').text),
|
||||||
'height': int(n.find('./height').text),
|
'height': int(n.find('./height').text),
|
||||||
'abr': int(n.find('./audiobitrate').text),
|
'abr': int(n.find('./audiobitrate').text),
|
||||||
@@ -55,10 +57,9 @@ class SpiegelIE(InfoExtractor):
|
|||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
info = {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
return info
|
|
||||||
|
@@ -62,24 +62,30 @@ class SubtitlesInfoExtractor(InfoExtractor):
|
|||||||
subtitles[sub_lang] = subtitle
|
subtitles[sub_lang] = subtitle
|
||||||
return subtitles
|
return subtitles
|
||||||
|
|
||||||
|
def _download_subtitle_url(self, sub_lang, url):
|
||||||
|
return self._download_webpage(url, None, note=False)
|
||||||
|
|
||||||
def _request_subtitle_url(self, sub_lang, url):
|
def _request_subtitle_url(self, sub_lang, url):
|
||||||
""" makes the http request for the subtitle """
|
""" makes the http request for the subtitle """
|
||||||
try:
|
try:
|
||||||
sub = self._download_webpage(url, None, note=False)
|
return self._download_subtitle_url(sub_lang, url)
|
||||||
except ExtractorError as err:
|
except ExtractorError as err:
|
||||||
self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
|
self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
|
||||||
return
|
return
|
||||||
if not sub:
|
if not sub:
|
||||||
self._downloader.report_warning(u'Did not fetch video subtitles')
|
self._downloader.report_warning(u'Did not fetch video subtitles')
|
||||||
return
|
return
|
||||||
return sub
|
|
||||||
|
|
||||||
def _get_available_subtitles(self, video_id, webpage):
|
def _get_available_subtitles(self, video_id, webpage):
|
||||||
"""
|
"""
|
||||||
returns {sub_lang: url} or {} if not available
|
returns {sub_lang: url} or {} if not available
|
||||||
Must be redefined by the subclasses
|
Must be redefined by the subclasses
|
||||||
"""
|
"""
|
||||||
pass
|
|
||||||
|
# By default, allow implementations to simply pass in the result
|
||||||
|
assert isinstance(webpage, dict), \
|
||||||
|
'_get_available_subtitles not implemented'
|
||||||
|
return webpage
|
||||||
|
|
||||||
def _get_available_automatic_caption(self, video_id, webpage):
|
def _get_available_automatic_caption(self, video_id, webpage):
|
||||||
"""
|
"""
|
||||||
|
50
youtube_dl/extractor/tinypic.py
Normal file
50
youtube_dl/extractor/tinypic.py
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from youtube_dl.utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class TinyPicIE(InfoExtractor):
|
||||||
|
IE_NAME = 'tinypic'
|
||||||
|
IE_DESC = 'tinypic.com videos'
|
||||||
|
_VALID_URL = r'http://tinypic\.com/player\.php\?v=(?P<id>[^&]+)&s=\d+'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://tinypic.com/player.php?v=6xw7tc%3E&s=5#.UtqZmbRFCM8',
|
||||||
|
'md5': '609b74432465364e72727ebc6203f044',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6xw7tc',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'shadow phenomenon weird',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id, 'Downloading page')
|
||||||
|
|
||||||
|
mobj = re.search(r'(?m)fo\.addVariable\("file",\s"(?P<fileid>[\da-z]+)"\);\n'
|
||||||
|
'\s+fo\.addVariable\("s",\s"(?P<serverid>\d+)"\);', webpage)
|
||||||
|
if mobj is None:
|
||||||
|
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||||
|
|
||||||
|
file_id = mobj.group('fileid')
|
||||||
|
server_id = mobj.group('serverid')
|
||||||
|
|
||||||
|
KEYWORDS_SUFFIX = ', Video, images, photos, videos, myspace, ebay, video hosting, photo hosting'
|
||||||
|
keywords = self._html_search_meta('keywords', webpage, 'title')
|
||||||
|
title = keywords[:-len(KEYWORDS_SUFFIX)] if keywords.endswith(KEYWORDS_SUFFIX) else ''
|
||||||
|
|
||||||
|
video_url = 'http://v%s.tinypic.com/%s.flv' % (server_id, file_id)
|
||||||
|
thumbnail = 'http://v%s.tinypic.com/%s_th.jpg' % (server_id, file_id)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': file_id,
|
||||||
|
'url': video_url,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'title': title
|
||||||
|
}
|
@@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@@ -6,12 +8,13 @@ from .common import InfoExtractor
|
|||||||
class TrailerAddictIE(InfoExtractor):
|
class TrailerAddictIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'
|
_VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
|
'url': 'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
|
||||||
u'file': u'76184.mp4',
|
'md5': '41365557f3c8c397d091da510e73ceb4',
|
||||||
u'md5': u'57e39dbcf4142ceb8e1f242ff423fd71',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '76184',
|
||||||
u"title": u"Prince Avalanche Trailer",
|
'ext': 'mp4',
|
||||||
u"description": u"Trailer for Prince Avalanche.Two highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind."
|
'title': 'Prince Avalanche Trailer',
|
||||||
|
'description': 'Trailer for Prince Avalanche.\n\nTwo highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind.',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -22,9 +25,15 @@ class TrailerAddictIE(InfoExtractor):
|
|||||||
|
|
||||||
title = self._search_regex(r'<title>(.+?)</title>',
|
title = self._search_regex(r'<title>(.+?)</title>',
|
||||||
webpage, 'video title').replace(' - Trailer Addict','')
|
webpage, 'video title').replace(' - Trailer Addict','')
|
||||||
view_count = self._search_regex(r'Views: (.+?)<br />',
|
view_count_str = self._search_regex(
|
||||||
webpage, 'Views Count')
|
r'<span class="views_n">([0-9,.]+)</span>',
|
||||||
video_id = self._og_search_property('video', webpage, 'Video id').split('=')[1]
|
webpage, 'view count', fatal=False)
|
||||||
|
view_count = (
|
||||||
|
None if view_count_str is None
|
||||||
|
else int(view_count_str.replace(',', '')))
|
||||||
|
video_id = self._search_regex(
|
||||||
|
r'<param\s+name="movie"\s+value="/emb/([0-9]+)"\s*/>',
|
||||||
|
webpage, 'video id')
|
||||||
|
|
||||||
# Presence of (no)watchplus function indicates HD quality is available
|
# Presence of (no)watchplus function indicates HD quality is available
|
||||||
if re.search(r'function (no)?watchplus()', webpage):
|
if re.search(r'function (no)?watchplus()', webpage):
|
||||||
@@ -39,14 +48,16 @@ class TrailerAddictIE(InfoExtractor):
|
|||||||
info_webpage, 'Download url').replace('%3F','?')
|
info_webpage, 'Download url').replace('%3F','?')
|
||||||
thumbnail_url = self._search_regex(r'&image=(.+?)&',
|
thumbnail_url = self._search_regex(r'&image=(.+?)&',
|
||||||
info_webpage, 'thumbnail url')
|
info_webpage, 'thumbnail url')
|
||||||
ext = final_url.split('.')[-1].split('?')[0]
|
|
||||||
|
|
||||||
return [{
|
description = self._html_search_regex(
|
||||||
'id' : video_id,
|
r'(?s)<div class="synopsis">.*?<div class="movie_label_info"[^>]*>(.*?)</div>',
|
||||||
'url' : final_url,
|
webpage, 'description', fatal=False)
|
||||||
'ext' : ext,
|
|
||||||
'title' : title,
|
return {
|
||||||
'thumbnail' : thumbnail_url,
|
'id': video_id,
|
||||||
'description' : self._og_search_description(webpage),
|
'url': final_url,
|
||||||
'view_count' : view_count,
|
'title': title,
|
||||||
}]
|
'thumbnail': thumbnail_url,
|
||||||
|
'description': description,
|
||||||
|
'view_count': view_count,
|
||||||
|
}
|
||||||
|
@@ -1,3 +1,5 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
@@ -10,48 +12,48 @@ from ..utils import (
|
|||||||
|
|
||||||
class UstreamIE(InfoExtractor):
|
class UstreamIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
|
_VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
|
||||||
IE_NAME = u'ustream'
|
IE_NAME = 'ustream'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.ustream.tv/recorded/20274954',
|
'url': 'http://www.ustream.tv/recorded/20274954',
|
||||||
u'file': u'20274954.flv',
|
'file': '20274954.flv',
|
||||||
u'md5': u'088f151799e8f572f84eb62f17d73e5c',
|
'md5': '088f151799e8f572f84eb62f17d73e5c',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u"uploader": u"Young Americans for Liberty",
|
"uploader": "Young Americans for Liberty",
|
||||||
u"title": u"Young Americans for Liberty February 7, 2012 2:28 AM"
|
"title": "Young Americans for Liberty February 7, 2012 2:28 AM",
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
m = re.match(self._VALID_URL, url)
|
m = re.match(self._VALID_URL, url)
|
||||||
video_id = m.group('videoID')
|
video_id = m.group('videoID')
|
||||||
|
|
||||||
video_url = u'http://tcdn.ustream.tv/video/%s' % video_id
|
video_url = 'http://tcdn.ustream.tv/video/%s' % video_id
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
|
video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
|
||||||
webpage, u'title')
|
webpage, 'title')
|
||||||
|
|
||||||
uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
|
uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
|
||||||
webpage, u'uploader', fatal=False, flags=re.DOTALL)
|
webpage, 'uploader', fatal=False, flags=re.DOTALL)
|
||||||
|
|
||||||
thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
|
thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
|
||||||
webpage, u'thumbnail', fatal=False)
|
webpage, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'url': video_url,
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': video_title,
|
||||||
|
'uploader': uploader,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
}
|
||||||
|
|
||||||
info = {
|
|
||||||
'id': video_id,
|
|
||||||
'url': video_url,
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': video_title,
|
|
||||||
'uploader': uploader,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
}
|
|
||||||
return info
|
|
||||||
|
|
||||||
class UstreamChannelIE(InfoExtractor):
|
class UstreamChannelIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
|
_VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
|
||||||
IE_NAME = u'ustream:channel'
|
IE_NAME = 'ustream:channel'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
m = re.match(self._VALID_URL, url)
|
m = re.match(self._VALID_URL, url)
|
||||||
|
@@ -1,5 +1,6 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
|
||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
@@ -22,16 +23,16 @@ class VevoIE(InfoExtractor):
|
|||||||
vevo:)
|
vevo:)
|
||||||
(?P<id>[^&?#]+)'''
|
(?P<id>[^&?#]+)'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
||||||
u'file': u'GB1101300280.mp4',
|
'file': 'GB1101300280.mp4',
|
||||||
u"md5": u"06bea460acb744eab74a9d7dcb4bfd61",
|
"md5": "06bea460acb744eab74a9d7dcb4bfd61",
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u"upload_date": u"20130624",
|
"upload_date": "20130624",
|
||||||
u"uploader": u"Hurts",
|
"uploader": "Hurts",
|
||||||
u"title": u"Somebody to Die For",
|
"title": "Somebody to Die For",
|
||||||
u"duration": 230.12,
|
"duration": 230.12,
|
||||||
u"width": 1920,
|
"width": 1920,
|
||||||
u"height": 1080,
|
"height": 1080,
|
||||||
}
|
}
|
||||||
}]
|
}]
|
||||||
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
|
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
|
||||||
@@ -44,7 +45,7 @@ class VevoIE(InfoExtractor):
|
|||||||
if version['version'] > last_version['version']:
|
if version['version'] > last_version['version']:
|
||||||
last_version = version
|
last_version = version
|
||||||
if last_version['version'] == -1:
|
if last_version['version'] == -1:
|
||||||
raise ExtractorError(u'Unable to extract last version of the video')
|
raise ExtractorError('Unable to extract last version of the video')
|
||||||
|
|
||||||
renditions = xml.etree.ElementTree.fromstring(last_version['data'])
|
renditions = xml.etree.ElementTree.fromstring(last_version['data'])
|
||||||
formats = []
|
formats = []
|
||||||
@@ -85,7 +86,7 @@ class VevoIE(InfoExtractor):
|
|||||||
format_url = self._SMIL_BASE_URL + m.group('path')
|
format_url = self._SMIL_BASE_URL + m.group('path')
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
'format_id': u'SMIL_' + m.group('cbr'),
|
'format_id': 'SMIL_' + m.group('cbr'),
|
||||||
'vcodec': m.group('vcodec'),
|
'vcodec': m.group('vcodec'),
|
||||||
'acodec': m.group('acodec'),
|
'acodec': m.group('acodec'),
|
||||||
'vbr': int(m.group('vbr')),
|
'vbr': int(m.group('vbr')),
|
||||||
@@ -101,26 +102,25 @@ class VevoIE(InfoExtractor):
|
|||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
|
json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
|
||||||
info_json = self._download_webpage(json_url, video_id, u'Downloading json info')
|
video_info = self._download_json(json_url, video_id)['video']
|
||||||
video_info = json.loads(info_json)['video']
|
|
||||||
|
|
||||||
formats = self._formats_from_json(video_info)
|
formats = self._formats_from_json(video_info)
|
||||||
try:
|
try:
|
||||||
smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
|
smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
|
||||||
self._SMIL_BASE_URL, video_id, video_id.lower())
|
self._SMIL_BASE_URL, video_id, video_id.lower())
|
||||||
smil_xml = self._download_webpage(smil_url, video_id,
|
smil_xml = self._download_webpage(smil_url, video_id,
|
||||||
u'Downloading SMIL info')
|
'Downloading SMIL info')
|
||||||
formats.extend(self._formats_from_smil(smil_xml))
|
formats.extend(self._formats_from_smil(smil_xml))
|
||||||
except ExtractorError as ee:
|
except ExtractorError as ee:
|
||||||
if not isinstance(ee.cause, compat_HTTPError):
|
if not isinstance(ee.cause, compat_HTTPError):
|
||||||
raise
|
raise
|
||||||
self._downloader.report_warning(
|
self._downloader.report_warning(
|
||||||
u'Cannot download SMIL information, falling back to JSON ..')
|
'Cannot download SMIL information, falling back to JSON ..')
|
||||||
|
|
||||||
timestamp_ms = int(self._search_regex(
|
timestamp_ms = int(self._search_regex(
|
||||||
r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))
|
r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date'))
|
||||||
upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000)
|
upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000)
|
||||||
info = {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_info['title'],
|
'title': video_info['title'],
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
@@ -129,5 +129,3 @@ class VevoIE(InfoExtractor):
|
|||||||
'uploader': video_info['mainArtists'][0]['artistName'],
|
'uploader': video_info['mainArtists'][0]['artistName'],
|
||||||
'duration': video_info['duration'],
|
'duration': video_info['duration'],
|
||||||
}
|
}
|
||||||
|
|
||||||
return info
|
|
||||||
|
80
youtube_dl/extractor/vube.py
Normal file
80
youtube_dl/extractor/vube.py
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class VubeIE(InfoExtractor):
    """Information extractor for videos hosted on vube.com.

    The video id is the 10-character alphanumeric token that follows the
    channel segment of the URL.  Metadata is read from the site's JSON API
    and the comment count from a second JSON endpoint.
    """
    IE_NAME = 'vube'
    IE_DESC = 'Vube.com'
    _VALID_URL = r'http://vube\.com/[^/]+/(?P<id>[\da-zA-Z]{10})'

    _TEST = {
        'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon',
        'md5': 'f81dcf6d0448e3291f54380181695821',
        'info_dict': {
            'id': 'YL2qNPkqon',
            'ext': 'mp4',
            'title': 'Chiara Grispo - Price Tag by Jessie J',
            'description': 'md5:8ea652a1f36818352428cb5134933313',
            'thumbnail': 'http://frame.thestaticvube.com/snap/228x128/102e7e63057-5ebc-4f5c-4065-6ce4ebde131f.jpg',
            'uploader': 'Chiara.Grispo',
            'uploader_id': '1u3hX0znhP',
            'upload_date': '20140103',
            'duration': 170.56,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the Vube video addressed by *url*.

        Fields required to produce a download (title, public id, format
        list, duration) are read strictly and raise ``KeyError`` when
        absent.  Purely informational fields are optional and come back as
        ``None`` when the API response does not carry them.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        video = self._download_json(
            'http://vube.com/api/v2/video/%s' % video_id,
            video_id, 'Downloading video JSON')

        public_id = video['public_id']

        # Only renditions whose transcoding has finished are offered.
        formats = []
        for fmt in video['mtm']:
            if fmt['transcoding_status'] != 'processed':
                continue
            formats.append({
                'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (
                    fmt['media_resolution_id'], public_id),
                'height': int(fmt['height']),
                'abr': int(fmt['audio_bitrate']),
                'vbr': int(fmt['video_bitrate']),
                'format_id': fmt['media_resolution_id'],
            })
        self._sort_formats(formats)

        title = video['title']
        description = video.get('description')

        thumbnail = video.get('thumbnail_src')
        # Protocol-relative thumbnail URLs are made absolute.
        if thumbnail and thumbnail.startswith('//'):
            thumbnail = 'http:' + thumbnail

        uploader = video.get('user_alias')
        uploader_id = video.get('user_url_id')

        # Format the date in UTC: fromtimestamp() would use the local
        # timezone of the machine running the extractor, so the reported
        # upload date could shift by a day depending on where it runs.
        upload_date = None
        upload_time = video.get('upload_time')
        if upload_time is not None:
            upload_date = datetime.datetime.utcfromtimestamp(
                int(upload_time)).strftime('%Y%m%d')

        duration = video['duration']
        view_count = video.get('raw_view_count')
        like_count = video.get('total_likes')
        dislike_count = video.get('total_hates')

        comment = self._download_json(
            'http://vube.com/api/video/%s/comment' % video_id,
            video_id, 'Downloading video comment JSON')
        comment_count = comment.get('total')

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'upload_date': upload_date,
            'duration': duration,
            'view_count': view_count,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'comment_count': comment_count,
        }
|
@@ -5,7 +5,6 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
unescapeHTML,
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2014.01.30'
|
__version__ = '2014.02.03'
|
||||||
|
Reference in New Issue
Block a user