Compare commits
26 Commits
2015.02.02
...
2015.02.02
Author | SHA1 | Date | |
---|---|---|---|
55898ad2cf | |||
a465808592 | |||
5c4862bad4 | |||
995029a142 | |||
a57b562cff | |||
531572578e | |||
3a4cca687f | |||
7d3d06a16c | |||
c21b1fbeeb | |||
f920ce295e | |||
7a7bd19c45 | |||
8f4b58d70e | |||
3fd45e03bf | |||
869b4aeff4 | |||
cc9ca3ba6e | |||
ea71034bd3 | |||
9fffd0469f | |||
ae7773942e | |||
469a64cebf | |||
aae3fdcfae | |||
6a66904f8e | |||
78271e3319 | |||
92bf0bcdf8 | |||
1283204917 | |||
6789defea9 | |||
e77d2975af |
10
README.md
10
README.md
@ -368,11 +368,11 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
--add-metadata write metadata to the video file
|
--add-metadata write metadata to the video file
|
||||||
--xattrs write metadata to the video file's xattrs
|
--xattrs write metadata to the video file's xattrs
|
||||||
(using dublin core and xdg standards)
|
(using dublin core and xdg standards)
|
||||||
--fixup POLICY (experimental) Automatically correct known
|
--fixup POLICY Automatically correct known faults of the
|
||||||
faults of the file. One of never (do
|
file. One of never (do nothing), warn (only
|
||||||
nothing), warn (only emit a warning),
|
emit a warning), detect_or_warn(the
|
||||||
detect_or_warn(check whether we can do
|
default; fix file if we can, warn
|
||||||
anything about it, warn otherwise
|
otherwise)
|
||||||
--prefer-avconv Prefer avconv over ffmpeg for running the
|
--prefer-avconv Prefer avconv over ffmpeg for running the
|
||||||
postprocessors (default)
|
postprocessors (default)
|
||||||
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
|
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
|
||||||
|
@ -238,6 +238,8 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(parse_duration('5 s'), 5)
|
self.assertEqual(parse_duration('5 s'), 5)
|
||||||
self.assertEqual(parse_duration('3 min'), 180)
|
self.assertEqual(parse_duration('3 min'), 180)
|
||||||
self.assertEqual(parse_duration('2.5 hours'), 9000)
|
self.assertEqual(parse_duration('2.5 hours'), 9000)
|
||||||
|
self.assertEqual(parse_duration('02:03:04'), 7384)
|
||||||
|
self.assertEqual(parse_duration('01:02:03:04'), 93784)
|
||||||
|
|
||||||
def test_fix_xml_ampersands(self):
|
def test_fix_xml_ampersands(self):
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
@ -371,6 +373,16 @@ class TestUtil(unittest.TestCase):
|
|||||||
on = js_to_json('{"abc": true}')
|
on = js_to_json('{"abc": true}')
|
||||||
self.assertEqual(json.loads(on), {'abc': True})
|
self.assertEqual(json.loads(on), {'abc': True})
|
||||||
|
|
||||||
|
# Ignore JavaScript code as well
|
||||||
|
on = js_to_json('''{
|
||||||
|
"x": 1,
|
||||||
|
y: "a",
|
||||||
|
z: some.code
|
||||||
|
}''')
|
||||||
|
d = json.loads(on)
|
||||||
|
self.assertEqual(d['x'], 1)
|
||||||
|
self.assertEqual(d['y'], 'a')
|
||||||
|
|
||||||
def test_clean_html(self):
|
def test_clean_html(self):
|
||||||
self.assertEqual(clean_html('a:\nb'), 'a: b')
|
self.assertEqual(clean_html('a:\nb'), 'a: b')
|
||||||
self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
|
self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
|
||||||
|
@ -45,6 +45,12 @@ class ExternalFD(FileDownloader):
|
|||||||
def supports(cls, info_dict):
|
def supports(cls, info_dict):
|
||||||
return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
|
return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
|
||||||
|
|
||||||
|
def _source_address(self, command_option):
|
||||||
|
source_address = self.params.get('source_address')
|
||||||
|
if source_address is None:
|
||||||
|
return []
|
||||||
|
return [command_option, source_address]
|
||||||
|
|
||||||
def _call_downloader(self, tmpfilename, info_dict):
|
def _call_downloader(self, tmpfilename, info_dict):
|
||||||
""" Either overwrite this or implement _make_cmd """
|
""" Either overwrite this or implement _make_cmd """
|
||||||
cmd = self._make_cmd(tmpfilename, info_dict)
|
cmd = self._make_cmd(tmpfilename, info_dict)
|
||||||
@ -72,6 +78,7 @@ class CurlFD(ExternalFD):
|
|||||||
cmd = [self.exe, '-o', tmpfilename]
|
cmd = [self.exe, '-o', tmpfilename]
|
||||||
for key, val in info_dict['http_headers'].items():
|
for key, val in info_dict['http_headers'].items():
|
||||||
cmd += ['--header', '%s: %s' % (key, val)]
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
|
cmd += self._source_address('--interface')
|
||||||
cmd += ['--', info_dict['url']]
|
cmd += ['--', info_dict['url']]
|
||||||
return cmd
|
return cmd
|
||||||
|
|
||||||
@ -81,6 +88,7 @@ class WgetFD(ExternalFD):
|
|||||||
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
|
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
|
||||||
for key, val in info_dict['http_headers'].items():
|
for key, val in info_dict['http_headers'].items():
|
||||||
cmd += ['--header', '%s: %s' % (key, val)]
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
|
cmd += self._source_address('--bind-address')
|
||||||
cmd += ['--', info_dict['url']]
|
cmd += ['--', info_dict['url']]
|
||||||
return cmd
|
return cmd
|
||||||
|
|
||||||
@ -96,6 +104,7 @@ class Aria2cFD(ExternalFD):
|
|||||||
cmd += ['--out', os.path.basename(tmpfilename)]
|
cmd += ['--out', os.path.basename(tmpfilename)]
|
||||||
for key, val in info_dict['http_headers'].items():
|
for key, val in info_dict['http_headers'].items():
|
||||||
cmd += ['--header', '%s: %s' % (key, val)]
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
|
cmd += self._source_address('--interface')
|
||||||
cmd += ['--', info_dict['url']]
|
cmd += ['--', info_dict['url']]
|
||||||
return cmd
|
return cmd
|
||||||
|
|
||||||
|
@ -3,6 +3,9 @@ from __future__ import unicode_literals
|
|||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
|
|
||||||
|
from socket import error as SocketError
|
||||||
|
import errno
|
||||||
|
|
||||||
from .common import FileDownloader
|
from .common import FileDownloader
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
@ -99,6 +102,11 @@ class HttpFD(FileDownloader):
|
|||||||
resume_len = 0
|
resume_len = 0
|
||||||
open_mode = 'wb'
|
open_mode = 'wb'
|
||||||
break
|
break
|
||||||
|
except SocketError as e:
|
||||||
|
if e.errno != errno.ECONNRESET:
|
||||||
|
# Connection reset is no problem, just retry
|
||||||
|
raise
|
||||||
|
|
||||||
# Retry
|
# Retry
|
||||||
count += 1
|
count += 1
|
||||||
if count <= retries:
|
if count <= retries:
|
||||||
|
@ -182,6 +182,7 @@ from .heise import HeiseIE
|
|||||||
from .hellporno import HellPornoIE
|
from .hellporno import HellPornoIE
|
||||||
from .helsinki import HelsinkiIE
|
from .helsinki import HelsinkiIE
|
||||||
from .hentaistigma import HentaiStigmaIE
|
from .hentaistigma import HentaiStigmaIE
|
||||||
|
from .historicfilms import HistoricFilmsIE
|
||||||
from .hitbox import HitboxIE, HitboxLiveIE
|
from .hitbox import HitboxIE, HitboxLiveIE
|
||||||
from .hornbunny import HornBunnyIE
|
from .hornbunny import HornBunnyIE
|
||||||
from .hostingbulk import HostingBulkIE
|
from .hostingbulk import HostingBulkIE
|
||||||
@ -284,6 +285,7 @@ from .ndr import NDRIE
|
|||||||
from .ndtv import NDTVIE
|
from .ndtv import NDTVIE
|
||||||
from .netzkino import NetzkinoIE
|
from .netzkino import NetzkinoIE
|
||||||
from .nerdcubed import NerdCubedFeedIE
|
from .nerdcubed import NerdCubedFeedIE
|
||||||
|
from .nerdist import NerdistIE
|
||||||
from .newgrounds import NewgroundsIE
|
from .newgrounds import NewgroundsIE
|
||||||
from .newstube import NewstubeIE
|
from .newstube import NewstubeIE
|
||||||
from .nextmedia import (
|
from .nextmedia import (
|
||||||
@ -316,7 +318,8 @@ from .nrk import (
|
|||||||
NRKIE,
|
NRKIE,
|
||||||
NRKTVIE,
|
NRKTVIE,
|
||||||
)
|
)
|
||||||
from .ntv import NTVIE
|
from .ntvde import NTVDeIE
|
||||||
|
from .ntvru import NTVRuIE
|
||||||
from .nytimes import NYTimesIE
|
from .nytimes import NYTimesIE
|
||||||
from .nuvid import NuvidIE
|
from .nuvid import NuvidIE
|
||||||
from .oktoberfesttv import OktoberfestTVIE
|
from .oktoberfesttv import OktoberfestTVIE
|
||||||
|
@ -860,10 +860,13 @@ class InfoExtractor(object):
|
|||||||
return formats
|
return formats
|
||||||
|
|
||||||
# TODO: improve extraction
|
# TODO: improve extraction
|
||||||
def _extract_smil_formats(self, smil_url, video_id):
|
def _extract_smil_formats(self, smil_url, video_id, fatal=True):
|
||||||
smil = self._download_xml(
|
smil = self._download_xml(
|
||||||
smil_url, video_id, 'Downloading SMIL file',
|
smil_url, video_id, 'Downloading SMIL file',
|
||||||
'Unable to download SMIL file')
|
'Unable to download SMIL file', fatal=fatal)
|
||||||
|
if smil is False:
|
||||||
|
assert not fatal
|
||||||
|
return []
|
||||||
|
|
||||||
base = smil.find('./head/meta').get('base')
|
base = smil.find('./head/meta').get('base')
|
||||||
|
|
||||||
|
@ -25,9 +25,15 @@ class DRTVIE(SubtitlesInfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
programcard = self._download_json(
|
webpage = self._download_webpage(url, video_id)
|
||||||
'http://www.dr.dk/mu/programcard/expanded/%s' % video_id, video_id, 'Downloading video JSON')
|
|
||||||
|
|
||||||
|
video_id = self._search_regex(
|
||||||
|
r'data-(?:material-identifier|episode-slug)="([^"]+)"',
|
||||||
|
webpage, 'video id')
|
||||||
|
|
||||||
|
programcard = self._download_json(
|
||||||
|
'http://www.dr.dk/mu/programcard/expanded/%s' % video_id,
|
||||||
|
video_id, 'Downloading video JSON')
|
||||||
data = programcard['Data'][0]
|
data = programcard['Data'][0]
|
||||||
|
|
||||||
title = data['Title']
|
title = data['Title']
|
||||||
|
46
youtube_dl/extractor/historicfilms.py
Normal file
46
youtube_dl/extractor/historicfilms.py
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import parse_duration
|
||||||
|
|
||||||
|
|
||||||
|
class HistoricFilmsIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?historicfilms\.com/(?:tapes/|play)(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.historicfilms.com/tapes/4728',
|
||||||
|
'md5': 'd4a437aec45d8d796a38a215db064e9a',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '4728',
|
||||||
|
'ext': 'mov',
|
||||||
|
'title': 'Historic Films: GP-7',
|
||||||
|
'description': 'md5:1a86a0f3ac54024e419aba97210d959a',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'duration': 2096,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
tape_id = self._search_regex(
|
||||||
|
r'class="tapeId">([^<]+)<', webpage, 'tape id')
|
||||||
|
|
||||||
|
title = self._og_search_title(webpage)
|
||||||
|
description = self._og_search_description(webpage)
|
||||||
|
thumbnail = self._html_search_meta(
|
||||||
|
'thumbnailUrl', webpage, 'thumbnails') or self._og_search_thumbnail(webpage)
|
||||||
|
duration = parse_duration(self._html_search_meta(
|
||||||
|
'duration', webpage, 'duration'))
|
||||||
|
|
||||||
|
video_url = 'http://www.historicfilms.com/video/%s_%s_web.mov' % (tape_id, video_id)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'url': video_url,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'duration': duration,
|
||||||
|
}
|
80
youtube_dl/extractor/nerdist.py
Normal file
80
youtube_dl/extractor/nerdist.py
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
parse_iso8601,
|
||||||
|
xpath_text,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class NerdistIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?nerdist\.com/vepisode/(?P<id>[^/?#]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.nerdist.com/vepisode/exclusive-which-dc-characters-w',
|
||||||
|
'md5': '3698ed582931b90d9e81e02e26e89f23',
|
||||||
|
'info_dict': {
|
||||||
|
'display_id': 'exclusive-which-dc-characters-w',
|
||||||
|
'id': 'RPHpvJyr',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Your TEEN TITANS Revealed! Who\'s on the show?',
|
||||||
|
'thumbnail': 're:^https?://.*/thumbs/.*\.jpg$',
|
||||||
|
'description': 'Exclusive: Find out which DC Comics superheroes will star in TEEN TITANS Live-Action TV Show on Nerdist News with Jessica Chobot!',
|
||||||
|
'uploader': 'Eric Diaz',
|
||||||
|
'upload_date': '20150202',
|
||||||
|
'timestamp': 1422892808,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
video_id = self._search_regex(
|
||||||
|
r'''(?x)<script\s+(?:type="text/javascript"\s+)?
|
||||||
|
src="https?://content\.nerdist\.com/players/([a-zA-Z0-9_]+)-''',
|
||||||
|
webpage, 'video ID')
|
||||||
|
timestamp = parse_iso8601(self._html_search_meta(
|
||||||
|
'shareaholic:article_published_time', webpage, 'upload date'))
|
||||||
|
uploader = self._html_search_meta(
|
||||||
|
'shareaholic:article_author_name', webpage, 'article author')
|
||||||
|
|
||||||
|
doc = self._download_xml(
|
||||||
|
'http://content.nerdist.com/jw6/%s.xml' % video_id, video_id)
|
||||||
|
video_info = doc.find('.//item')
|
||||||
|
title = xpath_text(video_info, './title', fatal=True)
|
||||||
|
description = xpath_text(video_info, './description')
|
||||||
|
thumbnail = xpath_text(
|
||||||
|
video_info, './{http://rss.jwpcdn.com/}image', 'thumbnail')
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for source in video_info.findall('./{http://rss.jwpcdn.com/}source'):
|
||||||
|
vurl = source.attrib['file']
|
||||||
|
ext = determine_ext(vurl)
|
||||||
|
if ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
vurl, video_id, entry_protocol='m3u8_native', ext='mp4',
|
||||||
|
preference=0))
|
||||||
|
elif ext == 'smil':
|
||||||
|
formats.extend(self._extract_smil_formats(
|
||||||
|
vurl, video_id, fatal=False
|
||||||
|
))
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'format_id': ext,
|
||||||
|
'url': vurl,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'formats': formats,
|
||||||
|
'uploader': uploader,
|
||||||
|
}
|
@ -46,7 +46,18 @@ class NFLIE(InfoExtractor):
|
|||||||
'timestamp': 1388354455,
|
'timestamp': 1388354455,
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.nfl.com/news/story/0ap3000000467586/article/patriots-seahawks-involved-in-lategame-skirmish',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '0ap3000000467607',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Frustrations flare on the field',
|
||||||
|
'description': 'Emotions ran high at the end of the Super Bowl on both sides of the ball after a dramatic finish.',
|
||||||
|
'timestamp': 1422850320,
|
||||||
|
'upload_date': '20150202',
|
||||||
|
},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -80,7 +91,11 @@ class NFLIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
config_url = NFLIE.prepend_host(host, self._search_regex(
|
config_url = NFLIE.prepend_host(host, self._search_regex(
|
||||||
r'(?:config|configURL)\s*:\s*"([^"]+)"', webpage, 'config URL'))
|
r'(?:config|configURL)\s*:\s*"([^"]+)"', webpage, 'config URL',
|
||||||
|
default='static/content/static/config/video/config.json'))
|
||||||
|
# For articles, the id in the url is not the video id
|
||||||
|
video_id = self._search_regex(
|
||||||
|
r'contentId\s*:\s*"([^"]+)"', webpage, 'video id', default=video_id)
|
||||||
config = self._download_json(config_url, video_id,
|
config = self._download_json(config_url, video_id,
|
||||||
note='Downloading player config')
|
note='Downloading player config')
|
||||||
url_template = NFLIE.prepend_host(
|
url_template = NFLIE.prepend_host(
|
||||||
|
@ -1,8 +1,6 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -11,7 +9,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class NormalbootsIE(InfoExtractor):
|
class NormalbootsIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$'
|
_VALID_URL = r'http://(?:www\.)?normalboots\.com/video/(?P<id>[0-9a-z-]*)/?$'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://normalboots.com/video/home-alone-games-jontron/',
|
'url': 'http://normalboots.com/video/home-alone-games-jontron/',
|
||||||
'md5': '8bf6de238915dd501105b44ef5f1e0f6',
|
'md5': '8bf6de238915dd501105b44ef5f1e0f6',
|
||||||
@ -30,19 +28,22 @@ class NormalbootsIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('videoid')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
|
|
||||||
webpage, 'uploader')
|
|
||||||
raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
|
|
||||||
webpage, 'date')
|
|
||||||
video_upload_date = unified_strdate(raw_upload_date)
|
|
||||||
|
|
||||||
player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url')
|
video_uploader = self._html_search_regex(
|
||||||
|
r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
|
||||||
|
webpage, 'uploader', fatal=False)
|
||||||
|
video_upload_date = unified_strdate(self._html_search_regex(
|
||||||
|
r'<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
|
||||||
|
webpage, 'date', fatal=False))
|
||||||
|
|
||||||
|
player_url = self._html_search_regex(
|
||||||
|
r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"',
|
||||||
|
webpage, 'player url')
|
||||||
player_page = self._download_webpage(player_url, video_id)
|
player_page = self._download_webpage(player_url, video_id)
|
||||||
video_url = self._html_search_regex(r"file:\s'(?P<file>[^']+\.mp4)'", player_page, 'file')
|
video_url = self._html_search_regex(
|
||||||
|
r"file:\s'(?P<file>[^']+\.mp4)'", player_page, 'file')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
68
youtube_dl/extractor/ntvde.py
Normal file
68
youtube_dl/extractor/ntvde.py
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
parse_duration,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class NTVDeIE(InfoExtractor):
|
||||||
|
IE_NAME = 'n-tv.de'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?n-tv\.de/mediathek/videos/[^/?#]+/[^/?#]+-article(?P<id>.+)\.html'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.n-tv.de/mediathek/videos/panorama/Schnee-und-Glaette-fuehren-zu-zahlreichen-Unfaellen-und-Staus-article14438086.html',
|
||||||
|
'md5': '6ef2514d4b1e8e03ca24b49e2f167153',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '14438086',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'title': 'Schnee und Glätte führen zu zahlreichen Unfällen und Staus',
|
||||||
|
'alt_title': 'Winterchaos auf deutschen Straßen',
|
||||||
|
'description': 'Schnee und Glätte sorgen deutschlandweit für einen chaotischen Start in die Woche: Auf den Straßen kommt es zu kilometerlangen Staus und Dutzenden Glätteunfällen. In Düsseldorf und München wirbelt der Schnee zudem den Flugplan durcheinander. Dutzende Flüge landen zu spät, einige fallen ganz aus.',
|
||||||
|
'duration': 4020,
|
||||||
|
'timestamp': 1422892797,
|
||||||
|
'upload_date': '20150202',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
info = self._parse_json(self._search_regex(
|
||||||
|
r'(?s)ntv.pageInfo.article =\s(\{.*?\});', webpage, 'info'),
|
||||||
|
video_id, transform_source=js_to_json)
|
||||||
|
timestamp = int_or_none(info.get('publishedDateAsUnixTimeStamp'))
|
||||||
|
vdata = self._parse_json(self._search_regex(
|
||||||
|
r'(?s)\$\(\s*"\#player"\s*\)\s*\.data\(\s*"player",\s*(\{.*?\})\);',
|
||||||
|
webpage, 'player data'),
|
||||||
|
video_id, transform_source=js_to_json)
|
||||||
|
duration = parse_duration(vdata.get('duration'))
|
||||||
|
formats = [{
|
||||||
|
'format_id': 'flash',
|
||||||
|
'url': 'rtmp://fms.n-tv.de/' + vdata['video'],
|
||||||
|
}, {
|
||||||
|
'format_id': 'mobile',
|
||||||
|
'url': 'http://video.n-tv.de' + vdata['videoMp4'],
|
||||||
|
'tbr': 400, # estimation
|
||||||
|
}]
|
||||||
|
m3u8_url = 'http://video.n-tv.de' + vdata['videoM3u8']
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
m3u8_url, video_id, ext='mp4',
|
||||||
|
entry_protocol='m3u8_native', preference=0))
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': info['headline'],
|
||||||
|
'description': info.get('intro'),
|
||||||
|
'alt_title': info.get('kicker'),
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'thumbnail': vdata.get('html5VideoPoster'),
|
||||||
|
'duration': duration,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@ -1,15 +1,14 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
unescapeHTML
|
unescapeHTML
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class NTVIE(InfoExtractor):
|
class NTVRuIE(InfoExtractor):
|
||||||
|
IE_NAME = 'ntv.ru'
|
||||||
_VALID_URL = r'http://(?:www\.)?ntv\.ru/(?P<id>.+)'
|
_VALID_URL = r'http://(?:www\.)?ntv\.ru/(?P<id>.+)'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
@ -92,9 +91,7 @@ class NTVIE(InfoExtractor):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
page = self._download_webpage(url, video_id)
|
page = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, page, 'video id')
|
video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, page, 'video id')
|
@ -9,6 +9,7 @@ from ..compat import (
|
|||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -192,9 +193,29 @@ class VevoIE(InfoExtractor):
|
|||||||
# Download via HLS API
|
# Download via HLS API
|
||||||
formats.extend(self._download_api_formats(video_id))
|
formats.extend(self._download_api_formats(video_id))
|
||||||
|
|
||||||
|
# Download SMIL
|
||||||
|
smil_blocks = sorted((
|
||||||
|
f for f in video_info['videoVersions']
|
||||||
|
if f['sourceType'] == 13),
|
||||||
|
key=lambda f: f['version'])
|
||||||
|
smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
|
||||||
|
self._SMIL_BASE_URL, video_id, video_id.lower())
|
||||||
|
if smil_blocks:
|
||||||
|
smil_url_m = self._search_regex(
|
||||||
|
r'url="([^"]+)"', smil_blocks[-1]['data'], 'SMIL URL',
|
||||||
|
default=None)
|
||||||
|
if smil_url_m is not None:
|
||||||
|
smil_url = smil_url_m
|
||||||
|
if smil_url:
|
||||||
|
smil_xml = self._download_webpage(
|
||||||
|
smil_url, video_id, 'Downloading SMIL info', fatal=False)
|
||||||
|
if smil_xml:
|
||||||
|
formats.extend(self._formats_from_smil(smil_xml))
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
timestamp_ms = int(self._search_regex(
|
timestamp_ms = int_or_none(self._search_regex(
|
||||||
r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date'))
|
r'/Date\((\d+)\)/',
|
||||||
|
video_info['launchDate'], 'launch date', fatal=False))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -698,10 +698,9 @@ def parseOpts(overrideArguments=None):
|
|||||||
postproc.add_option(
|
postproc.add_option(
|
||||||
'--fixup',
|
'--fixup',
|
||||||
metavar='POLICY', dest='fixup', default='detect_or_warn',
|
metavar='POLICY', dest='fixup', default='detect_or_warn',
|
||||||
help='(experimental) Automatically correct known faults of the file. '
|
help='Automatically correct known faults of the file. '
|
||||||
'One of never (do nothing), warn (only emit a warning), '
|
'One of never (do nothing), warn (only emit a warning), '
|
||||||
'detect_or_warn(check whether we can do anything about it, warn '
|
'detect_or_warn(the default; fix file if we can, warn otherwise)')
|
||||||
'otherwise')
|
|
||||||
postproc.add_option(
|
postproc.add_option(
|
||||||
'--prefer-avconv',
|
'--prefer-avconv',
|
||||||
action='store_false', dest='prefer_ffmpeg',
|
action='store_false', dest='prefer_ffmpeg',
|
||||||
|
@ -1275,7 +1275,10 @@ def parse_duration(s):
|
|||||||
(?P<only_hours>[0-9.]+)\s*(?:hours?)|
|
(?P<only_hours>[0-9.]+)\s*(?:hours?)|
|
||||||
|
|
||||||
(?:
|
(?:
|
||||||
(?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?
|
(?:
|
||||||
|
(?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
|
||||||
|
(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
|
||||||
|
)?
|
||||||
(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
|
(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
|
||||||
)?
|
)?
|
||||||
(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
|
(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
|
||||||
@ -1293,6 +1296,8 @@ def parse_duration(s):
|
|||||||
res += int(m.group('mins')) * 60
|
res += int(m.group('mins')) * 60
|
||||||
if m.group('hours'):
|
if m.group('hours'):
|
||||||
res += int(m.group('hours')) * 60 * 60
|
res += int(m.group('hours')) * 60 * 60
|
||||||
|
if m.group('days'):
|
||||||
|
res += int(m.group('days')) * 24 * 60 * 60
|
||||||
if m.group('ms'):
|
if m.group('ms'):
|
||||||
res += float(m.group('ms'))
|
res += float(m.group('ms'))
|
||||||
return res
|
return res
|
||||||
@ -1543,7 +1548,7 @@ def js_to_json(code):
|
|||||||
res = re.sub(r'''(?x)
|
res = re.sub(r'''(?x)
|
||||||
"(?:[^"\\]*(?:\\\\|\\")?)*"|
|
"(?:[^"\\]*(?:\\\\|\\")?)*"|
|
||||||
'(?:[^'\\]*(?:\\\\|\\')?)*'|
|
'(?:[^'\\]*(?:\\\\|\\')?)*'|
|
||||||
[a-zA-Z_][a-zA-Z_0-9]*
|
[a-zA-Z_][.a-zA-Z_0-9]*
|
||||||
''', fix_kv, code)
|
''', fix_kv, code)
|
||||||
res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
|
res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
|
||||||
return res
|
return res
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2015.02.02'
|
__version__ = '2015.02.02.4'
|
||||||
|
Reference in New Issue
Block a user