Compare commits
65 Commits
2016.07.13
...
2016.07.24
Author | SHA1 | Date | |
---|---|---|---|
8fdc538b46 | |||
9513c1eb17 | |||
ae6fff4e64 | |||
5a65668e25 | |||
f75e6890db | |||
d9cb92c840 | |||
94c04a3c79 | |||
f094834857 | |||
111de00289 | |||
b4a131e1a5 | |||
f1991ce928 | |||
6548030a17 | |||
3a8947650b | |||
1979969f91 | |||
0673741af3 | |||
c8e170b209 | |||
bbe1f3634a | |||
4671dd41b2 | |||
f164b97123 | |||
5275efe30d | |||
b13647cf3c | |||
add7d2a0e2 | |||
e298d3a08c | |||
fd8c8c7dcd | |||
9158af16cc | |||
c6668e4ad1 | |||
84e8cca48b | |||
790b06b7d4 | |||
740d7c49c2 | |||
4e51ec5f57 | |||
05087d1b4c | |||
a66a73ee90 | |||
8188b923db | |||
d993a1354d | |||
e8882e7043 | |||
1056821799 | |||
890e6d3309 | |||
246080d378 | |||
b1ea680270 | |||
45550d1039 | |||
7cdfc4c90f | |||
af21f56f98 | |||
1a8f0773b6 | |||
59cc5bd8bf | |||
49bc16b95e | |||
a2f9ca1e67 | |||
371ddb14fe | |||
998895dffa | |||
aadd3ce21f | |||
ae7b846203 | |||
21ba7d0981 | |||
691fbe7f98 | |||
2e221ca3a8 | |||
317f7ab634 | |||
23495d6a39 | |||
224db034ab | |||
ad27649be3 | |||
84571be645 | |||
7b0d333a7e | |||
342f0c3682 | |||
38e0f16a94 | |||
e910fe2fe4 | |||
233b58dec7 | |||
c39b2ed990 | |||
35ec86689c |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.13*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.13**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.24*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.24**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.07.13
|
||||
[debug] youtube-dl version 2016.07.24
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
1
AUTHORS
1
AUTHORS
@ -178,3 +178,4 @@ Artur Krysiak
|
||||
Jakub Adam Wieczorek
|
||||
Aleksandar Topuzović
|
||||
Nehal Patel
|
||||
Rob van Bekkum
|
||||
|
@ -17,7 +17,7 @@ youtube-dl - download videos from youtube.com or other video platforms
|
||||
|
||||
To install it right away for all UNIX users (Linux, OS X, etc.), type:
|
||||
|
||||
sudo curl -L https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl
|
||||
sudo curl -L https://yt-dl.org/downloads/latest/youtube-dl -o /usr/local/bin/youtube-dl
|
||||
sudo chmod a+rx /usr/local/bin/youtube-dl
|
||||
|
||||
If you do not have curl, you can alternatively use a recent wget:
|
||||
|
@ -14,6 +14,7 @@
|
||||
- **8tracks**
|
||||
- **91porn**
|
||||
- **9gag**
|
||||
- **9now.com.au**
|
||||
- **abc.net.au**
|
||||
- **Abc7News**
|
||||
- **abcnews**
|
||||
@ -45,6 +46,7 @@
|
||||
- **archive.org**: archive.org videos
|
||||
- **ARD**
|
||||
- **ARD:mediathek**
|
||||
- **Arkena**
|
||||
- **arte.tv**
|
||||
- **arte.tv:+7**
|
||||
- **arte.tv:cinema**
|
||||
@ -141,6 +143,7 @@
|
||||
- **ComCarCoff**
|
||||
- **ComedyCentral**
|
||||
- **ComedyCentralShows**: The Daily Show / The Colbert Report
|
||||
- **ComedyCentralTV**
|
||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||
- **Coub**
|
||||
- **Cracked**
|
||||
@ -335,6 +338,8 @@
|
||||
- **kuwo:song**: 酷我音乐
|
||||
- **la7.it**
|
||||
- **Laola1Tv**
|
||||
- **Lcp**
|
||||
- **LcpPlay**
|
||||
- **Le**: 乐视网
|
||||
- **Learnr**
|
||||
- **Lecture2Go**
|
||||
@ -448,6 +453,7 @@
|
||||
- **niconico**: ニコニコ動画
|
||||
- **NiconicoPlaylist**
|
||||
- **NineCNineMedia**
|
||||
- **Nintendo**
|
||||
- **njoy**: N-JOY
|
||||
- **njoy:embed**
|
||||
- **Noco**
|
||||
@ -475,6 +481,7 @@
|
||||
- **NYTimes**
|
||||
- **NYTimesArticle**
|
||||
- **ocw.mit.edu**
|
||||
- **OdaTV**
|
||||
- **Odnoklassniki**
|
||||
- **OktoberfestTV**
|
||||
- **on.aol.com**
|
||||
@ -567,6 +574,7 @@
|
||||
- **rtve.es:alacarta**: RTVE a la carta
|
||||
- **rtve.es:infantil**: RTVE infantil
|
||||
- **rtve.es:live**: RTVE.es live streams
|
||||
- **rtve.es:television**
|
||||
- **RTVNH**
|
||||
- **Rudo**
|
||||
- **RUHD**
|
||||
@ -643,6 +651,7 @@
|
||||
- **stanfordoc**: Stanford Open ClassRoom
|
||||
- **Steam**
|
||||
- **Stitcher**
|
||||
- **Streamable**
|
||||
- **streamcloud.eu**
|
||||
- **StreamCZ**
|
||||
- **StreetVoice**
|
||||
|
@ -335,6 +335,40 @@ class TestFormatSelection(unittest.TestCase):
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], f1['format_id'])
|
||||
|
||||
def test_audio_only_extractor_format_selection(self):
|
||||
# For extractors with incomplete formats (all formats are audio-only or
|
||||
# video-only) best and worst should fallback to corresponding best/worst
|
||||
# video-only or audio-only formats (as per
|
||||
# https://github.com/rg3/youtube-dl/pull/5556)
|
||||
formats = [
|
||||
{'format_id': 'low', 'ext': 'mp3', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
|
||||
{'format_id': 'high', 'ext': 'mp3', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL},
|
||||
]
|
||||
info_dict = _make_result(formats)
|
||||
|
||||
ydl = YDL({'format': 'best'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'high')
|
||||
|
||||
ydl = YDL({'format': 'worst'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'low')
|
||||
|
||||
def test_format_not_available(self):
|
||||
formats = [
|
||||
{'format_id': 'regular', 'ext': 'mp4', 'height': 360, 'url': TEST_URL},
|
||||
{'format_id': 'video', 'ext': 'mp4', 'height': 720, 'acodec': 'none', 'url': TEST_URL},
|
||||
]
|
||||
info_dict = _make_result(formats)
|
||||
|
||||
# This must fail since complete video-audio format does not match filter
|
||||
# and extractor does not provide incomplete only formats (i.e. only
|
||||
# video-only or audio-only).
|
||||
ydl = YDL({'format': 'best[height>360]'})
|
||||
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
|
||||
|
||||
def test_invalid_format_specs(self):
|
||||
def assert_syntax_error(format_spec):
|
||||
ydl = YDL({'format': format_spec})
|
||||
|
@ -5,6 +5,7 @@ from __future__ import absolute_import, unicode_literals
|
||||
|
||||
import collections
|
||||
import contextlib
|
||||
import copy
|
||||
import datetime
|
||||
import errno
|
||||
import fileinput
|
||||
@ -1051,9 +1052,9 @@ class YoutubeDL(object):
|
||||
if isinstance(selector, list):
|
||||
fs = [_build_selector_function(s) for s in selector]
|
||||
|
||||
def selector_function(formats):
|
||||
def selector_function(ctx):
|
||||
for f in fs:
|
||||
for format in f(formats):
|
||||
for format in f(ctx):
|
||||
yield format
|
||||
return selector_function
|
||||
elif selector.type == GROUP:
|
||||
@ -1061,17 +1062,17 @@ class YoutubeDL(object):
|
||||
elif selector.type == PICKFIRST:
|
||||
fs = [_build_selector_function(s) for s in selector.selector]
|
||||
|
||||
def selector_function(formats):
|
||||
def selector_function(ctx):
|
||||
for f in fs:
|
||||
picked_formats = list(f(formats))
|
||||
picked_formats = list(f(ctx))
|
||||
if picked_formats:
|
||||
return picked_formats
|
||||
return []
|
||||
elif selector.type == SINGLE:
|
||||
format_spec = selector.selector
|
||||
|
||||
def selector_function(formats):
|
||||
formats = list(formats)
|
||||
def selector_function(ctx):
|
||||
formats = list(ctx['formats'])
|
||||
if not formats:
|
||||
return
|
||||
if format_spec == 'all':
|
||||
@ -1084,9 +1085,10 @@ class YoutubeDL(object):
|
||||
if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
|
||||
if audiovideo_formats:
|
||||
yield audiovideo_formats[format_idx]
|
||||
# for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
|
||||
elif (all(f.get('acodec') != 'none' for f in formats) or
|
||||
all(f.get('vcodec') != 'none' for f in formats)):
|
||||
# for extractors with incomplete formats (audio only (soundcloud)
|
||||
# or video only (imgur)) we will fallback to best/worst
|
||||
# {video,audio}-only format
|
||||
elif ctx['incomplete_formats']:
|
||||
yield formats[format_idx]
|
||||
elif format_spec == 'bestaudio':
|
||||
audio_formats = [
|
||||
@ -1160,17 +1162,18 @@ class YoutubeDL(object):
|
||||
}
|
||||
video_selector, audio_selector = map(_build_selector_function, selector.selector)
|
||||
|
||||
def selector_function(formats):
|
||||
formats = list(formats)
|
||||
for pair in itertools.product(video_selector(formats), audio_selector(formats)):
|
||||
def selector_function(ctx):
|
||||
for pair in itertools.product(
|
||||
video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
|
||||
yield _merge(pair)
|
||||
|
||||
filters = [self._build_format_filter(f) for f in selector.filters]
|
||||
|
||||
def final_selector(formats):
|
||||
def final_selector(ctx):
|
||||
ctx_copy = copy.deepcopy(ctx)
|
||||
for _filter in filters:
|
||||
formats = list(filter(_filter, formats))
|
||||
return selector_function(formats)
|
||||
ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
|
||||
return selector_function(ctx_copy)
|
||||
return final_selector
|
||||
|
||||
stream = io.BytesIO(format_spec.encode('utf-8'))
|
||||
@ -1377,7 +1380,34 @@ class YoutubeDL(object):
|
||||
req_format_list.append('best')
|
||||
req_format = '/'.join(req_format_list)
|
||||
format_selector = self.build_format_selector(req_format)
|
||||
formats_to_download = list(format_selector(formats))
|
||||
|
||||
# While in format selection we may need to have an access to the original
|
||||
# format set in order to calculate some metrics or do some processing.
|
||||
# For now we need to be able to guess whether original formats provided
|
||||
# by extractor are incomplete or not (i.e. whether extractor provides only
|
||||
# video-only or audio-only formats) for proper formats selection for
|
||||
# extractors with such incomplete formats (see
|
||||
# https://github.com/rg3/youtube-dl/pull/5556).
|
||||
# Since formats may be filtered during format selection and may not match
|
||||
# the original formats the results may be incorrect. Thus original formats
|
||||
# or pre-calculated metrics should be passed to format selection routines
|
||||
# as well.
|
||||
# We will pass a context object containing all necessary additional data
|
||||
# instead of just formats.
|
||||
# This fixes incorrect format selection issue (see
|
||||
# https://github.com/rg3/youtube-dl/issues/10083).
|
||||
incomplete_formats = (
|
||||
# All formats are video-only or
|
||||
all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
|
||||
# all formats are audio-only
|
||||
all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
|
||||
|
||||
ctx = {
|
||||
'formats': formats,
|
||||
'incomplete_formats': incomplete_formats,
|
||||
}
|
||||
|
||||
formats_to_download = list(format_selector(ctx))
|
||||
if not formats_to_download:
|
||||
raise ExtractorError('requested format not available',
|
||||
expected=True)
|
||||
|
@ -20,7 +20,7 @@ from ..compat import compat_etree_fromstring
|
||||
|
||||
class ARDMediathekIE(InfoExtractor):
|
||||
IE_NAME = 'ARD:mediathek'
|
||||
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ardmediathek.de/tv/Dokumentation-und-Reportage/Ich-liebe-das-Leben-trotzdem/rbb-Fernsehen/Video?documentId=29582122&bcastId=3822114',
|
||||
@ -62,6 +62,17 @@ class ARDMediathekIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
|
||||
'md5': '4e8f00631aac0395fee17368ac0e9867',
|
||||
'info_dict': {
|
||||
'id': '30796318',
|
||||
'ext': 'mp3',
|
||||
'title': 'Vor dem Fest',
|
||||
'description': 'md5:c0c1c8048514deaed2a73b3a60eecacb',
|
||||
'duration': 3287,
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_media_info(self, media_info_url, webpage, video_id):
|
||||
|
115
youtube_dl/extractor/arkena.py
Normal file
115
youtube_dl/extractor/arkena.py
Normal file
@ -0,0 +1,115 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
strip_jsonp,
|
||||
)
|
||||
|
||||
|
||||
class ArkenaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P<id>[^/]+)/[^/]+/(?P<account_id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411',
|
||||
'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
|
||||
'info_dict': {
|
||||
'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
|
||||
'ext': 'mp4',
|
||||
'title': 'Big Buck Bunny',
|
||||
'description': 'Royalty free test video',
|
||||
'timestamp': 1432816365,
|
||||
'upload_date': '20150528',
|
||||
'is_live': False,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://play.arkena.com/config/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411/?callbackMethod=jQuery1111023664739129262213_1469227693893',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://play.arkena.com/config/avp/v1/player/media/327336/darkmatter/131064/?callbackMethod=jQuery1111002221189684892677_1469227595972',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://play.arkena.com/embed/avp/v1/player/media/327336/darkmatter/131064/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
# See https://support.arkena.com/display/PLAY/Ways+to+embed+your+video
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//play\.arkena\.com/embed/avp/.+?)\1',
|
||||
webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
account_id = mobj.group('account_id')
|
||||
|
||||
playlist = self._download_json(
|
||||
'https://play.arkena.com/config/avp/v2/player/media/%s/0/%s/?callbackMethod=_'
|
||||
% (video_id, account_id),
|
||||
video_id, transform_source=strip_jsonp)['Playlist'][0]
|
||||
|
||||
media_info = playlist['MediaInfo']
|
||||
title = media_info['Title']
|
||||
media_files = playlist['MediaFiles']
|
||||
|
||||
is_live = False
|
||||
formats = []
|
||||
for kind_case, kind_formats in media_files.items():
|
||||
kind = kind_case.lower()
|
||||
for f in kind_formats:
|
||||
f_url = f.get('Url')
|
||||
if not f_url:
|
||||
continue
|
||||
is_live = f.get('Live') == 'true'
|
||||
exts = (mimetype2ext(f.get('Type')), determine_ext(f_url, None))
|
||||
if kind == 'm3u8' or 'm3u8' in exts:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
f_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8' if is_live else 'm3u8_native',
|
||||
m3u8_id=kind, fatal=False, live=is_live))
|
||||
elif kind == 'flash' or 'f4m' in exts:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
f_url, video_id, f4m_id=kind, fatal=False))
|
||||
elif kind == 'dash' or 'mpd' in exts:
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
f_url, video_id, mpd_id=kind, fatal=False))
|
||||
elif kind == 'silverlight':
|
||||
# TODO: process when ism is supported (see
|
||||
# https://github.com/rg3/youtube-dl/issues/8118)
|
||||
continue
|
||||
else:
|
||||
tbr = float_or_none(f.get('Bitrate'), 1000)
|
||||
formats.append({
|
||||
'url': f_url,
|
||||
'format_id': '%s-%d' % (kind, tbr) if tbr else kind,
|
||||
'tbr': tbr,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = media_info.get('Description')
|
||||
video_id = media_info.get('VideoId') or video_id
|
||||
timestamp = parse_iso8601(media_info.get('PublishDate'))
|
||||
thumbnails = [{
|
||||
'url': thumbnail['Url'],
|
||||
'width': int_or_none(thumbnail.get('Size')),
|
||||
} for thumbnail in (media_info.get('Poster') or []) if thumbnail.get('Url')]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'is_live': is_live,
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
}
|
@ -44,8 +44,6 @@ class BBCCoUkIE(InfoExtractor):
|
||||
|
||||
_MEDIASELECTION_NS = 'http://bbc.co.uk/2008/mp/mediaselection'
|
||||
_EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
|
||||
# Unified Streaming Platform
|
||||
_USP_RE = r'/([^/]+)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
|
||||
|
||||
_NAMESPACES = (
|
||||
_MEDIASELECTION_NS,
|
||||
@ -57,11 +55,12 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
|
||||
'info_dict': {
|
||||
'id': 'b039d07m',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'Leonard Cohen, Kaleidoscope - BBC Radio 4',
|
||||
'description': 'The Canadian poet and songwriter reflects on his musical career.',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
@ -93,7 +92,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'this episode is not currently available',
|
||||
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
|
||||
@ -108,7 +107,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'this episode is not currently available',
|
||||
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
|
||||
'info_dict': {
|
||||
@ -128,12 +127,13 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'note': 'Audio',
|
||||
'info_dict': {
|
||||
'id': 'p022h44j',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances',
|
||||
'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.",
|
||||
'duration': 227,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
@ -141,12 +141,13 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'note': 'Video',
|
||||
'info_dict': {
|
||||
'id': 'p025c103',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
|
||||
'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
|
||||
'duration': 226,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
@ -162,7 +163,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'this episode is not currently available',
|
||||
'skip': 'geolocation',
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
|
||||
'info_dict': {
|
||||
@ -176,7 +177,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'this episode is not currently available',
|
||||
'skip': 'geolocation',
|
||||
}, {
|
||||
# iptv-all mediaset fails with geolocation however there is no geo restriction
|
||||
# for this programme at all
|
||||
@ -191,17 +192,18 @@ class BBCCoUkIE(InfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'this episode is not currently available on BBC iPlayer Radio',
|
||||
'skip': 'Now it\'s really geo-restricted',
|
||||
}, {
|
||||
# compact player (https://github.com/rg3/youtube-dl/issues/8147)
|
||||
'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
|
||||
'info_dict': {
|
||||
'id': 'p028bfkj',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
|
||||
'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
@ -246,15 +248,9 @@ class BBCCoUkIE(InfoExtractor):
|
||||
elif transfer_format == 'dash':
|
||||
pass
|
||||
elif transfer_format == 'hls':
|
||||
is_unified_streaming = re.search(self._USP_RE, href)
|
||||
if is_unified_streaming:
|
||||
href = re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href)
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=supplier, fatal=False)
|
||||
if is_unified_streaming:
|
||||
self._check_formats(m3u8_formats, programme_id)
|
||||
formats.extend(m3u8_formats)
|
||||
m3u8_id=supplier, fatal=False))
|
||||
# Direct link
|
||||
else:
|
||||
formats.append({
|
||||
@ -309,14 +305,13 @@ class BBCCoUkIE(InfoExtractor):
|
||||
for connection in self._extract_connections(media):
|
||||
conn_formats = self._extract_connection(connection, programme_id)
|
||||
for format in conn_formats:
|
||||
if format.get('protocol') != 'm3u8_native':
|
||||
format.update({
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vbr': vbr,
|
||||
'vcodec': vcodec,
|
||||
'filesize': file_size,
|
||||
})
|
||||
format.update({
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vbr': vbr,
|
||||
'vcodec': vcodec,
|
||||
'filesize': file_size,
|
||||
})
|
||||
if service:
|
||||
format['format_id'] = '%s_%s' % (service, format['format_id'])
|
||||
formats.extend(conn_formats)
|
||||
@ -594,7 +589,8 @@ class BBCIE(BBCCoUkIE):
|
||||
'info_dict': {
|
||||
'id': '150615_telabyad_kentin_cogu',
|
||||
'ext': 'mp4',
|
||||
'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
|
||||
'title': "Tel Abyad'da IŞİD bayrağı indirildi YPG bayrağı çekildi",
|
||||
'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
|
||||
'timestamp': 1434397334,
|
||||
'upload_date': '20150615',
|
||||
},
|
||||
@ -608,6 +604,7 @@ class BBCIE(BBCCoUkIE):
|
||||
'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||
'ext': 'mp4',
|
||||
'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
|
||||
'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
|
||||
'timestamp': 1434713142,
|
||||
'upload_date': '20150619',
|
||||
},
|
||||
@ -823,8 +820,20 @@ class BBCIE(BBCCoUkIE):
|
||||
# http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
|
||||
playlist = data_playable.get('otherSettings', {}).get('playlist', {})
|
||||
if playlist:
|
||||
entries.append(self._extract_from_playlist_sxml(
|
||||
playlist.get('progressiveDownloadUrl'), playlist_id, timestamp))
|
||||
for key in ('progressiveDownload', 'streaming'):
|
||||
playlist_url = playlist.get('%sUrl' % key)
|
||||
if not playlist_url:
|
||||
continue
|
||||
try:
|
||||
entries.append(self._extract_from_playlist_sxml(
|
||||
playlist_url, playlist_id, timestamp))
|
||||
except Exception as e:
|
||||
# Some playlist URL may fail with 500, at the same time
|
||||
# the other one may work fine (e.g.
|
||||
# http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
|
||||
continue
|
||||
raise
|
||||
|
||||
if entries:
|
||||
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||
@ -1003,10 +1012,10 @@ class BBCCoUkPlaylistBaseIE(InfoExtractor):
|
||||
|
||||
class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
|
||||
IE_NAME = 'bbc.co.uk:iplayer:playlist'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/episodes/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/(?:episodes|group)/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
|
||||
_URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s'
|
||||
_VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
|
||||
'info_dict': {
|
||||
'id': 'b05rcz9v',
|
||||
@ -1014,7 +1023,17 @@ class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
|
||||
'description': 'French thriller serial about a missing teenager.',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
}
|
||||
'skip': 'This programme is not currently available on BBC iPlayer',
|
||||
}, {
|
||||
# Available for over a year unlike 30 days for most other programmes
|
||||
'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
|
||||
'info_dict': {
|
||||
'id': 'p02tcc32',
|
||||
'title': 'Bohemian Icons',
|
||||
'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
}]
|
||||
|
||||
def _extract_title_and_description(self, webpage):
|
||||
title = self._search_regex(r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
|
||||
|
@ -27,6 +27,7 @@ from ..utils import (
|
||||
unsmuggle_url,
|
||||
update_url_query,
|
||||
clean_html,
|
||||
mimetype2ext,
|
||||
)
|
||||
|
||||
|
||||
@ -545,14 +546,16 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
formats = []
|
||||
for source in json_data.get('sources', []):
|
||||
container = source.get('container')
|
||||
source_type = source.get('type')
|
||||
ext = mimetype2ext(source.get('type'))
|
||||
src = source.get('src')
|
||||
if source_type == 'application/x-mpegURL' or container == 'M2TS':
|
||||
if ext == 'ism':
|
||||
continue
|
||||
elif ext == 'm3u8' or container == 'M2TS':
|
||||
if not src:
|
||||
continue
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
elif source_type == 'application/dash+xml':
|
||||
elif ext == 'mpd':
|
||||
if not src:
|
||||
continue
|
||||
formats.extend(self._extract_mpd_formats(src, video_id, 'dash', fatal=False))
|
||||
@ -568,7 +571,7 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
'tbr': tbr,
|
||||
'filesize': int_or_none(source.get('size')),
|
||||
'container': container,
|
||||
'ext': container.lower(),
|
||||
'ext': ext or container.lower(),
|
||||
}
|
||||
if width == 0 and height == 0:
|
||||
f.update({
|
||||
|
@ -25,6 +25,7 @@ class CBCIE(InfoExtractor):
|
||||
'upload_date': '20160203',
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
}, {
|
||||
# with clipId
|
||||
'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
|
||||
@ -64,6 +65,7 @@ class CBCIE(InfoExtractor):
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
}],
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@ -104,6 +106,7 @@ class CBCPlayerIE(InfoExtractor):
|
||||
'upload_date': '20160210',
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
}, {
|
||||
# Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
|
||||
'url': 'http://www.cbc.ca/player/play/2657631896',
|
||||
|
@ -26,6 +26,7 @@ class CBSNewsIE(CBSBaseIE):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Subscribers only',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
|
||||
@ -69,7 +70,7 @@ class CBSNewsLiveVideoIE(InfoExtractor):
|
||||
IE_DESC = 'CBS News Live Videos'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
|
||||
'info_dict': {
|
||||
'id': 'clinton-sanders-prepare-to-face-off-in-nh',
|
||||
@ -77,7 +78,15 @@ class CBSNewsLiveVideoIE(InfoExtractor):
|
||||
'title': 'Clinton, Sanders Prepare To Face Off In NH',
|
||||
'duration': 334,
|
||||
},
|
||||
}
|
||||
'skip': 'Video gone, redirected to http://www.cbsnews.com/live/',
|
||||
}, {
|
||||
'url': 'http://www.cbsnews.com/live/video/video-shows-intense-paragliding-accident/',
|
||||
'info_dict': {
|
||||
'id': 'video-shows-intense-paragliding-accident',
|
||||
'ext': 'flv',
|
||||
'title': 'Video Shows Intense Paragliding Accident',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@ -23,7 +23,7 @@ class CliphunterIE(InfoExtractor):
|
||||
(?P<id>[0-9]+)/
|
||||
(?P<seo>.+?)(?:$|[#\?])
|
||||
'''
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo',
|
||||
'md5': 'b7c9bbd4eb3a226ab91093714dcaa480',
|
||||
'info_dict': {
|
||||
@ -32,8 +32,19 @@ class CliphunterIE(InfoExtractor):
|
||||
'title': 'Fun Jynx Maze solo',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
}, {
|
||||
'url': 'http://www.cliphunter.com/w/2019449/ShesNew__My_booty_girlfriend_Victoria_Paradices_pussy_filled_with_jizz',
|
||||
'md5': '55a723c67bfc6da6b0cfa00d55da8a27',
|
||||
'info_dict': {
|
||||
'id': '2019449',
|
||||
'ext': 'mp4',
|
||||
'title': 'ShesNew - My booty girlfriend, Victoria Paradice\'s pussy filled with jizz',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'age_limit': 18,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@ -6,7 +6,6 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_HTTPError,
|
||||
)
|
||||
from ..utils import (
|
||||
@ -17,37 +16,26 @@ from ..utils import (
|
||||
|
||||
|
||||
class CloudyIE(InfoExtractor):
|
||||
_IE_DESC = 'cloudy.ec and videoraj.ch'
|
||||
_IE_DESC = 'cloudy.ec'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?(?P<host>cloudy\.ec|videoraj\.(?:ch|to))/
|
||||
https?://(?:www\.)?cloudy\.ec/
|
||||
(?:v/|embed\.php\?id=)
|
||||
(?P<id>[A-Za-z0-9]+)
|
||||
'''
|
||||
_EMBED_URL = 'http://www.%s/embed.php?id=%s'
|
||||
_API_URL = 'http://www.%s/api/player.api.php?%s'
|
||||
_EMBED_URL = 'http://www.cloudy.ec/embed.php?id=%s'
|
||||
_API_URL = 'http://www.cloudy.ec/api/player.api.php'
|
||||
_MAX_TRIES = 2
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://www.cloudy.ec/v/af511e2527aac',
|
||||
'md5': '5cb253ace826a42f35b4740539bedf07',
|
||||
'info_dict': {
|
||||
'id': 'af511e2527aac',
|
||||
'ext': 'flv',
|
||||
'title': 'Funny Cats and Animals Compilation june 2013',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.videoraj.to/v/47f399fd8bb60',
|
||||
'md5': '7d0f8799d91efd4eda26587421c3c3b0',
|
||||
'info_dict': {
|
||||
'id': '47f399fd8bb60',
|
||||
'ext': 'flv',
|
||||
'title': 'Burning a New iPhone 5 with Gasoline - Will it Survive?',
|
||||
}
|
||||
_TEST = {
|
||||
'url': 'https://www.cloudy.ec/v/af511e2527aac',
|
||||
'md5': '5cb253ace826a42f35b4740539bedf07',
|
||||
'info_dict': {
|
||||
'id': 'af511e2527aac',
|
||||
'ext': 'flv',
|
||||
'title': 'Funny Cats and Animals Compilation june 2013',
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
def _extract_video(self, video_host, video_id, file_key, error_url=None, try_num=0):
|
||||
def _extract_video(self, video_id, file_key, error_url=None, try_num=0):
|
||||
|
||||
if try_num > self._MAX_TRIES - 1:
|
||||
raise ExtractorError('Unable to extract video URL', expected=True)
|
||||
@ -64,9 +52,8 @@ class CloudyIE(InfoExtractor):
|
||||
'errorUrl': error_url,
|
||||
})
|
||||
|
||||
data_url = self._API_URL % (video_host, compat_urllib_parse_urlencode(form))
|
||||
player_data = self._download_webpage(
|
||||
data_url, video_id, 'Downloading player data')
|
||||
self._API_URL, video_id, 'Downloading player data', query=form)
|
||||
data = compat_parse_qs(player_data)
|
||||
|
||||
try_num += 1
|
||||
@ -88,7 +75,7 @@ class CloudyIE(InfoExtractor):
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]:
|
||||
self.report_warning('Invalid video URL, requesting another', video_id)
|
||||
return self._extract_video(video_host, video_id, file_key, video_url, try_num)
|
||||
return self._extract_video(video_id, file_key, video_url, try_num)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@ -98,14 +85,13 @@ class CloudyIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_host = mobj.group('host')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
url = self._EMBED_URL % (video_host, video_id)
|
||||
url = self._EMBED_URL % video_id
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
file_key = self._search_regex(
|
||||
[r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'],
|
||||
webpage, 'file_key')
|
||||
|
||||
return self._extract_video(video_host, video_id, file_key)
|
||||
return self._extract_video(video_id, file_key)
|
||||
|
@ -273,3 +273,36 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
||||
'title': show_name + ' ' + title,
|
||||
'description': description,
|
||||
}
|
||||
|
||||
|
||||
class ComedyCentralTVIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.comedycentral.tv/staffeln/7436-the-mindy-project-staffel-4',
|
||||
'info_dict': {
|
||||
'id': 'local_playlist-f99b626bdfe13568579a',
|
||||
'ext': 'flv',
|
||||
'title': 'Episode_the-mindy-project_shows_season-4_episode-3_full-episode_part1',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.comedycentral.tv/shows/1074-workaholics',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.comedycentral.tv/shows/1727-the-mindy-project/bonus',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
mrss_url = self._search_regex(
|
||||
r'data-mrss=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'mrss url', group='url')
|
||||
|
||||
return self._get_videos_info_from_url(mrss_url, video_id)
|
||||
|
@ -1481,6 +1481,13 @@ class InfoExtractor(object):
|
||||
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, formats_dict=formats_dict)
|
||||
|
||||
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}):
|
||||
"""
|
||||
Parse formats from MPD manifest.
|
||||
References:
|
||||
1. MPEG-DASH Standard, ISO/IEC 23009-1:2014(E),
|
||||
http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
|
||||
2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
|
||||
"""
|
||||
if mpd_doc.get('type') == 'dynamic':
|
||||
return []
|
||||
|
||||
@ -1513,8 +1520,16 @@ class InfoExtractor(object):
|
||||
s_e = segment_timeline.findall(_add_ns('S'))
|
||||
if s_e:
|
||||
ms_info['total_number'] = 0
|
||||
ms_info['s'] = []
|
||||
for s in s_e:
|
||||
ms_info['total_number'] += 1 + int(s.get('r', '0'))
|
||||
r = int(s.get('r', 0))
|
||||
ms_info['total_number'] += 1 + r
|
||||
ms_info['s'].append({
|
||||
't': int(s.get('t', 0)),
|
||||
# @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60])
|
||||
'd': int(s.attrib['d']),
|
||||
'r': r,
|
||||
})
|
||||
else:
|
||||
timescale = segment_template.get('timescale')
|
||||
if timescale:
|
||||
@ -1551,7 +1566,7 @@ class InfoExtractor(object):
|
||||
continue
|
||||
representation_attrib = adaptation_set.attrib.copy()
|
||||
representation_attrib.update(representation.attrib)
|
||||
# According to page 41 of ISO/IEC 29001-1:2014, @mimeType is mandatory
|
||||
# According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
|
||||
mime_type = representation_attrib['mimeType']
|
||||
content_type = mime_type.split('/')[0]
|
||||
if content_type == 'text':
|
||||
@ -1595,16 +1610,40 @@ class InfoExtractor(object):
|
||||
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
||||
media_template = representation_ms_info['media_template']
|
||||
media_template = media_template.replace('$RepresentationID$', representation_id)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth)\$', r'%(\1)d', media_template)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth)%([^$]+)\$', r'%(\1)\2', media_template)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth|Time)%([^$]+)\$', r'%(\1)\2', media_template)
|
||||
media_template.replace('$$', '$')
|
||||
representation_ms_info['segment_urls'] = [
|
||||
media_template % {
|
||||
'Number': segment_number,
|
||||
'Bandwidth': representation_attrib.get('bandwidth')}
|
||||
for segment_number in range(
|
||||
representation_ms_info['start_number'],
|
||||
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
||||
|
||||
# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
|
||||
# can't be used at the same time
|
||||
if '%(Number' in media_template:
|
||||
representation_ms_info['segment_urls'] = [
|
||||
media_template % {
|
||||
'Number': segment_number,
|
||||
'Bandwidth': representation_attrib.get('bandwidth'),
|
||||
}
|
||||
for segment_number in range(
|
||||
representation_ms_info['start_number'],
|
||||
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
||||
else:
|
||||
representation_ms_info['segment_urls'] = []
|
||||
segment_time = 0
|
||||
|
||||
def add_segment_url():
|
||||
representation_ms_info['segment_urls'].append(
|
||||
media_template % {
|
||||
'Time': segment_time,
|
||||
'Bandwidth': representation_attrib.get('bandwidth'),
|
||||
}
|
||||
)
|
||||
|
||||
for num, s in enumerate(representation_ms_info['s']):
|
||||
segment_time = s.get('t') or segment_time
|
||||
add_segment_url()
|
||||
for r in range(s.get('r', 0)):
|
||||
segment_time += s['d']
|
||||
add_segment_url()
|
||||
segment_time += s['d']
|
||||
if 'segment_urls' in representation_ms_info:
|
||||
f.update({
|
||||
'segment_urls': representation_ms_info['segment_urls'],
|
||||
|
@ -51,8 +51,11 @@ class CSpanIE(InfoExtractor):
|
||||
'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers',
|
||||
'info_dict': {
|
||||
'id': 'judiciary031715',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Immigration Reforms Needed to Protect Skilled American Workers',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 downloads
|
||||
}
|
||||
}]
|
||||
|
||||
|
@ -5,19 +5,20 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
determine_protocol,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class DailyMailIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/video/[^/]+/video-(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.dailymail.co.uk/video/sciencetech/video-1288527/Turn-video-impressionist-masterpiece.html',
|
||||
'md5': '2f639d446394f53f3a33658b518b6615',
|
||||
'url': 'http://www.dailymail.co.uk/video/tvshowbiz/video-1295863/The-Mountain-appears-sparkling-water-ad-Heavy-Bubbles.html',
|
||||
'md5': 'f6129624562251f628296c3a9ffde124',
|
||||
'info_dict': {
|
||||
'id': '1288527',
|
||||
'id': '1295863',
|
||||
'ext': 'mp4',
|
||||
'title': 'Turn any video into an impressionist masterpiece',
|
||||
'description': 'md5:88ddbcb504367987b2708bb38677c9d2',
|
||||
'title': 'The Mountain appears in sparkling water ad for \'Heavy Bubbles\'',
|
||||
'description': 'md5:a93d74b6da172dd5dc4d973e0b766a84',
|
||||
}
|
||||
}
|
||||
|
||||
@ -26,7 +27,7 @@ class DailyMailIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_data = self._parse_json(self._search_regex(
|
||||
r"data-opts='({.+?})'", webpage, 'video data'), video_id)
|
||||
title = video_data['title']
|
||||
title = unescapeHTML(video_data['title'])
|
||||
video_sources = self._download_json(video_data.get(
|
||||
'sources', {}).get('url') or 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id, video_id)
|
||||
|
||||
@ -55,7 +56,7 @@ class DailyMailIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': video_data.get('descr'),
|
||||
'description': unescapeHTML(video_data.get('descr')),
|
||||
'thumbnail': video_data.get('poster') or video_data.get('thumbnail'),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -62,11 +62,9 @@ class DCNBaseIE(InfoExtractor):
|
||||
r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8',
|
||||
r'<a[^>]+href="rtsp(://[^"]+)"'
|
||||
], webpage, 'format url')
|
||||
# TODO: Current DASH formats are broken - $Time$ pattern in
|
||||
# <SegmentTemplate> not implemented yet
|
||||
# formats.extend(self._extract_mpd_formats(
|
||||
# format_url_base + '/manifest.mpd',
|
||||
# video_id, mpd_id='dash', fatal=False))
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url_base + '/manifest.mpd',
|
||||
video_id, mpd_id='dash', fatal=False))
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url_base + '/playlist.m3u8', video_id, 'mp4',
|
||||
m3u8_entry_protocol, m3u8_id='hls', fatal=False))
|
||||
|
@ -17,8 +17,12 @@ class DreiSatIE(ZDFIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'Waidmannsheil',
|
||||
'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
|
||||
'uploader': '3sat',
|
||||
'uploader': 'SCHWEIZWEIT',
|
||||
'uploader_id': '100000210',
|
||||
'upload_date': '20140913'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 downloads
|
||||
}
|
||||
},
|
||||
{
|
||||
|
@ -4,19 +4,23 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
encode_base_n,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class EpornerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)/(?P<display_id>[\w-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
|
||||
'md5': '39d486f046212d8e1b911c52ab4691f8',
|
||||
'info_dict': {
|
||||
'id': '95008',
|
||||
'id': 'qlDUmNsj6VS',
|
||||
'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video',
|
||||
'ext': 'mp4',
|
||||
'title': 'Infamous Tiffany Teen Strip Tease Video',
|
||||
@ -28,34 +32,72 @@ class EpornerIE(InfoExtractor):
|
||||
# New (May 2016) URL layout
|
||||
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.*?) - EPORNER', webpage, 'title')
|
||||
webpage, urlh = self._download_webpage_handle(url, display_id)
|
||||
|
||||
redirect_url = 'http://www.eporner.com/config5/%s' % video_id
|
||||
player_code = self._download_webpage(
|
||||
redirect_url, display_id, note='Downloading player config')
|
||||
video_id = self._match_id(compat_str(urlh.geturl()))
|
||||
|
||||
sources = self._search_regex(
|
||||
r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', player_code, 'sources')
|
||||
hash = self._search_regex(
|
||||
r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash')
|
||||
|
||||
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
|
||||
r'<title>(.+?) - EPORNER', webpage, 'title')
|
||||
|
||||
# Reverse engineered from vjs.js
|
||||
def calc_hash(s):
|
||||
return ''.join((encode_base_n(int(s[lb:lb + 8], 16), 36) for lb in range(0, 32, 8)))
|
||||
|
||||
video = self._download_json(
|
||||
'http://www.eporner.com/xhr/video/%s' % video_id,
|
||||
display_id, note='Downloading video JSON',
|
||||
query={
|
||||
'hash': calc_hash(hash),
|
||||
'device': 'generic',
|
||||
'domain': 'www.eporner.com',
|
||||
'fallback': 'false',
|
||||
})
|
||||
|
||||
if video.get('available') is False:
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, video['message']), expected=True)
|
||||
|
||||
sources = video['sources']
|
||||
|
||||
formats = []
|
||||
for video_url, format_id in re.findall(r'file\s*:\s*"([^"]+)",\s*label\s*:\s*"([^"]+)"', sources):
|
||||
fmt = {
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
}
|
||||
m = re.search(r'^(\d+)', format_id)
|
||||
if m:
|
||||
fmt['height'] = int(m.group(1))
|
||||
formats.append(fmt)
|
||||
for kind, formats_dict in sources.items():
|
||||
if not isinstance(formats_dict, dict):
|
||||
continue
|
||||
for format_id, format_dict in formats_dict.items():
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
src = format_dict.get('src')
|
||||
if not isinstance(src, compat_str) or not src.startswith('http'):
|
||||
continue
|
||||
if kind == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src, display_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=kind, fatal=False))
|
||||
else:
|
||||
height = int_or_none(self._search_regex(
|
||||
r'(\d+)[pP]', format_id, 'height', default=None))
|
||||
fps = int_or_none(self._search_regex(
|
||||
r'(\d+)fps', format_id, 'fps', default=None))
|
||||
|
||||
formats.append({
|
||||
'url': src,
|
||||
'format_id': format_id,
|
||||
'height': height,
|
||||
'fps': fps,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = parse_duration(self._html_search_meta('duration', webpage))
|
||||
|
@ -44,6 +44,7 @@ from .appletrailers import (
|
||||
AppleTrailersSectionIE,
|
||||
)
|
||||
from .archiveorg import ArchiveOrgIE
|
||||
from .arkena import ArkenaIE
|
||||
from .ard import (
|
||||
ARDIE,
|
||||
ARDMediathekIE,
|
||||
@ -156,7 +157,11 @@ from .cnn import (
|
||||
)
|
||||
from .coub import CoubIE
|
||||
from .collegerama import CollegeRamaIE
|
||||
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
||||
from .comedycentral import (
|
||||
ComedyCentralIE,
|
||||
ComedyCentralShowsIE,
|
||||
ComedyCentralTVIE,
|
||||
)
|
||||
from .comcarcoff import ComCarCoffIE
|
||||
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||
from .commonprotocols import RtmpIE
|
||||
@ -393,6 +398,10 @@ from .kuwo import (
|
||||
)
|
||||
from .la7 import LA7IE
|
||||
from .laola1tv import Laola1TvIE
|
||||
from .lcp import (
|
||||
LcpPlayIE,
|
||||
LcpIE,
|
||||
)
|
||||
from .learnr import LearnrIE
|
||||
from .lecture2go import Lecture2GoIE
|
||||
from .lemonde import LemondeIE
|
||||
@ -537,6 +546,8 @@ from .nick import (
|
||||
from .niconico import NiconicoIE, NiconicoPlaylistIE
|
||||
from .ninecninemedia import NineCNineMediaIE
|
||||
from .ninegag import NineGagIE
|
||||
from .ninenow import NineNowIE
|
||||
from .nintendo import NintendoIE
|
||||
from .noco import NocoIE
|
||||
from .normalboots import NormalbootsIE
|
||||
from .nosvideo import NosVideoIE
|
||||
@ -581,6 +592,7 @@ from .nytimes import (
|
||||
NYTimesArticleIE,
|
||||
)
|
||||
from .nuvid import NuvidIE
|
||||
from .odatv import OdaTVIE
|
||||
from .odnoklassniki import OdnoklassnikiIE
|
||||
from .oktoberfesttv import OktoberfestTVIE
|
||||
from .onet import (
|
||||
@ -689,7 +701,7 @@ from .rtlnl import RtlNlIE
|
||||
from .rtl2 import RTL2IE
|
||||
from .rtp import RTPIE
|
||||
from .rts import RTSIE
|
||||
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
|
||||
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE
|
||||
from .rtvnh import RTVNHIE
|
||||
from .rudo import RudoIE
|
||||
from .ruhd import RUHDIE
|
||||
@ -781,6 +793,7 @@ from .srmediathek import SRMediathekIE
|
||||
from .ssa import SSAIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .steam import SteamIE
|
||||
from .streamable import StreamableIE
|
||||
from .streamcloud import StreamcloudIE
|
||||
from .streamcz import StreamCZIE
|
||||
from .streetvoice import StreetVoiceIE
|
||||
|
@ -27,7 +27,7 @@ class FacebookIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
https?://
|
||||
(?:\w+\.)?facebook\.com/
|
||||
(?:[\w-]+\.)?facebook\.com/
|
||||
(?:[^#]*?\#!/)?
|
||||
(?:
|
||||
(?:
|
||||
@ -127,6 +127,9 @@ class FacebookIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
|
@ -28,10 +28,13 @@ class GameSpotIE(OnceIE):
|
||||
'url': 'http://www.gamespot.com/videos/the-witcher-3-wild-hunt-xbox-one-now-playing/2300-6424837/',
|
||||
'info_dict': {
|
||||
'id': 'gs-2300-6424837',
|
||||
'ext': 'flv',
|
||||
'title': 'The Witcher 3: Wild Hunt [Xbox ONE] - Now Playing',
|
||||
'ext': 'mp4',
|
||||
'title': 'Now Playing - The Witcher 3: Wild Hunt',
|
||||
'description': 'Join us as we take a look at the early hours of The Witcher 3: Wild Hunt and more.',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 downloads
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -62,6 +62,7 @@ from .videomore import VideomoreIE
|
||||
from .googledrive import GoogleDriveIE
|
||||
from .jwplatform import JWPlatformIE
|
||||
from .digiteka import DigitekaIE
|
||||
from .arkena import ArkenaIE
|
||||
from .instagram import InstagramIE
|
||||
from .liveleak import LiveLeakIE
|
||||
from .threeqsdn import ThreeQSDNIE
|
||||
@ -1249,6 +1250,20 @@ class GenericIE(InfoExtractor):
|
||||
'uploader': 'www.hudl.com',
|
||||
},
|
||||
},
|
||||
# twitter:player:stream embed
|
||||
{
|
||||
'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288',
|
||||
'info_dict': {
|
||||
'id': 'master',
|
||||
'ext': 'mp4',
|
||||
'title': 'Une nouvelle espèce de dinosaure découverte en Argentine',
|
||||
'uploader': 'www.rtl.be',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# twitter:player embed
|
||||
{
|
||||
'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
|
||||
@ -1328,6 +1343,23 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': ['Vimeo'],
|
||||
},
|
||||
{
|
||||
'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
|
||||
'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
|
||||
'info_dict': {
|
||||
'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
|
||||
'ext': 'mp4',
|
||||
'title': 'Big Buck Bunny',
|
||||
'description': 'Royalty free test video',
|
||||
'timestamp': 1432816365,
|
||||
'upload_date': '20150528',
|
||||
'is_live': False,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [ArkenaIE.ie_key()],
|
||||
},
|
||||
# {
|
||||
# # TODO: find another test
|
||||
# # http://schema.org/VideoObject
|
||||
@ -2132,6 +2164,11 @@ class GenericIE(InfoExtractor):
|
||||
if digiteka_url:
|
||||
return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
|
||||
|
||||
# Look for Arkena embeds
|
||||
arkena_url = ArkenaIE._extract_url(webpage)
|
||||
if arkena_url:
|
||||
return self.url_result(arkena_url, ArkenaIE.ie_key())
|
||||
|
||||
# Look for Limelight embeds
|
||||
mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
|
||||
if mobj:
|
||||
@ -2184,11 +2221,6 @@ class GenericIE(InfoExtractor):
|
||||
'uploader': video_uploader,
|
||||
}
|
||||
|
||||
# https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser
|
||||
embed_url = self._html_search_meta('twitter:player', webpage, default=None)
|
||||
if embed_url:
|
||||
return self.url_result(embed_url)
|
||||
|
||||
# Looking for http://schema.org/VideoObject
|
||||
json_ld = self._search_json_ld(
|
||||
webpage, video_id, default=None, expected_type='VideoObject')
|
||||
@ -2245,6 +2277,9 @@ class GenericIE(InfoExtractor):
|
||||
r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
|
||||
if not found:
|
||||
# Try to find twitter cards info
|
||||
# twitter:player:stream should be checked before twitter:player since
|
||||
# it is expected to contain a raw stream (see
|
||||
# https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
|
||||
found = filter_video(re.findall(
|
||||
r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
|
||||
if not found:
|
||||
@ -2278,6 +2313,15 @@ class GenericIE(InfoExtractor):
|
||||
'_type': 'url',
|
||||
'url': new_url,
|
||||
}
|
||||
|
||||
if not found:
|
||||
# twitter:player is a https URL to iframe player that may or may not
|
||||
# be supported by youtube-dl thus this is checked the very last (see
|
||||
# https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
|
||||
embed_url = self._html_search_meta('twitter:player', webpage, default=None)
|
||||
if embed_url:
|
||||
return self.url_result(embed_url)
|
||||
|
||||
if not found:
|
||||
raise UnsupportedError(url)
|
||||
|
||||
|
90
youtube_dl/extractor/lcp.py
Normal file
90
youtube_dl/extractor/lcp.py
Normal file
@ -0,0 +1,90 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .arkena import ArkenaIE
|
||||
|
||||
|
||||
class LcpPlayIE(ArkenaIE):
|
||||
_VALID_URL = r'https?://play\.lcp\.fr/embed/(?P<id>[^/]+)/(?P<account_id>[^/]+)/[^/]+/[^/]+'
|
||||
_TESTS = [{
|
||||
'url': 'http://play.lcp.fr/embed/327336/131064/darkmatter/0',
|
||||
'md5': 'b8bd9298542929c06c1c15788b1f277a',
|
||||
'info_dict': {
|
||||
'id': '327336',
|
||||
'ext': 'mp4',
|
||||
'title': '327336',
|
||||
'timestamp': 1456391602,
|
||||
'upload_date': '20160225',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
|
||||
class LcpIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?lcp\.fr/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# arkena embed
|
||||
'url': 'http://www.lcp.fr/la-politique-en-video/schwartzenberg-prg-preconise-francois-hollande-de-participer-une-primaire',
|
||||
'md5': 'b8bd9298542929c06c1c15788b1f277a',
|
||||
'info_dict': {
|
||||
'id': 'd56d03e9',
|
||||
'ext': 'mp4',
|
||||
'title': 'Schwartzenberg (PRG) préconise à François Hollande de participer à une primaire à gauche',
|
||||
'description': 'md5:96ad55009548da9dea19f4120c6c16a8',
|
||||
'timestamp': 1456488895,
|
||||
'upload_date': '20160226',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# dailymotion live stream
|
||||
'url': 'http://www.lcp.fr/le-direct',
|
||||
'info_dict': {
|
||||
'id': 'xji3qy',
|
||||
'ext': 'mp4',
|
||||
'title': 'La Chaine Parlementaire (LCP), Live TNT',
|
||||
'description': 'md5:5c69593f2de0f38bd9a949f2c95e870b',
|
||||
'uploader': 'LCP',
|
||||
'uploader_id': 'xbz33d',
|
||||
'timestamp': 1308923058,
|
||||
'upload_date': '20110624',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 live stream
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.lcp.fr/emissions/277792-les-volontaires',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
play_url = self._search_regex(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>%s?(?:(?!\1).)*)\1' % LcpPlayIE._VALID_URL,
|
||||
webpage, 'play iframe', default=None, group='url')
|
||||
|
||||
if not play_url:
|
||||
return self.url_result(url, 'Generic')
|
||||
|
||||
title = self._og_search_title(webpage, default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, fatal=True)
|
||||
description = self._html_search_meta(
|
||||
('description', 'twitter:description'), webpage)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': LcpPlayIE.ie_key(),
|
||||
'url': play_url,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
}
|
@ -15,6 +15,7 @@ from ..utils import (
|
||||
float_or_none,
|
||||
HEADRequest,
|
||||
sanitized_Request,
|
||||
strip_or_none,
|
||||
unescapeHTML,
|
||||
url_basename,
|
||||
RegexNotFoundError,
|
||||
@ -133,7 +134,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
message += item.text
|
||||
raise ExtractorError(message, expected=True)
|
||||
|
||||
description = xpath_text(itemdoc, 'description')
|
||||
description = strip_or_none(xpath_text(itemdoc, 'description'))
|
||||
|
||||
title_el = None
|
||||
if title_el is None:
|
||||
|
72
youtube_dl/extractor/ninenow.py
Normal file
72
youtube_dl/extractor/ninenow.py
Normal file
@ -0,0 +1,72 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class NineNowIE(InfoExtractor):
|
||||
IE_NAME = '9now.com.au'
|
||||
_VALID_URL = r'https?://(?:www\.)?9now\.com\.au/(?:[^/]+/){2}(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
# clip
|
||||
'url': 'https://www.9now.com.au/afl-footy-show/2016/clip-ciql02091000g0hp5oktrnytc',
|
||||
'md5': '17cf47d63ec9323e562c9957a968b565',
|
||||
'info_dict': {
|
||||
'id': '16801',
|
||||
'ext': 'mp4',
|
||||
'title': 'St. Kilda\'s Joey Montagna on the potential for a player\'s strike',
|
||||
'description': 'Is a boycott of the NAB Cup "on the table"?',
|
||||
'uploader_id': '4460760524001',
|
||||
'upload_date': '20160713',
|
||||
'timestamp': 1468421266,
|
||||
},
|
||||
'skip': 'Only available in Australia',
|
||||
}, {
|
||||
# episode
|
||||
'url': 'https://www.9now.com.au/afl-footy-show/2016/episode-19',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# DRM protected
|
||||
'url': 'https://www.9now.com.au/andrew-marrs-history-of-the-world/season-1/episode-1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4460760524001/default_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
page_data = self._parse_json(self._search_regex(
|
||||
r'window\.__data\s*=\s*({.*?});', webpage,
|
||||
'page data'), display_id)
|
||||
common_data = page_data.get('episode', {}).get('episode') or page_data.get('clip', {}).get('clip')
|
||||
video_data = common_data['video']
|
||||
|
||||
if video_data.get('drm'):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
brightcove_id = video_data.get('brightcoveId') or 'ref:' + video_data['referenceId']
|
||||
video_id = compat_str(video_data.get('id') or brightcove_id)
|
||||
title = common_data['name']
|
||||
|
||||
thumbnails = [{
|
||||
'id': thumbnail_id,
|
||||
'url': thumbnail_url,
|
||||
'width': int_or_none(thumbnail_id[1:])
|
||||
} for thumbnail_id, thumbnail_url in common_data.get('image', {}).get('sizes', {}).items()]
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': common_data.get('description'),
|
||||
'duration': float_or_none(video_data.get('duration'), 1000),
|
||||
'thumbnails': thumbnails,
|
||||
'ie_key': 'BrightcoveNew',
|
||||
}
|
46
youtube_dl/extractor/nintendo.py
Normal file
46
youtube_dl/extractor/nintendo.py
Normal file
@ -0,0 +1,46 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
from ..utils import unescapeHTML
|
||||
|
||||
|
||||
class NintendoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nintendo\.com/games/detail/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nintendo.com/games/detail/yEiAzhU2eQI1KZ7wOHhngFoAHc1FpHwj',
|
||||
'info_dict': {
|
||||
'id': 'MzMmticjp0VPzO3CCj4rmFOuohEuEWoW',
|
||||
'ext': 'flv',
|
||||
'title': 'Duck Hunt Wii U VC NES - Trailer',
|
||||
'duration': 60.326,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
'url': 'http://www.nintendo.com/games/detail/tokyo-mirage-sessions-fe-wii-u',
|
||||
'info_dict': {
|
||||
'id': 'tokyo-mirage-sessions-fe-wii-u',
|
||||
'title': 'Tokyo Mirage Sessions ♯FE',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
|
||||
entries = [
|
||||
OoyalaIE._build_url_result(m.group('code'))
|
||||
for m in re.finditer(
|
||||
r'class=(["\'])embed-video\1[^>]+data-video-code=(["\'])(?P<code>(?:(?!\2).)+)\2',
|
||||
webpage)]
|
||||
|
||||
return self.playlist_result(
|
||||
entries, page_id, unescapeHTML(self._og_search_title(webpage, fatal=False)))
|
50
youtube_dl/extractor/odatv.py
Normal file
50
youtube_dl/extractor/odatv.py
Normal file
@ -0,0 +1,50 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
NO_DEFAULT,
|
||||
remove_start
|
||||
)
|
||||
|
||||
|
||||
class OdaTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?odatv\.com/(?:mob|vid)_video\.php\?.*\bid=(?P<id>[^&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://odatv.com/vid_video.php?id=8E388',
|
||||
'md5': 'dc61d052f205c9bf2da3545691485154',
|
||||
'info_dict': {
|
||||
'id': '8E388',
|
||||
'ext': 'mp4',
|
||||
'title': 'Artık Davutoğlu ile devam edemeyiz'
|
||||
}
|
||||
}, {
|
||||
# mobile URL
|
||||
'url': 'http://odatv.com/mob_video.php?id=8E388',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# no video
|
||||
'url': 'http://odatv.com/mob_video.php?id=8E900',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
no_video = 'NO VIDEO!' in webpage
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'mp4\s*:\s*(["\'])(?P<url>http.+?)\1', webpage, 'video url',
|
||||
default=None if no_video else NO_DEFAULT, group='url')
|
||||
|
||||
if no_video:
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': remove_start(self._og_search_title(webpage), 'Video: '),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
@ -59,11 +59,8 @@ class OnetBaseIE(InfoExtractor):
|
||||
# TODO: Support Microsoft Smooth Streaming
|
||||
continue
|
||||
elif ext == 'mpd':
|
||||
# TODO: Current DASH formats are broken - $Time$ pattern in
|
||||
# <SegmentTemplate> not implemented yet
|
||||
# formats.extend(self._extract_mpd_formats(
|
||||
# video_url, video_id, mpd_id='dash', fatal=False))
|
||||
continue
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
video_url, video_id, mpd_id='dash', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
|
@ -40,16 +40,16 @@ class ORFTVthekIE(InfoExtractor):
|
||||
'skip': 'Blocked outside of Austria / Germany',
|
||||
}, {
|
||||
'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256',
|
||||
'playlist': [{
|
||||
'md5': '68f543909aea49d621dfc7703a11cfaf',
|
||||
'info_dict': {
|
||||
'id': '7982259',
|
||||
'ext': 'mp4',
|
||||
'title': 'Best of Ingrid Thurnher',
|
||||
'upload_date': '20140527',
|
||||
'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".',
|
||||
}
|
||||
}],
|
||||
'info_dict': {
|
||||
'id': '7982259',
|
||||
'ext': 'mp4',
|
||||
'title': 'Best of Ingrid Thurnher',
|
||||
'upload_date': '20140527',
|
||||
'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im Jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # rtsp downloads
|
||||
},
|
||||
'_skip': 'Blocked outside of Austria / Germany',
|
||||
}]
|
||||
|
||||
|
@ -111,7 +111,7 @@ class PornHubIE(InfoExtractor):
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
error_msg = self._html_search_regex(
|
||||
r'(?s)<div[^>]+class=(["\']).*?\b(?:removed|userMessageSection)\b.*?\1[^>]*>(?P<error>.+?)</div>',
|
||||
r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
|
||||
webpage, 'error message', default=None, group='error')
|
||||
if error_msg:
|
||||
error_msg = re.sub(r'\s+', ' ', error_msg)
|
||||
|
@ -113,9 +113,7 @@ class RTVEALaCartaIE(InfoExtractor):
|
||||
png = self._download_webpage(png_request, video_id, 'Downloading url information')
|
||||
video_url = _decrypt_url(png)
|
||||
if not video_url.endswith('.f4m'):
|
||||
video_url = video_url.replace(
|
||||
'resources/', 'auth/resources/'
|
||||
).replace('.net.rtve', '.multimedia.cdn.rtve')
|
||||
video_url = video_url.replace('.net.rtve', '.multimedia.cdn.rtve')
|
||||
|
||||
subtitles = None
|
||||
if info.get('sbtFile') is not None:
|
||||
@ -222,3 +220,34 @@ class RTVELiveIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'is_live': True,
|
||||
}
|
||||
|
||||
|
||||
class RTVETelevisionIE(InfoExtractor):
|
||||
IE_NAME = 'rtve.es:television'
|
||||
_VALID_URL = r'https?://www\.rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml',
|
||||
'info_dict': {
|
||||
'id': '3069778',
|
||||
'ext': 'mp4',
|
||||
'title': 'Documentos TV - La revolución del móvil',
|
||||
'duration': 3496.948,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
|
||||
alacarta_url = self._search_regex(
|
||||
r'data-location="alacarta_videos"[^<]+url":"(http://www\.rtve\.es/alacarta.+?)&',
|
||||
webpage, 'alacarta url', default=None)
|
||||
if alacarta_url is None:
|
||||
raise ExtractorError(
|
||||
'The webpage doesn\'t contain any video', expected=True)
|
||||
|
||||
return self.url_result(alacarta_url, ie=RTVEALaCartaIE.ie_key())
|
||||
|
@ -4,11 +4,8 @@ from .mtv import MTVServicesInfoExtractor
|
||||
|
||||
|
||||
class SpikeIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:www\.spike\.com/(?:video-(?:clips|playlists)|(?:full-)?episodes)/.+|
|
||||
m\.spike\.com/videos/video\.rbml\?id=(?P<id>[^&]+))
|
||||
'''
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?spike\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle',
|
||||
'md5': '1a9265f32b0c375793d6c4ce45255256',
|
||||
'info_dict': {
|
||||
@ -17,13 +14,19 @@ class SpikeIE(MTVServicesInfoExtractor):
|
||||
'title': 'Auction Hunters|Can Allen Ride A Hundred Year-Old Motorcycle?',
|
||||
'description': 'md5:fbed7e82ed5fad493615b3094a9499cb',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.spike.com/video-clips/lhtu8m/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.spike.com/video-clips/lhtu8m',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://bellator.spike.com/fight/atwr7k/bellator-158-michael-page-vs-evangelista-cyborg',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://bellator.spike.com/video-clips/bw6k7n/bellator-158-foundations-michael-venom-page',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_FEED_URL = 'http://www.spike.com/feeds/mrss/'
|
||||
_MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobile_id = self._match_id(url)
|
||||
if mobile_id:
|
||||
url = 'http://www.spike.com/video-clips/%s' % mobile_id
|
||||
return super(SpikeIE, self)._real_extract(url)
|
||||
|
98
youtube_dl/extractor/streamable.py
Normal file
98
youtube_dl/extractor/streamable.py
Normal file
@ -0,0 +1,98 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class StreamableIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://streamable\.com/(?:e/)?(?P<id>\w+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://streamable.com/dnd1',
|
||||
'md5': '3e3bc5ca088b48c2d436529b64397fef',
|
||||
'info_dict': {
|
||||
'id': 'dnd1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mikel Oiarzabal scores to make it 0-3 for La Real against Espanyol',
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'uploader': 'teabaker',
|
||||
'timestamp': 1454964157.35115,
|
||||
'upload_date': '20160208',
|
||||
'duration': 61.516,
|
||||
'view_count': int,
|
||||
}
|
||||
},
|
||||
# older video without bitrate, width/height, etc. info
|
||||
{
|
||||
'url': 'https://streamable.com/moo',
|
||||
'md5': '2cf6923639b87fba3279ad0df3a64e73',
|
||||
'info_dict': {
|
||||
'id': 'moo',
|
||||
'ext': 'mp4',
|
||||
'title': '"Please don\'t eat me!"',
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'timestamp': 1426115495,
|
||||
'upload_date': '20150311',
|
||||
'duration': 12,
|
||||
'view_count': int,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'https://streamable.com/e/dnd1',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
# Note: Using the ajax API, as the public Streamable API doesn't seem
|
||||
# to return video info like the title properly sometimes, and doesn't
|
||||
# include info like the video duration
|
||||
video = self._download_json(
|
||||
'https://streamable.com/ajax/videos/%s' % video_id, video_id)
|
||||
|
||||
# Format IDs:
|
||||
# 0 The video is being uploaded
|
||||
# 1 The video is being processed
|
||||
# 2 The video has at least one file ready
|
||||
# 3 The video is unavailable due to an error
|
||||
status = video.get('status')
|
||||
if status != 2:
|
||||
raise ExtractorError(
|
||||
'This video is currently unavailable. It may still be uploading or processing.',
|
||||
expected=True)
|
||||
|
||||
title = video.get('reddit_title') or video['title']
|
||||
|
||||
formats = []
|
||||
for key, info in video['files'].items():
|
||||
if not info.get('url'):
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': key,
|
||||
'url': self._proto_relative_url(info['url']),
|
||||
'width': int_or_none(info.get('width')),
|
||||
'height': int_or_none(info.get('height')),
|
||||
'filesize': int_or_none(info.get('size')),
|
||||
'fps': int_or_none(info.get('framerate')),
|
||||
'vbr': float_or_none(info.get('bitrate'), 1000)
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': video.get('description'),
|
||||
'thumbnail': self._proto_relative_url(video.get('thumbnail_url')),
|
||||
'uploader': video.get('owner', {}).get('user_name'),
|
||||
'timestamp': float_or_none(video.get('date_added')),
|
||||
'duration': float_or_none(video.get('duration')),
|
||||
'view_count': int_or_none(video.get('plays')),
|
||||
'formats': formats
|
||||
}
|
@ -1,46 +1,56 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
update_url_query,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class SyfyIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.syfy\.com/(?:videos/.+?vid:(?P<id>[0-9]+)|(?!videos)(?P<video_name>[^/]+)(?:$|[?#]))'
|
||||
|
||||
class SyfyIE(ThePlatformIE):
|
||||
_VALID_URL = r'https?://www\.syfy\.com/(?:[^/]+/)?videos/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.syfy.com/videos/Robot%20Combat%20League/Behind%20the%20Scenes/vid:2631458',
|
||||
'url': 'http://www.syfy.com/theinternetruinedmylife/videos/the-internet-ruined-my-life-season-1-trailer',
|
||||
'info_dict': {
|
||||
'id': 'NmqMrGnXvmO1',
|
||||
'ext': 'flv',
|
||||
'title': 'George Lucas has Advice for his Daughter',
|
||||
'description': 'Listen to what insights George Lucas give his daughter Amanda.',
|
||||
'id': '2968097',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Internet Ruined My Life: Season 1 Trailer',
|
||||
'description': 'One tweet, one post, one click, can destroy everything.',
|
||||
'uploader': 'NBCU-MPAT',
|
||||
'upload_date': '20170113',
|
||||
'timestamp': 1484345640,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}, {
|
||||
'url': 'http://www.syfy.com/wilwheaton',
|
||||
'md5': '94dfa54ee3ccb63295b276da08c415f6',
|
||||
'info_dict': {
|
||||
'id': '4yoffOOXC767',
|
||||
'ext': 'flv',
|
||||
'title': 'The Wil Wheaton Project - Premiering May 27th at 10/9c.',
|
||||
'description': 'The Wil Wheaton Project premieres May 27th at 10/9c. Don\'t miss it.',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'skip': 'Blocked outside the US',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_name = mobj.group('video_name')
|
||||
if video_name:
|
||||
generic_webpage = self._download_webpage(url, video_name)
|
||||
video_id = self._search_regex(
|
||||
r'<iframe.*?class="video_iframe_page"\s+src="/_utils/video/thP_video_controller.php.*?_vid([0-9]+)">',
|
||||
generic_webpage, 'video ID')
|
||||
url = 'http://www.syfy.com/videos/%s/%s/vid:%s' % (
|
||||
video_name, video_name, video_id)
|
||||
else:
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
return self.url_result(self._og_search_video_url(webpage))
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
syfy_mpx = list(self._parse_json(self._search_regex(
|
||||
r'jQuery\.extend\([^,]+,\s*({.+})\);', webpage, 'drupal settings'),
|
||||
display_id)['syfy']['syfy_mpx'].values())[0]
|
||||
video_id = syfy_mpx['mpxGUID']
|
||||
title = syfy_mpx['episodeTitle']
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
if syfy_mpx.get('entitlement') == 'auth':
|
||||
resource = '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/"><channel><title>syfy</title><item><title><![CDATA[%s]]></title><guid>%s</guid><media:rating scheme="urn:v-chip">%s</media:rating></item></channel></rss>' % (title, video_id, syfy_mpx.get('mpxRating', 'TV-14'))
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, video_id, 'syfy', resource)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': smuggle_url(update_url_query(
|
||||
self._proto_relative_url(syfy_mpx['releaseURL']), query),
|
||||
{'force_smil_url': True}),
|
||||
'title': title,
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
}
|
||||
|
@ -47,11 +47,10 @@ class TelegraafIE(InfoExtractor):
|
||||
ext = determine_ext(manifest_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
manifest_url, video_id, ext='mp4', m3u8_id='hls'))
|
||||
manifest_url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
|
||||
elif ext == 'mpd':
|
||||
# TODO: Current DASH formats are broken - $Time$ pattern in
|
||||
# <SegmentTemplate> not implemented yet
|
||||
continue
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
manifest_url, video_id, mpd_id='dash', fatal=False))
|
||||
else:
|
||||
self.report_warning('Unknown adaptive format %s' % ext)
|
||||
for location in locations.get('progressive', []):
|
||||
|
@ -24,16 +24,20 @@ class ThreeQSDNIE(InfoExtractor):
|
||||
'title': '0280d6b9-1215-11e6-b427-0cc47a188158',
|
||||
'is_live': False,
|
||||
},
|
||||
'expected_warnings': ['Failed to download MPD manifest'],
|
||||
'expected_warnings': ['Failed to download MPD manifest', 'Failed to parse JSON'],
|
||||
}, {
|
||||
# live video stream
|
||||
'url': 'https://playout.3qsdn.com/d755d94b-4ab9-11e3-9162-0025907ad44f?js=true',
|
||||
'info_dict': {
|
||||
'id': 'd755d94b-4ab9-11e3-9162-0025907ad44f',
|
||||
'ext': 'mp4',
|
||||
'title': 'd755d94b-4ab9-11e3-9162-0025907ad44f',
|
||||
'is_live': False,
|
||||
'title': 're:^d755d94b-4ab9-11e3-9162-0025907ad44f [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 downloads
|
||||
},
|
||||
'expected_warnings': ['Failed to download MPD manifest'],
|
||||
}, {
|
||||
# live audio stream
|
||||
'url': 'http://playout.3qsdn.com/9edf36e0-6bf2-11e2-a16a-9acf09e2db48',
|
||||
@ -114,7 +118,7 @@ class ThreeQSDNIE(InfoExtractor):
|
||||
'vcodec': 'none' if stream_type == 'audio' else None,
|
||||
})
|
||||
|
||||
for item_js in re.findall(r'({.*?\b(?:src|source)\s*:\s*["\'].+?})', js):
|
||||
for item_js in re.findall(r'({[^{]*?\b(?:src|source)\s*:\s*["\'].+?})', js):
|
||||
f = self._parse_json(
|
||||
item_js, video_id, transform_source=js_to_json, fatal=False)
|
||||
if not f:
|
||||
|
@ -89,8 +89,8 @@ class TVPIE(InfoExtractor):
|
||||
r'(https?://.+?/video)(?:\.(?:ism|f4m|m3u8)|-\d+\.mp4)',
|
||||
video_url, 'video base url', default=None)
|
||||
if video_url_base:
|
||||
# TODO: Current DASH formats are broken - $Time$ pattern in
|
||||
# <SegmentTemplate> not implemented yet
|
||||
# TODO: <Group> found instead of <AdaptationSet> in MPD manifest.
|
||||
# It's not mentioned in MPEG-DASH standard. Figure that out.
|
||||
# formats.extend(self._extract_mpd_formats(
|
||||
# video_url_base + '.ism/video.mpd',
|
||||
# video_id, mpd_id='dash', fatal=False))
|
||||
|
@ -130,7 +130,7 @@ class VikiIE(VikiBaseIE):
|
||||
}, {
|
||||
# clip
|
||||
'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
|
||||
'md5': 'feea2b1d7b3957f70886e6dfd8b8be84',
|
||||
'md5': '86c0b5dbd4d83a6611a79987cc7a1989',
|
||||
'info_dict': {
|
||||
'id': '1067139v',
|
||||
'ext': 'mp4',
|
||||
@ -156,15 +156,11 @@ class VikiIE(VikiBaseIE):
|
||||
'like_count': int,
|
||||
'age_limit': 13,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Blocked in the US',
|
||||
}, {
|
||||
# episode
|
||||
'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
|
||||
'md5': '1f54697dabc8f13f31bf06bb2e4de6db',
|
||||
'md5': '5fa476a902e902783ac7a4d615cdbc7a',
|
||||
'info_dict': {
|
||||
'id': '44699v',
|
||||
'ext': 'mp4',
|
||||
@ -200,7 +196,7 @@ class VikiIE(VikiBaseIE):
|
||||
}, {
|
||||
# non-English description
|
||||
'url': 'http://www.viki.com/videos/158036v-love-in-magic',
|
||||
'md5': '013dc282714e22acf9447cad14ff1208',
|
||||
'md5': '1713ae35df5a521b31f6dc40730e7c9c',
|
||||
'info_dict': {
|
||||
'id': '158036v',
|
||||
'ext': 'mp4',
|
||||
@ -281,9 +277,16 @@ class VikiIE(VikiBaseIE):
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None))
|
||||
for protocol, format_dict in stream_dict.items():
|
||||
if format_id == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_dict['url'], video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='m3u8-%s' % protocol, fatal=False))
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
format_dict['url'], video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', preference=-1,
|
||||
m3u8_id='m3u8-%s' % protocol, fatal=False)
|
||||
# Despite CODECS metadata in m3u8 all video-only formats
|
||||
# are actually video+audio
|
||||
for f in m3u8_formats:
|
||||
if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
|
||||
f['acodec'] = None
|
||||
formats.extend(m3u8_formats)
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_dict['url'],
|
||||
|
@ -9,8 +9,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class YouJizzIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/[^/#?]+-(?P<id>[0-9]+)\.html(?:$|[?#])'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]+)?-(?P<id>[0-9]+)\.html(?:$|[?#])'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
|
||||
'md5': '07e15fa469ba384c7693fd246905547c',
|
||||
'info_dict': {
|
||||
@ -19,7 +19,10 @@ class YouJizzIE(InfoExtractor):
|
||||
'title': 'Zeichentrick 1',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.youjizz.com/videos/-2189178.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@ -53,6 +53,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
"""Provide base functions for Youtube extractors"""
|
||||
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
|
||||
_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
|
||||
_PASSWORD_CHALLENGE_URL = 'https://accounts.google.com/signin/challenge/sl/password'
|
||||
_NETRC_MACHINE = 'youtube'
|
||||
# If True it will raise an error if no login info is provided
|
||||
_LOGIN_REQUIRED = False
|
||||
@ -116,12 +117,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
'hl': 'en_US',
|
||||
}
|
||||
|
||||
login_data = urlencode_postdata(login_form_strs)
|
||||
|
||||
req = sanitized_Request(self._LOGIN_URL, login_data)
|
||||
login_results = self._download_webpage(
|
||||
req, None,
|
||||
note='Logging in', errnote='unable to log in', fatal=False)
|
||||
self._PASSWORD_CHALLENGE_URL, None,
|
||||
note='Logging in', errnote='unable to log in', fatal=False,
|
||||
data=urlencode_postdata(login_form_strs))
|
||||
if login_results is False:
|
||||
return False
|
||||
|
||||
@ -1736,7 +1735,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
|
||||
class YoutubeSharedVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?ci=(?P<id>[0-9A-Za-z_-]{11})'
|
||||
_VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?.*\bci=(?P<id>[0-9A-Za-z_-]{11})'
|
||||
IE_NAME = 'youtube:shared'
|
||||
|
||||
_TEST = {
|
||||
|
@ -363,10 +363,8 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
|
||||
input_files = [filename] + sub_filenames
|
||||
|
||||
opts = [
|
||||
'-map', '0:v',
|
||||
'-c:v', 'copy',
|
||||
'-map', '0:a',
|
||||
'-c:a', 'copy',
|
||||
'-map', '0',
|
||||
'-c', 'copy',
|
||||
# Don't copy the existing subtitles, we may be running the
|
||||
# postprocessor a second time
|
||||
'-map', '-0:s',
|
||||
|
@ -2123,6 +2123,8 @@ def mimetype2ext(mt):
|
||||
'dash+xml': 'mpd',
|
||||
'f4m': 'f4m',
|
||||
'f4m+xml': 'f4m',
|
||||
'hds+xml': 'f4m',
|
||||
'vnd.ms-sstr+xml': 'ism',
|
||||
}.get(res, res)
|
||||
|
||||
|
||||
|
@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2016.07.13'
|
||||
__version__ = '2016.07.24'
|
||||
|
Reference in New Issue
Block a user