Compare commits
82 Commits
2016.02.04
...
2016.02.10
Author | SHA1 | Date | |
---|---|---|---|
f817d9bec1 | |||
e2effb08a4 | |||
7fcea295c5 | |||
cc799437ea | |||
89d23f37f2 | |||
b92071ef00 | |||
47246ae26c | |||
9c15869c28 | |||
51e9094f4a | |||
5e3a6fec33 | |||
d413095f7e | |||
1bedf4de06 | |||
3967a761f4 | |||
b081350bd9 | |||
16f1430ba6 | |||
085ad71157 | |||
35972ba172 | |||
3834d3e35c | |||
8d0a2a2a4e | |||
11c0339bec | |||
915dd77783 | |||
b6bfa6fb79 | |||
f070197bd7 | |||
5a7699bb2e | |||
8628d26f38 | |||
8411229bd5 | |||
72b9ebc65d | |||
3b799ca14c | |||
0474512e30 | |||
f0905c6ec3 | |||
86296ad2cd | |||
52f5889f77 | |||
81e0b4f2d1 | |||
cbecc9b903 | |||
b8b465af3e | |||
59b35c6745 | |||
7032833011 | |||
f406c78785 | |||
f326b5837a | |||
5dd4b3468f | |||
d4f8e83404 | |||
7b8b007cd9 | |||
3547d26587 | |||
7e62c2eb6d | |||
56401e1e5f | |||
860db2d508 | |||
4b8874975c | |||
bd6b6f6622 | |||
4340727e6c | |||
3ceccade87 | |||
28ad7df65d | |||
79a3508579 | |||
1b840245bd | |||
6a3828fddd | |||
91cb6b5065 | |||
0826a0b555 | |||
bcbbb98bfe | |||
66159b38aa | |||
23d17e4beb | |||
d97b0e3241 | |||
eb2533ec4c | |||
b7b365067f | |||
86e284e028 | |||
d9e543b680 | |||
c773c232d8 | |||
58ae24336a | |||
7d3a035ee0 | |||
e06e75c7e7 | |||
593e0f43b4 | |||
008ab0f814 | |||
3f7e8750d4 | |||
255732f0d3 | |||
53c269c6fd | |||
675d001633 | |||
d577c79632 | |||
f14be22816 | |||
9c74423510 | |||
5976e7ab57 | |||
a1a22572fb | |||
c11875b328 | |||
8ff648e4f9 | |||
1bac34556f |
1
AUTHORS
1
AUTHORS
@ -156,3 +156,4 @@ Tom Gijselinck
|
||||
Founder Fang
|
||||
Andrew Alexeyew
|
||||
Saso Bezlaj
|
||||
Erwin de Haan
|
||||
|
97
README.md
97
README.md
@ -442,30 +442,97 @@ On Windows you may also need to setup the `%HOME%` environment variable manually
|
||||
|
||||
The `-o` option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:
|
||||
|
||||
- `id`: The sequence will be replaced by the video identifier.
|
||||
- `url`: The sequence will be replaced by the video URL.
|
||||
- `uploader`: The sequence will be replaced by the nickname of the person who uploaded the video.
|
||||
- `upload_date`: The sequence will be replaced by the upload date in YYYYMMDD format.
|
||||
- `title`: The sequence will be replaced by the video title.
|
||||
- `ext`: The sequence will be replaced by the appropriate extension (like flv or mp4).
|
||||
- `epoch`: The sequence will be replaced by the Unix epoch when creating the file.
|
||||
- `autonumber`: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero.
|
||||
- `playlist`: The sequence will be replaced by the name or the id of the playlist that contains the video.
|
||||
- `playlist_index`: The sequence will be replaced by the index of the video in the playlist padded with leading zeros according to the total length of the playlist.
|
||||
- `format_id`: The sequence will be replaced by the format code specified by `--format`.
|
||||
- `duration`: The sequence will be replaced by the length of the video in seconds.
|
||||
- `id`: Video identifier
|
||||
- `title`: Video title
|
||||
- `url`: Video URL
|
||||
- `ext`: Video filename extension
|
||||
- `alt_title`: A secondary title of the video
|
||||
- `display_id`: An alternative identifier for the video
|
||||
- `uploader`: Full name of the video uploader
|
||||
- `creator`: The main artist who created the video
|
||||
- `release_date`: The date (YYYYMMDD) when the video was released
|
||||
- `timestamp`: UNIX timestamp of the moment the video became available
|
||||
- `upload_date`: Video upload date (YYYYMMDD)
|
||||
- `uploader_id`: Nickname or id of the video uploader
|
||||
- `location`: Physical location where the video was filmed
|
||||
- `duration`: Length of the video in seconds
|
||||
- `view_count`: How many users have watched the video on the platform
|
||||
- `like_count`: Number of positive ratings of the video
|
||||
- `dislike_count`: Number of negative ratings of the video
|
||||
- `repost_count`: Number of reposts of the video
|
||||
- `average_rating`: Average rating give by users, the scale used depends on the webpage
|
||||
- `comment_count`: Number of comments on the video
|
||||
- `age_limit`: Age restriction for the video (years)
|
||||
- `format`: A human-readable description of the format
|
||||
- `format_id`: Format code specified by `--format`
|
||||
- `format_note`: Additional info about the format
|
||||
- `width`: Width of the video
|
||||
- `height`: Height of the video
|
||||
- `resolution`: Textual description of width and height
|
||||
- `tbr`: Average bitrate of audio and video in KBit/s
|
||||
- `abr`: Average audio bitrate in KBit/s
|
||||
- `acodec`: Name of the audio codec in use
|
||||
- `asr`: Audio sampling rate in Hertz
|
||||
- `vbr`: Average video bitrate in KBit/s
|
||||
- `fps`: Frame rate
|
||||
- `vcodec`: Name of the video codec in use
|
||||
- `container`: Name of the container format
|
||||
- `filesize`: The number of bytes, if known in advance
|
||||
- `filesize_approx`: An estimate for the number of bytes
|
||||
- `protocol`: The protocol that will be used for the actual download
|
||||
- `extractor`: Name of the extractor
|
||||
- `extractor_key`: Key name of the extractor
|
||||
- `epoch`: Unix epoch when creating the file
|
||||
- `autonumber`: Five-digit number that will be increased with each download, starting at zero
|
||||
- `playlist`: Name or id of the playlist that contains the video
|
||||
- `playlist_index`: Index of the video in the playlist padded with leading zeros according to the total length of the playlist
|
||||
|
||||
Note that some of the aforementioned sequences are not guaranteed to be present since they depend on the metadata obtained by particular extractor, such sequences will be replaced with `NA`.
|
||||
Available for the video that belongs to some logical chapter or section:
|
||||
- `chapter`: Name or title of the chapter the video belongs to
|
||||
- `chapter_number`: Number of the chapter the video belongs to
|
||||
- `chapter_id`: Id of the chapter the video belongs to
|
||||
|
||||
Available for the video that is an episode of some series or programme:
|
||||
- `series`: Title of the series or programme the video episode belongs to
|
||||
- `season`: Title of the season the video episode belongs to
|
||||
- `season_number`: Number of the season the video episode belongs to
|
||||
- `season_id`: Id of the season the video episode belongs to
|
||||
- `episode`: Title of the video episode
|
||||
- `episode_number`: Number of the video episode within a season
|
||||
- `episode_id`: Id of the video episode
|
||||
|
||||
Each aforementioned sequence when referenced in output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by particular extractor, such sequences will be replaced with `NA`.
|
||||
|
||||
For example for `-o %(title)s-%(id)s.%(ext)s` and mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj` this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
|
||||
|
||||
Output template can also contain arbitrary hierarchical path, e.g. `-o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s'` that will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you.
|
||||
|
||||
To specify percent literal in output template use `%%`. To output to stdout use `-o -`.
|
||||
|
||||
The current default template is `%(title)s-%(id)s.%(ext)s`.
|
||||
|
||||
In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
|
||||
|
||||
Examples (note on Windows you may need to use double quotes instead of single):
|
||||
|
||||
```bash
|
||||
$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc
|
||||
$ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc
|
||||
youtube-dl test video ''_ä↭𝕐.mp4 # All kinds of weird characters
|
||||
$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
|
||||
|
||||
$ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc --restrict-filenames
|
||||
youtube-dl_test_video_.mp4 # A simple file name
|
||||
|
||||
# Download YouTube playlist videos in separate directory indexed by video order in a playlist
|
||||
$ youtube-dl -o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re
|
||||
|
||||
# Download Udemy course keeping each chapter in separate directory under MyVideos directory in your home
|
||||
$ youtube-dl -u user -p password -o '~/MyVideos/%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s' https://www.udemy.com/java-tutorial/
|
||||
|
||||
# Download entire series season keeping each series and each season in separate directory under C:/MyVideos
|
||||
$ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" http://videomore.ru/kino_v_detalayah/5_sezon/367617
|
||||
|
||||
# Stream the video being downloaded to stdout
|
||||
$ youtube-dl -o - BaW_jenozKc
|
||||
```
|
||||
|
||||
# FORMAT SELECTION
|
||||
|
@ -262,7 +262,7 @@
|
||||
- **Instagram**
|
||||
- **instagram:user**: Instagram user profile
|
||||
- **InternetVideoArchive**
|
||||
- **IPrima** (Currently broken)
|
||||
- **IPrima**
|
||||
- **iqiyi**: 爱奇艺
|
||||
- **Ir90Tv**
|
||||
- **ivi**: ivi.ru
|
||||
@ -283,6 +283,7 @@
|
||||
- **KeezMovies**
|
||||
- **KhanAcademy**
|
||||
- **KickStarter**
|
||||
- **KonserthusetPlay**
|
||||
- **kontrtube**: KontrTube.ru - Труба зовёт
|
||||
- **KrasView**: Красвью
|
||||
- **Ku6**
|
||||
@ -681,7 +682,9 @@
|
||||
- **VideoPremium**
|
||||
- **VideoTt**: video.tt - Your True Tube (Currently broken)
|
||||
- **videoweed**: VideoWeed
|
||||
- **Vidme**
|
||||
- **vidme**
|
||||
- **vidme:user**
|
||||
- **vidme:user:likes**
|
||||
- **Vidzi**
|
||||
- **vier**
|
||||
- **vier:videos**
|
||||
|
@ -249,7 +249,7 @@ class TestFormatSelection(unittest.TestCase):
|
||||
def format_info(f_id):
|
||||
info = YoutubeIE._formats[f_id].copy()
|
||||
|
||||
# XXX: In real cases InfoExtractor._parse_mpd() fills up 'acodec'
|
||||
# XXX: In real cases InfoExtractor._parse_mpd_formats() fills up 'acodec'
|
||||
# and 'vcodec', while in tests such information is incomplete since
|
||||
# commit a6c2c24479e5f4827ceb06f64d855329c0a6f593
|
||||
# test_YoutubeDL.test_youtube_format_selection is broken without
|
||||
|
@ -22,6 +22,7 @@ from youtube_dl.utils import (
|
||||
DateRange,
|
||||
detect_exe_version,
|
||||
determine_ext,
|
||||
dict_get,
|
||||
encode_compat_str,
|
||||
encodeFilename,
|
||||
escape_rfc3986,
|
||||
@ -450,6 +451,28 @@ class TestUtil(unittest.TestCase):
|
||||
data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
|
||||
self.assertTrue(isinstance(data, bytes))
|
||||
|
||||
def test_dict_get(self):
|
||||
FALSE_VALUES = {
|
||||
'none': None,
|
||||
'false': False,
|
||||
'zero': 0,
|
||||
'empty_string': '',
|
||||
'empty_list': [],
|
||||
}
|
||||
d = FALSE_VALUES.copy()
|
||||
d['a'] = 42
|
||||
self.assertEqual(dict_get(d, 'a'), 42)
|
||||
self.assertEqual(dict_get(d, 'b'), None)
|
||||
self.assertEqual(dict_get(d, 'b', 42), 42)
|
||||
self.assertEqual(dict_get(d, ('a', )), 42)
|
||||
self.assertEqual(dict_get(d, ('b', 'a', )), 42)
|
||||
self.assertEqual(dict_get(d, ('b', 'c', 'a', 'd', )), 42)
|
||||
self.assertEqual(dict_get(d, ('b', 'c', )), None)
|
||||
self.assertEqual(dict_get(d, ('b', 'c', ), 42), 42)
|
||||
for key, false_value in FALSE_VALUES.items():
|
||||
self.assertEqual(dict_get(d, ('b', 'c', key, )), None)
|
||||
self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value)
|
||||
|
||||
def test_encode_compat_str(self):
|
||||
self.assertEqual(encode_compat_str(b'\xd1\x82\xd0\xb5\xd1\x81\xd1\x82', 'utf-8'), 'тест')
|
||||
self.assertEqual(encode_compat_str('тест', 'utf-8'), 'тест')
|
||||
@ -471,6 +494,10 @@ class TestUtil(unittest.TestCase):
|
||||
d = json.loads(stripped)
|
||||
self.assertEqual(d, {'STATUS': 'OK'})
|
||||
|
||||
stripped = strip_jsonp('ps.embedHandler({"status": "success"});')
|
||||
d = json.loads(stripped)
|
||||
self.assertEqual(d, {'status': 'success'})
|
||||
|
||||
def test_uppercase_escape(self):
|
||||
self.assertEqual(uppercase_escape('aä'), 'aä')
|
||||
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
|
||||
|
@ -1288,6 +1288,9 @@ class YoutubeDL(object):
|
||||
|
||||
if format.get('format_id') is None:
|
||||
format['format_id'] = compat_str(i)
|
||||
else:
|
||||
# Sanitize format_id from characters used in format selector expression
|
||||
format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id'])
|
||||
format_id = format['format_id']
|
||||
if format_id not in formats_dict:
|
||||
formats_dict[format_id] = []
|
||||
|
@ -40,9 +40,10 @@ class DashSegmentsFD(FileDownloader):
|
||||
return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
|
||||
|
||||
with open(tmpfilename, 'wb') as outf:
|
||||
append_url_to_file(
|
||||
outf, combine_url(base_url, info_dict['initialization_url']),
|
||||
'initialization segment')
|
||||
if info_dict.get('initialization_url'):
|
||||
append_url_to_file(
|
||||
outf, combine_url(base_url, info_dict['initialization_url']),
|
||||
'initialization segment')
|
||||
for i, segment_url in enumerate(segment_urls):
|
||||
segment_len = append_url_to_file(
|
||||
outf, combine_url(base_url, segment_url),
|
||||
|
@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
from .common import FileDownloader
|
||||
from .fragment import FragmentFD
|
||||
@ -57,8 +58,10 @@ class HlsFD(FileDownloader):
|
||||
# subprocces.run would send the SIGKILL signal to ffmpeg and the
|
||||
# mp4 file couldn't be played, but if we ask ffmpeg to quit it
|
||||
# produces a file that is playable (this is mostly useful for live
|
||||
# streams)
|
||||
proc.communicate(b'q')
|
||||
# streams). Note that Windows is not affected and produces playable
|
||||
# files (see https://github.com/rg3/youtube-dl/issues/8300).
|
||||
if sys.platform != 'win32':
|
||||
proc.communicate(b'q')
|
||||
raise
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
|
@ -328,6 +328,7 @@ from .keezmovies import KeezMoviesIE
|
||||
from .khanacademy import KhanAcademyIE
|
||||
from .kickstarter import KickStarterIE
|
||||
from .keek import KeekIE
|
||||
from .konserthusetplay import KonserthusetPlayIE
|
||||
from .kontrtube import KontrTubeIE
|
||||
from .krasview import KrasViewIE
|
||||
from .ku6 import Ku6IE
|
||||
@ -822,7 +823,11 @@ from .videomore import (
|
||||
)
|
||||
from .videopremium import VideoPremiumIE
|
||||
from .videott import VideoTtIE
|
||||
from .vidme import VidmeIE
|
||||
from .vidme import (
|
||||
VidmeIE,
|
||||
VidmeUserIE,
|
||||
VidmeUserLikesIE,
|
||||
)
|
||||
from .vidzi import VidziIE
|
||||
from .vier import VierIE, VierVideosIE
|
||||
from .viewster import ViewsterIE
|
||||
|
@ -13,6 +13,7 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
get_element_by_attribute,
|
||||
int_or_none,
|
||||
NO_DEFAULT,
|
||||
qualities,
|
||||
)
|
||||
|
||||
@ -93,9 +94,18 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
json_url = self._html_search_regex(
|
||||
patterns, webpage, 'json vp url', default=None)
|
||||
if not json_url:
|
||||
iframe_url = self._html_search_regex(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
|
||||
webpage, 'iframe url', group='url')
|
||||
def find_iframe_url(webpage, default=NO_DEFAULT):
|
||||
return self._html_search_regex(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
|
||||
webpage, 'iframe url', group='url', default=default)
|
||||
|
||||
iframe_url = find_iframe_url(webpage, None)
|
||||
if not iframe_url:
|
||||
embed_url = self._html_search_regex(
|
||||
r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url')
|
||||
player = self._download_json(
|
||||
embed_url, video_id, 'Downloading player page')
|
||||
iframe_url = find_iframe_url(player['html'])
|
||||
json_url = compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
|
||||
return self._extract_from_json_url(json_url, video_id, lang)
|
||||
|
@ -10,6 +10,7 @@ import re
|
||||
import socket
|
||||
import sys
|
||||
import time
|
||||
import math
|
||||
|
||||
from ..compat import (
|
||||
compat_cookiejar,
|
||||
@ -44,6 +45,7 @@ from ..utils import (
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
determine_protocol,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
@ -1184,6 +1186,7 @@ class InfoExtractor(object):
|
||||
http_count = 0
|
||||
m3u8_count = 0
|
||||
|
||||
src_urls = []
|
||||
videos = smil.findall(self._xpath_ns('.//video', namespace))
|
||||
for video in videos:
|
||||
src = video.get('src')
|
||||
@ -1220,6 +1223,9 @@ class InfoExtractor(object):
|
||||
continue
|
||||
|
||||
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
|
||||
if src_url in src_urls:
|
||||
continue
|
||||
src_urls.append(src_url)
|
||||
|
||||
if proto == 'm3u8' or src_ext == 'm3u8':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
@ -1265,11 +1271,13 @@ class InfoExtractor(object):
|
||||
return formats
|
||||
|
||||
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
|
||||
urls = []
|
||||
subtitles = {}
|
||||
for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
|
||||
src = textstream.get('src')
|
||||
if not src:
|
||||
if not src or src in urls:
|
||||
continue
|
||||
urls.append(src)
|
||||
ext = textstream.get('ext') or determine_ext(src)
|
||||
if not ext:
|
||||
type_ = textstream.get('type')
|
||||
@ -1330,81 +1338,159 @@ class InfoExtractor(object):
|
||||
})
|
||||
return entries
|
||||
|
||||
def _download_dash_manifest(self, dash_manifest_url, video_id, fatal=True):
|
||||
return self._download_xml(
|
||||
dash_manifest_url, video_id,
|
||||
note='Downloading DASH manifest',
|
||||
errnote='Could not download DASH manifest',
|
||||
def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}):
|
||||
res = self._download_webpage_handle(
|
||||
mpd_url, video_id,
|
||||
note=note or 'Downloading MPD manifest',
|
||||
errnote=errnote or 'Failed to download MPD manifest',
|
||||
fatal=fatal)
|
||||
if res is False:
|
||||
return []
|
||||
mpd, urlh = res
|
||||
mpd_base_url = re.match(r'https?://.+/', urlh.geturl()).group()
|
||||
|
||||
def _extract_dash_manifest_formats(self, dash_manifest_url, video_id, fatal=True, namespace=None, formats_dict={}):
|
||||
dash_doc = self._download_dash_manifest(dash_manifest_url, video_id, fatal)
|
||||
if dash_doc is False:
|
||||
return self._parse_mpd_formats(
|
||||
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, formats_dict=formats_dict)
|
||||
|
||||
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}):
|
||||
if mpd_doc.get('type') == 'dynamic':
|
||||
return []
|
||||
|
||||
return self._parse_dash_manifest(
|
||||
dash_doc, namespace=namespace, formats_dict=formats_dict)
|
||||
namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None)
|
||||
|
||||
def _parse_dash_manifest(self, dash_doc, namespace=None, formats_dict={}):
|
||||
def _add_ns(path):
|
||||
return self._xpath_ns(path, namespace)
|
||||
|
||||
formats = []
|
||||
for a in dash_doc.findall('.//' + _add_ns('AdaptationSet')):
|
||||
mime_type = a.attrib.get('mimeType')
|
||||
for r in a.findall(_add_ns('Representation')):
|
||||
mime_type = r.attrib.get('mimeType') or mime_type
|
||||
url_el = r.find(_add_ns('BaseURL'))
|
||||
if mime_type == 'text/vtt':
|
||||
# TODO implement WebVTT downloading
|
||||
pass
|
||||
elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
|
||||
segment_list = r.find(_add_ns('SegmentList'))
|
||||
format_id = r.attrib['id']
|
||||
video_url = url_el.text if url_el is not None else None
|
||||
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
|
||||
f = {
|
||||
'format_id': format_id,
|
||||
'url': video_url,
|
||||
'width': int_or_none(r.attrib.get('width')),
|
||||
'height': int_or_none(r.attrib.get('height')),
|
||||
'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
|
||||
'asr': int_or_none(r.attrib.get('audioSamplingRate')),
|
||||
'filesize': filesize,
|
||||
'fps': int_or_none(r.attrib.get('frameRate')),
|
||||
}
|
||||
if segment_list is not None:
|
||||
initialization_url = segment_list.find(_add_ns('Initialization')).attrib['sourceURL']
|
||||
f.update({
|
||||
'initialization_url': initialization_url,
|
||||
'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall(_add_ns('SegmentURL'))],
|
||||
'protocol': 'http_dash_segments',
|
||||
})
|
||||
if not f.get('url'):
|
||||
f['url'] = initialization_url
|
||||
try:
|
||||
existing_format = next(
|
||||
fo for fo in formats
|
||||
if fo['format_id'] == format_id)
|
||||
except StopIteration:
|
||||
full_info = formats_dict.get(format_id, {}).copy()
|
||||
full_info.update(f)
|
||||
codecs = r.attrib.get('codecs')
|
||||
if codecs:
|
||||
if mime_type.startswith('video/'):
|
||||
vcodec, acodec = codecs, 'none'
|
||||
else: # mime_type.startswith('audio/')
|
||||
vcodec, acodec = 'none', codecs
|
||||
def is_drm_protected(element):
|
||||
return element.find(_add_ns('ContentProtection')) is not None
|
||||
|
||||
full_info.update({
|
||||
'vcodec': vcodec,
|
||||
'acodec': acodec,
|
||||
})
|
||||
formats.append(full_info)
|
||||
def extract_multisegment_info(element, ms_parent_info):
|
||||
ms_info = ms_parent_info.copy()
|
||||
segment_list = element.find(_add_ns('SegmentList'))
|
||||
if segment_list is not None:
|
||||
segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
|
||||
if segment_urls_e:
|
||||
ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e]
|
||||
initialization = segment_list.find(_add_ns('Initialization'))
|
||||
if initialization is not None:
|
||||
ms_info['initialization_url'] = initialization.attrib['sourceURL']
|
||||
else:
|
||||
segment_template = element.find(_add_ns('SegmentTemplate'))
|
||||
if segment_template is not None:
|
||||
start_number = segment_template.get('startNumber')
|
||||
if start_number:
|
||||
ms_info['start_number'] = int(start_number)
|
||||
segment_timeline = segment_template.find(_add_ns('SegmentTimeline'))
|
||||
if segment_timeline is not None:
|
||||
s_e = segment_timeline.findall(_add_ns('S'))
|
||||
if s_e:
|
||||
ms_info['total_number'] = 0
|
||||
for s in s_e:
|
||||
ms_info['total_number'] += 1 + int(s.get('r', '0'))
|
||||
else:
|
||||
existing_format.update(f)
|
||||
else:
|
||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||
timescale = segment_template.get('timescale')
|
||||
if timescale:
|
||||
ms_info['timescale'] = int(timescale)
|
||||
segment_duration = segment_template.get('duration')
|
||||
if segment_duration:
|
||||
ms_info['segment_duration'] = int(segment_duration)
|
||||
media_template = segment_template.get('media')
|
||||
if media_template:
|
||||
ms_info['media_template'] = media_template
|
||||
initialization = segment_template.get('initialization')
|
||||
if initialization:
|
||||
ms_info['initialization_url'] = initialization
|
||||
else:
|
||||
initialization = segment_template.find(_add_ns('Initialization'))
|
||||
if initialization is not None:
|
||||
ms_info['initialization_url'] = initialization.attrib['sourceURL']
|
||||
return ms_info
|
||||
|
||||
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
|
||||
formats = []
|
||||
for period in mpd_doc.findall(_add_ns('Period')):
|
||||
period_duration = parse_duration(period.get('duration')) or mpd_duration
|
||||
period_ms_info = extract_multisegment_info(period, {
|
||||
'start_number': 1,
|
||||
'timescale': 1,
|
||||
})
|
||||
for adaptation_set in period.findall(_add_ns('AdaptationSet')):
|
||||
if is_drm_protected(adaptation_set):
|
||||
continue
|
||||
adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info)
|
||||
for representation in adaptation_set.findall(_add_ns('Representation')):
|
||||
if is_drm_protected(representation):
|
||||
continue
|
||||
representation_attrib = adaptation_set.attrib.copy()
|
||||
representation_attrib.update(representation.attrib)
|
||||
mime_type = representation_attrib.get('mimeType')
|
||||
content_type = mime_type.split('/')[0] if mime_type else representation_attrib.get('contentType')
|
||||
if content_type == 'text':
|
||||
# TODO implement WebVTT downloading
|
||||
pass
|
||||
elif content_type == 'video' or content_type == 'audio':
|
||||
base_url = ''
|
||||
for element in (representation, adaptation_set, period, mpd_doc):
|
||||
base_url_e = element.find(_add_ns('BaseURL'))
|
||||
if base_url_e is not None:
|
||||
base_url = base_url_e.text + base_url
|
||||
if re.match(r'^https?://', base_url):
|
||||
break
|
||||
if not re.match(r'^https?://', base_url):
|
||||
base_url = mpd_base_url + base_url
|
||||
representation_id = representation_attrib.get('id')
|
||||
lang = representation_attrib.get('lang')
|
||||
url_el = representation.find(_add_ns('BaseURL'))
|
||||
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
|
||||
f = {
|
||||
'format_id': mpd_id or representation_id,
|
||||
'url': base_url,
|
||||
'width': int_or_none(representation_attrib.get('width')),
|
||||
'height': int_or_none(representation_attrib.get('height')),
|
||||
'tbr': int_or_none(representation_attrib.get('bandwidth'), 1000),
|
||||
'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
|
||||
'fps': int_or_none(representation_attrib.get('frameRate')),
|
||||
'vcodec': 'none' if content_type == 'audio' else representation_attrib.get('codecs'),
|
||||
'acodec': 'none' if content_type == 'video' else representation_attrib.get('codecs'),
|
||||
'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
|
||||
'format_note': 'DASH %s' % content_type,
|
||||
'filesize': filesize,
|
||||
}
|
||||
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
||||
if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
|
||||
if 'total_number' not in representation_ms_info and 'segment_duration':
|
||||
segment_duration = float(representation_ms_info['segment_duration']) / float(representation_ms_info['timescale'])
|
||||
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
||||
media_template = representation_ms_info['media_template']
|
||||
media_template = media_template.replace('$RepresentationID$', representation_id)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth)(?:%(0\d+)d)?\$', r'%(\1)\2d', media_template)
|
||||
media_template.replace('$$', '$')
|
||||
representation_ms_info['segment_urls'] = [media_template % {'Number': segment_number, 'Bandwidth': representation_attrib.get('bandwidth')} for segment_number in range(representation_ms_info['start_number'], representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
||||
if 'segment_urls' in representation_ms_info:
|
||||
f.update({
|
||||
'segment_urls': representation_ms_info['segment_urls'],
|
||||
'protocol': 'http_dash_segments',
|
||||
})
|
||||
if 'initialization_url' in representation_ms_info:
|
||||
initialization_url = representation_ms_info['initialization_url'].replace('$RepresentationID$', representation_id)
|
||||
f.update({
|
||||
'initialization_url': initialization_url,
|
||||
})
|
||||
if not f.get('url'):
|
||||
f['url'] = initialization_url
|
||||
try:
|
||||
existing_format = next(
|
||||
fo for fo in formats
|
||||
if fo['format_id'] == representation_id)
|
||||
except StopIteration:
|
||||
full_info = formats_dict.get(representation_id, {}).copy()
|
||||
full_info.update(f)
|
||||
formats.append(full_info)
|
||||
else:
|
||||
existing_format.update(f)
|
||||
else:
|
||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
def _live_title(self, name):
|
||||
|
@ -215,9 +215,8 @@ class FacebookIE(InfoExtractor):
|
||||
})
|
||||
dash_manifest = f[0].get('dash_manifest')
|
||||
if dash_manifest:
|
||||
formats.extend(self._parse_dash_manifest(
|
||||
compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest)),
|
||||
namespace='urn:mpeg:dash:schema:mpd:2011'))
|
||||
formats.extend(self._parse_mpd_formats(
|
||||
compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest))))
|
||||
if not formats:
|
||||
raise ExtractorError('Cannot find video formats')
|
||||
|
||||
|
@ -9,6 +9,7 @@ class FOXIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.fox.com/watch/255180355939/7684182528',
|
||||
'md5': 'ebd296fcc41dd4b19f8115d8461a3165',
|
||||
'info_dict': {
|
||||
'id': '255180355939',
|
||||
'ext': 'mp4',
|
||||
@ -17,10 +18,6 @@ class FOXIE(InfoExtractor):
|
||||
'duration': 129,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -29,7 +26,7 @@ class FOXIE(InfoExtractor):
|
||||
|
||||
release_url = self._parse_json(self._search_regex(
|
||||
r'"fox_pdk_player"\s*:\s*({[^}]+?})', webpage, 'fox_pdk_player'),
|
||||
video_id)['release_url'] + '&manifest=m3u'
|
||||
video_id)['release_url'] + '&switch=http'
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
|
@ -1289,7 +1289,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
self.report_extraction(video_id)
|
||||
|
||||
# Is it an RSS feed, a SMIL file or a XSPF playlist?
|
||||
# Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
|
||||
try:
|
||||
doc = compat_etree_fromstring(webpage.encode('utf-8'))
|
||||
if doc.tag == 'rss':
|
||||
@ -1298,6 +1298,12 @@ class GenericIE(InfoExtractor):
|
||||
return self._parse_smil(doc, url, video_id)
|
||||
elif doc.tag == '{http://xspf.org/ns/0/}playlist':
|
||||
return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
|
||||
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
|
||||
'formats': self._parse_mpd_formats(doc, video_id),
|
||||
}
|
||||
except compat_xml_parse_error:
|
||||
pass
|
||||
|
||||
@ -1951,6 +1957,8 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
|
||||
elif ext == 'm3u8':
|
||||
entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
|
||||
elif ext == 'mpd':
|
||||
entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
|
||||
else:
|
||||
entry_info_dict['url'] = video_url
|
||||
|
||||
|
@ -10,8 +10,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class HotStarIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/.*?[/-](?P<id>\d{10})'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273',
|
||||
'info_dict': {
|
||||
'id': '1000076273',
|
||||
@ -26,7 +26,13 @@ class HotStarIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.hotstar.com/1000000515',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_GET_CONTENT_TEMPLATE = 'http://account.hotstar.com/AVS/besc?action=GetAggregatedContentDetails&channel=PCTV&contentId=%s'
|
||||
_GET_CDN_TEMPLATE = 'http://getcdn.hotstar.com/AVS/besc?action=GetCDN&asJson=Y&channel=%s&id=%s&type=%s'
|
||||
|
@ -2,46 +2,30 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
from random import random
|
||||
from math import floor
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
remove_end,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
class IPrimaIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://play\.iprima\.cz/(?:[^/]+/)*(?P<id>[^?#]+)'
|
||||
_VALID_URL = r'https?://play\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://play.iprima.cz/gondici-s-r-o-33',
|
||||
'info_dict': {
|
||||
'id': 'p136534',
|
||||
'ext': 'mp4',
|
||||
'title': 'Gondíci s. r. o. (34)',
|
||||
'description': 'md5:16577c629d006aa91f59ca8d8e7f99bd',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 download
|
||||
},
|
||||
}, {
|
||||
'url': 'http://play.iprima.cz/particka/particka-92',
|
||||
'info_dict': {
|
||||
'id': '39152',
|
||||
'ext': 'flv',
|
||||
'title': 'Partička (92)',
|
||||
'description': 'md5:74e9617e51bca67c3ecfb2c6f9766f45',
|
||||
'thumbnail': 'http://play.iprima.cz/sites/default/files/image_crops/image_620x349/3/491483_particka-92_image_620x349.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires rtmpdump
|
||||
},
|
||||
}, {
|
||||
'url': 'http://play.iprima.cz/particka/tchibo-particka-jarni-moda',
|
||||
'info_dict': {
|
||||
'id': '9718337',
|
||||
'ext': 'flv',
|
||||
'title': 'Tchibo Partička - Jarní móda',
|
||||
'thumbnail': 're:^http:.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires rtmpdump
|
||||
},
|
||||
}, {
|
||||
'url': 'http://play.iprima.cz/zpravy-ftv-prima-2752015',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@ -51,62 +35,24 @@ class IPrimaIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if re.search(r'Nemáte oprávnění přistupovat na tuto stránku\.\s*</div>', webpage):
|
||||
raise ExtractorError(
|
||||
'%s said: You do not have permission to access this page' % self.IE_NAME, expected=True)
|
||||
video_id = self._search_regex(r'data-product="([^"]+)">', webpage, 'real id')
|
||||
|
||||
player_url = (
|
||||
'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' %
|
||||
(floor(random() * 1073741824), floor(random() * 1073741824))
|
||||
)
|
||||
|
||||
req = sanitized_Request(player_url)
|
||||
req = sanitized_Request(
|
||||
'http://play.iprima.cz/prehravac/init?_infuse=1'
|
||||
'&_ts=%s&productId=%s' % (round(time.time()), video_id))
|
||||
req.add_header('Referer', url)
|
||||
playerpage = self._download_webpage(req, video_id)
|
||||
playerpage = self._download_webpage(req, video_id, note='Downloading player')
|
||||
|
||||
base_url = ''.join(re.findall(r"embed\['stream'\] = '(.+?)'.+'(\?auth=)'.+'(.+?)';", playerpage)[1])
|
||||
m3u8_url = self._search_regex(r"'src': '([^']+\.m3u8)'", playerpage, 'm3u8 url')
|
||||
|
||||
zoneGEO = self._html_search_regex(r'"zoneGEO":(.+?),', webpage, 'zoneGEO')
|
||||
if zoneGEO != '0':
|
||||
base_url = base_url.replace('token', 'token_' + zoneGEO)
|
||||
|
||||
formats = []
|
||||
for format_id in ['lq', 'hq', 'hd']:
|
||||
filename = self._html_search_regex(
|
||||
r'"%s_id":(.+?),' % format_id, webpage, 'filename')
|
||||
|
||||
if filename == 'null':
|
||||
continue
|
||||
|
||||
real_id = self._search_regex(
|
||||
r'Prima-(?:[0-9]{10}|WEB)-([0-9]+)[-_]',
|
||||
filename, 'real video id')
|
||||
|
||||
if format_id == 'lq':
|
||||
quality = 0
|
||||
elif format_id == 'hq':
|
||||
quality = 1
|
||||
elif format_id == 'hd':
|
||||
quality = 2
|
||||
filename = 'hq/' + filename
|
||||
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': base_url,
|
||||
'quality': quality,
|
||||
'play_path': 'mp4:' + filename.replace('"', '')[:-4],
|
||||
'rtmp_live': True,
|
||||
'ext': 'flv',
|
||||
})
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': real_id,
|
||||
'title': remove_end(self._og_search_title(webpage), ' | Prima PLAY'),
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'formats': formats,
|
||||
'description': self._search_regex(
|
||||
r'<p[^>]+itemprop="description"[^>]*>([^<]+)',
|
||||
webpage, 'description', default=None),
|
||||
'description': self._og_search_description(webpage),
|
||||
}
|
||||
|
107
youtube_dl/extractor/konserthusetplay.py
Normal file
107
youtube_dl/extractor/konserthusetplay.py
Normal file
@ -0,0 +1,107 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class KonserthusetPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?konserthusetplay\.se/\?.*\bm=(?P<id>[^&]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.konserthusetplay.se/?m=CKDDnlCY-dhWAAqiMERd-A',
|
||||
'info_dict': {
|
||||
'id': 'CKDDnlCY-dhWAAqiMERd-A',
|
||||
'ext': 'flv',
|
||||
'title': 'Orkesterns instrument: Valthornen',
|
||||
'description': 'md5:f10e1f0030202020396a4d712d2fa827',
|
||||
'thumbnail': 're:^https?://.*$',
|
||||
'duration': 398.8,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
e = self._search_regex(
|
||||
r'https?://csp\.picsearch\.com/rest\?.*\be=(.+?)[&"\']', webpage, 'e')
|
||||
|
||||
rest = self._download_json(
|
||||
'http://csp.picsearch.com/rest?e=%s&containerId=mediaplayer&i=object' % e,
|
||||
video_id, transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])
|
||||
|
||||
media = rest['media']
|
||||
player_config = media['playerconfig']
|
||||
playlist = player_config['playlist']
|
||||
|
||||
source = next(f for f in playlist if f.get('bitrates'))
|
||||
|
||||
FORMAT_ID_REGEX = r'_([^_]+)_h264m\.mp4'
|
||||
|
||||
formats = []
|
||||
|
||||
fallback_url = source.get('fallbackUrl')
|
||||
fallback_format_id = None
|
||||
if fallback_url:
|
||||
fallback_format_id = self._search_regex(
|
||||
FORMAT_ID_REGEX, fallback_url, 'format id', default=None)
|
||||
|
||||
connection_url = (player_config.get('rtmp', {}).get(
|
||||
'netConnectionUrl') or player_config.get(
|
||||
'plugins', {}).get('bwcheck', {}).get('netConnectionUrl'))
|
||||
if connection_url:
|
||||
for f in source['bitrates']:
|
||||
video_url = f.get('url')
|
||||
if not video_url:
|
||||
continue
|
||||
format_id = self._search_regex(
|
||||
FORMAT_ID_REGEX, video_url, 'format id', default=None)
|
||||
f_common = {
|
||||
'vbr': int_or_none(f.get('bitrate')),
|
||||
'width': int_or_none(f.get('width')),
|
||||
'height': int_or_none(f.get('height')),
|
||||
}
|
||||
f = f_common.copy()
|
||||
f.update({
|
||||
'url': connection_url,
|
||||
'play_path': video_url,
|
||||
'format_id': 'rtmp-%s' % format_id if format_id else 'rtmp',
|
||||
'ext': 'flv',
|
||||
})
|
||||
formats.append(f)
|
||||
if format_id and format_id == fallback_format_id:
|
||||
f = f_common.copy()
|
||||
f.update({
|
||||
'url': fallback_url,
|
||||
'format_id': 'http-%s' % format_id if format_id else 'http',
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
if not formats and fallback_url:
|
||||
formats.append({
|
||||
'url': fallback_url,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = player_config.get('title') or media['title']
|
||||
description = player_config.get('mediaInfo', {}).get('description')
|
||||
thumbnail = media.get('image')
|
||||
duration = float_or_none(media.get('duration'), 1000)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
@ -4,6 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class MailRuIE(InfoExtractor):
|
||||
@ -34,14 +38,30 @@ class MailRuIE(InfoExtractor):
|
||||
'id': '46843144_1263',
|
||||
'ext': 'mp4',
|
||||
'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion',
|
||||
'timestamp': 1397217632,
|
||||
'upload_date': '20140411',
|
||||
'uploader': 'hitech',
|
||||
'timestamp': 1397039888,
|
||||
'upload_date': '20140409',
|
||||
'uploader': 'hitech@corp.mail.ru',
|
||||
'uploader_id': 'hitech@corp.mail.ru',
|
||||
'duration': 245,
|
||||
},
|
||||
'skip': 'Not accessible from Travis CI server',
|
||||
},
|
||||
{
|
||||
# only available via metaUrl API
|
||||
'url': 'http://my.mail.ru/mail/720pizle/video/_myvideo/502.html',
|
||||
'md5': '3b26d2491c6949d031a32b96bd97c096',
|
||||
'info_dict': {
|
||||
'id': '56664382_502',
|
||||
'ext': 'mp4',
|
||||
'title': ':8336',
|
||||
'timestamp': 1449094163,
|
||||
'upload_date': '20151202',
|
||||
'uploader': '720pizle@mail.ru',
|
||||
'uploader_id': '720pizle@mail.ru',
|
||||
'duration': 6001,
|
||||
},
|
||||
'skip': 'Not accessible from Travis CI server',
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -51,32 +71,55 @@ class MailRuIE(InfoExtractor):
|
||||
if not video_id:
|
||||
video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix')
|
||||
|
||||
video_data = self._download_json(
|
||||
'http://api.video.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
author = video_data['author']
|
||||
uploader = author['name']
|
||||
uploader_id = author.get('id') or author.get('email')
|
||||
view_count = video_data.get('views_count')
|
||||
video_data = None
|
||||
|
||||
page_config = self._parse_json(self._search_regex(
|
||||
r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>',
|
||||
webpage, 'page config', default='{}'), video_id, fatal=False)
|
||||
if page_config:
|
||||
meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl')
|
||||
if meta_url:
|
||||
video_data = self._download_json(
|
||||
meta_url, video_id, 'Downloading video meta JSON', fatal=False)
|
||||
|
||||
# Fallback old approach
|
||||
if not video_data:
|
||||
video_data = self._download_json(
|
||||
'http://api.video.mail.ru/videos/%s.json?new=1' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
formats = []
|
||||
for f in video_data['videos']:
|
||||
video_url = f.get('url')
|
||||
if not video_url:
|
||||
continue
|
||||
format_id = f.get('key')
|
||||
height = int_or_none(self._search_regex(
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None)) if format_id else None
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'height': height,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
meta_data = video_data['meta']
|
||||
content_id = '%s_%s' % (
|
||||
meta_data.get('accId', ''), meta_data['itemId'])
|
||||
title = meta_data['title']
|
||||
if title.endswith('.mp4'):
|
||||
title = title[:-4]
|
||||
thumbnail = meta_data['poster']
|
||||
duration = meta_data['duration']
|
||||
timestamp = meta_data['timestamp']
|
||||
title = remove_end(meta_data['title'], '.mp4')
|
||||
|
||||
formats = [
|
||||
{
|
||||
'url': video['url'],
|
||||
'format_id': video['key'],
|
||||
'height': int(video['key'].rstrip('p'))
|
||||
} for video in video_data['videos']
|
||||
]
|
||||
self._sort_formats(formats)
|
||||
author = video_data.get('author')
|
||||
uploader = author.get('name')
|
||||
uploader_id = author.get('id') or author.get('email')
|
||||
view_count = int_or_none(video_data.get('viewsCount') or video_data.get('views_count'))
|
||||
|
||||
acc_id = meta_data.get('accId')
|
||||
item_id = meta_data.get('itemId')
|
||||
content_id = '%s_%s' % (acc_id, item_id) if acc_id and item_id else video_id
|
||||
|
||||
thumbnail = meta_data.get('poster')
|
||||
duration = int_or_none(meta_data.get('duration'))
|
||||
timestamp = int_or_none(meta_data.get('timestamp'))
|
||||
|
||||
return {
|
||||
'id': content_id,
|
||||
|
@ -8,6 +8,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
strip_jsonp,
|
||||
unified_strdate,
|
||||
US_RATINGS,
|
||||
@ -432,9 +433,20 @@ class PBSIE(InfoExtractor):
|
||||
for vid_id in video_id]
|
||||
return self.playlist_result(entries, display_id)
|
||||
|
||||
info = self._download_json(
|
||||
'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
|
||||
display_id)
|
||||
player = self._download_webpage(
|
||||
'http://player.pbs.org/portalplayer/%s' % video_id, display_id)
|
||||
|
||||
info = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)PBS\.videoData\s*=\s*({.+?});\n',
|
||||
player, 'video data', default='{}'),
|
||||
display_id, transform_source=js_to_json, fatal=False)
|
||||
|
||||
# Fallback to old videoInfo API
|
||||
if not info:
|
||||
info = self._download_json(
|
||||
'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
|
||||
display_id, 'Downloading video info JSON')
|
||||
|
||||
formats = []
|
||||
for encoding_name in ('recommended_encoding', 'alternate_encoding'):
|
||||
|
@ -1,6 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
@ -61,12 +63,15 @@ class RteIE(InfoExtractor):
|
||||
class RteRadioIE(InfoExtractor):
|
||||
IE_NAME = 'rte:radio'
|
||||
IE_DESC = 'Raidió Teilifís Éireann radio'
|
||||
# Radioplayer URLs have the specifier #!rii=<channel_id>:<id>:<playable_item_id>:<date>:
|
||||
# Radioplayer URLs have two distinct specifier formats,
|
||||
# the old format #!rii=<channel_id>:<id>:<playable_item_id>:<date>:
|
||||
# the new format #!rii=b<channel_id>_<id>_<playable_item_id>_<date>_
|
||||
# where the IDs are int/empty, the date is DD-MM-YYYY, and the specifier may be truncated.
|
||||
# An <id> uniquely defines an individual recording, and is the only part we require.
|
||||
_VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:[0-9]*)(?:%3A|:)(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:b?[0-9]*)(?:%3A|:|%5F|_)(?P<id>[0-9]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# Old-style player URL; HLS and RTMPE formats
|
||||
'url': 'http://www.rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=16:10507902:2414:27-12-2015:',
|
||||
'info_dict': {
|
||||
'id': '10507902',
|
||||
@ -81,7 +86,23 @@ class RteRadioIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': 'f4m fails with --test atm'
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# New-style player URL; RTMPE formats only
|
||||
'url': 'http://rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=b16_3250678_8861_06-04-2012_',
|
||||
'info_dict': {
|
||||
'id': '3250678',
|
||||
'ext': 'flv',
|
||||
'title': 'The Lyric Concert with Paul Herriott',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': '',
|
||||
'timestamp': 1333742400,
|
||||
'upload_date': '20120406',
|
||||
'duration': 7199.016,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'f4m fails with --test atm'
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
item_id = self._match_id(url)
|
||||
@ -102,8 +123,18 @@ class RteRadioIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
|
||||
if mg.get('url') and not mg['url'].startswith('rtmpe:'):
|
||||
formats.append({'url': mg['url']})
|
||||
if mg.get('url'):
|
||||
m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url'])
|
||||
if m:
|
||||
m = m.groupdict()
|
||||
formats.append({
|
||||
'url': m['url'] + '/' + m['app'],
|
||||
'app': m['app'],
|
||||
'play_path': m['playpath'],
|
||||
'player_url': url,
|
||||
'ext': 'flv',
|
||||
'format_id': 'rtmp',
|
||||
})
|
||||
|
||||
if mg.get('hls_server') and mg.get('hls_url'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
|
@ -7,7 +7,7 @@ from .common import InfoExtractor
|
||||
|
||||
class SpankBangIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
|
||||
'md5': '1cc433e1d6aa14bc376535b8679302f7',
|
||||
'info_dict': {
|
||||
@ -19,7 +19,11 @@ class SpankBangIE(InfoExtractor):
|
||||
'uploader': 'silly2587',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# 480p only
|
||||
'url': 'http://spankbang.com/1vt0/video/solvane+gangbang',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@ -34,7 +38,8 @@ class SpankBangIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'format_id': '%sp' % height,
|
||||
'height': int(height),
|
||||
} for height in re.findall(r'<(?:span|li)[^>]+q_(\d+)p', webpage)]
|
||||
} for height in re.findall(r'<(?:span|li|p)[^>]+[qb]_(\d+)p', webpage)]
|
||||
self._check_formats(formats, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_regex(
|
||||
|
@ -197,8 +197,14 @@ class VevoIE(InfoExtractor):
|
||||
if not version_url:
|
||||
continue
|
||||
|
||||
if '.mpd' in version_url or '.ism' in version_url:
|
||||
if '.ism' in version_url:
|
||||
continue
|
||||
elif '.mpd' in version_url:
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
version_url, video_id, mpd_id='dash-%s' % version,
|
||||
note='Downloading %s MPD information' % version,
|
||||
errnote='Failed to download %s MPD information' % version,
|
||||
fatal=False))
|
||||
elif '.m3u8' in version_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
version_url, video_id, 'mp4', 'm3u8_native',
|
||||
|
@ -1,6 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
@ -12,10 +16,10 @@ class ViddlerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.viddler.com/v/43903784',
|
||||
'md5': 'ae43ad7cb59431ce043f0ff7fa13cbf4',
|
||||
'md5': '9eee21161d2c7f5b39690c3e325fab2f',
|
||||
'info_dict': {
|
||||
'id': '43903784',
|
||||
'ext': 'mp4',
|
||||
'ext': 'mov',
|
||||
'title': 'Video Made Easy',
|
||||
'description': 'md5:6a697ebd844ff3093bd2e82c37b409cd',
|
||||
'uploader': 'viddler',
|
||||
@ -29,10 +33,10 @@ class ViddlerIE(InfoExtractor):
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.viddler.com/v/4d03aad9/',
|
||||
'md5': 'faa71fbf70c0bee7ab93076fd007f4b0',
|
||||
'md5': 'f12c5a7fa839c47a79363bfdf69404fb',
|
||||
'info_dict': {
|
||||
'id': '4d03aad9',
|
||||
'ext': 'mp4',
|
||||
'ext': 'ts',
|
||||
'title': 'WALL-TO-GORTAT',
|
||||
'upload_date': '20150126',
|
||||
'uploader': 'deadspin',
|
||||
@ -42,10 +46,10 @@ class ViddlerIE(InfoExtractor):
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.viddler.com/player/221ebbbd/0/',
|
||||
'md5': '0defa2bd0ea613d14a6e9bd1db6be326',
|
||||
'md5': '740511f61d3d1bb71dc14a0fe01a1c10',
|
||||
'info_dict': {
|
||||
'id': '221ebbbd',
|
||||
'ext': 'mp4',
|
||||
'ext': 'mov',
|
||||
'title': 'LETeens-Grammar-snack-third-conditional',
|
||||
'description': ' ',
|
||||
'upload_date': '20140929',
|
||||
@ -54,16 +58,42 @@ class ViddlerIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
}, {
|
||||
# secret protected
|
||||
'url': 'http://www.viddler.com/v/890c0985?secret=34051570',
|
||||
'info_dict': {
|
||||
'id': '890c0985',
|
||||
'ext': 'mp4',
|
||||
'title': 'Complete Property Training - Traineeships',
|
||||
'description': ' ',
|
||||
'upload_date': '20130606',
|
||||
'uploader': 'TiffanyBowtell',
|
||||
'timestamp': 1370496993,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
json_url = (
|
||||
'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?video_id=%s&key=v0vhrt7bg2xq1vyxhkct' %
|
||||
video_id)
|
||||
query = {
|
||||
'video_id': video_id,
|
||||
'key': 'v0vhrt7bg2xq1vyxhkct',
|
||||
}
|
||||
|
||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
secret = qs.get('secret', [None])[0]
|
||||
if secret:
|
||||
query['secret'] = secret
|
||||
|
||||
headers = {'Referer': 'http://static.cdn-ec.viddler.com/js/arpeggio/v2/embed.html'}
|
||||
request = sanitized_Request(json_url, None, headers)
|
||||
request = sanitized_Request(
|
||||
'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?%s'
|
||||
% compat_urllib_parse.urlencode(query), None, headers)
|
||||
data = self._download_json(request, video_id)['video']
|
||||
|
||||
formats = []
|
||||
|
@ -114,7 +114,7 @@ class VideomoreIE(InfoExtractor):
|
||||
|
||||
data = self._download_json(
|
||||
'http://videomore.ru/video/tracks/%s.json' % video_id,
|
||||
video_id, 'Downloadinng video JSON')
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
title = data.get('title') or data['project_title']
|
||||
description = data.get('description') or data.get('description_raw')
|
||||
|
@ -1,5 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
@ -11,7 +13,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class VidmeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]+)'
|
||||
IE_NAME = 'vidme'
|
||||
_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{,5})(?:[^\da-zA-Z]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://vid.me/QNB',
|
||||
'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
|
||||
@ -202,3 +205,69 @@ class VidmeIE(InfoExtractor):
|
||||
'comment_count': comment_count,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class VidmeListBaseIE(InfoExtractor):
|
||||
# Max possible limit according to https://docs.vid.me/#api-Videos-List
|
||||
_LIMIT = 100
|
||||
|
||||
def _entries(self, user_id, user_name):
|
||||
for page_num in itertools.count(1):
|
||||
page = self._download_json(
|
||||
'https://api.vid.me/videos/%s?user=%s&limit=%d&offset=%d'
|
||||
% (self._API_ITEM, user_id, self._LIMIT, (page_num - 1) * self._LIMIT),
|
||||
user_name, 'Downloading user %s page %d' % (self._API_ITEM, page_num))
|
||||
|
||||
videos = page.get('videos', [])
|
||||
if not videos:
|
||||
break
|
||||
|
||||
for video in videos:
|
||||
video_url = video.get('full_url') or video.get('embed_url')
|
||||
if video_url:
|
||||
yield self.url_result(video_url, VidmeIE.ie_key())
|
||||
|
||||
total = int_or_none(page.get('page', {}).get('total'))
|
||||
if total and self._LIMIT * page_num >= total:
|
||||
break
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_name = self._match_id(url)
|
||||
|
||||
user_id = self._download_json(
|
||||
'https://api.vid.me/userByUsername?username=%s' % user_name,
|
||||
user_name)['user']['user_id']
|
||||
|
||||
return self.playlist_result(
|
||||
self._entries(user_id, user_name), user_id,
|
||||
'%s - %s' % (user_name, self._TITLE))
|
||||
|
||||
|
||||
class VidmeUserIE(VidmeListBaseIE):
|
||||
IE_NAME = 'vidme:user'
|
||||
_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{6,})(?!/likes)(?:[^\da-zA-Z]|$)'
|
||||
_API_ITEM = 'list'
|
||||
_TITLE = 'Videos'
|
||||
_TEST = {
|
||||
'url': 'https://vid.me/EFARCHIVE',
|
||||
'info_dict': {
|
||||
'id': '3834632',
|
||||
'title': 'EFARCHIVE - %s' % _TITLE,
|
||||
},
|
||||
'playlist_mincount': 238,
|
||||
}
|
||||
|
||||
|
||||
class VidmeUserLikesIE(VidmeListBaseIE):
|
||||
IE_NAME = 'vidme:user:likes'
|
||||
_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{6,})/likes'
|
||||
_API_ITEM = 'likes'
|
||||
_TITLE = 'Likes'
|
||||
_TEST = {
|
||||
'url': 'https://vid.me/ErinAlexis/likes',
|
||||
'info_dict': {
|
||||
'id': '6483530',
|
||||
'title': 'ErinAlexis - %s' % _TITLE,
|
||||
},
|
||||
'playlist_mincount': 415,
|
||||
}
|
||||
|
@ -265,7 +265,7 @@ class VKIE(InfoExtractor):
|
||||
return self.url_result(pladform_url)
|
||||
|
||||
m_rutube = re.search(
|
||||
r'\ssrc="((?:https?:)?//rutube\.ru\\?/video\\?/embed(?:.*?))\\?"', info_page)
|
||||
r'\ssrc="((?:https?:)?//rutube\.ru\\?/(?:video|play)\\?/embed(?:.*?))\\?"', info_page)
|
||||
if m_rutube is not None:
|
||||
rutube_url = self._proto_relative_url(
|
||||
m_rutube.group(1).replace('\\', ''))
|
||||
|
@ -1,86 +1,88 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import hmac
|
||||
from hashlib import sha1
|
||||
from base64 import b64encode
|
||||
from time import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext
|
||||
dict_get,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
from ..compat import compat_urllib_parse
|
||||
|
||||
|
||||
class VLiveIE(InfoExtractor):
|
||||
IE_NAME = 'vlive'
|
||||
# www.vlive.tv/video/ links redirect to m.vlive.tv/video/ for mobile devices
|
||||
_VALID_URL = r'https?://(?:(www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://m.vlive.tv/video/1326',
|
||||
'url': 'http://www.vlive.tv/video/1326',
|
||||
'md5': 'cc7314812855ce56de70a06a27314983',
|
||||
'info_dict': {
|
||||
'id': '1326',
|
||||
'ext': 'mp4',
|
||||
'title': '[V] Girl\'s Day\'s Broadcast',
|
||||
'creator': 'Girl\'s Day',
|
||||
'title': "[V] Girl's Day's Broadcast",
|
||||
'creator': "Girl's Day",
|
||||
'view_count': int,
|
||||
},
|
||||
}
|
||||
_SECRET = 'rFkwZet6pqk1vQt6SxxUkAHX7YL3lmqzUMrU4IDusTo4jEBdtOhNfT4BYYAdArwH'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://m.vlive.tv/video/%s' % video_id,
|
||||
video_id, note='Download video page')
|
||||
'http://www.vlive.tv/video/%s' % video_id, video_id)
|
||||
|
||||
long_video_id = self._search_regex(
|
||||
r'vlive\.tv\.video\.ajax\.request\.handler\.init\(\s*"[0-9]+"\s*,\s*"[^"]*"\s*,\s*"([^"]+)"',
|
||||
webpage, 'long video id')
|
||||
|
||||
key = self._search_regex(
|
||||
r'vlive\.tv\.video\.ajax\.request\.handler\.init\(\s*"[0-9]+"\s*,\s*"[^"]*"\s*,\s*"[^"]+"\s*,\s*"([^"]+)"',
|
||||
webpage, 'key')
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
creator = self._html_search_regex(
|
||||
r'<span[^>]+class="name">([^<>]+)</span>', webpage, 'creator')
|
||||
|
||||
url = 'http://global.apis.naver.com/globalV/globalV/vod/%s/playinfo?' % video_id
|
||||
msgpad = '%.0f' % (time() * 1000)
|
||||
md = b64encode(
|
||||
hmac.new(self._SECRET.encode('ascii'),
|
||||
(url[:255] + msgpad).encode('ascii'), sha1).digest()
|
||||
)
|
||||
url += '&' + compat_urllib_parse.urlencode({'msgpad': msgpad, 'md': md})
|
||||
playinfo = self._download_json(url, video_id, 'Downloading video json')
|
||||
playinfo = self._download_json(
|
||||
'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?%s'
|
||||
% compat_urllib_parse.urlencode({
|
||||
'videoId': long_video_id,
|
||||
'key': key,
|
||||
'ptc': 'http',
|
||||
'doct': 'json', # document type (xml or json)
|
||||
'cpt': 'vtt', # captions type (vtt or ttml)
|
||||
}), video_id)
|
||||
|
||||
if playinfo.get('message', '') != 'success':
|
||||
raise ExtractorError(playinfo.get('message', 'JSON request unsuccessful'))
|
||||
|
||||
if not playinfo.get('result'):
|
||||
raise ExtractorError('No videos found.')
|
||||
|
||||
formats = []
|
||||
for vid in playinfo['result'].get('videos', {}).get('list', []):
|
||||
formats.append({
|
||||
'url': vid['source'],
|
||||
'ext': 'mp4',
|
||||
'abr': vid.get('bitrate', {}).get('audio'),
|
||||
'vbr': vid.get('bitrate', {}).get('video'),
|
||||
'format_id': vid['encodingOption']['name'],
|
||||
'height': vid.get('height'),
|
||||
'width': vid.get('width'),
|
||||
})
|
||||
formats = [{
|
||||
'url': vid['source'],
|
||||
'format_id': vid.get('encodingOption', {}).get('name'),
|
||||
'abr': float_or_none(vid.get('bitrate', {}).get('audio')),
|
||||
'vbr': float_or_none(vid.get('bitrate', {}).get('video')),
|
||||
'width': int_or_none(vid.get('encodingOption', {}).get('width')),
|
||||
'height': int_or_none(vid.get('encodingOption', {}).get('height')),
|
||||
'filesize': int_or_none(vid.get('size')),
|
||||
} for vid in playinfo.get('videos', {}).get('list', []) if vid.get('source')]
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
creator = self._html_search_regex(
|
||||
r'<div[^>]+class="info_area"[^>]*>\s*<strong[^>]+class="name"[^>]*>([^<]+)</strong>',
|
||||
webpage, 'creator', fatal=False)
|
||||
|
||||
view_count = int_or_none(playinfo.get('meta', {}).get('count'))
|
||||
|
||||
subtitles = {}
|
||||
for caption in playinfo['result'].get('captions', {}).get('list', []):
|
||||
subtitles[caption['language']] = [
|
||||
{'ext': determine_ext(caption['source'], default_ext='vtt'),
|
||||
'url': caption['source']}]
|
||||
for caption in playinfo.get('captions', {}).get('list', []):
|
||||
lang = dict_get(caption, ('language', 'locale', 'country', 'label'))
|
||||
if lang and caption.get('source'):
|
||||
subtitles[lang] = [{
|
||||
'ext': 'vtt',
|
||||
'url': caption['source']}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'creator': creator,
|
||||
'thumbnail': thumbnail,
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
@ -229,6 +229,9 @@ class YoukuIE(InfoExtractor):
|
||||
if error_note is not None and '因版权原因无法观看此视频' in error_note:
|
||||
raise ExtractorError(
|
||||
'Youku said: Sorry, this video is available in China only', expected=True)
|
||||
elif error_note and '该视频被设为私密' in error_note:
|
||||
raise ExtractorError(
|
||||
'Youku said: Sorry, this video is private', expected=True)
|
||||
else:
|
||||
msg = 'Youku server reported error %i' % error.get('code')
|
||||
if error_note is not None:
|
||||
|
@ -286,7 +286,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
|
||||
'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
|
||||
'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
|
||||
'36': {'ext': '3gp', 'width': 320, 'height': 240, 'acodec': 'aac', 'abr': 32, 'vcodec': 'mp4v'},
|
||||
# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
|
||||
'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
|
||||
'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
|
||||
'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
|
||||
'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
|
||||
@ -369,6 +370,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
# RTMP (unnamed)
|
||||
'_rtmp': {'protocol': 'rtmp'},
|
||||
}
|
||||
_SUBTITLE_FORMATS = ('ttml', 'vtt')
|
||||
|
||||
IE_NAME = 'youtube'
|
||||
_TESTS = [
|
||||
@ -918,7 +920,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if lang in sub_lang_list:
|
||||
continue
|
||||
sub_formats = []
|
||||
for ext in ['ttml', 'vtt']:
|
||||
for ext in self._SUBTITLE_FORMATS:
|
||||
params = compat_urllib_parse.urlencode({
|
||||
'lang': lang,
|
||||
'v': video_id,
|
||||
@ -988,7 +990,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
for lang_node in caption_list.findall('target'):
|
||||
sub_lang = lang_node.attrib['lang_code']
|
||||
sub_formats = []
|
||||
for ext in ['sbv', 'vtt', 'srt']:
|
||||
for ext in self._SUBTITLE_FORMATS:
|
||||
params = compat_urllib_parse.urlencode({
|
||||
'lang': original_lang,
|
||||
'tlang': sub_lang,
|
||||
@ -1463,7 +1465,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
# Look for the DASH manifest
|
||||
if self._downloader.params.get('youtube_include_dash_manifest', True):
|
||||
dash_mpd_fatal = True
|
||||
for dash_manifest_url in dash_mpds:
|
||||
for mpd_url in dash_mpds:
|
||||
dash_formats = {}
|
||||
try:
|
||||
def decrypt_sig(mobj):
|
||||
@ -1471,11 +1473,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
|
||||
return '/signature/%s' % dec_s
|
||||
|
||||
dash_manifest_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, dash_manifest_url)
|
||||
mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
|
||||
|
||||
for df in self._extract_dash_manifest_formats(
|
||||
dash_manifest_url, video_id, fatal=dash_mpd_fatal,
|
||||
namespace='urn:mpeg:DASH:schema:MPD:2011', formats_dict=self._formats):
|
||||
for df in self._extract_mpd_formats(
|
||||
mpd_url, video_id, fatal=dash_mpd_fatal,
|
||||
formats_dict=self._formats):
|
||||
# Do not overwrite DASH format found in some previous DASH manifest
|
||||
if df['format_id'] not in dash_formats:
|
||||
dash_formats[df['format_id']] = df
|
||||
|
@ -392,7 +392,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
|
||||
options.extend(['-metadata', '%s=%s' % (name, value)])
|
||||
|
||||
# https://github.com/rg3/youtube-dl/issues/8350
|
||||
if info['protocol'] == 'm3u8_native' or self._downloader.params.get('hls_prefer_native', False):
|
||||
if info.get('protocol') == 'm3u8_native' or info.get('protocol') == 'm3u8' and self._downloader.params.get('hls_prefer_native', False):
|
||||
options.extend(['-bsf:a', 'aac_adtstoasc'])
|
||||
|
||||
self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename)
|
||||
@ -508,8 +508,8 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
|
||||
|
||||
with io.open(srt_file, 'wt', encoding='utf-8') as f:
|
||||
f.write(srt_data)
|
||||
old_file = srt_file
|
||||
|
||||
ext = 'srt'
|
||||
subs[lang] = {
|
||||
'ext': 'srt',
|
||||
'data': srt_data
|
||||
@ -517,12 +517,14 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
|
||||
|
||||
if new_ext == 'srt':
|
||||
continue
|
||||
else:
|
||||
sub_filenames.append(srt_file)
|
||||
|
||||
self.run_ffmpeg(old_file, new_file, ['-f', new_format])
|
||||
|
||||
with io.open(new_file, 'rt', encoding='utf-8') as f:
|
||||
subs[lang] = {
|
||||
'ext': ext,
|
||||
'ext': new_ext,
|
||||
'data': f.read(),
|
||||
}
|
||||
|
||||
|
@ -1717,6 +1717,16 @@ def encode_dict(d, encoding='utf-8'):
|
||||
return dict((encode(k), encode(v)) for k, v in d.items())
|
||||
|
||||
|
||||
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
|
||||
if isinstance(key_or_keys, (list, tuple)):
|
||||
for key in key_or_keys:
|
||||
if key not in d or d[key] is None or skip_false_values and not d[key]:
|
||||
continue
|
||||
return d[key]
|
||||
return default
|
||||
return d.get(key_or_keys, default)
|
||||
|
||||
|
||||
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
|
||||
return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
|
||||
|
||||
@ -1739,7 +1749,7 @@ def parse_age_limit(s):
|
||||
|
||||
def strip_jsonp(code):
|
||||
return re.sub(
|
||||
r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
|
||||
r'(?s)^[a-zA-Z0-9_.]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
|
||||
|
||||
|
||||
def js_to_json(code):
|
||||
|
@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2016.02.04'
|
||||
__version__ = '2016.02.10'
|
||||
|
Reference in New Issue
Block a user