Compare commits

...

82 Commits

Author SHA1 Message Date
f817d9bec1 release 2016.02.10 2016-02-10 16:17:38 +01:00
e2effb08a4 [YoutubeDL] Sanitize format_id (Closes #8494) 2016-02-10 21:16:58 +06:00
7fcea295c5 [pbs] Switch to portal player by default (Closes #8491) 2016-02-10 20:46:38 +06:00
cc799437ea [youku] Report private videos (Closes #8498) 2016-02-10 20:05:17 +06:00
89d23f37f2 [hotstar] Relax _VALID_URL (Closes #8487) 2016-02-10 04:43:00 +06:00
b92071ef00 release 2016.02.09.1 2016-02-09 20:12:36 +01:00
47246ae26c [viddler] Update tests 2016-02-10 01:12:47 +06:00
9c15869c28 [viddler] Add support for secret videos (Closes #8481) 2016-02-10 01:09:07 +06:00
51e9094f4a [extractor/common] extract youtube dash formats filesize(fixes #8480) 2016-02-09 20:05:39 +01:00
5e3a6fec33 [fox] update test 2016-02-09 17:30:42 +01:00
d413095f7e [extractor/common] remove duplicated formats and subtiles in smil manifests 2016-02-09 17:15:41 +01:00
1bedf4de06 [fox] extract http formats 2016-02-09 17:12:34 +01:00
3967a761f4 [mailru] Fix tests 2016-02-09 21:31:51 +06:00
b081350bd9 [mailru] Improve and modernize 2016-02-09 21:30:48 +06:00
16f1430ba6 [mailru] Prefer metaUrl API (Closes #8474) 2016-02-09 21:14:02 +06:00
085ad71157 release 2016.02.09 2016-02-09 12:57:51 +01:00
35972ba172 [vk] Improve rutube embeds detection (Closes #8461) 2016-02-08 21:30:23 +06:00
3834d3e35c [youtube] Clarify itag 36 height and abr (Closes #8457) 2016-02-08 01:30:57 +06:00
8d0a2a2a4e [README.md] Fix typo 2016-02-07 21:23:29 +06:00
11c0339bec [README.md] Clarify quotes in output template 2016-02-07 21:22:33 +06:00
915dd77783 [README.md] Add output template example for streaming to stdout 2016-02-07 21:21:14 +06:00
b6bfa6fb79 [konserthusetplay] Reorder code pieces 2016-02-07 21:18:32 +06:00
f070197bd7 [konserthusetplay] Improve _VALID_URL 2016-02-07 21:16:31 +06:00
5a7699bb2e [konserthusetplay] Improve and extract all formats (Closes #8381) 2016-02-07 21:11:59 +06:00
8628d26f38 [KonserthusetPlay] Add new extractor (partial support) 2016-02-07 19:47:29 +06:00
8411229bd5 [utils] Allow dot in strip_jsonp 2016-02-07 19:47:09 +06:00
72b9ebc65d [README.md] Document extractor sequences in output template 2016-02-07 19:08:54 +06:00
3b799ca14c [README.md] Clarify percent literal and output to stdout 2016-02-07 19:06:42 +06:00
0474512e30 [README.md] Document even more sequences in output template 2016-02-07 19:00:59 +06:00
f0905c6ec3 [README.md] Document more sequences in output template 2016-02-07 18:45:44 +06:00
86296ad2cd [utils] Add ability to control skipping false values in dict_get 2016-02-07 08:13:04 +06:00
52f5889f77 [vlive] Improve and extract more metadata (Closes #8446) 2016-02-07 06:17:40 +06:00
81e0b4f2d1 Credit @EraYaN for vlive update (#8446) 2016-02-07 06:14:26 +06:00
cbecc9b903 [utils] Add dict_get convenience method 2016-02-07 06:12:53 +06:00
b8b465af3e [vlive] Updated to new V App/VLive api.
More robust with getting keys and ids from website.
2016-02-07 05:27:17 +06:00
59b35c6745 [IPrima] Remove test video_id 2016-02-06 21:42:24 +01:00
7032833011 [iprima] Follow pep8 2016-02-06 21:37:28 +01:00
f406c78785 [IPrima] Fix extractor (fixes #7617) 2016-02-06 21:23:41 +01:00
f326b5837a Merge pull request #8445 from bpfoley/rte-newurl
[rte:radio] Add support for RTMP downloads, alternate URL style
2016-02-07 00:28:39 +05:00
5dd4b3468f [rte:radio] Add support for RTMP downloads, alternate URL style
This is useful as
a) RTMP downloads are a good deal faster to download
b) Older items are available only as RTMP streams
2016-02-06 18:42:57 +00:00
d4f8e83404 [FFmpegSubtitlesConvertorPP] remove unused variable 2016-02-06 19:04:53 +01:00
7b8b007cd9 [FFmpegSubtitlesConvertorPP] remove intermediate srt files 2016-02-06 19:04:18 +01:00
3547d26587 [FFmpegSubtitlesConvertorPP] correctly update the extension (fixes #8444) 2016-02-06 18:58:18 +01:00
7e62c2eb6d [FFmpegSubtitlesConvertorPP] fix not working when srt is used as the intermediate format between ttml/dfxp and other format
It was trying to use the ttml/dfxp file with ffmpeg, which doesn't have support for them.
I broke it in e04398e397.
2016-02-06 18:51:05 +01:00
56401e1e5f [downloader/hls] Do not send 'q' to ffmpeg on Windows (Closes #8300) 2016-02-06 23:24:22 +06:00
860db2d508 [README.md] Fix typo 2016-02-06 22:40:20 +06:00
4b8874975c [README.md] Remove non-relevant info 2016-02-06 22:39:50 +06:00
bd6b6f6622 [README.md] Fix typo 2016-02-06 22:36:30 +06:00
4340727e6c [videomore] Fix typo 2016-02-06 22:36:30 +06:00
3ceccade87 [README.md] Improve output template documentation and add more examples 2016-02-06 22:33:49 +06:00
28ad7df65d [generic] detect MPD manfiest only from the content 2016-02-06 14:51:45 +01:00
79a3508579 [extractor/generic] Detect DASH manifests in found URLs and extract mpd formats 2016-02-06 19:42:03 +06:00
1b840245bd [extractor/generic] Detect DASH manifests and extract mpd formats 2016-02-06 19:35:32 +06:00
6a3828fddd [common] use float conversion instead of using division from __future__ 2016-02-06 14:27:04 +01:00
91cb6b5065 rename _parse_mpd to _parse_mpd_formats and add default value for mpd namespace 2016-02-06 14:03:48 +01:00
0826a0b555 [common] sort dash formats 2016-02-06 06:52:48 +01:00
bcbbb98bfe [generic] extract dash formats detected using content type 2016-02-06 06:47:38 +01:00
66159b38aa Merge pull request #8408 from remitamine/dash
Add generic support for mpd manifests(dash formats)
2016-02-06 06:26:02 +01:00
23d17e4beb [youtube] Fix automatic captions 2016-02-06 06:44:38 +06:00
d97b0e3241 [vidme] Clarify IE_NAMEs 2016-02-05 23:58:26 +06:00
eb2533ec4c [vidme:user:likes] Add extractor 2016-02-05 23:57:35 +06:00
b7b365067f [vidme:user] Add extractor (Closes #8416) 2016-02-05 23:32:22 +06:00
86e284e028 Merge branch 'master' of github.com:rg3/youtube-dl 2016-02-05 17:20:00 +01:00
d9e543b680 [spankbang] Add test with single format (#8398) 2016-02-05 22:16:56 +06:00
c773c232d8 [spankbang] Check formats (#8398) 2016-02-05 22:16:40 +06:00
58ae24336a [spankbang] Extend format id regex (Closes #8398) 2016-02-05 22:16:12 +06:00
7d3a035ee0 [ffmpeg] check for m3u8 protocol in FFmpegMetadataPP 2016-02-05 17:12:49 +01:00
e06e75c7e7 release 2016.02.05.1 2016-02-05 15:17:31 +01:00
593e0f43b4 [ffmpeg] fix condition(fixes #8440) 2016-02-05 15:12:06 +01:00
008ab0f814 release 2016.02.05 2016-02-05 11:04:00 +01:00
3f7e8750d4 [arte.tv:+7] Fix extraction (fixes #8427) 2016-02-04 20:16:47 +01:00
255732f0d3 [common] fix segment duration calculation 2016-02-03 23:57:08 +01:00
53c269c6fd [common] fix media_template string formating 2016-02-03 23:54:34 +01:00
675d001633 [common] skip drm protected dash formats 2016-02-03 18:44:43 +01:00
d577c79632 [common] ignore ISO 639-2 generic codes 2016-02-03 13:24:07 +01:00
f14be22816 [common] remove duplicate reference to namespace 2016-02-02 22:02:08 +01:00
9c74423510 [common] fix media template regex 2016-02-02 18:30:31 +01:00
5976e7ab57 [vevo] add support for dash formats 2016-02-02 18:13:01 +01:00
a1a22572fb [downloader/dash] make initialization_url optional 2016-02-02 18:12:32 +01:00
c11875b328 [facebook] use _parse_mpd 2016-02-02 18:11:16 +01:00
8ff648e4f9 [youtube] use _extract_mpd_formats 2016-02-02 18:10:23 +01:00
1bac34556f [common] add a generic support for mpd manifests 2016-02-02 18:09:25 +01:00
32 changed files with 777 additions and 293 deletions

View File

@ -156,3 +156,4 @@ Tom Gijselinck
Founder Fang Founder Fang
Andrew Alexeyew Andrew Alexeyew
Saso Bezlaj Saso Bezlaj
Erwin de Haan

View File

@ -442,30 +442,97 @@ On Windows you may also need to setup the `%HOME%` environment variable manually
The `-o` option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are: The `-o` option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:
- `id`: The sequence will be replaced by the video identifier. - `id`: Video identifier
- `url`: The sequence will be replaced by the video URL. - `title`: Video title
- `uploader`: The sequence will be replaced by the nickname of the person who uploaded the video. - `url`: Video URL
- `upload_date`: The sequence will be replaced by the upload date in YYYYMMDD format. - `ext`: Video filename extension
- `title`: The sequence will be replaced by the video title. - `alt_title`: A secondary title of the video
- `ext`: The sequence will be replaced by the appropriate extension (like flv or mp4). - `display_id`: An alternative identifier for the video
- `epoch`: The sequence will be replaced by the Unix epoch when creating the file. - `uploader`: Full name of the video uploader
- `autonumber`: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero. - `creator`: The main artist who created the video
- `playlist`: The sequence will be replaced by the name or the id of the playlist that contains the video. - `release_date`: The date (YYYYMMDD) when the video was released
- `playlist_index`: The sequence will be replaced by the index of the video in the playlist padded with leading zeros according to the total length of the playlist. - `timestamp`: UNIX timestamp of the moment the video became available
- `format_id`: The sequence will be replaced by the format code specified by `--format`. - `upload_date`: Video upload date (YYYYMMDD)
- `duration`: The sequence will be replaced by the length of the video in seconds. - `uploader_id`: Nickname or id of the video uploader
- `location`: Physical location where the video was filmed
- `duration`: Length of the video in seconds
- `view_count`: How many users have watched the video on the platform
- `like_count`: Number of positive ratings of the video
- `dislike_count`: Number of negative ratings of the video
- `repost_count`: Number of reposts of the video
- `average_rating`: Average rating give by users, the scale used depends on the webpage
- `comment_count`: Number of comments on the video
- `age_limit`: Age restriction for the video (years)
- `format`: A human-readable description of the format
- `format_id`: Format code specified by `--format`
- `format_note`: Additional info about the format
- `width`: Width of the video
- `height`: Height of the video
- `resolution`: Textual description of width and height
- `tbr`: Average bitrate of audio and video in KBit/s
- `abr`: Average audio bitrate in KBit/s
- `acodec`: Name of the audio codec in use
- `asr`: Audio sampling rate in Hertz
- `vbr`: Average video bitrate in KBit/s
- `fps`: Frame rate
- `vcodec`: Name of the video codec in use
- `container`: Name of the container format
- `filesize`: The number of bytes, if known in advance
- `filesize_approx`: An estimate for the number of bytes
- `protocol`: The protocol that will be used for the actual download
- `extractor`: Name of the extractor
- `extractor_key`: Key name of the extractor
- `epoch`: Unix epoch when creating the file
- `autonumber`: Five-digit number that will be increased with each download, starting at zero
- `playlist`: Name or id of the playlist that contains the video
- `playlist_index`: Index of the video in the playlist padded with leading zeros according to the total length of the playlist
Note that some of the aforementioned sequences are not guaranteed to be present since they depend on the metadata obtained by particular extractor, such sequences will be replaced with `NA`. Available for the video that belongs to some logical chapter or section:
- `chapter`: Name or title of the chapter the video belongs to
- `chapter_number`: Number of the chapter the video belongs to
- `chapter_id`: Id of the chapter the video belongs to
Available for the video that is an episode of some series or programme:
- `series`: Title of the series or programme the video episode belongs to
- `season`: Title of the season the video episode belongs to
- `season_number`: Number of the season the video episode belongs to
- `season_id`: Id of the season the video episode belongs to
- `episode`: Title of the video episode
- `episode_number`: Number of the video episode within a season
- `episode_id`: Id of the video episode
Each aforementioned sequence when referenced in output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by particular extractor, such sequences will be replaced with `NA`.
For example for `-o %(title)s-%(id)s.%(ext)s` and mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj` this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
Output template can also contain arbitrary hierarchical path, e.g. `-o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s'` that will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you.
To specify percent literal in output template use `%%`. To output to stdout use `-o -`.
The current default template is `%(title)s-%(id)s.%(ext)s`. The current default template is `%(title)s-%(id)s.%(ext)s`.
In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title: In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
Examples (note on Windows you may need to use double quotes instead of single):
```bash ```bash
$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc $ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc
youtube-dl test video ''_ä↭𝕐.mp4 # All kinds of weird characters youtube-dl test video ''_ä↭𝕐.mp4 # All kinds of weird characters
$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
$ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc --restrict-filenames
youtube-dl_test_video_.mp4 # A simple file name youtube-dl_test_video_.mp4 # A simple file name
# Download YouTube playlist videos in separate directory indexed by video order in a playlist
$ youtube-dl -o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re
# Download Udemy course keeping each chapter in separate directory under MyVideos directory in your home
$ youtube-dl -u user -p password -o '~/MyVideos/%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s' https://www.udemy.com/java-tutorial/
# Download entire series season keeping each series and each season in separate directory under C:/MyVideos
$ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" http://videomore.ru/kino_v_detalayah/5_sezon/367617
# Stream the video being downloaded to stdout
$ youtube-dl -o - BaW_jenozKc
``` ```
# FORMAT SELECTION # FORMAT SELECTION

View File

@ -262,7 +262,7 @@
- **Instagram** - **Instagram**
- **instagram:user**: Instagram user profile - **instagram:user**: Instagram user profile
- **InternetVideoArchive** - **InternetVideoArchive**
- **IPrima** (Currently broken) - **IPrima**
- **iqiyi**: 爱奇艺 - **iqiyi**: 爱奇艺
- **Ir90Tv** - **Ir90Tv**
- **ivi**: ivi.ru - **ivi**: ivi.ru
@ -283,6 +283,7 @@
- **KeezMovies** - **KeezMovies**
- **KhanAcademy** - **KhanAcademy**
- **KickStarter** - **KickStarter**
- **KonserthusetPlay**
- **kontrtube**: KontrTube.ru - Труба зовёт - **kontrtube**: KontrTube.ru - Труба зовёт
- **KrasView**: Красвью - **KrasView**: Красвью
- **Ku6** - **Ku6**
@ -681,7 +682,9 @@
- **VideoPremium** - **VideoPremium**
- **VideoTt**: video.tt - Your True Tube (Currently broken) - **VideoTt**: video.tt - Your True Tube (Currently broken)
- **videoweed**: VideoWeed - **videoweed**: VideoWeed
- **Vidme** - **vidme**
- **vidme:user**
- **vidme:user:likes**
- **Vidzi** - **Vidzi**
- **vier** - **vier**
- **vier:videos** - **vier:videos**

View File

@ -249,7 +249,7 @@ class TestFormatSelection(unittest.TestCase):
def format_info(f_id): def format_info(f_id):
info = YoutubeIE._formats[f_id].copy() info = YoutubeIE._formats[f_id].copy()
# XXX: In real cases InfoExtractor._parse_mpd() fills up 'acodec' # XXX: In real cases InfoExtractor._parse_mpd_formats() fills up 'acodec'
# and 'vcodec', while in tests such information is incomplete since # and 'vcodec', while in tests such information is incomplete since
# commit a6c2c24479e5f4827ceb06f64d855329c0a6f593 # commit a6c2c24479e5f4827ceb06f64d855329c0a6f593
# test_YoutubeDL.test_youtube_format_selection is broken without # test_YoutubeDL.test_youtube_format_selection is broken without

View File

@ -22,6 +22,7 @@ from youtube_dl.utils import (
DateRange, DateRange,
detect_exe_version, detect_exe_version,
determine_ext, determine_ext,
dict_get,
encode_compat_str, encode_compat_str,
encodeFilename, encodeFilename,
escape_rfc3986, escape_rfc3986,
@ -450,6 +451,28 @@ class TestUtil(unittest.TestCase):
data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'}) data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
self.assertTrue(isinstance(data, bytes)) self.assertTrue(isinstance(data, bytes))
def test_dict_get(self):
FALSE_VALUES = {
'none': None,
'false': False,
'zero': 0,
'empty_string': '',
'empty_list': [],
}
d = FALSE_VALUES.copy()
d['a'] = 42
self.assertEqual(dict_get(d, 'a'), 42)
self.assertEqual(dict_get(d, 'b'), None)
self.assertEqual(dict_get(d, 'b', 42), 42)
self.assertEqual(dict_get(d, ('a', )), 42)
self.assertEqual(dict_get(d, ('b', 'a', )), 42)
self.assertEqual(dict_get(d, ('b', 'c', 'a', 'd', )), 42)
self.assertEqual(dict_get(d, ('b', 'c', )), None)
self.assertEqual(dict_get(d, ('b', 'c', ), 42), 42)
for key, false_value in FALSE_VALUES.items():
self.assertEqual(dict_get(d, ('b', 'c', key, )), None)
self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value)
def test_encode_compat_str(self): def test_encode_compat_str(self):
self.assertEqual(encode_compat_str(b'\xd1\x82\xd0\xb5\xd1\x81\xd1\x82', 'utf-8'), 'тест') self.assertEqual(encode_compat_str(b'\xd1\x82\xd0\xb5\xd1\x81\xd1\x82', 'utf-8'), 'тест')
self.assertEqual(encode_compat_str('тест', 'utf-8'), 'тест') self.assertEqual(encode_compat_str('тест', 'utf-8'), 'тест')
@ -471,6 +494,10 @@ class TestUtil(unittest.TestCase):
d = json.loads(stripped) d = json.loads(stripped)
self.assertEqual(d, {'STATUS': 'OK'}) self.assertEqual(d, {'STATUS': 'OK'})
stripped = strip_jsonp('ps.embedHandler({"status": "success"});')
d = json.loads(stripped)
self.assertEqual(d, {'status': 'success'})
def test_uppercase_escape(self): def test_uppercase_escape(self):
self.assertEqual(uppercase_escape(''), '') self.assertEqual(uppercase_escape(''), '')
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐') self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')

View File

@ -1288,6 +1288,9 @@ class YoutubeDL(object):
if format.get('format_id') is None: if format.get('format_id') is None:
format['format_id'] = compat_str(i) format['format_id'] = compat_str(i)
else:
# Sanitize format_id from characters used in format selector expression
format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id'])
format_id = format['format_id'] format_id = format['format_id']
if format_id not in formats_dict: if format_id not in formats_dict:
formats_dict[format_id] = [] formats_dict[format_id] = []

View File

@ -40,9 +40,10 @@ class DashSegmentsFD(FileDownloader):
return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url) return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
with open(tmpfilename, 'wb') as outf: with open(tmpfilename, 'wb') as outf:
append_url_to_file( if info_dict.get('initialization_url'):
outf, combine_url(base_url, info_dict['initialization_url']), append_url_to_file(
'initialization segment') outf, combine_url(base_url, info_dict['initialization_url']),
'initialization segment')
for i, segment_url in enumerate(segment_urls): for i, segment_url in enumerate(segment_urls):
segment_len = append_url_to_file( segment_len = append_url_to_file(
outf, combine_url(base_url, segment_url), outf, combine_url(base_url, segment_url),

View File

@ -3,6 +3,7 @@ from __future__ import unicode_literals
import os import os
import re import re
import subprocess import subprocess
import sys
from .common import FileDownloader from .common import FileDownloader
from .fragment import FragmentFD from .fragment import FragmentFD
@ -57,8 +58,10 @@ class HlsFD(FileDownloader):
# subprocces.run would send the SIGKILL signal to ffmpeg and the # subprocces.run would send the SIGKILL signal to ffmpeg and the
# mp4 file couldn't be played, but if we ask ffmpeg to quit it # mp4 file couldn't be played, but if we ask ffmpeg to quit it
# produces a file that is playable (this is mostly useful for live # produces a file that is playable (this is mostly useful for live
# streams) # streams). Note that Windows is not affected and produces playable
proc.communicate(b'q') # files (see https://github.com/rg3/youtube-dl/issues/8300).
if sys.platform != 'win32':
proc.communicate(b'q')
raise raise
if retval == 0: if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename)) fsize = os.path.getsize(encodeFilename(tmpfilename))

View File

@ -328,6 +328,7 @@ from .keezmovies import KeezMoviesIE
from .khanacademy import KhanAcademyIE from .khanacademy import KhanAcademyIE
from .kickstarter import KickStarterIE from .kickstarter import KickStarterIE
from .keek import KeekIE from .keek import KeekIE
from .konserthusetplay import KonserthusetPlayIE
from .kontrtube import KontrTubeIE from .kontrtube import KontrTubeIE
from .krasview import KrasViewIE from .krasview import KrasViewIE
from .ku6 import Ku6IE from .ku6 import Ku6IE
@ -822,7 +823,11 @@ from .videomore import (
) )
from .videopremium import VideoPremiumIE from .videopremium import VideoPremiumIE
from .videott import VideoTtIE from .videott import VideoTtIE
from .vidme import VidmeIE from .vidme import (
VidmeIE,
VidmeUserIE,
VidmeUserLikesIE,
)
from .vidzi import VidziIE from .vidzi import VidziIE
from .vier import VierIE, VierVideosIE from .vier import VierIE, VierVideosIE
from .viewster import ViewsterIE from .viewster import ViewsterIE

View File

@ -13,6 +13,7 @@ from ..utils import (
unified_strdate, unified_strdate,
get_element_by_attribute, get_element_by_attribute,
int_or_none, int_or_none,
NO_DEFAULT,
qualities, qualities,
) )
@ -93,9 +94,18 @@ class ArteTVPlus7IE(InfoExtractor):
json_url = self._html_search_regex( json_url = self._html_search_regex(
patterns, webpage, 'json vp url', default=None) patterns, webpage, 'json vp url', default=None)
if not json_url: if not json_url:
iframe_url = self._html_search_regex( def find_iframe_url(webpage, default=NO_DEFAULT):
r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1', return self._html_search_regex(
webpage, 'iframe url', group='url') r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
webpage, 'iframe url', group='url', default=default)
iframe_url = find_iframe_url(webpage, None)
if not iframe_url:
embed_url = self._html_search_regex(
r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url')
player = self._download_json(
embed_url, video_id, 'Downloading player page')
iframe_url = find_iframe_url(player['html'])
json_url = compat_parse_qs( json_url = compat_parse_qs(
compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0] compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
return self._extract_from_json_url(json_url, video_id, lang) return self._extract_from_json_url(json_url, video_id, lang)

View File

@ -10,6 +10,7 @@ import re
import socket import socket
import sys import sys
import time import time
import math
from ..compat import ( from ..compat import (
compat_cookiejar, compat_cookiejar,
@ -44,6 +45,7 @@ from ..utils import (
xpath_text, xpath_text,
xpath_with_ns, xpath_with_ns,
determine_protocol, determine_protocol,
parse_duration,
) )
@ -1184,6 +1186,7 @@ class InfoExtractor(object):
http_count = 0 http_count = 0
m3u8_count = 0 m3u8_count = 0
src_urls = []
videos = smil.findall(self._xpath_ns('.//video', namespace)) videos = smil.findall(self._xpath_ns('.//video', namespace))
for video in videos: for video in videos:
src = video.get('src') src = video.get('src')
@ -1220,6 +1223,9 @@ class InfoExtractor(object):
continue continue
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src) src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
if src_url in src_urls:
continue
src_urls.append(src_url)
if proto == 'm3u8' or src_ext == 'm3u8': if proto == 'm3u8' or src_ext == 'm3u8':
m3u8_formats = self._extract_m3u8_formats( m3u8_formats = self._extract_m3u8_formats(
@ -1265,11 +1271,13 @@ class InfoExtractor(object):
return formats return formats
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
urls = []
subtitles = {} subtitles = {}
for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))): for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
src = textstream.get('src') src = textstream.get('src')
if not src: if not src or src in urls:
continue continue
urls.append(src)
ext = textstream.get('ext') or determine_ext(src) ext = textstream.get('ext') or determine_ext(src)
if not ext: if not ext:
type_ = textstream.get('type') type_ = textstream.get('type')
@ -1330,81 +1338,159 @@ class InfoExtractor(object):
}) })
return entries return entries
def _download_dash_manifest(self, dash_manifest_url, video_id, fatal=True): def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}):
return self._download_xml( res = self._download_webpage_handle(
dash_manifest_url, video_id, mpd_url, video_id,
note='Downloading DASH manifest', note=note or 'Downloading MPD manifest',
errnote='Could not download DASH manifest', errnote=errnote or 'Failed to download MPD manifest',
fatal=fatal) fatal=fatal)
if res is False:
return []
mpd, urlh = res
mpd_base_url = re.match(r'https?://.+/', urlh.geturl()).group()
def _extract_dash_manifest_formats(self, dash_manifest_url, video_id, fatal=True, namespace=None, formats_dict={}): return self._parse_mpd_formats(
dash_doc = self._download_dash_manifest(dash_manifest_url, video_id, fatal) compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, formats_dict=formats_dict)
if dash_doc is False:
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}):
if mpd_doc.get('type') == 'dynamic':
return [] return []
return self._parse_dash_manifest( namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None)
dash_doc, namespace=namespace, formats_dict=formats_dict)
def _parse_dash_manifest(self, dash_doc, namespace=None, formats_dict={}):
def _add_ns(path): def _add_ns(path):
return self._xpath_ns(path, namespace) return self._xpath_ns(path, namespace)
formats = [] def is_drm_protected(element):
for a in dash_doc.findall('.//' + _add_ns('AdaptationSet')): return element.find(_add_ns('ContentProtection')) is not None
mime_type = a.attrib.get('mimeType')
for r in a.findall(_add_ns('Representation')):
mime_type = r.attrib.get('mimeType') or mime_type
url_el = r.find(_add_ns('BaseURL'))
if mime_type == 'text/vtt':
# TODO implement WebVTT downloading
pass
elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
segment_list = r.find(_add_ns('SegmentList'))
format_id = r.attrib['id']
video_url = url_el.text if url_el is not None else None
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
f = {
'format_id': format_id,
'url': video_url,
'width': int_or_none(r.attrib.get('width')),
'height': int_or_none(r.attrib.get('height')),
'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
'asr': int_or_none(r.attrib.get('audioSamplingRate')),
'filesize': filesize,
'fps': int_or_none(r.attrib.get('frameRate')),
}
if segment_list is not None:
initialization_url = segment_list.find(_add_ns('Initialization')).attrib['sourceURL']
f.update({
'initialization_url': initialization_url,
'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall(_add_ns('SegmentURL'))],
'protocol': 'http_dash_segments',
})
if not f.get('url'):
f['url'] = initialization_url
try:
existing_format = next(
fo for fo in formats
if fo['format_id'] == format_id)
except StopIteration:
full_info = formats_dict.get(format_id, {}).copy()
full_info.update(f)
codecs = r.attrib.get('codecs')
if codecs:
if mime_type.startswith('video/'):
vcodec, acodec = codecs, 'none'
else: # mime_type.startswith('audio/')
vcodec, acodec = 'none', codecs
full_info.update({ def extract_multisegment_info(element, ms_parent_info):
'vcodec': vcodec, ms_info = ms_parent_info.copy()
'acodec': acodec, segment_list = element.find(_add_ns('SegmentList'))
}) if segment_list is not None:
formats.append(full_info) segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
if segment_urls_e:
ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e]
initialization = segment_list.find(_add_ns('Initialization'))
if initialization is not None:
ms_info['initialization_url'] = initialization.attrib['sourceURL']
else:
segment_template = element.find(_add_ns('SegmentTemplate'))
if segment_template is not None:
start_number = segment_template.get('startNumber')
if start_number:
ms_info['start_number'] = int(start_number)
segment_timeline = segment_template.find(_add_ns('SegmentTimeline'))
if segment_timeline is not None:
s_e = segment_timeline.findall(_add_ns('S'))
if s_e:
ms_info['total_number'] = 0
for s in s_e:
ms_info['total_number'] += 1 + int(s.get('r', '0'))
else: else:
existing_format.update(f) timescale = segment_template.get('timescale')
else: if timescale:
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) ms_info['timescale'] = int(timescale)
segment_duration = segment_template.get('duration')
if segment_duration:
ms_info['segment_duration'] = int(segment_duration)
media_template = segment_template.get('media')
if media_template:
ms_info['media_template'] = media_template
initialization = segment_template.get('initialization')
if initialization:
ms_info['initialization_url'] = initialization
else:
initialization = segment_template.find(_add_ns('Initialization'))
if initialization is not None:
ms_info['initialization_url'] = initialization.attrib['sourceURL']
return ms_info
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
formats = []
for period in mpd_doc.findall(_add_ns('Period')):
period_duration = parse_duration(period.get('duration')) or mpd_duration
period_ms_info = extract_multisegment_info(period, {
'start_number': 1,
'timescale': 1,
})
for adaptation_set in period.findall(_add_ns('AdaptationSet')):
if is_drm_protected(adaptation_set):
continue
adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info)
for representation in adaptation_set.findall(_add_ns('Representation')):
if is_drm_protected(representation):
continue
representation_attrib = adaptation_set.attrib.copy()
representation_attrib.update(representation.attrib)
mime_type = representation_attrib.get('mimeType')
content_type = mime_type.split('/')[0] if mime_type else representation_attrib.get('contentType')
if content_type == 'text':
# TODO implement WebVTT downloading
pass
elif content_type == 'video' or content_type == 'audio':
base_url = ''
for element in (representation, adaptation_set, period, mpd_doc):
base_url_e = element.find(_add_ns('BaseURL'))
if base_url_e is not None:
base_url = base_url_e.text + base_url
if re.match(r'^https?://', base_url):
break
if not re.match(r'^https?://', base_url):
base_url = mpd_base_url + base_url
representation_id = representation_attrib.get('id')
lang = representation_attrib.get('lang')
url_el = representation.find(_add_ns('BaseURL'))
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
f = {
'format_id': mpd_id or representation_id,
'url': base_url,
'width': int_or_none(representation_attrib.get('width')),
'height': int_or_none(representation_attrib.get('height')),
'tbr': int_or_none(representation_attrib.get('bandwidth'), 1000),
'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
'fps': int_or_none(representation_attrib.get('frameRate')),
'vcodec': 'none' if content_type == 'audio' else representation_attrib.get('codecs'),
'acodec': 'none' if content_type == 'video' else representation_attrib.get('codecs'),
'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
'format_note': 'DASH %s' % content_type,
'filesize': filesize,
}
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
if 'total_number' not in representation_ms_info and 'segment_duration':
segment_duration = float(representation_ms_info['segment_duration']) / float(representation_ms_info['timescale'])
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
media_template = representation_ms_info['media_template']
media_template = media_template.replace('$RepresentationID$', representation_id)
media_template = re.sub(r'\$(Number|Bandwidth)(?:%(0\d+)d)?\$', r'%(\1)\2d', media_template)
media_template.replace('$$', '$')
representation_ms_info['segment_urls'] = [media_template % {'Number': segment_number, 'Bandwidth': representation_attrib.get('bandwidth')} for segment_number in range(representation_ms_info['start_number'], representation_ms_info['total_number'] + representation_ms_info['start_number'])]
if 'segment_urls' in representation_ms_info:
f.update({
'segment_urls': representation_ms_info['segment_urls'],
'protocol': 'http_dash_segments',
})
if 'initialization_url' in representation_ms_info:
initialization_url = representation_ms_info['initialization_url'].replace('$RepresentationID$', representation_id)
f.update({
'initialization_url': initialization_url,
})
if not f.get('url'):
f['url'] = initialization_url
try:
existing_format = next(
fo for fo in formats
if fo['format_id'] == representation_id)
except StopIteration:
full_info = formats_dict.get(representation_id, {}).copy()
full_info.update(f)
formats.append(full_info)
else:
existing_format.update(f)
else:
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
self._sort_formats(formats)
return formats return formats
def _live_title(self, name): def _live_title(self, name):

View File

@ -215,9 +215,8 @@ class FacebookIE(InfoExtractor):
}) })
dash_manifest = f[0].get('dash_manifest') dash_manifest = f[0].get('dash_manifest')
if dash_manifest: if dash_manifest:
formats.extend(self._parse_dash_manifest( formats.extend(self._parse_mpd_formats(
compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest)), compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest))))
namespace='urn:mpeg:dash:schema:mpd:2011'))
if not formats: if not formats:
raise ExtractorError('Cannot find video formats') raise ExtractorError('Cannot find video formats')

View File

@ -9,6 +9,7 @@ class FOXIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)'
_TEST = { _TEST = {
'url': 'http://www.fox.com/watch/255180355939/7684182528', 'url': 'http://www.fox.com/watch/255180355939/7684182528',
'md5': 'ebd296fcc41dd4b19f8115d8461a3165',
'info_dict': { 'info_dict': {
'id': '255180355939', 'id': '255180355939',
'ext': 'mp4', 'ext': 'mp4',
@ -17,10 +18,6 @@ class FOXIE(InfoExtractor):
'duration': 129, 'duration': 129,
}, },
'add_ie': ['ThePlatform'], 'add_ie': ['ThePlatform'],
'params': {
# m3u8 download
'skip_download': True,
},
} }
def _real_extract(self, url): def _real_extract(self, url):
@ -29,7 +26,7 @@ class FOXIE(InfoExtractor):
release_url = self._parse_json(self._search_regex( release_url = self._parse_json(self._search_regex(
r'"fox_pdk_player"\s*:\s*({[^}]+?})', webpage, 'fox_pdk_player'), r'"fox_pdk_player"\s*:\s*({[^}]+?})', webpage, 'fox_pdk_player'),
video_id)['release_url'] + '&manifest=m3u' video_id)['release_url'] + '&switch=http'
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',

View File

@ -1289,7 +1289,7 @@ class GenericIE(InfoExtractor):
self.report_extraction(video_id) self.report_extraction(video_id)
# Is it an RSS feed, a SMIL file or a XSPF playlist? # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
try: try:
doc = compat_etree_fromstring(webpage.encode('utf-8')) doc = compat_etree_fromstring(webpage.encode('utf-8'))
if doc.tag == 'rss': if doc.tag == 'rss':
@ -1298,6 +1298,12 @@ class GenericIE(InfoExtractor):
return self._parse_smil(doc, url, video_id) return self._parse_smil(doc, url, video_id)
elif doc.tag == '{http://xspf.org/ns/0/}playlist': elif doc.tag == '{http://xspf.org/ns/0/}playlist':
return self.playlist_result(self._parse_xspf(doc, video_id), video_id) return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
return {
'id': video_id,
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
'formats': self._parse_mpd_formats(doc, video_id),
}
except compat_xml_parse_error: except compat_xml_parse_error:
pass pass
@ -1951,6 +1957,8 @@ class GenericIE(InfoExtractor):
return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id) return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
elif ext == 'm3u8': elif ext == 'm3u8':
entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4') entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
elif ext == 'mpd':
entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
else: else:
entry_info_dict['url'] = video_url entry_info_dict['url'] = video_url

View File

@ -10,8 +10,8 @@ from ..utils import (
class HotStarIE(InfoExtractor): class HotStarIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/.*?[/-](?P<id>\d{10})' _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
_TEST = { _TESTS = [{
'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273', 'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273',
'info_dict': { 'info_dict': {
'id': '1000076273', 'id': '1000076273',
@ -26,7 +26,13 @@ class HotStarIE(InfoExtractor):
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
} }
} }, {
'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583',
'only_matching': True,
}, {
'url': 'http://www.hotstar.com/1000000515',
'only_matching': True,
}]
_GET_CONTENT_TEMPLATE = 'http://account.hotstar.com/AVS/besc?action=GetAggregatedContentDetails&channel=PCTV&contentId=%s' _GET_CONTENT_TEMPLATE = 'http://account.hotstar.com/AVS/besc?action=GetAggregatedContentDetails&channel=PCTV&contentId=%s'
_GET_CDN_TEMPLATE = 'http://getcdn.hotstar.com/AVS/besc?action=GetCDN&asJson=Y&channel=%s&id=%s&type=%s' _GET_CDN_TEMPLATE = 'http://getcdn.hotstar.com/AVS/besc?action=GetCDN&asJson=Y&channel=%s&id=%s&type=%s'

View File

@ -2,46 +2,30 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
from random import random import time
from math import floor
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError,
remove_end,
sanitized_Request, sanitized_Request,
) )
class IPrimaIE(InfoExtractor): class IPrimaIE(InfoExtractor):
_WORKING = False _VALID_URL = r'https?://play\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)'
_VALID_URL = r'https?://play\.iprima\.cz/(?:[^/]+/)*(?P<id>[^?#]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://play.iprima.cz/gondici-s-r-o-33',
'info_dict': {
'id': 'p136534',
'ext': 'mp4',
'title': 'Gondíci s. r. o. (34)',
'description': 'md5:16577c629d006aa91f59ca8d8e7f99bd',
},
'params': {
'skip_download': True, # m3u8 download
},
}, {
'url': 'http://play.iprima.cz/particka/particka-92', 'url': 'http://play.iprima.cz/particka/particka-92',
'info_dict': {
'id': '39152',
'ext': 'flv',
'title': 'Partička (92)',
'description': 'md5:74e9617e51bca67c3ecfb2c6f9766f45',
'thumbnail': 'http://play.iprima.cz/sites/default/files/image_crops/image_620x349/3/491483_particka-92_image_620x349.jpg',
},
'params': {
'skip_download': True, # requires rtmpdump
},
}, {
'url': 'http://play.iprima.cz/particka/tchibo-particka-jarni-moda',
'info_dict': {
'id': '9718337',
'ext': 'flv',
'title': 'Tchibo Partička - Jarní móda',
'thumbnail': 're:^http:.*\.jpg$',
},
'params': {
'skip_download': True, # requires rtmpdump
},
}, {
'url': 'http://play.iprima.cz/zpravy-ftv-prima-2752015',
'only_matching': True, 'only_matching': True,
}] }]
@ -51,62 +35,24 @@ class IPrimaIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
if re.search(r'Nemáte oprávnění přistupovat na tuto stránku\.\s*</div>', webpage): video_id = self._search_regex(r'data-product="([^"]+)">', webpage, 'real id')
raise ExtractorError(
'%s said: You do not have permission to access this page' % self.IE_NAME, expected=True)
player_url = ( req = sanitized_Request(
'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' % 'http://play.iprima.cz/prehravac/init?_infuse=1'
(floor(random() * 1073741824), floor(random() * 1073741824)) '&_ts=%s&productId=%s' % (round(time.time()), video_id))
)
req = sanitized_Request(player_url)
req.add_header('Referer', url) req.add_header('Referer', url)
playerpage = self._download_webpage(req, video_id) playerpage = self._download_webpage(req, video_id, note='Downloading player')
base_url = ''.join(re.findall(r"embed\['stream'\] = '(.+?)'.+'(\?auth=)'.+'(.+?)';", playerpage)[1]) m3u8_url = self._search_regex(r"'src': '([^']+\.m3u8)'", playerpage, 'm3u8 url')
zoneGEO = self._html_search_regex(r'"zoneGEO":(.+?),', webpage, 'zoneGEO') formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
if zoneGEO != '0':
base_url = base_url.replace('token', 'token_' + zoneGEO)
formats = []
for format_id in ['lq', 'hq', 'hd']:
filename = self._html_search_regex(
r'"%s_id":(.+?),' % format_id, webpage, 'filename')
if filename == 'null':
continue
real_id = self._search_regex(
r'Prima-(?:[0-9]{10}|WEB)-([0-9]+)[-_]',
filename, 'real video id')
if format_id == 'lq':
quality = 0
elif format_id == 'hq':
quality = 1
elif format_id == 'hd':
quality = 2
filename = 'hq/' + filename
formats.append({
'format_id': format_id,
'url': base_url,
'quality': quality,
'play_path': 'mp4:' + filename.replace('"', '')[:-4],
'rtmp_live': True,
'ext': 'flv',
})
self._sort_formats(formats) self._sort_formats(formats)
return { return {
'id': real_id, 'id': video_id,
'title': remove_end(self._og_search_title(webpage), ' | Prima PLAY'), 'title': self._og_search_title(webpage),
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': self._og_search_thumbnail(webpage),
'formats': formats, 'formats': formats,
'description': self._search_regex( 'description': self._og_search_description(webpage),
r'<p[^>]+itemprop="description"[^>]*>([^<]+)',
webpage, 'description', default=None),
} }

View File

@ -0,0 +1,107 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
float_or_none,
int_or_none,
)
class KonserthusetPlayIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?konserthusetplay\.se/\?.*\bm=(?P<id>[^&]+)'
_TEST = {
'url': 'http://www.konserthusetplay.se/?m=CKDDnlCY-dhWAAqiMERd-A',
'info_dict': {
'id': 'CKDDnlCY-dhWAAqiMERd-A',
'ext': 'flv',
'title': 'Orkesterns instrument: Valthornen',
'description': 'md5:f10e1f0030202020396a4d712d2fa827',
'thumbnail': 're:^https?://.*$',
'duration': 398.8,
},
'params': {
# rtmp download
'skip_download': True,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
e = self._search_regex(
r'https?://csp\.picsearch\.com/rest\?.*\be=(.+?)[&"\']', webpage, 'e')
rest = self._download_json(
'http://csp.picsearch.com/rest?e=%s&containerId=mediaplayer&i=object' % e,
video_id, transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])
media = rest['media']
player_config = media['playerconfig']
playlist = player_config['playlist']
source = next(f for f in playlist if f.get('bitrates'))
FORMAT_ID_REGEX = r'_([^_]+)_h264m\.mp4'
formats = []
fallback_url = source.get('fallbackUrl')
fallback_format_id = None
if fallback_url:
fallback_format_id = self._search_regex(
FORMAT_ID_REGEX, fallback_url, 'format id', default=None)
connection_url = (player_config.get('rtmp', {}).get(
'netConnectionUrl') or player_config.get(
'plugins', {}).get('bwcheck', {}).get('netConnectionUrl'))
if connection_url:
for f in source['bitrates']:
video_url = f.get('url')
if not video_url:
continue
format_id = self._search_regex(
FORMAT_ID_REGEX, video_url, 'format id', default=None)
f_common = {
'vbr': int_or_none(f.get('bitrate')),
'width': int_or_none(f.get('width')),
'height': int_or_none(f.get('height')),
}
f = f_common.copy()
f.update({
'url': connection_url,
'play_path': video_url,
'format_id': 'rtmp-%s' % format_id if format_id else 'rtmp',
'ext': 'flv',
})
formats.append(f)
if format_id and format_id == fallback_format_id:
f = f_common.copy()
f.update({
'url': fallback_url,
'format_id': 'http-%s' % format_id if format_id else 'http',
})
formats.append(f)
if not formats and fallback_url:
formats.append({
'url': fallback_url,
})
self._sort_formats(formats)
title = player_config.get('title') or media['title']
description = player_config.get('mediaInfo', {}).get('description')
thumbnail = media.get('image')
duration = float_or_none(media.get('duration'), 1000)
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
}

View File

@ -4,6 +4,10 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import (
int_or_none,
remove_end,
)
class MailRuIE(InfoExtractor): class MailRuIE(InfoExtractor):
@ -34,14 +38,30 @@ class MailRuIE(InfoExtractor):
'id': '46843144_1263', 'id': '46843144_1263',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion', 'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion',
'timestamp': 1397217632, 'timestamp': 1397039888,
'upload_date': '20140411', 'upload_date': '20140409',
'uploader': 'hitech', 'uploader': 'hitech@corp.mail.ru',
'uploader_id': 'hitech@corp.mail.ru', 'uploader_id': 'hitech@corp.mail.ru',
'duration': 245, 'duration': 245,
}, },
'skip': 'Not accessible from Travis CI server', 'skip': 'Not accessible from Travis CI server',
}, },
{
# only available via metaUrl API
'url': 'http://my.mail.ru/mail/720pizle/video/_myvideo/502.html',
'md5': '3b26d2491c6949d031a32b96bd97c096',
'info_dict': {
'id': '56664382_502',
'ext': 'mp4',
'title': ':8336',
'timestamp': 1449094163,
'upload_date': '20151202',
'uploader': '720pizle@mail.ru',
'uploader_id': '720pizle@mail.ru',
'duration': 6001,
},
'skip': 'Not accessible from Travis CI server',
}
] ]
def _real_extract(self, url): def _real_extract(self, url):
@ -51,32 +71,55 @@ class MailRuIE(InfoExtractor):
if not video_id: if not video_id:
video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix') video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix')
video_data = self._download_json( webpage = self._download_webpage(url, video_id)
'http://api.video.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON')
author = video_data['author'] video_data = None
uploader = author['name']
uploader_id = author.get('id') or author.get('email') page_config = self._parse_json(self._search_regex(
view_count = video_data.get('views_count') r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>',
webpage, 'page config', default='{}'), video_id, fatal=False)
if page_config:
meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl')
if meta_url:
video_data = self._download_json(
meta_url, video_id, 'Downloading video meta JSON', fatal=False)
# Fallback old approach
if not video_data:
video_data = self._download_json(
'http://api.video.mail.ru/videos/%s.json?new=1' % video_id,
video_id, 'Downloading video JSON')
formats = []
for f in video_data['videos']:
video_url = f.get('url')
if not video_url:
continue
format_id = f.get('key')
height = int_or_none(self._search_regex(
r'^(\d+)[pP]$', format_id, 'height', default=None)) if format_id else None
formats.append({
'url': video_url,
'format_id': format_id,
'height': height,
})
self._sort_formats(formats)
meta_data = video_data['meta'] meta_data = video_data['meta']
content_id = '%s_%s' % ( title = remove_end(meta_data['title'], '.mp4')
meta_data.get('accId', ''), meta_data['itemId'])
title = meta_data['title']
if title.endswith('.mp4'):
title = title[:-4]
thumbnail = meta_data['poster']
duration = meta_data['duration']
timestamp = meta_data['timestamp']
formats = [ author = video_data.get('author')
{ uploader = author.get('name')
'url': video['url'], uploader_id = author.get('id') or author.get('email')
'format_id': video['key'], view_count = int_or_none(video_data.get('viewsCount') or video_data.get('views_count'))
'height': int(video['key'].rstrip('p'))
} for video in video_data['videos'] acc_id = meta_data.get('accId')
] item_id = meta_data.get('itemId')
self._sort_formats(formats) content_id = '%s_%s' % (acc_id, item_id) if acc_id and item_id else video_id
thumbnail = meta_data.get('poster')
duration = int_or_none(meta_data.get('duration'))
timestamp = int_or_none(meta_data.get('timestamp'))
return { return {
'id': content_id, 'id': content_id,

View File

@ -8,6 +8,7 @@ from ..utils import (
ExtractorError, ExtractorError,
determine_ext, determine_ext,
int_or_none, int_or_none,
js_to_json,
strip_jsonp, strip_jsonp,
unified_strdate, unified_strdate,
US_RATINGS, US_RATINGS,
@ -432,9 +433,20 @@ class PBSIE(InfoExtractor):
for vid_id in video_id] for vid_id in video_id]
return self.playlist_result(entries, display_id) return self.playlist_result(entries, display_id)
info = self._download_json( player = self._download_webpage(
'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id, 'http://player.pbs.org/portalplayer/%s' % video_id, display_id)
display_id)
info = self._parse_json(
self._search_regex(
r'(?s)PBS\.videoData\s*=\s*({.+?});\n',
player, 'video data', default='{}'),
display_id, transform_source=js_to_json, fatal=False)
# Fallback to old videoInfo API
if not info:
info = self._download_json(
'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
display_id, 'Downloading video info JSON')
formats = [] formats = []
for encoding_name in ('recommended_encoding', 'alternate_encoding'): for encoding_name in ('recommended_encoding', 'alternate_encoding'):

View File

@ -1,6 +1,8 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
float_or_none, float_or_none,
@ -61,12 +63,15 @@ class RteIE(InfoExtractor):
class RteRadioIE(InfoExtractor): class RteRadioIE(InfoExtractor):
IE_NAME = 'rte:radio' IE_NAME = 'rte:radio'
IE_DESC = 'Raidió Teilifís Éireann radio' IE_DESC = 'Raidió Teilifís Éireann radio'
# Radioplayer URLs have the specifier #!rii=<channel_id>:<id>:<playable_item_id>:<date>: # Radioplayer URLs have two distinct specifier formats,
# the old format #!rii=<channel_id>:<id>:<playable_item_id>:<date>:
# the new format #!rii=b<channel_id>_<id>_<playable_item_id>_<date>_
# where the IDs are int/empty, the date is DD-MM-YYYY, and the specifier may be truncated. # where the IDs are int/empty, the date is DD-MM-YYYY, and the specifier may be truncated.
# An <id> uniquely defines an individual recording, and is the only part we require. # An <id> uniquely defines an individual recording, and is the only part we require.
_VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:[0-9]*)(?:%3A|:)(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:b?[0-9]*)(?:%3A|:|%5F|_)(?P<id>[0-9]+)'
_TEST = { _TESTS = [{
# Old-style player URL; HLS and RTMPE formats
'url': 'http://www.rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=16:10507902:2414:27-12-2015:', 'url': 'http://www.rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=16:10507902:2414:27-12-2015:',
'info_dict': { 'info_dict': {
'id': '10507902', 'id': '10507902',
@ -81,7 +86,23 @@ class RteRadioIE(InfoExtractor):
'params': { 'params': {
'skip_download': 'f4m fails with --test atm' 'skip_download': 'f4m fails with --test atm'
} }
} }, {
# New-style player URL; RTMPE formats only
'url': 'http://rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=b16_3250678_8861_06-04-2012_',
'info_dict': {
'id': '3250678',
'ext': 'flv',
'title': 'The Lyric Concert with Paul Herriott',
'thumbnail': 're:^https?://.*\.jpg$',
'description': '',
'timestamp': 1333742400,
'upload_date': '20120406',
'duration': 7199.016,
},
'params': {
'skip_download': 'f4m fails with --test atm'
}
}]
def _real_extract(self, url): def _real_extract(self, url):
item_id = self._match_id(url) item_id = self._match_id(url)
@ -102,8 +123,18 @@ class RteRadioIE(InfoExtractor):
formats = [] formats = []
if mg.get('url') and not mg['url'].startswith('rtmpe:'): if mg.get('url'):
formats.append({'url': mg['url']}) m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url'])
if m:
m = m.groupdict()
formats.append({
'url': m['url'] + '/' + m['app'],
'app': m['app'],
'play_path': m['playpath'],
'player_url': url,
'ext': 'flv',
'format_id': 'rtmp',
})
if mg.get('hls_server') and mg.get('hls_url'): if mg.get('hls_server') and mg.get('hls_url'):
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(

View File

@ -7,7 +7,7 @@ from .common import InfoExtractor
class SpankBangIE(InfoExtractor): class SpankBangIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video' _VALID_URL = r'https?://(?:(?:www|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video'
_TEST = { _TESTS = [{
'url': 'http://spankbang.com/3vvn/video/fantasy+solo', 'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
'md5': '1cc433e1d6aa14bc376535b8679302f7', 'md5': '1cc433e1d6aa14bc376535b8679302f7',
'info_dict': { 'info_dict': {
@ -19,7 +19,11 @@ class SpankBangIE(InfoExtractor):
'uploader': 'silly2587', 'uploader': 'silly2587',
'age_limit': 18, 'age_limit': 18,
} }
} }, {
# 480p only
'url': 'http://spankbang.com/1vt0/video/solvane+gangbang',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
@ -34,7 +38,8 @@ class SpankBangIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'format_id': '%sp' % height, 'format_id': '%sp' % height,
'height': int(height), 'height': int(height),
} for height in re.findall(r'<(?:span|li)[^>]+q_(\d+)p', webpage)] } for height in re.findall(r'<(?:span|li|p)[^>]+[qb]_(\d+)p', webpage)]
self._check_formats(formats, video_id)
self._sort_formats(formats) self._sort_formats(formats)
title = self._html_search_regex( title = self._html_search_regex(

View File

@ -197,8 +197,14 @@ class VevoIE(InfoExtractor):
if not version_url: if not version_url:
continue continue
if '.mpd' in version_url or '.ism' in version_url: if '.ism' in version_url:
continue continue
elif '.mpd' in version_url:
formats.extend(self._extract_mpd_formats(
version_url, video_id, mpd_id='dash-%s' % version,
note='Downloading %s MPD information' % version,
errnote='Failed to download %s MPD information' % version,
fatal=False))
elif '.m3u8' in version_url: elif '.m3u8' in version_url:
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
version_url, video_id, 'mp4', 'm3u8_native', version_url, video_id, 'mp4', 'm3u8_native',

View File

@ -1,6 +1,10 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
compat_urlparse,
)
from ..utils import ( from ..utils import (
float_or_none, float_or_none,
int_or_none, int_or_none,
@ -12,10 +16,10 @@ class ViddlerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)' _VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.viddler.com/v/43903784', 'url': 'http://www.viddler.com/v/43903784',
'md5': 'ae43ad7cb59431ce043f0ff7fa13cbf4', 'md5': '9eee21161d2c7f5b39690c3e325fab2f',
'info_dict': { 'info_dict': {
'id': '43903784', 'id': '43903784',
'ext': 'mp4', 'ext': 'mov',
'title': 'Video Made Easy', 'title': 'Video Made Easy',
'description': 'md5:6a697ebd844ff3093bd2e82c37b409cd', 'description': 'md5:6a697ebd844ff3093bd2e82c37b409cd',
'uploader': 'viddler', 'uploader': 'viddler',
@ -29,10 +33,10 @@ class ViddlerIE(InfoExtractor):
} }
}, { }, {
'url': 'http://www.viddler.com/v/4d03aad9/', 'url': 'http://www.viddler.com/v/4d03aad9/',
'md5': 'faa71fbf70c0bee7ab93076fd007f4b0', 'md5': 'f12c5a7fa839c47a79363bfdf69404fb',
'info_dict': { 'info_dict': {
'id': '4d03aad9', 'id': '4d03aad9',
'ext': 'mp4', 'ext': 'ts',
'title': 'WALL-TO-GORTAT', 'title': 'WALL-TO-GORTAT',
'upload_date': '20150126', 'upload_date': '20150126',
'uploader': 'deadspin', 'uploader': 'deadspin',
@ -42,10 +46,10 @@ class ViddlerIE(InfoExtractor):
} }
}, { }, {
'url': 'http://www.viddler.com/player/221ebbbd/0/', 'url': 'http://www.viddler.com/player/221ebbbd/0/',
'md5': '0defa2bd0ea613d14a6e9bd1db6be326', 'md5': '740511f61d3d1bb71dc14a0fe01a1c10',
'info_dict': { 'info_dict': {
'id': '221ebbbd', 'id': '221ebbbd',
'ext': 'mp4', 'ext': 'mov',
'title': 'LETeens-Grammar-snack-third-conditional', 'title': 'LETeens-Grammar-snack-third-conditional',
'description': ' ', 'description': ' ',
'upload_date': '20140929', 'upload_date': '20140929',
@ -54,16 +58,42 @@ class ViddlerIE(InfoExtractor):
'view_count': int, 'view_count': int,
'comment_count': int, 'comment_count': int,
} }
}, {
# secret protected
'url': 'http://www.viddler.com/v/890c0985?secret=34051570',
'info_dict': {
'id': '890c0985',
'ext': 'mp4',
'title': 'Complete Property Training - Traineeships',
'description': ' ',
'upload_date': '20130606',
'uploader': 'TiffanyBowtell',
'timestamp': 1370496993,
'view_count': int,
'comment_count': int,
},
'params': {
'skip_download': True,
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
json_url = ( query = {
'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?video_id=%s&key=v0vhrt7bg2xq1vyxhkct' % 'video_id': video_id,
video_id) 'key': 'v0vhrt7bg2xq1vyxhkct',
}
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
secret = qs.get('secret', [None])[0]
if secret:
query['secret'] = secret
headers = {'Referer': 'http://static.cdn-ec.viddler.com/js/arpeggio/v2/embed.html'} headers = {'Referer': 'http://static.cdn-ec.viddler.com/js/arpeggio/v2/embed.html'}
request = sanitized_Request(json_url, None, headers) request = sanitized_Request(
'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?%s'
% compat_urllib_parse.urlencode(query), None, headers)
data = self._download_json(request, video_id)['video'] data = self._download_json(request, video_id)['video']
formats = [] formats = []

View File

@ -114,7 +114,7 @@ class VideomoreIE(InfoExtractor):
data = self._download_json( data = self._download_json(
'http://videomore.ru/video/tracks/%s.json' % video_id, 'http://videomore.ru/video/tracks/%s.json' % video_id,
video_id, 'Downloadinng video JSON') video_id, 'Downloading video JSON')
title = data.get('title') or data['project_title'] title = data.get('title') or data['project_title']
description = data.get('description') or data.get('description_raw') description = data.get('description') or data.get('description_raw')

View File

@ -1,5 +1,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import itertools
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_HTTPError from ..compat import compat_HTTPError
from ..utils import ( from ..utils import (
@ -11,7 +13,8 @@ from ..utils import (
class VidmeIE(InfoExtractor): class VidmeIE(InfoExtractor):
_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]+)' IE_NAME = 'vidme'
_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{,5})(?:[^\da-zA-Z]|$)'
_TESTS = [{ _TESTS = [{
'url': 'https://vid.me/QNB', 'url': 'https://vid.me/QNB',
'md5': 'f42d05e7149aeaec5c037b17e5d3dc82', 'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
@ -202,3 +205,69 @@ class VidmeIE(InfoExtractor):
'comment_count': comment_count, 'comment_count': comment_count,
'formats': formats, 'formats': formats,
} }
class VidmeListBaseIE(InfoExtractor):
# Max possible limit according to https://docs.vid.me/#api-Videos-List
_LIMIT = 100
def _entries(self, user_id, user_name):
for page_num in itertools.count(1):
page = self._download_json(
'https://api.vid.me/videos/%s?user=%s&limit=%d&offset=%d'
% (self._API_ITEM, user_id, self._LIMIT, (page_num - 1) * self._LIMIT),
user_name, 'Downloading user %s page %d' % (self._API_ITEM, page_num))
videos = page.get('videos', [])
if not videos:
break
for video in videos:
video_url = video.get('full_url') or video.get('embed_url')
if video_url:
yield self.url_result(video_url, VidmeIE.ie_key())
total = int_or_none(page.get('page', {}).get('total'))
if total and self._LIMIT * page_num >= total:
break
def _real_extract(self, url):
user_name = self._match_id(url)
user_id = self._download_json(
'https://api.vid.me/userByUsername?username=%s' % user_name,
user_name)['user']['user_id']
return self.playlist_result(
self._entries(user_id, user_name), user_id,
'%s - %s' % (user_name, self._TITLE))
class VidmeUserIE(VidmeListBaseIE):
IE_NAME = 'vidme:user'
_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{6,})(?!/likes)(?:[^\da-zA-Z]|$)'
_API_ITEM = 'list'
_TITLE = 'Videos'
_TEST = {
'url': 'https://vid.me/EFARCHIVE',
'info_dict': {
'id': '3834632',
'title': 'EFARCHIVE - %s' % _TITLE,
},
'playlist_mincount': 238,
}
class VidmeUserLikesIE(VidmeListBaseIE):
IE_NAME = 'vidme:user:likes'
_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{6,})/likes'
_API_ITEM = 'likes'
_TITLE = 'Likes'
_TEST = {
'url': 'https://vid.me/ErinAlexis/likes',
'info_dict': {
'id': '6483530',
'title': 'ErinAlexis - %s' % _TITLE,
},
'playlist_mincount': 415,
}

View File

@ -265,7 +265,7 @@ class VKIE(InfoExtractor):
return self.url_result(pladform_url) return self.url_result(pladform_url)
m_rutube = re.search( m_rutube = re.search(
r'\ssrc="((?:https?:)?//rutube\.ru\\?/video\\?/embed(?:.*?))\\?"', info_page) r'\ssrc="((?:https?:)?//rutube\.ru\\?/(?:video|play)\\?/embed(?:.*?))\\?"', info_page)
if m_rutube is not None: if m_rutube is not None:
rutube_url = self._proto_relative_url( rutube_url = self._proto_relative_url(
m_rutube.group(1).replace('\\', '')) m_rutube.group(1).replace('\\', ''))

View File

@ -1,86 +1,88 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import hmac
from hashlib import sha1
from base64 import b64encode
from time import time
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, dict_get,
determine_ext float_or_none,
int_or_none,
) )
from ..compat import compat_urllib_parse from ..compat import compat_urllib_parse
class VLiveIE(InfoExtractor): class VLiveIE(InfoExtractor):
IE_NAME = 'vlive' IE_NAME = 'vlive'
# www.vlive.tv/video/ links redirect to m.vlive.tv/video/ for mobile devices _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
_VALID_URL = r'https?://(?:(www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
_TEST = { _TEST = {
'url': 'http://m.vlive.tv/video/1326', 'url': 'http://www.vlive.tv/video/1326',
'md5': 'cc7314812855ce56de70a06a27314983', 'md5': 'cc7314812855ce56de70a06a27314983',
'info_dict': { 'info_dict': {
'id': '1326', 'id': '1326',
'ext': 'mp4', 'ext': 'mp4',
'title': '[V] Girl\'s Day\'s Broadcast', 'title': "[V] Girl's Day's Broadcast",
'creator': 'Girl\'s Day', 'creator': "Girl's Day",
'view_count': int,
}, },
} }
_SECRET = 'rFkwZet6pqk1vQt6SxxUkAHX7YL3lmqzUMrU4IDusTo4jEBdtOhNfT4BYYAdArwH'
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage( webpage = self._download_webpage(
'http://m.vlive.tv/video/%s' % video_id, 'http://www.vlive.tv/video/%s' % video_id, video_id)
video_id, note='Download video page')
long_video_id = self._search_regex(
r'vlive\.tv\.video\.ajax\.request\.handler\.init\(\s*"[0-9]+"\s*,\s*"[^"]*"\s*,\s*"([^"]+)"',
webpage, 'long video id')
key = self._search_regex(
r'vlive\.tv\.video\.ajax\.request\.handler\.init\(\s*"[0-9]+"\s*,\s*"[^"]*"\s*,\s*"[^"]+"\s*,\s*"([^"]+)"',
webpage, 'key')
title = self._og_search_title(webpage) title = self._og_search_title(webpage)
thumbnail = self._og_search_thumbnail(webpage)
creator = self._html_search_regex(
r'<span[^>]+class="name">([^<>]+)</span>', webpage, 'creator')
url = 'http://global.apis.naver.com/globalV/globalV/vod/%s/playinfo?' % video_id playinfo = self._download_json(
msgpad = '%.0f' % (time() * 1000) 'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?%s'
md = b64encode( % compat_urllib_parse.urlencode({
hmac.new(self._SECRET.encode('ascii'), 'videoId': long_video_id,
(url[:255] + msgpad).encode('ascii'), sha1).digest() 'key': key,
) 'ptc': 'http',
url += '&' + compat_urllib_parse.urlencode({'msgpad': msgpad, 'md': md}) 'doct': 'json', # document type (xml or json)
playinfo = self._download_json(url, video_id, 'Downloading video json') 'cpt': 'vtt', # captions type (vtt or ttml)
}), video_id)
if playinfo.get('message', '') != 'success': formats = [{
raise ExtractorError(playinfo.get('message', 'JSON request unsuccessful')) 'url': vid['source'],
'format_id': vid.get('encodingOption', {}).get('name'),
if not playinfo.get('result'): 'abr': float_or_none(vid.get('bitrate', {}).get('audio')),
raise ExtractorError('No videos found.') 'vbr': float_or_none(vid.get('bitrate', {}).get('video')),
'width': int_or_none(vid.get('encodingOption', {}).get('width')),
formats = [] 'height': int_or_none(vid.get('encodingOption', {}).get('height')),
for vid in playinfo['result'].get('videos', {}).get('list', []): 'filesize': int_or_none(vid.get('size')),
formats.append({ } for vid in playinfo.get('videos', {}).get('list', []) if vid.get('source')]
'url': vid['source'],
'ext': 'mp4',
'abr': vid.get('bitrate', {}).get('audio'),
'vbr': vid.get('bitrate', {}).get('video'),
'format_id': vid['encodingOption']['name'],
'height': vid.get('height'),
'width': vid.get('width'),
})
self._sort_formats(formats) self._sort_formats(formats)
thumbnail = self._og_search_thumbnail(webpage)
creator = self._html_search_regex(
r'<div[^>]+class="info_area"[^>]*>\s*<strong[^>]+class="name"[^>]*>([^<]+)</strong>',
webpage, 'creator', fatal=False)
view_count = int_or_none(playinfo.get('meta', {}).get('count'))
subtitles = {} subtitles = {}
for caption in playinfo['result'].get('captions', {}).get('list', []): for caption in playinfo.get('captions', {}).get('list', []):
subtitles[caption['language']] = [ lang = dict_get(caption, ('language', 'locale', 'country', 'label'))
{'ext': determine_ext(caption['source'], default_ext='vtt'), if lang and caption.get('source'):
'url': caption['source']}] subtitles[lang] = [{
'ext': 'vtt',
'url': caption['source']}]
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'creator': creator, 'creator': creator,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'view_count': view_count,
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
} }

View File

@ -229,6 +229,9 @@ class YoukuIE(InfoExtractor):
if error_note is not None and '因版权原因无法观看此视频' in error_note: if error_note is not None and '因版权原因无法观看此视频' in error_note:
raise ExtractorError( raise ExtractorError(
'Youku said: Sorry, this video is available in China only', expected=True) 'Youku said: Sorry, this video is available in China only', expected=True)
elif error_note and '该视频被设为私密' in error_note:
raise ExtractorError(
'Youku said: Sorry, this video is private', expected=True)
else: else:
msg = 'Youku server reported error %i' % error.get('code') msg = 'Youku server reported error %i' % error.get('code')
if error_note is not None: if error_note is not None:

View File

@ -286,7 +286,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
'36': {'ext': '3gp', 'width': 320, 'height': 240, 'acodec': 'aac', 'abr': 32, 'vcodec': 'mp4v'}, # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'}, '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
@ -369,6 +370,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# RTMP (unnamed) # RTMP (unnamed)
'_rtmp': {'protocol': 'rtmp'}, '_rtmp': {'protocol': 'rtmp'},
} }
_SUBTITLE_FORMATS = ('ttml', 'vtt')
IE_NAME = 'youtube' IE_NAME = 'youtube'
_TESTS = [ _TESTS = [
@ -918,7 +920,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if lang in sub_lang_list: if lang in sub_lang_list:
continue continue
sub_formats = [] sub_formats = []
for ext in ['ttml', 'vtt']: for ext in self._SUBTITLE_FORMATS:
params = compat_urllib_parse.urlencode({ params = compat_urllib_parse.urlencode({
'lang': lang, 'lang': lang,
'v': video_id, 'v': video_id,
@ -988,7 +990,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
for lang_node in caption_list.findall('target'): for lang_node in caption_list.findall('target'):
sub_lang = lang_node.attrib['lang_code'] sub_lang = lang_node.attrib['lang_code']
sub_formats = [] sub_formats = []
for ext in ['sbv', 'vtt', 'srt']: for ext in self._SUBTITLE_FORMATS:
params = compat_urllib_parse.urlencode({ params = compat_urllib_parse.urlencode({
'lang': original_lang, 'lang': original_lang,
'tlang': sub_lang, 'tlang': sub_lang,
@ -1463,7 +1465,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Look for the DASH manifest # Look for the DASH manifest
if self._downloader.params.get('youtube_include_dash_manifest', True): if self._downloader.params.get('youtube_include_dash_manifest', True):
dash_mpd_fatal = True dash_mpd_fatal = True
for dash_manifest_url in dash_mpds: for mpd_url in dash_mpds:
dash_formats = {} dash_formats = {}
try: try:
def decrypt_sig(mobj): def decrypt_sig(mobj):
@ -1471,11 +1473,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
dec_s = self._decrypt_signature(s, video_id, player_url, age_gate) dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
return '/signature/%s' % dec_s return '/signature/%s' % dec_s
dash_manifest_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, dash_manifest_url) mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
for df in self._extract_dash_manifest_formats( for df in self._extract_mpd_formats(
dash_manifest_url, video_id, fatal=dash_mpd_fatal, mpd_url, video_id, fatal=dash_mpd_fatal,
namespace='urn:mpeg:DASH:schema:MPD:2011', formats_dict=self._formats): formats_dict=self._formats):
# Do not overwrite DASH format found in some previous DASH manifest # Do not overwrite DASH format found in some previous DASH manifest
if df['format_id'] not in dash_formats: if df['format_id'] not in dash_formats:
dash_formats[df['format_id']] = df dash_formats[df['format_id']] = df

View File

@ -392,7 +392,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
options.extend(['-metadata', '%s=%s' % (name, value)]) options.extend(['-metadata', '%s=%s' % (name, value)])
# https://github.com/rg3/youtube-dl/issues/8350 # https://github.com/rg3/youtube-dl/issues/8350
if info['protocol'] == 'm3u8_native' or self._downloader.params.get('hls_prefer_native', False): if info.get('protocol') == 'm3u8_native' or info.get('protocol') == 'm3u8' and self._downloader.params.get('hls_prefer_native', False):
options.extend(['-bsf:a', 'aac_adtstoasc']) options.extend(['-bsf:a', 'aac_adtstoasc'])
self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename) self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename)
@ -508,8 +508,8 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
with io.open(srt_file, 'wt', encoding='utf-8') as f: with io.open(srt_file, 'wt', encoding='utf-8') as f:
f.write(srt_data) f.write(srt_data)
old_file = srt_file
ext = 'srt'
subs[lang] = { subs[lang] = {
'ext': 'srt', 'ext': 'srt',
'data': srt_data 'data': srt_data
@ -517,12 +517,14 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
if new_ext == 'srt': if new_ext == 'srt':
continue continue
else:
sub_filenames.append(srt_file)
self.run_ffmpeg(old_file, new_file, ['-f', new_format]) self.run_ffmpeg(old_file, new_file, ['-f', new_format])
with io.open(new_file, 'rt', encoding='utf-8') as f: with io.open(new_file, 'rt', encoding='utf-8') as f:
subs[lang] = { subs[lang] = {
'ext': ext, 'ext': new_ext,
'data': f.read(), 'data': f.read(),
} }

View File

@ -1717,6 +1717,16 @@ def encode_dict(d, encoding='utf-8'):
return dict((encode(k), encode(v)) for k, v in d.items()) return dict((encode(k), encode(v)) for k, v in d.items())
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
if isinstance(key_or_keys, (list, tuple)):
for key in key_or_keys:
if key not in d or d[key] is None or skip_false_values and not d[key]:
continue
return d[key]
return default
return d.get(key_or_keys, default)
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'): def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
return string if isinstance(string, compat_str) else compat_str(string, encoding, errors) return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
@ -1739,7 +1749,7 @@ def parse_age_limit(s):
def strip_jsonp(code): def strip_jsonp(code):
return re.sub( return re.sub(
r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code) r'(?s)^[a-zA-Z0-9_.]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
def js_to_json(code): def js_to_json(code):

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2016.02.04' __version__ = '2016.02.10'