Compare commits
85 Commits
2017.01.28
...
2017.02.04
Author | SHA1 | Date | |
---|---|---|---|
|
a713a86755 | ||
|
7bccd5fc8a | ||
|
3144eccf55 | ||
|
9db8f6c540 | ||
|
8e4041cf3f | ||
|
31487eb974 | ||
|
c2521c1ac6 | ||
|
643dc0fcfe | ||
|
36fce54816 | ||
|
2c15db829c | ||
|
f65dba7cdb | ||
|
605fd6392f | ||
|
f962790ee5 | ||
|
b7cc5f078e | ||
|
f7a10d8cd6 | ||
|
daac118bf4 | ||
|
8939f784d9 | ||
|
df0588a31f | ||
|
4ce3407d08 | ||
|
d7f9242e30 | ||
|
45024183ae | ||
|
33da98f493 | ||
|
4195096ea8 | ||
|
0bbcc8a10a | ||
|
b3ee552e4b | ||
|
a22b2fd19b | ||
|
c54c01f82d | ||
|
5a116e1302 | ||
|
a685751051 | ||
|
bd8f48c78b | ||
|
81aeafeb44 | ||
|
8bdc149441 | ||
|
020c5df52d | ||
|
da162c1135 | ||
|
5069594993 | ||
|
b996b88092 | ||
|
b83ef507b4 | ||
|
000f207944 | ||
|
fe5aa197b5 | ||
|
7882f1115e | ||
|
2b2d5d319b | ||
|
26c0f09935 | ||
|
c15cd29640 | ||
|
c38a67bcd5 | ||
|
363245ad94 | ||
|
7c5329e6f4 | ||
|
8fd65faece | ||
|
d7e215b42d | ||
|
3a528ffd89 | ||
|
3c90cc8b6f | ||
|
ae9a173b64 | ||
|
75822ca790 | ||
|
dadb836139 | ||
|
4719419951 | ||
|
c2d9c25f81 | ||
|
4d2fdb07c4 | ||
|
fe323a4800 | ||
|
f13da8af28 | ||
|
e228616c6e | ||
|
c58c2d63cb | ||
|
d04621daf4 | ||
|
76aaf1faae | ||
|
56fc078da8 | ||
|
0842b8241d | ||
|
59c307891a | ||
|
4d07b748c2 | ||
|
f5169501d2 | ||
|
186f4abe93 | ||
|
34cea6137e | ||
|
ffcfb7e3e0 | ||
|
c0af11abee | ||
|
1a241a2d02 | ||
|
acbb2374bc | ||
|
4edeac5bfa | ||
|
f592ff9868 | ||
|
24ee6b9721 | ||
|
a71b8d3b3b | ||
|
732fb3f8be | ||
|
008f247077 | ||
|
661cc229d2 | ||
|
b92d3c5343 | ||
|
ab6f6aee78 | ||
|
26e40542dd | ||
|
99a0baf370 | ||
|
a0758dfa1a |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.28*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.28**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.04.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.04.1**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2017.01.28
|
||||
[debug] youtube-dl version 2017.02.04.1
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
10
AUTHORS
10
AUTHORS
@@ -191,3 +191,13 @@ Rich Leeper
|
||||
Zhong Jianxin
|
||||
Thor77
|
||||
Mattias Wadman
|
||||
Arjan Verwer
|
||||
Costy Petrisor
|
||||
Logan B
|
||||
Alex Seiler
|
||||
Vijay Singh
|
||||
Paul Hartmann
|
||||
Stephen Chen
|
||||
Fabian Stahl
|
||||
Bagira
|
||||
Odd Stråbø
|
||||
|
84
ChangeLog
84
ChangeLog
@@ -1,3 +1,87 @@
|
||||
version 2017.02.04.1
|
||||
|
||||
Extractors
|
||||
+ [twitch:stream] Add support for player.twitch.tv (#11971)
|
||||
* [radiocanada] Fix extraction for toutv rtmp formats
|
||||
|
||||
|
||||
version 2017.02.04
|
||||
|
||||
Core
|
||||
+ Add --playlist-random to shuffle playlists (#11889, #11901)
|
||||
* [utils] Improve comments processing in js_to_json (#11947)
|
||||
* [utils] Handle single-line comments in js_to_json
|
||||
* [downloader/external:ffmpeg] Minimize the use of aac_adtstoasc filter
|
||||
|
||||
Extractors
|
||||
+ [piksel] Add another app token pattern (#11969)
|
||||
+ [vk] Capture and output author blocked error message (#11965)
|
||||
+ [turner] Fix secure HLS formats downloading with ffmpeg (#11358, #11373,
|
||||
#11800)
|
||||
+ [drtv] Add support for live and radio sections (#1827, #3427)
|
||||
* [myspace] Fix extraction and extract HLS and HTTP formats
|
||||
+ [youtube] Add format info for itag 325 and 328
|
||||
* [vine] Fix extraction (#11955)
|
||||
- [sportbox] Remove extractor (#11954)
|
||||
+ [filmon] Add support for filmon.com (#11187)
|
||||
+ [infoq] Add audio only formats (#11565)
|
||||
* [douyutv] Improve room id regular expression (#11931)
|
||||
* [iprima] Fix extraction (#11920, #11896)
|
||||
* [youtube] Fix ytsearch when cookies are provided (#11924)
|
||||
* [go] Relax video id regular expression (#11937)
|
||||
* [facebook] Fix title extraction (#11941)
|
||||
+ [youtube:playlist] Recognize TL playlists (#11945)
|
||||
+ [bilibili] Support new Bangumi URLs (#11845)
|
||||
+ [cbc:watch] Extract audio codec for audio only formats (#11893)
|
||||
+ [elpais] Fix extraction for some URLs (#11765)
|
||||
|
||||
|
||||
version 2017.02.01
|
||||
|
||||
Extractors
|
||||
+ [facebook] Add another fallback extraction scenario (#11926)
|
||||
* [prosiebensat1] Fix extraction of descriptions (#11810, #11929)
|
||||
- [crunchyroll] Remove ScaledBorderAndShadow settings (#9028)
|
||||
+ [vimeo] Extract upload timestamp
|
||||
+ [vimeo] Extract license (#8726, #11880)
|
||||
+ [nrk:series] Add support for series (#11571, #11711)
|
||||
|
||||
|
||||
version 2017.01.31
|
||||
|
||||
Core
|
||||
+ [compat] Add compat_etree_register_namespace
|
||||
|
||||
Extractors
|
||||
* [youtube] Fix extraction for domainless player URLs (#11890, #11891, #11892,
|
||||
#11894, #11895, #11897, #11900, #11903, #11904, #11906, #11907, #11909,
|
||||
#11913, #11914, #11915, #11916, #11917, #11918, #11919)
|
||||
+ [vimeo] Extract both mixed and separated DASH formats
|
||||
+ [ruutu] Extract DASH formats
|
||||
* [itv] Fix extraction for python 2.6
|
||||
|
||||
|
||||
version 2017.01.29
|
||||
|
||||
Core
|
||||
* [extractor/common] Fix initialization template (#11605, #11825)
|
||||
+ [extractor/common] Document fragment_base_url and fragment's path fields
|
||||
* [extractor/common] Fix duration per DASH segment (#11868)
|
||||
+ Introduce --autonumber-start option for initial value of %(autonumber)s
|
||||
template (#727, #2702, #9362, #10457, #10529, #11862)
|
||||
|
||||
Extractors
|
||||
+ [azmedien:playlist] Add support for topic and themen playlists (#11817)
|
||||
* [npo] Fix subtitles extraction
|
||||
+ [itv] Extract subtitles
|
||||
+ [itv] Add support for itv.com (#9240)
|
||||
+ [mtv81] Add support for mtv81.com (#7619)
|
||||
+ [vlive] Add support for channels (#11826)
|
||||
+ [kaltura] Add fallback for fileExt
|
||||
+ [kaltura] Improve uploader_id extraction
|
||||
+ [konserthusetplay] Add support for rspoplay.se (#11828)
|
||||
|
||||
|
||||
version 2017.01.28
|
||||
|
||||
Core
|
||||
|
48
README.md
48
README.md
@@ -88,8 +88,6 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
--mark-watched Mark videos watched (YouTube only)
|
||||
--no-mark-watched Do not mark videos watched (YouTube only)
|
||||
--no-color Do not emit color codes in output
|
||||
--abort-on-unavailable-fragment Abort downloading when some fragment is not
|
||||
available
|
||||
|
||||
## Network Options:
|
||||
--proxy URL Use the specified HTTP/HTTPS/SOCKS proxy.
|
||||
@@ -99,16 +97,13 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
string (--proxy "") for direct connection
|
||||
--socket-timeout SECONDS Time to wait before giving up, in seconds
|
||||
--source-address IP Client-side IP address to bind to
|
||||
(experimental)
|
||||
-4, --force-ipv4 Make all connections via IPv4
|
||||
(experimental)
|
||||
-6, --force-ipv6 Make all connections via IPv6
|
||||
(experimental)
|
||||
--geo-verification-proxy URL Use this proxy to verify the IP address for
|
||||
some geo-restricted sites. The default
|
||||
proxy specified by --proxy (or none, if the
|
||||
option is not present) is used for the
|
||||
actual downloading. (experimental)
|
||||
actual downloading.
|
||||
|
||||
## Video Selection:
|
||||
--playlist-start NUMBER Playlist video to start at (default is 1)
|
||||
@@ -139,23 +134,23 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
COUNT views
|
||||
--max-views COUNT Do not download any videos with more than
|
||||
COUNT views
|
||||
--match-filter FILTER Generic video filter (experimental).
|
||||
Specify any key (see help for -o for a list
|
||||
of available keys) to match if the key is
|
||||
present, !key to check if the key is not
|
||||
present, key > NUMBER (like "comment_count >
|
||||
12", also works with >=, <, <=, !=, =) to
|
||||
compare against a number, and & to require
|
||||
multiple matches. Values which are not
|
||||
known are excluded unless you put a
|
||||
question mark (?) after the operator. For
|
||||
example, to only match videos that have
|
||||
been liked more than 100 times and disliked
|
||||
less than 50 times (or the dislike
|
||||
functionality is not available at the given
|
||||
service), but who also have a description,
|
||||
use --match-filter "like_count > 100 &
|
||||
dislike_count <? 50 & description" .
|
||||
--match-filter FILTER Generic video filter. Specify any key (see
|
||||
help for -o for a list of available keys)
|
||||
to match if the key is present, !key to
|
||||
check if the key is not present, key >
|
||||
NUMBER (like "comment_count > 12", also
|
||||
works with >=, <, <=, !=, =) to compare
|
||||
against a number, and & to require multiple
|
||||
matches. Values which are not known are
|
||||
excluded unless you put a question mark (?)
|
||||
after the operator. For example, to only
|
||||
match videos that have been liked more than
|
||||
100 times and disliked less than 50 times
|
||||
(or the dislike functionality is not
|
||||
available at the given service), but who
|
||||
also have a description, use --match-filter
|
||||
"like_count > 100 & dislike_count <? 50 &
|
||||
description" .
|
||||
--no-playlist Download only the video, if the URL refers
|
||||
to a video and a playlist.
|
||||
--yes-playlist Download the playlist, if the URL refers to
|
||||
@@ -178,6 +173,8 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
only)
|
||||
--skip-unavailable-fragments Skip unavailable fragments (DASH and
|
||||
hlsnative only)
|
||||
--abort-on-unavailable-fragment Abort downloading when some fragment is not
|
||||
available
|
||||
--buffer-size SIZE Size of download buffer (e.g. 1024 or 16K)
|
||||
(default is 1024)
|
||||
--no-resize-buffer Do not automatically adjust the buffer
|
||||
@@ -185,6 +182,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
automatically resized from an initial value
|
||||
of SIZE.
|
||||
--playlist-reverse Download playlist videos in reverse order
|
||||
--playlist-random Download playlist videos in random order
|
||||
--xattr-set-filesize Set file xattribute ytdl.filesize with
|
||||
expected file size (experimental)
|
||||
--hls-prefer-native Use the native HLS downloader instead of
|
||||
@@ -210,7 +208,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
--autonumber-size NUMBER Specify the number of digits in
|
||||
%(autonumber)s when it is present in output
|
||||
filename template or --auto-number option
|
||||
is given
|
||||
is given (default is 5)
|
||||
--autonumber-start NUMBER Specify the start value for %(autonumber)s
|
||||
(default is 1)
|
||||
--restrict-filenames Restrict filenames to only ASCII
|
||||
characters, and avoid "&" and spaces in
|
||||
filenames
|
||||
|
@@ -76,7 +76,7 @@
|
||||
- **awaan:season**
|
||||
- **awaan:video**
|
||||
- **AZMedien**: AZ Medien videos
|
||||
- **AZMedienShow**: AZ Medien shows
|
||||
- **AZMedienPlaylist**: AZ Medien playlists
|
||||
- **Azubu**
|
||||
- **AzubuLive**
|
||||
- **BaiduVideo**: 百度视频
|
||||
@@ -84,6 +84,7 @@
|
||||
- **bambuser:channel**
|
||||
- **Bandcamp**
|
||||
- **Bandcamp:album**
|
||||
- **bangumi.bilibili.com**: BiliBili番剧
|
||||
- **bbc**: BBC
|
||||
- **bbc.co.uk**: BBC iPlayer
|
||||
- **bbc.co.uk:article**: BBC articles
|
||||
@@ -211,7 +212,8 @@
|
||||
- **DRBonanza**
|
||||
- **Dropbox**
|
||||
- **DrTuber**
|
||||
- **DRTV**
|
||||
- **drtv**
|
||||
- **drtv:live**
|
||||
- **Dumpert**
|
||||
- **dvtv**: http://video.aktualne.cz/
|
||||
- **dw**
|
||||
@@ -247,6 +249,8 @@
|
||||
- **fc2:embed**
|
||||
- **Fczenit**
|
||||
- **fernsehkritik.tv**
|
||||
- **filmon**
|
||||
- **filmon:channel**
|
||||
- **Firstpost**
|
||||
- **FiveTV**
|
||||
- **Flickr**
|
||||
@@ -337,6 +341,7 @@
|
||||
- **IPrima**
|
||||
- **iqiyi**: 爱奇艺
|
||||
- **Ir90Tv**
|
||||
- **ITV**
|
||||
- **ivi**: ivi.ru
|
||||
- **ivi:compilation**: ivi.ru compilations
|
||||
- **ivideon**: Ivideon TV
|
||||
@@ -445,6 +450,7 @@
|
||||
- **mtg**: MTG services
|
||||
- **mtv**
|
||||
- **mtv.de**
|
||||
- **mtv81**
|
||||
- **mtv:video**
|
||||
- **mtvservices:embedded**
|
||||
- **MuenchenTV**: münchen.tv
|
||||
@@ -526,6 +532,7 @@
|
||||
- **NRKTV**: NRK TV and NRK Radio
|
||||
- **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte
|
||||
- **NRKTVEpisodes**
|
||||
- **NRKTVSeries**
|
||||
- **ntv.ru**
|
||||
- **Nuvid**
|
||||
- **NYTimes**
|
||||
@@ -700,7 +707,6 @@
|
||||
- **Spiegeltv**
|
||||
- **Spike**
|
||||
- **Sport5**
|
||||
- **SportBox**
|
||||
- **SportBoxEmbed**
|
||||
- **SportDeutschland**
|
||||
- **Sportschau**
|
||||
@@ -887,6 +893,7 @@
|
||||
- **vk:uservideos**: VK - User's Videos
|
||||
- **vk:wallpost**
|
||||
- **vlive**
|
||||
- **vlive:channel**
|
||||
- **Vodlocker**
|
||||
- **VODPlatform**
|
||||
- **VoiceRepublic**
|
||||
|
@@ -785,12 +785,27 @@ class TestUtil(unittest.TestCase):
|
||||
on = js_to_json('["abc", "def",]')
|
||||
self.assertEqual(json.loads(on), ['abc', 'def'])
|
||||
|
||||
on = js_to_json('[/*comment\n*/"abc"/*comment\n*/,/*comment\n*/"def",/*comment\n*/]')
|
||||
self.assertEqual(json.loads(on), ['abc', 'def'])
|
||||
|
||||
on = js_to_json('[//comment\n"abc" //comment\n,//comment\n"def",//comment\n]')
|
||||
self.assertEqual(json.loads(on), ['abc', 'def'])
|
||||
|
||||
on = js_to_json('{"abc": "def",}')
|
||||
self.assertEqual(json.loads(on), {'abc': 'def'})
|
||||
|
||||
on = js_to_json('{/*comment\n*/"abc"/*comment\n*/:/*comment\n*/"def"/*comment\n*/,/*comment\n*/}')
|
||||
self.assertEqual(json.loads(on), {'abc': 'def'})
|
||||
|
||||
on = js_to_json('{ 0: /* " \n */ ",]" , }')
|
||||
self.assertEqual(json.loads(on), {'0': ',]'})
|
||||
|
||||
on = js_to_json('{ /*comment\n*/0/*comment\n*/: /* " \n */ ",]" , }')
|
||||
self.assertEqual(json.loads(on), {'0': ',]'})
|
||||
|
||||
on = js_to_json('{ 0: // comment\n1 }')
|
||||
self.assertEqual(json.loads(on), {'0': 1})
|
||||
|
||||
on = js_to_json(r'["<p>x<\/p>"]')
|
||||
self.assertEqual(json.loads(on), ['<p>x</p>'])
|
||||
|
||||
@@ -800,15 +815,27 @@ class TestUtil(unittest.TestCase):
|
||||
on = js_to_json("['a\\\nb']")
|
||||
self.assertEqual(json.loads(on), ['ab'])
|
||||
|
||||
on = js_to_json("/*comment\n*/[/*comment\n*/'a\\\nb'/*comment\n*/]/*comment\n*/")
|
||||
self.assertEqual(json.loads(on), ['ab'])
|
||||
|
||||
on = js_to_json('{0xff:0xff}')
|
||||
self.assertEqual(json.loads(on), {'255': 255})
|
||||
|
||||
on = js_to_json('{/*comment\n*/0xff/*comment\n*/:/*comment\n*/0xff/*comment\n*/}')
|
||||
self.assertEqual(json.loads(on), {'255': 255})
|
||||
|
||||
on = js_to_json('{077:077}')
|
||||
self.assertEqual(json.loads(on), {'63': 63})
|
||||
|
||||
on = js_to_json('{/*comment\n*/077/*comment\n*/:/*comment\n*/077/*comment\n*/}')
|
||||
self.assertEqual(json.loads(on), {'63': 63})
|
||||
|
||||
on = js_to_json('{42:42}')
|
||||
self.assertEqual(json.loads(on), {'42': 42})
|
||||
|
||||
on = js_to_json('{/*comment\n*/42/*comment\n*/:/*comment\n*/42/*comment\n*/}')
|
||||
self.assertEqual(json.loads(on), {'42': 42})
|
||||
|
||||
def test_extract_attributes(self):
|
||||
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
|
||||
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
|
||||
|
@@ -24,6 +24,7 @@ import sys
|
||||
import time
|
||||
import tokenize
|
||||
import traceback
|
||||
import random
|
||||
|
||||
from .compat import (
|
||||
compat_basestring,
|
||||
@@ -159,6 +160,7 @@ class YoutubeDL(object):
|
||||
playlistend: Playlist item to end at.
|
||||
playlist_items: Specific indices of playlist to download.
|
||||
playlistreverse: Download playlist items in reverse order.
|
||||
playlistrandom: Download playlist items in random order.
|
||||
matchtitle: Download only matching titles.
|
||||
rejecttitle: Reject downloads for matching titles.
|
||||
logger: Log messages to a logging.Logger instance.
|
||||
@@ -584,7 +586,7 @@ class YoutubeDL(object):
|
||||
if autonumber_size is None:
|
||||
autonumber_size = 5
|
||||
autonumber_templ = '%0' + str(autonumber_size) + 'd'
|
||||
template_dict['autonumber'] = autonumber_templ % self._num_downloads
|
||||
template_dict['autonumber'] = autonumber_templ % (self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
|
||||
if template_dict.get('playlist_index') is not None:
|
||||
template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
|
||||
if template_dict.get('resolution') is None:
|
||||
@@ -842,6 +844,9 @@ class YoutubeDL(object):
|
||||
if self.params.get('playlistreverse', False):
|
||||
entries = entries[::-1]
|
||||
|
||||
if self.params.get('playlistrandom', False):
|
||||
random.shuffle(entries)
|
||||
|
||||
for i, entry in enumerate(entries, 1):
|
||||
self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
|
||||
extra = {
|
||||
|
@@ -133,6 +133,12 @@ def _real_main(argv=None):
|
||||
parser.error('TV Provider account username missing\n')
|
||||
if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
|
||||
parser.error('using output template conflicts with using title, video ID or auto number')
|
||||
if opts.autonumber_size is not None:
|
||||
if opts.autonumber_size <= 0:
|
||||
parser.error('auto number size must be positive')
|
||||
if opts.autonumber_start is not None:
|
||||
if opts.autonumber_start < 0:
|
||||
parser.error('auto number start must be positive or 0')
|
||||
if opts.usetitle and opts.useid:
|
||||
parser.error('using title conflicts with using video ID')
|
||||
if opts.username is not None and opts.password is None:
|
||||
@@ -321,6 +327,7 @@ def _real_main(argv=None):
|
||||
'listformats': opts.listformats,
|
||||
'outtmpl': outtmpl,
|
||||
'autonumber_size': opts.autonumber_size,
|
||||
'autonumber_start': opts.autonumber_start,
|
||||
'restrictfilenames': opts.restrictfilenames,
|
||||
'ignoreerrors': opts.ignoreerrors,
|
||||
'force_generic_extractor': opts.force_generic_extractor,
|
||||
@@ -337,6 +344,7 @@ def _real_main(argv=None):
|
||||
'playliststart': opts.playliststart,
|
||||
'playlistend': opts.playlistend,
|
||||
'playlistreverse': opts.playlist_reverse,
|
||||
'playlistrandom': opts.playlist_random,
|
||||
'noplaylist': opts.noplaylist,
|
||||
'logtostderr': opts.outtmpl == '-',
|
||||
'consoletitle': opts.consoletitle,
|
||||
|
@@ -2529,6 +2529,24 @@ else:
|
||||
el.text = el.text.decode('utf-8')
|
||||
return doc
|
||||
|
||||
if hasattr(etree, 'register_namespace'):
|
||||
compat_etree_register_namespace = etree.register_namespace
|
||||
else:
|
||||
def compat_etree_register_namespace(prefix, uri):
|
||||
"""Register a namespace prefix.
|
||||
The registry is global, and any existing mapping for either the
|
||||
given prefix or the namespace URI will be removed.
|
||||
*prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
|
||||
attributes in this namespace will be serialized with prefix if possible.
|
||||
ValueError is raised if prefix is reserved or is invalid.
|
||||
"""
|
||||
if re.match(r"ns\d+$", prefix):
|
||||
raise ValueError("Prefix format reserved for internal use")
|
||||
for k, v in list(etree._namespace_map.items()):
|
||||
if k == uri or v == prefix:
|
||||
del etree._namespace_map[k]
|
||||
etree._namespace_map[uri] = prefix
|
||||
|
||||
if sys.version_info < (2, 7):
|
||||
# Here comes the crazy part: In 2.6, if the xpath is a unicode,
|
||||
# .//node does not match if a node is a direct child of . !
|
||||
@@ -2865,6 +2883,7 @@ __all__ = [
|
||||
'compat_cookiejar',
|
||||
'compat_cookies',
|
||||
'compat_etree_fromstring',
|
||||
'compat_etree_register_namespace',
|
||||
'compat_expanduser',
|
||||
'compat_get_terminal_size',
|
||||
'compat_getenv',
|
||||
|
@@ -17,6 +17,7 @@ from ..utils import (
|
||||
encodeArgument,
|
||||
handle_youtubedl_headers,
|
||||
check_executable,
|
||||
is_outdated_version,
|
||||
)
|
||||
|
||||
|
||||
@@ -198,6 +199,15 @@ class FFmpegFD(ExternalFD):
|
||||
|
||||
args = [ffpp.executable, '-y']
|
||||
|
||||
seekable = info_dict.get('_seekable')
|
||||
if seekable is not None:
|
||||
# setting -seekable prevents ffmpeg from guessing if the server
|
||||
# supports seeking (by adding the header `Range: bytes=0-`), which
|
||||
# can cause problems in some cases
|
||||
# https://github.com/rg3/youtube-dl/issues/11800#issuecomment-275037127
|
||||
# http://trac.ffmpeg.org/ticket/6125#comment:10
|
||||
args += ['-seekable', '1' if seekable else '0']
|
||||
|
||||
args += self._configuration_args()
|
||||
|
||||
# start_time = info_dict.get('start_time') or 0
|
||||
@@ -264,7 +274,9 @@ class FFmpegFD(ExternalFD):
|
||||
if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
|
||||
args += ['-f', 'mpegts']
|
||||
else:
|
||||
args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
|
||||
args += ['-f', 'mp4']
|
||||
if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2')) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
|
||||
args += ['-bsf:a', 'aac_adtstoasc']
|
||||
elif protocol == 'rtmp':
|
||||
args += ['-f', 'flv']
|
||||
else:
|
||||
|
@@ -5,8 +5,9 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils import (
|
||||
get_element_by_class,
|
||||
get_element_by_id,
|
||||
strip_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -83,8 +84,8 @@ class AZMedienIE(AZMedienBaseIE):
|
||||
return self._kaltura_video(partner_id, entry_id)
|
||||
|
||||
|
||||
class AZMedienShowIE(AZMedienBaseIE):
|
||||
IE_DESC = 'AZ Medien shows'
|
||||
class AZMedienPlaylistIE(AZMedienBaseIE):
|
||||
IE_DESC = 'AZ Medien playlists'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
@@ -93,7 +94,12 @@ class AZMedienShowIE(AZMedienBaseIE):
|
||||
telebaern\.tv|
|
||||
telem1\.ch
|
||||
)/
|
||||
(?P<id>[0-9]+-show-[^/\#]+
|
||||
(?P<id>[0-9]+-
|
||||
(?:
|
||||
show|
|
||||
topic|
|
||||
themen
|
||||
)-[^/\#]+
|
||||
(?:
|
||||
/[0-9]+-episode-[^/\#]+
|
||||
)?
|
||||
@@ -108,6 +114,18 @@ class AZMedienShowIE(AZMedienBaseIE):
|
||||
'title': 'News - Donnerstag, 15. Dezember 2016',
|
||||
},
|
||||
'playlist_count': 9,
|
||||
}, {
|
||||
# URL with 'themen'
|
||||
'url': 'http://www.telem1.ch/258-themen-tele-m1-classics',
|
||||
'info_dict': {
|
||||
'id': '258-themen-tele-m1-classics',
|
||||
'title': 'Tele M1 Classics',
|
||||
},
|
||||
'playlist_mincount': 15,
|
||||
}, {
|
||||
# URL with 'topic', contains nested playlists
|
||||
'url': 'http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# URL with 'show' only
|
||||
'url': 'http://www.telezueri.ch/86-show-talktaeglich',
|
||||
@@ -136,10 +154,19 @@ class AZMedienShowIE(AZMedienBaseIE):
|
||||
for m in re.finditer(
|
||||
r'<a[^>]+data-real=(["\'])(?P<url>http.+?)\1', webpage)]
|
||||
|
||||
if not entries:
|
||||
entries = [
|
||||
# May contain nested playlists (e.g. [1]) thus no explicit
|
||||
# ie_key
|
||||
# 1. http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen
|
||||
self.url_result(urljoin(url, m.group('url')))
|
||||
for m in re.finditer(
|
||||
r'<a[^>]+name=[^>]+href=(["\'])(?P<url>/.+?)\1', webpage)]
|
||||
|
||||
title = self._search_regex(
|
||||
r'episodeShareTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
|
||||
webpage, 'title',
|
||||
default=strip_or_none(get_element_by_class(
|
||||
'title-block-cell', webpage)), group='title')
|
||||
default=strip_or_none(get_element_by_id(
|
||||
'video-title', webpage)), group='title')
|
||||
|
||||
return self.playlist_result(entries, show_id, title)
|
||||
|
@@ -5,19 +5,27 @@ import hashlib
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_parse_qs
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
strip_jsonp,
|
||||
unified_timestamp,
|
||||
unsmuggle_url,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class BiliBiliIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/v/)(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/(?P<anime_id>\d+)/play#)(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bilibili.tv/video/av1074402/',
|
||||
'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e',
|
||||
'info_dict': {
|
||||
@@ -32,25 +40,61 @@ class BiliBiliIE(InfoExtractor):
|
||||
'uploader': '菊子桑',
|
||||
'uploader_id': '156160',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# Tested in BiliBiliBangumiIE
|
||||
'url': 'http://bangumi.bilibili.com/anime/1869/play#40062',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://bangumi.bilibili.com/anime/5802/play#100643',
|
||||
'md5': '3f721ad1e75030cc06faf73587cfec57',
|
||||
'info_dict': {
|
||||
'id': '100643',
|
||||
'ext': 'mp4',
|
||||
'title': 'CHAOS;CHILD',
|
||||
'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
|
||||
},
|
||||
'skip': 'Geo-restricted to China',
|
||||
}]
|
||||
|
||||
_APP_KEY = '84956560bc028eb7'
|
||||
_BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e'
|
||||
|
||||
def _report_error(self, result):
|
||||
if 'message' in result:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, result['message']), expected=True)
|
||||
elif 'code' in result:
|
||||
raise ExtractorError('%s returns error %d' % (self.IE_NAME, result['code']), expected=True)
|
||||
else:
|
||||
raise ExtractorError('Can\'t extract Bangumi episode ID')
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
anime_id = mobj.group('anime_id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if 'anime/v' not in url:
|
||||
if 'anime/' not in url:
|
||||
cid = compat_parse_qs(self._search_regex(
|
||||
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
|
||||
r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
|
||||
webpage, 'player parameters'))['cid'][0]
|
||||
else:
|
||||
if 'no_bangumi_tip' not in smuggled_data:
|
||||
self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run youtube-dl with %s' % (
|
||||
video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
|
||||
headers = {
|
||||
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
}
|
||||
headers.update(self.geo_verification_headers())
|
||||
|
||||
js = self._download_json(
|
||||
'http://bangumi.bilibili.com/web_api/get_source', video_id,
|
||||
data=urlencode_postdata({'episode_id': video_id}),
|
||||
headers={'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'})
|
||||
headers=headers)
|
||||
if 'result' not in js:
|
||||
self._report_error(js)
|
||||
cid = js['result']['cid']
|
||||
|
||||
payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid)
|
||||
@@ -58,7 +102,11 @@ class BiliBiliIE(InfoExtractor):
|
||||
|
||||
video_info = self._download_json(
|
||||
'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign),
|
||||
video_id, note='Downloading video info page')
|
||||
video_id, note='Downloading video info page',
|
||||
headers=self.geo_verification_headers())
|
||||
|
||||
if 'durl' not in video_info:
|
||||
self._report_error(video_info)
|
||||
|
||||
entries = []
|
||||
|
||||
@@ -85,7 +133,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
|
||||
description = self._html_search_meta('description', webpage)
|
||||
timestamp = unified_timestamp(self._html_search_regex(
|
||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False))
|
||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', default=None))
|
||||
thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)
|
||||
|
||||
# TODO 'view_count' requires deobfuscating Javascript
|
||||
@@ -99,7 +147,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
}
|
||||
|
||||
uploader_mobj = re.search(
|
||||
r'<a[^>]+href="https?://space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
|
||||
r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
|
||||
webpage)
|
||||
if uploader_mobj:
|
||||
info.update({
|
||||
@@ -123,3 +171,70 @@ class BiliBiliIE(InfoExtractor):
|
||||
'description': description,
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
|
||||
class BiliBiliBangumiIE(InfoExtractor):
    """Playlist extractor for bangumi.bilibili.com anime season pages.

    Every episode of the season is emitted as a ``url_transparent`` entry
    that is resolved by BiliBiliIE.
    """

    _VALID_URL = r'https?://bangumi\.bilibili\.com/anime/(?P<id>\d+)'

    IE_NAME = 'bangumi.bilibili.com'
    IE_DESC = 'BiliBili番剧'

    _TESTS = [{
        'url': 'http://bangumi.bilibili.com/anime/1869',
        'info_dict': {
            'id': '1869',
            'title': '混沌武士',
            'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
        },
        'playlist_count': 26,
    }, {
        'url': 'http://bangumi.bilibili.com/anime/1869',
        'info_dict': {
            'id': '1869',
            'title': '混沌武士',
            'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
        },
        'playlist': [{
            'md5': '91da8621454dd58316851c27c68b0c13',
            'info_dict': {
                'id': '40062',
                'ext': 'mp4',
                'title': '混沌武士',
                'description': '故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子...',
                'timestamp': 1414538739,
                'upload_date': '20141028',
                'episode': '疾风怒涛 Tempestuous Temperaments',
                'episode_number': 1,
            },
        }],
        'params': {
            'playlist_items': '1',
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Reject any URL that BiliBiliIE already accepts."""
        return False if BiliBiliIE.suitable(url) else super(BiliBiliBangumiIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist of the season's episodes ordered by episode number."""
        bangumi_id = self._match_id(url)

        # Sometimes this API returns a JSONP response
        season_info = self._download_json(
            'http://bangumi.bilibili.com/jsonp/seasoninfo/%s.ver' % bangumi_id,
            bangumi_id, transform_source=strip_jsonp)['result']

        entries = [{
            '_type': 'url_transparent',
            'url': smuggle_url(episode['webplay_url'], {'no_bangumi_tip': 1}),
            'ie_key': BiliBiliIE.ie_key(),
            'timestamp': parse_iso8601(episode.get('update_time'), delimiter=' '),
            'episode': episode.get('index_title'),
            'episode_number': int_or_none(episode.get('index')),
        } for episode in season_info['episodes']]

        def episode_sort_key(entry):
            # episode_number may be None (int_or_none result). None cannot be
            # ordered against int on Python 3, so sort such entries first
            # (the order Python 2 produced) instead of raising TypeError.
            number = entry.get('episode_number')
            return (number is not None, number or 0)

        entries.sort(key=episode_sort_key)

        return self.playlist_result(
            entries, bangumi_id,
            season_info.get('bangumi_title'), season_info.get('evaluate'))
|
||||
|
@@ -296,6 +296,12 @@ class CBCWatchVideoIE(CBCWatchBaseIE):
|
||||
formats = self._extract_m3u8_formats(re.sub(r'/([^/]+)/[^/?]+\.m3u8', r'/\1/\1.m3u8', m3u8_url), video_id, 'mp4', fatal=False)
|
||||
if len(formats) < 2:
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
|
||||
for f in formats:
|
||||
format_id = f.get('format_id')
|
||||
if format_id.startswith('AAC'):
|
||||
f['acodec'] = 'aac'
|
||||
elif format_id.startswith('AC3'):
|
||||
f['acodec'] = 'ac-3'
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = {
|
||||
|
@@ -121,9 +121,19 @@ class InfoExtractor(object):
|
||||
download, lower-case.
|
||||
"http", "https", "rtsp", "rtmp", "rtmpe",
|
||||
"m3u8", "m3u8_native" or "http_dash_segments".
|
||||
* fragments A list of fragments of the fragmented media,
|
||||
with the following entries:
|
||||
* "url" (mandatory) - fragment's URL
|
||||
* fragment_base_url
|
||||
Base URL for fragments. Each fragment's path
|
||||
value (if present) will be relative to
|
||||
this URL.
|
||||
* fragments A list of fragments of a fragmented media.
|
||||
Each fragment entry must contain either an url
|
||||
or a path. If an url is present it should be
|
||||
considered by a client. Otherwise both path and
|
||||
fragment_base_url must be present. Here is
|
||||
the list of all potential fields:
|
||||
* "url" - fragment's URL
|
||||
* "path" - fragment's path relative to
|
||||
fragment_base_url
|
||||
* "duration" (optional, int or float)
|
||||
* "filesize" (optional, int)
|
||||
* preference Order number of this format. If this field is
|
||||
@@ -1015,13 +1025,13 @@ class InfoExtractor(object):
|
||||
unique_formats.append(f)
|
||||
formats[:] = unique_formats
|
||||
|
||||
def _is_valid_url(self, url, video_id, item='video'):
|
||||
def _is_valid_url(self, url, video_id, item='video', headers={}):
|
||||
url = self._proto_relative_url(url, scheme='http:')
|
||||
# For now assume non HTTP(S) URLs always valid
|
||||
if not (url.startswith('http://') or url.startswith('https://')):
|
||||
return True
|
||||
try:
|
||||
self._request_webpage(url, video_id, 'Checking %s URL' % item)
|
||||
self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
|
||||
return True
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_urllib_error.URLError):
|
||||
@@ -1627,12 +1637,12 @@ class InfoExtractor(object):
|
||||
segment_template = element.find(_add_ns('SegmentTemplate'))
|
||||
if segment_template is not None:
|
||||
extract_common(segment_template)
|
||||
media_template = segment_template.get('media')
|
||||
if media_template:
|
||||
ms_info['media_template'] = media_template
|
||||
media = segment_template.get('media')
|
||||
if media:
|
||||
ms_info['media'] = media
|
||||
initialization = segment_template.get('initialization')
|
||||
if initialization:
|
||||
ms_info['initialization_url'] = initialization
|
||||
ms_info['initialization'] = initialization
|
||||
else:
|
||||
extract_Initialization(segment_template)
|
||||
return ms_info
|
||||
@@ -1676,6 +1686,7 @@ class InfoExtractor(object):
|
||||
lang = representation_attrib.get('lang')
|
||||
url_el = representation.find(_add_ns('BaseURL'))
|
||||
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
|
||||
bandwidth = int_or_none(representation_attrib.get('bandwidth'))
|
||||
f = {
|
||||
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
|
||||
'url': base_url,
|
||||
@@ -1683,7 +1694,7 @@ class InfoExtractor(object):
|
||||
'ext': mimetype2ext(mime_type),
|
||||
'width': int_or_none(representation_attrib.get('width')),
|
||||
'height': int_or_none(representation_attrib.get('height')),
|
||||
'tbr': int_or_none(representation_attrib.get('bandwidth'), 1000),
|
||||
'tbr': int_or_none(bandwidth, 1000),
|
||||
'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
|
||||
'fps': int_or_none(representation_attrib.get('frameRate')),
|
||||
'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
|
||||
@@ -1692,13 +1703,32 @@ class InfoExtractor(object):
|
||||
}
|
||||
f.update(parse_codecs(representation_attrib.get('codecs')))
|
||||
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
||||
if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
|
||||
|
||||
media_template = representation_ms_info['media_template']
|
||||
media_template = media_template.replace('$RepresentationID$', representation_id)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth|Time)%([^$]+)\$', r'%(\1)\2', media_template)
|
||||
media_template.replace('$$', '$')
|
||||
def prepare_template(template_name, identifiers):
    """Turn a DASH SegmentTemplate string into a %-format template.

    template_name is the key in representation_ms_info holding the raw
    template; identifiers is the collection of identifier names (e.g.
    'Number', 'Bandwidth', 'Time') that may be substituted.
    representation_ms_info and representation_id are taken from the
    enclosing scope. Returns the template with $RepresentationID$ filled
    in and every $Identifier$ / $Identifier%fmt$ rewritten to
    %(Identifier)d / %(Identifier)fmt.
    """
    names = '|'.join(identifiers)
    t = representation_ms_info[template_name]
    t = t.replace('$RepresentationID$', representation_id)
    # Plain identifiers are formatted as decimal integers
    t = re.sub(r'\$(%s)\$' % names, r'%(\1)d', t)
    # Identifiers carrying an explicit printf-style width keep it
    t = re.sub(r'\$(%s)%%([^$]+)\$' % names, r'%(\1)\2', t)
    # str.replace returns a new string; the result must be kept for the
    # $$ escape sequence to actually be unescaped
    t = t.replace('$$', '$')
    return t
|
||||
|
||||
# @initialization is a regular template like @media one
|
||||
# so it should be handled just the same way (see
|
||||
# https://github.com/rg3/youtube-dl/issues/11605)
|
||||
if 'initialization' in representation_ms_info:
|
||||
initialization_template = prepare_template(
|
||||
'initialization',
|
||||
# As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and
|
||||
# $Time$ shall not be included for @initialization thus
|
||||
# only $Bandwidth$ remains
|
||||
('Bandwidth', ))
|
||||
representation_ms_info['initialization_url'] = initialization_template % {
|
||||
'Bandwidth': bandwidth,
|
||||
}
|
||||
|
||||
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
|
||||
|
||||
media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
|
||||
|
||||
# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
|
||||
# can't be used at the same time
|
||||
@@ -1710,7 +1740,7 @@ class InfoExtractor(object):
|
||||
representation_ms_info['fragments'] = [{
|
||||
'url': media_template % {
|
||||
'Number': segment_number,
|
||||
'Bandwidth': int_or_none(representation_attrib.get('bandwidth')),
|
||||
'Bandwidth': bandwidth,
|
||||
},
|
||||
'duration': segment_duration,
|
||||
} for segment_number in range(
|
||||
@@ -1728,7 +1758,7 @@ class InfoExtractor(object):
|
||||
def add_segment_url():
|
||||
segment_url = media_template % {
|
||||
'Time': segment_time,
|
||||
'Bandwidth': int_or_none(representation_attrib.get('bandwidth')),
|
||||
'Bandwidth': bandwidth,
|
||||
'Number': segment_number,
|
||||
}
|
||||
representation_ms_info['fragments'].append({
|
||||
@@ -1751,14 +1781,16 @@ class InfoExtractor(object):
|
||||
# Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
|
||||
# or any YouTube dashsegments video
|
||||
fragments = []
|
||||
s_num = 0
|
||||
for segment_url in representation_ms_info['segment_urls']:
|
||||
s = representation_ms_info['s'][s_num]
|
||||
segment_index = 0
|
||||
timescale = representation_ms_info['timescale']
|
||||
for s in representation_ms_info['s']:
|
||||
duration = float_or_none(s['d'], timescale)
|
||||
for r in range(s.get('r', 0) + 1):
|
||||
fragments.append({
|
||||
'url': segment_url,
|
||||
'duration': float_or_none(s['d'], representation_ms_info['timescale']),
|
||||
'url': representation_ms_info['segment_urls'][segment_index],
|
||||
'duration': duration,
|
||||
})
|
||||
segment_index += 1
|
||||
representation_ms_info['fragments'] = fragments
|
||||
# NB: MPD manifest may contain direct URLs to unfragmented media.
|
||||
# No fragments key is present in this case.
|
||||
@@ -1768,7 +1800,7 @@ class InfoExtractor(object):
|
||||
'protocol': 'http_dash_segments',
|
||||
})
|
||||
if 'initialization_url' in representation_ms_info:
|
||||
initialization_url = representation_ms_info['initialization_url'].replace('$RepresentationID$', representation_id)
|
||||
initialization_url = representation_ms_info['initialization_url']
|
||||
if not f.get('url'):
|
||||
f['url'] = initialization_url
|
||||
f['fragments'].append({'url': initialization_url})
|
||||
|
@@ -255,8 +255,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style']
|
||||
output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x']
|
||||
output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y']
|
||||
output += """ScaledBorderAndShadow: no
|
||||
|
||||
output += """
|
||||
[V4+ Styles]
|
||||
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
|
||||
"""
|
||||
|
@@ -18,7 +18,7 @@ from ..utils import (
|
||||
|
||||
class DouyuTVIE(InfoExtractor):
|
||||
IE_DESC = '斗鱼'
|
||||
_VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(?P<id>[A-Za-z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(?:[^/]+/)*(?P<id>[A-Za-z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.douyutv.com/iseven',
|
||||
'info_dict': {
|
||||
@@ -68,6 +68,10 @@ class DouyuTVIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.douyu.com/xiaocang',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# \"room_id\"
|
||||
'url': 'http://www.douyu.com/t/lpl',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
# Decompile core.swf in webpage by ffdec "Search SWFs in memory". core.swf
|
||||
@@ -82,7 +86,7 @@ class DouyuTVIE(InfoExtractor):
|
||||
else:
|
||||
page = self._download_webpage(url, video_id)
|
||||
room_id = self._html_search_regex(
|
||||
r'"room_id"\s*:\s*(\d+),', page, 'room id')
|
||||
r'"room_id\\?"\s*:\s*(\d+),', page, 'room id')
|
||||
|
||||
room = self._download_json(
|
||||
'http://m.douyu.com/html5/live?roomId=%s' % room_id, video_id,
|
||||
|
@@ -9,12 +9,13 @@ from ..utils import (
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
remove_end,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class DRTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio/ondemand)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
||||
IE_NAME = 'drtv'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
|
||||
'md5': '25e659cccc9a2ed956110a299fdf5983',
|
||||
@@ -79,9 +80,10 @@ class DRTVIE(InfoExtractor):
|
||||
subtitles = {}
|
||||
|
||||
for asset in data['Assets']:
|
||||
if asset.get('Kind') == 'Image':
|
||||
kind = asset.get('Kind')
|
||||
if kind == 'Image':
|
||||
thumbnail = asset.get('Uri')
|
||||
elif asset.get('Kind') == 'VideoResource':
|
||||
elif kind in ('VideoResource', 'AudioResource'):
|
||||
duration = float_or_none(asset.get('DurationInMilliseconds'), 1000)
|
||||
restricted_to_denmark = asset.get('RestrictedToDenmark')
|
||||
spoken_subtitles = asset.get('Target') == 'SpokenSubtitles'
|
||||
@@ -96,9 +98,13 @@ class DRTVIE(InfoExtractor):
|
||||
preference = -1
|
||||
format_id += '-spoken-subtitles'
|
||||
if target == 'HDS':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
f4m_formats = self._extract_f4m_formats(
|
||||
uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',
|
||||
video_id, preference, f4m_id=format_id))
|
||||
video_id, preference, f4m_id=format_id)
|
||||
if kind == 'AudioResource':
|
||||
for f in f4m_formats:
|
||||
f['vcodec'] = 'none'
|
||||
formats.extend(f4m_formats)
|
||||
elif target == 'HLS':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
uri, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
@@ -112,6 +118,7 @@ class DRTVIE(InfoExtractor):
|
||||
'format_id': format_id,
|
||||
'tbr': int_or_none(bitrate),
|
||||
'ext': link.get('FileFormat'),
|
||||
'vcodec': 'none' if kind == 'AudioResource' else None,
|
||||
})
|
||||
subtitles_list = asset.get('SubtitlesList')
|
||||
if isinstance(subtitles_list, list):
|
||||
@@ -144,3 +151,58 @@ class DRTVIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class DRTVLiveIE(InfoExtractor):
    """Extractor for dr.dk live TV channel pages."""

    IE_NAME = 'drtv:live'
    _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv|TV)/live/(?P<id>[\da-z-]+)'
    _TEST = {
        'url': 'https://www.dr.dk/tv/live/dr1',
        'info_dict': {
            'id': 'dr1',
            'ext': 'mp4',
            'title': 're:^DR1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Fetch the channel JSON and build HLS/HDS formats from its servers."""
        channel_id = self._match_id(url)
        channel_data = self._download_json(
            'https://www.dr.dk/mu-online/api/1.0/channel/' + channel_id,
            channel_id)
        title = self._live_title(channel_data['Title'])

        def iter_streams():
            # Yield (link type, '<server>/<stream path>') for every stream
            # that has both a server and a path
            for server_info in channel_data.get('StreamingServers', []):
                host = server_info.get('Server')
                if not host:
                    continue
                kind = server_info.get('LinkType')
                for quality in server_info.get('Qualities', []):
                    for stream in quality.get('Streams', []):
                        path = stream.get('Stream')
                        if path:
                            yield kind, '%s/%s' % (host, path)

        formats = []
        for kind, base_url in iter_streams():
            hls_url = update_url_query(base_url, {'b': ''})
            if kind == 'HLS':
                hls_formats = self._extract_m3u8_formats(
                    hls_url, channel_id, 'mp4',
                    m3u8_id=kind, fatal=False, live=True)
                formats.extend(hls_formats)
            elif kind == 'HDS':
                hds_url = update_url_query(base_url, {'hdcore': '3.7.0'})
                hds_formats = self._extract_f4m_formats(
                    hds_url, channel_id, f4m_id=kind, fatal=False)
                formats.extend(hds_formats)
        self._sort_formats(formats)

        info = {
            'id': channel_id,
            'title': title,
            'thumbnail': channel_data.get('PrimaryImageUri'),
            'formats': formats,
            'is_live': True,
        }
        return info
|
||||
|
@@ -2,7 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
from ..utils import strip_jsonp, unified_strdate
|
||||
|
||||
|
||||
class ElPaisIE(InfoExtractor):
|
||||
@@ -29,6 +29,16 @@ class ElPaisIE(InfoExtractor):
|
||||
'description': 'Que sí, que las cápsulas son cómodas. Pero si le pides algo más a la vida, quizá deberías aprender a usar bien la cafetera italiana. No tienes más que ver este vídeo y seguir sus siete normas básicas.',
|
||||
'upload_date': '20160303',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://elpais.com/elpais/2017/01/26/ciencia/1485456786_417876.html',
|
||||
'md5': '9c79923a118a067e1a45789e1e0b0f9c',
|
||||
'info_dict': {
|
||||
'id': '1485456786_417876',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hallado un barco de la antigua Roma que naufragó en Baleares hace 1.800 años',
|
||||
'description': 'La nave portaba cientos de ánforas y se hundió cerca de la isla de Cabrera por razones desconocidas',
|
||||
'upload_date': '20170127',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -37,8 +47,15 @@ class ElPaisIE(InfoExtractor):
|
||||
|
||||
prefix = self._html_search_regex(
|
||||
r'var\s+url_cache\s*=\s*"([^"]+)";', webpage, 'URL prefix')
|
||||
video_suffix = self._search_regex(
|
||||
r"(?:URLMediaFile|urlVideo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", webpage, 'video URL')
|
||||
id_multimedia = self._search_regex(
|
||||
r"id_multimedia\s*=\s*'([^']+)'", webpage, 'ID multimedia', default=None)
|
||||
if id_multimedia:
|
||||
url_info = self._download_json(
|
||||
'http://elpais.com/vdpep/1/?pepid=' + id_multimedia, video_id, transform_source=strip_jsonp)
|
||||
video_suffix = url_info['mp4']
|
||||
else:
|
||||
video_suffix = self._search_regex(
|
||||
r"(?:URLMediaFile|urlVideo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", webpage, 'video URL')
|
||||
video_url = prefix + video_suffix
|
||||
thumbnail_suffix = self._search_regex(
|
||||
r"(?:URLMediaStill|urlFotogramaFijo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'",
|
||||
|
@@ -82,7 +82,7 @@ from .awaan import (
|
||||
)
|
||||
from .azmedien import (
|
||||
AZMedienIE,
|
||||
AZMedienShowIE,
|
||||
AZMedienPlaylistIE,
|
||||
)
|
||||
from .azubu import AzubuIE, AzubuLiveIE
|
||||
from .baidu import BaiduVideoIE
|
||||
@@ -103,7 +103,10 @@ from .beatport import BeatportIE
|
||||
from .bet import BetIE
|
||||
from .bigflix import BigflixIE
|
||||
from .bild import BildIE
|
||||
from .bilibili import BiliBiliIE
|
||||
from .bilibili import (
|
||||
BiliBiliIE,
|
||||
BiliBiliBangumiIE,
|
||||
)
|
||||
from .biobiochiletv import BioBioChileTVIE
|
||||
from .biqle import BIQLEIE
|
||||
from .bleacherreport import (
|
||||
@@ -245,7 +248,10 @@ from .dramafever import (
|
||||
from .dreisat import DreiSatIE
|
||||
from .drbonanza import DRBonanzaIE
|
||||
from .drtuber import DrTuberIE
|
||||
from .drtv import DRTVIE
|
||||
from .drtv import (
|
||||
DRTVIE,
|
||||
DRTVLiveIE,
|
||||
)
|
||||
from .dvtv import DVTVIE
|
||||
from .dumpert import DumpertIE
|
||||
from .defense import DefenseGouvFrIE
|
||||
@@ -296,6 +302,10 @@ from .fc2 import (
|
||||
FC2EmbedIE,
|
||||
)
|
||||
from .fczenit import FczenitIE
|
||||
from .filmon import (
|
||||
FilmOnIE,
|
||||
FilmOnChannelIE,
|
||||
)
|
||||
from .firstpost import FirstpostIE
|
||||
from .firsttv import FirstTVIE
|
||||
from .fivemin import FiveMinIE
|
||||
@@ -415,6 +425,7 @@ from .internetvideoarchive import InternetVideoArchiveIE
|
||||
from .iprima import IPrimaIE
|
||||
from .iqiyi import IqiyiIE
|
||||
from .ir90tv import Ir90TvIE
|
||||
from .itv import ITVIE
|
||||
from .ivi import (
|
||||
IviIE,
|
||||
IviCompilationIE
|
||||
@@ -553,6 +564,7 @@ from .mtv import (
|
||||
MTVVideoIE,
|
||||
MTVServicesEmbeddedIE,
|
||||
MTVDEIE,
|
||||
MTV81IE,
|
||||
)
|
||||
from .muenchentv import MuenchenTVIE
|
||||
from .musicplayon import MusicPlayOnIE
|
||||
@@ -666,6 +678,7 @@ from .nrk import (
|
||||
NRKTVIE,
|
||||
NRKTVDirekteIE,
|
||||
NRKTVEpisodesIE,
|
||||
NRKTVSeriesIE,
|
||||
)
|
||||
from .ntvde import NTVDeIE
|
||||
from .ntvru import NTVRuIE
|
||||
@@ -878,10 +891,7 @@ from .spiegeltv import SpiegeltvIE
|
||||
from .spike import SpikeIE
|
||||
from .stitcher import StitcherIE
|
||||
from .sport5 import Sport5IE
|
||||
from .sportbox import (
|
||||
SportBoxIE,
|
||||
SportBoxEmbedIE,
|
||||
)
|
||||
from .sportbox import SportBoxEmbedIE
|
||||
from .sportdeutschland import SportDeutschlandIE
|
||||
from .sportschau import SportschauIE
|
||||
from .srgssr import (
|
||||
@@ -1129,7 +1139,10 @@ from .vk import (
|
||||
VKUserVideosIE,
|
||||
VKWallPostIE,
|
||||
)
|
||||
from .vlive import VLiveIE
|
||||
from .vlive import (
|
||||
VLiveIE,
|
||||
VLiveChannelIE
|
||||
)
|
||||
from .vodlocker import VodlockerIE
|
||||
from .vodplatform import VODPlatformIE
|
||||
from .voicerepublic import VoiceRepublicIE
|
||||
|
@@ -12,14 +12,16 @@ from ..compat import (
|
||||
compat_urllib_parse_unquote_plus,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
error_to_compat_str,
|
||||
ExtractorError,
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
limit_length,
|
||||
sanitized_Request,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
get_element_by_id,
|
||||
clean_html,
|
||||
)
|
||||
|
||||
|
||||
@@ -71,7 +73,7 @@ class FacebookIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '274175099429670',
|
||||
'ext': 'mp4',
|
||||
'title': 'Facebook video #274175099429670',
|
||||
'title': 'Asif Nawab Butt posted a video to his Timeline.',
|
||||
'uploader': 'Asif Nawab Butt',
|
||||
'upload_date': '20140506',
|
||||
'timestamp': 1399398998,
|
||||
@@ -243,14 +245,30 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
video_data = None
|
||||
|
||||
def extract_video_data(instances):
    """Return the videoData of the VideoConfig instance matching video_id.

    video_id comes from the enclosing scope. Falls through (returning
    None) when no instance matches.
    """
    for instance in instances:
        if item_is_not_video_config(instance):
            continue
        config = instance[2][0]
        if config.get('video_id') != video_id:
            continue
        return config['videoData']


def item_is_not_video_config(instance):
    """Tell whether an instance entry is anything other than a VideoConfig."""
    return instance[1][0] != 'VideoConfig'
|
||||
|
||||
server_js_data = self._parse_json(self._search_regex(
|
||||
r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id)
|
||||
for item in server_js_data.get('instances', []):
|
||||
if item[1][0] == 'VideoConfig':
|
||||
video_item = item[2][0]
|
||||
if video_item.get('video_id') == video_id:
|
||||
video_data = video_item['videoData']
|
||||
break
|
||||
r'handleServerJS\(({.+})(?:\);|,")', webpage,
|
||||
'server js data', default='{}'), video_id, fatal=False)
|
||||
|
||||
if server_js_data:
|
||||
video_data = extract_video_data(server_js_data.get('instances', []))
|
||||
|
||||
if not video_data:
|
||||
server_js_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+stream_pagelet',
|
||||
webpage, 'js data', default='{}'),
|
||||
video_id, transform_source=js_to_json, fatal=False)
|
||||
if server_js_data:
|
||||
video_data = extract_video_data(try_get(
|
||||
server_js_data, lambda x: x['jsmods']['instances'],
|
||||
list) or [])
|
||||
|
||||
if not video_data:
|
||||
if not fatal_if_no_video:
|
||||
@@ -300,10 +318,16 @@ class FacebookIE(InfoExtractor):
|
||||
video_title = self._html_search_regex(
|
||||
r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
|
||||
webpage, 'alternative title', default=None)
|
||||
video_title = limit_length(video_title, 80)
|
||||
if not video_title:
|
||||
video_title = self._html_search_meta(
|
||||
'description', webpage, 'title')
|
||||
if video_title:
|
||||
video_title = limit_length(video_title, 80)
|
||||
else:
|
||||
video_title = 'Facebook video #%s' % video_id
|
||||
uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
|
||||
uploader = clean_html(get_element_by_id(
|
||||
'fbPhotoPageAuthorName', webpage)) or self._search_regex(
|
||||
r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader', fatal=False)
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
|
||||
'timestamp', default=None))
|
||||
|
178
youtube_dl/extractor/filmon.py
Normal file
178
youtube_dl/extractor/filmon.py
Normal file
@@ -0,0 +1,178 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_HTTPError,
|
||||
)
|
||||
from ..utils import (
|
||||
qualities,
|
||||
strip_or_none,
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class FilmOnIE(InfoExtractor):
    """Extractor for FilmOn VOD pages and internal ``filmon:<id>`` URLs.

    Items whose ``type_id`` is 1 are returned as a playlist of their
    episodes; everything else is returned as a single video.
    """

    IE_NAME = 'filmon'
    _VALID_URL = r'(?:https?://(?:www\.)?filmon\.com/vod/view/|filmon:)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.filmon.com/vod/view/24869-0-plan-9-from-outer-space',
        'info_dict': {
            'id': '24869',
            'ext': 'mp4',
            'title': 'Plan 9 From Outer Space',
            'description': 'Dead human, zombies and vampires',
        },
    }, {
        'url': 'https://www.filmon.com/vod/view/2825-1-popeye-series-1',
        'info_dict': {
            'id': '2825',
            'title': 'Popeye Series 1',
            'description': 'The original series of Popeye.',
        },
        'playlist_mincount': 8,
    }]

    def _real_extract(self, url):
        """Query the VOD API and return either a playlist or a video dict.

        Raises ExtractorError (expected) carrying the API's ``reason``
        when the API answers with an HTTP error.
        """
        video_id = self._match_id(url)

        try:
            response = self._download_json(
                'https://www.filmon.com/api/vod/movie?id=%s' % video_id,
                video_id)['response']
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError):
                errmsg = self._parse_json(e.cause.read().decode(), video_id)['reason']
                raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)
            raise

        title = response['title']
        description = strip_or_none(response.get('description'))

        if response.get('type_id') == 1:
            # `or []` also covers an explicit JSON null; compat_str keeps the
            # concatenation valid should the API hand back numeric ids
            entries = [
                self.url_result('filmon:' + compat_str(episode_id))
                for episode_id in response.get('episodes') or []]
            return self.playlist_result(entries, video_id, title, description)

        QUALITY = qualities(('low', 'high'))
        formats = []
        for format_id, stream in (response.get('streams') or {}).items():
            stream_url = stream.get('url')
            if not stream_url:
                continue
            formats.append({
                'format_id': format_id,
                'url': stream_url,
                'ext': 'mp4',
                'quality': QUALITY(stream.get('quality')),
                'protocol': 'm3u8_native',
            })
        self._sort_formats(formats)

        thumbnails = []
        poster = response.get('poster') or {}
        # Work on a copy: adding the poster to the API's own 'thumbs' dict
        # would mutate the response and make poster reference itself
        thumbs = dict(poster.get('thumbs') or {})
        thumbs['poster'] = poster
        for thumb_id, thumb in thumbs.items():
            thumb_url = thumb.get('url')
            if not thumb_url:
                continue
            thumbnails.append({
                'id': thumb_id,
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'thumbnails': thumbnails,
        }
|
||||
|
||||
|
||||
class FilmOnChannelIE(InfoExtractor):
    """Extractor for FilmOn ``/tv/`` and ``/channel/`` pages (live or VOD)."""

    IE_NAME = 'filmon:channel'
    _VALID_URL = r'https?://(?:www\.)?filmon\.com/(?:tv|channel)/(?P<id>[a-z0-9-]+)'
    _TESTS = [{
        # VOD
        'url': 'http://www.filmon.com/tv/sports-haters',
        'info_dict': {
            'id': '4190',
            'ext': 'mp4',
            'title': 'Sports Haters',
            'description': 'md5:dabcb4c1d9cfc77085612f1a85f8275d',
        },
    }, {
        # LIVE
        'url': 'https://www.filmon.com/channel/filmon-sports',
        'only_matching': True,
    }, {
        'url': 'https://www.filmon.com/tv/2894',
        'only_matching': True,
    }]

    # (thumbnail id, width, height) of the static channel logos
    _THUMBNAIL_RES = [
        ('logo', 56, 28),
        ('big_logo', 106, 106),
        ('extra_big_logo', 300, 300),
    ]

    def _real_extract(self, url):
        """Query the channel API and return the channel's info dict.

        Raises ExtractorError (expected) carrying the API's ``message``
        when the API answers with an HTTP error.
        """
        channel_id = self._match_id(url)

        try:
            api_reply = self._download_json(
                'http://www.filmon.com/api-v2/channel/' + channel_id, channel_id)
            channel_data = api_reply['data']
        except ExtractorError as e:
            if not isinstance(e.cause, compat_HTTPError):
                raise
            errmsg = self._parse_json(e.cause.read().decode(), channel_id)['message']
            raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)

        # From here on the numeric id reported by the API is used
        channel_id = compat_str(channel_data['id'])
        is_live = not (channel_data.get('is_vod') or channel_data.get('is_vox'))
        title = channel_data['title']

        QUALITY = qualities(('low', 'high'))
        formats = []
        for stream in channel_data.get('streams', []):
            stream_url = stream.get('url')
            if not stream_url:
                continue
            if is_live:
                quality = stream.get('quality')
                formats.append({
                    'format_id': quality,
                    # this is an m3u8 stream, but we are deliberately not using _extract_m3u8_formats
                    # because it doesn't have bitrate variants anyway
                    'url': stream_url,
                    'ext': 'mp4',
                    'quality': QUALITY(quality),
                })
            else:
                formats.extend(self._extract_wowza_formats(
                    stream_url, channel_id, skip_protocols=['dash', 'rtmp', 'rtsp']))
        self._sort_formats(formats)

        thumbnails = [{
            'id': name,
            'url': 'http://static.filmon.com/assets/channels/%s/%s.png' % (channel_id, name),
            'width': width,
            'height': height,
        } for name, width, height in self._THUMBNAIL_RES]

        return {
            'id': channel_id,
            'display_id': channel_data.get('alias'),
            'title': self._live_title(title) if is_live else title,
            'description': channel_data.get('description'),
            'thumbnails': thumbnails,
            'formats': formats,
            'is_live': is_live,
        }
|
@@ -43,7 +43,10 @@ class GoIE(InfoExtractor):
|
||||
sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
if not video_id:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(r'data-video-id=["\']VDKA(\w+)', webpage, 'video id')
|
||||
video_id = self._search_regex(
|
||||
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
||||
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
||||
r'data-video-id=["\']*VDKA(\w+)', webpage, 'video id')
|
||||
brand = self._BRANDS[sub_domain]
|
||||
video_data = self._download_json(
|
||||
'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (brand, video_id),
|
||||
|
@@ -4,7 +4,10 @@ from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import determine_ext
|
||||
from .bokecc import BokeCCBaseIE
|
||||
|
||||
@@ -33,9 +36,21 @@ class InfoQIE(BokeCCBaseIE):
|
||||
'ext': 'flv',
|
||||
'description': 'md5:308d981fb28fa42f49f9568322c683ff',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.infoq.com/presentations/Simple-Made-Easy',
|
||||
'md5': '0e34642d4d9ef44bf86f66f6399672db',
|
||||
'info_dict': {
|
||||
'id': 'Simple-Made-Easy',
|
||||
'title': 'Simple Made Easy',
|
||||
'ext': 'mp3',
|
||||
'description': 'md5:3e0e213a8bbd074796ef89ea35ada25b',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestaudio',
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_rtmp_videos(self, webpage):
|
||||
def _extract_rtmp_video(self, webpage):
|
||||
# The server URL is hardcoded
|
||||
video_url = 'rtmpe://video.infoq.com/cfx/st/'
|
||||
|
||||
@@ -47,28 +62,53 @@ class InfoQIE(BokeCCBaseIE):
|
||||
playpath = 'mp4:' + real_id
|
||||
|
||||
return [{
|
||||
'format_id': 'rtmp',
|
||||
'format_id': 'rtmp_video',
|
||||
'url': video_url,
|
||||
'ext': determine_ext(playpath),
|
||||
'play_path': playpath,
|
||||
}]
|
||||
|
||||
def _extract_http_videos(self, webpage):
|
||||
http_video_url = self._search_regex(r'P\.s\s*=\s*\'([^\']+)\'', webpage, 'video URL')
|
||||
|
||||
def _extract_cookies(self, webpage):
    """Build the CloudFront Cookie header value from the page's InfoQConstants."""
    # (pattern, field name) pairs, looked up in this order
    lookups = (
        (r'InfoQConstants.scp\s*=\s*\'([^\']+)\'', 'policy'),
        (r'InfoQConstants.scs\s*=\s*\'([^\']+)\'', 'signature'),
        (r'InfoQConstants.sck\s*=\s*\'([^\']+)\'', 'key-pair-id'),
    )
    values = tuple([
        self._search_regex(pattern, webpage, label)
        for pattern, label in lookups])
    return 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % values
|
||||
|
||||
def _extract_http_video(self, webpage):
|
||||
http_video_url = self._search_regex(r'P\.s\s*=\s*\'([^\']+)\'', webpage, 'video URL')
|
||||
return [{
|
||||
'format_id': 'http',
|
||||
'format_id': 'http_video',
|
||||
'url': http_video_url,
|
||||
'http_headers': {
|
||||
'Cookie': 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % (
|
||||
policy, signature, key_pair_id),
|
||||
'Cookie': self._extract_cookies(webpage)
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_http_audio(self, webpage, video_id):
    # Return a list with a single audio-only HTTP format, or an empty list
    # when the page offers no MP3 download or the file is not reachable.
    fields = self._hidden_inputs(webpage)
    # Use .get() so that a page without the 'filename' hidden input falls
    # through to the "no audio" path; a plain subscript would raise before
    # the guard below could ever run.
    http_audio_url = fields.get('filename')
    if not http_audio_url:
        return []

    cookies_header = {'Cookie': self._extract_cookies(webpage)}

    # base URL is found in the Location header in the response returned by
    # GET https://www.infoq.com/mp3download.action?filename=... when logged in.
    http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url)

    # The audio file sometimes seems to be missing even if there is a
    # download link, so probe the URL to make sure.
    if not self._is_valid_url(http_audio_url, video_id, headers=cookies_header):
        return []

    return [{
        'format_id': 'http_audio',
        'url': http_audio_url,
        'vcodec': 'none',
        'http_headers': cookies_header,
    }]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
@@ -80,7 +120,10 @@ class InfoQIE(BokeCCBaseIE):
|
||||
# for China videos, HTTP video URL exists but always fails with 403
|
||||
formats = self._extract_bokecc_formats(webpage, video_id)
|
||||
else:
|
||||
formats = self._extract_rtmp_videos(webpage) + self._extract_http_videos(webpage)
|
||||
formats = (
|
||||
self._extract_rtmp_video(webpage) +
|
||||
self._extract_http_video(webpage) +
|
||||
self._extract_http_audio(webpage, video_id))
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@@ -65,7 +65,7 @@ class IPrimaIE(InfoExtractor):
|
||||
|
||||
options = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)var\s+playerOptions\s*=\s*({.+?});',
|
||||
r'(?s)(?:TDIPlayerOptions|playerOptions)\s*=\s*({.+?});\s*\]\]',
|
||||
playerpage, 'player options', default='{}'),
|
||||
video_id, transform_source=js_to_json, fatal=False)
|
||||
if options:
|
||||
|
196
youtube_dl/extractor/itv.py
Normal file
196
youtube_dl/extractor/itv.py
Normal file
@@ -0,0 +1,196 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import uuid
|
||||
import xml.etree.ElementTree as etree
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_etree_register_namespace,
|
||||
)
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
xpath_with_ns,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
class ITVIE(InfoExtractor):
    # Extractor for itv.com/hub programme pages. Formats come from two
    # sources: a SOAP "GetPlaylist" call (RTMP streams, captions, metadata)
    # and, when the page advertises it, a JSON playlist requested with an
    # iOS-like device description.
    _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
    _TEST = {
        'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053',
        'info_dict': {
            'id': '2a2936a0053',
            'ext': 'flv',
            'title': 'Home Movie',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # The element with id="video" carries the data-* attributes used
        # below (production id, playlist URLs, hmac).
        params = extract_attributes(self._search_regex(
            r'(?s)(<[^>]+id="video"[^>]*>)', webpage, 'params'))

        ns_map = {
            'soapenv': 'http://schemas.xmlsoap.org/soap/envelope/',
            'tem': 'http://tempuri.org/',
            'itv': 'http://schemas.datacontract.org/2004/07/Itv.BB.Mercury.Common.Types',
            'com': 'http://schemas.itv.com/2009/05/Common',
        }
        for ns, full_ns in ns_map.items():
            compat_etree_register_namespace(ns, full_ns)

        def _add_ns(name):
            # Expand a 'prefix:tag' name using ns_map.
            return xpath_with_ns(name, ns_map)

        def _add_sub_element(element, name):
            # Append a namespaced child element and return it.
            return etree.SubElement(element, _add_ns(name))

        # Build the SOAP envelope for the GetPlaylist request. Elements
        # without text are still emitted, as empty elements.
        req_env = etree.Element(_add_ns('soapenv:Envelope'))
        _add_sub_element(req_env, 'soapenv:Header')
        body = _add_sub_element(req_env, 'soapenv:Body')
        get_playlist = _add_sub_element(body, ('tem:GetPlaylist'))
        request = _add_sub_element(get_playlist, 'tem:request')
        _add_sub_element(request, 'itv:ProductionId').text = params['data-video-id']
        _add_sub_element(request, 'itv:RequestGuid').text = compat_str(uuid.uuid4()).upper()
        vodcrid = _add_sub_element(request, 'itv:Vodcrid')
        _add_sub_element(vodcrid, 'com:Id')
        _add_sub_element(request, 'itv:Partition')
        user_info = _add_sub_element(get_playlist, 'tem:userInfo')
        _add_sub_element(user_info, 'itv:Broadcaster').text = 'Itv'
        _add_sub_element(user_info, 'itv:DM')
        _add_sub_element(user_info, 'itv:RevenueScienceValue')
        _add_sub_element(user_info, 'itv:SessionId')
        _add_sub_element(user_info, 'itv:SsoToken')
        _add_sub_element(user_info, 'itv:UserToken')
        site_info = _add_sub_element(get_playlist, 'tem:siteInfo')
        _add_sub_element(site_info, 'itv:AdvertisingRestriction').text = 'None'
        _add_sub_element(site_info, 'itv:AdvertisingSite').text = 'ITV'
        _add_sub_element(site_info, 'itv:AdvertisingType').text = 'Any'
        _add_sub_element(site_info, 'itv:Area').text = 'ITVPLAYER.VIDEO'
        _add_sub_element(site_info, 'itv:Category')
        _add_sub_element(site_info, 'itv:Platform').text = 'DotCom'
        _add_sub_element(site_info, 'itv:Site').text = 'ItvCom'
        device_info = _add_sub_element(get_playlist, 'tem:deviceInfo')
        _add_sub_element(device_info, 'itv:ScreenSize').text = 'Big'
        player_info = _add_sub_element(get_playlist, 'tem:playerInfo')
        _add_sub_element(player_info, 'itv:Version').text = '2'

        headers = self.geo_verification_headers()
        headers.update({
            'Content-Type': 'text/xml; charset=utf-8',
            'SOAPAction': 'http://tempuri.org/PlaylistService/GetPlaylist',
        })
        resp_env = self._download_xml(
            params['data-playlist-url'], video_id,
            headers=headers, data=etree.tostring(req_env))
        playlist = xpath_element(resp_env, './/Playlist')
        if playlist is None:
            # No playlist in the response: surface the service's fault text.
            fault_string = xpath_text(resp_env, './/faultstring')
            raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string))
        title = xpath_text(playlist, 'EpisodeTitle', fatal=True)
        video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
        media_files = xpath_element(video_element, 'MediaFiles', fatal=True)
        rtmp_url = media_files.attrib['base']

        # One RTMP format per MediaFile that has a play path.
        formats = []
        for media_file in media_files.findall('MediaFile'):
            play_path = xpath_text(media_file, 'URL')
            if not play_path:
                continue
            tbr = int_or_none(media_file.get('bitrate'), 1000)
            formats.append({
                'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''),
                'url': rtmp_url,
                'play_path': play_path,
                'tbr': tbr,
                'ext': 'flv',
            })

        # Optional second source: JSON playlist, requested non-fatally so
        # that a failure here still leaves the RTMP formats usable.
        ios_playlist_url = params.get('data-video-playlist')
        hmac = params.get('data-video-hmac')
        if ios_playlist_url and hmac:
            headers = self.geo_verification_headers()
            headers.update({
                'Accept': 'application/vnd.itv.vod.playlist.v2+json',
                'Content-Type': 'application/json',
                'hmac': hmac.upper(),
            })
            ios_playlist = self._download_json(
                ios_playlist_url, video_id, data=json.dumps({
                    'user': {
                        'itvUserId': '',
                        'entitlements': [],
                        'token': ''
                    },
                    'device': {
                        'manufacturer': 'Apple',
                        'model': 'iPad',
                        'os': {
                            'name': 'iPhone OS',
                            'version': '9.3',
                            'type': 'ios'
                        }
                    },
                    'client': {
                        'version': '4.1',
                        'id': 'browser'
                    },
                    'variantAvailability': {
                        'featureset': {
                            'min': ['hls', 'aes'],
                            'max': ['hls', 'aes']
                        },
                        'platformTag': 'mobile'
                    }
                }).encode(), headers=headers, fatal=False)
            if ios_playlist:
                video_data = ios_playlist.get('Playlist', {}).get('Video', {})
                ios_base_url = video_data.get('Base')
                for media_file in video_data.get('MediaFiles', []):
                    href = media_file.get('Href')
                    if not href:
                        continue
                    if ios_base_url:
                        href = ios_base_url + href
                    ext = determine_ext(href)
                    if ext == 'm3u8':
                        formats.extend(self._extract_m3u8_formats(href, video_id, 'mp4', m3u8_id='hls', fatal=False))
                    else:
                        formats.append({
                            'url': href,
                        })
        self._sort_formats(formats)

        # All caption URLs are filed under 'en'; an 'xml' extension is
        # reported as 'ttml'.
        subtitles = {}
        for caption_url in video_element.findall('ClosedCaptioningURIs/URL'):
            if not caption_url.text:
                continue
            ext = determine_ext(caption_url.text, 'ttml')
            subtitles.setdefault('en', []).append({
                'url': caption_url.text,
                'ext': 'ttml' if ext == 'xml' else ext,
            })

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'episode_title': title,
            'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
            'series': xpath_text(playlist, 'ProgrammeTitle'),
            # Key was previously misspelled 'duartion', so the duration was
            # never reported under the standard field name.
            'duration': parse_duration(xpath_text(playlist, 'Duration')),
        }
|
@@ -266,9 +266,12 @@ class KalturaIE(InfoExtractor):
|
||||
# skip for now.
|
||||
if f.get('fileExt') == 'chun':
|
||||
continue
|
||||
if not f.get('fileExt') and f.get('containerFormat') == 'qt':
|
||||
if not f.get('fileExt'):
|
||||
# QT indicates QuickTime; some videos have broken fileExt
|
||||
f['fileExt'] = 'mov'
|
||||
if f.get('containerFormat') == 'qt':
|
||||
f['fileExt'] = 'mov'
|
||||
else:
|
||||
f['fileExt'] = 'mp4'
|
||||
video_url = sign_url(
|
||||
'%s/flavorId/%s' % (data_url, f['id']))
|
||||
# audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g
|
||||
@@ -319,6 +322,6 @@ class KalturaIE(InfoExtractor):
|
||||
'thumbnail': info.get('thumbnailUrl'),
|
||||
'duration': info.get('duration'),
|
||||
'timestamp': info.get('createdAt'),
|
||||
'uploader_id': info.get('userId'),
|
||||
'uploader_id': info.get('userId') if info.get('userId') != 'None' else None,
|
||||
'view_count': info.get('plays'),
|
||||
}
|
||||
|
@@ -11,22 +11,22 @@ from ..utils import (
|
||||
|
||||
|
||||
class KonserthusetPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?konserthusetplay\.se/\?.*\bm=(?P<id>[^&]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:konserthusetplay|rspoplay)\.se/\?.*\bm=(?P<id>[^&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.konserthusetplay.se/?m=CKDDnlCY-dhWAAqiMERd-A',
|
||||
'md5': 'e3fd47bf44e864bd23c08e487abe1967',
|
||||
'info_dict': {
|
||||
'id': 'CKDDnlCY-dhWAAqiMERd-A',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Orkesterns instrument: Valthornen',
|
||||
'description': 'md5:f10e1f0030202020396a4d712d2fa827',
|
||||
'thumbnail': 're:^https?://.*$',
|
||||
'duration': 398.8,
|
||||
'duration': 398.76,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://rspoplay.se/?m=elWuEH34SMKvaO4wO_cHBw',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@@ -327,6 +327,35 @@ class MTVIE(MTVServicesInfoExtractor):
|
||||
}]
|
||||
|
||||
|
||||
class MTV81IE(InfoExtractor):
    # Resolves mtv81.com video pages to the mtvnservices embed URL for the
    # mgid found in the page.
    IE_NAME = 'mtv81'
    _VALID_URL = r'https?://(?:www\.)?mtv81\.com/videos/(?P<id>[^/?#.]+)'

    _TEST = {
        'url': 'http://www.mtv81.com/videos/artist-to-watch/the-godfather-of-japanese-hip-hop-segment-1/',
        'md5': '1edbcdf1e7628e414a8c5dcebca3d32b',
        'info_dict': {
            'id': '5e14040d-18a4-47c4-a582-43ff602de88e',
            'ext': 'mp4',
            'title': 'Unlocking The Truth|July 18, 2016|1|101|Trailer',
            'description': '"Unlocking the Truth" premieres August 17th at 11/10c.',
            'timestamp': 1468846800,
            'upload_date': '20160718',
        },
    }

    def _extract_mgid(self, webpage):
        # The mgid is the quoted first argument of a getTheVideo(...) call.
        mgid_pattern = r'getTheVideo\((["\'])(?P<id>mgid:.+?)\1'
        return self._search_regex(mgid_pattern, webpage, 'mgid', group='id')

    def _real_extract(self, url):
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)
        embed_url = 'http://media.mtvnservices.com/embed/%s' % self._extract_mgid(page)
        return self.url_result(embed_url)
|
||||
|
||||
|
||||
class MTVVideoIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'mtv:video'
|
||||
_VALID_URL = r'''(?x)^https?://
|
||||
|
@@ -17,9 +17,10 @@ class MySpaceIE(InfoExtractor):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919',
|
||||
'md5': '9c1483c106f4a695c47d2911feed50a7',
|
||||
'info_dict': {
|
||||
'id': '109594919',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Little Big Town',
|
||||
'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
|
||||
'uploader': 'Five Minutes to the Stage',
|
||||
@@ -27,37 +28,30 @@ class MySpaceIE(InfoExtractor):
|
||||
'timestamp': 1414108751,
|
||||
'upload_date': '20141023',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# songs
|
||||
{
|
||||
'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',
|
||||
'md5': '1d7ee4604a3da226dd69a123f748b262',
|
||||
'info_dict': {
|
||||
'id': '93388656',
|
||||
'ext': 'flv',
|
||||
'ext': 'm4a',
|
||||
'title': 'Of weakened soul...',
|
||||
'uploader': 'Killsorrow',
|
||||
'uploader_id': 'killsorrow',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'add_ie': ['Vevo'],
|
||||
'add_ie': ['Youtube'],
|
||||
'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',
|
||||
'info_dict': {
|
||||
'id': 'USZM20600099',
|
||||
'ext': 'mp4',
|
||||
'title': 'Animal I Have Become',
|
||||
'uploader': 'Three Days Grace',
|
||||
'timestamp': int,
|
||||
'upload_date': '20060502',
|
||||
'id': 'xqds0B_meys',
|
||||
'ext': 'webm',
|
||||
'title': 'Three Days Grace - Animal I Have Become',
|
||||
'description': 'md5:8bd86b3693e72a077cf863a8530c54bb',
|
||||
'uploader': 'ThreeDaysGraceVEVO',
|
||||
'uploader_id': 'ThreeDaysGraceVEVO',
|
||||
'upload_date': '20091002',
|
||||
},
|
||||
'skip': 'VEVO is only available in some countries',
|
||||
}, {
|
||||
'add_ie': ['Youtube'],
|
||||
'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
|
||||
@@ -76,24 +70,46 @@ class MySpaceIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
is_song = mobj.group('mediatype').startswith('music/song')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
player_url = self._search_regex(
|
||||
r'playerSwf":"([^"?]*)', webpage, 'player URL')
|
||||
r'videoSwf":"([^"?]*)', webpage, 'player URL', fatal=False)
|
||||
|
||||
def rtmp_format_from_stream_url(stream_url, width=None, height=None):
|
||||
rtmp_url, play_path = stream_url.split(';', 1)
|
||||
return {
|
||||
'format_id': 'rtmp',
|
||||
'url': rtmp_url,
|
||||
'play_path': play_path,
|
||||
'player_url': player_url,
|
||||
'protocol': 'rtmp',
|
||||
'ext': 'flv',
|
||||
'width': width,
|
||||
'height': height,
|
||||
}
|
||||
def formats_from_stream_urls(stream_url, hls_stream_url, http_stream_url, width=None, height=None):
|
||||
formats = []
|
||||
vcodec = 'none' if is_song else None
|
||||
if hls_stream_url:
|
||||
formats.append({
|
||||
'format_id': 'hls',
|
||||
'url': hls_stream_url,
|
||||
'protocol': 'm3u8_native',
|
||||
'ext': 'm4a' if is_song else 'mp4',
|
||||
'vcodec': vcodec,
|
||||
})
|
||||
if stream_url and player_url:
|
||||
rtmp_url, play_path = stream_url.split(';', 1)
|
||||
formats.append({
|
||||
'format_id': 'rtmp',
|
||||
'url': rtmp_url,
|
||||
'play_path': play_path,
|
||||
'player_url': player_url,
|
||||
'protocol': 'rtmp',
|
||||
'ext': 'flv',
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vcodec': vcodec,
|
||||
})
|
||||
if http_stream_url:
|
||||
formats.append({
|
||||
'format_id': 'http',
|
||||
'url': http_stream_url,
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vcodec': vcodec,
|
||||
})
|
||||
return formats
|
||||
|
||||
if mobj.group('mediatype').startswith('music/song'):
|
||||
if is_song:
|
||||
# songs don't store any useful info in the 'context' variable
|
||||
song_data = self._search_regex(
|
||||
r'''<button.*data-song-id=(["\'])%s\1.*''' % video_id,
|
||||
@@ -108,8 +124,10 @@ class MySpaceIE(InfoExtractor):
|
||||
return self._search_regex(
|
||||
r'''data-%s=([\'"])(?P<data>.*?)\1''' % name,
|
||||
song_data, name, default='', group='data')
|
||||
stream_url = search_data('stream-url')
|
||||
if not stream_url:
|
||||
formats = formats_from_stream_urls(
|
||||
search_data('stream-url'), search_data('hls-stream-url'),
|
||||
search_data('http-stream-url'))
|
||||
if not formats:
|
||||
vevo_id = search_data('vevo-id')
|
||||
youtube_id = search_data('youtube-id')
|
||||
if vevo_id:
|
||||
@@ -121,6 +139,7 @@ class MySpaceIE(InfoExtractor):
|
||||
else:
|
||||
raise ExtractorError(
|
||||
'Found song but don\'t know how to download it')
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
@@ -128,27 +147,16 @@ class MySpaceIE(InfoExtractor):
|
||||
'uploader_id': search_data('artist-username'),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'duration': int_or_none(search_data('duration')),
|
||||
'formats': [rtmp_format_from_stream_url(stream_url)]
|
||||
'formats': formats,
|
||||
}
|
||||
else:
|
||||
video = self._parse_json(self._search_regex(
|
||||
r'context = ({.*?});', webpage, 'context'),
|
||||
video_id)['video']
|
||||
formats = []
|
||||
hls_stream_url = video.get('hlsStreamUrl')
|
||||
if hls_stream_url:
|
||||
formats.append({
|
||||
'format_id': 'hls',
|
||||
'url': hls_stream_url,
|
||||
'protocol': 'm3u8_native',
|
||||
'ext': 'mp4',
|
||||
})
|
||||
stream_url = video.get('streamUrl')
|
||||
if stream_url:
|
||||
formats.append(rtmp_format_from_stream_url(
|
||||
stream_url,
|
||||
int_or_none(video.get('width')),
|
||||
int_or_none(video.get('height'))))
|
||||
formats = formats_from_stream_urls(
|
||||
video.get('streamUrl'), video.get('hlsStreamUrl'),
|
||||
video.get('mp4StreamUrl'), int_or_none(video.get('width')),
|
||||
int_or_none(video.get('height')))
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -241,7 +241,7 @@ class NPOIE(NPOBaseIE):
|
||||
if metadata.get('tt888') == 'ja':
|
||||
subtitles['nl'] = [{
|
||||
'ext': 'vtt',
|
||||
'url': 'http://e.omroep.nl/tt888/%s' % video_id,
|
||||
'url': 'http://tt888.omroep.nl/tt888/%s' % video_id,
|
||||
}]
|
||||
|
||||
return {
|
||||
|
@@ -128,6 +128,22 @@ class NRKBaseIE(InfoExtractor):
|
||||
series = conviva.get('seriesName') or data.get('seriesTitle')
|
||||
episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
|
||||
|
||||
season_number = None
|
||||
episode_number = None
|
||||
if data.get('mediaElementType') == 'Episode':
|
||||
_season_episode = data.get('scoresStatistics', {}).get('springStreamStream') or \
|
||||
data.get('relativeOriginUrl', '')
|
||||
EPISODENUM_RE = [
|
||||
r'/s(?P<season>\d{,2})e(?P<episode>\d{,2})\.',
|
||||
r'/sesong-(?P<season>\d{,2})/episode-(?P<episode>\d{,2})',
|
||||
]
|
||||
season_number = int_or_none(self._search_regex(
|
||||
EPISODENUM_RE, _season_episode, 'season number',
|
||||
default=None, group='season'))
|
||||
episode_number = int_or_none(self._search_regex(
|
||||
EPISODENUM_RE, _season_episode, 'episode number',
|
||||
default=None, group='episode'))
|
||||
|
||||
thumbnails = None
|
||||
images = data.get('images')
|
||||
if images and isinstance(images, dict):
|
||||
@@ -140,11 +156,15 @@ class NRKBaseIE(InfoExtractor):
|
||||
} for image in web_images if image.get('imageUrl')]
|
||||
|
||||
description = data.get('description')
|
||||
category = data.get('mediaAnalytics', {}).get('category')
|
||||
|
||||
common_info = {
|
||||
'description': description,
|
||||
'series': series,
|
||||
'episode': episode,
|
||||
'season_number': season_number,
|
||||
'episode_number': episode_number,
|
||||
'categories': [category] if category else None,
|
||||
'age_limit': parse_age_limit(data.get('legalAge')),
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
@@ -227,54 +247,102 @@ class NRKTVIE(NRKBaseIE):
|
||||
'title': '20 spørsmål 23.05.2014',
|
||||
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
||||
'duration': 1741,
|
||||
'series': '20 spørsmål - TV',
|
||||
'episode': '23.05.2014',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/program/mdfp15000514',
|
||||
'md5': '43d0be26663d380603a9cf0c24366531',
|
||||
'info_dict': {
|
||||
'id': 'MDFP15000514CA',
|
||||
'ext': 'mp4',
|
||||
'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014',
|
||||
'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db',
|
||||
'duration': 4605,
|
||||
'series': 'Kunnskapskanalen',
|
||||
'episode': '24.05.2014',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# single playlist video
|
||||
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
|
||||
'md5': 'adbd1dbd813edaf532b0a253780719c2',
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515-part2',
|
||||
'ext': 'flv',
|
||||
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
|
||||
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||
},
|
||||
'skip': 'Only works from Norway',
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Video is geo restricted'],
|
||||
'skip': 'particular part is not supported currently',
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
|
||||
'playlist': [{
|
||||
'md5': '9480285eff92d64f06e02a5367970a7a',
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515-part1',
|
||||
'ext': 'flv',
|
||||
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)',
|
||||
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||
'id': 'MSPO40010515AH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)',
|
||||
'description': 'md5:c03aba1e917561eface5214020551b7a',
|
||||
'duration': 772,
|
||||
'series': 'Tour de Ski',
|
||||
'episode': '06.01.2015',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'md5': 'adbd1dbd813edaf532b0a253780719c2',
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515-part2',
|
||||
'ext': 'flv',
|
||||
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
|
||||
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||
'id': 'MSPO40010515BH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)',
|
||||
'description': 'md5:c03aba1e917561eface5214020551b7a',
|
||||
'duration': 6175,
|
||||
'series': 'Tour de Ski',
|
||||
'episode': '06.01.2015',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}],
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515',
|
||||
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn',
|
||||
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||
'duration': 6947.52,
|
||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
|
||||
'description': 'md5:c03aba1e917561eface5214020551b7a',
|
||||
},
|
||||
'expected_warnings': ['Video is geo restricted'],
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13',
|
||||
'info_dict': {
|
||||
'id': 'KMTE50001317AA',
|
||||
'ext': 'mp4',
|
||||
'title': 'Anno 13:30',
|
||||
'description': 'md5:11d9613661a8dbe6f9bef54e3a4cbbfa',
|
||||
'duration': 2340,
|
||||
'series': 'Anno',
|
||||
'episode': '13:30',
|
||||
'season_number': 3,
|
||||
'episode_number': 13,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/nytt-paa-nytt/MUHH46000317/27-01-2017',
|
||||
'info_dict': {
|
||||
'id': 'MUHH46000317AA',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nytt på Nytt 27.01.2017',
|
||||
'description': 'md5:5358d6388fba0ea6f0b6d11c48b9eb4b',
|
||||
'duration': 1796,
|
||||
'series': 'Nytt på nytt',
|
||||
'episode': '27.01.2017',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Only works from Norway',
|
||||
}, {
|
||||
'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
|
||||
'only_matching': True,
|
||||
@@ -360,6 +428,64 @@ class NRKTVEpisodesIE(NRKPlaylistBaseIE):
|
||||
r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
|
||||
|
||||
|
||||
class NRKTVSeriesIE(InfoExtractor):
    # Playlist extractor for NRK series pages: every season id found in the
    # page becomes one entry pointing at that season's episode listing.
    _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
    _ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://tv.nrk.no/serie/groenn-glede',
        'info_dict': {
            'id': 'groenn-glede',
            'title': 'Grønn glede',
            'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
        },
        'playlist_mincount': 9,
    }, {
        'url': 'http://tv.nrksuper.no/serie/labyrint',
        'info_dict': {
            'id': 'labyrint',
            'title': 'Labyrint',
            'description': 'md5:58afd450974c89e27d5a19212eee7115',
        },
        'playlist_mincount': 3,
    }, {
        'url': 'https://tv.nrk.no/serie/broedrene-dal-og-spektralsteinene',
        'only_matching': True,
    }, {
        'url': 'https://tv.nrk.no/serie/saving-the-human-race',
        'only_matching': True,
    }, {
        'url': 'https://tv.nrk.no/serie/postmann-pat',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Defer to NRKTVIE for any URL it claims; only otherwise fall back
        # to the regular _VALID_URL matching.
        if NRKTVIE.suitable(url):
            return False
        return super(NRKTVSeriesIE, cls).suitable(url)

    def _real_extract(self, url):
        series_id = self._match_id(url)
        webpage = self._download_webpage(url, series_id)

        # One entry per season id occurrence in the page, in page order.
        entries = []
        for season_id in re.findall(self._ITEM_RE, webpage):
            season_url = 'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
                series=series_id, season=season_id)
            entries.append(self.url_result(season_url))

        # Prefer the series-specific meta tags, falling back to OpenGraph.
        title = self._html_search_meta(
            'seriestitle', webpage, 'title', default=None)
        if not title:
            title = self._og_search_title(webpage, fatal=False)

        description = self._html_search_meta(
            'series_description', webpage, 'description', default=None)
        if not description:
            description = self._og_search_description(webpage)

        return self.playlist_result(entries, series_id, title, description)
|
||||
|
||||
|
||||
class NRKSkoleIE(InfoExtractor):
|
||||
IE_DESC = 'NRK Skole'
|
||||
_VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/?\?.*\bmediaId=(?P<id>\d+)'
|
||||
|
@@ -16,18 +16,33 @@ from ..utils import (
|
||||
|
||||
class PikselIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://player\.piksel\.com/v/(?P<id>[a-z0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://player.piksel.com/v/nv60p12f',
|
||||
'md5': 'd9c17bbe9c3386344f9cfd32fad8d235',
|
||||
'info_dict': {
|
||||
'id': 'nv60p12f',
|
||||
'ext': 'mp4',
|
||||
'title': 'فن الحياة - الحلقة 1',
|
||||
'description': 'احدث برامج الداعية الاسلامي " مصطفي حسني " فى رمضان 2016علي النهار نور',
|
||||
'timestamp': 1465231790,
|
||||
'upload_date': '20160606',
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://player.piksel.com/v/nv60p12f',
|
||||
'md5': 'd9c17bbe9c3386344f9cfd32fad8d235',
|
||||
'info_dict': {
|
||||
'id': 'nv60p12f',
|
||||
'ext': 'mp4',
|
||||
'title': 'فن الحياة - الحلقة 1',
|
||||
'description': 'احدث برامج الداعية الاسلامي " مصطفي حسني " فى رمضان 2016علي النهار نور',
|
||||
'timestamp': 1465231790,
|
||||
'upload_date': '20160606',
|
||||
}
|
||||
},
|
||||
{
|
||||
# Original source: http://www.uscourts.gov/cameras-courts/state-washington-vs-donald-j-trump-et-al
|
||||
'url': 'https://player.piksel.com/v/v80kqp41',
|
||||
'md5': '753ddcd8cc8e4fa2dda4b7be0e77744d',
|
||||
'info_dict': {
|
||||
'id': 'v80kqp41',
|
||||
'ext': 'mp4',
|
||||
'title': 'WAW- State of Washington vs. Donald J. Trump, et al',
|
||||
'description': 'State of Washington vs. Donald J. Trump, et al, Case Number 17-CV-00141-JLR, TRO Hearing, Civil Rights Case, 02/3/2017, 1:00 PM (PST), Seattle Federal Courthouse, Seattle, WA, Judge James L. Robart presiding.',
|
||||
'timestamp': 1486171129,
|
||||
'upload_date': '20170204',
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
@@ -40,8 +55,10 @@ class PikselIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
app_token = self._search_regex(
|
||||
r'clientAPI\s*:\s*"([^"]+)"', webpage, 'app token')
|
||||
app_token = self._search_regex([
|
||||
r'clientAPI\s*:\s*"([^"]+)"',
|
||||
r'data-de-api-key\s*=\s*"([^"]+)"'
|
||||
], webpage, 'app token')
|
||||
response = self._download_json(
|
||||
'http://player.piksel.com/ws/ws_program/api/%s/mode/json/apiv/5' % app_token,
|
||||
video_id, query={
|
||||
|
@@ -147,16 +147,12 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||
'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge',
|
||||
'info_dict': {
|
||||
'id': '2104602',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Episode 18 - Staffel 2',
|
||||
'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
|
||||
'upload_date': '20131231',
|
||||
'duration': 5845.04,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.prosieben.de/videokatalog/Gesellschaft/Leben/Trends/video-Lady-Umstyling-f%C3%BCr-Audrina-Rebekka-Audrina-Fergen-billig-aussehen-Battal-Modica-700544.html',
|
||||
@@ -258,7 +254,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||
'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip',
|
||||
'info_dict': {
|
||||
'id': '2572814',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Andreas Kümmert: Rocket Man',
|
||||
'description': 'md5:6ddb02b0781c6adf778afea606652e38',
|
||||
'upload_date': '20131017',
|
||||
@@ -272,7 +268,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||
'url': 'http://www.fem.com/wellness/videos/wellness-video-clip-kurztripps-zum-valentinstag.html',
|
||||
'info_dict': {
|
||||
'id': '2156342',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kurztrips zum Valentinstag',
|
||||
'description': 'Romantischer Kurztrip zum Valentinstag? Nina Heinemann verrät, was sich hier wirklich lohnt.',
|
||||
'duration': 307.24,
|
||||
@@ -289,12 +285,13 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||
'description': 'md5:63b8963e71f481782aeea877658dec84',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'skip': 'This video is unavailable',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.7tv.de/circus-halligalli/615-best-of-circus-halligalli-ganze-folge',
|
||||
'info_dict': {
|
||||
'id': '4187506',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Best of Circus HalliGalli',
|
||||
'description': 'md5:8849752efd90b9772c9db6fdf87fb9e9',
|
||||
'upload_date': '20151229',
|
||||
@@ -375,7 +372,9 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||
title = self._html_search_regex(self._TITLE_REGEXES, webpage, 'title')
|
||||
info = self._extract_video_info(url, clip_id)
|
||||
description = self._html_search_regex(
|
||||
self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False)
|
||||
self._DESCRIPTION_REGEXES, webpage, 'description', default=None)
|
||||
if description is None:
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None))
|
||||
|
@@ -54,9 +54,8 @@ class RadioCanadaIE(InfoExtractor):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
device_types = ['ipad']
|
||||
if app_code != 'toutv':
|
||||
device_types.append('flash')
|
||||
if not smuggled_data:
|
||||
device_types.append('flash')
|
||||
device_types.append('android')
|
||||
|
||||
formats = []
|
||||
@@ -103,7 +102,7 @@ class RadioCanadaIE(InfoExtractor):
|
||||
continue
|
||||
f_url = re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url)
|
||||
protocol = determine_protocol({'url': f_url})
|
||||
formats.append({
|
||||
f = {
|
||||
'format_id': '%s-%d' % (protocol, tbr),
|
||||
'url': f_url,
|
||||
'ext': 'flv' if protocol == 'rtmp' else ext,
|
||||
@@ -111,7 +110,14 @@ class RadioCanadaIE(InfoExtractor):
|
||||
'width': int_or_none(url_e.get('width')),
|
||||
'height': int_or_none(url_e.get('height')),
|
||||
'tbr': tbr,
|
||||
})
|
||||
}
|
||||
mobj = re.match(r'(?P<url>rtmp://[^/]+/[^/]+)/(?P<playpath>[^?]+)(?P<auth>\?.+)', f_url)
|
||||
if mobj:
|
||||
f.update({
|
||||
'url': mobj.group('url') + mobj.group('auth'),
|
||||
'play_path': mobj.group('playpath'),
|
||||
})
|
||||
formats.append(f)
|
||||
if protocol == 'rtsp':
|
||||
base_url = self._search_regex(
|
||||
r'rtsp://([^?]+)', f_url, 'base url', default=None)
|
||||
|
@@ -81,6 +81,9 @@ class RuutuIE(InfoExtractor):
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url, video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
video_url, video_id, mpd_id='dash', fatal=False))
|
||||
else:
|
||||
proto = compat_urllib_parse_urlparse(video_url).scheme
|
||||
if not child.tag.startswith('HTTP') and proto != 'rtmp':
|
||||
|
@@ -11,60 +11,6 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class SportBoxIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://news\.sportbox\.ru/(?:[^/]+/)+spbvideo_NI\d+_(?P<display_id>.+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://news.sportbox.ru/Vidy_sporta/Avtosport/Rossijskij/spbvideo_NI483529_Gonka-2-zaezd-Obyedinenniy-2000-klassi-Turing-i-S',
|
||||
'md5': 'ff56a598c2cf411a9a38a69709e97079',
|
||||
'info_dict': {
|
||||
'id': '80822',
|
||||
'ext': 'mp4',
|
||||
'title': 'Гонка 2 заезд ««Объединенный 2000»: классы Туринг и Супер-продакшн',
|
||||
'description': 'md5:3d72dc4a006ab6805d82f037fdc637ad',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20140928',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://news.sportbox.ru/Vidy_sporta/billiard/spbvideo_NI486287_CHempionat-mira-po-dinamichnoy-piramide-4',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://news.sportbox.ru/video/no_ads/spbvideo_NI536574_V_Novorossijske_proshel_detskij_turnir_Pole_slavy_bojevoj?ci=211355',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
player = self._search_regex(
|
||||
r'src="/?(vdl/player/[^"]+)"', webpage, 'player')
|
||||
|
||||
title = self._html_search_regex(
|
||||
[r'"nodetitle"\s*:\s*"([^"]+)"', r'class="node-header_{1,2}title">([^<]+)'],
|
||||
webpage, 'title')
|
||||
description = self._og_search_description(webpage) or self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
upload_date = unified_strdate(self._html_search_meta(
|
||||
'dateCreated', webpage, 'upload date'))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': compat_urlparse.urljoin(url, '/%s' % player),
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
|
||||
|
||||
class SportBoxEmbedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://news\.sportbox\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
|
@@ -100,9 +100,13 @@ class TurnerBaseIE(AdobePassIE):
|
||||
formats.extend(self._extract_smil_formats(
|
||||
video_url, video_id, fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4',
|
||||
m3u8_id=format_id or 'hls', fatal=False))
|
||||
m3u8_id=format_id or 'hls', fatal=False)
|
||||
if '/secure/' in video_url and '?hdnea=' in video_url:
|
||||
for f in m3u8_formats:
|
||||
f['_seekable'] = False
|
||||
formats.extend(m3u8_formats)
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
update_url_query(video_url, {'hdcore': '3.7.0'}),
|
||||
|
@@ -447,7 +447,14 @@ class TwitchHighlightsIE(TwitchVideosBaseIE):
|
||||
|
||||
class TwitchStreamIE(TwitchBaseIE):
|
||||
IE_NAME = 'twitch:stream'
|
||||
_VALID_URL = r'%s/(?P<id>[^/#?]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?twitch\.tv/|
|
||||
player\.twitch\.tv/\?.*?\bchannel=
|
||||
)
|
||||
(?P<id>[^/#?]+)
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.twitch.tv/shroomztv',
|
||||
@@ -471,8 +478,25 @@ class TwitchStreamIE(TwitchBaseIE):
|
||||
}, {
|
||||
'url': 'http://www.twitch.tv/miracle_doto#profile-0',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://player.twitch.tv/?channel=lotsofs',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return (False
|
||||
if any(ie.suitable(url) for ie in (
|
||||
TwitchVideoIE,
|
||||
TwitchChapterIE,
|
||||
TwitchVodIE,
|
||||
TwitchProfileIE,
|
||||
TwitchAllVideosIE,
|
||||
TwitchUploadsIE,
|
||||
TwitchPastBroadcastsIE,
|
||||
TwitchHighlightsIE))
|
||||
else super(TwitchStreamIE, cls).suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
|
||||
|
@@ -21,12 +21,12 @@ from ..utils import (
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
std_headers,
|
||||
unified_strdate,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
unsmuggle_url,
|
||||
urlencode_postdata,
|
||||
unescapeHTML,
|
||||
parse_filesize,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
@@ -142,10 +142,19 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
note='Downloading %s m3u8 information' % cdn_name,
|
||||
fatal=False))
|
||||
elif files_type == 'dash':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
manifest_url.replace('/master.json', '/master.mpd'), video_id, format_id,
|
||||
'Downloading %s MPD information' % cdn_name,
|
||||
fatal=False))
|
||||
mpd_pattern = r'/%s/(?:sep/)?video/' % video_id
|
||||
mpd_manifest_urls = []
|
||||
if re.search(mpd_pattern, manifest_url):
|
||||
for suffix, repl in (('', 'video'), ('_sep', 'sep/video')):
|
||||
mpd_manifest_urls.append((format_id + suffix, re.sub(
|
||||
mpd_pattern, '/%s/%s/' % (video_id, repl), manifest_url)))
|
||||
else:
|
||||
mpd_manifest_urls = [(format_id, manifest_url)]
|
||||
for f_id, m_url in mpd_manifest_urls:
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
m_url.replace('/master.json', '/master.mpd'), video_id, f_id,
|
||||
'Downloading %s MPD information' % cdn_name,
|
||||
fatal=False))
|
||||
|
||||
subtitles = {}
|
||||
text_tracks = config['request'].get('text_tracks')
|
||||
@@ -204,11 +213,13 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
|
||||
'description': 'md5:2d3305bad981a06ff79f027f19865021',
|
||||
'timestamp': 1355990239,
|
||||
'upload_date': '20121220',
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user7108434',
|
||||
'uploader_id': 'user7108434',
|
||||
'uploader': 'Filippo Valsorda',
|
||||
'duration': 10,
|
||||
'license': 'by-sa',
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -249,6 +260,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'id': '68375962',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl password protected test video',
|
||||
'timestamp': 1371200155,
|
||||
'upload_date': '20130614',
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
|
||||
'uploader_id': 'user18948128',
|
||||
@@ -271,7 +283,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/atencio',
|
||||
'uploader_id': 'atencio',
|
||||
'uploader': 'Peter Atencio',
|
||||
'upload_date': '20130927',
|
||||
'timestamp': 1380339469,
|
||||
'upload_date': '20130928',
|
||||
'duration': 187,
|
||||
},
|
||||
},
|
||||
@@ -283,6 +296,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'The New Vimeo Player (You Know, For Videos)',
|
||||
'description': 'md5:2ec900bf97c3f389378a96aee11260ea',
|
||||
'timestamp': 1381846109,
|
||||
'upload_date': '20131015',
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/staff',
|
||||
'uploader_id': 'staff',
|
||||
@@ -314,6 +328,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'uploader': 'The DMCI',
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/dmci',
|
||||
'uploader_id': 'dmci',
|
||||
'timestamp': 1324343742,
|
||||
'upload_date': '20111220',
|
||||
'description': 'md5:ae23671e82d05415868f7ad1aec21147',
|
||||
},
|
||||
@@ -329,6 +344,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'uploader': 'Casey Donahue',
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/caseydonahue',
|
||||
'uploader_id': 'caseydonahue',
|
||||
'timestamp': 1250886430,
|
||||
'upload_date': '20090821',
|
||||
'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6',
|
||||
},
|
||||
@@ -477,6 +493,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'%s said: %s' % (self.IE_NAME, seed_status['title']),
|
||||
expected=True)
|
||||
|
||||
cc_license = None
|
||||
timestamp = None
|
||||
|
||||
# Extract the config JSON
|
||||
try:
|
||||
try:
|
||||
@@ -490,8 +509,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
vimeo_clip_page_config = self._search_regex(
|
||||
r'vimeo\.clip_page_config\s*=\s*({.+?});', webpage,
|
||||
'vimeo clip page config')
|
||||
config_url = self._parse_json(
|
||||
vimeo_clip_page_config, video_id)['player']['config_url']
|
||||
page_config = self._parse_json(vimeo_clip_page_config, video_id)
|
||||
config_url = page_config['player']['config_url']
|
||||
cc_license = page_config.get('cc_license')
|
||||
timestamp = try_get(
|
||||
page_config, lambda x: x['clip']['uploaded_on'],
|
||||
compat_str)
|
||||
config_json = self._download_webpage(config_url, video_id)
|
||||
config = json.loads(config_json)
|
||||
except RegexNotFoundError:
|
||||
@@ -560,10 +583,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
self._downloader.report_warning('Cannot find video description')
|
||||
|
||||
# Extract upload date
|
||||
video_upload_date = None
|
||||
mobj = re.search(r'<time[^>]+datetime="([^"]+)"', webpage)
|
||||
if mobj is not None:
|
||||
video_upload_date = unified_strdate(mobj.group(1))
|
||||
if not timestamp:
|
||||
timestamp = self._search_regex(
|
||||
r'<time[^>]+datetime="([^"]+)"', webpage,
|
||||
'timestamp', default=None)
|
||||
|
||||
try:
|
||||
view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count'))
|
||||
@@ -600,15 +623,22 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
info_dict = self._parse_config(config, video_id)
|
||||
formats.extend(info_dict['formats'])
|
||||
self._vimeo_sort_formats(formats)
|
||||
|
||||
if not cc_license:
|
||||
cc_license = self._search_regex(
|
||||
r'<link[^>]+rel=["\']license["\'][^>]+href=(["\'])(?P<license>(?:(?!\1).)+)\1',
|
||||
webpage, 'license', default=None, group='license')
|
||||
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'upload_date': video_upload_date,
|
||||
'timestamp': unified_timestamp(timestamp),
|
||||
'description': video_description,
|
||||
'webpage_url': url,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'comment_count': comment_count,
|
||||
'license': cc_license,
|
||||
})
|
||||
|
||||
return info_dict
|
||||
|
@@ -6,8 +6,9 @@ import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
@@ -20,50 +21,16 @@ class VineIE(InfoExtractor):
|
||||
'id': 'b9KOOWX7HUx',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chicken.',
|
||||
'alt_title': 'Vine by Jack Dorsey',
|
||||
'alt_title': 'Vine by Jack',
|
||||
'timestamp': 1368997951,
|
||||
'upload_date': '20130519',
|
||||
'uploader': 'Jack Dorsey',
|
||||
'uploader': 'Jack',
|
||||
'uploader_id': '76',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://vine.co/v/MYxVapFvz2z',
|
||||
'md5': '7b9a7cbc76734424ff942eb52c8f1065',
|
||||
'info_dict': {
|
||||
'id': 'MYxVapFvz2z',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14',
|
||||
'alt_title': 'Vine by Mars Ruiz',
|
||||
'upload_date': '20140815',
|
||||
'uploader': 'Mars Ruiz',
|
||||
'uploader_id': '1102363502380728320',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://vine.co/v/bxVjBbZlPUH',
|
||||
'md5': 'ea27decea3fa670625aac92771a96b73',
|
||||
'info_dict': {
|
||||
'id': 'bxVjBbZlPUH',
|
||||
'ext': 'mp4',
|
||||
'title': '#mw3 #ac130 #killcam #angelofdeath',
|
||||
'alt_title': 'Vine by Z3k3',
|
||||
'upload_date': '20130430',
|
||||
'uploader': 'Z3k3',
|
||||
'uploader_id': '936470460173008896',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://vine.co/oembed/MYxVapFvz2z.json',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://vine.co/v/e192BnZnZ9V',
|
||||
'info_dict': {
|
||||
@@ -71,6 +38,7 @@ class VineIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'ยิ้ม~ เขิน~ อาย~ น่าร้ากอ้ะ >//< @n_whitewo @orlameena #lovesicktheseries #lovesickseason2',
|
||||
'alt_title': 'Vine by Pimry_zaa',
|
||||
'timestamp': 1436057405,
|
||||
'upload_date': '20150705',
|
||||
'uploader': 'Pimry_zaa',
|
||||
'uploader_id': '1135760698325307392',
|
||||
@@ -82,43 +50,60 @@ class VineIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://vine.co/v/MYxVapFvz2z',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://vine.co/v/bxVjBbZlPUH',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://vine.co/oembed/MYxVapFvz2z.json',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id)
|
||||
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.POST_DATA\s*=\s*({.+?});\s*</script>',
|
||||
webpage, 'vine data'),
|
||||
video_id)
|
||||
data = self._download_json(
|
||||
'https://archive.vine.co/posts/%s.json' % video_id, video_id)
|
||||
|
||||
data = data[list(data.keys())[0]]
|
||||
|
||||
formats = [{
|
||||
'format_id': '%(format)s-%(rate)s' % f,
|
||||
'vcodec': f.get('format'),
|
||||
'quality': f.get('rate'),
|
||||
'url': f['videoUrl'],
|
||||
} for f in data['videoUrls'] if f.get('videoUrl')]
|
||||
def video_url(kind):
|
||||
for url_suffix in ('Url', 'URL'):
|
||||
format_url = data.get('video%s%s' % (kind, url_suffix))
|
||||
if format_url:
|
||||
return format_url
|
||||
|
||||
formats = []
|
||||
for quality, format_id in enumerate(('low', '', 'dash')):
|
||||
format_url = video_url(format_id.capitalize())
|
||||
if not format_url:
|
||||
continue
|
||||
# DASH link returns plain mp4
|
||||
if format_id == 'dash' and determine_ext(format_url) == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, video_id, mpd_id='dash', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': format_id or 'standard',
|
||||
'quality': quality,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
username = data.get('username')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': data.get('description') or self._og_search_title(webpage),
|
||||
'alt_title': 'Vine by %s' % username if username else self._og_search_description(webpage, default=None),
|
||||
'title': data.get('description'),
|
||||
'alt_title': 'Vine by %s' % username if username else None,
|
||||
'thumbnail': data.get('thumbnailUrl'),
|
||||
'upload_date': unified_strdate(data.get('created')),
|
||||
'timestamp': unified_timestamp(data.get('created')),
|
||||
'uploader': username,
|
||||
'uploader_id': data.get('userIdStr'),
|
||||
'view_count': int_or_none(data.get('loops', {}).get('count')),
|
||||
'like_count': int_or_none(data.get('likes', {}).get('count')),
|
||||
'comment_count': int_or_none(data.get('comments', {}).get('count')),
|
||||
'repost_count': int_or_none(data.get('reposts', {}).get('count')),
|
||||
'view_count': int_or_none(data.get('loops')),
|
||||
'like_count': int_or_none(data.get('likes')),
|
||||
'comment_count': int_or_none(data.get('comments')),
|
||||
'repost_count': int_or_none(data.get('reposts')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
@@ -281,6 +281,11 @@ class VKIE(VKBaseIE):
|
||||
{
|
||||
'url': 'http://new.vk.com/video205387401_165548505',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# This video is no longer available, because its author has been blocked.
|
||||
'url': 'https://vk.com/video-10639516_456240611',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
@@ -328,6 +333,12 @@ class VKIE(VKBaseIE):
|
||||
|
||||
r'<!>Access denied':
|
||||
'Access denied to video %s.',
|
||||
|
||||
r'<!>Видеозапись недоступна, так как её автор был заблокирован.':
|
||||
'Video %s is no longer available, because its author has been blocked.',
|
||||
|
||||
r'<!>This video is no longer available, because its author has been blocked.':
|
||||
'Video %s is no longer available, because its author has been blocked.',
|
||||
}
|
||||
|
||||
for error_re, error_msg in ERRORS.items():
|
||||
|
@@ -2,17 +2,23 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
remove_start,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
|
||||
|
||||
class VLiveIE(InfoExtractor):
|
||||
@@ -169,3 +175,89 @@ class VLiveIE(InfoExtractor):
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class VLiveChannelIE(InfoExtractor):
|
||||
IE_NAME = 'vlive:channel'
|
||||
_VALID_URL = r'https?://channels\.vlive\.tv/(?P<id>[0-9A-Z]+)'
|
||||
_TEST = {
|
||||
'url': 'http://channels.vlive.tv/FCD4B',
|
||||
'info_dict': {
|
||||
'id': 'FCD4B',
|
||||
'title': 'MAMAMOO',
|
||||
},
|
||||
'playlist_mincount': 110
|
||||
}
|
||||
_APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_code = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://channels.vlive.tv/%s/video' % channel_code, channel_code)
|
||||
|
||||
app_id = None
|
||||
|
||||
app_js_url = self._search_regex(
|
||||
r'<script[^>]+src=(["\'])(?P<url>http.+?/app\.js.*?)\1',
|
||||
webpage, 'app js', default=None, group='url')
|
||||
|
||||
if app_js_url:
|
||||
app_js = self._download_webpage(
|
||||
app_js_url, channel_code, 'Downloading app JS', fatal=False)
|
||||
if app_js:
|
||||
app_id = self._search_regex(
|
||||
r'Global\.VFAN_APP_ID\s*=\s*[\'"]([^\'"]+)[\'"]',
|
||||
app_js, 'app id', default=None)
|
||||
|
||||
app_id = app_id or self._APP_ID
|
||||
|
||||
channel_info = self._download_json(
|
||||
'http://api.vfan.vlive.tv/vproxy/channelplus/decodeChannelCode',
|
||||
channel_code, note='Downloading decode channel code',
|
||||
query={
|
||||
'app_id': app_id,
|
||||
'channelCode': channel_code,
|
||||
'_': int(time.time())
|
||||
})
|
||||
|
||||
channel_seq = channel_info['result']['channelSeq']
|
||||
channel_name = None
|
||||
entries = []
|
||||
|
||||
for page_num in itertools.count(1):
|
||||
video_list = self._download_json(
|
||||
'http://api.vfan.vlive.tv/vproxy/channelplus/getChannelVideoList',
|
||||
channel_code, note='Downloading channel list page #%d' % page_num,
|
||||
query={
|
||||
'app_id': app_id,
|
||||
'channelSeq': channel_seq,
|
||||
'maxNumOfRows': 1000,
|
||||
'_': int(time.time()),
|
||||
'pageNo': page_num
|
||||
}
|
||||
)
|
||||
|
||||
if not channel_name:
|
||||
channel_name = try_get(
|
||||
video_list,
|
||||
lambda x: x['result']['channelInfo']['channelName'],
|
||||
compat_str)
|
||||
|
||||
videos = try_get(
|
||||
video_list, lambda x: x['result']['videoList'], list)
|
||||
if not videos:
|
||||
break
|
||||
|
||||
for video in videos:
|
||||
video_id = video.get('videoSeq')
|
||||
if not video_id:
|
||||
continue
|
||||
video_id = compat_str(video_id)
|
||||
entries.append(
|
||||
self.url_result(
|
||||
'http://www.vlive.tv/video/%s' % video_id,
|
||||
ie=VLiveIE.ie_key(), video_id=video_id))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, channel_code, channel_name)
|
||||
|
@@ -329,6 +329,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
|
||||
'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
|
||||
'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
|
||||
'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'preference': -50, 'container': 'm4a_dash'},
|
||||
'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'preference': -50, 'container': 'm4a_dash'},
|
||||
|
||||
# Dash webm
|
||||
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
|
||||
@@ -1028,8 +1030,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
def _parse_sig_js(self, jscode):
|
||||
funcname = self._search_regex(
|
||||
r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode,
|
||||
'Initial JS player signature function name')
|
||||
(r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\('),
|
||||
jscode, 'Initial JS player signature function name', group='sig')
|
||||
|
||||
jsi = JSInterpreter(jscode)
|
||||
initial_function = jsi.extract_function(funcname)
|
||||
@@ -1050,6 +1053,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
if player_url.startswith('//'):
|
||||
player_url = 'https:' + player_url
|
||||
elif not re.match(r'https?://', player_url):
|
||||
player_url = compat_urlparse.urljoin(
|
||||
'https://www.youtube.com', player_url)
|
||||
try:
|
||||
player_id = (player_url, self._signature_cache_id(s))
|
||||
if player_id not in self._player_cache:
|
||||
@@ -1853,13 +1859,13 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
||||
youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
|
||||
)
|
||||
(
|
||||
(?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,}
|
||||
(?:PL|LL|EC|UU|FL|RD|UL|TL)?[0-9A-Za-z-_]{10,}
|
||||
# Top tracks, they can also include dots
|
||||
|(?:MC)[\w\.]*
|
||||
)
|
||||
.*
|
||||
|
|
||||
((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
|
||||
((?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,})
|
||||
)"""
|
||||
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true'
|
||||
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
|
||||
@@ -1981,6 +1987,9 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
||||
}, {
|
||||
'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
@@ -2341,18 +2350,18 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
|
||||
videos = []
|
||||
limit = n
|
||||
|
||||
url_query = {
|
||||
'search_query': query.encode('utf-8'),
|
||||
}
|
||||
url_query.update(self._EXTRA_QUERY_ARGS)
|
||||
result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
|
||||
|
||||
for pagenum in itertools.count(1):
|
||||
url_query = {
|
||||
'search_query': query.encode('utf-8'),
|
||||
'page': pagenum,
|
||||
'spf': 'navigate',
|
||||
}
|
||||
url_query.update(self._EXTRA_QUERY_ARGS)
|
||||
result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
|
||||
data = self._download_json(
|
||||
result_url, video_id='query "%s"' % query,
|
||||
note='Downloading page %s' % pagenum,
|
||||
errnote='Unable to download API page')
|
||||
errnote='Unable to download API page',
|
||||
query={'spf': 'navigate'})
|
||||
html_content = data[1]['body']['content']
|
||||
|
||||
if 'class="search-message' in html_content:
|
||||
@@ -2364,6 +2373,12 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
|
||||
videos += new_videos
|
||||
if not new_videos or len(videos) > limit:
|
||||
break
|
||||
next_link = self._html_search_regex(
|
||||
r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
|
||||
html_content, 'next link', default=None)
|
||||
if next_link is None:
|
||||
break
|
||||
result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
|
||||
|
||||
if len(videos) > n:
|
||||
videos = videos[:n]
|
||||
|
@@ -216,23 +216,23 @@ def parseOpts(overrideArguments=None):
|
||||
network.add_option(
|
||||
'--source-address',
|
||||
metavar='IP', dest='source_address', default=None,
|
||||
help='Client-side IP address to bind to (experimental)',
|
||||
help='Client-side IP address to bind to',
|
||||
)
|
||||
network.add_option(
|
||||
'-4', '--force-ipv4',
|
||||
action='store_const', const='0.0.0.0', dest='source_address',
|
||||
help='Make all connections via IPv4 (experimental)',
|
||||
help='Make all connections via IPv4',
|
||||
)
|
||||
network.add_option(
|
||||
'-6', '--force-ipv6',
|
||||
action='store_const', const='::', dest='source_address',
|
||||
help='Make all connections via IPv6 (experimental)',
|
||||
help='Make all connections via IPv6',
|
||||
)
|
||||
network.add_option(
|
||||
'--geo-verification-proxy',
|
||||
dest='geo_verification_proxy', default=None, metavar='URL',
|
||||
help='Use this proxy to verify the IP address for some geo-restricted sites. '
|
||||
'The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading. (experimental)'
|
||||
'The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading.'
|
||||
)
|
||||
network.add_option(
|
||||
'--cn-verification-proxy',
|
||||
@@ -297,7 +297,7 @@ def parseOpts(overrideArguments=None):
|
||||
'--match-filter',
|
||||
metavar='FILTER', dest='match_filter', default=None,
|
||||
help=(
|
||||
'Generic video filter (experimental). '
|
||||
'Generic video filter. '
|
||||
'Specify any key (see help for -o for a list of available keys) to'
|
||||
' match if the key is present, '
|
||||
'!key to check if the key is not present,'
|
||||
@@ -450,7 +450,7 @@ def parseOpts(overrideArguments=None):
|
||||
'--skip-unavailable-fragments',
|
||||
action='store_true', dest='skip_unavailable_fragments', default=True,
|
||||
help='Skip unavailable fragments (DASH and hlsnative only)')
|
||||
general.add_option(
|
||||
downloader.add_option(
|
||||
'--abort-on-unavailable-fragment',
|
||||
action='store_false', dest='skip_unavailable_fragments',
|
||||
help='Abort downloading when some fragment is not available')
|
||||
@@ -470,6 +470,10 @@ def parseOpts(overrideArguments=None):
|
||||
'--playlist-reverse',
|
||||
action='store_true',
|
||||
help='Download playlist videos in reverse order')
|
||||
downloader.add_option(
|
||||
'--playlist-random',
|
||||
action='store_true',
|
||||
help='Download playlist videos in random order')
|
||||
downloader.add_option(
|
||||
'--xattr-set-filesize',
|
||||
dest='xattr_set_filesize', action='store_true',
|
||||
@@ -661,8 +665,12 @@ def parseOpts(overrideArguments=None):
|
||||
help=('Output filename template, see the "OUTPUT TEMPLATE" for all the info'))
|
||||
filesystem.add_option(
|
||||
'--autonumber-size',
|
||||
dest='autonumber_size', metavar='NUMBER',
|
||||
help='Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given')
|
||||
dest='autonumber_size', metavar='NUMBER', default=5, type=int,
|
||||
help='Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given (default is %default)')
|
||||
filesystem.add_option(
|
||||
'--autonumber-start',
|
||||
dest='autonumber_start', metavar='NUMBER', default=1, type=int,
|
||||
help='Specify the start value for %(autonumber)s (default is %default)')
|
||||
filesystem.add_option(
|
||||
'--restrict-filenames',
|
||||
action='store_true', dest='restrictfilenames', default=False,
|
||||
|
@@ -2103,11 +2103,18 @@ def strip_jsonp(code):
|
||||
|
||||
|
||||
def js_to_json(code):
|
||||
COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
|
||||
SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
|
||||
INTEGER_TABLE = (
|
||||
(r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
|
||||
(r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
|
||||
)
|
||||
|
||||
def fix_kv(m):
|
||||
v = m.group(0)
|
||||
if v in ('true', 'false', 'null'):
|
||||
return v
|
||||
elif v.startswith('/*') or v == ',':
|
||||
elif v.startswith('/*') or v.startswith('//') or v == ',':
|
||||
return ""
|
||||
|
||||
if v[0] in ("'", '"'):
|
||||
@@ -2118,11 +2125,6 @@ def js_to_json(code):
|
||||
'\\x': '\\u00',
|
||||
}.get(m.group(0), m.group(0)), v[1:-1])
|
||||
|
||||
INTEGER_TABLE = (
|
||||
(r'^(0[xX][0-9a-fA-F]+)\s*:?$', 16),
|
||||
(r'^(0+[0-7]+)\s*:?$', 8),
|
||||
)
|
||||
|
||||
for regex, base in INTEGER_TABLE:
|
||||
im = re.match(regex, v)
|
||||
if im:
|
||||
@@ -2134,11 +2136,11 @@ def js_to_json(code):
|
||||
return re.sub(r'''(?sx)
|
||||
"(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
|
||||
'(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
|
||||
/\*.*?\*/|,(?=\s*[\]}])|
|
||||
{comment}|,(?={skip}[\]}}])|
|
||||
[a-zA-Z_][.a-zA-Z_0-9]*|
|
||||
\b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?|
|
||||
[0-9]+(?=\s*:)
|
||||
''', fix_kv, code)
|
||||
\b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
|
||||
[0-9]+(?={skip}:)
|
||||
'''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
|
||||
|
||||
|
||||
def qualities(quality_ids):
|
||||
|
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2017.01.28'
|
||||
__version__ = '2017.02.04.1'
|
||||
|
Reference in New Issue
Block a user