Compare commits

...

103 Commits

Author SHA1 Message Date
Sergey M․
4372436504 release 2017.04.09 2017-04-09 00:01:28 +07:00
Sergey M․
eb8cc8ea3b [ChangeLog] Actualize 2017-04-08 23:59:17 +07:00
Sergey M․
41b263ac8a [canvas] Add IE_DESC (closes #12478) 2017-04-08 22:45:45 +07:00
Sergey M․
ca8fca9d9d [vrt] Add IE_DESC (closes #12477) 2017-04-08 22:44:31 +07:00
midas02
e129fa0846 [vier] Add IE_DESC 2017-04-08 22:43:29 +07:00
Sergey M․
2bd875edfe [medici] Add extractor (closes #3406) 2017-04-08 22:38:37 +07:00
Steven Maude
95152630db [rbmaradio] Add support for redbullradio.com URLs 2017-04-08 21:39:07 +07:00
Sergey M․
04e431cf97 [npo:live] Improve (closes #12555) 2017-04-08 21:31:22 +07:00
Aldo Gunsing
1591ba258a [npo:live] Add support for default url 2017-04-08 21:30:38 +07:00
Sergey M․
29c6726646 [mixcloud] Fix view count extraction and modernize 2017-04-08 21:11:08 +07:00
Sergey M․
a66e25859a [mixcloud:playlist] Relax title extraction and fix description extraction (closes #12582) 2017-04-08 21:04:09 +07:00
Kfir Breger
c93c0fc2fd [mixcloud:playlist] Fix title extraction 2017-04-08 20:47:07 +07:00
Sergey M․
90e3f18fc1 [thesun] Extract playlists (closes #11298, closes #12674) 2017-04-08 20:08:31 +07:00
Entropy
5f3e0b69ef [TheSun] Add new extractor 2017-04-08 19:54:04 +07:00
Sergey M․
28b674ca23 [ceskateleveize:porady] Add extractor (closes #7411, closes #12645) 2017-04-08 19:46:42 +07:00
Sergey M․
e18f1da97a [ceskateleveize] Improve extraction and remove URL replacement hacks 2017-04-08 19:41:14 +07:00
Sergey M․
78280352ca [kaltura] Cleanup regexes from redundant escaping 2017-04-08 16:48:27 +07:00
John Hawkinson
a01825a541 [kaltura] Add support for iframe embeds 2017-04-08 16:40:57 +07:00
Sergey M․
f8f2da25ab [wshh] Extract html5 entries and delegate to generic extractor (closes #12676) 2017-04-08 16:01:56 +07:00
Yen Chi Hsuan
4c03973296 [airmozilla] Fix extraction (closes #12670) 2017-04-08 15:39:58 +08:00
Sergey M․
60e5016199 [test_download] Remove unused import 2017-04-08 14:19:01 +07:00
Sergey M․
c4d6fc6d65 [test_subtitles] Fix raiplay test 2017-04-08 14:11:20 +07:00
Sergey M․
1b3feca0a7 [raiplay] Extract subtitles 2017-04-08 14:11:03 +07:00
Sergey M․
80b2fdf9ac [test_download] Match info dicts against tests before matching info file 2017-04-08 14:10:12 +07:00
John Hawkinson
3bef10a50c [test_download] typo in comment 2017-04-08 13:07:25 +07:00
John Hawkinson
a84da06f49 [test_download] Improve diagnostic on wrong 'id' 2017-04-08 13:07:25 +07:00
Sergey M․
3461f5db06 [xfileshare] Add support for vidlo.us (closes #12660) 2017-04-06 23:39:09 +07:00
Sergey M․
0378b8b917 [xfileshare] Add support for vidbom.com (closes #12661) 2017-04-06 23:38:16 +07:00
Sergey M․
7f04386b89 [aenetworks] Add more video URL regexes (closes #12657) 2017-04-06 02:36:48 +07:00
Remita Amine
fac39cccd4 [odnoklassniki] fix format sorting for 1080p quality 2017-04-03 23:39:56 +01:00
Remita Amine
b68e00b08a [rtl2] add support for you.rtl2.de(closes #10257) 2017-04-03 21:36:35 +01:00
Sergey M․
2ab0bfcd81 [vshare] Add extractor (closes #12278) 2017-04-04 03:05:18 +07:00
Sergey M․
b022f4f600 release 2017.04.03 2017-04-03 03:53:55 +07:00
Sergey M․
e2435ba5f3 [ChangeLog] Actualize 2017-04-03 03:52:44 +07:00
Remita Amine
a9bb61a425 [discoveryvr] Add new extractor(closes #12578) 2017-04-02 09:22:09 +01:00
Remita Amine
dbf70c489f [tv5mondeplus] clean description and use stable id 2017-04-02 00:26:48 +01:00
Remita Amine
61e2331ad8 [tv5mondeplus] Add new extractor(closes #11386) 2017-04-01 23:49:40 +01:00
Sergey M․
fd47550885 [extractor/common] Add coding cookie 2017-04-02 04:42:10 +07:00
Sergey M․
4457823dda [extractor/common] Move censorship checks to a separate method and add check for just another ISP 2017-04-02 03:57:44 +07:00
Sergey M․
b3633fa0ce [pericope] Add support for pscp.tv URLs 2017-04-02 03:20:28 +07:00
Sergey M․
b56e41a701 release 2017.04.02 2017-04-02 02:39:15 +07:00
Sergey M․
a76c25146a [ChangeLog] Actualize 2017-04-02 02:37:18 +07:00
Sergey M․
361f293ab8 [rai] Skip not found content item id 2017-04-02 02:24:13 +07:00
Sergey M․
b8d8cced9b [rai] Improve extraction (closes #11790)
* Fix georestriction detection
* Detect live streams
+ Extract relinker metadata
* Improve ContentItem detection
+ Extract series metadata
* Fix tests
2017-04-02 02:14:42 +07:00
Timendum
51342717cd [rai] Fix extraction 2017-04-02 02:10:53 +07:00
Remita Amine
48ab554feb [vrv] add support for series pages 2017-04-01 18:09:36 +01:00
Remita Amine
a6f3a162f3 [limelight] improve extraction for audio only formats 2017-04-01 15:35:39 +01:00
Remita Amine
91399b2fcc [funimation] fix extraction(closes #10696)(#11773) 2017-04-01 13:33:04 +01:00
Sergey M․
eecea00d36 [xfileshare] Add support for vidabc.com (closes #12589) 2017-04-01 18:56:35 +07:00
Sergey M․
2cd668ee59 [xfileshare] Improve extraction and extract hls formats 2017-04-01 18:55:48 +07:00
Remita Amine
ca77b92f94 [crunchyroll] pass geo verifcation proxy 2017-04-01 09:33:23 +01:00
Remita Amine
e97fc8d6b8 [cwtv] extract ISM formats 2017-04-01 07:50:24 +01:00
Remita Amine
be61efdf17 [tvplay] Bypass geo restriction 2017-04-01 07:26:40 +01:00
Remita Amine
77c8ebe631 [vrv] Add new extractor 2017-03-31 23:29:23 +01:00
Sergey M․
7453999580 [packtpub] Add extractor (closes #12610) 2017-04-01 00:25:27 +07:00
Sergey M․
1640eb0961 [YoutubeDL] Return early when extraction of url_transparent fails 2017-03-31 23:57:35 +07:00
Remita Amine
3e943cfe09 [generic] pass base_url to _parse_jwplayer_data 2017-03-31 14:54:06 +01:00
Remita Amine
82be732b17 [adn] Add new extractor 2017-03-31 12:24:23 +01:00
Sergey M․
639e5b2a84 [allocine] Extract more metadata 2017-03-29 04:43:12 +07:00
plroman
128244657b [allocine] Fix extraction 2017-03-29 05:23:20 +08:00
Sergey M․
12ee65ea0d [options] Mention ISM for --fragment-retries and --skip-unavailable-fragments 2017-03-28 23:35:48 +07:00
Tithen-Firion
aea1dccbd0 [openload] fix extractor 2017-03-29 00:00:09 +08:00
Sergey M․
9e691da067 release 2017.03.26 2017-03-26 08:11:40 +07:00
Sergey M․
82eefd0be0 [ChangeLog] Actualize 2017-03-26 23:39:12 +07:00
Yen Chi Hsuan
f7923a4c39 [ChangeLog] Update after #12307 2017-03-26 22:07:12 +08:00
Yen Chi Hsuan
cc63259d18 Merge pull request #12307 from rndusr/fix/str-item-assignment
Fix "'str' object does not support item assignment"
2017-03-26 21:51:09 +08:00
Remita Amine
2bfaf89b6c [downloader/hls] move check for m3u8 live streams to get_suitable_downloader 2017-03-25 23:07:05 +01:00
Random User
4f06c1c9fc Merge branch 'master' of github.com-rndusr:rg3/youtube-dl into fix/str-item-assignment 2017-03-25 21:36:59 +01:00
Sergey M․
942b44a052 [test_compat] Do not use dash in env variables' names 2017-03-26 03:24:25 +07:00
Sergey M․
a426ef6d78 [test_utils] Do not use dash in env variables' names 2017-03-26 03:22:48 +07:00
Sergey M․
41c5e60dd5 [test_utils] Fix expand_path tests 2017-03-26 03:07:56 +07:00
Sergey M․
d212c93d16 [pluralsight] PEP 8 2017-03-26 02:34:25 +07:00
Sergey M․
15495cf3e5 [franceculture] PEP 8 2017-03-26 02:32:46 +07:00
Sergey M․
5b7cc56b05 [atresplayer] PEP 8 2017-03-26 02:32:14 +07:00
Sergey M․
590bc6f6a1 Use expand_path where appropriate (closes #12556) 2017-03-26 02:31:16 +07:00
Sergey M․
51098426b8 [utils] Introduce expand_path 2017-03-26 02:30:10 +07:00
Random User
c73e330e7a _find_jwplayer_data() returns dict or None
This simplifies code for callers of `_find_jwplayer_data()` which no longer have
to run `_parse_json()` on the return value.

It also makes sure that `_find_jwplayer_data()` returns either a `dict` or
`None` and nothing else.
2017-03-25 19:38:30 +01:00
Remita Amine
fb4fc44928 [downloader/hls] immediately delegate downloading to ffmpeg in case live stream 2017-03-25 19:38:23 +01:00
Random User
03486dbb01 Add test for JWPlayer where config is passed as variable 2017-03-25 19:37:45 +01:00
Yen Chi Hsuan
51ef4919df [afreecatv] Fix extraction (closes #12179) 2017-03-26 01:32:07 +08:00
Remita Amine
d66d43c554 [atvat] Add new extractor(closes #5325) 2017-03-25 18:13:58 +01:00
Sergey M․
610a6d1053 [atresplayer] Do not extract ISM formats
As per @remitamine: the ISM downloader does not support videos served from wowza servers(it will produce broken files)
2017-03-25 21:40:54 +07:00
John Hawkinson
c6c22e984d [test_download] Print additional IEs in summary output 2017-03-25 22:36:40 +08:00
Remita Amine
d97729c83a [fox] remove unused import 2017-03-25 14:28:53 +01:00
gkoelln
7aa0ee321b [fox] Add metadata extraction
Add series, season number, episode number and episode.
2017-03-25 21:12:25 +08:00
John Hawkinson
e8e4cc5a6a [generic] Replace LazyYT test with skiplagged
discourse.ubuntu.com has gone away, repalce with skiplagged.com.
Be nice to have a non-frontpage URL that might be more stable,
though I don't have one. Maybe this should move to html
in test/test_InfoExtractor.py?
2017-03-25 19:53:32 +07:00
Sergey M․
c7301e677b [atresplayer] Extract DASH and ISM formats 2017-03-25 18:03:46 +07:00
zurfyx
048086920b [atresplayer] Extract HD manifest 2017-03-25 17:52:04 +07:00
zurfyx
1088d76da6 [atresplayer] Fix login error detection 2017-03-25 17:47:35 +07:00
Remita Amine
31a1214076 [franceculture] fix extraction(closes #12547) 2017-03-25 07:04:48 +01:00
Sergey M․
d0ba55871e [youtube] Improve _VALID_URLs (closes #12538) 2017-03-25 01:18:33 +07:00
Sergey M․
54b960f340 [generic] Do not follow redirects to the same URL 2017-03-24 00:45:24 +07:00
Sergey M․
a3ccd6bd11 release 2017.03.24 2017-03-24 00:24:23 +07:00
Sergey M․
7963b6cba8 [ChangeLog] Actualize 2017-03-24 00:19:58 +07:00
Sergey M․
bea7af6947 [channel9] Remove expired comment and sort imports 2017-03-23 23:58:12 +07:00
Sergey M․
a5d783f525 [channel9] Extract more formats 2017-03-23 23:47:43 +07:00
Remita Amine
d0572557c2 [ninecninemedia] remove mp4 url extraction request 2017-03-23 13:53:07 +01:00
Remita Amine
52d5ecabd5 [bellmedia] add support for etalk.ca(closes #12447) 2017-03-23 13:52:45 +01:00
Remita Amine
b0f7f21cb9 [channel9] fix extraction(closes #11323) 2017-03-23 09:22:37 +01:00
Sergey M․
579c99a284 [cloudy] Fix extraction (closes #12525) 2017-03-22 23:48:06 +07:00
Remita Amine
ca5ed022e9 [hbo] add support for free episode urls and new formats extraction(closes #12519) 2017-03-22 17:28:53 +01:00
Sergey M․
391d076d7c [condenast] Fix extraction and style (closes #12526) 2017-03-22 23:22:14 +07:00
Sergey M․
c183e14f89 [viu] Relax _VALID_URL (closes #12529) 2017-03-22 22:26:59 +07:00
69 changed files with 2314 additions and 1034 deletions

View File

@@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.03.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.03.22**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.04.09*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.04.09**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2017.03.22
[debug] youtube-dl version 2017.04.09
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@@ -1,3 +1,94 @@
version 2017.04.09
Extractors
+ [medici] Add support for medici.tv (#3406)
+ [rbmaradio] Add support for redbullradio.com URLs (#12687)
+ [npo:live] Add support for default URL (#12555)
* [mixcloud:playlist] Fix title, description and view count extraction (#12582)
+ [thesun] Add support for thesun.co.uk (#11298, #12674)
+ [ceskatelevize:porady] Add support for porady (#7411, #12645)
* [ceskatelevize] Improve extraction and remove URL replacement hacks
+ [kaltura] Add support for iframe embeds (#12679)
* [airmozilla] Fix extraction (#12670)
* [wshh] Extract html5 entries and delegate to generic extractor (#12676)
+ [raiplay] Extract subtitles
+ [xfileshare] Add support for vidlo.us (#12660)
+ [xfileshare] Add support for vidbom.com (#12661)
+ [aenetworks] Add more video URL regular expressions (#12657)
+ [odnoklassniki] Fix format sorting for 1080p quality
+ [rtl2] Add support for you.rtl2.de (#10257)
+ [vshare] Add support for vshare.io (#12278)
version 2017.04.03
Core
+ [extractor/common] Add censorship check for TransTelekom ISP
* [extractor/common] Move censorship checks to a separate method
Extractors
+ [discoveryvr] Add support for discoveryvr.com (#12578)
+ [tv5mondeplus] Add support for tv5mondeplus.com (#11386)
+ [periscope] Add support for pscp.tv URLs (#12618, #12625)
version 2017.04.02
Core
* [YoutubeDL] Return early when extraction of url_transparent fails
Extractors
* [rai] Fix and improve extraction (#11790)
+ [vrv] Add support for series pages
* [limelight] Improve extraction for audio only formats
* [funimation] Fix extraction (#10696, #11773)
+ [xfileshare] Add support for vidabc.com (#12589)
+ [xfileshare] Improve extraction and extract hls formats
+ [crunchyroll] Pass geo verification proxy
+ [cwtv] Extract ISM formats
+ [tvplay] Bypass geo restriction
+ [vrv] Add support for vrv.co
+ [packtpub] Add support for packtpub.com (#12610)
+ [generic] Pass base_url to _parse_jwplayer_data
+ [adn] Add support for animedigitalnetwork.fr (#4866)
+ [allocine] Extract more metadata
* [allocine] Fix extraction (#12592)
* [openload] Fix extraction
version 2017.03.26
Core
* Don't raise an error if JWPlayer config data is not a Javascript object
literal. _find_jwplayer_data now returns a dict rather than an str. (#12307)
* Expand environment variables for options representing paths (#12556)
+ [utils] Introduce expand_path
* [downloader/hls] Delegate downloading to ffmpeg immediately for live streams
Extractors
* [afreecatv] Fix extraction (#12179)
+ [atvat] Add support for atv.at (#5325)
+ [fox] Add metadata extraction (#12391)
+ [atresplayer] Extract DASH formats
+ [atresplayer] Extract HD manifest (#12548)
* [atresplayer] Fix login error detection (#12548)
* [franceculture] Fix extraction (#12547)
* [youtube] Improve URL regular expression (#12538)
* [generic] Do not follow redirects to the same URL
version 2017.03.24
Extractors
- [9c9media] Remove mp4 URL extraction request
+ [bellmedia] Add support for etalk.ca and space.ca (#12447)
* [channel9] Fix extraction (#11323)
* [cloudy] Fix extraction (#12525)
+ [hbo] Add support for free episode URLs and new formats extraction (#12519)
* [condenast] Fix extraction and style (#12526)
* [viu] Relax URL regular expression (#12529)
version 2017.03.22
Extractors

View File

@@ -181,10 +181,10 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
-R, --retries RETRIES Number of retries (default is 10), or
"infinite".
--fragment-retries RETRIES Number of retries for a fragment (default
is 10), or "infinite" (DASH and hlsnative
only)
--skip-unavailable-fragments Skip unavailable fragments (DASH and
hlsnative only)
is 10), or "infinite" (DASH, hlsnative and
ISM)
--skip-unavailable-fragments Skip unavailable fragments (DASH, hlsnative
and ISM)
--abort-on-unavailable-fragment Abort downloading when some fragment is not
available
--buffer-size SIZE Size of download buffer (e.g. 1024 or 16K)

View File

@@ -28,6 +28,7 @@
- **acast**
- **acast:channel**
- **AddAnime**
- **ADN**: Anime Digital Network
- **AdobeTV**
- **AdobeTVChannel**
- **AdobeTVShow**
@@ -67,6 +68,7 @@
- **arte.tv:playlist**
- **AtresPlayer**
- **ATTTechChannel**
- **ATVAt**
- **AudiMedia**
- **AudioBoom**
- **audiomack**
@@ -125,7 +127,7 @@
- **CamWithHer**
- **canalc2.tv**
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
- **Canvas**
- **Canvas**: canvas.be and een.be
- **CarambaTV**
- **CarambaTVPage**
- **CartoonNetwork**
@@ -143,6 +145,7 @@
- **CCTV**: 央视网
- **CDA**
- **CeskaTelevize**
- **CeskaTelevizePorady**
- **channel9**: Channel 9
- **CharlieRose**
- **Chaturbate**
@@ -211,6 +214,7 @@
- **DiscoveryGo**
- **DiscoveryGoPlaylist**
- **DiscoveryNetworksDe**
- **DiscoveryVR**
- **Disney**
- **Dotsub**
- **DouyuTV**: 斗鱼
@@ -312,8 +316,8 @@
- **GPUTechConf**
- **Groupon**
- **Hark**
- **HBO**
- **HBOEpisode**
- **hbo**
- **hbo:episode**
- **HearThisAt**
- **Heise**
- **HellPorno**
@@ -428,6 +432,7 @@
- **MDR**: MDR.DE and KiKA
- **media.ccc.de**
- **Medialaan**
- **Medici**
- **Meipai**: 美拍
- **MelonVOD**
- **META**
@@ -571,6 +576,8 @@
- **orf:iptv**: iptv.ORF.at
- **orf:oe1**: Radio Österreich 1
- **orf:tvthek**: ORF TVthek
- **PacktPub**
- **PacktPubCourse**
- **PandaTV**: 熊猫TV
- **pandora.tv**: 판도라TV
- **parliamentlive.tv**: UK parliament videos
@@ -628,7 +635,7 @@
- **radiofrance**
- **RadioJavan**
- **Rai**
- **RaiTV**
- **RaiPlay**
- **RBMARadio**
- **RDS**: RDS.ca
- **RedBullTV**
@@ -653,7 +660,9 @@
- **rte**: Raidió Teilifís Éireann TV
- **rte:radio**: Raidió Teilifís Éireann radio
- **rtl.nl**: rtl.nl and rtlxl.nl
- **RTL2**
- **rtl2**
- **rtl2:you**
- **rtl2:you:series**
- **RTP**
- **RTS**: RTS.ch
- **rtve.es:alacarta**: RTVE a la carta
@@ -775,6 +784,7 @@
- **TheScene**
- **TheSixtyOne**
- **TheStar**
- **TheSun**
- **TheWeatherChannel**
- **ThisAmericanLife**
- **ThisAV**
@@ -811,6 +821,7 @@
- **TV2Article**
- **TV3**
- **TV4**: tv4.se and tv4play.se
- **TV5MondePlus**: TV5MONDE+
- **TVA**
- **TVANouvelles**
- **TVANouvellesArticle**
@@ -887,7 +898,7 @@
- **vidme:user**
- **vidme:user:likes**
- **Vidzi**
- **vier**
- **vier**: vier.be and vijf.be
- **vier:videos**
- **ViewLift**
- **ViewLiftEmbed**
@@ -924,7 +935,10 @@
- **Vporn**
- **vpro**: npo.nl and ntr.nl
- **Vrak**
- **VRT**
- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
- **vrv**
- **vrv:series**
- **VShare**
- **vube**: Vube.com
- **VuClip**
- **VVVVID**
@@ -952,7 +966,7 @@
- **WSJ**: Wall Street Journal
- **XBef**
- **XboxClips**
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo
- **XHamster**
- **XHamsterEmbed**
- **xiami:album**: 虾米音乐 - 专辑

View File

@@ -27,11 +27,11 @@ from youtube_dl.compat import (
class TestCompat(unittest.TestCase):
def test_compat_getenv(self):
test_str = 'тест'
compat_setenv('YOUTUBE-DL-TEST', test_str)
self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str)
compat_setenv('YOUTUBE_DL_COMPAT_GETENV', test_str)
self.assertEqual(compat_getenv('YOUTUBE_DL_COMPAT_GETENV'), test_str)
def test_compat_setenv(self):
test_var = 'YOUTUBE-DL-TEST'
test_var = 'YOUTUBE_DL_COMPAT_SETENV'
test_str = 'тест'
compat_setenv(test_var, test_str)
compat_getenv(test_var)

View File

@@ -71,6 +71,18 @@ class TestDownload(unittest.TestCase):
maxDiff = None
def __str__(self):
"""Identify each test with the `add_ie` attribute, if available."""
def strclass(cls):
"""From 2.7's unittest; 2.6 had _strclass so we can't import it."""
return '%s.%s' % (cls.__module__, cls.__name__)
add_ie = getattr(self, self._testMethodName).add_ie
return '%s (%s)%s:' % (self._testMethodName,
strclass(self.__class__),
' [%s]' % add_ie if add_ie else '')
def setUp(self):
self.defs = defs
@@ -139,7 +151,7 @@ def generator(test_case, tname):
try_num = 1
while True:
try:
# We're not using .download here sine that is just a shim
# We're not using .download here since that is just a shim
# for outside error handling, and returns the exit code
# instead of the result dict.
res_dict = ydl.extract_info(
@@ -187,7 +199,9 @@ def generator(test_case, tname):
self.assertEqual(
test_case['playlist_duration_sum'], got_duration)
for tc in test_cases:
for tc_num, tc in enumerate(test_cases):
tc_res_dict = res_dict['entries'][tc_num] if is_playlist else res_dict
expect_info_dict(self, tc_res_dict, tc.get('info_dict', {}))
tc_filename = get_tc_filename(tc)
if not test_case.get('params', {}).get('skip_download', False):
self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
@@ -233,6 +247,8 @@ for n, test_case in enumerate(defs):
i += 1
test_method = generator(test_case, tname)
test_method.__name__ = str(tname)
ie_list = test_case.get('add_ie')
test_method.add_ie = ie_list and ','.join(ie_list)
setattr(TestDownload, test_method.__name__, test_method)
del test_method

View File

@@ -21,7 +21,7 @@ from youtube_dl.extractor import (
NPOIE,
ComedyCentralIE,
NRKTVIE,
RaiTVIE,
RaiPlayIE,
VikiIE,
ThePlatformIE,
ThePlatformFeedIE,
@@ -258,9 +258,9 @@ class TestNRKSubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2')
class TestRaiSubtitles(BaseTestSubtitles):
url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
IE = RaiTVIE
class TestRaiPlaySubtitles(BaseTestSubtitles):
url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
IE = RaiPlayIE
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True

View File

@@ -56,6 +56,7 @@ from youtube_dl.utils import (
read_batch_urls,
sanitize_filename,
sanitize_path,
expand_path,
prepend_extension,
replace_extension,
remove_start,
@@ -95,6 +96,8 @@ from youtube_dl.utils import (
from youtube_dl.compat import (
compat_chr,
compat_etree_fromstring,
compat_getenv,
compat_setenv,
compat_urlparse,
compat_parse_qs,
)
@@ -214,6 +217,18 @@ class TestUtil(unittest.TestCase):
self.assertEqual(sanitize_path('./abc'), 'abc')
self.assertEqual(sanitize_path('./../abc'), '..\\abc')
def test_expand_path(self):
def env(var):
return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)
compat_setenv('YOUTUBE_DL_EXPATH_PATH', 'expanded')
self.assertEqual(expand_path(env('YOUTUBE_DL_EXPATH_PATH')), 'expanded')
self.assertEqual(expand_path(env('HOME')), compat_getenv('HOME'))
self.assertEqual(expand_path('~'), compat_getenv('HOME'))
self.assertEqual(
expand_path('~/%s' % env('YOUTUBE_DL_EXPATH_PATH')),
'%s/expanded' % compat_getenv('HOME'))
def test_prepend_extension(self):
self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext')

View File

@@ -29,7 +29,6 @@ import random
from .compat import (
compat_basestring,
compat_cookiejar,
compat_expanduser,
compat_get_terminal_size,
compat_http_client,
compat_kwargs,
@@ -54,6 +53,7 @@ from .utils import (
encode_compat_str,
encodeFilename,
error_to_compat_str,
expand_path,
ExtractorError,
format_bytes,
formatSeconds,
@@ -672,7 +672,7 @@ class YoutubeDL(object):
FORMAT_RE.format(numeric_field),
r'%({0})s'.format(numeric_field), outtmpl)
tmpl = compat_expanduser(outtmpl)
tmpl = expand_path(outtmpl)
filename = tmpl % template_dict
# Temporary fix for #4787
# 'Treat' all problem characters by passing filename through preferredencoding
@@ -837,6 +837,12 @@ class YoutubeDL(object):
ie_result['url'], ie_key=ie_result.get('ie_key'),
extra_info=extra_info, download=False, process=False)
# extract_info may return None when ignoreerrors is enabled and
# extraction failed with an error, don't crash and return early
# in this case
if not info:
return info
force_properties = dict(
(k, v) for k, v in ie_result.items() if v is not None)
for f in ('_type', 'url', 'ie_key'):
@@ -2170,7 +2176,7 @@ class YoutubeDL(object):
if opts_cookiefile is None:
self.cookiejar = compat_cookiejar.CookieJar()
else:
opts_cookiefile = compat_expanduser(opts_cookiefile)
opts_cookiefile = expand_path(opts_cookiefile)
self.cookiejar = compat_cookiejar.MozillaCookieJar(
opts_cookiefile)
if os.access(opts_cookiefile, os.R_OK):

View File

@@ -16,7 +16,6 @@ from .options import (
parseOpts,
)
from .compat import (
compat_expanduser,
compat_getpass,
compat_shlex_split,
workaround_optparse_bug9161,
@@ -26,6 +25,7 @@ from .utils import (
decodeOption,
DEFAULT_OUTTMPL,
DownloadError,
expand_path,
match_filter_func,
MaxDownloadsReached,
preferredencoding,
@@ -88,7 +88,7 @@ def _real_main(argv=None):
batchfd = sys.stdin
else:
batchfd = io.open(
compat_expanduser(opts.batchfile),
expand_path(opts.batchfile),
'r', encoding='utf-8', errors='ignore')
batch_urls = read_batch_urls(batchfd)
if opts.verbose:
@@ -238,7 +238,7 @@ def _real_main(argv=None):
any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
any_printing = opts.print_json
download_archive_fn = compat_expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive
download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive
# PostProcessors
postprocessors = []
@@ -449,7 +449,7 @@ def _real_main(argv=None):
try:
if opts.load_info_filename is not None:
retcode = ydl.download_with_info_file(compat_expanduser(opts.load_info_filename))
retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename))
else:
retcode = ydl.download(all_urls)
except MaxDownloadsReached:

View File

@@ -8,8 +8,11 @@ import re
import shutil
import traceback
from .compat import compat_expanduser, compat_getenv
from .utils import write_json_file
from .compat import compat_getenv
from .utils import (
expand_path,
write_json_file,
)
class Cache(object):
@@ -21,7 +24,7 @@ class Cache(object):
if res is None:
cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
res = os.path.join(cache_root, 'youtube-dl')
return compat_expanduser(res)
return expand_path(res)
def _get_cache_fn(self, section, key, dtype):
assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \

View File

@@ -43,6 +43,9 @@ def get_suitable_downloader(info_dict, params={}):
if ed.can_download(info_dict):
return ed
if protocol.startswith('m3u8') and info_dict.get('is_live'):
return FFmpegFD
if protocol == 'm3u8' and params.get('hls_prefer_native') is True:
return HlsFD

136
youtube_dl/extractor/adn.py Normal file
View File

@@ -0,0 +1,136 @@
# coding: utf-8
from __future__ import unicode_literals
import base64
import json
import os
from .common import InfoExtractor
from ..aes import aes_cbc_decrypt
from ..compat import compat_ord
from ..utils import (
bytes_to_intlist,
ExtractorError,
float_or_none,
intlist_to_bytes,
srt_subtitles_timecode,
strip_or_none,
)
class ADNIE(InfoExtractor):
    """Information extractor for animedigitalnetwork.fr video pages."""

    IE_DESC = 'Anime Digital Network'
    _VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
    _TEST = {
        'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
        'md5': 'e497370d847fd79d9d4c74be55575c7a',
        'info_dict': {
            'id': '7778',
            'ext': 'mp4',
            'title': 'Blue Exorcist - Kyôto Saga - Épisode 1',
            'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
        }
    }

    def _get_subtitles(self, sub_path, video_id):
        """Download, decrypt and convert the subtitles found at sub_path.

        Returns a dict mapping a language code to a list with two entries
        (the raw cue list as JSON and an SRT rendering of it), or None when
        there is no path, the download yields nothing or the decrypted
        payload does not parse to a non-empty JSON value.
        """
        if not sub_path:
            return None

        payload = self._download_webpage(
            'http://animedigitalnetwork.fr/' + sub_path,
            video_id, fatal=False)
        if not payload:
            return None

        # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
        # The first 24 characters and the remainder of the payload are
        # base64-decoded separately and handed to the AES-CBC routine
        # together with a fixed key.
        body = bytes_to_intlist(base64.b64decode(payload[24:]))
        key = bytes_to_intlist(b'\xb5@\xcfq\xa3\x98"N\xe4\xf3\x12\x98}}\x16\xd8')
        head = bytes_to_intlist(base64.b64decode(payload[:24]))
        decrypted = intlist_to_bytes(aes_cbc_decrypt(body, key, head))

        # The value of the last byte is the number of trailing bytes to
        # drop before parsing (presumably block padding -- TODO confirm).
        trailing = compat_ord(decrypted[-1])
        parsed = self._parse_json(decrypted[:-trailing], None, fatal=False)
        if not parsed:
            return None

        subtitles = {}
        for sub_lang, cues in parsed.items():
            blocks = []
            for num, cue in enumerate(cues):
                start = float_or_none(cue.get('startTime'))
                end = float_or_none(cue.get('endTime'))
                text = cue.get('text')
                # Cues lacking a timestamp or text are left out of the SRT
                if start is None or end is None or text is None:
                    continue
                timing = '%s --> %s' % (
                    srt_subtitles_timecode(start),
                    srt_subtitles_timecode(end))
                blocks.append(os.linesep.join(
                    ('%d' % num, timing, text, os.linesep)))
            srt = ''.join(blocks)

            # 'vostf' tracks are filed under 'fr'
            lang = 'fr' if sub_lang == 'vostf' else sub_lang
            subtitles.setdefault(lang, []).extend([{
                'ext': 'json',
                'data': json.dumps(cues),
            }, {
                'ext': 'srt',
                'data': srt,
            }])
        return subtitles

    def _real_extract(self, url):
        """Build the info dict for the video page at url."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # playerConfig is mandatory, videoInfo is a best-effort fallback
        player_config = self._parse_json(self._search_regex(
            r'playerConfig\s*=\s*({.+});', webpage, 'player config'), video_id)

        video_info_str = self._search_regex(
            r'videoInfo\s*=\s*({.+});', webpage,
            'video info', fatal=False)
        video_info = (
            self._parse_json(video_info_str, video_id, fatal=False)
            if video_info_str else None) or {}

        options = player_config.get('options') or {}
        metas = options.get('metas') or {}
        title = metas.get('title') or video_info['title']

        formats = []
        links = player_config.get('links') or {}
        for format_id, qualities in links.items():
            is_vf = format_id == 'vf'
            for load_balancer_url in qualities.values():
                # Each link answers with JSON whose 'location' is the m3u8 URL
                balancer = self._download_json(
                    load_balancer_url, video_id, fatal=False) or {}
                m3u8_url = balancer.get('location')
                if not m3u8_url:
                    continue
                found = self._extract_m3u8_formats(
                    m3u8_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id=format_id, fatal=False)
                if is_vf:
                    for fmt in found:
                        fmt['language'] = 'fr'
                formats.extend(found)

        # Surface the site's own error message when nothing was extracted
        error = options.get('error')
        if error and not formats:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': strip_or_none(metas.get('summary') or video_info.get('resume')),
            'thumbnail': video_info.get('image'),
            'formats': formats,
            'subtitles': self.extract_subtitles(player_config.get('subtitles'), video_id),
            'episode': metas.get('subtitle') or video_info.get('videoTitle'),
            'series': video_info.get('playlistTitle'),
        }

View File

@@ -107,7 +107,10 @@ class AENetworksIE(AENetworksBaseIE):
}
video_id = self._html_search_meta('aetn:VideoID', webpage)
media_url = self._search_regex(
r"media_url\s*=\s*'([^']+)'", webpage, 'video url')
[r"media_url\s*=\s*'(?P<url>[^']+)'",
r'data-media-url=(?P<url>(?:https?:)?//[^\s>]+)',
r'data-media-url=(["\'])(?P<url>(?:(?!\1).)+?)\1'],
webpage, 'video url', group='url')
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
info = self._parse_theplatform_metadata(theplatform_metadata)

View File

@@ -4,15 +4,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_urlparse,
compat_urlparse,
)
from ..compat import compat_xpath
from ..utils import (
ExtractorError,
int_or_none,
update_url_query,
xpath_element,
xpath_text,
)
@@ -43,7 +38,8 @@ class AfreecaTVIE(InfoExtractor):
'uploader': 'dailyapril',
'uploader_id': 'dailyapril',
'upload_date': '20160503',
}
},
'skip': 'Video is gone',
}, {
'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867',
'info_dict': {
@@ -71,6 +67,19 @@ class AfreecaTVIE(InfoExtractor):
'upload_date': '20160502',
},
}],
'skip': 'Video is gone',
}, {
'url': 'http://vod.afreecatv.com/PLAYER/STATION/18650793',
'info_dict': {
'id': '18650793',
'ext': 'flv',
'uploader': '윈아디',
'uploader_id': 'badkids',
'title': '오늘은 다르다! 쏘님의 우월한 위아래~ 댄스리액션!',
},
'params': {
'skip_download': True, # requires rtmpdump
},
}, {
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
'only_matching': True,
@@ -90,40 +99,33 @@ class AfreecaTVIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
parsed_url = compat_urllib_parse_urlparse(url)
info_url = compat_urlparse.urlunparse(parsed_url._replace(
netloc='afbbs.afreecatv.com:8080',
path='/api/video/get_video_info.php'))
video_xml = self._download_xml(
update_url_query(info_url, {'nTitleNo': video_id}), video_id)
'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
video_id, query={'nTitleNo': video_id})
if xpath_element(video_xml, './track/video/file') is None:
video_element = video_xml.findall(compat_xpath('./track/video'))[1]
if video_element is None or video_element.text is None:
raise ExtractorError('Specified AfreecaTV video does not exist',
expected=True)
title = xpath_text(video_xml, './track/title', 'title')
video_url_raw = video_element.text
app, playpath = video_url_raw.split('mp4:')
title = xpath_text(video_xml, './track/title', 'title', fatal=True)
uploader = xpath_text(video_xml, './track/nickname', 'uploader')
uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id')
duration = int_or_none(xpath_text(video_xml, './track/duration',
'duration'))
thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail')
entries = []
for i, video_file in enumerate(video_xml.findall('./track/video/file')):
video_key = self.parse_video_key(video_file.get('key', ''))
if not video_key:
continue
entries.append({
'id': '%s_%s' % (video_id, video_key.get('part', i + 1)),
'title': title,
'upload_date': video_key.get('upload_date'),
'duration': int_or_none(video_file.get('duration')),
'url': video_file.text,
})
info = {
return {
'id': video_id,
'url': app,
'ext': 'flv',
'play_path': 'mp4:' + playpath,
'rtmp_live': True, # downloading won't end without this
'title': title,
'uploader': uploader,
'uploader_id': uploader_id,
@@ -131,20 +133,6 @@ class AfreecaTVIE(InfoExtractor):
'thumbnail': thumbnail,
}
if len(entries) > 1:
info['_type'] = 'multi_video'
info['entries'] = entries
elif len(entries) == 1:
info['url'] = entries[0]['url']
info['upload_date'] = entries[0].get('upload_date')
else:
raise ExtractorError(
'No files found for the specified AfreecaTV video, either'
' the URL is incorrect or the video has been made private.',
expected=True)
return info
class AfreecaTVGlobalIE(AfreecaTVIE):
IE_NAME = 'afreecatv:global'

View File

@@ -15,12 +15,12 @@ class AirMozillaIE(InfoExtractor):
_VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
_TEST = {
'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
'md5': '2e3e7486ba5d180e829d453875b9b8bf',
'md5': '8d02f53ee39cf006009180e21df1f3ba',
'info_dict': {
'id': '6x4q2w',
'ext': 'mp4',
'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
'thumbnail': r're:https?://vid\.ly/(?P<id>[0-9a-z-]+)/poster',
'thumbnail': r're:https?://.*/poster\.jpg',
'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
'timestamp': 1422487800,
'upload_date': '20150128',
@@ -34,21 +34,13 @@ class AirMozillaIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._html_search_regex(r'//vid.ly/(.*?)/embed', webpage, 'id')
video_id = self._html_search_regex(r'//vid\.ly/(.*?)/embed', webpage, 'id')
embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id)
jwconfig = self._search_regex(r'\svar jwconfig = (\{.*?\});\s', embed_script, 'metadata')
metadata = self._parse_json(jwconfig, video_id)
formats = [{
'url': source['file'],
'ext': source['type'],
'format_id': self._search_regex(r'&format=(.*)$', source['file'], 'video format'),
'format': source['label'],
'height': int(source['label'].rstrip('p')),
} for source in metadata['playlist'][0]['sources']]
self._sort_formats(formats)
jwconfig = self._parse_json(self._search_regex(
r'initCallback\((.*)\);', embed_script, 'metadata'), video_id)['config']
info_dict = self._parse_jwplayer_data(jwconfig, video_id)
view_count = int_or_none(self._html_search_regex(
r'Views since archived: ([0-9]+)',
webpage, 'view count', fatal=False))
@@ -58,17 +50,17 @@ class AirMozillaIE(InfoExtractor):
r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
webpage, 'duration', fatal=False))
return {
info_dict.update({
'id': video_id,
'title': self._og_search_title(webpage),
'formats': formats,
'url': self._og_search_url(webpage),
'display_id': display_id,
'thumbnail': metadata['playlist'][0].get('image'),
'description': self._og_search_description(webpage),
'timestamp': timestamp,
'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None),
'duration': duration,
'view_count': view_count,
'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage),
}
})
return info_dict

View File

@@ -2,9 +2,13 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
remove_end,
int_or_none,
qualities,
remove_end,
try_get,
unified_timestamp,
url_basename,
)
@@ -22,6 +26,10 @@ class AllocineIE(InfoExtractor):
'title': 'Astérix - Le Domaine des Dieux Teaser VF',
'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
'thumbnail': r're:http://.*\.jpg',
'duration': 39,
'timestamp': 1404273600,
'upload_date': '20140702',
'view_count': int,
},
}, {
'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html',
@@ -33,6 +41,10 @@ class AllocineIE(InfoExtractor):
'title': 'Planes 2 Bande-annonce VF',
'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway',
'thumbnail': r're:http://.*\.jpg',
'duration': 69,
'timestamp': 1385659800,
'upload_date': '20131128',
'view_count': int,
},
}, {
'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html',
@@ -44,6 +56,10 @@ class AllocineIE(InfoExtractor):
'title': 'Dragons 2 - Bande annonce finale VF',
'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a',
'thumbnail': r're:http://.*\.jpg',
'duration': 144,
'timestamp': 1397589900,
'upload_date': '20140415',
'view_count': int,
},
}, {
'url': 'http://www.allocine.fr/video/video-19550147/',
@@ -69,34 +85,37 @@ class AllocineIE(InfoExtractor):
r'data-model="([^"]+)"', webpage, 'data model', default=None)
if model:
model_data = self._parse_json(model, display_id)
for video_url in model_data['sources'].values():
video = model_data['videos'][0]
title = video['title']
for video_url in video['sources'].values():
video_id, format_id = url_basename(video_url).split('_')[:2]
formats.append({
'format_id': format_id,
'quality': quality(format_id),
'url': video_url,
})
title = model_data['title']
duration = int_or_none(video.get('duration'))
view_count = int_or_none(video.get('view_count'))
timestamp = unified_timestamp(try_get(
video, lambda x: x['added_at']['date'], compat_str))
else:
video_id = display_id
media_data = self._download_json(
'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
title = remove_end(
self._html_search_regex(
r'(?s)<title>(.+?)</title>', webpage, 'title').strip(),
' - AlloCiné')
for key, value in media_data['video'].items():
if not key.endswith('Path'):
continue
format_id = key[:-len('Path')]
formats.append({
'format_id': format_id,
'quality': quality(format_id),
'url': value,
})
title = remove_end(self._html_search_regex(
r'(?s)<title>(.+?)</title>', webpage, 'title'
).strip(), ' - AlloCiné')
duration, view_count, timestamp = [None] * 3
self._sort_formats(formats)
@@ -104,7 +123,10 @@ class AllocineIE(InfoExtractor):
'id': video_id,
'display_id': display_id,
'title': title,
'thumbnail': self._og_search_thumbnail(webpage),
'formats': formats,
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'duration': duration,
'timestamp': timestamp,
'view_count': view_count,
'formats': formats,
}

View File

@@ -93,8 +93,7 @@ class ArkenaIE(InfoExtractor):
exts = (mimetype2ext(f.get('Type')), determine_ext(f_url, None))
if kind == 'm3u8' or 'm3u8' in exts:
formats.extend(self._extract_m3u8_formats(
f_url, video_id, 'mp4',
entry_protocol='m3u8' if is_live else 'm3u8_native',
f_url, video_id, 'mp4', 'm3u8_native',
m3u8_id=kind, fatal=False, live=is_live))
elif kind == 'flash' or 'f4m' in exts:
formats.extend(self._extract_f4m_formats(

View File

@@ -90,7 +90,8 @@ class AtresPlayerIE(InfoExtractor):
request, None, 'Logging in as %s' % username)
error = self._html_search_regex(
r'(?s)<ul class="list_error">(.+?)</ul>', response, 'error', default=None)
r'(?s)<ul[^>]+class="[^"]*\blist_error\b[^"]*">(.+?)</ul>',
response, 'error', default=None)
if error:
raise ExtractorError(
'Unable to login: %s' % error, expected=True)
@@ -155,13 +156,17 @@ class AtresPlayerIE(InfoExtractor):
if format_id == 'token' or not video_url.startswith('http'):
continue
if 'geodeswowsmpra3player' in video_url:
f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
# f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
# f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
# these videos are protected by DRM, the f4m downloader doesn't support them
continue
else:
f4m_url = video_url[:-9] + '/manifest.f4m'
formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
video_url_hd = video_url.replace('free_es', 'es')
formats.extend(self._extract_f4m_formats(
video_url_hd[:-9] + '/manifest.f4m', video_id, f4m_id='hds',
fatal=False))
formats.extend(self._extract_mpd_formats(
video_url_hd[:-9] + '/manifest.mpd', video_id, mpd_id='dash',
fatal=False))
self._sort_formats(formats)
path_data = player.get('pathData')

View File

@@ -0,0 +1,73 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
unescapeHTML,
)
class ATVAtIE(InfoExtractor):
    """Extractor for atv.at video pages.

    The page carries an HTML-escaped JSON player configuration in the
    ``data-jsb`` attribute of the element whose class contains
    ``jsb_video/FlashPlayer``.  Every item of the initial video's ``parts``
    list is turned into one entry of the returned ``multi_video`` result.
    """

    _VALID_URL = r'https?://(?:www\.)?atv\.at/(?:[^/]+/){2}(?P<id>[dv]\d+)'
    _TESTS = [{
        'url': 'http://atv.at/aktuell/di-210317-2005-uhr/v1698449/',
        'md5': 'c3b6b975fb3150fc628572939df205f2',
        'info_dict': {
            'id': '1698447',
            'ext': 'mp4',
            'title': 'DI, 21.03.17 | 20:05 Uhr 1/1',
        }
    }, {
        'url': 'http://atv.at/aktuell/meinrad-knapp/d8416/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the page at *url* and build a ``multi_video`` info dict."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The attribute value is HTML-escaped JSON, so unescape before parsing.
        escaped_config = self._search_regex(
            r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="([^"]+)"',
            webpage, 'player data')
        player_config = self._parse_json(
            unescapeHTML(escaped_config), display_id)
        video_data = player_config['config']['initial_video']

        video_id = video_data['id']
        video_title = video_data['title']

        entries = []
        for part in video_data.get('parts', []):
            part_id = part['id']
            part_title = part['title']

            formats = []
            for source in part.get('sources', []):
                source_url = source.get('src')
                if not source_url:
                    continue
                if determine_ext(source_url) != 'm3u8':
                    # Anything that is not an HLS manifest is used as-is.
                    formats.append({
                        'format_id': source.get('delivery'),
                        'url': source_url,
                    })
                    continue
                formats.extend(self._extract_m3u8_formats(
                    source_url, part_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
            self._sort_formats(formats)

            entry = {
                'id': part_id,
                'title': part_title,
                'thumbnail': part.get('preview_image_url'),
                'duration': int_or_none(part.get('duration')),
                'is_live': part.get('is_livestream'),
                'formats': formats,
            }
            entries.append(entry)

        return {
            '_type': 'multi_video',
            'id': video_id,
            'title': video_title,
            'entries': entries,
        }

View File

@@ -21,10 +21,11 @@ class BellMediaIE(InfoExtractor):
animalplanet|
bravo|
mtv|
space
space|
etalk
)\.ca|
much\.com
)/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
)/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
_TESTS = [{
'url': 'http://www.ctv.ca/video/player?vid=706966',
'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
@@ -58,6 +59,9 @@ class BellMediaIE(InfoExtractor):
}, {
'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430',
'only_matching': True,
}, {
'url': 'http://www.etalk.ca/video?videoid=663455',
'only_matching': True,
}]
_DOMAINS = {
'thecomedynetwork': 'comedy',
@@ -65,6 +69,7 @@ class BellMediaIE(InfoExtractor):
'sciencechannel': 'discsci',
'investigationdiscovery': 'invdisc',
'animalplanet': 'aniplan',
'etalk': 'ctv',
}
def _real_extract(self, url):

View File

@@ -7,6 +7,7 @@ from ..utils import float_or_none
class CanvasIE(InfoExtractor):
IE_DESC = 'canvas.be and een.be'
_VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',

View File

@@ -12,13 +12,14 @@ from ..utils import (
ExtractorError,
float_or_none,
sanitized_Request,
unescapeHTML,
urlencode_postdata,
USER_AGENTS,
)
class CeskaTelevizeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P<id>[^/#?]+)/*(?:[#?].*)?$'
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
_TESTS = [{
'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
'info_dict': {
@@ -62,40 +63,12 @@ class CeskaTelevizeIE(InfoExtractor):
},
'skip': 'Georestricted to Czech Republic',
}, {
# video with 18+ caution trailer
'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
'info_dict': {
'id': '215562210900007-bogotart',
'title': 'Queer: Bogotart',
'description': 'Alternativní průvodce současným queer světem',
},
'playlist': [{
'info_dict': {
'id': '61924494876844842',
'ext': 'mp4',
'title': 'Queer: Bogotart (Varování 18+)',
'duration': 10.2,
},
}, {
'info_dict': {
'id': '61924494877068022',
'ext': 'mp4',
'title': 'Queer: Bogotart (Queer)',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 1558.3,
},
}],
'params': {
# m3u8 download
'skip_download': True,
},
'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
'only_matching': True,
}]
def _real_extract(self, url):
url = url.replace('/porady/', '/ivysilani/').replace('/video/', '')
mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id')
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
@@ -103,13 +76,28 @@ class CeskaTelevizeIE(InfoExtractor):
if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
typ = self._html_search_regex(
r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type')
episode_id = self._html_search_regex(
r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id')
type_ = None
episode_id = None
playlist = self._parse_json(
self._search_regex(
r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist',
default='{}'), playlist_id)
if playlist:
type_ = playlist.get('type')
episode_id = playlist.get('id')
if not type_:
type_ = self._html_search_regex(
r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],',
webpage, 'type')
if not episode_id:
episode_id = self._html_search_regex(
r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],',
webpage, 'episode_id')
data = {
'playlist[0][type]': typ,
'playlist[0][type]': type_,
'playlist[0][id]': episode_id,
'requestUrl': compat_urllib_parse_urlparse(url).path,
'requestSource': 'iVysilani',
@@ -160,8 +148,7 @@ class CeskaTelevizeIE(InfoExtractor):
for format_id, stream_url in item.get('streamUrls', {}).items():
if 'playerType=flash' in stream_url:
stream_formats = self._extract_m3u8_formats(
stream_url, playlist_id, 'mp4',
entry_protocol='m3u8' if is_live else 'm3u8_native',
stream_url, playlist_id, 'mp4', 'm3u8_native',
m3u8_id='hls-%s' % format_id, fatal=False)
else:
stream_formats = self._extract_mpd_formats(
@@ -246,3 +233,47 @@ class CeskaTelevizeIE(InfoExtractor):
yield line
return '\r\n'.join(_fix_subtitle(subtitles))
class CeskaTelevizePoradyIE(InfoExtractor):
    """Extractor for ceskatelevize.cz ``/porady/`` pages.

    It does not extract media itself: it reads the player URL from a
    ``<span ... data-url=...>`` attribute on the page and delegates that URL
    to ``CeskaTelevizeIE``.
    """

    _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
    _TESTS = [{
        # video with 18+ caution trailer
        'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
        'info_dict': {
            'id': '215562210900007-bogotart',
            'title': 'Queer: Bogotart',
            'description': 'Alternativní průvodce současným queer světem',
        },
        'playlist': [{
            'info_dict': {
                'id': '61924494876844842',
                'ext': 'mp4',
                'title': 'Queer: Bogotart (Varování 18+)',
                'duration': 10.2,
            },
        }, {
            'info_dict': {
                'id': '61924494877068022',
                'ext': 'mp4',
                'title': 'Queer: Bogotart (Queer)',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 1558.3,
            },
        }],
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Locate the embedded player URL and hand it to ``CeskaTelevizeIE``."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The attribute may use either quote style; the back-reference keeps
        # the match from running past the closing quote.
        escaped_player_url = self._search_regex(
            r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
            webpage, 'iframe player url', group='url')
        player_url = unescapeHTML(escaped_player_url)

        target_ie = CeskaTelevizeIE.ie_key()
        return self.url_result(player_url, ie=target_ie)

View File

@@ -4,62 +4,62 @@ import re
from .common import InfoExtractor
from ..utils import (
clean_html,
ExtractorError,
parse_filesize,
int_or_none,
parse_iso8601,
qualities,
unescapeHTML,
)
class Channel9IE(InfoExtractor):
'''
Common extractor for channel9.msdn.com.
The type of provided URL (video or playlist) is determined according to
meta Search.PageType from web page HTML rather than URL itself, as it is
not always possible to do.
'''
IE_DESC = 'Channel 9'
IE_NAME = 'channel9'
_VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)'
_VALID_URL = r'https?://(?:www\.)?(?:channel9\.msdn\.com|s\.ch9\.ms)/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)'
_TESTS = [{
'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
'md5': 'bbd75296ba47916b754e73c3a4bbdf10',
'md5': '32083d4eaf1946db6d454313f44510ca',
'info_dict': {
'id': 'Events/TechEd/Australia/2013/KOS002',
'ext': 'mp4',
'id': '6c413323-383a-49dc-88f9-a22800cab024',
'ext': 'wmv',
'title': 'Developer Kick-Off Session: Stuff We Love',
'description': 'md5:c08d72240b7c87fcecafe2692f80e35f',
'description': 'md5:b80bf9355a503c193aff7ec6cd5a7731',
'duration': 4576,
'thumbnail': r're:http://.*\.jpg',
'thumbnail': r're:https?://.*\.jpg',
'timestamp': 1377717420,
'upload_date': '20130828',
'session_code': 'KOS002',
'session_day': 'Day 1',
'session_room': 'Arena 1A',
'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug',
'Mads Kristensen'],
'session_speakers': ['Andrew Coates', 'Brady Gaster', 'Mads Kristensen', 'Ed Blankenship', 'Patrick Klug'],
},
}, {
'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
'md5': 'b43ee4529d111bc37ba7ee4f34813e68',
'md5': 'dcf983ee6acd2088e7188c3cf79b46bc',
'info_dict': {
'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing',
'ext': 'mp4',
'id': 'fe8e435f-bb93-4e01-8e97-a28c01887024',
'ext': 'wmv',
'title': 'Self-service BI with Power BI - nuclear testing',
'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b',
'description': 'md5:2d17fec927fc91e9e17783b3ecc88f54',
'duration': 1540,
'thumbnail': r're:http://.*\.jpg',
'thumbnail': r're:https?://.*\.jpg',
'timestamp': 1386381991,
'upload_date': '20131207',
'authors': ['Mike Wilmot'],
},
}, {
# low quality mp4 is best
'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
'info_dict': {
'id': 'Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
'id': '33ad69d2-6a4e-4172-83a1-a523013dec76',
'ext': 'mp4',
'title': 'Ranges for the Standard Library',
'description': 'md5:2e6b4917677af3728c5f6d63784c4c5d',
'description': 'md5:9895e0a9fd80822d2f01c454b8f4a372',
'duration': 5646,
'thumbnail': r're:http://.*\.jpg',
'thumbnail': r're:https?://.*\.jpg',
'upload_date': '20150930',
'timestamp': 1443640735,
},
'params': {
'skip_download': True,
@@ -70,7 +70,7 @@ class Channel9IE(InfoExtractor):
'id': 'Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b',
'title': 'Channel 9',
},
'playlist_count': 2,
'playlist_mincount': 100,
}, {
'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS',
'only_matching': True,
@@ -81,189 +81,6 @@ class Channel9IE(InfoExtractor):
_RSS_URL = 'http://channel9.msdn.com/%s/RSS'
def _formats_from_html(self, html):
FORMAT_REGEX = r'''
(?x)
<a\s+href="(?P<url>[^"]+)">(?P<quality>[^<]+)</a>\s*
<span\s+class="usage">\((?P<note>[^\)]+)\)</span>\s*
(?:<div\s+class="popup\s+rounded">\s*
<h3>File\s+size</h3>\s*(?P<filesize>.*?)\s*
</div>)? # File size part may be missing
'''
quality = qualities((
'MP3', 'MP4',
'Low Quality WMV', 'Low Quality MP4',
'Mid Quality WMV', 'Mid Quality MP4',
'High Quality WMV', 'High Quality MP4'))
formats = [{
'url': x.group('url'),
'format_id': x.group('quality'),
'format_note': x.group('note'),
'format': '%s (%s)' % (x.group('quality'), x.group('note')),
'filesize_approx': parse_filesize(x.group('filesize')),
'quality': quality(x.group('quality')),
'vcodec': 'none' if x.group('note') == 'Audio only' else None,
} for x in list(re.finditer(FORMAT_REGEX, html))]
self._sort_formats(formats)
return formats
def _extract_title(self, html):
title = self._html_search_meta('title', html, 'title')
if title is None:
title = self._og_search_title(html)
TITLE_SUFFIX = ' (Channel 9)'
if title is not None and title.endswith(TITLE_SUFFIX):
title = title[:-len(TITLE_SUFFIX)]
return title
def _extract_description(self, html):
DESCRIPTION_REGEX = r'''(?sx)
<div\s+class="entry-content">\s*
<div\s+id="entry-body">\s*
(?P<description>.+?)\s*
</div>\s*
</div>
'''
m = re.search(DESCRIPTION_REGEX, html)
if m is not None:
return m.group('description')
return self._html_search_meta('description', html, 'description')
def _extract_duration(self, html):
m = re.search(r'"length": *"(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"', html)
return ((int(m.group('hours')) * 60 * 60) + (int(m.group('minutes')) * 60) + int(m.group('seconds'))) if m else None
def _extract_slides(self, html):
m = re.search(r'<a href="(?P<slidesurl>[^"]+)" class="slides">Slides</a>', html)
return m.group('slidesurl') if m is not None else None
def _extract_zip(self, html):
m = re.search(r'<a href="(?P<zipurl>[^"]+)" class="zip">Zip</a>', html)
return m.group('zipurl') if m is not None else None
def _extract_avg_rating(self, html):
m = re.search(r'<p class="avg-rating">Avg Rating: <span>(?P<avgrating>[^<]+)</span></p>', html)
return float(m.group('avgrating')) if m is not None else 0
def _extract_rating_count(self, html):
m = re.search(r'<div class="rating-count">\((?P<ratingcount>[^<]+)\)</div>', html)
return int(self._fix_count(m.group('ratingcount'))) if m is not None else 0
def _extract_view_count(self, html):
m = re.search(r'<li class="views">\s*<span class="count">(?P<viewcount>[^<]+)</span> Views\s*</li>', html)
return int(self._fix_count(m.group('viewcount'))) if m is not None else 0
def _extract_comment_count(self, html):
m = re.search(r'<li class="comments">\s*<a href="#comments">\s*<span class="count">(?P<commentcount>[^<]+)</span> Comments\s*</a>\s*</li>', html)
return int(self._fix_count(m.group('commentcount'))) if m is not None else 0
def _fix_count(self, count):
return int(str(count).replace(',', '')) if count is not None else None
def _extract_authors(self, html):
m = re.search(r'(?s)<li class="author">(.*?)</li>', html)
if m is None:
return None
return re.findall(r'<a href="/Niners/[^"]+">([^<]+)</a>', m.group(1))
def _extract_session_code(self, html):
m = re.search(r'<li class="code">\s*(?P<code>.+?)\s*</li>', html)
return m.group('code') if m is not None else None
def _extract_session_day(self, html):
m = re.search(r'<li class="day">\s*<a href="/Events/[^"]+">(?P<day>[^<]+)</a>\s*</li>', html)
return m.group('day').strip() if m is not None else None
def _extract_session_room(self, html):
m = re.search(r'<li class="room">\s*(?P<room>.+?)\s*</li>', html)
return m.group('room') if m is not None else None
def _extract_session_speakers(self, html):
return re.findall(r'<a href="/Events/Speakers/[^"]+">([^<]+)</a>', html)
def _extract_content(self, html, content_path):
# Look for downloadable content
formats = self._formats_from_html(html)
slides = self._extract_slides(html)
zip_ = self._extract_zip(html)
# Nothing to download
if len(formats) == 0 and slides is None and zip_ is None:
self._downloader.report_warning('None of recording, slides or zip are available for %s' % content_path)
return
# Extract meta
title = self._extract_title(html)
description = self._extract_description(html)
thumbnail = self._og_search_thumbnail(html)
duration = self._extract_duration(html)
avg_rating = self._extract_avg_rating(html)
rating_count = self._extract_rating_count(html)
view_count = self._extract_view_count(html)
comment_count = self._extract_comment_count(html)
common = {
'_type': 'video',
'id': content_path,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'avg_rating': avg_rating,
'rating_count': rating_count,
'view_count': view_count,
'comment_count': comment_count,
}
result = []
if slides is not None:
d = common.copy()
d.update({'title': title + '-Slides', 'url': slides})
result.append(d)
if zip_ is not None:
d = common.copy()
d.update({'title': title + '-Zip', 'url': zip_})
result.append(d)
if len(formats) > 0:
d = common.copy()
d.update({'title': title, 'formats': formats})
result.append(d)
return result
def _extract_entry_item(self, html, content_path):
contents = self._extract_content(html, content_path)
if contents is None:
return contents
if len(contents) > 1:
raise ExtractorError('Got more than one entry')
result = contents[0]
result['authors'] = self._extract_authors(html)
return result
def _extract_session(self, html, content_path):
contents = self._extract_content(html, content_path)
if contents is None:
return contents
session_meta = {
'session_code': self._extract_session_code(html),
'session_day': self._extract_session_day(html),
'session_room': self._extract_session_room(html),
'session_speakers': self._extract_session_speakers(html),
}
for content in contents:
content.update(session_meta)
return self.playlist_result(contents)
def _extract_list(self, video_id, rss_url=None):
if not rss_url:
rss_url = self._RSS_URL % video_id
@@ -274,9 +91,7 @@ class Channel9IE(InfoExtractor):
return self.playlist_result(entries, video_id, title_text)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
content_path = mobj.group('contentpath')
rss = mobj.group('rss')
content_path, rss = re.match(self._VALID_URL, url).groups()
if rss:
return self._extract_list(content_path, url)
@@ -284,17 +99,158 @@ class Channel9IE(InfoExtractor):
webpage = self._download_webpage(
url, content_path, 'Downloading web page')
page_type = self._search_regex(
r'<meta[^>]+name=(["\'])WT\.entryid\1[^>]+content=(["\'])(?P<pagetype>[^:]+).+?\2',
webpage, 'page type', default=None, group='pagetype')
if page_type:
if page_type == 'Entry': # Any 'item'-like page, may contain downloadable content
return self._extract_entry_item(webpage, content_path)
elif page_type == 'Session': # Event session page, may contain downloadable content
return self._extract_session(webpage, content_path)
elif page_type == 'Event':
return self._extract_list(content_path)
episode_data = self._search_regex(
r"data-episode='([^']+)'", webpage, 'episode data', default=None)
if episode_data:
episode_data = self._parse_json(unescapeHTML(
episode_data), content_path)
content_id = episode_data['contentId']
is_session = '/Sessions(' in episode_data['api']
content_url = 'https://channel9.msdn.com/odata' + episode_data['api']
if is_session:
content_url += '?$expand=Speakers'
else:
raise ExtractorError('Unexpected WT.entryid %s' % page_type, expected=True)
else: # Assuming list
content_url += '?$expand=Authors'
content_data = self._download_json(content_url, content_id)
title = content_data['Title']
QUALITIES = (
'mp3',
'wmv', 'mp4',
'wmv-low', 'mp4-low',
'wmv-mid', 'mp4-mid',
'wmv-high', 'mp4-high',
)
quality_key = qualities(QUALITIES)
def quality(quality_id, format_url):
return (len(QUALITIES) if '_Source.' in format_url
else quality_key(quality_id))
formats = []
urls = set()
SITE_QUALITIES = {
'MP3': 'mp3',
'MP4': 'mp4',
'Low Quality WMV': 'wmv-low',
'Low Quality MP4': 'mp4-low',
'Mid Quality WMV': 'wmv-mid',
'Mid Quality MP4': 'mp4-mid',
'High Quality WMV': 'wmv-high',
'High Quality MP4': 'mp4-high',
}
formats_select = self._search_regex(
r'(?s)<select[^>]+name=["\']format[^>]+>(.+?)</select', webpage,
'formats select', default=None)
if formats_select:
for mobj in re.finditer(
r'<option\b[^>]+\bvalue=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>\s*(?P<format>[^<]+?)\s*<',
formats_select):
format_url = mobj.group('url')
if format_url in urls:
continue
urls.add(format_url)
format_id = mobj.group('format')
quality_id = SITE_QUALITIES.get(format_id, format_id)
formats.append({
'url': format_url,
'format_id': quality_id,
'quality': quality(quality_id, format_url),
'vcodec': 'none' if quality_id == 'mp3' else None,
})
API_QUALITIES = {
'VideoMP4Low': 'mp4-low',
'VideoWMV': 'wmv-mid',
'VideoMP4Medium': 'mp4-mid',
'VideoMP4High': 'mp4-high',
'VideoWMVHQ': 'wmv-hq',
}
for format_id, q in API_QUALITIES.items():
q_url = content_data.get(format_id)
if not q_url or q_url in urls:
continue
urls.add(q_url)
formats.append({
'url': q_url,
'format_id': q,
'quality': quality(q, q_url),
})
self._sort_formats(formats)
slides = content_data.get('Slides')
zip_file = content_data.get('ZipFile')
if not formats and not slides and not zip_file:
raise ExtractorError(
'None of recording, slides or zip are available for %s' % content_path)
subtitles = {}
for caption in content_data.get('Captions', []):
caption_url = caption.get('Url')
if not caption_url:
continue
subtitles.setdefault(caption.get('Language', 'en'), []).append({
'url': caption_url,
'ext': 'vtt',
})
common = {
'id': content_id,
'title': title,
'description': clean_html(content_data.get('Description') or content_data.get('Body')),
'thumbnail': content_data.get('Thumbnail') or content_data.get('VideoPlayerPreviewImage'),
'duration': int_or_none(content_data.get('MediaLengthInSeconds')),
'timestamp': parse_iso8601(content_data.get('PublishedDate')),
'avg_rating': int_or_none(content_data.get('Rating')),
'rating_count': int_or_none(content_data.get('RatingCount')),
'view_count': int_or_none(content_data.get('Views')),
'comment_count': int_or_none(content_data.get('CommentCount')),
'subtitles': subtitles,
}
if is_session:
speakers = []
for s in content_data.get('Speakers', []):
speaker_name = s.get('FullName')
if not speaker_name:
continue
speakers.append(speaker_name)
common.update({
'session_code': content_data.get('Code'),
'session_room': content_data.get('Room'),
'session_speakers': speakers,
})
else:
authors = []
for a in content_data.get('Authors', []):
author_name = a.get('DisplayName')
if not author_name:
continue
authors.append(author_name)
common['authors'] = authors
contents = []
if slides:
d = common.copy()
d.update({'title': title + '-Slides', 'url': slides})
contents.append(d)
if zip_file:
d = common.copy()
d.update({'title': title + '-Zip', 'url': zip_file})
contents.append(d)
if formats:
d = common.copy()
d.update({'title': title, 'formats': formats})
contents.append(d)
return self.playlist_result(contents)
else:
return self._extract_list(content_path)

View File

@@ -1,97 +1,56 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_HTTPError,
)
from ..utils import (
ExtractorError,
HEADRequest,
remove_end,
str_to_int,
unified_strdate,
)
class CloudyIE(InfoExtractor):
_IE_DESC = 'cloudy.ec'
_VALID_URL = r'''(?x)
https?://(?:www\.)?cloudy\.ec/
(?:v/|embed\.php\?id=)
(?P<id>[A-Za-z0-9]+)
'''
_EMBED_URL = 'http://www.cloudy.ec/embed.php?id=%s'
_API_URL = 'http://www.cloudy.ec/api/player.api.php'
_MAX_TRIES = 2
_TEST = {
_VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
_TESTS = [{
'url': 'https://www.cloudy.ec/v/af511e2527aac',
'md5': '5cb253ace826a42f35b4740539bedf07',
'md5': '29832b05028ead1b58be86bf319397ca',
'info_dict': {
'id': 'af511e2527aac',
'ext': 'flv',
'ext': 'mp4',
'title': 'Funny Cats and Animals Compilation june 2013',
'upload_date': '20130913',
'view_count': int,
}
}
def _extract_video(self, video_id, file_key, error_url=None, try_num=0):
if try_num > self._MAX_TRIES - 1:
raise ExtractorError('Unable to extract video URL', expected=True)
form = {
'file': video_id,
'key': file_key,
}
if error_url:
form.update({
'numOfErrors': try_num,
'errorCode': '404',
'errorUrl': error_url,
})
player_data = self._download_webpage(
self._API_URL, video_id, 'Downloading player data', query=form)
data = compat_parse_qs(player_data)
try_num += 1
if 'error' in data:
raise ExtractorError(
'%s error: %s' % (self.IE_NAME, ' '.join(data['error_msg'])),
expected=True)
title = data.get('title', [None])[0]
if title:
title = remove_end(title, '&asdasdas').strip()
video_url = data.get('url', [None])[0]
if video_url:
try:
self._request_webpage(HEADRequest(video_url), video_id, 'Checking video URL')
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]:
self.report_warning('Invalid video URL, requesting another', video_id)
return self._extract_video(video_id, file_key, video_url, try_num)
return {
'id': video_id,
'url': video_url,
'title': title,
}
}, {
'url': 'http://www.cloudy.ec/embed.php?autoplay=1&id=af511e2527aac',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
url = self._EMBED_URL % video_id
webpage = self._download_webpage(url, video_id)
webpage = self._download_webpage(
'http://www.cloudy.ec/embed.php?id=%s' % video_id, video_id)
file_key = self._search_regex(
[r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'],
webpage, 'file_key')
info = self._parse_html5_media_entries(url, webpage, video_id)[0]
return self._extract_video(video_id, file_key)
webpage = self._download_webpage(
'https://www.cloudy.ec/v/%s' % video_id, video_id, fatal=False)
if webpage:
info.update({
'title': self._search_regex(
r'<h\d[^>]*>([^<]+)<', webpage, 'title'),
'upload_date': unified_strdate(self._search_regex(
r'>Published at (\d{4}-\d{1,2}-\d{1,2})', webpage,
'upload date', fatal=False)),
'view_count': str_to_int(self._search_regex(
r'([\d,.]+) views<', webpage, 'view count', fatal=False)),
})
if not info.get('title'):
info['title'] = video_id
info['id'] = video_id
return info

View File

@@ -1,3 +1,4 @@
# coding: utf-8
from __future__ import unicode_literals
import base64
@@ -547,6 +548,34 @@ class InfoExtractor(object):
return encoding
def __check_blocked(self, content):
    """Raise ExtractorError if the decoded page is a known block notice.

    Three notices are recognised: the Websense filtering page, the
    Indian censorship page and the Russian TTK / blocklist.rkn.gov.ru
    page.  When none of them matches, the method returns None.

    content -- decoded text of the downloaded webpage
    """
    # Some of the markers are only looked for near the top of the document
    head = content[:512]

    is_websense = (
        'Websense' in head and
        '<title>Access to this site is blocked</title>' in content)
    if is_websense:
        details_url = self._html_search_regex(
            r'<iframe src="([^"]+)"', content,
            'Websense information URL', default=None)
        msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'
        if details_url:
            msg += ' Visit %s for more details' % details_url
        raise ExtractorError(msg, expected=True)

    if '<title>The URL you requested has been blocked</title>' in head:
        reason = self._html_search_regex(
            r'</h1><p>(.*?)</p>',
            content, 'block message', default=None)
        msg = (
            'Access to this webpage has been blocked by Indian censorship. '
            'Use a VPN or proxy server (with --proxy) to route around it.')
        if reason:
            # Collapse the notice onto a single line for the error message
            msg += ' (Message: "%s")' % reason.replace('\n', ' ')
        raise ExtractorError(msg, expected=True)

    is_rkn = (
        'blocklist.rkn.gov.ru' in content and
        '<title>TTK :: Доступ к ресурсу ограничен</title>' in content)
    if is_rkn:
        raise ExtractorError(
            'Access to this webpage has been blocked by decision of the Russian government. '
            'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
            expected=True)
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
content_type = urlh.headers.get('Content-Type', '')
webpage_bytes = urlh.read()
@@ -588,25 +617,7 @@ class InfoExtractor(object):
except LookupError:
content = webpage_bytes.decode('utf-8', 'replace')
if ('<title>Access to this site is blocked</title>' in content and
'Websense' in content[:512]):
msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'
blocked_iframe = self._html_search_regex(
r'<iframe src="([^"]+)"', content,
'Websense information URL', default=None)
if blocked_iframe:
msg += ' Visit %s for more details' % blocked_iframe
raise ExtractorError(msg, expected=True)
if '<title>The URL you requested has been blocked</title>' in content[:512]:
msg = (
'Access to this webpage has been blocked by Indian censorship. '
'Use a VPN or proxy server (with --proxy) to route around it.')
block_msg = self._html_search_regex(
r'</h1><p>(.*?)</p>',
content, 'block message', default=None)
if block_msg:
msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
raise ExtractorError(msg, expected=True)
self.__check_blocked(content)
return content
@@ -2169,18 +2180,24 @@ class InfoExtractor(object):
})
return formats
@staticmethod
def _find_jwplayer_data(webpage):
def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
mobj = re.search(
r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
webpage)
if mobj:
return mobj.group('options')
try:
jwplayer_data = self._parse_json(mobj.group('options'),
video_id=video_id,
transform_source=transform_source)
except ExtractorError:
pass
else:
if isinstance(jwplayer_data, dict):
return jwplayer_data
def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
jwplayer_data = self._parse_json(
self._find_jwplayer_data(webpage), video_id,
transform_source=js_to_json)
jwplayer_data = self._find_jwplayer_data(
webpage, video_id, transform_source=js_to_json)
return self._parse_jwplayer_data(
jwplayer_data, video_id, *args, **kwargs)

View File

@@ -9,13 +9,14 @@ from ..compat import (
compat_urlparse,
)
from ..utils import (
orderedSet,
remove_end,
extract_attributes,
mimetype2ext,
determine_ext,
extract_attributes,
int_or_none,
js_to_json,
mimetype2ext,
orderedSet,
parse_iso8601,
remove_end,
)
@@ -66,6 +67,16 @@ class CondeNastIE(InfoExtractor):
'upload_date': '20130314',
'timestamp': 1363219200,
}
}, {
'url': 'http://video.gq.com/watch/the-closer-with-keith-olbermann-the-only-true-surprise-trump-s-an-idiot?c=series',
'info_dict': {
'id': '58d1865bfd2e6126e2000015',
'ext': 'mp4',
'title': 'The Only True Surprise? Trumps an Idiot',
'uploader': 'gq',
'upload_date': '20170321',
'timestamp': 1490126427,
},
}, {
# JS embed
'url': 'http://player.cnevids.com/embedjs/55f9cf8b61646d1acf00000c/5511d76261646d5566020000.js',
@@ -114,26 +125,33 @@ class CondeNastIE(InfoExtractor):
})
video_id = query['videoId']
video_info = None
info_page = self._download_webpage(
info_page = self._download_json(
'http://player.cnevids.com/player/video.js',
video_id, 'Downloading video info', query=query, fatal=False)
video_id, 'Downloading video info', fatal=False, query=query)
if info_page:
video_info = self._parse_json(self._search_regex(
r'loadCallback\(({.+})\)', info_page, 'video info'), video_id)['video']
else:
video_info = info_page.get('video')
if not video_info:
info_page = self._download_webpage(
'http://player.cnevids.com/player/loader.js',
video_id, 'Downloading loader info', query=query)
video_info = self._parse_json(self._search_regex(
r'var\s+video\s*=\s*({.+?});', info_page, 'video info'), video_id)
video_info = self._parse_json(
self._search_regex(
r'(?s)var\s+config\s*=\s*({.+?});', info_page, 'config'),
video_id, transform_source=js_to_json)['video']
title = video_info['title']
formats = []
for fdata in video_info.get('sources', [{}])[0]:
for fdata in video_info['sources']:
src = fdata.get('src')
if not src:
continue
ext = mimetype2ext(fdata.get('type')) or determine_ext(src)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
src, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
continue
quality = fdata.get('quality')
formats.append({
'format_id': ext + ('-%s' % quality if quality else ''),
@@ -169,7 +187,6 @@ class CondeNastIE(InfoExtractor):
path=remove_end(parsed_url.path, '.js').replace('/embedjs/', '/embed/')))
url_type = 'embed'
self.to_screen('Extracting from %s with the Condé Nast extractor' % self._SITES[site])
webpage = self._download_webpage(url, item_id)
if url_type == 'series':

View File

@@ -390,7 +390,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
else:
webpage_url = 'http://www.' + mobj.group('url')
webpage = self._download_webpage(self._add_skip_wall(webpage_url), video_id, 'Downloading webpage')
webpage = self._download_webpage(
self._add_skip_wall(webpage_url), video_id,
headers=self.geo_verification_headers())
note_m = self._html_search_regex(
r'<div class="showmedia-trailer-notice">(.+?)</div>',
webpage, 'trailer-notice', default='')
@@ -565,7 +567,9 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
def _real_extract(self, url):
show_id = self._match_id(url)
webpage = self._download_webpage(self._add_skip_wall(url), show_id)
webpage = self._download_webpage(
self._add_skip_wall(url), show_id,
headers=self.geo_verification_headers())
title = self._html_search_regex(
r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>',
webpage, 'title')

View File

@@ -82,6 +82,11 @@ class CWTVIE(InfoExtractor):
'url': quality_url,
'tbr': tbr,
})
video_metadata = video_data['assetFields']
ism_url = video_metadata.get('smoothStreamingUrl')
if ism_url:
formats.extend(self._extract_ism_formats(
ism_url, video_id, ism_id='mss', fatal=False))
self._sort_formats(formats)
thumbnails = [{
@@ -90,8 +95,6 @@ class CWTVIE(InfoExtractor):
'height': image.get('height'),
} for image_id, image in video_data['images'].items() if image.get('uri')] if video_data.get('images') else None
video_metadata = video_data['assetFields']
subtitles = {
'en': [{
'url': video_metadata['UnicornCcUrl'],

View File

@@ -0,0 +1,59 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    parse_duration,
)
class DiscoveryVRIE(InfoExtractor):
    """Extractor for single videos on discoveryvr.com "watch" pages.

    The URL slug is used both as the video id and as the display id.
    """

    _VALID_URL = r'https?://(?:www\.)?discoveryvr\.com/watch/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://www.discoveryvr.com/watch/discovery-vr-an-introduction',
        'md5': '32b1929798c464a54356378b7912eca4',
        'info_dict': {
            'id': 'discovery-vr-an-introduction',
            'ext': 'mp4',
            'title': 'Discovery VR - An Introduction',
            'description': 'md5:80d418a10efb8899d9403e61d8790f06',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video whose slug is in *url*.

        Raises ExtractorError when the page's bootstrap data contains no
        video entry whose 'slug' equals the slug taken from the URL.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The regex captures a double-quoted JS string holding escaped JSON;
        # the escapes are undone before the JSON is parsed.
        bootstrap_data = self._search_regex(
            r'root\.DVR\.bootstrapData\s+=\s+"({.+?})";',
            webpage, 'bootstrap data')
        bootstrap_data = self._parse_json(
            bootstrap_data.encode('utf-8').decode('unicode_escape'),
            display_id)

        # 'videos' is itself a JSON string and needs a second parse
        videos = self._parse_json(bootstrap_data['videos'], display_id)['allVideos']

        # A bare next() without default would leak StopIteration when the
        # slug is missing; report a proper extractor error instead.
        video_data = next(
            (video for video in videos if video.get('slug') == display_id),
            None)
        if video_data is None:
            raise ExtractorError(
                'Unable to find video data for %s' % display_id)

        series = video_data.get('showTitle')
        title = episode = video_data.get('title') or series
        if series and series != title:
            title = '%s - %s' % (series, title)

        formats = []
        for f, format_id in (('cdnUriM3U8', 'mobi'), ('webVideoUrlSd', 'sd'), ('webVideoUrlHd', 'hd')):
            f_url = video_data.get(f)
            if not f_url:
                continue
            formats.append({
                'format_id': format_id,
                'url': f_url,
            })

        return {
            'id': display_id,
            'display_id': display_id,
            'title': title,
            'description': video_data.get('description'),
            'thumbnail': video_data.get('thumbnail'),
            'duration': parse_duration(video_data.get('runTime')),
            'formats': formats,
            'episode': episode,
            'series': series,
        }

View File

@@ -19,6 +19,7 @@ from .acast import (
ACastChannelIE,
)
from .addanime import AddAnimeIE
from .adn import ADNIE
from .adobetv import (
AdobeTVIE,
AdobeTVShowIE,
@@ -71,6 +72,7 @@ from .arte import (
)
from .atresplayer import AtresPlayerIE
from .atttechchannel import ATTTechChannelIE
from .atvat import ATVAtIE
from .audimedia import AudiMediaIE
from .audioboom import AudioBoomIE
from .audiomack import AudiomackIE, AudiomackAlbumIE
@@ -163,7 +165,10 @@ from .ccc import CCCIE
from .ccma import CCMAIE
from .cctv import CCTVIE
from .cda import CDAIE
from .ceskatelevize import CeskaTelevizeIE
from .ceskatelevize import (
CeskaTelevizeIE,
CeskaTelevizePoradyIE,
)
from .channel9 import Channel9IE
from .charlierose import CharlieRoseIE
from .chaturbate import ChaturbateIE
@@ -271,6 +276,7 @@ from .discoverygo import (
DiscoveryGoPlaylistIE,
)
from .discoverynetworks import DiscoveryNetworksDeIE
from .discoveryvr import DiscoveryVRIE
from .disney import DisneyIE
from .dispeak import DigitallySpeakingIE
from .dropbox import DropboxIE
@@ -535,6 +541,7 @@ from .mangomolo import (
)
from .matchtv import MatchTVIE
from .mdr import MDRIE
from .medici import MediciIE
from .meipai import MeipaiIE
from .melonvod import MelonVODIE
from .meta import METAIE
@@ -727,6 +734,10 @@ from .orf import (
ORFFM4IE,
ORFIPTVIE,
)
from .packtpub import (
PacktPubIE,
PacktPubCourseIE,
)
from .pandatv import PandaTVIE
from .pandoratv import PandoraTVIE
from .parliamentliveuk import ParliamentLiveUKIE
@@ -796,7 +807,7 @@ from .radiojavan import RadioJavanIE
from .radiobremen import RadioBremenIE
from .radiofrance import RadioFranceIE
from .rai import (
RaiTVIE,
RaiPlayIE,
RaiIE,
)
from .rbmaradio import RBMARadioIE
@@ -827,7 +838,11 @@ from .rozhlas import RozhlasIE
from .rtbf import RTBFIE
from .rte import RteIE, RteRadioIE
from .rtlnl import RtlNlIE
from .rtl2 import RTL2IE
from .rtl2 import (
RTL2IE,
RTL2YouIE,
RTL2YouSeriesIE,
)
from .rtp import RTPIE
from .rts import RTSIE
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE
@@ -969,6 +984,7 @@ from .theplatform import (
from .thescene import TheSceneIE
from .thesixtyone import TheSixtyOneIE
from .thestar import TheStarIE
from .thesun import TheSunIE
from .theweatherchannel import TheWeatherChannelIE
from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE
@@ -1017,6 +1033,7 @@ from .tv2 import (
)
from .tv3 import TV3IE
from .tv4 import TV4IE
from .tv5mondeplus import TV5MondePlusIE
from .tva import TVAIE
from .tvanouvelles import (
TVANouvellesIE,
@@ -1176,6 +1193,11 @@ from .voxmedia import VoxMediaIE
from .vporn import VpornIE
from .vrt import VRTIE
from .vrak import VrakIE
from .vrv import (
VRVIE,
VRVSeriesIE,
)
from .vshare import VShareIE
from .medialaan import MedialaanIE
from .vube import VubeIE
from .vuclip import VuClipIE

View File

@@ -54,7 +54,7 @@ class EyedoTVIE(InfoExtractor):
'id': video_id,
'title': title,
'formats': self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8' if is_live else 'm3u8_native'),
m3u8_url, video_id, 'mp4', 'm3u8_native'),
'description': xpath_text(video_data, _add_ns('Description')),
'duration': parse_duration(xpath_text(video_data, _add_ns('Duration'))),
'uploader': xpath_text(video_data, _add_ns('Createur')),

View File

@@ -47,9 +47,12 @@ class FOXIE(AdobePassIE):
resource = self._get_mvpd_resource('fbc-fox', None, ap_p['videoGUID'], rating)
query['auth'] = self._extract_mvpd_auth(url, video_id, 'fbc-fox', resource)
return {
info = self._search_json_ld(webpage, video_id, fatal=False)
info.update({
'_type': 'url_transparent',
'ie_key': 'ThePlatform',
'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
'id': video_id,
}
})
return info

View File

@@ -4,7 +4,8 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
determine_ext,
unified_strdate,
extract_attributes,
int_or_none,
)
@@ -19,6 +20,7 @@ class FranceCultureIE(InfoExtractor):
'title': 'Rendez-vous au pays des geeks',
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20140301',
'timestamp': 1393642916,
'vcodec': 'none',
}
}
@@ -28,30 +30,34 @@ class FranceCultureIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
video_url = self._search_regex(
r'(?s)<div[^>]+class="[^"]*?title-zone-diffusion[^"]*?"[^>]*>.*?<button[^>]+data-asset-source="([^"]+)"',
webpage, 'video path')
video_data = extract_attributes(self._search_regex(
r'(?s)<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>.*?(<button[^>]+data-asset-source="[^"]+"[^>]+>)',
webpage, 'video data'))
title = self._og_search_title(webpage)
video_url = video_data['data-asset-source']
title = video_data.get('data-asset-title') or self._og_search_title(webpage)
upload_date = unified_strdate(self._search_regex(
'(?s)<div[^>]+class="date"[^>]*>.*?<span[^>]+class="inner"[^>]*>([^<]+)<',
webpage, 'upload date', fatal=False))
description = self._html_search_regex(
r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
webpage, 'description', default=None)
thumbnail = self._search_regex(
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+data-dejavu-src="([^"]+)"',
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
webpage, 'thumbnail', fatal=False)
uploader = self._html_search_regex(
r'(?s)<div id="emission".*?<span class="author">(.*?)</span>',
r'(?s)<span class="author">(.*?)</span>',
webpage, 'uploader', default=None)
vcodec = 'none' if determine_ext(video_url.lower()) == 'mp3' else None
ext = determine_ext(video_url.lower())
return {
'id': display_id,
'display_id': display_id,
'url': video_url,
'title': title,
'description': description,
'thumbnail': thumbnail,
'vcodec': vcodec,
'ext': ext,
'vcodec': 'none' if ext == 'mp3' else None,
'uploader': uploader,
'upload_date': upload_date,
'timestamp': int_or_none(video_data.get('data-asset-created-date')),
'duration': int_or_none(video_data.get('data-duration')),
}

View File

@@ -56,9 +56,8 @@ class FreshLiveIE(InfoExtractor):
is_live = info.get('liveStreamUrl') is not None
formats = self._extract_m3u8_formats(
stream_url, video_id, ext='mp4',
entry_protocol='m3u8' if is_live else 'm3u8_native',
m3u8_id='hls')
stream_url, video_id, 'mp4',
'm3u8_native', m3u8_id='hls')
if is_live:
title = self._live_title(title)

View File

@@ -7,9 +7,9 @@ from ..compat import (
compat_urllib_parse_unquote_plus,
)
from ..utils import (
clean_html,
determine_ext,
int_or_none,
js_to_json,
sanitized_Request,
ExtractorError,
urlencode_postdata
@@ -17,34 +17,26 @@ from ..utils import (
class FunimationIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?funimation\.com/shows/[^/]+/videos/(?:official|promotional)/(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/shows/[^/]+/(?P<id>[^/?#&]+)'
_NETRC_MACHINE = 'funimation'
_TESTS = [{
'url': 'http://www.funimation.com/shows/air/videos/official/breeze',
'url': 'https://www.funimation.com/shows/hacksign/role-play/',
'info_dict': {
'id': '658',
'display_id': 'breeze',
'ext': 'mp4',
'title': 'Air - 1 - Breeze',
'description': 'md5:1769f43cd5fc130ace8fd87232207892',
'thumbnail': r're:https?://.*\.jpg',
},
'skip': 'Access without user interaction is forbidden by CloudFlare, and video removed',
}, {
'url': 'http://www.funimation.com/shows/hacksign/videos/official/role-play',
'info_dict': {
'id': '31128',
'id': '91144',
'display_id': 'role-play',
'ext': 'mp4',
'title': '.hack//SIGN - 1 - Role Play',
'title': '.hack//SIGN - Role Play',
'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd',
'thumbnail': r're:https?://.*\.jpg',
},
'skip': 'Access without user interaction is forbidden by CloudFlare',
'params': {
# m3u8 download
'skip_download': True,
},
}, {
'url': 'http://www.funimation.com/shows/attack-on-titan-junior-high/videos/promotional/broadcast-dub-preview',
'url': 'https://www.funimation.com/shows/attack-on-titan-junior-high/broadcast-dub-preview/',
'info_dict': {
'id': '9635',
'display_id': 'broadcast-dub-preview',
@@ -54,25 +46,13 @@ class FunimationIE(InfoExtractor):
'thumbnail': r're:https?://.*\.(?:jpg|png)',
},
'skip': 'Access without user interaction is forbidden by CloudFlare',
}, {
'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
'only_matching': True,
}]
_LOGIN_URL = 'http://www.funimation.com/login'
def _download_webpage(self, *args, **kwargs):
try:
return super(FunimationIE, self)._download_webpage(*args, **kwargs)
except ExtractorError as ee:
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
response = ee.cause.read()
if b'>Please complete the security check to access<' in response:
raise ExtractorError(
'Access to funimation.com is blocked by CloudFlare. '
'Please browse to http://www.funimation.com/, solve '
'the reCAPTCHA, export browser cookies to a text file,'
' and then try again with --cookies YOUR_COOKIE_FILE.',
expected=True)
raise
def _extract_cloudflare_session_ua(self, url):
ci_session_cookie = self._get_cookies(url).get('ci_session')
if ci_session_cookie:
@@ -114,119 +94,74 @@ class FunimationIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
def _search_kane(name):
return self._search_regex(
r"KANE_customdimensions\.%s\s*=\s*'([^']+)';" % name,
webpage, name, default=None)
title_data = self._parse_json(self._search_regex(
r'TITLE_DATA\s*=\s*({[^}]+})',
webpage, 'title data', default=''),
display_id, js_to_json, fatal=False) or {}
video_id = title_data.get('id') or self._search_regex([
r"KANE_customdimensions.videoID\s*=\s*'(\d+)';",
r'<iframe[^>]+src="/player/(\d+)"',
], webpage, 'video_id', default=None)
if not video_id:
player_url = self._html_search_meta([
'al:web:url',
'og:video:url',
'og:video:secure_url',
], webpage, fatal=True)
video_id = self._search_regex(r'/player/(\d+)', player_url, 'video id')
title = episode = title_data.get('title') or _search_kane('videoTitle') or self._og_search_title(webpage)
series = _search_kane('showName')
if series:
title = '%s - %s' % (series, title)
description = self._html_search_meta(['description', 'og:description'], webpage, fatal=True)
try:
sources = self._download_json(
'https://prod-api-funimationnow.dadcdigital.com/api/source/catalog/video/%s/signed/' % video_id,
video_id)['items']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
error = self._parse_json(e.cause.read(), video_id)['errors'][0]
raise ExtractorError('%s said: %s' % (
self.IE_NAME, error.get('detail') or error.get('title')), expected=True)
raise
errors = []
formats = []
ERRORS_MAP = {
'ERROR_MATURE_CONTENT_LOGGED_IN': 'matureContentLoggedIn',
'ERROR_MATURE_CONTENT_LOGGED_OUT': 'matureContentLoggedOut',
'ERROR_SUBSCRIPTION_LOGGED_OUT': 'subscriptionLoggedOut',
'ERROR_VIDEO_EXPIRED': 'videoExpired',
'ERROR_TERRITORY_UNAVAILABLE': 'territoryUnavailable',
'SVODBASIC_SUBSCRIPTION_IN_PLAYER': 'basicSubscription',
'SVODNON_SUBSCRIPTION_IN_PLAYER': 'nonSubscription',
'ERROR_PLAYER_NOT_RESPONDING': 'playerNotResponding',
'ERROR_UNABLE_TO_CONNECT_TO_CDN': 'unableToConnectToCDN',
'ERROR_STREAM_NOT_FOUND': 'streamNotFound',
}
USER_AGENTS = (
# PC UA is served with m3u8 that provides some bonus lower quality formats
('pc', 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0'),
# Mobile UA allows to extract direct links and also does not fail when
# PC UA fails with hulu error (e.g.
# http://www.funimation.com/shows/hacksign/videos/official/role-play)
('mobile', 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'),
)
user_agent = self._extract_cloudflare_session_ua(url)
if user_agent:
USER_AGENTS = ((None, user_agent),)
for kind, user_agent in USER_AGENTS:
request = sanitized_Request(url)
request.add_header('User-Agent', user_agent)
webpage = self._download_webpage(
request, display_id,
'Downloading %s webpage' % kind if kind else 'Downloading webpage')
playlist = self._parse_json(
self._search_regex(
r'var\s+playersData\s*=\s*(\[.+?\]);\n',
webpage, 'players data'),
display_id)[0]['playlist']
items = next(item['items'] for item in playlist if item.get('items'))
item = next(item for item in items if item.get('itemAK') == display_id)
error_messages = {}
video_error_messages = self._search_regex(
r'var\s+videoErrorMessages\s*=\s*({.+?});\n',
webpage, 'error messages', default=None)
if video_error_messages:
error_messages_json = self._parse_json(video_error_messages, display_id, fatal=False)
if error_messages_json:
for _, error in error_messages_json.items():
type_ = error.get('type')
description = error.get('description')
content = error.get('content')
if type_ == 'text' and description and content:
error_message = ERRORS_MAP.get(description)
if error_message:
error_messages[error_message] = content
for video in item.get('videoSet', []):
auth_token = video.get('authToken')
if not auth_token:
continue
funimation_id = video.get('FUNImationID') or video.get('videoId')
preference = 1 if video.get('languageMode') == 'dub' else 0
if not auth_token.startswith('?'):
auth_token = '?%s' % auth_token
for quality, height in (('sd', 480), ('hd', 720), ('hd1080', 1080)):
format_url = video.get('%sUrl' % quality)
if not format_url:
continue
if not format_url.startswith(('http', '//')):
errors.append(format_url)
continue
if determine_ext(format_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url + auth_token, display_id, 'mp4', entry_protocol='m3u8_native',
preference=preference, m3u8_id='%s-hls' % funimation_id, fatal=False))
else:
tbr = int_or_none(self._search_regex(
r'-(\d+)[Kk]', format_url, 'tbr', default=None))
formats.append({
'url': format_url + auth_token,
'format_id': '%s-http-%dp' % (funimation_id, height),
'height': height,
'tbr': tbr,
'preference': preference,
})
if not formats and errors:
raise ExtractorError(
'%s returned error: %s'
% (self.IE_NAME, clean_html(error_messages.get(errors[0], errors[0]))),
expected=True)
for source in sources:
source_url = source.get('src')
if not source_url:
continue
source_type = source.get('videoType') or determine_ext(source_url)
if source_type == 'm3u8':
formats.extend(self._extract_m3u8_formats(
source_url, video_id, 'mp4',
m3u8_id='hls', fatal=False))
else:
formats.append({
'format_id': source_type,
'url': source_url,
})
self._sort_formats(formats)
title = item['title']
artist = item.get('artist')
if artist:
title = '%s - %s' % (artist, title)
description = self._og_search_description(webpage) or item.get('description')
thumbnail = self._og_search_thumbnail(webpage) or item.get('posterUrl')
video_id = item.get('itemId') or display_id
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'thumbnail': self._og_search_thumbnail(webpage),
'series': series,
'season_number': int_or_none(title_data.get('seasonNum') or _search_kane('season')),
'episode_number': int_or_none(title_data.get('episodeNum')),
'episode': episode,
'season_id': title_data.get('seriesId'),
'formats': formats,
}

View File

@@ -902,12 +902,13 @@ class GenericIE(InfoExtractor):
},
# LazyYT
{
'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
'url': 'https://skiplagged.com/',
'info_dict': {
'id': '1986',
'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
'id': 'skiplagged',
'title': 'Skiplagged: The smart way to find cheap flights',
},
'playlist_mincount': 2,
'playlist_mincount': 1,
'add_ie': ['Youtube'],
},
# Cinchcast embed
{
@@ -990,6 +991,20 @@ class GenericIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
},
},
{
# JWPlayer config passed as variable
'url': 'http://www.txxx.com/videos/3326530/ariele/',
'info_dict': {
'id': '3326530_hq',
'ext': 'mp4',
'title': 'ARIELE | Tube Cup',
'uploader': 'www.txxx.com',
'age_limit': 18,
},
'params': {
'skip_download': True,
}
},
# rtl.nl embed
{
'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
@@ -1065,6 +1080,21 @@ class GenericIE(InfoExtractor):
},
'add_ie': ['Kaltura'],
},
{
# Kaltura iframe embed
'url': 'http://www.gsd.harvard.edu/event/i-m-pei-a-centennial-celebration/',
'md5': 'ae5ace8eb09dc1a35d03b579a9c2cc44',
'info_dict': {
'id': '0_f2cfbpwy',
'ext': 'mp4',
'title': 'I. M. Pei: A Centennial Celebration',
'description': 'md5:1db8f40c69edc46ca180ba30c567f37c',
'upload_date': '20170403',
'uploader_id': 'batchUser',
'timestamp': 1491232186,
},
'add_ie': ['Kaltura'],
},
# Eagle.Platform embed (generic URL)
{
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
@@ -2549,18 +2579,14 @@ class GenericIE(InfoExtractor):
self._sort_formats(entry['formats'])
return self.playlist_result(entries)
jwplayer_data_str = self._find_jwplayer_data(webpage)
if jwplayer_data_str:
try:
jwplayer_data = self._parse_json(
jwplayer_data_str, video_id, transform_source=js_to_json)
info = self._parse_jwplayer_data(
jwplayer_data, video_id, require_title=False)
if not info.get('title'):
info['title'] = video_title
return info
except ExtractorError:
pass
jwplayer_data = self._find_jwplayer_data(
webpage, video_id, transform_source=js_to_json)
if jwplayer_data:
info = self._parse_jwplayer_data(
jwplayer_data, video_id, require_title=False, base_url=url)
if not info.get('title'):
info['title'] = video_title
return info
def check_video(vurl):
if YoutubeIE.suitable(vurl):
@@ -2635,11 +2661,14 @@ class GenericIE(InfoExtractor):
found = re.search(REDIRECT_REGEX, refresh_header)
if found:
new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
self.report_following_redirect(new_url)
return {
'_type': 'url',
'url': new_url,
}
if new_url != url:
self.report_following_redirect(new_url)
return {
'_type': 'url',
'url': new_url,
}
else:
found = None
if not found:
# twitter:player is a https URL to iframe player that may or may not

View File

@@ -4,6 +4,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
xpath_text,
xpath_element,
@@ -14,14 +15,26 @@ from ..utils import (
class HBOBaseIE(InfoExtractor):
_FORMATS_INFO = {
'pro7': {
'width': 1280,
'height': 720,
},
'1920': {
'width': 1280,
'height': 720,
},
'pro6': {
'width': 768,
'height': 432,
},
'640': {
'width': 768,
'height': 432,
},
'pro5': {
'width': 640,
'height': 360,
},
'highwifi': {
'width': 640,
'height': 360,
@@ -78,6 +91,17 @@ class HBOBaseIE(InfoExtractor):
formats.extend(self._extract_m3u8_formats(
video_url.replace('.tar', '/base_index_w8.m3u8'),
video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
elif source.tag == 'hls':
# #EXT-X-BYTERANGE is not supported by native hls downloader
# and ffmpeg (#10955)
# formats.extend(self._extract_m3u8_formats(
# video_url.replace('.tar', '/base_index.m3u8'),
# video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
continue
elif source.tag == 'dash':
formats.extend(self._extract_mpd_formats(
video_url.replace('.tar', '/manifest.mpd'),
video_id, mpd_id='dash', fatal=False))
else:
format_info = self._FORMATS_INFO.get(source.tag, {})
formats.append({
@@ -112,10 +136,11 @@ class HBOBaseIE(InfoExtractor):
class HBOIE(HBOBaseIE):
IE_NAME = 'hbo'
_VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
'md5': '1c33253f0c7782142c993c0ba62a8753',
'md5': '2c6a6bc1222c7e91cb3334dad1746e5a',
'info_dict': {
'id': '1437839',
'ext': 'mp4',
@@ -131,11 +156,12 @@ class HBOIE(HBOBaseIE):
class HBOEpisodeIE(HBOBaseIE):
_VALID_URL = r'https?://(?:www\.)?hbo\.com/(?!video)([^/]+/)+video/(?P<id>[0-9a-z-]+)\.html'
IE_NAME = 'hbo:episode'
_VALID_URL = r'https?://(?:www\.)?hbo\.com/(?P<path>(?!video)(?:(?:[^/]+/)+video|watch-free-episodes)/(?P<id>[0-9a-z-]+))(?:\.html)?'
_TESTS = [{
'url': 'http://www.hbo.com/girls/episodes/5/52-i-love-you-baby/video/ep-52-inside-the-episode.html?autoplay=true',
'md5': '689132b253cc0ab7434237fc3a293210',
'md5': '61ead79b9c0dfa8d3d4b07ef4ac556fb',
'info_dict': {
'id': '1439518',
'display_id': 'ep-52-inside-the-episode',
@@ -147,16 +173,19 @@ class HBOEpisodeIE(HBOBaseIE):
}, {
'url': 'http://www.hbo.com/game-of-thrones/about/video/season-5-invitation-to-the-set.html?autoplay=true',
'only_matching': True,
}, {
'url': 'http://www.hbo.com/watch-free-episodes/last-week-tonight-with-john-oliver',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
path, display_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, display_id)
content = self._download_json(
'http://www.hbo.com/api/content/' + path, display_id)['content']
video_id = self._search_regex(
r'(?P<q1>[\'"])videoId(?P=q1)\s*:\s*(?P<q2>[\'"])(?P<video_id>\d+)(?P=q2)',
webpage, 'video ID', group='video_id')
video_id = compat_str((content.get('parsed', {}).get(
'common:FullBleedVideo', {}) or content['selectedEpisode'])['videoId'])
info_dict = self._extract_from_id(video_id)
info_dict['display_id'] = display_id

View File

@@ -91,6 +91,7 @@ class KalturaIE(InfoExtractor):
}],
},
},
'skip': 'Gone. Maybe https://www.safaribooksonline.com/library/tutorials/introduction-to-python-anon/3469/',
'params': {
'skip_download': True,
},
@@ -107,27 +108,37 @@ class KalturaIE(InfoExtractor):
@staticmethod
def _extract_url(webpage):
# Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
mobj = (
re.search(
r"""(?xs)
kWidget\.(?:thumb)?[Ee]mbed\(
\{.*?
(?P<q1>['\"])wid(?P=q1)\s*:\s*
(?P<q2>['\"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*
(?P<q4>['\"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
(?P<q1>['"])wid(?P=q1)\s*:\s*
(?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
(?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
(?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
""", webpage) or
re.search(
r'''(?xs)
(?P<q1>["\'])
(?P<q1>["'])
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
(?P=q1).*?
(?:
entry_?[Ii]d|
(?P<q2>["\'])entry_?[Ii]d(?P=q2)
(?P<q2>["'])entry_?[Ii]d(?P=q2)
)\s*:\s*
(?P<q3>["\'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
''', webpage))
(?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
''', webpage) or
re.search(
r'''(?xs)
<iframe[^>]+src=(?P<q1>["'])
(?:https?:)?//(?:www\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
(?:(?!(?P=q1)).)*
[?&]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
(?P=q1)
''', webpage)
)
if mobj:
embed_info = mobj.groupdict()
url = 'kaltura:%(partner_id)s:%(id)s' % embed_info

View File

@@ -62,13 +62,21 @@ class LimelightBaseIE(InfoExtractor):
fmt = {
'url': stream_url,
'abr': float_or_none(stream.get('audioBitRate')),
'vbr': float_or_none(stream.get('videoBitRate')),
'fps': float_or_none(stream.get('videoFrameRate')),
'width': int_or_none(stream.get('videoWidthInPixels')),
'height': int_or_none(stream.get('videoHeightInPixels')),
'ext': ext,
}
rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp4:.+)$', stream_url)
width = int_or_none(stream.get('videoWidthInPixels'))
height = int_or_none(stream.get('videoHeightInPixels'))
vbr = float_or_none(stream.get('videoBitRate'))
if width or height or vbr:
fmt.update({
'width': width,
'height': height,
'vbr': vbr,
})
else:
fmt['vcodec'] = 'none'
rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', stream_url)
if rtmp:
format_id = 'rtmp'
if stream.get('videoBitRate'):

View File

@@ -119,7 +119,8 @@ class LivestreamIE(InfoExtractor):
m3u8_url = video_data.get('m3u8_url')
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
f4m_url = video_data.get('f4m_url')
if f4m_url:
@@ -158,11 +159,11 @@ class LivestreamIE(InfoExtractor):
if smil_url:
formats.extend(self._extract_smil_formats(smil_url, broadcast_id))
entry_protocol = 'm3u8' if is_live else 'm3u8_native'
m3u8_url = stream_info.get('m3u8_url')
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, broadcast_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=False))
m3u8_url, broadcast_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
rtsp_url = stream_info.get('rtsp_url')
if rtsp_url:
@@ -276,7 +277,7 @@ class LivestreamOriginalIE(InfoExtractor):
'view_count': view_count,
}
def _extract_video_formats(self, video_data, video_id, entry_protocol):
def _extract_video_formats(self, video_data, video_id):
formats = []
progressive_url = video_data.get('progressiveUrl')
@@ -289,7 +290,8 @@ class LivestreamOriginalIE(InfoExtractor):
m3u8_url = video_data.get('httpUrl')
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=False))
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
rtsp_url = video_data.get('rtspUrl')
if rtsp_url:
@@ -340,11 +342,10 @@ class LivestreamOriginalIE(InfoExtractor):
}
video_data = self._download_json(stream_url, content_id)
is_live = video_data.get('isLive')
entry_protocol = 'm3u8' if is_live else 'm3u8_native'
info.update({
'id': content_id,
'title': self._live_title(info['title']) if is_live else info['title'],
'formats': self._extract_video_formats(video_data, content_id, entry_protocol),
'formats': self._extract_video_formats(video_data, content_id),
'is_live': is_live,
})
return info

View File

@@ -0,0 +1,70 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
unified_strdate,
update_url_query,
urlencode_postdata,
)
class MediciIE(InfoExtractor):
    # Extractor for medici.tv "#!/<slug>" pages; the slug is captured as the
    # initial video id and is replaced by the API-provided id when available.
    _VALID_URL = r'https?://(?:www\.)?medici\.tv/#!/(?P<id>[^?#&]+)'
    _TEST = {
        'url': 'http://www.medici.tv/#!/daniel-harding-frans-helmerson-verbier-festival-music-camp',
        'md5': '004c21bb0a57248085b6ff3fec72719d',
        'info_dict': {
            'id': '3059',
            'ext': 'flv',
            'title': 'Daniel Harding conducts the Verbier Festival Music Camp \u2013 With Frans Helmerson',
            'description': 'md5:322a1e952bafb725174fd8c1a8212f58',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20170408',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the medici.tv page at *url*.

        Raises KeyError when the JSON response lacks the expected
        ``video -> videos -> video1`` structure, the ``url_akamai`` field,
        or when no ``csrftoken`` cookie has been set for the site.
        """
        video_id = self._match_id(url)

        # Sets csrftoken cookie
        self._download_webpage(url, video_id)

        MEDICI_URL = 'http://www.medici.tv/'

        # The page data is requested with a form-encoded POST to the site
        # root; the CSRF token from the cookie is echoed back in a header.
        data = self._download_json(
            MEDICI_URL, video_id,
            data=urlencode_postdata({
                'json': 'true',
                'page': '/%s' % video_id,
                'timezone_offset': -420,
            }), headers={
                'X-CSRFToken': self._get_cookies(url)['csrftoken'].value,
                'X-Requested-With': 'XMLHttpRequest',
                'Referer': MEDICI_URL,
                'Content-Type': 'application/x-www-form-urlencoded',
            })

        video = data['video']['videos']['video1']

        title = video.get('nom') or data['title']

        # Prefer the id reported by the API over the URL slug.
        video_id = video.get('id') or video_id
        formats = self._extract_f4m_formats(
            update_url_query(video['url_akamai'], {
                'hdcore': '3.1.0',
                # The parameter name is "plugin" and the value is
                # "aasp-<version>"; a key of 'plugin=aasp' would be
                # URL-encoded into a meaningless "plugin%3Daasp" parameter.
                'plugin': 'aasp-3.1.0.43.124',
            }), video_id, f4m_id='hds')
        # Order the formats like the other extractors do before returning.
        self._sort_formats(formats)

        description = data.get('meta_description')
        thumbnail = video.get('url_thumbnail') or data.get('main_image')
        upload_date = unified_strdate(data['video'].get('date'))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'formats': formats,
        }

View File

@@ -97,7 +97,7 @@ class MixcloudIE(InfoExtractor):
view_count = str_to_int(self._search_regex(
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
r'/listeners/?">([0-9,.]+)</a>',
r'm-tooltip=["\']([\d,.]+) plays'],
r'(?:m|data)-tooltip=["\']([\d,.]+) plays'],
webpage, 'play count', default=None))
return {
@@ -138,12 +138,12 @@ class MixcloudPlaylistBaseIE(InfoExtractor):
def _get_user_description(self, page_content):
return self._html_search_regex(
r'<div[^>]+class="description-text"[^>]*>(.+?)</div>',
r'<div[^>]+class="profile-bio"[^>]*>(.+?)</div>',
page_content, 'user description', fatal=False)
class MixcloudUserIE(MixcloudPlaylistBaseIE):
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/(?P<type>uploads|favorites|listens)?/?$'
_VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/(?P<type>uploads|favorites|listens)?/?$'
IE_NAME = 'mixcloud:user'
_TESTS = [{
@@ -151,7 +151,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'info_dict': {
'id': 'dholbach_uploads',
'title': 'Daniel Holbach (uploads)',
'description': 'md5:327af72d1efeb404a8216c27240d1370',
'description': 'md5:def36060ac8747b3aabca54924897e47',
},
'playlist_mincount': 11,
}, {
@@ -159,7 +159,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'info_dict': {
'id': 'dholbach_uploads',
'title': 'Daniel Holbach (uploads)',
'description': 'md5:327af72d1efeb404a8216c27240d1370',
'description': 'md5:def36060ac8747b3aabca54924897e47',
},
'playlist_mincount': 11,
}, {
@@ -167,7 +167,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'info_dict': {
'id': 'dholbach_favorites',
'title': 'Daniel Holbach (favorites)',
'description': 'md5:327af72d1efeb404a8216c27240d1370',
'description': 'md5:def36060ac8747b3aabca54924897e47',
},
'params': {
'playlist_items': '1-100',
@@ -178,7 +178,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'info_dict': {
'id': 'dholbach_listens',
'title': 'Daniel Holbach (listens)',
'description': 'md5:327af72d1efeb404a8216c27240d1370',
'description': 'md5:def36060ac8747b3aabca54924897e47',
},
'params': {
'playlist_items': '1-100',
@@ -216,7 +216,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'
_VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'
IE_NAME = 'mixcloud:playlist'
_TESTS = [{
@@ -229,12 +229,7 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
'playlist_mincount': 16,
}, {
'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
'info_dict': {
'id': 'maxvibes_jazzcat-on-ness-radio',
'title': 'Jazzcat on Ness Radio',
'description': 'md5:7bbbf0d6359a0b8cda85224be0f8f263',
},
'playlist_mincount': 23
'only_matching': True,
}]
def _real_extract(self, url):
@@ -243,15 +238,16 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
playlist_id = mobj.group('playlist')
video_id = '%s_%s' % (user_id, playlist_id)
profile = self._download_webpage(
webpage = self._download_webpage(
url, user_id,
note='Downloading playlist page',
errnote='Unable to download playlist page')
description = self._get_user_description(profile)
playlist_title = self._html_search_regex(
r'<span[^>]+class="[^"]*list-playlist-title[^"]*"[^>]*>(.*?)</span>',
profile, 'playlist title')
title = self._html_search_regex(
r'<a[^>]+class="parent active"[^>]*><b>\d+</b><span[^>]*>([^<]+)',
webpage, 'playlist title',
default=None) or self._og_search_title(webpage, fatal=False)
description = self._get_user_description(webpage)
entries = OnDemandPagedList(
functools.partial(
@@ -259,11 +255,11 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
'%s/playlists/%s' % (user_id, playlist_id), video_id, 'tracklist'),
self._PAGE_SIZE)
return self.playlist_result(entries, video_id, playlist_title, description)
return self.playlist_result(entries, video_id, title, description)
class MixcloudStreamIE(MixcloudPlaylistBaseIE):
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/stream/?$'
_VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/stream/?$'
IE_NAME = 'mixcloud:stream'
_TEST = {

View File

@@ -34,12 +34,6 @@ class NineCNineMediaStackIE(NineCNineMediaBaseIE):
formats.extend(self._extract_f4m_formats(
stack_base_url + 'f4m', stack_id,
f4m_id='hds', fatal=False))
mp4_url = self._download_webpage(stack_base_url + 'pd', stack_id, fatal=False)
if mp4_url:
formats.append({
'url': mp4_url,
'format_id': 'mp4',
})
self._sort_formats(formats)
return {

View File

@@ -313,9 +313,9 @@ class NPOIE(NPOBaseIE):
class NPOLiveIE(NPOBaseIE):
IE_NAME = 'npo.nl:live'
_VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:www\.)?npo\.nl/live(?:/(?P<id>[^/?#&]+))?'
_TEST = {
_TESTS = [{
'url': 'http://www.npo.nl/live/npo-1',
'info_dict': {
'id': 'LI_NL1_4188102',
@@ -327,10 +327,13 @@ class NPOLiveIE(NPOBaseIE):
'params': {
'skip_download': True,
}
}
}, {
'url': 'http://www.npo.nl/live',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
display_id = self._match_id(url) or 'npo-1'
webpage = self._download_webpage(url, display_id)

View File

@@ -176,7 +176,7 @@ class OdnoklassnikiIE(InfoExtractor):
})
return info
quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd'))
quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd', 'full'))
formats = [{
'url': f['url'],

View File

@@ -75,51 +75,38 @@ class OpenloadIE(InfoExtractor):
'<span[^>]+id="[^"]+"[^>]*>([0-9A-Za-z]+)</span>',
webpage, 'openload ID')
video_url_chars = []
first_char = ord(ol_id[0])
key = first_char - 55
maxKey = max(2, key)
key = min(maxKey, len(ol_id) - 38)
t = ol_id[key:key + 36]
hashMap = {}
v = ol_id.replace(t, '')
h = 0
while h < len(t):
f = t[h:h + 3]
i = int(f, 8)
hashMap[h / 3] = i
h += 3
h = 0
H = 0
while h < len(v):
B = ''
C = ''
if len(v) >= h + 2:
B = v[h:h + 2]
if len(v) >= h + 3:
C = v[h:h + 3]
i = int(B, 16)
h += 2
if H % 3 == 0:
i = int(C, 8)
h += 1
elif H % 2 == 0 and H != 0 and ord(v[H - 1]) < 60:
i = int(C, 10)
h += 1
index = H % 7
A = hashMap[index]
i ^= 213
i ^= A
video_url_chars.append(compat_chr(i))
H += 1
decoded = ''
a = ol_id[0:24]
b = []
for i in range(0, len(a), 8):
b.append(int(a[i:i + 8] or '0', 16))
ol_id = ol_id[24:]
j = 0
k = 0
while j < len(ol_id):
c = 128
d = 0
e = 0
f = 0
_more = True
while _more:
if j + 1 >= len(ol_id):
c = 143
f = int(ol_id[j:j + 2] or '0', 16)
j += 2
d += (f & 127) << e
e += 7
_more = f >= c
g = d ^ b[k % 3]
for i in range(4):
char_dec = (g >> 8 * i) & (c + 127)
char = compat_chr(char_dec)
if char != '#':
decoded += char
k += 1
video_url = 'https://openload.co/stream/%s?mime=true'
video_url = video_url % (''.join(video_url_chars))
video_url = video_url % decoded
title = self._og_search_title(webpage, default=None) or self._search_regex(
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,

View File

@@ -0,0 +1,138 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
clean_html,
ExtractorError,
remove_end,
strip_or_none,
unified_timestamp,
urljoin,
)
class PacktPubBaseIE(InfoExtractor):
    # Shared URL constants for the Packt extractors.
    # Site root; also used as the base for joining relative thumbnail paths.
    _PACKT_BASE = 'https://www.packtpub.com'
    # Root of the "mapt-rest" JSON endpoints, derived from the site root.
    _MAPT_REST = '%s/mapt-rest' % (_PACKT_BASE, )
class PacktPubIE(PacktPubBaseIE):
    # Extractor for a single Packt "mapt" video section, addressed by
    # course id / chapter id / section id in the URL.
    _VALID_URL = r'https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)'

    _TEST = {
        'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215/20528/20530/Project+Intro',
        'md5': '1e74bd6cfd45d7d07666f4684ef58f70',
        'info_dict': {
            'id': '20530',
            'ext': 'mp4',
            'title': 'Project Intro',
            'thumbnail': r're:(?i)^https?://.*\.jpg',
            'timestamp': 1490918400,
            'upload_date': '20170331',
        },
    }

    def _handle_error(self, response):
        """Raise ExtractorError when *response* does not report success.

        The error text is "<IE_NAME> said: <message>", where the message
        is taken from the response's 'message' field.
        """
        if response.get('status') != 'success':
            # NOTE: the placeholder must be '%s'; '% s' (with a space) is a
            # valid conversion with the space flag and silently swallows
            # the leading 's' of "said".
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, response['message']),
                expected=True)

    def _download_json(self, *args, **kwargs):
        """Download JSON via the parent implementation and validate it.

        Every response is passed through _handle_error before being
        returned to the caller.
        """
        response = super(PacktPubIE, self)._download_json(*args, **kwargs)
        self._handle_error(response)
        return response

    def _real_extract(self, url):
        """Return the info dict for the video section at *url*.

        Raises ExtractorError when the section has no 'content' entry,
        which is reported to the user as a locked video.
        """
        mobj = re.match(self._VALID_URL, url)
        course_id, chapter_id, video_id = mobj.group(
            'course_id', 'chapter_id', 'id')

        # Section data fetched through the "users/me" endpoint; its
        # 'content' entry carries the media file URL.
        video = self._download_json(
            '%s/users/me/products/%s/chapters/%s/sections/%s'
            % (self._MAPT_REST, course_id, chapter_id, video_id), video_id,
            'Downloading JSON video')['data']

        content = video.get('content')
        if not content:
            raise ExtractorError('This video is locked', expected=True)

        video_url = content['file']

        # Title, publication date and thumbnail path come from a separate
        # metadata endpoint.
        metadata = self._download_json(
            '%s/products/%s/chapters/%s/sections/%s/metadata'
            % (self._MAPT_REST, course_id, chapter_id, video_id),
            video_id)['data']

        title = metadata['pageTitle']
        course_title = metadata.get('title')
        if course_title:
            # Drop the " - <course title>" suffix from the page title.
            title = remove_end(title, ' - %s' % course_title)
        timestamp = unified_timestamp(metadata.get('publicationDate'))
        thumbnail = urljoin(self._PACKT_BASE, metadata.get('filepath'))

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
        }
class PacktPubCourseIE(PacktPubBaseIE):
    # Playlist extractor for a whole Packt "mapt" course; each section of
    # each chapter becomes one entry delegated to PacktPubIE.
    _VALID_URL = r'(?P<url>https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P<id>\d+))'
    _TEST = {
        'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215',
        'info_dict': {
            'id': '9781787122215',
            'title': 'Learn Nodejs by building 12 projects [Video]',
        },
        'playlist_count': 90,
    }

    @classmethod
    def suitable(cls, url):
        # A URL that PacktPubIE accepts is a single video, never a course.
        if PacktPubIE.suitable(url):
            return False
        return super(PacktPubCourseIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist result holding one entry per course section."""
        match = re.match(self._VALID_URL, url)
        # Rebind url to the matched course URL (the 'url' group).
        url = match.group('url')
        course_id = match.group('id')

        course = self._download_json(
            '%s/products/%s/metadata' % (self._MAPT_REST, course_id),
            course_id)['data']

        entries = []

        # Numbering counts every table-of-contents item, including the
        # ones skipped below.
        for chapter_num, chapter in enumerate(course['tableOfContents'], 1):
            if chapter.get('type') != 'chapter':
                continue
            sections = chapter.get('children')
            if not isinstance(sections, list):
                continue

            chapter_title = chapter.get('title')
            chapter_id = chapter.get('id')

            for section in sections:
                if section.get('type') != 'section':
                    continue
                seo_url = section.get('seoUrl')
                if not isinstance(seo_url, compat_str):
                    continue
                entries.append({
                    '_type': 'url_transparent',
                    'url': urljoin(url + '/', seo_url),
                    'title': strip_or_none(section.get('title')),
                    'description': clean_html(section.get('summary')),
                    'ie_key': PacktPubIE.ie_key(),
                    # Chapter metadata shared by every section of the chapter.
                    'chapter': chapter_title,
                    'chapter_number': chapter_num,
                    'chapter_id': chapter_id,
                })

        return self.playlist_result(entries, course_id, course.get('title'))

View File

@@ -20,7 +20,7 @@ class PeriscopeBaseIE(InfoExtractor):
class PeriscopeIE(PeriscopeBaseIE):
IE_DESC = 'Periscope'
IE_NAME = 'periscope'
_VALID_URL = r'https?://(?:www\.)?periscope\.tv/[^/]+/(?P<id>[^/?#]+)'
_VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/]+/(?P<id>[^/?#]+)'
# Alive example URLs can be found here http://onperiscope.com/
_TESTS = [{
'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
@@ -41,6 +41,9 @@ class PeriscopeIE(PeriscopeBaseIE):
}, {
'url': 'https://www.periscope.tv/bastaakanoggano/1OdKrlkZZjOJX',
'only_matching': True,
}, {
'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv',
'only_matching': True,
}]
@staticmethod
@@ -103,7 +106,7 @@ class PeriscopeIE(PeriscopeBaseIE):
class PeriscopeUserIE(PeriscopeBaseIE):
_VALID_URL = r'https?://(?:www\.)?periscope\.tv/(?P<id>[^/]+)/?$'
_VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/(?P<id>[^/]+)/?$'
IE_DESC = 'Periscope user videos'
IE_NAME = 'periscope:user'

View File

@@ -169,11 +169,10 @@ class PluralsightIE(PluralsightBaseIE):
collection = course['modules']
module, clip = None, None
clip = None
for module_ in collection:
if name in (module_.get('moduleName'), module_.get('name')):
module = module_
for clip_ in module_.get('clips', []):
clip_index = clip_.get('clipIndex')
if clip_index is None:

View File

@@ -1,23 +1,40 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..compat import (
compat_urlparse,
compat_str,
)
from ..utils import (
determine_ext,
ExtractorError,
determine_ext,
find_xpath_attr,
fix_xml_ampersands,
GeoRestrictedError,
int_or_none,
parse_duration,
strip_or_none,
try_get,
unified_strdate,
unified_timestamp,
update_url_query,
urljoin,
xpath_text,
)
class RaiBaseIE(InfoExtractor):
def _extract_relinker_formats(self, relinker_url, video_id):
_UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
_GEO_COUNTRIES = ['IT']
_GEO_BYPASS = False
def _extract_relinker_info(self, relinker_url, video_id):
formats = []
geoprotection = None
is_live = None
duration = None
for platform in ('mon', 'flash', 'native'):
relinker = self._download_xml(
@@ -27,9 +44,27 @@ class RaiBaseIE(InfoExtractor):
query={'output': 45, 'pl': platform},
headers=self.geo_verification_headers())
media_url = find_xpath_attr(relinker, './url', 'type', 'content').text
if not geoprotection:
geoprotection = xpath_text(
relinker, './geoprotection', default=None) == 'Y'
if not is_live:
is_live = xpath_text(
relinker, './is_live', default=None) == 'Y'
if not duration:
duration = parse_duration(xpath_text(
relinker, './duration', default=None))
url_elem = find_xpath_attr(relinker, './url', 'type', 'content')
if url_elem is None:
continue
media_url = url_elem.text
# This does not imply geo restriction (e.g.
# http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html)
if media_url == 'http://download.rai.it/video_no_available.mp4':
self.raise_geo_restricted()
continue
ext = determine_ext(media_url)
if (ext == 'm3u8' and platform != 'mon') or (ext == 'f4m' and platform != 'flash'):
@@ -53,215 +88,333 @@ class RaiBaseIE(InfoExtractor):
'format_id': 'http-%d' % bitrate if bitrate > 0 else 'http',
})
return formats
if not formats and geoprotection is True:
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
def _extract_from_content_id(self, content_id, base_url):
return dict((k, v) for k, v in {
'is_live': is_live,
'duration': duration,
'formats': formats,
}.items() if v is not None)
@staticmethod
def _extract_subtitles(url, subtitle_url):
subtitles = {}
if subtitle_url and isinstance(subtitle_url, compat_str):
subtitle_url = urljoin(url, subtitle_url)
STL_EXT = '.stl'
SRT_EXT = '.srt'
subtitles['it'] = [{
'ext': 'stl',
'url': subtitle_url,
}]
if subtitle_url.endswith(STL_EXT):
srt_url = subtitle_url[:-len(STL_EXT)] + SRT_EXT
subtitles['it'].append({
'ext': 'srt',
'url': srt_url,
})
return subtitles
class RaiPlayIE(RaiBaseIE):
    # Extractor for raiplay.it pages whose URL ends in "-<uuid>.html".
    _VALID_URL = r'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.html)' % RaiBaseIE._UUID_RE
    _TESTS = [{
        'url': 'http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter',
        'md5': '340aa3b7afb54bfd14a8c11786450d76',
        'info_dict': {
            'id': 'e06118bb-59a9-4636-b914-498e4cfd2c66',
            'ext': 'mp4',
            'title': 'La Casa Bianca',
            'alt_title': 'S2016 - Puntata del 23/10/2016',
            'description': 'md5:a09d45890850458077d1f68bb036e0a5',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Rai 3',
            'creator': 'Rai 3',
            'duration': 3278,
            'timestamp': 1477764300,
            'upload_date': '20161029',
            'series': 'La Casa Bianca',
            'season': '2016',
        },
    }, {
        'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
        'md5': '8970abf8caf8aef4696e7b1f2adfc696',
        'info_dict': {
            'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
            'ext': 'mp4',
            'title': 'Report del 07/04/2014',
            'alt_title': 'S2013/14 - Puntata del 07/04/2014',
            'description': 'md5:f27c544694cacb46a078db84ec35d2d9',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Rai 5',
            'creator': 'Rai 5',
            'duration': 6160,
            'series': 'Report',
            'season_number': 5,
            'season': '2013/14',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the RaiPlay page at *url*."""
        match = re.match(self._VALID_URL, url)
        # Rebind url to the matched part, i.e. without any trailing query.
        url = match.group('url')
        video_id = match.group('id')

        # The page's JSON description is requested by appending "?json".
        media = self._download_json(
            '%s?json' % url, video_id, 'Downloading video JSON')

        title = media['name']
        video = media['video']

        relinker_info = self._extract_relinker_info(video['contentUrl'], video_id)
        self._sort_formats(relinker_info['formats'])

        # Image URLs carry a "[RESOLUTION]" placeholder that is filled with
        # a fixed size; empty values are ignored.
        thumbnails = []
        if 'images' in media:
            thumbnails = [
                {'url': image_url.replace('[RESOLUTION]', '600x400')}
                for image_url in media.get('images').values()
                if image_url]

        first_start = try_get(
            media, lambda x: x['availabilities'][0]['start'], compat_str)
        series = try_get(media, lambda x: x['isPartOf']['name'], compat_str)
        season_count = try_get(media, lambda x: x['isPartOf']['numeroStagioni'])

        info = {
            'id': video_id,
            'title': title,
            'alt_title': media.get('subtitle'),
            'description': media.get('description'),
            'uploader': media.get('channel'),
            'creator': media.get('editor'),
            'duration': parse_duration(video.get('duration')),
            'timestamp': unified_timestamp(first_start),
            'thumbnails': thumbnails,
            'series': series,
            'season_number': int_or_none(season_count),
            'season': media.get('stagione') or None,
            'subtitles': self._extract_subtitles(url, video.get('subtitles')),
        }

        # Values returned by the relinker take precedence over the ones
        # collected from the page JSON above.
        info.update(relinker_info)
        return info
class RaiIE(RaiBaseIE):
_VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE
_TESTS = [{
# var uniquename = "ContentItem-..."
# data-id="ContentItem-..."
'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html',
'info_dict': {
'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9',
'ext': 'mp4',
'title': 'TG PRIMO TEMPO',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1758,
'upload_date': '20140612',
}
}, {
# with ContentItem in many metas
'url': 'http://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html',
'info_dict': {
'id': '1632c009-c843-4836-bb65-80c33084a64b',
'ext': 'mp4',
'title': 'Weekend al cinema, da Hollywood arriva il thriller di Tate Taylor "La ragazza del treno"',
'description': 'I film in uscita questa settimana.',
'thumbnail': r're:^https?://.*\.png$',
'duration': 833,
'upload_date': '20161103',
}
}, {
# with ContentItem in og:url
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
'md5': '11959b4e44fa74de47011b5799490adf',
'info_dict': {
'id': 'efb17665-691c-45d5-a60c-5301333cbb0c',
'ext': 'mp4',
'title': 'TG1 ore 20:00 del 03/11/2016',
'description': 'TG1 edizione integrale ore 20:00 del giorno 03/11/2016',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 2214,
'upload_date': '20161103',
}
}, {
# drawMediaRaiTV(...)
'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html',
'md5': '2dd727e61114e1ee9c47f0da6914e178',
'info_dict': {
'id': '59d69d28-6bb6-409d-a4b5-ed44096560af',
'ext': 'mp4',
'title': 'Il pacco',
'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a',
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20141221',
},
}, {
# initEdizione('ContentItem-...'
'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined',
'info_dict': {
'id': 'c2187016-8484-4e3a-8ac8-35e475b07303',
'ext': 'mp4',
'title': r're:TG1 ore \d{2}:\d{2} del \d{2}/\d{2}/\d{4}',
'duration': 2274,
'upload_date': '20170401',
},
'skip': 'Changes daily',
}, {
# HDS live stream with only relinker URL
'url': 'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews',
'info_dict': {
'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc',
'ext': 'flv',
'title': 'EuroNews',
},
'params': {
'skip_download': True,
},
}, {
# HLS live stream with ContentItem in og:url
'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html',
'info_dict': {
'id': '3156f2f2-dc70-4953-8e2f-70d7489d4ce9',
'ext': 'mp4',
'title': 'La diretta di Rainews24',
},
'params': {
'skip_download': True,
},
}]
def _extract_from_content_id(self, content_id, url):
media = self._download_json(
'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-%s.html?json' % content_id,
content_id, 'Downloading video JSON')
title = media['name'].strip()
media_type = media['type']
if 'Audio' in media_type:
relinker_info = {
'formats': {
'format_id': media.get('formatoAudio'),
'url': media['audioUrl'],
'ext': media.get('formatoAudio'),
}
}
elif 'Video' in media_type:
relinker_info = self._extract_relinker_info(media['mediaUri'], content_id)
else:
raise ExtractorError('not a media file')
self._sort_formats(relinker_info['formats'])
thumbnails = []
for image_type in ('image', 'image_medium', 'image_300'):
thumbnail_url = media.get(image_type)
if thumbnail_url:
thumbnails.append({
'url': compat_urlparse.urljoin(base_url, thumbnail_url),
'url': compat_urlparse.urljoin(url, thumbnail_url),
})
formats = []
media_type = media['type']
if 'Audio' in media_type:
formats.append({
'format_id': media.get('formatoAudio'),
'url': media['audioUrl'],
'ext': media.get('formatoAudio'),
})
elif 'Video' in media_type:
formats.extend(self._extract_relinker_formats(media['mediaUri'], content_id))
self._sort_formats(formats)
else:
raise ExtractorError('not a media file')
subtitles = self._extract_subtitles(url, media.get('subtitlesUrl'))
subtitles = {}
captions = media.get('subtitlesUrl')
if captions:
STL_EXT = '.stl'
SRT_EXT = '.srt'
if captions.endswith(STL_EXT):
captions = captions[:-len(STL_EXT)] + SRT_EXT
subtitles['it'] = [{
'ext': 'srt',
'url': captions,
}]
return {
info = {
'id': content_id,
'title': media['name'],
'description': media.get('desc'),
'title': title,
'description': strip_or_none(media.get('desc')),
'thumbnails': thumbnails,
'uploader': media.get('author'),
'upload_date': unified_strdate(media.get('date')),
'duration': parse_duration(media.get('length')),
'formats': formats,
'subtitles': subtitles,
}
info.update(relinker_info)
class RaiTVIE(RaiBaseIE):
_VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/(?:[^/]+/)+(?:media|ondemand)/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html'
_TESTS = [
{
'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
'md5': '8970abf8caf8aef4696e7b1f2adfc696',
'info_dict': {
'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
'ext': 'mp4',
'title': 'Report del 07/04/2014',
'description': 'md5:f27c544694cacb46a078db84ec35d2d9',
'upload_date': '20140407',
'duration': 6160,
'thumbnail': r're:^https?://.*\.jpg$',
}
},
{
# no m3u8 stream
'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html',
# HDS download, MD5 is unstable
'info_dict': {
'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9',
'ext': 'flv',
'title': 'TG PRIMO TEMPO',
'upload_date': '20140612',
'duration': 1758,
'thumbnail': r're:^https?://.*\.jpg$',
},
'skip': 'Geo-restricted to Italy',
},
{
'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html',
'md5': '35cf7c229f22eeef43e48b5cf923bef0',
'info_dict': {
'id': '7aafdea9-0e5d-49d5-88a6-7e65da67ae13',
'ext': 'mp4',
'title': 'State of the Net, Antonella La Carpia: regole virali',
'description': 'md5:b0ba04a324126903e3da7763272ae63c',
'upload_date': '20140613',
},
'skip': 'Error 404',
},
{
'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-b4a49761-e0cc-4b14-8736-2729f6f73132-tg2.html',
'info_dict': {
'id': 'b4a49761-e0cc-4b14-8736-2729f6f73132',
'ext': 'mp4',
'title': 'Alluvione in Sardegna e dissesto idrogeologico',
'description': 'Edizione delle ore 20:30 ',
},
'skip': 'invalid urls',
},
{
'url': 'http://www.ilcandidato.rai.it/dl/ray/media/Il-Candidato---Primo-episodio-Le-Primarie-28e5525a-b495-45e8-a7c3-bc48ba45d2b6.html',
'md5': 'e57493e1cb8bc7c564663f363b171847',
'info_dict': {
'id': '28e5525a-b495-45e8-a7c3-bc48ba45d2b6',
'ext': 'mp4',
'title': 'Il Candidato - Primo episodio: "Le Primarie"',
'description': 'md5:364b604f7db50594678f483353164fb8',
'upload_date': '20140923',
'duration': 386,
'thumbnail': r're:^https?://.*\.jpg$',
}
},
]
return info
def _real_extract(self, url):
video_id = self._match_id(url)
return self._extract_from_content_id(video_id, url)
class RaiIE(RaiBaseIE):
_VALID_URL = r'https?://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html'
_TESTS = [
{
'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html',
'md5': '2dd727e61114e1ee9c47f0da6914e178',
'info_dict': {
'id': '59d69d28-6bb6-409d-a4b5-ed44096560af',
'ext': 'mp4',
'title': 'Il pacco',
'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a',
'upload_date': '20141221',
},
},
{
# Direct relinker URL
'url': 'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews',
# HDS live stream, MD5 is unstable
'info_dict': {
'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc',
'ext': 'flv',
'title': 'EuroNews',
},
'skip': 'Geo-restricted to Italy',
},
{
# Embedded content item ID
'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined',
'md5': '84c1135ce960e8822ae63cec34441d63',
'info_dict': {
'id': '0960e765-62c8-474a-ac4b-7eb3e2be39c8',
'ext': 'mp4',
'title': 'TG1 ore 20:00 del 02/07/2016',
'upload_date': '20160702',
},
},
{
'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html',
# HDS live stream, MD5 is unstable
'info_dict': {
'id': '3156f2f2-dc70-4953-8e2f-70d7489d4ce9',
'ext': 'flv',
'title': 'La diretta di Rainews24',
},
},
]
@classmethod
def suitable(cls, url):
return False if RaiTVIE.suitable(url) else super(RaiIE, cls).suitable(url)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
iframe_url = self._search_regex(
[r'<iframe[^>]+src="([^"]*/dl/[^"]+\?iframe\b[^"]*)"',
r'drawMediaRaiTV\(["\'](.+?)["\']'],
webpage, 'iframe', default=None)
if iframe_url:
if not iframe_url.startswith('http'):
iframe_url = compat_urlparse.urljoin(url, iframe_url)
return self.url_result(iframe_url)
content_item_id = None
content_item_id = self._search_regex(
r'initEdizione\((?P<q1>[\'"])ContentItem-(?P<content_id>[^\'"]+)(?P=q1)',
webpage, 'content item ID', group='content_id', default=None)
content_item_url = self._html_search_meta(
('og:url', 'og:video', 'og:video:secure_url', 'twitter:url',
'twitter:player', 'jsonlink'), webpage, default=None)
if content_item_url:
content_item_id = self._search_regex(
r'ContentItem-(%s)' % self._UUID_RE, content_item_url,
'content item id', default=None)
if not content_item_id:
content_item_id = self._search_regex(
r'''(?x)
(?:
(?:initEdizione|drawMediaRaiTV)\(|
<(?:[^>]+\bdata-id|var\s+uniquename)=
)
(["\'])
(?:(?!\1).)*\bContentItem-(?P<id>%s)
''' % self._UUID_RE,
webpage, 'content item id', default=None, group='id')
content_item_ids = set()
if content_item_id:
return self._extract_from_content_id(content_item_id, url)
content_item_ids.add(content_item_id)
if video_id not in content_item_ids:
content_item_ids.add(video_id)
relinker_url = compat_urlparse.urljoin(url, self._search_regex(
r'(?:var\s+videoURL|mediaInfo\.mediaUri)\s*=\s*(?P<q1>[\'"])(?P<url>(https?:)?//mediapolis\.rai\.it/relinker/relinkerServlet\.htm\?cont=\d+)(?P=q1)',
webpage, 'relinker URL', group='url'))
formats = self._extract_relinker_formats(relinker_url, video_id)
self._sort_formats(formats)
for content_item_id in content_item_ids:
try:
return self._extract_from_content_id(content_item_id, url)
except GeoRestrictedError:
raise
except ExtractorError:
pass
relinker_url = self._search_regex(
r'''(?x)
(?:
var\s+videoURL|
mediaInfo\.mediaUri
)\s*=\s*
([\'"])
(?P<url>
(?:https?:)?
//mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\?
(?:(?!\1).)*\bcont=(?:(?!\1).)+)\1
''',
webpage, 'relinker URL', group='url')
relinker_info = self._extract_relinker_info(
urljoin(url, relinker_url), video_id)
self._sort_formats(relinker_info['formats'])
title = self._search_regex(
r'var\s+videoTitolo\s*=\s*([\'"])(?P<title>[^\'"]+)\1',
webpage, 'title', group='title', default=None) or self._og_search_title(webpage)
webpage, 'title', group='title',
default=None) or self._og_search_title(webpage)
return {
info = {
'id': video_id,
'title': title,
'formats': formats,
}
info.update(relinker_info)
return info

View File

@@ -13,15 +13,15 @@ from ..utils import (
class RBMARadioIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<show_id>[^/]+)/episodes/(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:www\.)?(?:rbmaradio|redbullradio)\.com/shows/(?P<show_id>[^/]+)/episodes/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'https://www.rbmaradio.com/shows/main-stage/episodes/ford-lopatin-live-at-primavera-sound-2011',
'md5': '6bc6f9bcb18994b4c983bc3bf4384d95',
'info_dict': {
'id': 'ford-lopatin-live-at-primavera-sound-2011',
'ext': 'mp3',
'title': 'Main Stage - Ford & Lopatin',
'description': 'md5:4f340fb48426423530af5a9d87bd7b91',
'title': 'Main Stage - Ford & Lopatin at Primavera Sound',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 2452,
'timestamp': 1307103164,

View File

@@ -1,13 +1,26 @@
# coding: utf-8
from __future__ import unicode_literals
import base64
import re
from .common import InfoExtractor
from ..utils import int_or_none
from ..aes import aes_cbc_decrypt
from ..compat import (
compat_ord,
compat_str,
)
from ..utils import (
bytes_to_intlist,
ExtractorError,
intlist_to_bytes,
int_or_none,
strip_or_none,
)
class RTL2IE(InfoExtractor):
IE_NAME = 'rtl2'
_VALID_URL = r'http?://(?:www\.)?rtl2\.de/[^?#]*?/(?P<id>[^?#/]*?)(?:$|/(?:$|[?#]))'
_TESTS = [{
'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0',
@@ -98,3 +111,98 @@ class RTL2IE(InfoExtractor):
'duration': int_or_none(video_info.get('duration')),
'formats': formats,
}
class RTL2YouBaseIE(InfoExtractor):
    # Common prefix of the RTL2 YOU backend endpoints; extractors deriving
    # from this class append their endpoint path to it.
    _BACKWERK_BASE_URL = 'https://p-you-backwerk.rtl2apps.de/'
class RTL2YouIE(RTL2YouBaseIE):
    # Extractor for a single you.rtl2.de video, addressed either by its
    # /video/<series>/<id>/ page or by the youplayer ?vid=<id> URL.
    IE_NAME = 'rtl2:you'
    # NOTE: 'https?' rather than 'http?' - the optional character has to be
    # the trailing 's'; 'http?' would accept 'htt://' and reject 'https://'.
    _VALID_URL = r'https?://you\.rtl2\.de/(?:video/\d+/|youplayer/index\.html\?.*?\bvid=)(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://you.rtl2.de/video/3002/15740/MJUNIK%20%E2%80%93%20Home%20of%20YOU/307-hirn-wo-bist-du',
        'info_dict': {
            'id': '15740',
            'ext': 'mp4',
            'title': 'MJUNIK Home of YOU - #307 Hirn, wo bist du?!',
            'description': 'md5:ddaa95c61b372b12b66e115b2772fe01',
            'age_limit': 12,
        },
    }, {
        'url': 'http://you.rtl2.de/youplayer/index.html?vid=15712',
        'only_matching': True,
    }]
    # Key used to AES-CBC decrypt the stream URL returned by the backend
    _AES_KEY = b'\xe9W\xe4.<*\xb8\x1a\xd2\xb6\x92\xf3C\xd3\xefL\x1b\x03*\xbbbH\xc0\x03\xffo\xc2\xf2(\xaa\xaa!'
    _GEO_COUNTRIES = ['DE']

    def _real_extract(self, url):
        """Return the info dict for the video referenced by url.

        Raises ExtractorError (expected) when the decrypted stream URL
        carries the backend's "not found" marker.
        """
        video_id = self._match_id(url)

        stream_data = self._download_json(
            self._BACKWERK_BASE_URL + 'stream/video/' + video_id, video_id)

        # 'streamUrl' decodes to '<base64 ciphertext>:<base64 IV>'
        data, iv = base64.b64decode(stream_data['streamUrl']).decode().split(':')
        stream_url = intlist_to_bytes(aes_cbc_decrypt(
            bytes_to_intlist(base64.b64decode(data)),
            bytes_to_intlist(self._AES_KEY),
            bytes_to_intlist(base64.b64decode(iv))
        ))
        if b'rtl2_you_video_not_found' in stream_url:
            raise ExtractorError('video not found', expected=True)

        formats = self._extract_m3u8_formats(
            # The value of the last byte is the number of trailing
            # (padding) bytes to drop from the decrypted data
            stream_url[:-compat_ord(stream_url[-1])].decode(),
            video_id, 'mp4', 'm3u8_native')
        self._sort_formats(formats)

        video_data = self._download_json(
            self._BACKWERK_BASE_URL + 'video/' + video_id, video_id)

        series = video_data.get('formatTitle')
        # Fall back to the series name when the episode has no own title
        title = episode = video_data.get('title') or series
        if series and series != title:
            title = '%s - %s' % (series, title)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': strip_or_none(video_data.get('description')),
            'thumbnail': video_data.get('image'),
            # NOTE(review): scale of 1000 presumably converts ms to seconds - confirm
            'duration': int_or_none(stream_data.get('duration') or video_data.get('duration'), 1000),
            'series': series,
            'episode': episode,
            'age_limit': int_or_none(video_data.get('minimumAge')),
        }
class RTL2YouSeriesIE(RTL2YouBaseIE):
    # Playlist extractor for a you.rtl2.de series ("format") page.
    IE_NAME = 'rtl2:you:series'
    # NOTE: 'https?' rather than 'http?' - the optional character has to be
    # the trailing 's'; 'http?' would accept 'htt://' and reject 'https://'.
    _VALID_URL = r'https?://you\.rtl2\.de/videos/(?P<id>\d+)'
    _TEST = {
        'url': 'http://you.rtl2.de/videos/115/dragon-ball',
        'info_dict': {
            'id': '115',
        },
        'playlist_mincount': 5,
    }

    def _real_extract(self, url):
        """Return a playlist with one url_result per video of the series."""
        series_id = self._match_id(url)
        stream_data = self._download_json(
            self._BACKWERK_BASE_URL + 'videos',
            series_id, query={
                'formatId': series_id,
                # Very large limit so a single request returns every video
                'limit': 1000000000,
            })

        entries = []
        for video in stream_data.get('videos', []):
            raw_video_id = video.get('videoId')
            # Test before stringifying: compat_str(None) would yield the
            # truthy 'None' and the entry could never be skipped.
            if raw_video_id is None or raw_video_id == '':
                continue
            video_id = compat_str(raw_video_id)
            entries.append(self.url_result(
                'http://you.rtl2.de/video/%s/%s' % (series_id, video_id),
                'RTL2You', video_id))
        return self.playlist_result(entries, series_id)

View File

@@ -0,0 +1,32 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .ooyala import OoyalaIE
class TheSunIE(InfoExtractor):
    # Turns a thesun.co.uk article into a playlist of the Ooyala videos
    # embedded in it.
    _VALID_URL = r'https://(?:www\.)?thesun\.co\.uk/[^/]+/(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.thesun.co.uk/tvandshowbiz/2261604/orlando-bloom-and-katy-perry-post-adorable-instagram-video-together-celebrating-thanksgiving-after-split-rumours/',
        'info_dict': {
            'id': '2261604',
            'title': 'md5:cba22f48bad9218b64d5bbe0e16afddf',
        },
        'playlist_count': 2,
    }

    def _real_extract(self, url):
        """Return a playlist result built from every Ooyala id on the page."""
        article_id = self._match_id(url)
        webpage = self._download_webpage(url, article_id)

        # An Ooyala id shows up either as the suffix of a player element id
        # or as the value of a data-content-id attribute.
        ooyala_ids = re.findall(
            r'<[^>]+\b(?:id\s*=\s*"thesun-ooyala-player-|data-content-id\s*=\s*")([^"]+)',
            webpage)
        entries = [
            OoyalaIE._build_url_result(ooyala_id)
            for ooyala_id in ooyala_ids]

        playlist_title = self._og_search_title(webpage, fatal=False)
        return self.playlist_result(entries, article_id, playlist_title)

View File

@@ -0,0 +1,79 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
clean_html,
determine_ext,
extract_attributes,
get_element_by_class,
int_or_none,
parse_duration,
parse_iso8601,
)
class TV5MondePlusIE(InfoExtractor):
    # Extractor for tv5mondeplus.com video pages; media information comes
    # from the attributes of the page's "video_player_loader" element.
    IE_DESC = 'TV5MONDE+'
    _VALID_URL = r'https?://(?:www\.)?tv5mondeplus\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://www.tv5mondeplus.com/toutes-les-videos/documentaire/tdah-mon-amour-tele-quebec-tdah-mon-amour-ep001-enfants',
        'md5': '12130fc199f020673138a83466542ec6',
        'info_dict': {
            'id': 'tdah-mon-amour-tele-quebec-tdah-mon-amour-ep001-enfants',
            'ext': 'mp4',
            'title': 'Tdah, mon amour - Enfants',
            'description': 'md5:230e3aca23115afcf8006d1bece6df74',
            'upload_date': '20170401',
            'timestamp': 1491022860,
        }
    }
    _GEO_BYPASS = False

    def _real_extract(self, url):
        """Return the info dict for the video page at url."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The site announces geo blocking with this message in the page
        geo_notice = ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<"
        if geo_notice in webpage:
            self.raise_geo_restricted(countries=['FR'])

        series = get_element_by_class('video-detail__title', webpage)
        subtitle = get_element_by_class('video-detail__subtitle', webpage)
        # Without a subtitle the series name doubles as episode and title
        episode = subtitle or series
        title = episode
        if series and series != title:
            title = '%s - %s' % (series, title)

        loader_tag = self._search_regex(
            r'(<[^>]+class="video_player_loader"[^>]+>)',
            webpage, 'video player loader')
        vpl_data = extract_attributes(loader_tag)

        broadcast = self._parse_json(vpl_data['data-broadcast'], display_id)

        formats = []
        for video_file in broadcast.get('files', []):
            v_url = video_file.get('url')
            if v_url:
                video_format = video_file.get('format') or determine_ext(v_url)
                if video_format != 'm3u8':
                    formats.append({
                        'url': v_url,
                        'format_id': video_format,
                    })
                else:
                    formats.extend(self._extract_m3u8_formats(
                        v_url, display_id, 'mp4', 'm3u8_native',
                        m3u8_id='hls', fatal=False))
        self._sort_formats(formats)

        description = clean_html(
            get_element_by_class('video-detail__description', webpage))
        # Prefer the player attribute; otherwise use the page's duration meta
        duration = int_or_none(vpl_data.get('data-duration')) or parse_duration(
            self._html_search_meta('duration', webpage))
        timestamp = parse_iso8601(
            self._html_search_meta('uploadDate', webpage))

        return {
            'id': display_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': vpl_data.get('data-image'),
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
            'episode': episode,
            'series': series,
        }

View File

@@ -31,9 +31,8 @@ class TVNoeIE(InfoExtractor):
r'<iframe[^>]+src="([^"]+)"', webpage, 'iframe URL')
ifs_page = self._download_webpage(iframe_url, video_id)
jwplayer_data = self._parse_json(
self._find_jwplayer_data(ifs_page),
video_id, transform_source=js_to_json)
jwplayer_data = self._find_jwplayer_data(
ifs_page, video_id, transform_source=js_to_json)
info_dict = self._parse_jwplayer_data(
jwplayer_data, video_id, require_title=False, base_url=iframe_url)

View File

@@ -225,7 +225,11 @@ class TVPlayIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
geo_country = self._search_regex(
r'https?://[^/]+\.([a-z]{2})', url,
'geo country', default=None)
if geo_country:
self._initialize_geo_bypass([geo_country.upper()])
video = self._download_json(
'http://playapi.mtgx.tv/v3/videos/%s' % video_id, video_id, 'Downloading video JSON')

View File

@@ -9,6 +9,7 @@ from .common import InfoExtractor
class VierIE(InfoExtractor):
IE_NAME = 'vier'
IE_DESC = 'vier.be and vijf.be'
_VALID_URL = r'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))'
_TESTS = [{
'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129',

View File

@@ -44,7 +44,7 @@ class ViuBaseIE(InfoExtractor):
class ViuIE(ViuBaseIE):
_VALID_URL = r'(?:viu:|https?://www\.viu\.com/[a-z]{2}/media/)(?P<id>\d+)'
_VALID_URL = r'(?:viu:|https?://[^/]+\.viu\.com/[a-z]{2}/media/)(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.viu.com/en/media/1116705532?containerId=playlist-22168059',
'info_dict': {
@@ -69,6 +69,9 @@ class ViuIE(ViuBaseIE):
'skip_download': 'm3u8 download',
},
'skip': 'Geo-restricted to Indonesia',
}, {
'url': 'https://india.viu.com/en/media/1126286865',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@@ -432,8 +432,7 @@ class VKIE(VKBaseIE):
})
elif format_id == 'hls':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4',
entry_protocol='m3u8' if is_live else 'm3u8_native',
format_url, video_id, 'mp4', 'm3u8_native',
m3u8_id=format_id, fatal=False, live=is_live))
elif format_id == 'rtmp':
formats.append({

View File

@@ -10,6 +10,7 @@ from ..utils import (
class VRTIE(InfoExtractor):
IE_DESC = 'deredactie.be, sporza.be, cobra.be and cobra.canvas.be'
_VALID_URL = r'https?://(?:deredactie|sporza|cobra(?:\.canvas)?)\.be/cm/(?:[^/]+/)+(?P<id>[^/]+)/*'
_TESTS = [
# deredactie.be

191
youtube_dl/extractor/vrv.py Normal file
View File

@@ -0,0 +1,191 @@
# coding: utf-8
from __future__ import unicode_literals
import base64
import json
import hashlib
import hmac
import random
import string
import time
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_urlencode,
compat_urlparse,
)
from ..utils import (
float_or_none,
int_or_none,
)
class VRVBaseIE(InfoExtractor):
    # Shared helpers for talking to the VRV core API (OAuth 1.0 signed
    # requests) and to the CMS endpoints it links to.
    _API_DOMAIN = None   # set by _set_api_params()
    _API_PARAMS = {}     # 'cxApiParams' of the page's app config
    _CMS_SIGNING = {}    # query parameters required by CMS requests

    def _call_api(self, path, video_id, note, data=None):
        """Perform an HMAC-SHA1 signed request against the core API.

        path is appended to '<api domain>/core/'. When data is given it is
        sent as a JSON body using POST, otherwise GET is used. Returns the
        decoded JSON response. _set_api_params() must have run before.
        """
        base_url = self._API_DOMAIN + '/core/' + path
        # A list of pairs (not a dict) so the encoded string keeps exactly
        # this sorted key order on every interpreter: the same string is
        # both signed below and sent, and OAuth 1.0 normalizes parameters
        # in sorted order when building the signature base string.
        encoded_query = compat_urllib_parse_urlencode([
            ('oauth_consumer_key', self._API_PARAMS['oAuthKey']),
            ('oauth_nonce', ''.join([random.choice(string.ascii_letters) for _ in range(32)])),
            ('oauth_signature_method', 'HMAC-SHA1'),
            ('oauth_timestamp', int(time.time())),
            ('oauth_version', '1.0'),
        ])
        headers = self.geo_verification_headers()
        if data:
            data = json.dumps(data).encode()
            headers['Content-Type'] = 'application/json'
        method = 'POST' if data else 'GET'
        # Signature base string: METHOD & quoted URL & quoted query
        base_string = '&'.join([method, compat_urlparse.quote(base_url, ''), compat_urlparse.quote(encoded_query, '')])
        # Signing key is '<secret>&' (the part after '&' is left empty)
        oauth_signature = base64.b64encode(hmac.new(
            (self._API_PARAMS['oAuthSecret'] + '&').encode('ascii'),
            base_string.encode(), hashlib.sha1).digest()).decode()
        encoded_query += '&oauth_signature=' + compat_urlparse.quote(oauth_signature, '')
        return self._download_json(
            '?'.join([base_url, encoded_query]), video_id,
            note='Downloading %s JSON metadata' % note, headers=headers, data=data)

    def _call_cms(self, path, video_id, note):
        """Download JSON from a CMS path, adding the CMS signing query.

        The signing parameters are requested from the core API 'index'
        endpoint on first use and reused afterwards.
        """
        if not self._CMS_SIGNING:
            self._CMS_SIGNING = self._call_api('index', video_id, 'CMS Signing')['cms_signing']
        return self._download_json(
            self._API_DOMAIN + path, video_id, query=self._CMS_SIGNING,
            note='Downloading %s JSON metadata' % note, headers=self.geo_verification_headers())

    def _set_api_params(self, webpage, video_id):
        """Read the API parameters from the page's window.__APP_CONFIG__.

        Does nothing when the parameters have already been set.
        """
        if not self._API_PARAMS:
            self._API_PARAMS = self._parse_json(self._search_regex(
                r'window\.__APP_CONFIG__\s*=\s*({.+?})</script>',
                webpage, 'api config'), video_id)['cxApiParams']
            self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co')

    def _get_cms_resource(self, resource_key, video_id):
        """Resolve a 'cms:/...' resource key to its CMS href via the core API."""
        return self._call_api(
            'cms_resource', video_id, 'resource path', data={
                'resource_key': resource_key,
            })['__links__']['cms_resource']['href']
class VRVIE(VRVBaseIE):
    # Extractor for single vrv.co episodes (/watch/<ID> pages).
    IE_NAME = 'vrv'
    _VALID_URL = r'https?://(?:www\.)?vrv\.co/watch/(?P<id>[A-Z0-9]+)'
    _TEST = {
        'url': 'https://vrv.co/watch/GR9PNZ396/Hidden-America-with-Jonah-Ray:BOSTON-WHERE-THE-PAST-IS-THE-PRESENT',
        'info_dict': {
            'id': 'GR9PNZ396',
            'ext': 'mp4',
            'title': 'BOSTON: WHERE THE PAST IS THE PRESENT',
            'description': 'md5:4ec8844ac262ca2df9e67c0983c6b83f',
            'uploader_id': 'seeso',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the episode page at url.

        Metadata and streams are taken from the page's
        window.__INITIAL_STATE__ when present there; anything missing is
        fetched through the CMS API instead.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            url, video_id,
            headers=self.geo_verification_headers())

        initial_state = self._parse_json(self._search_regex(
            r'window\.__INITIAL_STATE__\s*=\s*({.+?})</script>',
            webpage, 'inital state'), video_id)
        media_resource = initial_state.get('watch', {}).get('mediaResource') or {}

        video_data = media_resource.get('json')
        if not video_data:
            # Episode metadata is not embedded in the page: ask the CMS
            self._set_api_params(webpage, video_id)
            episode_path = self._get_cms_resource(
                'cms:/episodes/' + video_id, video_id)
            video_data = self._call_cms(episode_path, video_id, 'video')
        title = video_data['title']

        streams_json = media_resource.get('streams', {}).get('json', {})
        if not streams_json:
            # Streams are not embedded either: follow the episode's link
            self._set_api_params(webpage, video_id)
            streams_path = video_data['__links__']['streams']['href']
            streams_json = self._call_cms(streams_path, video_id, 'streams')

        audio_locale = streams_json.get('audio_locale')
        hls_streams = streams_json.get('streams', {}).get('adaptive_hls', {})
        formats = []
        for stream_key, stream in hls_streams.items():
            stream_url = stream.get('url')
            if stream_url:
                # An empty key is replaced by the audio locale
                format_prefix = stream_key or audio_locale
                m3u8_formats = self._extract_m3u8_formats(
                    stream_url, video_id, 'mp4', m3u8_id=format_prefix,
                    note='Downloading %s m3u8 information' % format_prefix,
                    fatal=False)
                if audio_locale:
                    for m3u8_format in m3u8_formats:
                        m3u8_format['language'] = audio_locale
                formats.extend(m3u8_formats)
        self._sort_formats(formats)

        # Keep only thumbnail records that actually carry a source URL
        thumbnails = [{
            'url': image.get('source'),
            'width': int_or_none(image.get('width')),
            'height': int_or_none(image.get('height')),
        } for image in video_data.get('images', {}).get('thumbnails', [])
            if image.get('source')]

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnails': thumbnails,
            'description': video_data.get('description'),
            'duration': float_or_none(video_data.get('duration_ms'), 1000),
            'uploader_id': video_data.get('channel_id'),
            'series': video_data.get('series_title'),
            'season': video_data.get('season_title'),
            'season_number': int_or_none(video_data.get('season_number')),
            'season_id': video_data.get('season_id'),
            'episode': title,
            'episode_number': int_or_none(video_data.get('episode_number')),
            'episode_id': video_data.get('production_episode_id'),
        }
class VRVSeriesIE(VRVBaseIE):
    # Playlist extractor for vrv.co series (/series/<ID> pages).
    IE_NAME = 'vrv:series'
    _VALID_URL = r'https?://(?:www\.)?vrv\.co/series/(?P<id>[A-Z0-9]+)'
    _TEST = {
        'url': 'https://vrv.co/series/G68VXG3G6/The-Perfect-Insider',
        'info_dict': {
            'id': 'G68VXG3G6',
        },
        'playlist_mincount': 11,
    }

    def _real_extract(self, url):
        """Return a playlist with one entry per episode of every season."""
        series_id = self._match_id(url)
        webpage = self._download_webpage(
            url, series_id,
            headers=self.geo_verification_headers())

        self._set_api_params(webpage, series_id)
        seasons_path = self._get_cms_resource(
            'cms:/seasons?series_id=' + series_id, series_id)
        seasons_data = self._call_cms(seasons_path, series_id, 'seasons')

        entries = []
        for season in seasons_data.get('items', []):
            # Each season links to its own episode listing
            episodes_path = season['__links__']['season/episodes']['href']
            episodes = self._call_cms(episodes_path, series_id, 'episodes')
            for episode in episodes.get('items', []):
                episode_id = episode['id']
                watch_url = 'https://vrv.co/watch/' + episode_id
                entries.append(self.url_result(
                    watch_url, 'VRV', episode_id, episode.get('title')))

        return self.playlist_result(entries, series_id)

View File

@@ -0,0 +1,38 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class VShareIE(InfoExtractor):
    # Extractor for vshare.io; both /d/<id> and /v/<id> URLs are accepted
    # and the /d/<id> page is the one that gets scraped.
    _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://vshare.io/d/0f64ce6',
        'md5': '16d7b8fef58846db47419199ff1ab3e7',
        'info_dict': {
            'id': '0f64ce6',
            'title': 'vl14062007715967',
            'ext': 'mp4',
        }
    }, {
        'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict (id, title, direct url) for the video."""
        video_id = self._match_id(url)

        # Always fetch the /d/ page, whatever form of URL was given
        download_page_url = 'https://vshare.io/d/%s' % video_id
        webpage = self._download_webpage(download_page_url, video_id)

        title = self._html_search_regex(
            r'(?s)<div id="root-container">(.+?)<br/>', webpage, 'title')

        # The media URL is the target of the page's "click here" link
        video_url = self._search_regex(
            r'<a[^>]+href=(["\'])(?P<url>(?:https?:)?//.+?)\1[^>]*>[Cc]lick\s+here',
            webpage, 'video url', group='url')

        info = {
            'id': video_id,
            'title': title,
            'url': video_url,
        }
        return info

View File

@@ -1,12 +1,10 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class WorldStarHipHopIE(InfoExtractor):
_VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/(?:videos|android)/video\.php\?v=(?P<id>.*)'
_VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/(?:videos|android)/video\.php\?.*?\bv=(?P<id>[^&]+)'
_TESTS = [{
'url': 'http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO',
'md5': '9d04de741161603bf7071bbf4e883186',
@@ -17,48 +15,26 @@ class WorldStarHipHopIE(InfoExtractor):
}
}, {
'url': 'http://m.worldstarhiphop.com/android/video.php?v=wshh6a7q1ny0G34ZwuIO',
'md5': 'dc1c76c83ecc4190bb1eb143899b87d3',
'info_dict': {
'id': 'wshh6a7q1ny0G34ZwuIO',
'ext': 'mp4',
'title': 'KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!'
}
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
m_vevo_id = re.search(r'videoId=(.*?)&amp?', webpage)
if m_vevo_id is not None:
return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')
entries = self._parse_html5_media_entries(url, webpage, video_id)
video_url = self._search_regex(
[r'so\.addVariable\("file","(.*?)"\)',
r'<div class="artlist">\s*<a[^>]+href="([^"]+)">'],
webpage, 'video URL')
if not entries:
return self.url_result(url, 'Generic')
if 'youtube' in video_url:
return self.url_result(video_url, ie='Youtube')
video_title = self._html_search_regex(
title = self._html_search_regex(
[r'(?s)<div class="content-heading">\s*<h1>(.*?)</h1>',
r'<span[^>]+class="tc-sp-pinned-title">(.*)</span>'],
webpage, 'title')
# Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.
thumbnail = self._html_search_regex(
r'rel="image_src" href="(.*)" />', webpage, 'thumbnail',
default=None)
if not thumbnail:
_title = r'candytitles.*>(.*)</span>'
mobj = re.search(_title, webpage)
if mobj is not None:
video_title = mobj.group(1)
return {
info = entries[0]
info.update({
'id': video_id,
'url': video_url,
'title': video_title,
'thumbnail': thumbnail,
}
'title': title,
})
return info

View File

@@ -6,6 +6,7 @@ import re
from .common import InfoExtractor
from ..utils import (
decode_packed_codes,
determine_ext,
ExtractorError,
int_or_none,
NO_DEFAULT,
@@ -26,6 +27,9 @@ class XFileShareIE(InfoExtractor):
('vidto.me', 'Vidto'),
('streamin.to', 'Streamin.To'),
('xvidstage.com', 'XVIDSTAGE'),
('vidabc.com', 'Vid ABC'),
('vidbom.com', 'VidBom'),
('vidlo.us', 'vidlo'),
)
IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
@@ -95,6 +99,16 @@ class XFileShareIE(InfoExtractor):
# removed by administrator
'url': 'http://xvidstage.com/amfy7atlkx25',
'only_matching': True,
}, {
'url': 'http://vidabc.com/i8ybqscrphfv',
'info_dict': {
'id': 'i8ybqscrphfv',
'ext': 'mp4',
'title': 're:Beauty and the Beast 2017',
},
'params': {
'skip_download': True,
},
}]
def _real_extract(self, url):
@@ -133,31 +147,45 @@ class XFileShareIE(InfoExtractor):
webpage, 'title', default=None) or self._og_search_title(
webpage, default=None) or video_id).strip()
def extract_video_url(default=NO_DEFAULT):
return self._search_regex(
(r'file\s*:\s*(["\'])(?P<url>http.+?)\1,',
r'file_link\s*=\s*(["\'])(?P<url>http.+?)\1',
r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http.+?)\2\)',
r'<embed[^>]+src=(["\'])(?P<url>http.+?)\1'),
webpage, 'file url', default=default, group='url')
def extract_formats(default=NO_DEFAULT):
urls = []
for regex in (
r'file\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'):
for mobj in re.finditer(regex, webpage):
video_url = mobj.group('url')
if video_url not in urls:
urls.append(video_url)
formats = []
for video_url in urls:
if determine_ext(video_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls',
fatal=False))
else:
formats.append({
'url': video_url,
'format_id': 'sd',
})
if not formats and default is not NO_DEFAULT:
return default
self._sort_formats(formats)
return formats
video_url = extract_video_url(default=None)
formats = extract_formats(default=None)
if not video_url:
if not formats:
webpage = decode_packed_codes(self._search_regex(
r"(}\('(.+)',(\d+),(\d+),'[^']*\b(?:file|embed)\b[^']*'\.split\('\|'\))",
webpage, 'packed code'))
video_url = extract_video_url()
formats = extract_formats()
thumbnail = self._search_regex(
r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', default=None)
formats = [{
'format_id': 'sd',
'url': video_url,
'quality': 1,
}]
return {
'id': video_id,
'title': title,

View File

@@ -59,6 +59,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
_PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}'
def _set_language(self):
self._set_cookie(
'.youtube.com', 'PREF', 'f1=50000000&hl=en',
@@ -265,9 +267,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
)
)? # all until now is optional -> you can pass the naked ID
([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
(?!.*?\blist=) # combined list/video URLs are handled by the playlist IE
(?!.*?\blist=
(?:
%(playlist_id)s| # combined list/video URLs are handled by the playlist IE
WL # WL are handled by the watch later IE
)
)
(?(1).+)? # if we found the ID, everything can follow
$"""
$""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
_formats = {
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
@@ -924,6 +931,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'url': 'sJL6WA-aGkQ',
'only_matching': True,
},
{
'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
'only_matching': True,
},
]
def __init__(self, *args, **kwargs):
@@ -1864,8 +1875,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
)
.*
|
((?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,})
)"""
(%(playlist_id)s)
)""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true'
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
IE_NAME = 'youtube:playlist'

View File

@@ -459,11 +459,11 @@ def parseOpts(overrideArguments=None):
downloader.add_option(
'--fragment-retries',
dest='fragment_retries', metavar='RETRIES', default=10,
help='Number of retries for a fragment (default is %default), or "infinite" (DASH and hlsnative only)')
help='Number of retries for a fragment (default is %default), or "infinite" (DASH, hlsnative and ISM)')
downloader.add_option(
'--skip-unavailable-fragments',
action='store_true', dest='skip_unavailable_fragments', default=True,
help='Skip unavailable fragments (DASH and hlsnative only)')
help='Skip unavailable fragments (DASH, hlsnative and ISM)')
downloader.add_option(
'--abort-on-unavailable-fragment',
action='store_false', dest='skip_unavailable_fragments',

View File

@@ -39,6 +39,7 @@ from .compat import (
compat_basestring,
compat_chr,
compat_etree_fromstring,
compat_expanduser,
compat_html_entities,
compat_html_entities_html5,
compat_http_client,
@@ -539,6 +540,11 @@ def sanitized_Request(url, *args, **kwargs):
return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
def expand_path(s):
    """Return s with ~ and shell variables expanded."""
    # User expansion runs first; variable expansion is applied to its result
    user_expanded = compat_expanduser(s)
    return os.path.expandvars(user_expanded)
def orderedSet(iterable):
""" Remove all duplicates from the input iterable """
res = []

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2017.03.22'
__version__ = '2017.04.09'