Compare commits

..

132 Commits

Author SHA1 Message Date
Sergey M․
6d0630d880 release 2017.10.29 2017-10-29 07:22:53 +07:00
Sergey M․
518d357b46 [ChangeLog] Actualize 2017-10-29 07:21:33 +07:00
Sergey M․
514e8aefd4 [egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00
Sergey M․
9211e3319e [extractor/common] Prefix format id for audio only HLS formats 2017-10-29 07:05:55 +07:00
Sergey M․
056653bbb1 [utils] Add support for zero years and months in parse_duration 2017-10-29 07:04:48 +07:00
enigmaquip
c3206d02e9 [fxnetworks] Extract series metadata 2017-10-29 05:20:18 +07:00
Sergey M․
eb4b5818e2 [younow] Fix issues and improve extraction (closes #9255, closes #9432, closes #12436) 2017-10-29 04:18:43 +07:00
Andrew Udvare
47a8587915 [younow] Add extractor 2017-10-29 04:17:03 +07:00
Sergey M․
8e01f3ca81 [dctptv] Fix extraction (closes #14599) 2017-10-28 22:58:01 +07:00
Sergey M․
f2332f18e6 [youtube] Restrict embed regex (#14600) 2017-10-27 22:26:43 +07:00
Sergey M․
7c1f419341 [vimeo] Restrict iframe embed regex (closes #14600) 2017-10-27 22:21:47 +07:00
Sergey M․
30e6161799 [soundgasm] Improve extraction (closes #14588) 2017-10-26 23:16:16 +07:00
Alex Seiler
dc24a7d4a2 [myvideo] Remove extractor (closes #8557)
Redirects to store.maxdome.de
2017-10-25 23:27:55 +07:00
Logan B
d673ab6562 [nbc] Add support for classic-tv videos 2017-10-25 23:23:27 +07:00
Sergey M․
b8c6ffc518 [vrtnu] Add support for cookies authentication and simplify (#11873) 2017-10-25 23:21:51 +07:00
mrBliss
7913e0fca7 [canvas] Add support for vrt.be/vrtnu (closes #11873) 2017-10-25 23:17:28 +07:00
J.D. Purcell
cdd1ce92c4 [twitch:clips] Fix title extraction 2017-10-23 23:12:50 +07:00
rawcoder
55c727a547 [ndtv] Add support for sub-sites 2017-10-22 08:32:20 +07:00
Sergey M․
36e2d3ca43 [dramafever] Fix login error message extraction 2017-10-22 08:16:30 +07:00
Sergey M․
f7a5038305 [travis] Disable IRC notifications 2017-10-22 02:46:28 +07:00
Sergey M․
9ff6273cae [nickru] Add support for more sites 2017-10-22 01:51:01 +07:00
Sergey M․
f03ee0b372 [nickde] Add support for nickelodeon.be 2017-10-22 01:42:44 +07:00
Sergey M․
cf6bda312b [nickde] Add support for nick.ch 2017-10-22 01:30:35 +07:00
Alex Seiler
3ebbd9991e [nick] Add support for more nickelodeon sites (closes #14553) 2017-10-22 01:26:58 +07:00
Sergey M․
21ce434051 [travis] Enable IRC notifications
Let's see how is it verbose now
2017-10-21 02:14:25 +07:00
Sergey M․
5c0e5bc4df [README.md] Add build status bagde 2017-10-21 02:11:11 +07:00
Sergey M․
9a9de2d7b2 [travis] Allow download tests to fail and fast finish 2017-10-21 01:58:45 +07:00
Alex Seiler
424505df76 [azmedien] Fix test 2017-10-21 01:10:56 +07:00
Sergey M․
fa3f0fd856 release 2017.10.20 2017-10-20 23:40:25 +07:00
Sergey M․
c9dcd4b0c5 [ChangeLog] Actualize 2017-10-20 23:37:55 +07:00
Alex Seiler
fc5c47d13c [parliamentliveuk] Fix extraction (closes #14524) 2017-10-20 23:31:13 +07:00
Sergey M․
a26a3c6d34 [soundcloud] Update client id (closes #14546) 2017-10-20 21:43:34 +07:00
Felix Yan
382fa456ea [ChangeLog] Fix typo 2017-10-19 23:36:32 +07:00
Alex Seiler
e1d168e592 [servus] Add extractor (closes #14362) 2017-10-19 22:17:20 +07:00
Parmjit Virk
ca1c9f26fa [unity] Add extractor (fixes #14528) 2017-10-19 04:46:06 +07:00
Sergey M․
6f3b4a98c9 [downloader/fragment] Report warning instead of error on inconsistent download state 2017-10-17 22:53:34 +07:00
Remita Amine
fa4bc6e712 [youtube] replace youtube redirect urls in description(fixes #14517) 2017-10-17 10:07:37 +00:00
Remita Amine
6b9cbd023f [pbs] restrict direct video url regex(fixes #14519) 2017-10-17 09:23:11 +00:00
Yen Chi Hsuan
c233003afe [megaphone] Fix deprecated escape sequence 2017-10-17 15:39:06 +08:00
Sergey M․
83fcf19e2d [drtv] Respect preference for direct http formats (#14509) 2017-10-16 05:48:45 +07:00
Sergey M․
acc4ea6237 [eporner] Add support for embed URLs (closes #14507) 2017-10-16 05:11:25 +07:00
Sergey M․
8cc1840ccb [arte] Capture and output error message 2017-10-15 22:12:34 +07:00
Sergey M․
a9ee4f6e49 [downloader/hls] Fix total fragments count when ad fragments exist 2017-10-15 11:03:54 +07:00
Pawit Pornkitprasan
aaab8c5e71 [niconico] Improve uploader metadata extraction robustness (closes #14135) 2017-10-15 10:40:57 +07:00
Sergey M․
7e721e35da release 2017.10.15.1 2017-10-15 06:16:41 +07:00
Sergey M․
bd7e1406b3 [ChangeLog] Actualize 2017-10-15 06:15:37 +07:00
Sergey M․
74c42d9ec3 [downloader/hls] Ignore anvato ad fragments (closes #14496) 2017-10-15 06:13:48 +07:00
Sergey M․
5efaf43c93 [downloader/fragment] Output ad fragment count 2017-10-15 06:13:07 +07:00
Sergey M․
4827270526 [scrippsnetworks:watch] Bypass geo restriction 2017-10-15 06:11:35 +07:00
Sergey M․
ee093a0ea0 [anvato] Add ability to bypass geo restriction 2017-10-15 06:11:02 +07:00
Sergey M․
9bb2c7673e [redditr] Fix extraction for URLs with query (closes #14495) 2017-10-15 03:38:34 +07:00
Sergey M․
715534083d release 2017.10.15 2017-10-15 02:26:58 +07:00
Sergey M․
ee88c1cbc6 [ChangeLog] Actualize 2017-10-15 02:26:10 +07:00
Sergey M․
57eb45b111 [scrippsnetworks:watch] Add support for geniuskitchen.com 2017-10-15 02:01:16 +07:00
Sergey M․
b21ab85088 [scrippsnetworks:watch] Fix extraction (closes #14389) 2017-10-15 01:57:43 +07:00
Sergey M․
210a2720bc [anvato] Process master m3u8 manifests
>>> Individual m3u8 manifests are not always present, e.g. anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:4173834
2017-10-15 01:44:57 +07:00
Sergey M․
685e87b61f [youtube] Fix relative URLs in description 2017-10-14 20:26:52 +07:00
Remita Amine
c9bd503e7d [spike] bypass geo restriction 2017-10-13 08:41:57 +00:00
Remita Amine
94a530c6cb [howstuffworks] add support for more domains 2017-10-12 19:03:47 +00:00
Remita Amine
e650659b94 [infoq] fix http format downloading 2017-10-12 17:39:51 +00:00
Remita Amine
2637fadc38 [generic] fix some of the tests 2017-10-12 16:14:43 +00:00
Remita Amine
50d808f5c9 [common] add support for jwplayer youtube embeds 2017-10-12 16:12:47 +00:00
Remita Amine
7a64c33aee [rtlnl] add support for another type of embeds 2017-10-12 16:09:06 +00:00
Remita Amine
b0def2c297 [onionstudios] add support for bulbs-video embeds 2017-10-12 16:05:25 +00:00
Remita Amine
81ce479f4d [udn] fix extraction 2017-10-12 16:04:41 +00:00
Remita Amine
414e709405 [shahid] fix extraction(fixes #14448) 2017-10-12 09:20:39 +00:00
Yen Chi Hsuan
645ed3e7c9 [ChangeLog] Update after #14471
[skip ci]
2017-10-12 12:12:37 +08:00
nyuszika7h
c0bddd6d65 [kaltura] Ignore Widevine encrypted video (.wvm)
There is currently no public method to decrypt this, and there may be
other streams available that can be downloaded.

Example URL, has `.wvm` and `.mp4` formats:
https://www.voot.com/shows/bigg-boss-s11/11/538936/bigg-boss-extra-dose-arshi-s-quirky-demand/541700
2017-10-12 12:09:58 +08:00
Yen Chi Hsuan
1baba7f4a8 [vh1] Adding coding cookie 2017-10-12 12:02:26 +08:00
Remita Amine
344d1a6794 [vh1] fix extraction(fixes #9613) 2017-10-11 20:52:14 +00:00
Sergey M․
76581082f6 release 2017.10.12 2017-10-12 01:06:28 +07:00
Sergey M․
2f0eb0a68a [ChangeLog] Actualize 2017-10-12 01:05:14 +07:00
Remita Amine
7fee3377dc [steam] fix extraction(fixes #14067) 2017-10-11 17:50:08 +00:00
Sergey M․
ff3f1a62f0 [funk] Add extractor (closes #14464) 2017-10-12 00:44:13 +07:00
Sergey M․
694b61545c [nexx] Add support for shortcuts and relax domain id extraction 2017-10-12 00:41:20 +07:00
Sergey M․
af0f74288d [YoutubeDL] Improve _default_format_spec (closes #14461) 2017-10-11 23:48:05 +07:00
Remita Amine
9e38dbb19c [voxmedia] add support for recode.net(fixes #14173) 2017-10-11 15:50:20 +00:00
Remita Amine
782195a9d4 [once] add support for vmap urls 2017-10-11 15:50:20 +00:00
Sergey M․
26bae2d965 [generic] Add support for channel9 embeds (closes #14469) 2017-10-11 21:59:30 +07:00
Remita Amine
5fe75f976f [tva] fix extraction(fixes #14328) 2017-10-11 14:15:52 +00:00
Remita Amine
4fe4bda287 [tubitv] add support for new url format(fixes #14460) 2017-10-11 11:36:05 +00:00
Remita Amine
cdab1df912 [afreecatv] remove AfreecaTVGlobalIE
the website now show this message
> Global AfreecaTV will be merged and integrated on July 20th, 2017.
Every user around the world are now able to interact with one another on
www.afreecatv.com!
2017-10-11 10:04:46 +00:00
Yen Chi Hsuan
dfc80bdd2e [ChangeLog] Update after #14420 2017-10-11 02:03:00 +08:00
Khang Nguyen
04af3aca04 Remove YoutubeSharedVideoIE https://github.com/rg3/youtube-dl/issues/14303 2017-10-11 02:01:18 +08:00
Jakub Wilk
d0f2d64114 [slideslive] Add extractor (closes #2680) 2017-10-10 23:45:10 +07:00
Yen Chi Hsuan
01c742ecd0 [facebook] Support thumbnails (closes #14416) 2017-10-10 23:20:38 +08:00
Silvan Mosberger
9e71f88105 [vvvvid] Fix typo 2017-10-10 03:48:26 +07:00
Sergey M․
ae5af89079 [hrti:playlist] Relax _VALID_URL 2017-10-09 23:52:39 +07:00
Sergey M․
197224b7a4 Fix some regexes 2017-10-09 23:50:53 +07:00
Sergey M․
8992331621 [wdr] Relax media link regex (closes #14447) 2017-10-08 21:36:50 +07:00
Aleksandar Topuzović
b0dde6686c [hrti] Relax _VALID_URL 2017-10-08 05:40:08 +07:00
Sergey M․
a22ccac1f0 [fox] Delegate to uplynk:preplay (#14147) 2017-10-08 01:34:17 +07:00
Sergey M․
8b561bfc9d [youtube] Add support for hooktube.com (closes #14437) 2017-10-07 21:59:04 +07:00
Sergey M․
8e751a185c release 2017.10.07 2017-10-07 05:02:53 +07:00
Sergey M․
3fc8f5b7c2 [ChangeLog] Actualize 2017-10-07 05:01:38 +07:00
Sergey M․
665f42d8c1 [reddit] Sort formats (closes #14430) 2017-10-07 01:40:00 +07:00
Sergey M․
e952847541 [PULL_REQUEST_TEMPLATE.md] Add explicit entry on flake8 2017-10-07 00:58:19 +07:00
remis
b1a7bf44b9 [lnkgo] Relax _VALID_URL 2017-10-06 23:59:09 +07:00
Jalaz Kumar
2e2a8e97d5 [pornflip] Extend _VALID_URL (closes #14405) 2017-10-06 23:56:31 +07:00
Sergey M․
ac93c09ab2 [xtube] Add support for embedded URLs (closes #14417) 2017-10-06 23:53:32 +07:00
Sergey M․
cd6fc19ed7 [YoutubeDL] Ignore duplicates in --playlist-items
E.g. '--playlist-items 2-4,3-4,3' should result in '[2,3,4]', not '[2,3,4,3,4,3]'
2017-10-06 23:50:34 +07:00
Sergey M․
86a15ed64b [test_YoutubeDL] Add test for #14425 2017-10-06 23:41:28 +07:00
Sergey M․
7e85e8729f [YoutubeDL] Fix out of range --playlist-items for iterable playlists and reduce code duplication (closes #14425) 2017-10-06 23:34:46 +07:00
Sergey M․
6be08ce602 [utils] Use in OnDemandPagedList by default
Not using cache results in redundant network I/O due to downloading the same pages while using --playlist-items n-m
2017-10-06 23:13:53 +07:00
Sergey M․
cf5f6ed5be [xvideos] Add support for embed URLs and improve extraction (closes #14409) 2017-10-05 00:27:24 +07:00
Philipp Hagemeister
6b46285e85 [comedycentral] new shortcut :theopposition for "The Opposition" show 2017-10-04 07:45:13 +02:00
Sergey M․
6e736d86e7 [beeg] Fix extraction (closes #14403) 2017-10-04 04:27:42 +07:00
M.K
c110944fa2 [extractor/common] Fix typo in _parse_mpd_formats 2017-10-04 03:50:27 +07:00
Sergey M․
9524dca3ac [README.md] Use revision bound link to YoutubeDL options (closes #14401) 2017-10-04 02:53:20 +07:00
Jakub Wilk
3e4cedf9e8 [tvn24] Relax _VALID_URL 2017-10-03 23:28:13 +07:00
remitamine
bfd484ccff Merge pull request #14392 from snipem/nbc-fix
Fix for JSON meta data download(closes #13651)
2017-10-03 14:49:55 +00:00
Matthias Küch
b7e14f06a4 Fix for JSON meta data download
Added fixes according to #13651 and user @remitamine
2017-10-03 15:17:28 +02:00
Sergey M․
d2ae7e24e5 [postprocessor/ffmpeg] Convert to opus using libopus (closes #14381) 2017-10-02 04:43:25 +07:00
Sergey M․
544ffb7790 [ketnet] Add support for videos without direct sources (closes #14377) 2017-10-02 04:15:12 +07:00
Sergey M․
117589dfa2 [canvas] Generalize mediazone.vrt.be extractor and rework canvas and een 2017-10-02 04:14:36 +07:00
Sergey M․
839728f5bf [afreecatv] Add support for adult videos (closes #14376) 2017-10-02 03:28:25 +07:00
Sergey M․
fcdd37d053 release 2017.10.01 2017-10-01 21:54:11 +07:00
Sergey M․
1dd126180e [ChangeLog] Actualize 2017-10-01 21:45:56 +07:00
Rafal Borczuch
4e599194d6 [tvp] Add support for new URL schema (closes #14368) 2017-10-01 18:59:00 +07:00
Sergey M․
c5b7014a9c [generic] Add support for single format Video.js embeds (closes #14371) 2017-10-01 07:01:42 +07:00
Sergey M․
c8da40d834 [yahoo] Bypass geo restriction for brightcove (#14210) 2017-10-01 04:49:27 +07:00
Sergey M․
b69ca0ccfc [yahoo] Use extracted brightcove account id (closes #14210) 2017-10-01 04:37:42 +07:00
Giuseppe Fabiano
2c53bd51c6 [rtve:alacarta] Fix extraction (closes #14290) 2017-10-01 03:21:17 +07:00
Sergey M․
3836b02ce8 [YoutubeDL] PEP 8 2017-09-30 22:56:40 +07:00
Sergey M․
fa3fdeb41f [yahoo] Fix some tests 2017-09-30 22:54:22 +07:00
Sergey M․
eb9a15be60 [yahoo] Add support for custom brigthcove embeds (closes #14210) 2017-09-30 22:47:03 +07:00
Sergey M․
3600fd591d [YoutubeDL] Document youtube_include_dash_manifest 2017-09-28 00:46:48 +07:00
Sergey M․
63d990d285 [generic] Add support for Video.js embeds 2017-09-28 00:37:30 +07:00
Timendum
b14b2283a0 [gfycat] Add support for /gifs/detail URLs (closes #14322) 2017-09-27 22:48:47 +07:00
Sergey M․
02d01e15f1 [generic] Fix infinite recursion for twitter:player URLs (closes #14339) 2017-09-26 21:47:18 +07:00
Sergey M․
db96252831 [xhamsterembed] Fix extraction (closes #14308) 2017-09-24 19:23:08 +07:00
Sergey M․
8b389f7e3c Credit the author of multiple generic HTML5 embeds fix 2017-09-24 18:21:38 +07:00
113 changed files with 2163 additions and 1023 deletions

View File

@@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.09.24*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.09.24**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.10.29*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.10.29**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2017.09.24
[debug] youtube-dl version 2017.10.29
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@@ -9,6 +9,7 @@
### Before submitting a *pull request* make sure you have:
- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/rg3/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/rg3/youtube-dl#youtube-dl-coding-conventions) sections
- [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests
- [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8)
### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options:
- [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/)

View File

@@ -11,12 +11,12 @@ sudo: false
env:
- YTDL_TEST_SET=core
- YTDL_TEST_SET=download
matrix:
fast_finish: true
allow_failures:
- env: YTDL_TEST_SET=download
script: ./devscripts/run_tests.sh
notifications:
email:
- filippo.valsorda@gmail.com
- yasoob.khld@gmail.com
# irc:
# channels:
# - "irc.freenode.org#youtube-dl"
# skip_join: true

View File

@@ -230,3 +230,4 @@ Ashutosh Chaudhary
John Dong
Tatsuyuki Ishi
Daniel Weber
Kay Bouché

147
ChangeLog
View File

@@ -1,3 +1,148 @@
version 2017.10.29
Core
* [extractor/common] Prefix format id for audio only HLS formats
+ [utils] Add support for zero years and months in parse_duration
Extractors
* [egghead] Fix extraction (#14388)
+ [fxnetworks] Extract series metadata (#14603)
+ [younow] Add support for younow.com (#9255, #9432, #12436)
* [dctptv] Fix extraction (#14599)
* [youtube] Restrict embed regex (#14600)
* [vimeo] Restrict iframe embed regex (#14600)
* [soundgasm] Improve extraction (#14588)
- [myvideo] Remove extractor (#8557)
+ [nbc] Add support for classic-tv videos (#14575)
+ [vrtnu] Add support for cookies authentication and simplify (#11873)
+ [canvas] Add support for vrt.be/vrtnu (#11873)
* [twitch:clips] Fix title extraction (#14566)
+ [ndtv] Add support for sub-sites (#14534)
* [dramafever] Fix login error message extraction
+ [nick] Add support for more nickelodeon sites (no, dk, se, ch, fr, es, pt,
ro, hu) (#14553)
version 2017.10.20
Core
* [downloader/fragment] Report warning instead of error on inconsistent
download state
* [downloader/hls] Fix total fragments count when ad fragments exist
Extractors
* [parliamentliveuk] Fix extraction (#14524)
* [soundcloud] Update client id (#14546)
+ [servus] Add support for servus.com (#14362)
+ [unity] Add support for unity3d.com (#14528)
* [youtube] Replace youtube redirect URLs in description (#14517)
* [pbs] Restrict direct video URL regular expression (#14519)
* [drtv] Respect preference for direct HTTP formats (#14509)
+ [eporner] Add support for embed URLs (#14507)
* [arte] Capture and output error message
* [niconico] Improve uploader metadata extraction robustness (#14135)
version 2017.10.15.1
Core
* [downloader/hls] Ignore anvato ad fragments (#14496)
* [downloader/fragment] Output ad fragment count
Extractors
* [scrippsnetworks:watch] Bypass geo restriction
+ [anvato] Add ability to bypass geo restriction
* [redditr] Fix extraction for URLs with query (#14495)
version 2017.10.15
Core
+ [common] Add support for jwplayer youtube embeds
Extractors
* [scrippsnetworks:watch] Fix extraction (#14389)
* [anvato] Process master m3u8 manifests
* [youtube] Fix relative URLs in description
* [spike] Bypass geo restriction
+ [howstuffworks] Add support for more domains
* [infoq] Fix http format downloading
+ [rtlnl] Add support for another type of embeds
+ [onionstudios] Add support for bulbs-video embeds
* [udn] Fix extraction
* [shahid] Fix extraction (#14448)
* [kaltura] Ignore Widevine encrypted video (.wvm) (#14471)
* [vh1] Fix extraction (#9613)
version 2017.10.12
Core
* [YoutubeDL] Improve _default_format_spec (#14461)
Extractors
* [steam] Fix extraction (#14067)
+ [funk] Add support for funk.net (#14464)
+ [nexx] Add support for shortcuts and relax domain id extraction
+ [voxmedia] Add support for recode.net (#14173)
+ [once] Add support for vmap URLs
+ [generic] Add support for channel9 embeds (#14469)
* [tva] Fix extraction (#14328)
+ [tubitv] Add support for new URL format (#14460)
- [afreecatv:global] Remove extractor
- [youtube:shared] Removed extractor (#14420)
+ [slideslive] Add support for slideslive.com (#2680)
+ [facebook] Support thumbnails (#14416)
* [vvvvid] Fix episode number extraction (#14456)
* [hrti:playlist] Relax URL regular expression
* [wdr] Relax media link regular expression (#14447)
* [hrti] Relax URL regular expression (#14443)
* [fox] Delegate extraction to uplynk:preplay (#14147)
+ [youtube] Add support for hooktube.com (#14437)
version 2017.10.07
Core
* [YoutubeDL] Ignore duplicates in --playlist-items
* [YoutubeDL] Fix out of range --playlist-items for iterable playlists and
reduce code duplication (#14425)
+ [utils] Use cache in OnDemandPagedList by default
* [postprocessor/ffmpeg] Convert to opus using libopus (#14381)
Extractors
* [reddit] Sort formats (#14430)
* [lnkgo] Relax URL regular expression (#14423)
* [pornflip] Extend URL regular expression (#14405, #14406)
+ [xtube] Add support for embed URLs (#14417)
+ [xvideos] Add support for embed URLs and improve extraction (#14409)
* [beeg] Fix extraction (#14403)
* [tvn24] Relax URL regular expression (#14395)
* [nbc] Fix extraction (#13651, #13715, #14137, #14198, #14312, #14314, #14378,
#14392, #14414, #14419, #14431)
+ [ketnet] Add support for videos without direct sources (#14377)
* [canvas] Generalize mediazone.vrt.be extractor and rework canvas and een
+ [afreecatv] Add support for adult videos (#14376)
version 2017.10.01
Core
* [YoutubeDL] Document youtube_include_dash_manifest
Extractors
+ [tvp] Add support for new URL schema (#14368)
+ [generic] Add support for single format Video.js embeds (#14371)
* [yahoo] Bypass geo restriction for brightcove (#14210)
* [yahoo] Use extracted brightcove account id (#14210)
* [rtve:alacarta] Fix extraction (#14290)
+ [yahoo] Add support for custom brigthcove embeds (#14210)
+ [generic] Add support for Video.js embeds
+ [gfycat] Add support for /gifs/detail URLs (#14322)
* [generic] Fix infinite recursion for twitter:player URLs (#14339)
* [xhamsterembed] Fix extraction (#14308)
version 2017.09.24
Core
@@ -734,7 +879,7 @@ version 2017.04.14
Core
+ [downloader/hls] Add basic support for EXT-X-BYTERANGE tag (#10955)
+ [adobepass] Improve Comcast and Verison login code (#10803)
+ [adobepass] Improve Comcast and Verizon login code (#10803)
+ [adobepass] Add support for Verizon (#10803)
Extractors

View File

@@ -1,3 +1,5 @@
[![Build Status](https://travis-ci.org/rg3/youtube-dl.svg?branch=master)](https://travis-ci.org/rg3/youtube-dl)
youtube-dl - download videos from youtube.com or other video platforms
- [INSTALLATION](#installation)
@@ -1167,7 +1169,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```
Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L129-L279). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:

View File

@@ -36,7 +36,6 @@
- **AdultSwim**
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
- **afreecatv**: afreecatv.com
- **afreecatv:global**: afreecatv.com
- **AirMozilla**
- **AliExpressLive**
- **AlJazeera**
@@ -130,7 +129,8 @@
- **CamWithHer**
- **canalc2.tv**
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
- **Canvas**: canvas.be and een.be
- **Canvas**
- **CanvasEen**: canvas.be and een.be
- **CarambaTV**
- **CarambaTVPage**
- **CartoonNetwork**
@@ -295,6 +295,7 @@
- **freespeech.org**
- **FreshLive**
- **Funimation**
- **Funk**
- **FunnyOrDie**
- **Fusion**
- **Fux**
@@ -497,7 +498,6 @@
- **MySpace:album**
- **MySpass**
- **Myvi**
- **myvideo** (Currently broken)
- **MyVidster**
- **n-tv.de**
- **natgeo**
@@ -727,6 +727,7 @@
- **SenateISVP**
- **SendtoNews**
- **ServingSys**
- **Servus**
- **Sexu**
- **Shahid**
- **Shared**: shared.sx
@@ -737,6 +738,7 @@
- **skynewsarabia:video**
- **SkySports**
- **Slideshare**
- **SlidesLive**
- **Slutload**
- **smotri**: Smotri.com
- **smotri:broadcast**: Smotri.com broadcasts
@@ -885,6 +887,7 @@
- **UDNEmbed**: 聯合影音
- **UKTVPlay**
- **Unistra**
- **Unity**
- **uol.com.br**
- **uplynk**
- **uplynk:preplay**
@@ -968,10 +971,12 @@
- **VoiceRepublic**
- **Voot**
- **VoxMedia**
- **VoxMediaVolume**
- **Vporn**
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **Vrak**
- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
- **VrtNU**: VrtNU.be
- **vrv**
- **vrv:series**
- **VShare**
@@ -1030,6 +1035,9 @@
- **YouJizz**
- **youku**: 优酷
- **youku:show**
- **YouNowChannel**
- **YouNowLive**
- **YouNowMoment**
- **YouPorn**
- **YourUpload**
- **youtube**: YouTube.com
@@ -1043,7 +1051,6 @@
- **youtube:search**: YouTube.com searches
- **youtube:search:date**: YouTube.com searches, newest videos first
- **youtube:search_url**: YouTube.com search URLs
- **youtube:shared**
- **youtube:show**: YouTube.com (multi-season) shows
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)

View File

@@ -466,12 +466,18 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL({'simulate': True})
self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best')
ydl = YDL({'is_live': True})
self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
ydl = YDL({'simulate': True, 'is_live': True})
self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best')
ydl = YDL({'outtmpl': '-'})
self.assertEqual(ydl._default_format_spec({}), 'best')
self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
ydl = YDL({})
self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo+bestaudio/best')
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best')
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
class TestYoutubeDL(unittest.TestCase):
@@ -770,6 +776,12 @@ class TestYoutubeDL(unittest.TestCase):
result = get_ids({'playlist_items': '10'})
self.assertEqual(result, [])
result = get_ids({'playlist_items': '3-10'})
self.assertEqual(result, [3, 4])
result = get_ids({'playlist_items': '2-4,3-4,3'})
self.assertEqual(result, [2, 3, 4])
def test_urlopen_no_file_protocol(self):
# see https://github.com/rg3/youtube-dl/issues/8227
ydl = YDL()

View File

@@ -540,6 +540,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(parse_duration('87 Min.'), 5220)
self.assertEqual(parse_duration('PT1H0.040S'), 3600.04)
self.assertEqual(parse_duration('PT00H03M30SZ'), 210)
self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88)
def test_fix_xml_ampersands(self):
self.assertEqual(

View File

@@ -65,6 +65,7 @@ from .utils import (
locked_file,
make_HTTPS_handler,
MaxDownloadsReached,
orderedSet,
PagedList,
parse_filesize,
PerRequestProxyHandler,
@@ -304,6 +305,12 @@ class YoutubeDL(object):
otherwise prefer avconv.
postprocessor_args: A list of additional command-line arguments for the
postprocessor.
The following options are used by the Youtube extractor:
youtube_include_dash_manifest: If True (default), DASH manifests and related
data will be downloaded and processed by extractor.
You can reduce network I/O by disabling it if you don't
care about DASH.
"""
_NUMERIC_FIELDS = set((
@@ -902,15 +909,25 @@ class YoutubeDL(object):
yield int(item)
else:
yield int(string_segment)
playlistitems = iter_playlistitems(playlistitems_str)
playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
ie_entries = ie_result['entries']
def make_playlistitems_entries(list_ie_entries):
num_entries = len(list_ie_entries)
return [
list_ie_entries[i - 1] for i in playlistitems
if -num_entries <= i - 1 < num_entries]
def report_download(num_entries):
self.to_screen(
'[%s] playlist %s: Downloading %d videos' %
(ie_result['extractor'], playlist, num_entries))
if isinstance(ie_entries, list):
n_all_entries = len(ie_entries)
if playlistitems:
entries = [
ie_entries[i - 1] for i in playlistitems
if -n_all_entries <= i - 1 < n_all_entries]
entries = make_playlistitems_entries(ie_entries)
else:
entries = ie_entries[playliststart:playlistend]
n_entries = len(entries)
@@ -928,20 +945,15 @@ class YoutubeDL(object):
entries = ie_entries.getslice(
playliststart, playlistend)
n_entries = len(entries)
self.to_screen(
'[%s] playlist %s: Downloading %d videos' %
(ie_result['extractor'], playlist, n_entries))
report_download(n_entries)
else: # iterable
if playlistitems:
entry_list = list(ie_entries)
entries = [entry_list[i - 1] for i in playlistitems]
entries = make_playlistitems_entries(list(ie_entries))
else:
entries = list(itertools.islice(
ie_entries, playliststart, playlistend))
n_entries = len(entries)
self.to_screen(
'[%s] playlist %s: Downloading %d videos' %
(ie_result['extractor'], playlist, n_entries))
report_download(n_entries)
if self.params.get('playlistreverse', False):
entries = entries[::-1]
@@ -1066,22 +1078,27 @@ class YoutubeDL(object):
return _filter
def _default_format_spec(self, info_dict, download=True):
req_format_list = []
def can_have_partial_formats():
if self.params.get('simulate', False):
return True
if not download:
return True
if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
return False
if info_dict.get('is_live'):
return False
def can_merge():
merger = FFmpegMergerPP(self)
return merger.available and merger.can_merge()
if can_have_partial_formats():
req_format_list.append('bestvideo+bestaudio')
req_format_list.append('best')
def prefer_best():
if self.params.get('simulate', False):
return False
if not download:
return False
if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
return True
if info_dict.get('is_live'):
return True
if not can_merge():
return True
return False
req_format_list = ['bestvideo+bestaudio', 'best']
if prefer_best():
req_format_list.reverse()
return '/'.join(req_format_list)
def build_format_selector(self, format_spec):

View File

@@ -117,9 +117,15 @@ class FragmentFD(FileDownloader):
def _prepare_frag_download(self, ctx):
if 'live' not in ctx:
ctx['live'] = False
if not ctx['live']:
total_frags_str = '%d' % ctx['total_frags']
ad_frags = ctx.get('ad_frags', 0)
if ad_frags:
total_frags_str += ' (not including %d ad)' % ad_frags
else:
total_frags_str = 'unknown (live)'
self.to_screen(
'[%s] Total fragments: %s'
% (self.FD_NAME, ctx['total_frags'] if not ctx['live'] else 'unknown (live)'))
'[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
self.report_destination(ctx['filename'])
dl = HttpQuietDownloader(
self.ydl,
@@ -152,7 +158,7 @@ class FragmentFD(FileDownloader):
if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
self._read_ytdl_file(ctx)
if ctx['fragment_index'] > 0 and resume_len == 0:
self.report_error(
self.report_warning(
'Inconsistent state of incomplete fragment download. '
'Restarting from the beginning...')
ctx['fragment_index'] = resume_len = 0

View File

@@ -75,15 +75,30 @@ class HlsFD(FragmentFD):
fd.add_progress_hook(ph)
return fd.real_download(filename, info_dict)
total_frags = 0
def anvato_ad(s):
return s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
media_frags = 0
ad_frags = 0
ad_frag_next = False
for line in s.splitlines():
line = line.strip()
if line and not line.startswith('#'):
total_frags += 1
if not line:
continue
if line.startswith('#'):
if anvato_ad(line):
ad_frags += 1
ad_frag_next = True
continue
if ad_frag_next:
ad_frag_next = False
continue
media_frags += 1
ctx = {
'filename': filename,
'total_frags': total_frags,
'total_frags': media_frags,
'ad_frags': ad_frags,
}
self._prepare_and_start_frag_download(ctx)
@@ -101,10 +116,14 @@ class HlsFD(FragmentFD):
decrypt_info = {'METHOD': 'NONE'}
byte_range = {}
frag_index = 0
ad_frag_next = False
for line in s.splitlines():
line = line.strip()
if line:
if not line.startswith('#'):
if ad_frag_next:
ad_frag_next = False
continue
frag_index += 1
if frag_index <= ctx['fragment_index']:
continue
@@ -175,6 +194,8 @@ class HlsFD(FragmentFD):
'start': sub_range_start,
'end': sub_range_start + int(splitted_byte_range[0]),
}
elif anvato_ad(line):
ad_frag_next = True
self._finish_frag_download(ctx)

View File

@@ -131,7 +131,7 @@ class AENetworksIE(AENetworksBaseIE):
r'data-media-url=(["\'])(?P<url>(?:(?!\1).)+?)\1'],
webpage, 'video url', group='url')
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
info = self._parse_theplatform_metadata(theplatform_metadata)
if theplatform_metadata.get('AETN$isBehindWall'):
requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain]

View File

@@ -138,6 +138,23 @@ class AfreecaTVIE(InfoExtractor):
'params': {
'skip_download': True,
},
}, {
# adult video
'url': 'http://vod.afreecatv.com/PLAYER/STATION/26542731',
'info_dict': {
'id': '20171001_F1AE1711_196617479_1',
'ext': 'mp4',
'title': '[생]서아 초심 찾기 방송 (part 1)',
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'BJ서아',
'uploader_id': 'bjdyrksu',
'upload_date': '20171001',
'duration': 3600,
'age_limit': 18,
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
'only_matching': True,
@@ -160,7 +177,15 @@ class AfreecaTVIE(InfoExtractor):
video_xml = self._download_xml(
'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
video_id, query={'nTitleNo': video_id})
video_id, query={
'nTitleNo': video_id,
'partialView': 'SKIP_ADULT',
})
flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
if flag and flag != 'SUCCEED':
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, flag), expected=True)
video_element = video_xml.findall(compat_xpath('./track/video'))[1]
if video_element is None or video_element.text is None:
@@ -246,107 +271,3 @@ class AfreecaTVIE(InfoExtractor):
})
return info
class AfreecaTVGlobalIE(AfreecaTVIE):
IE_NAME = 'afreecatv:global'
_VALID_URL = r'https?://(?:www\.)?afreeca\.tv/(?P<channel_id>\d+)(?:/v/(?P<video_id>\d+))?'
_TESTS = [{
'url': 'http://afreeca.tv/36853014/v/58301',
'info_dict': {
'id': '58301',
'title': 'tryhard top100',
'uploader_id': '36853014',
'uploader': 'makgi Hearthstone Live!',
},
'playlist_count': 3,
}]
def _real_extract(self, url):
channel_id, video_id = re.match(self._VALID_URL, url).groups()
video_type = 'video' if video_id else 'live'
query = {
'pt': 'view',
'bid': channel_id,
}
if video_id:
query['vno'] = video_id
video_data = self._download_json(
'http://api.afreeca.tv/%s/view_%s.php' % (video_type, video_type),
video_id or channel_id, query=query)['channel']
if video_data.get('result') != 1:
raise ExtractorError('%s said: %s' % (self.IE_NAME, video_data['remsg']))
title = video_data['title']
info = {
'thumbnail': video_data.get('thumb'),
'view_count': int_or_none(video_data.get('vcnt')),
'age_limit': int_or_none(video_data.get('grade')),
'uploader_id': channel_id,
'uploader': video_data.get('cname'),
}
if video_id:
entries = []
for i, f in enumerate(video_data.get('flist', [])):
video_key = self.parse_video_key(f.get('key', ''))
f_url = f.get('file')
if not video_key or not f_url:
continue
entries.append({
'id': '%s_%s' % (video_id, video_key.get('part', i + 1)),
'title': title,
'upload_date': video_key.get('upload_date'),
'duration': int_or_none(f.get('length')),
'url': f_url,
'protocol': 'm3u8_native',
'ext': 'mp4',
})
info.update({
'id': video_id,
'title': title,
'duration': int_or_none(video_data.get('length')),
})
if len(entries) > 1:
info['_type'] = 'multi_video'
info['entries'] = entries
elif len(entries) == 1:
i = entries[0].copy()
i.update(info)
info = i
else:
formats = []
for s in video_data.get('strm', []):
s_url = s.get('purl')
if not s_url:
continue
stype = s.get('stype')
if stype == 'HLS':
formats.extend(self._extract_m3u8_formats(
s_url, channel_id, 'mp4', m3u8_id=stype, fatal=False))
elif stype == 'RTMP':
format_id = [stype]
label = s.get('label')
if label:
format_id.append(label)
formats.append({
'format_id': '-'.join(format_id),
'url': s_url,
'tbr': int_or_none(s.get('bps')),
'height': int_or_none(s.get('brt')),
'ext': 'flv',
'rtmp_live': True,
})
self._sort_formats(formats)
info.update({
'id': channel_id,
'title': self._live_title(title),
'is_live': True,
'formats': formats,
})
return info

View File

@@ -18,6 +18,7 @@ from ..utils import (
int_or_none,
strip_jsonp,
unescapeHTML,
unsmuggle_url,
)
@@ -197,12 +198,16 @@ class AnvatoIE(InfoExtractor):
'tbr': tbr if tbr != 0 else None,
}
if ext == 'm3u8' or media_format in ('m3u8', 'm3u8-variant'):
if tbr is not None:
a_format.update({
'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
'ext': 'mp4',
})
if media_format == 'm3u8' and tbr is not None:
a_format.update({
'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
'ext': 'mp4',
})
elif media_format == 'm3u8-variant' or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
continue
elif ext == 'mp3' or media_format == 'mp3':
a_format['vcodec'] = 'none'
else:
@@ -271,6 +276,9 @@ class AnvatoIE(InfoExtractor):
anvplayer_data['accessKey'], anvplayer_data['video'])
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
mobj = re.match(self._VALID_URL, url)
access_key, video_id = mobj.group('access_key_or_mcp', 'id')
if access_key not in self._ANVACK_TABLE:

View File

@@ -117,7 +117,7 @@ class AppleTrailersIE(InfoExtractor):
continue
formats.append({
'format_id': '%s-%s' % (version, size),
'url': re.sub(r'_(\d+p.mov)', r'_h\1', src),
'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src),
'width': int_or_none(size_data.get('width')),
'height': int_or_none(size_data.get('height')),
'language': version[:2],
@@ -179,7 +179,7 @@ class AppleTrailersIE(InfoExtractor):
formats = []
for format in settings['metadata']['sizes']:
# The src is a file pointing to the real video file
format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src'])
format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src'])
formats.append({
'url': format_url,
'format': format['type'],

View File

@@ -195,7 +195,7 @@ class ARDMediathekIE(InfoExtractor):
title = self._html_search_regex(
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
r'<meta name="dcterms.title" content="(.*?)"/>',
r'<meta name="dcterms\.title" content="(.*?)"/>',
r'<h4 class="headline">(.*?)</h4>'],
webpage, 'title')
description = self._html_search_meta(

View File

@@ -6,6 +6,7 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_str,
compat_urllib_parse_urlparse,
)
from ..utils import (
@@ -15,6 +16,7 @@ from ..utils import (
int_or_none,
NO_DEFAULT,
qualities,
try_get,
unified_strdate,
)
@@ -80,12 +82,15 @@ class ArteTVBaseIE(InfoExtractor):
info = self._download_json(json_url, video_id)
player_info = info['videoJsonPlayer']
vsr = player_info['VSR']
vsr = try_get(player_info, lambda x: x['VSR'], dict)
if not vsr:
raise ExtractorError(
'Video %s is not available' % player_info.get('VID') or video_id,
expected=True)
error = None
if try_get(player_info, lambda x: x['custom_msg']['type']) == 'error':
error = try_get(
player_info, lambda x: x['custom_msg']['msg'], compat_str)
if not error:
error = 'Video %s is not available' % player_info.get('VID') or video_id
raise ExtractorError(error, expected=True)
upload_date_str = player_info.get('shootingDate')
if not upload_date_str:

View File

@@ -47,7 +47,7 @@ class AZMedienIE(AZMedienBaseIE):
'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom',
'info_dict': {
'id': '1_2444peh4',
'ext': 'mov',
'ext': 'mp4',
'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom',
'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8',
'uploader_id': 'TeleZ?ri',

View File

@@ -386,7 +386,7 @@ class BBCCoUkIE(InfoExtractor):
m3u8_id=format_id, fatal=False))
if re.search(self._USP_RE, href):
usp_formats = self._extract_m3u8_formats(
re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href),
re.sub(self._USP_RE, r'/\1\.ism/\1\.m3u8', href),
programme_id, ext='mp4', entry_protocol='m3u8_native',
m3u8_id=format_id, fatal=False)
for f in usp_formats:

View File

@@ -60,9 +60,13 @@ class BeegIE(InfoExtractor):
beeg_version = beeg_version or '2185'
beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H'
video = self._download_json(
'https://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id),
video_id)
for api_path in ('', 'api.'):
video = self._download_json(
'https://%sbeeg.com/api/v6/%s/video/%s'
% (api_path, beeg_version, video_id), video_id,
fatal=api_path == 'api.')
if video:
break
def split(o, e):
def cut(s, x):

View File

@@ -1,26 +1,112 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..utils import float_or_none
from .gigya import GigyaBaseIE
from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
strip_or_none,
float_or_none,
int_or_none,
parse_iso8601,
)
class CanvasIE(InfoExtractor):
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrtvideo)/assets/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'md5': '90139b746a0a9bd7bb631283f6e2a64e',
'info_dict': {
'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'ext': 'flv',
'title': 'Nachtwacht: De Greystook',
'description': 'md5:1db3f5dc4c7109c821261e7512975be7',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1468.03,
},
'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
}, {
'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
site_id, video_id = mobj.group('site_id'), mobj.group('id')
data = self._download_json(
'https://mediazone.vrt.be/api/v1/%s/assets/%s'
% (site_id, video_id), video_id)
title = data['title']
description = data.get('description')
formats = []
for target in data['targetUrls']:
format_url, format_type = target.get('url'), target.get('type')
if not format_url or not format_type:
continue
if format_type == 'HLS':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id=format_type, fatal=False))
elif format_type == 'HDS':
formats.extend(self._extract_f4m_formats(
format_url, video_id, f4m_id=format_type, fatal=False))
elif format_type == 'MPEG_DASH':
formats.extend(self._extract_mpd_formats(
format_url, video_id, mpd_id=format_type, fatal=False))
elif format_type == 'HSS':
formats.extend(self._extract_ism_formats(
format_url, video_id, ism_id='mss', fatal=False))
else:
formats.append({
'format_id': format_type,
'url': format_url,
})
self._sort_formats(formats)
subtitles = {}
subtitle_urls = data.get('subtitleUrls')
if isinstance(subtitle_urls, list):
for subtitle in subtitle_urls:
subtitle_url = subtitle.get('url')
if subtitle_url and subtitle.get('type') == 'CLOSED':
subtitles.setdefault('nl', []).append({'url': subtitle_url})
return {
'id': video_id,
'display_id': video_id,
'title': title,
'description': description,
'formats': formats,
'duration': float_or_none(data.get('duration'), 1000),
'thumbnail': data.get('posterImageUrl'),
'subtitles': subtitles,
}
class CanvasEenIE(InfoExtractor):
IE_DESC = 'canvas.be and een.be'
_VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
'md5': 'ea838375a547ac787d4064d8c7860a6c',
'md5': 'ed66976748d12350b118455979cca293',
'info_dict': {
'id': 'mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
'display_id': 'de-afspraak-veilt-voor-de-warmste-week',
'ext': 'mp4',
'ext': 'flv',
'title': 'De afspraak veilt voor de Warmste Week',
'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 49.02,
}
},
'expected_warnings': ['is not a supported codec'],
}, {
# with subtitles
'url': 'http://www.canvas.be/video/panorama/2016/pieter-0167',
@@ -40,7 +126,8 @@ class CanvasIE(InfoExtractor):
},
'params': {
'skip_download': True,
}
},
'skip': 'Pagina niet gevonden',
}, {
'url': 'https://www.een.be/sorry-voor-alles/herbekijk-sorry-voor-alles',
'info_dict': {
@@ -54,7 +141,8 @@ class CanvasIE(InfoExtractor):
},
'params': {
'skip_download': True,
}
},
'skip': 'Episode no longer available',
}, {
'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend',
'only_matching': True,
@@ -66,55 +154,157 @@ class CanvasIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
title = (self._search_regex(
title = strip_or_none(self._search_regex(
r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>',
webpage, 'title', default=None) or self._og_search_title(
webpage)).strip()
webpage, default=None))
video_id = self._html_search_regex(
r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', group='id')
data = self._download_json(
'https://mediazone.vrt.be/api/v1/%s/assets/%s'
% (site_id, video_id), display_id)
formats = []
for target in data['targetUrls']:
format_url, format_type = target.get('url'), target.get('type')
if not format_url or not format_type:
continue
if format_type == 'HLS':
formats.extend(self._extract_m3u8_formats(
format_url, display_id, entry_protocol='m3u8_native',
ext='mp4', preference=0, fatal=False, m3u8_id=format_type))
elif format_type == 'HDS':
formats.extend(self._extract_f4m_formats(
format_url, display_id, f4m_id=format_type, fatal=False))
elif format_type == 'MPEG_DASH':
formats.extend(self._extract_mpd_formats(
format_url, display_id, mpd_id=format_type, fatal=False))
else:
formats.append({
'format_id': format_type,
'url': format_url,
})
self._sort_formats(formats)
subtitles = {}
subtitle_urls = data.get('subtitleUrls')
if isinstance(subtitle_urls, list):
for subtitle in subtitle_urls:
subtitle_url = subtitle.get('url')
if subtitle_url and subtitle.get('type') == 'CLOSED':
subtitles.setdefault('nl', []).append({'url': subtitle_url})
r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
group='id')
return {
'_type': 'url_transparent',
'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (site_id, video_id),
'ie_key': CanvasIE.ie_key(),
'id': video_id,
'display_id': display_id,
'title': title,
'description': self._og_search_description(webpage),
'formats': formats,
'duration': float_or_none(data.get('duration'), 1000),
'thumbnail': data.get('posterImageUrl'),
'subtitles': subtitles,
}
class VrtNUIE(GigyaBaseIE):
IE_DESC = 'VrtNU.be'
_VALID_URL = r'https?://(?:www\.)?vrt\.be/(?P<site_id>vrtnu)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/',
'info_dict': {
'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
'ext': 'flv',
'title': 'De zwarte weduwe',
'description': 'md5:d90c21dced7db869a85db89a623998d4',
'duration': 1457.04,
'thumbnail': r're:^https?://.*\.jpg$',
'season': '1',
'season_number': 1,
'episode_number': 1,
},
'skip': 'This video is only available for registered users'
}]
_NETRC_MACHINE = 'vrtnu'
_APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
_CONTEXT_ID = 'R3595707040'
def _real_initialize(self):
self._login()
def _login(self):
username, password = self._get_login_info()
if username is None:
return
auth_data = {
'APIKey': self._APIKEY,
'targetEnv': 'jssdk',
'loginID': username,
'password': password,
'authMode': 'cookie',
}
auth_info = self._gigya_login(auth_data)
# Sometimes authentication fails for no good reason, retry
login_attempt = 1
while login_attempt <= 3:
try:
# When requesting a token, no actual token is returned, but the
# necessary cookies are set.
self._request_webpage(
'https://token.vrt.be',
None, note='Requesting a token', errnote='Could not get a token',
headers={
'Content-Type': 'application/json',
'Referer': 'https://www.vrt.be/vrtnu/',
},
data=json.dumps({
'uid': auth_info['UID'],
'uidsig': auth_info['UIDSignature'],
'ts': auth_info['signatureTimestamp'],
'email': auth_info['profile']['email'],
}).encode('utf-8'))
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
login_attempt += 1
self.report_warning('Authentication failed')
self._sleep(1, None, msg_template='Waiting for %(timeout)s seconds before trying again')
else:
raise e
else:
break
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
title = self._html_search_regex(
r'(?ms)<h1 class="content__heading">(.+?)</h1>',
webpage, 'title').strip()
description = self._html_search_regex(
r'(?ms)<div class="content__description">(.+?)</div>',
webpage, 'description', default=None)
season = self._html_search_regex(
[r'''(?xms)<div\ class="tabs__tab\ tabs__tab--active">\s*
<span>seizoen\ (.+?)</span>\s*
</div>''',
r'<option value="seizoen (\d{1,3})" data-href="[^"]+?" selected>'],
webpage, 'season', default=None)
season_number = int_or_none(season)
episode_number = int_or_none(self._html_search_regex(
r'''(?xms)<div\ class="content__episode">\s*
<abbr\ title="aflevering">afl</abbr>\s*<span>(\d+)</span>
</div>''',
webpage, 'episode_number', default=None))
release_date = parse_iso8601(self._html_search_regex(
r'(?ms)<div class="content__broadcastdate">\s*<time\ datetime="(.+?)"',
webpage, 'release_date', default=None))
# If there's a ? or a # in the URL, remove them and everything after
clean_url = url.split('?')[0].split('#')[0].strip('/')
securevideo_url = clean_url + '.mssecurevideo.json'
try:
video = self._download_json(securevideo_url, display_id)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
self.raise_login_required()
raise
# We are dealing with a '../<show>.relevant' URL
redirect_url = video.get('url')
if redirect_url:
return self.url_result(self._proto_relative_url(redirect_url, 'https:'))
# There is only one entry, but with an unknown key, so just get
# the first one
video_id = list(video.values())[0].get('videoid')
return {
'_type': 'url_transparent',
'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
'ie_key': CanvasIE.ie_key(),
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'season': season,
'season_number': season_number,
'episode_number': episode_number,
'release_date': release_date,
}

View File

@@ -81,6 +81,12 @@ class Channel9IE(InfoExtractor):
_RSS_URL = 'http://channel9.msdn.com/%s/RSS'
@staticmethod
def _extract_urls(webpage):
return re.findall(
r'<iframe[^>]+src=["\'](https?://channel9\.msdn\.com/(?:[^/]+/)+)player\b',
webpage)
def _extract_list(self, video_id, rss_url=None):
if not rss_url:
rss_url = self._RSS_URL % video_id

View File

@@ -120,13 +120,16 @@ class ComedyCentralTVIE(MTVServicesInfoExtractor):
class ComedyCentralShortnameIE(InfoExtractor):
_VALID_URL = r'^:(?P<id>tds|thedailyshow)$'
_VALID_URL = r'^:(?P<id>tds|thedailyshow|theopposition)$'
_TESTS = [{
'url': ':tds',
'only_matching': True,
}, {
'url': ':thedailyshow',
'only_matching': True,
}, {
'url': ':theopposition',
'only_matching': True,
}]
def _real_extract(self, url):
@@ -134,5 +137,6 @@ class ComedyCentralShortnameIE(InfoExtractor):
shortcut_map = {
'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
'theopposition': 'http://www.cc.com/shows/the-opposition-with-jordan-klepper/full-episodes',
}
return self.url_result(shortcut_map[video_id])

View File

@@ -1401,7 +1401,7 @@ class InfoExtractor(object):
media_url = media.get('URI')
if media_url:
format_id = []
for v in (group_id, name):
for v in (m3u8_id, group_id, name):
if v:
format_id.append(v)
f = {
@@ -1920,7 +1920,7 @@ class InfoExtractor(object):
# can't be used at the same time
if '%(Number' in media_template and 's' not in representation_ms_info:
segment_duration = None
if 'total_number' not in representation_ms_info and 'segment_duration':
if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
representation_ms_info['fragments'] = [{
@@ -2322,7 +2322,6 @@ class InfoExtractor(object):
formats = self._parse_jwplayer_formats(
video_data['sources'], video_id=this_video_id, m3u8_id=m3u8_id,
mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
self._sort_formats(formats)
subtitles = {}
tracks = video_data.get('tracks')
@@ -2339,16 +2338,25 @@ class InfoExtractor(object):
'url': self._proto_relative_url(track_url)
})
entries.append({
entry = {
'id': this_video_id,
'title': video_data['title'] if require_title else video_data.get('title'),
'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
'description': video_data.get('description'),
'thumbnail': self._proto_relative_url(video_data.get('image')),
'timestamp': int_or_none(video_data.get('pubdate')),
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
'subtitles': subtitles,
'formats': formats,
})
}
# https://github.com/jwplayer/jwplayer/blob/master/src/js/utils/validator.js#L32
if len(formats) == 1 and re.search(r'^(?:http|//).*(?:youtube\.com|youtu\.be)/.+', formats[0]['url']):
entry.update({
'_type': 'url_transparent',
'url': formats[0]['url'],
})
else:
self._sort_formats(formats)
entry['formats'] = formats
entries.append(entry)
if len(entries) == 1:
return entries[0]
else:

View File

@@ -235,7 +235,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
# vevo embed
vevo_id = self._search_regex(
r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)',
r'<link rel="video_src" href="[^"]*?vevo\.com[^"]*?video=(?P<id>[\w]*)',
webpage, 'vevo embed', default=None)
if vevo_id:
return self.url_result('vevo:%s' % vevo_id, 'Vevo')

View File

@@ -2,53 +2,85 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import unified_strdate
from ..compat import compat_str
from ..utils import (
float_or_none,
unified_strdate,
)
class DctpTvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dctp\.tv/(#/)?filme/(?P<id>.+?)/$'
_VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
'md5': '174dd4a8a6225cf5655952f969cfbe24',
'info_dict': {
'id': '95eaa4f33dad413aa17b4ee613cccc6c',
'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
'ext': 'mp4',
'ext': 'flv',
'title': 'Videoinstallation für eine Kaufhausfassade',
'description': 'Kurzfilm',
'upload_date': '20110407',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 71.24,
},
'params': {
# rtmp download
'skip_download': True,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
display_id = self._match_id(url)
object_id = self._html_search_meta('DC.identifier', webpage)
webpage = self._download_webpage(url, display_id)
servers_json = self._download_json(
'http://www.dctp.tv/elastic_streaming_client/get_streaming_server/',
video_id, note='Downloading server list')
server = servers_json[0]['server']
m3u8_path = self._search_regex(
r'\'([^\'"]+/playlist\.m3u8)"', webpage, 'm3u8 path')
formats = self._extract_m3u8_formats(
'http://%s%s' % (server, m3u8_path), video_id, ext='mp4',
entry_protocol='m3u8_native')
video_id = self._html_search_meta(
'DC.identifier', webpage, 'video id',
default=None) or self._search_regex(
r'id=["\']uuid[^>]+>([^<]+)<', webpage, 'video id')
title = self._og_search_title(webpage)
servers = self._download_json(
'http://www.dctp.tv/streaming_servers/', display_id,
note='Downloading server list', fatal=False)
if servers:
endpoint = next(
server['endpoint']
for server in servers
if isinstance(server.get('endpoint'), compat_str) and
'cloudfront' in server['endpoint'])
else:
endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'
app = self._search_regex(
r'^rtmpe?://[^/]+/(?P<app>.*)$', endpoint, 'app')
formats = [{
'url': endpoint,
'app': app,
'play_path': 'mp4:%s_dctp_0500_4x3.m4v' % video_id,
'page_url': url,
'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-109.swf',
'ext': 'flv',
}]
description = self._html_search_meta('DC.description', webpage)
upload_date = unified_strdate(
self._html_search_meta('DC.date.created', webpage))
thumbnail = self._og_search_thumbnail(webpage)
duration = float_or_none(self._search_regex(
r'id=["\']duration_in_ms[^+]>(\d+)', webpage, 'duration',
default=None), scale=1000)
return {
'id': object_id,
'id': video_id,
'title': title,
'formats': formats,
'display_id': video_id,
'display_id': display_id,
'description': description,
'upload_date': upload_date,
'thumbnail': thumbnail,
'duration': duration,
}

View File

@@ -19,7 +19,7 @@ class DeezerPlaylistIE(InfoExtractor):
'id': '176747451',
'title': 'Best!',
'uploader': 'Anonymous',
'thumbnail': r're:^https?://cdn-images.deezer.com/images/cover/.*\.jpg$',
'thumbnail': r're:^https?://cdn-images\.deezer\.com/images/cover/.*\.jpg$',
},
'playlist_count': 30,
'skip': 'Only available in .de',

View File

@@ -59,7 +59,7 @@ class DramaFeverBaseIE(AMPIE):
if all(logout_pattern not in response
for logout_pattern in ['href="/accounts/logout/"', '>Log out<']):
error = self._html_search_regex(
r'(?s)class="hidden-xs prompt"[^>]*>(.+?)<',
r'(?s)<h\d[^>]+\bclass="hidden-xs prompt"[^>]*>(.+?)</h\d',
response, 'error message', default=None)
if error:
raise ExtractorError('Unable to login: %s' % error, expected=True)

View File

@@ -138,6 +138,7 @@ class DRTVIE(InfoExtractor):
'tbr': int_or_none(bitrate),
'ext': link.get('FileFormat'),
'vcodec': 'none' if kind == 'AudioResource' else None,
'preference': preference,
})
subtitles_list = asset.get('SubtitlesList')
if isinstance(subtitles_list, list):

View File

@@ -2,7 +2,9 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
determine_ext,
int_or_none,
try_get,
unified_timestamp,
@@ -17,7 +19,7 @@ class EggheadCourseIE(InfoExtractor):
'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
'playlist_count': 29,
'info_dict': {
'id': 'professor-frisby-introduces-composable-functional-javascript',
'id': '72',
'title': 'Professor Frisby Introduces Composable Functional JavaScript',
'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
},
@@ -26,14 +28,28 @@ class EggheadCourseIE(InfoExtractor):
def _real_extract(self, url):
playlist_id = self._match_id(url)
course = self._download_json(
'https://egghead.io/api/v1/series/%s' % playlist_id, playlist_id)
lessons = self._download_json(
'https://egghead.io/api/v1/series/%s/lessons' % playlist_id,
playlist_id, 'Downloading course lessons JSON')
entries = [
self.url_result(
'wistia:%s' % lesson['wistia_id'], ie='Wistia',
video_id=lesson['wistia_id'], video_title=lesson.get('title'))
for lesson in course['lessons'] if lesson.get('wistia_id')]
entries = []
for lesson in lessons:
lesson_url = lesson.get('http_url')
if not lesson_url or not isinstance(lesson_url, compat_str):
continue
lesson_id = lesson.get('id')
if lesson_id:
lesson_id = compat_str(lesson_id)
entries.append(self.url_result(
lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id))
course = self._download_json(
'https://egghead.io/api/v1/series/%s' % playlist_id,
playlist_id, 'Downloading course JSON', fatal=False) or {}
playlist_id = course.get('id')
if playlist_id:
playlist_id = compat_str(playlist_id)
return self.playlist_result(
entries, playlist_id, course.get('title'),
@@ -43,11 +59,12 @@ class EggheadCourseIE(InfoExtractor):
class EggheadLessonIE(InfoExtractor):
IE_DESC = 'egghead.io lesson'
IE_NAME = 'egghead:lesson'
_VALID_URL = r'https://egghead\.io/lessons/(?P<id>[^/?#&]+)'
_TEST = {
_VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
'info_dict': {
'id': 'fv5yotjxcg',
'id': '1196',
'display_id': 'javascript-linear-data-flow-with-container-style-types-box',
'ext': 'mp4',
'title': 'Create linear data flow with container style types (Box)',
'description': 'md5:9aa2cdb6f9878ed4c39ec09e85a8150e',
@@ -60,25 +77,51 @@ class EggheadLessonIE(InfoExtractor):
},
'params': {
'skip_download': True,
'format': 'bestvideo',
},
}
}, {
'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',
'only_matching': True,
}]
def _real_extract(self, url):
lesson_id = self._match_id(url)
display_id = self._match_id(url)
lesson = self._download_json(
'https://egghead.io/api/v1/lessons/%s' % lesson_id, lesson_id)
'https://egghead.io/api/v1/lessons/%s' % display_id, display_id)
lesson_id = compat_str(lesson['id'])
title = lesson['title']
formats = []
for _, format_url in lesson['media_urls'].items():
if not format_url or not isinstance(format_url, compat_str):
continue
ext = determine_ext(format_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url, lesson_id, 'mp4', entry_protocol='m3u8',
m3u8_id='hls', fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
format_url, lesson_id, mpd_id='dash', fatal=False))
else:
formats.append({
'url': format_url,
})
self._sort_formats(formats)
return {
'_type': 'url_transparent',
'ie_key': 'Wistia',
'url': 'wistia:%s' % lesson['wistia_id'],
'id': lesson['wistia_id'],
'title': lesson.get('title'),
'id': lesson_id,
'display_id': display_id,
'title': title,
'description': lesson.get('summary'),
'thumbnail': lesson.get('thumb_nail'),
'timestamp': unified_timestamp(lesson.get('published_at')),
'duration': int_or_none(lesson.get('duration')),
'view_count': int_or_none(lesson.get('plays_count')),
'tags': try_get(lesson, lambda x: x['tag_list'], list),
'series': try_get(
lesson, lambda x: x['series']['title'], compat_str),
'formats': formats,
}

View File

@@ -15,7 +15,7 @@ from ..utils import (
class EpornerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
_VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:hd-porn|embed)/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
_TESTS = [{
'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
'md5': '39d486f046212d8e1b911c52ab4691f8',
@@ -35,6 +35,9 @@ class EpornerIE(InfoExtractor):
}, {
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
'only_matching': True,
}, {
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@@ -31,10 +31,7 @@ from .aenetworks import (
AENetworksIE,
HistoryTopicIE,
)
from .afreecatv import (
AfreecaTVIE,
AfreecaTVGlobalIE,
)
from .afreecatv import AfreecaTVIE
from .airmozilla import AirMozillaIE
from .aljazeera import AlJazeeraIE
from .alphaporno import AlphaPornoIE
@@ -150,7 +147,11 @@ from .camdemy import (
from .camwithher import CamWithHerIE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .canvas import CanvasIE
from .canvas import (
CanvasIE,
CanvasEenIE,
VrtNUIE,
)
from .carambatv import (
CarambaTVIE,
CarambaTVPageIE,
@@ -381,6 +382,7 @@ from .freesound import FreesoundIE
from .freespeech import FreespeechIE
from .freshlive import FreshLiveIE
from .funimation import FunimationIE
from .funk import FunkIE
from .funnyordie import FunnyOrDieIE
from .fusion import FusionIE
from .fxnetworks import FXNetworksIE
@@ -622,7 +624,6 @@ from .mwave import MwaveIE, MwaveMeetGreetIE
from .myspace import MySpaceIE, MySpaceAlbumIE
from .myspass import MySpassIE
from .myvi import MyviIE
from .myvideo import MyVideoIE
from .myvidster import MyVidsterIE
from .nationalgeographic import (
NationalGeographicVideoIE,
@@ -924,6 +925,7 @@ from .seeker import SeekerIE
from .senateisvp import SenateISVPIE
from .sendtonews import SendtoNewsIE
from .servingsys import ServingSysIE
from .servus import ServusIE
from .sexu import SexuIE
from .shahid import ShahidIE
from .shared import (
@@ -940,6 +942,7 @@ from .skynewsarabia import (
)
from .skysports import SkySportsIE
from .slideshare import SlideshareIE
from .slideslive import SlidesLiveIE
from .slutload import SlutloadIE
from .smotri import (
SmotriIE,
@@ -1136,6 +1139,7 @@ from .udn import UDNEmbedIE
from .uktvplay import UKTVPlayIE
from .digiteka import DigitekaIE
from .unistra import UnistraIE
from .unity import UnityIE
from .uol import UOLIE
from .uplynk import (
UplynkIE,
@@ -1243,7 +1247,10 @@ from .vodpl import VODPlIE
from .vodplatform import VODPlatformIE
from .voicerepublic import VoiceRepublicIE
from .voot import VootIE
from .voxmedia import VoxMediaIE
from .voxmedia import (
VoxMediaVolumeIE,
VoxMediaIE,
)
from .vporn import VpornIE
from .vrt import VRTIE
from .vrak import VrakIE
@@ -1328,6 +1335,11 @@ from .youku import (
YoukuIE,
YoukuShowIE,
)
from .younow import (
YouNowLiveIE,
YouNowChannelIE,
YouNowMomentIE,
)
from .youporn import YouPornIE
from .yourupload import YourUploadIE
from .youtube import (
@@ -1342,7 +1354,6 @@ from .youtube import (
YoutubeSearchDateIE,
YoutubeSearchIE,
YoutubeSearchURLIE,
YoutubeSharedVideoIE,
YoutubeShowIE,
YoutubeSubscriptionsIE,
YoutubeTruncatedIDIE,

View File

@@ -67,9 +67,9 @@ class FacebookIE(InfoExtractor):
'uploader': 'Tennis on Facebook',
'upload_date': '20140908',
'timestamp': 1410199200,
}
},
'skip': 'Requires logging in',
}, {
'note': 'Video without discernible title',
'url': 'https://www.facebook.com/video.php?v=274175099429670',
'info_dict': {
'id': '274175099429670',
@@ -78,6 +78,7 @@ class FacebookIE(InfoExtractor):
'uploader': 'Asif Nawab Butt',
'upload_date': '20140506',
'timestamp': 1399398998,
'thumbnail': r're:^https?://.*',
},
'expected_warnings': [
'title'
@@ -94,6 +95,7 @@ class FacebookIE(InfoExtractor):
'upload_date': '20160110',
'timestamp': 1452431627,
},
'skip': 'Requires logging in',
}, {
'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6',
@@ -121,7 +123,11 @@ class FacebookIE(InfoExtractor):
'info_dict': {
'id': '10153664894881749',
'ext': 'mp4',
'title': 'Facebook video #10153664894881749',
'title': 'Average time to confirm recent Supreme Court nominees: 67 days Longest it\'s t...',
'thumbnail': r're:^https?://.*',
'timestamp': 1456259628,
'upload_date': '20160223',
'uploader': 'Barack Obama',
},
}, {
# have 1080P, but only up to 720p in swf params
@@ -130,10 +136,11 @@ class FacebookIE(InfoExtractor):
'info_dict': {
'id': '10155529876156509',
'ext': 'mp4',
'title': 'Holocaust survivor becomes US citizen',
'title': 'She survived the holocaust — and years later, shes getting her citizenship s...',
'timestamp': 1477818095,
'upload_date': '20161030',
'uploader': 'CNN',
'thumbnail': r're:^https?://.*',
},
}, {
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
@@ -158,6 +165,7 @@ class FacebookIE(InfoExtractor):
'timestamp': 1477305000,
'upload_date': '20161024',
'uploader': 'La Guía Del Varón',
'thumbnail': r're:^https?://.*',
},
'params': {
'skip_download': True,
@@ -376,6 +384,7 @@ class FacebookIE(InfoExtractor):
timestamp = int_or_none(self._search_regex(
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
'timestamp', default=None))
thumbnail = self._og_search_thumbnail(webpage)
info_dict = {
'id': video_id,
@@ -383,6 +392,7 @@ class FacebookIE(InfoExtractor):
'formats': formats,
'uploader': uploader,
'timestamp': timestamp,
'thumbnail': thumbnail,
}
return webpage, info_dict

View File

@@ -2,7 +2,10 @@
from __future__ import unicode_literals
from .adobepass import AdobePassIE
from .uplynk import UplynkPreplayIE
from ..compat import compat_str
from ..utils import (
HEADRequest,
int_or_none,
parse_age_limit,
parse_duration,
@@ -53,14 +56,7 @@ class FOXIE(AdobePassIE):
})
title = video['name']
m3u8_url = self._download_json(
video['videoRelease']['url'], video_id)['playURL']
formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls')
self._sort_formats(formats)
release_url = video['videoRelease']['url']
description = video.get('description')
duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
@@ -84,7 +80,7 @@ class FOXIE(AdobePassIE):
# TODO: AP
pass
return {
info = {
'id': video_id,
'title': title,
'description': description,
@@ -97,5 +93,22 @@ class FOXIE(AdobePassIE):
'episode': episode,
'episode_number': episode_number,
'release_year': release_year,
'formats': formats,
}
urlh = self._request_webpage(HEADRequest(release_url), video_id)
video_url = compat_str(urlh.geturl())
if UplynkPreplayIE.suitable(video_url):
info.update({
'_type': 'url_transparent',
'url': video_url,
'ie_key': UplynkPreplayIE.ie_key(),
})
else:
m3u8_url = self._download_json(release_url, video_id)['playURL']
formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls')
self._sort_formats(formats)
info['formats'] = formats
return info

View File

@@ -27,7 +27,7 @@ class FreespeechIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
title = mobj.group('title')
webpage = self._download_webpage(url, title)
info_json = self._search_regex(r'jQuery.extend\(Drupal.settings, ({.*?})\);', webpage, 'info')
info_json = self._search_regex(r'jQuery\.extend\(Drupal\.settings, ({.*?})\);', webpage, 'info')
info = json.loads(info_json)
return {

View File

@@ -0,0 +1,43 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from .nexx import NexxIE
from ..utils import extract_attributes
class FunkIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?funk\.net/(?:mix|channel)/(?:[^/]+/)*(?P<id>[^?/#]+)'
_TESTS = [{
'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/0/59d517e741dca10001252574/',
'md5': '4d40974481fa3475f8bccfd20c5361f8',
'info_dict': {
'id': '716599',
'ext': 'mp4',
'title': 'Neue Rechte Welle',
'description': 'md5:a30a53f740ffb6bfd535314c2cc5fb69',
'timestamp': 1501337639,
'upload_date': '20170729',
},
'params': {
'format': 'bestvideo',
'skip_download': True,
},
}, {
'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/0/59d52049999264000182e79d/',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
domain_id = NexxIE._extract_domain_id(webpage) or '741'
nexx_id = extract_attributes(self._search_regex(
r'(<div[^>]id=["\']mediaplayer-funk[^>]+>)',
webpage, 'media player'))['data-id']
return self.url_result(
'nexx:%s:%s' % (domain_id, nexx_id), ie=NexxIE.ie_key(),
video_id=nexx_id)

View File

@@ -3,27 +3,31 @@ from __future__ import unicode_literals
from .adobepass import AdobePassIE
from ..utils import (
update_url_query,
extract_attributes,
int_or_none,
parse_age_limit,
smuggle_url,
update_url_query,
)
class FXNetworksIE(AdobePassIE):
_VALID_URL = r'https?://(?:www\.)?(?:fxnetworks|simpsonsworld)\.com/video/(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.fxnetworks.com/video/719841347694',
'md5': '1447d4722e42ebca19e5232ab93abb22',
'url': 'http://www.fxnetworks.com/video/1032565827847',
'md5': '8d99b97b4aa7a202f55b6ed47ea7e703',
'info_dict': {
'id': '719841347694',
'id': 'dRzwHC_MMqIv',
'ext': 'mp4',
'title': 'Vanpage',
'description': 'F*ck settling down. You\'re the Worst returns for an all new season August 31st on FXX.',
'title': 'First Look: Better Things - Season 2',
'description': 'Because real life is like a fart. Watch this FIRST LOOK to see what inspired the new season of Better Things.',
'age_limit': 14,
'uploader': 'NEWA-FNG-FX',
'upload_date': '20160706',
'timestamp': 1467844741,
'upload_date': '20170825',
'timestamp': 1503686274,
'episode_number': 0,
'season_number': 2,
'series': 'Better Things',
},
'add_ie': ['ThePlatform'],
}, {
@@ -64,6 +68,9 @@ class FXNetworksIE(AdobePassIE):
'id': video_id,
'title': title,
'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
'series': video_data.get('data-show-title'),
'episode_number': int_or_none(video_data.get('data-episode')),
'season_number': int_or_none(video_data.get('data-season')),
'thumbnail': video_data.get('data-large-thumb'),
'age_limit': parse_age_limit(rating),
'ie_key': 'ThePlatform',

View File

@@ -105,7 +105,7 @@ class GameSpotIE(OnceIE):
onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri')
if onceux_url:
formats.extend(self._extract_once_formats(re.sub(
r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url).replace('ads/vmap/', '')))
r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url)))
if not formats:
for quality in ['sd', 'hd']:

View File

@@ -22,6 +22,8 @@ from ..utils import (
HEADRequest,
is_html,
js_to_json,
KNOWN_EXTENSIONS,
mimetype2ext,
orderedSet,
sanitized_Request,
smuggle_url,
@@ -99,6 +101,7 @@ from .mediaset import MediasetIE
from .joj import JojIE
from .megaphone import MegaphoneIE
from .vzaar import VzaarIE
from .channel9 import Channel9IE
class GenericIE(InfoExtractor):
@@ -1088,7 +1091,7 @@ class GenericIE(InfoExtractor):
'ext': 'mp4',
'upload_date': '20150212',
'uploader': 'The National Archives UK',
'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
'description': 'md5:8078af856dca76edc42910b61273dbbf',
'uploader_id': 'NationalArchives08',
'title': 'Webinar: Using Discovery, The National Archives online catalogue',
},
@@ -1104,7 +1107,8 @@ class GenericIE(InfoExtractor):
},
'params': {
'skip_download': True,
}
},
'skip': 'does not contain a video anymore',
},
# Complex jwplayer
{
@@ -1113,6 +1117,7 @@ class GenericIE(InfoExtractor):
'id': 'videos',
'ext': 'mp4',
'title': 'king machine trailer 1',
'description': 'Browse King Machine videos & audio for sweet media. Your eyes will thank you.',
'thumbnail': r're:^https?://.*\.jpg$',
},
},
@@ -1130,13 +1135,42 @@ class GenericIE(InfoExtractor):
'skip_download': True,
}
},
{
# Video.js embed, multiple formats
'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
'info_dict': {
'id': 'yygqldloqIk',
'ext': 'mp4',
'title': 'SolidWorks. Урок 6 Настройка чертежа',
'description': 'md5:baf95267792646afdbf030e4d06b2ab3',
'upload_date': '20130314',
'uploader': 'PROстое3D',
'uploader_id': 'PROstoe3D',
},
'params': {
'skip_download': True,
},
},
{
# Video.js embed, single format
'url': 'https://www.vooplayer.com/v3/watch/watch.php?v=NzgwNTg=',
'info_dict': {
'id': 'watch',
'ext': 'mp4',
'title': 'Step 1 - Good Foundation',
'description': 'md5:d1e7ff33a29fc3eb1673d6c270d344f4',
},
'params': {
'skip_download': True,
},
},
# rtl.nl embed
{
'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
'playlist_mincount': 5,
'info_dict': {
'id': 'aanslagen-kopenhagen',
'title': 'Aanslagen Kopenhagen | RTL Nieuws',
'title': 'Aanslagen Kopenhagen',
}
},
# Zapiks embed
@@ -1268,6 +1302,7 @@ class GenericIE(InfoExtractor):
'params': {
'skip_download': True,
},
'skip': 'This video is unavailable.',
},
# Pladform embed
{
@@ -1281,6 +1316,7 @@ class GenericIE(InfoExtractor):
'duration': 694,
'age_limit': 0,
},
'skip': 'HTTP Error 404: Not Found',
},
# Playwire embed
{
@@ -1301,6 +1337,14 @@ class GenericIE(InfoExtractor):
'id': '518726732',
'ext': 'mp4',
'title': 'Facebook Creates "On This Day" | Crunch Report',
'description': 'Amazon updates Fire TV line, Tesla\'s Model X spotted in the wild',
'timestamp': 1427237531,
'uploader': 'Crunch Report',
'upload_date': '20150324',
},
'params': {
# m3u8 download
'skip_download': True,
},
},
# SVT embed
@@ -1352,16 +1396,20 @@ class GenericIE(InfoExtractor):
'upload_date': '20140107',
'timestamp': 1389118457,
},
'skip': 'Invalid Page URL',
},
# NBC News embed
{
'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
'md5': '1aa589c675898ae6d37a17913cf68d66',
'info_dict': {
'id': '701714499682',
'id': 'x_dtl_oa_LettermanliftPR_160608',
'ext': 'mp4',
'title': 'PREVIEW: On Assignment: David Letterman',
'title': 'David Letterman: A Preview',
'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
'upload_date': '20160609',
'timestamp': 1465431544,
'uploader': 'NBCU-NEWS',
},
},
# UDN embed
@@ -1378,6 +1426,7 @@ class GenericIE(InfoExtractor):
# m3u8 download
'skip_download': True,
},
'expected_warnings': ['Failed to parse JSON Expecting value'],
},
# Ooyala embed
{
@@ -1385,7 +1434,7 @@ class GenericIE(InfoExtractor):
'info_dict': {
'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
'ext': 'mp4',
'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
'description': 'Index/Match versus VLOOKUP.',
'title': 'This is what separates the Excel masters from the wannabes',
'duration': 191.933,
},
@@ -1423,7 +1472,8 @@ class GenericIE(InfoExtractor):
'upload_date': '20150622',
'uploader': 'Public Sénat',
'uploader_id': 'xa9gza',
}
},
'skip': 'File not found.',
},
# OnionStudios embed
{
@@ -1581,22 +1631,6 @@ class GenericIE(InfoExtractor):
},
'add_ie': ['BrightcoveLegacy'],
},
# Nexx embed
{
'url': 'https://www.funk.net/serien/5940e15073f6120001657956/items/593efbb173f6120001657503',
'info_dict': {
'id': '247746',
'ext': 'mp4',
'title': "Yesterday's Jam (OV)",
'description': 'md5:09bc0984723fed34e2581624a84e05f0',
'timestamp': 1492594816,
'upload_date': '20170419',
},
'params': {
'format': 'bestvideo',
'skip_download': True,
},
},
# Facebook <iframe> embed
{
'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
@@ -2175,7 +2209,7 @@ class GenericIE(InfoExtractor):
# And then there are the jokers who advertise that they use RTA,
# but actually don't.
AGE_LIMIT_MARKERS = [
r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
]
if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
age_limit = 18
@@ -2237,7 +2271,7 @@ class GenericIE(InfoExtractor):
# Look for embedded rtl.nl player
matches = re.findall(
r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
webpage)
if matches:
return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
@@ -2636,7 +2670,7 @@ class GenericIE(InfoExtractor):
# Look for UDN embeds
mobj = re.search(
r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
if mobj is not None:
return self.url_result(
compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
@@ -2840,6 +2874,11 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches(
vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key())
channel9_urls = Channel9IE._extract_urls(webpage)
if channel9_urls:
return self.playlist_from_matches(
channel9_urls, video_id, video_title, ie=Channel9IE.ie_key())
def merge_dicts(dict1, dict2):
merged = {}
for k, v in dict1.items():
@@ -2880,6 +2919,46 @@ class GenericIE(InfoExtractor):
jwplayer_data, video_id, require_title=False, base_url=url)
return merge_dicts(info, info_dict)
# Video.js embed
mobj = re.search(
r'(?s)\bvideojs\s*\(.+?\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;',
webpage)
if mobj is not None:
sources = self._parse_json(
mobj.group(1), video_id, transform_source=js_to_json,
fatal=False) or []
if not isinstance(sources, list):
sources = [sources]
formats = []
for source in sources:
src = source.get('src')
if not src or not isinstance(src, compat_str):
continue
src = compat_urlparse.urljoin(url, src)
src_type = source.get('type')
if isinstance(src_type, compat_str):
src_type = src_type.lower()
ext = determine_ext(src).lower()
if src_type == 'video/youtube':
return self.url_result(src, YoutubeIE.ie_key())
if src_type == 'application/dash+xml' or ext == 'mpd':
formats.extend(self._extract_mpd_formats(
src, video_id, mpd_id='dash', fatal=False))
elif src_type == 'application/x-mpegurl' or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
src, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
else:
formats.append({
'url': src,
'ext': (mimetype2ext(src_type) or
ext if ext in KNOWN_EXTENSIONS else 'mp4'),
})
if formats:
self._sort_formats(formats)
info_dict['formats'] = formats
return info_dict
# Looking for http://schema.org/VideoObject
json_ld = self._search_json_ld(
webpage, video_id, default={}, expected_type='VideoObject')
@@ -2973,7 +3052,7 @@ class GenericIE(InfoExtractor):
# be supported by youtube-dl thus this is checked the very last (see
# https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
embed_url = self._html_search_meta('twitter:player', webpage, default=None)
if embed_url:
if embed_url and embed_url != url:
return self.url_result(embed_url)
if not found:

View File

@@ -11,7 +11,7 @@ from ..utils import (
class GfycatIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/)?(?P<id>[^/?#]+)'
_VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/|gifs/detail/)?(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
'info_dict': {
@@ -44,6 +44,9 @@ class GfycatIE(InfoExtractor):
'categories': list,
'age_limit': 0,
}
}, {
'url': 'https://gfycat.com/gifs/detail/UnconsciousLankyIvorygull',
'only_matching': True
}]
def _real_extract(self, url):

View File

@@ -0,0 +1,22 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
urlencode_postdata,
)
class GigyaBaseIE(InfoExtractor):
def _gigya_login(self, auth_data):
auth_info = self._download_json(
'https://accounts.eu1.gigya.com/accounts.login', None,
note='Logging in', errnote='Unable to log in',
data=urlencode_postdata(auth_data))
error_message = auth_info.get('errorDetails') or auth_info.get('errorMessage')
if error_message:
raise ExtractorError(
'Unable to login: %s' % error_message, expected=True)
return auth_info

View File

@@ -61,7 +61,7 @@ class GooglePlusIE(InfoExtractor):
'width': int(width),
'height': int(height),
} for width, height, video_url in re.findall(
r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent.com.*?)"', webpage)]
r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent\.com.*?)"', webpage)]
self._sort_formats(formats)
return {

View File

@@ -11,45 +11,20 @@ from ..utils import (
class HowStuffWorksIE(InfoExtractor):
_VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*(?:\d+-)?(?P<id>.+?)-video\.htm'
_VALID_URL = r'https?://[\da-z-]+\.(?:howstuffworks|stuff(?:(?:youshould|theydontwantyouto)know|toblowyourmind|momnevertoldyou)|(?:brain|car)stuffshow|fwthinking|geniusstuff)\.com/(?:[^/]+/)*(?:\d+-)?(?P<id>.+?)-video\.htm'
_TESTS = [
{
'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm',
'url': 'http://www.stufftoblowyourmind.com/videos/optical-illusions-video.htm',
'md5': '76646a5acc0c92bf7cd66751ca5db94d',
'info_dict': {
'id': '450221',
'ext': 'flv',
'title': 'Cool Jobs - Iditarod Musher',
'description': 'Cold sleds, freezing temps and warm dog breath... an Iditarod musher\'s dream. Kasey-Dee Gardner jumps on a sled to find out what the big deal is.',
'display_id': 'cool-jobs-iditarod-musher',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 161,
},
'skip': 'Video broken',
},
{
'url': 'http://adventure.howstuffworks.com/7199-survival-zone-food-and-water-in-the-savanna-video.htm',
'info_dict': {
'id': '453464',
'id': '855410',
'ext': 'mp4',
'title': 'Survival Zone: Food and Water In the Savanna',
'description': 'Learn how to find both food and water while trekking in the African savannah. In this video from the Discovery Channel.',
'display_id': 'survival-zone-food-and-water-in-the-savanna',
'thumbnail': r're:^https?://.*\.jpg$',
'title': 'Your Trickster Brain: Optical Illusions -- Science on the Web',
'description': 'md5:e374ff9561f6833ad076a8cc0a5ab2fb',
},
},
{
'url': 'http://entertainment.howstuffworks.com/arts/2706-sword-swallowing-1-by-dan-meyer-video.htm',
'info_dict': {
'id': '440011',
'ext': 'mp4',
'title': 'Sword Swallowing #1 by Dan Meyer',
'description': 'Video footage (1 of 3) used by permission of the owner Dan Meyer through Sword Swallowers Association International <www.swordswallow.org>',
'display_id': 'sword-swallowing-1-by-dan-meyer',
'thumbnail': r're:^https?://.*\.jpg$',
},
},
{
'url': 'http://shows.howstuffworks.com/stuff-to-blow-your-mind/optical-illusions-video.htm',
'url': 'http://shows.howstuffworks.com/more-shows/why-does-balloon-stick-to-hair-video.htm',
'only_matching': True,
}
]

View File

@@ -104,7 +104,7 @@ class HRTiIE(HRTiBaseIE):
(?:
hrti:(?P<short_id>[0-9]+)|
https?://
hrti\.hrt\.hr/\#/video/show/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?
hrti\.hrt\.hr/(?:\#/)?video/show/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?
)
'''
_TESTS = [{
@@ -129,6 +129,9 @@ class HRTiIE(HRTiBaseIE):
}, {
'url': 'hrti:2181385',
'only_matching': True,
}, {
'url': 'https://hrti.hrt.hr/video/show/3873068/cuvar-dvorca-dramska-serija-14',
'only_matching': True,
}]
def _real_extract(self, url):
@@ -170,7 +173,7 @@ class HRTiIE(HRTiBaseIE):
class HRTiPlaylistIE(HRTiBaseIE):
_VALID_URL = r'https?://hrti.hrt.hr/#/video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?'
_VALID_URL = r'https?://hrti\.hrt\.hr/(?:#/)?video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?'
_TESTS = [{
'url': 'https://hrti.hrt.hr/#/video/list/category/212/ekumena',
'info_dict': {
@@ -182,6 +185,9 @@ class HRTiPlaylistIE(HRTiBaseIE):
}, {
'url': 'https://hrti.hrt.hr/#/video/list/category/212/',
'only_matching': True,
}, {
'url': 'https://hrti.hrt.hr/video/list/category/212/ekumena',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@@ -203,7 +203,7 @@ class PCMagIE(IGNIE):
_VALID_URL = r'https?://(?:www\.)?pcmag\.com/(?P<type>videos|article2)(/.+)?/(?P<name_or_id>.+)'
IE_NAME = 'pcmag'
_EMBED_RE = r'iframe.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content.html?[^"]*url=([^"]+)["&]'
_EMBED_RE = r'iframe\.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content\.html?[^"]*url=([^"]+)["&]'
_TESTS = [{
'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',

View File

@@ -8,7 +8,10 @@ from ..compat import (
compat_urllib_parse_unquote,
compat_urlparse,
)
from ..utils import determine_ext
from ..utils import (
determine_ext,
update_url_query,
)
from .bokecc import BokeCCBaseIE
@@ -68,21 +71,22 @@ class InfoQIE(BokeCCBaseIE):
'play_path': playpath,
}]
def _extract_cookies(self, webpage):
policy = self._search_regex(r'InfoQConstants.scp\s*=\s*\'([^\']+)\'', webpage, 'policy')
signature = self._search_regex(r'InfoQConstants.scs\s*=\s*\'([^\']+)\'', webpage, 'signature')
key_pair_id = self._search_regex(r'InfoQConstants.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id')
return 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % (
policy, signature, key_pair_id)
def _extract_cf_auth(self, webpage):
policy = self._search_regex(r'InfoQConstants\.scp\s*=\s*\'([^\']+)\'', webpage, 'policy')
signature = self._search_regex(r'InfoQConstants\.scs\s*=\s*\'([^\']+)\'', webpage, 'signature')
key_pair_id = self._search_regex(r'InfoQConstants\.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id')
return {
'Policy': policy,
'Signature': signature,
'Key-Pair-Id': key_pair_id,
}
def _extract_http_video(self, webpage):
http_video_url = self._search_regex(r'P\.s\s*=\s*\'([^\']+)\'', webpage, 'video URL')
http_video_url = update_url_query(http_video_url, self._extract_cf_auth(webpage))
return [{
'format_id': 'http_video',
'url': http_video_url,
'http_headers': {
'Cookie': self._extract_cookies(webpage)
},
}]
def _extract_http_audio(self, webpage, video_id):
@@ -91,22 +95,20 @@ class InfoQIE(BokeCCBaseIE):
if not http_audio_url:
return []
cookies_header = {'Cookie': self._extract_cookies(webpage)}
# base URL is found in the Location header in the response returned by
# GET https://www.infoq.com/mp3download.action?filename=... when logged in.
http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url)
http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage))
# audio file seem to be missing some times even if there is a download link
# so probe URL to make sure
if not self._is_valid_url(http_audio_url, video_id, headers=cookies_header):
if not self._is_valid_url(http_audio_url, video_id):
return []
return [{
'format_id': 'http_audio',
'url': http_audio_url,
'vcodec': 'none',
'http_headers': cookies_header,
}]
def _real_extract(self, url):

View File

@@ -30,7 +30,7 @@ class JeuxVideoIE(InfoExtractor):
webpage = self._download_webpage(url, title)
title = self._html_search_meta('name', webpage) or self._og_search_title(webpage)
config_url = self._html_search_regex(
r'data-src(?:set-video)?="(/contenu/medias/video.php.*?)"',
r'data-src(?:set-video)?="(/contenu/medias/video\.php.*?)"',
webpage, 'config URL')
config_url = 'http://www.jeuxvideo.com' + config_url

View File

@@ -287,6 +287,9 @@ class KalturaIE(InfoExtractor):
# skip for now.
if f.get('fileExt') == 'chun':
continue
# DRM-protected video, cannot be decrypted
if f.get('fileExt') == 'wvm':
continue
if not f.get('fileExt'):
# QT indicates QuickTime; some videos have broken fileExt
if f.get('containerFormat') == 'qt':

View File

@@ -1,5 +1,6 @@
from __future__ import unicode_literals
from .canvas import CanvasIE
from .common import InfoExtractor
@@ -7,7 +8,7 @@ class KetnetIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.ketnet.be/kijken/zomerse-filmpjes',
'md5': 'd907f7b1814ef0fa285c0475d9994ed7',
'md5': '6bdeb65998930251bbd1c510750edba9',
'info_dict': {
'id': 'zomerse-filmpjes',
'ext': 'mp4',
@@ -15,6 +16,20 @@ class KetnetIE(InfoExtractor):
'description': 'Gluur mee met Ghost Rockers op de filmset',
'thumbnail': r're:^https?://.*\.jpg$',
}
}, {
# mzid in playerConfig instead of sources
'url': 'https://www.ketnet.be/kijken/nachtwacht/de-greystook',
'md5': '90139b746a0a9bd7bb631283f6e2a64e',
'info_dict': {
'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'ext': 'flv',
'title': 'Nachtwacht: De Greystook',
'description': 'md5:1db3f5dc4c7109c821261e7512975be7',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1468.03,
},
'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
}, {
'url': 'https://www.ketnet.be/kijken/karrewiet/uitzending-8-september-2016',
'only_matching': True,
@@ -38,6 +53,12 @@ class KetnetIE(InfoExtractor):
'player config'),
video_id)
mzid = config.get('mzid')
if mzid:
return self.url_result(
'https://mediazone.vrt.be/api/v1/ketnet/assets/%s' % mzid,
CanvasIE.ie_key(), video_id=mzid)
title = config['title']
formats = []

View File

@@ -338,7 +338,7 @@ class LivestreamOriginalIE(InfoExtractor):
info = {
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'thumbnail': self._search_regex(r'channelLogo.src\s*=\s*"([^"]+)"', webpage, 'thumbnail', None),
'thumbnail': self._search_regex(r'channelLogo\.src\s*=\s*"([^"]+)"', webpage, 'thumbnail', None),
}
video_data = self._download_json(stream_url, content_id)
is_live = video_data.get('isLive')

View File

@@ -11,7 +11,7 @@ from ..utils import (
class LnkGoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?lnkgo\.alfa\.lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)'
_VALID_URL = r'https?://(?:www\.)?lnkgo\.(?:alfa\.)?lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)'
_TESTS = [{
'url': 'http://lnkgo.alfa.lt/visi-video/yra-kaip-yra/ziurek-yra-kaip-yra-162',
'info_dict': {
@@ -42,6 +42,9 @@ class LnkGoIE(InfoExtractor):
'params': {
'skip_download': True, # HLS download
},
}, {
'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai',
'only_matching': True,
}]
_AGE_LIMITS = {
'N-7': 7,

View File

@@ -5,7 +5,7 @@ from .common import InfoExtractor
class MakerTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
_VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer\.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
_TEST = {
'url': 'http://www.maker.tv/video/Fh3QgymL9gsc',
'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',

View File

@@ -22,7 +22,7 @@ class MangomoloBaseIE(InfoExtractor):
format_url = self._html_search_regex(
[
r'file\s*:\s*"(https?://[^"]+?/playlist.m3u8)',
r'file\s*:\s*"(https?://[^"]+?/playlist\.m3u8)',
r'<a[^>]+href="(rtsp://[^"]+)"'
], webpage, 'format url')
formats = self._extract_wowza_formats(

View File

@@ -2,19 +2,18 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .gigya import GigyaBaseIE
from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
parse_duration,
try_get,
unified_timestamp,
urlencode_postdata,
)
class MedialaanIE(InfoExtractor):
class MedialaanIE(GigyaBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:www\.|nieuws\.)?
@@ -119,15 +118,7 @@ class MedialaanIE(InfoExtractor):
'password': password,
}
auth_info = self._download_json(
'https://accounts.eu1.gigya.com/accounts.login', None,
note='Logging in', errnote='Unable to log in',
data=urlencode_postdata(auth_data))
error_message = auth_info.get('errorDetails') or auth_info.get('errorMessage')
if error_message:
raise ExtractorError(
'Unable to login: %s' % error_message, expected=True)
auth_info = self._gigya_login(auth_data)
self._uid = auth_info['UID']
self._uid_signature = auth_info['UIDSignature']

View File

@@ -18,7 +18,7 @@ class MegaphoneIE(InfoExtractor):
'id': 'GLT9749789991',
'ext': 'mp3',
'title': '#97 What Kind Of Idiot Gets Phished?',
'thumbnail': 're:^https://.*\.png.*$',
'thumbnail': r're:^https://.*\.png.*$',
'duration': 1776.26375,
'author': 'Reply All',
},

View File

@@ -11,7 +11,7 @@ from ..utils import (
class MeipaiIE(InfoExtractor):
IE_DESC = '美拍'
_VALID_URL = r'https?://(?:www\.)?meipai.com/media/(?P<id>[0-9]+)'
_VALID_URL = r'https?://(?:www\.)?meipai\.com/media/(?P<id>[0-9]+)'
_TESTS = [{
# regular uploaded video
'url': 'http://www.meipai.com/media/531697625',

View File

@@ -291,7 +291,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
functools.partial(
self._tracks_page_func,
'%s/%s' % (user_id, list_type), video_id, 'list of %s' % list_type),
self._PAGE_SIZE, use_cache=True)
self._PAGE_SIZE)
return self.playlist_result(
entries, video_id, '%s (%s)' % (username, list_type), description)

View File

@@ -258,7 +258,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
if mgid is None or ':' not in mgid:
mgid = self._search_regex(
[r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
[r'data-mgid="(.*?)"', r'swfobject\.embedSWF\(".*?(mgid:.*?)"'],
webpage, 'mgid', default=None)
if not mgid:

View File

@@ -1,177 +0,0 @@
from __future__ import unicode_literals
import binascii
import base64
import hashlib
import re
import json
from .common import InfoExtractor
from ..compat import (
compat_ord,
compat_urllib_parse_unquote,
compat_urllib_parse_urlencode,
)
from ..utils import (
ExtractorError,
sanitized_Request,
)
class MyVideoIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/(?P<id>[0-9]+)/[^?/]+.*'
IE_NAME = 'myvideo'
_TEST = {
'url': 'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
'md5': '2d2753e8130479ba2cb7e0a37002053e',
'info_dict': {
'id': '8229274',
'ext': 'flv',
'title': 'bowling-fail-or-win',
}
}
# Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git
# Released into the Public Domain by Tristan Fischer on 2013-05-19
# https://github.com/rg3/youtube-dl/pull/842
def __rc4crypt(self, data, key):
x = 0
box = list(range(256))
for i in list(range(256)):
x = (x + box[i] + compat_ord(key[i % len(key)])) % 256
box[i], box[x] = box[x], box[i]
x = 0
y = 0
out = ''
for char in data:
x = (x + 1) % 256
y = (y + box[x]) % 256
box[x], box[y] = box[y], box[x]
out += chr(compat_ord(char) ^ box[(box[x] + box[y]) % 256])
return out
def __md5(self, s):
return hashlib.md5(s).hexdigest().encode()
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
GK = (
b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
b'TW1FMU5tVTBNR05pWkRaa05XRXhNVFJoWVRVd1ptSXhaVEV3'
b'TnpsbA0KTVRkbU1tSTRNdz09'
)
# Get video webpage
webpage_url = 'http://www.myvideo.de/watch/%s' % video_id
webpage = self._download_webpage(webpage_url, video_id)
mobj = re.search('source src=\'(.+?)[.]([^.]+)\'', webpage)
if mobj is not None:
self.report_extraction(video_id)
video_url = mobj.group(1) + '.flv'
video_title = self._html_search_regex('<title>([^<]+)</title>',
webpage, 'title')
return {
'id': video_id,
'url': video_url,
'title': video_title,
}
mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
if mobj is not None:
request = sanitized_Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
response = self._download_webpage(request, video_id,
'Downloading video info')
info = json.loads(base64.b64decode(response).decode('utf-8'))
return {
'id': video_id,
'title': info['title'],
'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
'play_path': info['filename'],
'ext': 'flv',
'thumbnail': info['thumbnail'][0]['url'],
}
# try encxml
mobj = re.search('var flashvars={(.+?)}', webpage)
if mobj is None:
raise ExtractorError('Unable to extract video')
params = {}
encxml = ''
sec = mobj.group(1)
for (a, b) in re.findall('(.+?):\'(.+?)\',?', sec):
if not a == '_encxml':
params[a] = b
else:
encxml = compat_urllib_parse_unquote(b)
if not params.get('domain'):
params['domain'] = 'www.myvideo.de'
xmldata_url = '%s?%s' % (encxml, compat_urllib_parse_urlencode(params))
if 'flash_playertype=MTV' in xmldata_url:
self._downloader.report_warning('avoiding MTV player')
xmldata_url = (
'http://www.myvideo.de/dynamic/get_player_video_xml.php'
'?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
) % video_id
# get enc data
enc_data = self._download_webpage(xmldata_url, video_id).split('=')[1]
enc_data_b = binascii.unhexlify(enc_data)
sk = self.__md5(
base64.b64decode(base64.b64decode(GK)) +
self.__md5(
str(video_id).encode('utf-8')
)
)
dec_data = self.__rc4crypt(enc_data_b, sk)
# extracting infos
self.report_extraction(video_id)
video_url = None
mobj = re.search('connectionurl=\'(.*?)\'', dec_data)
if mobj:
video_url = compat_urllib_parse_unquote(mobj.group(1))
if 'myvideo2flash' in video_url:
self.report_warning(
'Rewriting URL to use unencrypted rtmp:// ...',
video_id)
video_url = video_url.replace('rtmpe://', 'rtmp://')
if not video_url:
# extract non rtmp videos
mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
if mobj is None:
raise ExtractorError('unable to extract url')
video_url = compat_urllib_parse_unquote(mobj.group(1)) + compat_urllib_parse_unquote(mobj.group(2))
video_file = self._search_regex('source=\'(.*?)\'', dec_data, 'video file')
video_file = compat_urllib_parse_unquote(video_file)
if not video_file.endswith('f4m'):
ppath, prefix = video_file.split('.')
video_playpath = '%s:%s' % (prefix, ppath)
else:
video_playpath = ''
video_swfobj = self._search_regex(r'swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
video_swfobj = compat_urllib_parse_unquote(video_swfobj)
video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
webpage, 'title')
return {
'id': video_id,
'url': video_url,
'tc_url': video_url,
'title': video_title,
'ext': 'flv',
'play_path': video_playpath,
'player_url': video_swfobj,
}

View File

@@ -111,7 +111,7 @@ class NationalGeographicIE(ThePlatformIE, AdobePassIE):
release_url = self._search_regex(
r'video_auth_playlist_url\s*=\s*"([^"]+)"',
webpage, 'release url')
theplatform_path = self._search_regex(r'https?://link.theplatform.com/s/([^?]+)', release_url, 'theplatform path')
theplatform_path = self._search_regex(r'https?://link\.theplatform\.com/s/([^?]+)', release_url, 'theplatform path')
video_id = theplatform_path.split('/')[-1]
query = {
'mbr': 'true',

View File

@@ -43,7 +43,7 @@ class NaverIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
m_id = re.search(r'var rmcPlayer = new nhn\.rmcnmv\.RMCVideoPlayer\("(.+?)", "(.+?)"',
webpage)
if m_id is None:
error = self._html_search_regex(

View File

@@ -122,7 +122,7 @@ class NBAIE(TurnerBaseIE):
playlist_title = self._og_search_title(webpage, fatal=False)
entries = OnDemandPagedList(
functools.partial(self._fetch_page, team, video_id),
self._PAGE_SIZE, use_cache=True)
self._PAGE_SIZE)
return self.playlist_result(entries, team, playlist_title)

View File

@@ -15,7 +15,7 @@ from ..utils import (
class NBCIE(AdobePassIE):
_VALID_URL = r'(?P<permalink>https?://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+))'
_VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?P<id>n?\d+))'
_TESTS = [
{
@@ -67,11 +67,16 @@ class NBCIE(AdobePassIE):
'skip_download': True,
},
'skip': 'Only works from US',
}
},
{
'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310',
'only_matching': True,
},
]
def _real_extract(self, url):
permalink, video_id = re.match(self._VALID_URL, url).groups()
permalink = 'http' + permalink
video_data = self._download_json(
'https://api.nbc.com/v3/videos', video_id, query={
'filter[permalink]': permalink,

View File

@@ -1,45 +1,106 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_unquote_plus
)
from ..utils import (
int_or_none,
parse_duration,
remove_end,
unified_strdate,
urljoin
)
class NDTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ndtv\.com/video/(?:[^/]+/)+[^/?^&]+-(?P<id>\d+)'
_VALID_URL = r'https?://(?:[^/]+\.)?ndtv\.com/(?:[^/]+/)*videos?/?(?:[^/]+/)*[^/?^&]+-(?P<id>\d+)'
_TEST = {
'url': 'http://www.ndtv.com/video/news/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal-300710',
'md5': '39f992dbe5fb531c395d8bbedb1e5e88',
'info_dict': {
'id': '300710',
'ext': 'mp4',
'title': "NDTV exclusive: Don't need character certificate from Rahul Gandhi, says Arvind Kejriwal",
'description': 'md5:ab2d4b4a6056c5cb4caa6d729deabf02',
'upload_date': '20131208',
'duration': 1327,
'thumbnail': r're:https?://.*\.jpg',
_TESTS = [
{
'url': 'https://khabar.ndtv.com/video/show/prime-time/prime-time-ill-system-and-poor-education-468818',
'md5': '78efcf3880ef3fd9b83d405ca94a38eb',
'info_dict': {
'id': '468818',
'ext': 'mp4',
'title': "प्राइम टाइम: सिस्टम बीमार, स्कूल बदहाल",
'description': 'md5:f410512f1b49672e5695dea16ef2731d',
'upload_date': '20170928',
'duration': 2218,
'thumbnail': r're:https?://.*\.jpg',
}
},
}
{
# __filename is url
'url': 'http://movies.ndtv.com/videos/cracker-free-diwali-wishes-from-karan-johar-kriti-sanon-other-stars-470304',
'md5': 'f1d709352305b44443515ac56b45aa46',
'info_dict': {
'id': '470304',
'ext': 'mp4',
'title': "Cracker-Free Diwali Wishes From Karan Johar, Kriti Sanon & Other Stars",
'description': 'md5:f115bba1adf2f6433fa7c1ade5feb465',
'upload_date': '20171019',
'duration': 137,
'thumbnail': r're:https?://.*\.jpg',
}
},
{
'url': 'https://www.ndtv.com/video/news/news/delhi-s-air-quality-status-report-after-diwali-is-very-poor-470372',
'only_matching': True
},
{
'url': 'https://auto.ndtv.com/videos/the-cnb-daily-october-13-2017-469935',
'only_matching': True
},
{
'url': 'https://sports.ndtv.com/cricket/videos/2nd-t20i-rock-thrown-at-australia-cricket-team-bus-after-win-over-india-469764',
'only_matching': True
},
{
'url': 'http://gadgets.ndtv.com/videos/uncharted-the-lost-legacy-review-465568',
'only_matching': True
},
{
'url': 'http://profit.ndtv.com/videos/news/video-indian-economy-on-very-solid-track-international-monetary-fund-chief-470040',
'only_matching': True
},
{
'url': 'http://food.ndtv.com/video-basil-seeds-coconut-porridge-419083',
'only_matching': True
},
{
'url': 'https://doctor.ndtv.com/videos/top-health-stories-of-the-week-467396',
'only_matching': True
},
{
'url': 'https://swirlster.ndtv.com/video/how-to-make-friends-at-work-469324',
'only_matching': True
}
]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = remove_end(self._og_search_title(webpage), ' - NDTV')
# '__title' does not contain extra words such as sub-site name, "Video" etc.
title = compat_urllib_parse_unquote_plus(
self._search_regex(r"__title\s*=\s*'([^']+)'", webpage, 'title', default=None) or
self._og_search_title(webpage))
filename = self._search_regex(
r"__filename='([^']+)'", webpage, 'video filename')
video_url = 'http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' % filename
r"(?:__)?filename\s*[:=]\s*'([^']+)'", webpage, 'video filename')
# in "movies" sub-site pages, filename is URL
video_url = urljoin('https://ndtvod.bc-ssl.cdn.bitgravity.com/23372/ndtv/', filename.lstrip('/'))
duration = int_or_none(self._search_regex(
r"__duration='([^']+)'", webpage, 'duration', fatal=False))
# "doctor" sub-site has MM:SS format
duration = parse_duration(self._search_regex(
r"(?:__)?duration\s*[:=]\s*'([^']+)'", webpage, 'duration', fatal=False))
# "sports", "doctor", "swirlster" sub-sites don't have 'publish-date'
upload_date = unified_strdate(self._html_search_meta(
'publish-date', webpage, 'upload date', fatal=False))
'publish-date', webpage, 'upload date', default=None) or self._html_search_meta(
'uploadDate', webpage, 'upload date', default=None) or self._search_regex(
r'datePublished"\s*:\s*"([^"]+)"', webpage, 'upload date', fatal=False))
description = remove_end(self._og_search_description(webpage), ' (Read more)')

View File

@@ -18,7 +18,13 @@ from ..utils import (
class NexxIE(InfoExtractor):
_VALID_URL = r'https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/(?P<id>\d+)'
_VALID_URL = r'''(?x)
(?:
https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/|
nexx:(?P<domain_id_s>\d+):
)
(?P<id>\d+)
'''
_TESTS = [{
# movie
'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907',
@@ -62,8 +68,18 @@ class NexxIE(InfoExtractor):
}, {
'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
'only_matching': True,
}, {
'url': 'nexx:748:128907',
'only_matching': True,
}]
@staticmethod
def _extract_domain_id(webpage):
mobj = re.search(
r'<script\b[^>]+\bsrc=["\'](?:https?:)?//require\.nexx(?:\.cloud|cdn\.com)/(?P<id>\d+)',
webpage)
return mobj.group('id') if mobj else None
@staticmethod
def _extract_urls(webpage):
# Reference:
@@ -72,11 +88,8 @@ class NexxIE(InfoExtractor):
entries = []
# JavaScript Integration
mobj = re.search(
r'<script\b[^>]+\bsrc=["\']https?://require\.nexx(?:\.cloud|cdn\.com)/(?P<id>\d+)',
webpage)
if mobj:
domain_id = mobj.group('id')
domain_id = NexxIE._extract_domain_id(webpage)
if domain_id:
for video_id in re.findall(
r'(?is)onPLAYReady.+?_play\.init\s*\(.+?\s*,\s*["\']?(\d+)',
webpage):
@@ -112,7 +125,8 @@ class NexxIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
domain_id, video_id = mobj.group('domain_id', 'id')
domain_id = mobj.group('domain_id') or mobj.group('domain_id_s')
video_id = mobj.group('id')
# Reverse engineered from JS code (see getDeviceID function)
device_id = '%d:%d:%d%d' % (

View File

@@ -75,7 +75,7 @@ class NickIE(MTVServicesInfoExtractor):
class NickDeIE(MTVServicesInfoExtractor):
IE_NAME = 'nick.de'
_VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.(?:de|com\.pl)|nickelodeon\.(?:nl|at))/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:www\.)?(?P<host>nick\.(?:de|com\.pl|ch)|nickelodeon\.(?:nl|be|at|dk|no|se))/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse',
'only_matching': True,
@@ -91,6 +91,21 @@ class NickDeIE(MTVServicesInfoExtractor):
}, {
'url': 'http://www.nick.com.pl/seriale/474-spongebob-kanciastoporty/wideo/17412-teatr-to-jest-to-rodeo-oszolom',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.no/program/2626-bulderhuset/videoer/90947-femteklasse-veronica-vs-vanzilla',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.dk/serier/2626-hojs-hus/videoer/761-tissepause',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.se/serier/2626-lugn-i-stormen/videos/998-',
'only_matching': True,
}, {
'url': 'http://www.nick.ch/shows/2304-adventure-time-abenteuerzeit-mit-finn-und-jake',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.be/afspeellijst/4530-top-videos/videos/episode/73917-inval-broodschapper-lariekoek-arie',
'only_matching': True,
}]
def _extract_mrss_url(self, webpage, host):
@@ -132,13 +147,28 @@ class NickNightIE(NickDeIE):
class NickRuIE(MTVServicesInfoExtractor):
IE_NAME = 'nickelodeonru'
_VALID_URL = r'https?://(?:www\.)nickelodeon\.ru/(?:playlist|shows|videos)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:www\.)nickelodeon\.(?:ru|fr|es|pt|ro|hu)/[^/]+/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.nickelodeon.ru/shows/henrydanger/videos/episodes/3-sezon-15-seriya-licenziya-na-polyot/pmomfb#playlist/7airc6',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.ru/videos/smotri-na-nickelodeon-v-iyule/g9hvh7',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.fr/programmes/bob-l-eponge/videos/le-marathon-de-booh-kini-bottom-mardi-31-octobre/nfn7z0',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.es/videos/nickelodeon-consejos-tortitas/f7w7xy',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.pt/series/spongebob-squarepants/videos/a-bolha-de-tinta-gigante/xutq1b',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.ro/emisiuni/shimmer-si-shine/video/nahal-din-bomboane/uw5u2k',
'only_matching': True,
}, {
'url': 'http://www.nickelodeon.hu/musorok/spongyabob-kockanadrag/videok/episodes/buborekfujas-az-elszakadt-nadrag/q57iob#playlist/k6te4y',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@@ -40,7 +40,7 @@ class NiconicoIE(InfoExtractor):
'uploader': 'takuya0301',
'uploader_id': '2698420',
'upload_date': '20131123',
'timestamp': 1385182762,
'timestamp': int, # timestamp is unstable
'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
'duration': 33,
'view_count': int,
@@ -115,8 +115,8 @@ class NiconicoIE(InfoExtractor):
'skip': 'Requires an account',
}, {
# "New" HTML5 video
# md5 is unstable
'url': 'http://www.nicovideo.jp/watch/sm31464864',
'md5': '351647b4917660986dc0fa8864085135',
'info_dict': {
'id': 'sm31464864',
'ext': 'mp4',
@@ -124,7 +124,7 @@ class NiconicoIE(InfoExtractor):
'description': 'md5:e52974af9a96e739196b2c1ca72b5feb',
'timestamp': 1498514060,
'upload_date': '20170626',
'uploader': 'ゲス',
'uploader': 'ゲス',
'uploader_id': '40826363',
'thumbnail': r're:https?://.*',
'duration': 198,
@@ -132,6 +132,25 @@ class NiconicoIE(InfoExtractor):
'comment_count': int,
},
'skip': 'Requires an account',
}, {
# Video without owner
'url': 'http://www.nicovideo.jp/watch/sm18238488',
'md5': 'd265680a1f92bdcbbd2a507fc9e78a9e',
'info_dict': {
'id': 'sm18238488',
'ext': 'mp4',
'title': '【実写版】ミュータントタートルズ',
'description': 'md5:15df8988e47a86f9e978af2064bf6d8e',
'timestamp': 1341160408,
'upload_date': '20120701',
'uploader': None,
'uploader_id': None,
'thumbnail': r're:https?://.*',
'duration': 5271,
'view_count': int,
'comment_count': int,
},
'skip': 'Requires an account',
}, {
'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
'only_matching': True,
@@ -395,7 +414,9 @@ class NiconicoIE(InfoExtractor):
webpage_url = get_video_info('watch_url') or url
owner = api_data.get('owner', {})
# Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
# in the JSON, which will cause None to be returned instead of {}.
owner = try_get(api_data, lambda x: x.get('owner'), dict) or {}
uploader_id = get_video_info(['ch_id', 'user_id']) or owner.get('id')
uploader = get_video_info(['ch_name', 'user_nickname']) or owner.get('nickname')

View File

@@ -469,7 +469,7 @@ class SchoolTVIE(NPODataMidEmbedIE):
class HetKlokhuisIE(NPODataMidEmbedIE):
IE_NAME = 'hetklokhuis'
_VALID_URL = r'https?://(?:www\.)?hetklokhuis.nl/[^/]+/\d+/(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:www\.)?hetklokhuis\.nl/[^/]+/\d+/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'http://hetklokhuis.nl/tv-uitzending/3471/Zwaartekrachtsgolven',

View File

@@ -7,7 +7,7 @@ from .common import InfoExtractor
class OnceIE(InfoExtractor):
_VALID_URL = r'https?://.+?\.unicornmedia\.com/now/[^/]+/[^/]+/(?P<domain_id>[^/]+)/(?P<application_id>[^/]+)/(?:[^/]+/)?(?P<media_item_id>[^/]+)/content\.(?:once|m3u8|mp4)'
_VALID_URL = r'https?://.+?\.unicornmedia\.com/now/(?:ads/vmap/)?[^/]+/[^/]+/(?P<domain_id>[^/]+)/(?P<application_id>[^/]+)/(?:[^/]+/)?(?P<media_item_id>[^/]+)/content\.(?:once|m3u8|mp4)'
ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8'
PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4'

View File

@@ -13,11 +13,11 @@ from ..utils import (
class OnionStudiosIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:videos/[^/]+-|embed\?.*\bid=)(?P<id>\d+)(?!-)'
_VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:video(?:s/[^/]+-|/)|embed\?.*\bid=)(?P<id>\d+)(?!-)'
_TESTS = [{
'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
'md5': 'e49f947c105b8a78a675a0ee1bddedfe',
'md5': '719d1f8c32094b8c33902c17bcae5e34',
'info_dict': {
'id': '2937',
'ext': 'mp4',
@@ -29,12 +29,15 @@ class OnionStudiosIE(InfoExtractor):
}, {
'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
'only_matching': True,
}, {
'url': 'http://www.onionstudios.com/video/6139.json',
'only_matching': True,
}]
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/embed.+?)\1', webpage)
r'(?s)<(?:iframe|bulbs-video)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/(?:embed.+?|video/\d+\.json))\1', webpage)
if mobj:
return mobj.group('url')

View File

@@ -11,7 +11,7 @@ class ParliamentLiveUKIE(InfoExtractor):
_TESTS = [{
'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
'info_dict': {
'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
'id': '1_af9nv9ym',
'ext': 'mp4',
'title': 'Home Affairs Committee',
'uploader_id': 'FFMPEG-01',
@@ -28,14 +28,14 @@ class ParliamentLiveUKIE(InfoExtractor):
webpage = self._download_webpage(
'http://vodplayer.parliamentlive.tv/?mid=' + video_id, video_id)
widget_config = self._parse_json(self._search_regex(
r'kWidgetConfig\s*=\s*({.+});',
r'(?s)kWidgetConfig\s*=\s*({.+});',
webpage, 'kaltura widget config'), video_id)
kaltura_url = 'kaltura:%s:%s' % (widget_config['wid'][1:], widget_config['entry_id'])
kaltura_url = 'kaltura:%s:%s' % (
widget_config['wid'][1:], widget_config['entry_id'])
event_title = self._download_json(
'http://parliamentlive.tv/Event/GetShareVideo/' + video_id, video_id)['event']['title']
return {
'_type': 'url_transparent',
'id': video_id,
'title': event_title,
'description': '',
'url': kaltura_url,

View File

@@ -187,7 +187,7 @@ class PBSIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://
(?:
# Direct video URL
(?:%s)/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
(?:%s)/(?:(?:vir|port)alplayer|video)/(?P<id>[0-9]+)(?:[?/]|$) |
# Article with embedded player (or direct video)
(?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
# Player
@@ -367,6 +367,10 @@ class PBSIE(InfoExtractor):
{
'url': 'http://watch.knpb.org/video/2365616055/',
'only_matching': True,
},
{
'url': 'https://player.pbs.org/portalplayer/3004638221/?uid=',
'only_matching': True,
}
]
_ERRORS = {

View File

@@ -14,7 +14,7 @@ from ..utils import (
class PornFlipIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z]{11})'
_VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z-]{11})'
_TESTS = [{
'url': 'https://www.pornflip.com/v/wz7DfNhMmep',
'md5': '98c46639849145ae1fd77af532a9278c',
@@ -34,6 +34,12 @@ class PornFlipIE(InfoExtractor):
}, {
'url': 'https://www.pornflip.com/embed/wz7DfNhMmep',
'only_matching': True,
}, {
'url': 'https://www.pornflip.com/v/EkRD6-vS2-s',
'only_matching': True,
}, {
'url': 'https://www.pornflip.com/embed/EkRD6-vS2-s',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@@ -1,5 +1,7 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -35,6 +37,8 @@ class RedditIE(InfoExtractor):
'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id,
mpd_id='dash', fatal=False))
self._sort_formats(formats)
return {
'id': video_id,
'title': video_id,
@@ -43,7 +47,7 @@ class RedditIE(InfoExtractor):
class RedditRIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/]+)'
_VALID_URL = r'(?P<url>https?://(?:www\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/?#&]+))'
_TESTS = [{
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
'info_dict': {
@@ -81,10 +85,13 @@ class RedditRIE(InfoExtractor):
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
url, video_id = mobj.group('url', 'id')
video_id = self._match_id(url)
data = self._download_json(
url + '.json', video_id)[0]['data']['children'][0]['data']
url + '/.json', video_id)[0]['data']['children'][0]['data']
video_url = data['url']

View File

@@ -12,10 +12,10 @@ class RtlNlIE(InfoExtractor):
IE_NAME = 'rtl.nl'
IE_DESC = 'rtl.nl and rtlxl.nl'
_VALID_URL = r'''(?x)
https?://(?:www\.)?
https?://(?:(?:www|static)\.)?
(?:
rtlxl\.nl/[^\#]*\#!/[^/]+/|
rtl\.nl/(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html\b.+?\buuid=|video/)
rtl\.nl/(?:(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html|embed)\b.+?\buuid=|video/)
)
(?P<id>[0-9a-f-]+)'''
@@ -73,6 +73,9 @@ class RtlNlIE(InfoExtractor):
}, {
'url': 'https://www.rtl.nl/video/c603c9c2-601d-4b5e-8175-64f1e942dc7d/',
'only_matching': True,
}, {
'url': 'https://static.rtl.nl/embed/?uuid=1a2970fc-5c0b-43ff-9fdc-927e39e6d1bc&autoplay=false&publicatiepunt=rtlnieuwsnl',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@@ -10,6 +10,7 @@ from ..compat import (
compat_struct_unpack,
)
from ..utils import (
determine_ext,
ExtractorError,
float_or_none,
remove_end,
@@ -84,6 +85,18 @@ class RTVEALaCartaIE(InfoExtractor):
'title': 'TODO',
},
'skip': 'The f4m manifest can\'t be used yet',
}, {
'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
'md5': 'e55e162379ad587e9640eda4f7353c0f',
'info_dict': {
'id': '4236788',
'ext': 'mp4',
'title': 'Servir y proteger - Capítulo 104 ',
'duration': 3222.0,
},
'params': {
'skip_download': True, # requires ffmpeg
},
}, {
'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
'only_matching': True,
@@ -107,24 +120,41 @@ class RTVEALaCartaIE(InfoExtractor):
video_id)['page']['items'][0]
if info['state'] == 'DESPU':
raise ExtractorError('The video is no longer available', expected=True)
title = info['title']
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
png_request = sanitized_Request(png_url)
png_request.add_header('Referer', url)
png = self._download_webpage(png_request, video_id, 'Downloading url information')
video_url = _decrypt_url(png)
if not video_url.endswith('.f4m'):
ext = determine_ext(video_url)
formats = []
if not video_url.endswith('.f4m') and ext != 'm3u8':
if '?' not in video_url:
video_url = video_url.replace('resources/', 'auth/resources/')
video_url = video_url.replace('.net.rtve', '.multimedia.cdn.rtve')
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, ext='mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
video_url, video_id, f4m_id='hds', fatal=False))
else:
formats.append({
'url': video_url,
})
self._sort_formats(formats)
subtitles = None
if info.get('sbtFile') is not None:
subtitles = self.extract_subtitles(video_id, info['sbtFile'])
return {
'id': video_id,
'title': info['title'],
'url': video_url,
'title': title,
'formats': formats,
'thumbnail': info.get('image'),
'page_url': url,
'subtitles': subtitles,

View File

@@ -25,7 +25,7 @@ class RUHDIE(InfoExtractor):
video_url = self._html_search_regex(
r'<param name="src" value="([^"]+)"', webpage, 'video url')
title = self._html_search_regex(
r'<title>([^<]+)&nbsp;&nbsp; RUHD.ru - Видео Высокого качества №1 в России!</title>',
r'<title>([^<]+)&nbsp;&nbsp; RUHD\.ru - Видео Высокого качества №1 в России!</title>',
webpage, 'title')
description = self._html_search_regex(
r'(?s)<div id="longdesc">(.+?)<span id="showlink">',

View File

@@ -1,60 +1,190 @@
# coding: utf-8
from __future__ import unicode_literals
from .adobepass import AdobePassIE
import datetime
import json
import hashlib
import hmac
import re
from .common import InfoExtractor
from .anvato import AnvatoIE
from ..utils import (
int_or_none,
smuggle_url,
update_url_query,
urlencode_postdata,
xpath_text,
)
class ScrippsNetworksWatchIE(AdobePassIE):
class ScrippsNetworksWatchIE(InfoExtractor):
IE_NAME = 'scrippsnetworks:watch'
_VALID_URL = r'https?://watch\.(?:hgtv|foodnetwork|travelchannel|diynetwork|cookingchanneltv)\.com/player\.[A-Z0-9]+\.html#(?P<id>\d+)'
_TEST = {
'url': 'http://watch.hgtv.com/player.HNT.html#0256538',
_VALID_URL = r'''(?x)
https?://
watch\.
(?P<site>hgtv|foodnetwork|travelchannel|diynetwork|cookingchanneltv|geniuskitchen)\.com/
(?:
player\.[A-Z0-9]+\.html\#|
show/(?:[^/]+/){2}|
player/
)
(?P<id>\d+)
'''
_TESTS = [{
'url': 'http://watch.hgtv.com/show/HGTVE/Best-Ever-Treehouses/2241515/Best-Ever-Treehouses/',
'md5': '26545fd676d939954c6808274bdb905a',
'info_dict': {
'id': '0256538',
'id': '4173834',
'ext': 'mp4',
'title': 'Seeking a Wow House',
'description': 'Buyers retiring in Palm Springs, California, want a modern house with major wow factor. They\'re also looking for a pool and a large, open floorplan with tall windows looking out at the views.',
'uploader': 'SCNI',
'upload_date': '20170207',
'timestamp': 1486450493,
'title': 'Best Ever Treehouses',
'description': "We're searching for the most over the top treehouses.",
'uploader': 'ANV',
'upload_date': '20170922',
'timestamp': 1506056400,
},
'skip': 'requires TV provider authentication',
'params': {
'skip_download': True,
},
'add_ie': [AnvatoIE.ie_key()],
}, {
'url': 'http://watch.diynetwork.com/show/DSAL/Salvage-Dawgs/2656646/Covington-Church/',
'only_matching': True,
}, {
'url': 'http://watch.diynetwork.com/player.HNT.html#2656646',
'only_matching': True,
}, {
'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/',
'only_matching': True,
}]
_SNI_TABLE = {
'hgtv': 'hgtv',
'diynetwork': 'diy',
'foodnetwork': 'food',
'cookingchanneltv': 'cook',
'travelchannel': 'trav',
'geniuskitchen': 'genius',
}
_SNI_HOST = 'web.api.video.snidigital.com'
_AWS_REGION = 'us-east-1'
_AWS_IDENTITY_ID_JSON = json.dumps({
'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % _AWS_REGION
})
_AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback'
_AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1'
_AWS_SERVICE = 'execute-api'
_AWS_REQUEST = 'aws4_request'
_AWS_SIGNED_HEADERS = ';'.join([
'host', 'x-amz-date', 'x-amz-security-token', 'x-api-key'])
_AWS_CANONICAL_REQUEST_TEMPLATE = '''GET
%(uri)s
host:%(host)s
x-amz-date:%(date)s
x-amz-security-token:%(token)s
x-api-key:%(key)s
%(signed_headers)s
%(payload_hash)s'''
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
channel = self._parse_json(self._search_regex(
r'"channels"\s*:\s*(\[.+\])',
webpage, 'channels'), video_id)[0]
video_data = next(v for v in channel['videos'] if v.get('nlvid') == video_id)
title = video_data['title']
release_url = video_data['releaseUrl']
if video_data.get('restricted'):
requestor_id = self._search_regex(
r'requestorId\s*=\s*"([^"]+)";', webpage, 'requestor id')
resource = self._get_mvpd_resource(
requestor_id, title, video_id,
video_data.get('ratings', [{}])[0].get('rating'))
auth = self._extract_mvpd_auth(
url, video_id, requestor_id, resource)
release_url = update_url_query(release_url, {'auth': auth})
mobj = re.match(self._VALID_URL, url)
site_id, video_id = mobj.group('site', 'id')
return {
'_type': 'url_transparent',
'id': video_id,
'title': title,
'url': smuggle_url(release_url, {'force_smil_url': True}),
'description': video_data.get('description'),
'thumbnail': video_data.get('thumbnailUrl'),
'series': video_data.get('showTitle'),
'season_number': int_or_none(video_data.get('season')),
'episode_number': int_or_none(video_data.get('episodeNumber')),
'ie_key': 'ThePlatform',
def aws_hash(s):
return hashlib.sha256(s.encode('utf-8')).hexdigest()
token = self._download_json(
'https://cognito-identity.us-east-1.amazonaws.com/', video_id,
data=self._AWS_IDENTITY_ID_JSON.encode('utf-8'),
headers={
'Accept': '*/*',
'Content-Type': 'application/x-amz-json-1.1',
'Referer': url,
'X-Amz-Content-Sha256': aws_hash(self._AWS_IDENTITY_ID_JSON),
'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken',
'X-Amz-User-Agent': self._AWS_USER_AGENT,
})['Token']
sts = self._download_xml(
'https://sts.amazonaws.com/', video_id, data=urlencode_postdata({
'Action': 'AssumeRoleWithWebIdentity',
'RoleArn': 'arn:aws:iam::710330595350:role/Cognito_WebAPIUnauth_Role',
'RoleSessionName': 'web-identity',
'Version': '2011-06-15',
'WebIdentityToken': token,
}), headers={
'Referer': url,
'X-Amz-User-Agent': self._AWS_USER_AGENT,
'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
})
def get(key):
return xpath_text(
sts, './/{https://sts.amazonaws.com/doc/2011-06-15/}%s' % key,
fatal=True)
access_key_id = get('AccessKeyId')
secret_access_key = get('SecretAccessKey')
session_token = get('SessionToken')
# Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
uri = '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id)
datetime_now = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
date = datetime_now[:8]
canonical_string = self._AWS_CANONICAL_REQUEST_TEMPLATE % {
'uri': uri,
'host': self._SNI_HOST,
'date': datetime_now,
'token': session_token,
'key': self._AWS_API_KEY,
'signed_headers': self._AWS_SIGNED_HEADERS,
'payload_hash': aws_hash(''),
}
# Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
credential_string = '/'.join([date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST])
string_to_sign = '\n'.join([
'AWS4-HMAC-SHA256', datetime_now, credential_string,
aws_hash(canonical_string)])
# Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
def aws_hmac(key, msg):
return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)
def aws_hmac_digest(key, msg):
return aws_hmac(key, msg).digest()
def aws_hmac_hexdigest(key, msg):
return aws_hmac(key, msg).hexdigest()
k_secret = 'AWS4' + secret_access_key
k_date = aws_hmac_digest(k_secret.encode('utf-8'), date)
k_region = aws_hmac_digest(k_date, self._AWS_REGION)
k_service = aws_hmac_digest(k_region, self._AWS_SERVICE)
k_signing = aws_hmac_digest(k_service, self._AWS_REQUEST)
signature = aws_hmac_hexdigest(k_signing, string_to_sign)
auth_header = ', '.join([
'AWS4-HMAC-SHA256 Credential=%s' % '/'.join(
[access_key_id, date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST]),
'SignedHeaders=%s' % self._AWS_SIGNED_HEADERS,
'Signature=%s' % signature,
])
mcp_id = self._download_json(
'https://%s%s' % (self._SNI_HOST, uri), video_id, headers={
'Accept': '*/*',
'Referer': url,
'Authorization': auth_header,
'X-Amz-Date': datetime_now,
'X-Amz-Security-Token': session_token,
'X-Api-Key': self._AWS_API_KEY,
})['results'][0]['mcpId']
return self.url_result(
smuggle_url(
'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id,
{'geo_countries': ['US']}),
AnvatoIE.ie_key(), video_id=mcp_id)

View File

@@ -0,0 +1,43 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class ServusIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?servus\.com/(?:at|de)/p/[^/]+/(?P<id>AA-\w+|\d+-\d+)'
_TESTS = [{
'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/',
'md5': '046dee641cda1c4cabe13baef3be2c1c',
'info_dict': {
'id': 'AA-1T6VBU5PW1W12',
'ext': 'mp4',
'title': 'Die Grünen aus Volkssicht',
'description': 'md5:052b5da1cb2cd7d562ef1f19be5a5cba',
'thumbnail': r're:^https?://.*\.jpg$',
}
}, {
'url': 'https://www.servus.com/at/p/Wie-das-Leben-beginnt/1309984137314-381415152/',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._og_search_title(webpage)
description = self._og_search_description(webpage)
thumbnail = self._og_search_thumbnail(webpage)
formats = self._extract_m3u8_formats(
'https://stv.rbmbtnx.net/api/v1/manifests/%s.m3u8' % video_id,
video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'formats': formats,
}

View File

@@ -18,46 +18,32 @@ from ..utils import (
class ShahidIE(InfoExtractor):
_NETRC_MACHINE = 'shahid'
_VALID_URL = r'https?://shahid\.mbc\.net/ar/(?P<type>episode|movie)/(?P<id>\d+)'
_VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)'
_TESTS = [{
'url': 'https://shahid.mbc.net/ar/episode/90574/%D8%A7%D9%84%D9%85%D9%84%D9%83-%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%84%D9%87-%D8%A7%D9%84%D8%A5%D9%86%D8%B3%D8%A7%D9%86-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-3.html',
'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AC%D9%84%D8%B3-%D8%A7%D9%84%D8%B4%D8%A8%D8%A7%D8%A8-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-275286',
'info_dict': {
'id': '90574',
'id': '275286',
'ext': 'mp4',
'title': 'الملك عبدالله الإنسان الموسم 1 كليب 3',
'description': 'الفيلم الوثائقي - الملك عبد الله الإنسان',
'duration': 2972,
'timestamp': 1422057420,
'upload_date': '20150123',
'title': 'مجلس الشباب الموسم 1 كليب 1',
'timestamp': 1506988800,
'upload_date': '20171003',
},
'params': {
# m3u8 download
'skip_download': True,
}
}, {
'url': 'https://shahid.mbc.net/ar/movie/151746/%D8%A7%D9%84%D9%82%D9%86%D8%A7%D8%B5%D8%A9.html',
'url': 'https://shahid.mbc.net/ar/movies/%D8%A7%D9%84%D9%82%D9%86%D8%A7%D8%B5%D8%A9/movie-151746',
'only_matching': True
}, {
# shahid plus subscriber only
'url': 'https://shahid.mbc.net/ar/episode/90511/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1.html',
'url': 'https://shahid.mbc.net/ar/series/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/episode-90511',
'only_matching': True
}]
def _real_initialize(self):
email, password = self._get_login_info()
if email is None:
return
def _api2_request(self, *args, **kwargs):
try:
user_data = self._download_json(
'https://shahid.mbc.net/wd/service/users/login',
None, 'Logging in', data=json.dumps({
'email': email,
'password': password,
'basic': 'false',
}).encode('utf-8'), headers={
'Content-Type': 'application/json; charset=UTF-8',
})['user']
return self._download_json(*args, **kwargs)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError):
fail_data = self._parse_json(
@@ -69,6 +55,21 @@ class ShahidIE(InfoExtractor):
raise ExtractorError(faults_message, expected=True)
raise
def _real_initialize(self):
email, password = self._get_login_info()
if email is None:
return
user_data = self._api2_request(
'https://shahid.mbc.net/wd/service/users/login',
None, 'Logging in', data=json.dumps({
'email': email,
'password': password,
'basic': 'false',
}).encode('utf-8'), headers={
'Content-Type': 'application/json; charset=UTF-8',
})['user']
self._download_webpage(
'https://shahid.mbc.net/populateContext',
None, 'Populate Context', data=urlencode_postdata({
@@ -93,15 +94,17 @@ class ShahidIE(InfoExtractor):
def _real_extract(self, url):
page_type, video_id = re.match(self._VALID_URL, url).groups()
if page_type == 'clip':
page_type = 'episode'
player = self._get_api_data(self._download_json(
'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-%s.type-player.html' % video_id,
video_id, 'Downloading player JSON'))
playout = self._api2_request(
'https://api2.shahid.net/proxy/v2/playout/url/' + video_id,
video_id, 'Downloading player JSON')['playout']
if player.get('drm'):
if playout.get('drm'):
raise ExtractorError('This video is DRM protected.', expected=True)
formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4')
formats = self._extract_m3u8_formats(playout['url'], video_id, 'mp4')
self._sort_formats(formats)
video = self._get_api_data(self._download_json(

View File

@@ -0,0 +1,34 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import ExtractorError
class SlidesLiveIE(InfoExtractor):
_VALID_URL = r'https?://slideslive\.com/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://slideslive.com/38902413/gcc-ia16-backend',
'md5': 'b29fcd6c6952d0c79c5079b0e7a07e6f',
'info_dict': {
'id': 'LMtgR8ba0b0',
'ext': 'mp4',
'title': '38902413: external video',
'description': '3890241320170925-9-1yd6ech.mp4',
'uploader': 'SlidesLive Administrator',
'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
'upload_date': '20170925',
}
}]
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
url, video_id, headers={'Accept': 'application/json'})
service_name = video_data['video_service_name']
if service_name == 'YOUTUBE':
yt_video_id = video_data['video_service_id']
return self.url_result(yt_video_id, 'Youtube', video_id=yt_video_id)
else:
raise ExtractorError(
'Unsupported service name: {0}'.format(service_name), expected=True)

View File

@@ -138,7 +138,7 @@ class SoundcloudIE(InfoExtractor):
},
]
_CLIENT_ID = 'JlZIsxg2hY5WnBgtn3jfS0UYCl0K8DOg'
_CLIENT_ID = 'c6CU49JDMapyrQo06UxU9xouB9ZVzqCn'
_IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
@staticmethod

View File

@@ -8,36 +8,49 @@ from .common import InfoExtractor
class SoundgasmIE(InfoExtractor):
IE_NAME = 'soundgasm'
_VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)'
_VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_-]+)/(?P<display_id>[0-9a-zA-Z_-]+)'
_TEST = {
'url': 'http://soundgasm.net/u/ytdl/Piano-sample',
'md5': '010082a2c802c5275bb00030743e75ad',
'info_dict': {
'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9',
'ext': 'm4a',
'title': 'ytdl_Piano-sample',
'description': 'Royalty Free Sample Music'
'title': 'Piano sample',
'description': 'Royalty Free Sample Music',
'uploader': 'ytdl',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('title')
audio_title = mobj.group('user') + '_' + mobj.group('title')
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id)
audio_url = self._html_search_regex(
r'(?s)m4a\:\s"([^"]+)"', webpage, 'audio URL')
audio_id = re.split(r'\/|\.', audio_url)[-2]
r'(?s)m4a\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
'audio URL', group='url')
title = self._search_regex(
r'<div[^>]+\bclass=["\']jp-title[^>]+>([^<]+)',
webpage, 'title', default=display_id)
description = self._html_search_regex(
r'(?s)<li>Description:\s(.*?)<\/li>', webpage, 'description',
fatal=False)
(r'(?s)<div[^>]+\bclass=["\']jp-description[^>]+>(.+?)</div>',
r'(?s)<li>Description:\s(.*?)<\/li>'),
webpage, 'description', fatal=False)
audio_id = self._search_regex(
r'/([^/]+)\.m4a', audio_url, 'audio id', default=display_id)
return {
'id': audio_id,
'display_id': display_id,
'url': audio_url,
'title': audio_title,
'description': description
'vcodec': 'none',
'title': title,
'description': description,
'uploader': mobj.group('user'),
}

View File

@@ -44,6 +44,7 @@ class SpikeIE(MTVServicesInfoExtractor):
_FEED_URL = 'http://www.spike.com/feeds/mrss/'
_MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s'
_CUSTOM_URL_REGEX = re.compile(r'spikenetworkapp://([^/]+/[-a-fA-F0-9]+)')
_GEO_COUNTRIES = ['US']
def _extract_mgid(self, webpage):
mgid = super(SpikeIE, self)._extract_mgid(webpage)

View File

@@ -66,7 +66,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
r'(?s)<description>([^<]+)</description>',
coursepage, 'description', fatal=False)
links = orderedSet(re.findall(r'<a href="(VideoPage.php\?[^"]+)">', coursepage))
links = orderedSet(re.findall(r'<a href="(VideoPage\.php\?[^"]+)">', coursepage))
info['entries'] = [self.url_result(
'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
) for l in links]
@@ -84,7 +84,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
rootpage = self._download_webpage(rootURL, info['id'],
errnote='Unable to download course info page')
links = orderedSet(re.findall(r'<a href="(CoursePage.php\?[^"]+)">', rootpage))
links = orderedSet(re.findall(r'<a href="(CoursePage\.php\?[^"]+)">', rootpage))
info['entries'] = [self.url_result(
'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
) for l in links]

View File

@@ -4,8 +4,10 @@ import re
from .common import InfoExtractor
from ..utils import (
extract_attributes,
ExtractorError,
unescapeHTML,
get_element_by_class,
js_to_json,
)
@@ -25,35 +27,39 @@ class SteamIE(InfoExtractor):
'url': 'http://store.steampowered.com/video/105600/',
'playlist': [
{
'md5': 'f870007cee7065d7c76b88f0a45ecc07',
'md5': '6a294ee0c4b1f47f5bb76a65e31e3592',
'info_dict': {
'id': '81300',
'ext': 'flv',
'title': 'Terraria 1.1 Trailer',
'id': '2040428',
'ext': 'mp4',
'title': 'Terraria 1.3 Trailer',
'playlist_index': 1,
}
},
{
'md5': '61aaf31a5c5c3041afb58fb83cbb5751',
'md5': '911672b20064ca3263fa89650ba5a7aa',
'info_dict': {
'id': '80859',
'ext': 'flv',
'title': 'Terraria Trailer',
'id': '2029566',
'ext': 'mp4',
'title': 'Terraria 1.2 Trailer',
'playlist_index': 2,
}
}
],
'info_dict': {
'id': '105600',
'title': 'Terraria',
},
'params': {
'playlistend': 2,
}
}, {
'url': 'http://steamcommunity.com/sharedfiles/filedetails/?id=242472205',
'info_dict': {
'id': 'WB5DvDOOvAY',
'id': 'X8kpJBlzD2E',
'ext': 'mp4',
'upload_date': '20140329',
'title': 'FRONTIERS - Final Greenlight Trailer',
'description': 'md5:dc96a773669d0ca1b36c13c1f30250d9',
'upload_date': '20140617',
'title': 'FRONTIERS - Trapping',
'description': 'md5:bf6f7f773def614054089e5769c12a6e',
'uploader': 'AAD Productions',
'uploader_id': 'AtomicAgeDogGames',
}
@@ -76,48 +82,65 @@ class SteamIE(InfoExtractor):
self.report_age_confirmation()
webpage = self._download_webpage(videourl, playlist_id)
flash_vars = self._parse_json(self._search_regex(
r'(?s)rgMovieFlashvars\s*=\s*({.+?});', webpage,
'flash vars'), playlist_id, js_to_json)
playlist_title = None
entries = []
if fileID:
playlist_title = self._html_search_regex(
r'<div class="workshopItemTitle">(.+)</div>', webpage, 'title')
mweb = re.finditer(r'''(?x)
'movie_(?P<videoID>[0-9]+)':\s*\{\s*
YOUTUBE_VIDEO_ID:\s*"(?P<youtube_id>[^"]+)",
''', webpage)
videos = [{
'_type': 'url',
'url': vid.group('youtube_id'),
'ie_key': 'Youtube',
} for vid in mweb]
else:
playlist_title = self._html_search_regex(
r'<h2 class="pageheader">(.*?)</h2>', webpage, 'game title')
mweb = re.finditer(r'''(?x)
'movie_(?P<videoID>[0-9]+)':\s*\{\s*
FILENAME:\s*"(?P<videoURL>[\w:/\.\?=]+)"
(,\s*MOVIE_NAME:\s*\"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},
''', webpage)
titles = re.finditer(
r'<span class="title">(?P<videoName>.+?)</span>', webpage)
thumbs = re.finditer(
r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">', webpage)
videos = []
for vid, vtitle, thumb in zip(mweb, titles, thumbs):
video_id = vid.group('videoID')
title = vtitle.group('videoName')
video_url = vid.group('videoURL')
video_thumb = thumb.group('thumbnail')
if not video_url:
raise ExtractorError('Cannot find video url for %s' % video_id)
videos.append({
'id': video_id,
'url': video_url,
'ext': 'flv',
'title': unescapeHTML(title),
'thumbnail': video_thumb
playlist_title = get_element_by_class('workshopItemTitle', webpage)
for movie in flash_vars.values():
if not movie:
continue
youtube_id = movie.get('YOUTUBE_VIDEO_ID')
if not youtube_id:
continue
entries.append({
'_type': 'url',
'url': youtube_id,
'ie_key': 'Youtube',
})
if not videos:
else:
playlist_title = get_element_by_class('apphub_AppName', webpage)
for movie_id, movie in flash_vars.items():
if not movie:
continue
video_id = self._search_regex(r'movie_(\d+)', movie_id, 'video id', fatal=False)
title = movie.get('MOVIE_NAME')
if not title or not video_id:
continue
entry = {
'id': video_id,
'title': title.replace('+', ' '),
}
formats = []
flv_url = movie.get('FILENAME')
if flv_url:
formats.append({
'format_id': 'flv',
'url': flv_url,
})
highlight_element = self._search_regex(
r'(<div[^>]+id="highlight_movie_%s"[^>]+>)' % video_id,
webpage, 'highlight element', fatal=False)
if highlight_element:
highlight_attribs = extract_attributes(highlight_element)
if highlight_attribs:
entry['thumbnail'] = highlight_attribs.get('data-poster')
for quality in ('', '-hd'):
for ext in ('webm', 'mp4'):
video_url = highlight_attribs.get('data-%s%s-source' % (ext, quality))
if video_url:
formats.append({
'format_id': ext + quality,
'url': video_url,
})
if not formats:
continue
entry['formats'] = formats
entries.append(entry)
if not entries:
raise ExtractorError('Could not find any videos')
return self.playlist_result(videos, playlist_id, playlist_title)
return self.playlist_result(entries, playlist_id, playlist_title)

View File

@@ -216,7 +216,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
def hex_to_bytes(hex):
return binascii.a2b_hex(hex.encode('ascii'))
relative_path = re.match(r'https?://link.theplatform.com/s/([^?]+)', url).group(1)
relative_path = re.match(r'https?://link\.theplatform\.com/s/([^?]+)', url).group(1)
clear_text = hex_to_bytes(flags + expiration_date + str_to_hex(relative_path))
checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
sig = flags + expiration_date + checksum + str_to_hex(sig_secret)

View File

@@ -57,10 +57,10 @@ class ThisAVIE(InfoExtractor):
info_dict = self._extract_jwplayer_data(
webpage, video_id, require_title=False)
uploader = self._html_search_regex(
r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
r': <a href="http://www\.thisav\.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
webpage, 'uploader name', fatal=False)
uploader_id = self._html_search_regex(
r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
r': <a href="http://www\.thisav\.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
webpage, 'uploader id', fatal=False)
info_dict.update({

View File

@@ -13,11 +13,11 @@ from ..utils import (
class TubiTvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tubitv\.com/video/(?P<id>[0-9]+)'
_VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?:video|movies|tv-shows)/(?P<id>[0-9]+)'
_LOGIN_URL = 'http://tubitv.com/login'
_NETRC_MACHINE = 'tubitv'
_GEO_COUNTRIES = ['US']
_TEST = {
_TESTS = [{
'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday',
'md5': '43ac06be9326f41912dc64ccf7a80320',
'info_dict': {
@@ -27,7 +27,13 @@ class TubiTvIE(InfoExtractor):
'description': 'A stand up comedian is forced to look at the decisions in his life while on a one week trip to the west coast.',
'uploader_id': 'bc168bee0d18dd1cb3b86c68706ab434',
},
}
}, {
'url': 'http://tubitv.com/tv-shows/321886/s01_e01_on_nom_stories',
'only_matching': True,
}, {
'url': 'http://tubitv.com/movies/383676/tracker',
'only_matching': True,
}]
def _login(self):
(username, password) = self._get_login_info()

View File

@@ -3,52 +3,50 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_iso8601,
float_or_none,
smuggle_url,
)
class TVAIE(InfoExtractor):
_VALID_URL = r'https?://videos\.tva\.ca/episode/(?P<id>\d+)'
_VALID_URL = r'https?://videos\.tva\.ca/details/_(?P<id>\d+)'
_TEST = {
'url': 'http://videos.tva.ca/episode/85538',
'url': 'https://videos.tva.ca/details/_5596811470001',
'info_dict': {
'id': '85538',
'id': '5596811470001',
'ext': 'mp4',
'title': 'Épisode du 25 janvier 2017',
'description': 'md5:e9e7fb5532ab37984d2dc87229cadf98',
'upload_date': '20170126',
'timestamp': 1485442329,
'title': 'Un extrait de l\'épisode du dimanche 8 octobre 2017 !',
'uploader_id': '5481942443001',
'upload_date': '20171003',
'timestamp': 1507064617,
},
'params': {
# m3u8 download
'skip_download': True,
}
}
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5481942443001/default_default/index.html?videoId=%s'
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
"https://d18jmrhziuoi7p.cloudfront.net/isl/api/v1/dataservice/Items('%s')" % video_id,
video_id, query={
'$expand': 'Metadata,CustomId',
'$select': 'Metadata,Id,Title,ShortDescription,LongDescription,CreatedDate,CustomId,AverageUserRating,Categories,ShowName',
'$format': 'json',
'https://videos.tva.ca/proxy/item/_' + video_id, video_id, headers={
'Accept': 'application/json',
})
metadata = video_data.get('Metadata', {})
def get_attribute(key):
for attribute in video_data.get('attributes', []):
if attribute.get('key') == key:
return attribute.get('value')
return None
return {
'_type': 'url_transparent',
'id': video_id,
'title': video_data['Title'],
'url': smuggle_url('ooyala:' + video_data['CustomId'], {'supportedformats': 'm3u8,hds'}),
'description': video_data.get('LongDescription') or video_data.get('ShortDescription'),
'series': video_data.get('ShowName'),
'episode': metadata.get('EpisodeTitle'),
'episode_number': int_or_none(metadata.get('EpisodeNumber')),
'categories': video_data.get('Categories'),
'average_rating': video_data.get('AverageUserRating'),
'timestamp': parse_iso8601(video_data.get('CreatedDate')),
'ie_key': 'Ooyala',
'title': get_attribute('title'),
'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['CA']}),
'description': get_attribute('description'),
'thumbnail': get_attribute('image-background') or get_attribute('image-landscape'),
'duration': float_or_none(get_attribute('video-duration'), 1000),
'ie_key': 'BrightcoveNew',
}

View File

@@ -9,7 +9,7 @@ from ..utils import (
class TVN24IE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)\.html'
_VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)'
_TESTS = [{
'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html',
'md5': 'fbdec753d7bc29d96036808275f2130c',
@@ -18,7 +18,7 @@ class TVN24IE(InfoExtractor):
'ext': 'mp4',
'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"',
'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości "Szkła kontaktowego".',
'thumbnail': 're:http://.*[.]jpeg',
'thumbnail': 're:https?://.*[.]jpeg',
}
}, {
'url': 'http://fakty.tvn24.pl/ogladaj-online,60/53-konferencja-bezpieczenstwa-w-monachium,716431.html',
@@ -29,6 +29,9 @@ class TVN24IE(InfoExtractor):
}, {
'url': 'http://tvn24bis.pl/poranek,146,m/gen-koziej-w-tvn24-bis-wracamy-do-czasow-zimnej-wojny,715660.html',
'only_matching': True,
}, {
'url': 'https://www.tvn24.pl/magazyn-tvn24/angie-w-jednej-czwartej-polka-od-szarej-myszki-do-cesarzowej-europy,119,2158',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@@ -15,16 +15,16 @@ from ..utils import (
class TVPIE(InfoExtractor):
IE_NAME = 'tvp'
IE_DESC = 'Telewizja Polska'
_VALID_URL = r'https?://[^/]+\.tvp\.(?:pl|info)/(?:(?!\d+/)[^/]+/)*(?P<id>\d+)'
_VALID_URL = r'https?://[^/]+\.tvp\.(?:pl|info)/(?:video/(?:[^,\s]*,)*|(?:(?!\d+/)[^/]+/)*)(?P<id>\d+)'
_TESTS = [{
'url': 'http://vod.tvp.pl/194536/i-seria-odc-13',
'url': 'https://vod.tvp.pl/video/czas-honoru,i-seria-odc-13,194536',
'md5': '8aa518c15e5cc32dfe8db400dc921fbb',
'info_dict': {
'id': '194536',
'ext': 'mp4',
'title': 'Czas honoru, I seria odc. 13',
'description': 'md5:76649d2014f65c99477be17f23a4dead',
'description': 'md5:381afa5bca72655fe94b05cfe82bf53d',
},
}, {
'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
@@ -37,12 +37,13 @@ class TVPIE(InfoExtractor):
},
}, {
# page id is not the same as video id(#7799)
'url': 'http://vod.tvp.pl/22704887/08122015-1500',
'md5': 'cf6a4705dfd1489aef8deb168d6ba742',
'url': 'https://wiadomosci.tvp.pl/33908820/28092017-1930',
'md5': '84cd3c8aec4840046e5ab712416b73d0',
'info_dict': {
'id': '22680786',
'id': '33908820',
'ext': 'mp4',
'title': 'Wiadomości, 08.12.2015, 15:00',
'title': 'Wiadomości, 28.09.2017, 19:30',
'description': 'Wydanie główne codziennego serwisu informacyjnego.'
},
}, {
'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',

View File

@@ -609,7 +609,7 @@ class TwitchClipsIE(InfoExtractor):
r'(?s)clipInfo\s*=\s*({.+?});', webpage, 'clip info'),
video_id, transform_source=js_to_json)
title = clip.get('channel_title') or self._og_search_title(webpage)
title = clip.get('title') or clip.get('channel_title') or self._og_search_title(webpage)
formats = [{
'url': option['source'],

View File

@@ -174,7 +174,7 @@ class TwitterCardIE(TwitterBaseIE):
webpage = self._download_webpage(url, video_id)
iframe_url = self._html_search_regex(
r'<iframe[^>]+src="((?:https?:)?//(?:www.youtube.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
r'<iframe[^>]+src="((?:https?:)?//(?:www\.youtube\.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
webpage, 'video iframe', default=None)
if iframe_url:
return self.url_result(iframe_url)

View File

@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
@@ -29,6 +28,7 @@ class UDNEmbedIE(InfoExtractor):
# m3u8 download
'skip_download': True,
},
'expected_warnings': ['Failed to parse JSON Expecting value'],
}, {
'url': 'https://video.udn.com/embed/news/300040',
'only_matching': True,
@@ -43,10 +43,21 @@ class UDNEmbedIE(InfoExtractor):
page = self._download_webpage(url, video_id)
options = json.loads(js_to_json(self._html_search_regex(
r'var\s+options\s*=\s*([^;]+);', page, 'video urls dictionary')))
video_urls = options['video']
options_str = self._html_search_regex(
r'var\s+options\s*=\s*([^;]+);', page, 'options')
trans_options_str = js_to_json(options_str)
options = self._parse_json(trans_options_str, 'options', fatal=False) or {}
if options:
video_urls = options['video']
title = options['title']
poster = options.get('poster')
else:
video_urls = self._parse_json(self._html_search_regex(
r'"video"\s*:\s*({.+?})\s*,', trans_options_str, 'video urls'), 'video urls')
title = self._html_search_regex(
r"title\s*:\s*'(.+?)'\s*,", options_str, 'title')
poster = self._html_search_regex(
r"poster\s*:\s*'(.+?)'\s*,", options_str, 'poster', default=None)
if video_urls.get('youtube'):
return self.url_result(video_urls.get('youtube'), 'Youtube')
@@ -68,7 +79,7 @@ class UDNEmbedIE(InfoExtractor):
formats.extend(self._extract_f4m_formats(
video_url, video_id, f4m_id='hds'))
else:
mobj = re.search(r'_(?P<height>\d+)p_(?P<tbr>\d+).mp4', video_url)
mobj = re.search(r'_(?P<height>\d+)p_(?P<tbr>\d+)\.mp4', video_url)
a_format = {
'url': video_url,
# video_type may be 'mp4', which confuses YoutubeDL
@@ -83,14 +94,9 @@ class UDNEmbedIE(InfoExtractor):
self._sort_formats(formats)
thumbnails = [{
'url': img_url,
'id': img_type,
} for img_type, img_url in options.get('gallery', [{}])[0].items() if img_url]
return {
'id': video_id,
'formats': formats,
'title': options['title'],
'thumbnails': thumbnails,
'title': title,
'thumbnail': poster,
}

View File

@@ -0,0 +1,32 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from .youtube import YoutubeIE
class UnityIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?unity3d\.com/learn/tutorials/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://unity3d.com/learn/tutorials/topics/animation/animate-anything-mecanim',
'info_dict': {
'id': 'jWuNtik0C8E',
'ext': 'mp4',
'title': 'Live Training 22nd September 2014 - Animate Anything',
'description': 'md5:e54913114bd45a554c56cdde7669636e',
'duration': 2893,
'uploader': 'Unity',
'uploader_id': 'Unity3D',
'upload_date': '20140926',
}
}, {
'url': 'https://unity3d.com/learn/tutorials/projects/2d-ufo-tutorial/following-player-camera?playlist=25844',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
youtube_id = self._search_regex(
r'data-video-id="([_0-9a-zA-Z-]+)"',
webpage, 'youtube ID')
return self.url_result(youtube_id, ie=YoutubeIE.ie_key(), video_id=video_id)

View File

@@ -1,131 +1,41 @@
# coding: utf-8
from __future__ import unicode_literals
from .mtv import MTVIE
import re
from ..utils import fix_xml_ampersands
from .mtv import MTVServicesInfoExtractor
class VH1IE(MTVIE):
class VH1IE(MTVServicesInfoExtractor):
IE_NAME = 'vh1.com'
_FEED_URL = 'http://www.vh1.com/player/embed/AS3/fullepisode/rss/'
_FEED_URL = 'http://www.vh1.com/feeds/mrss/'
_TESTS = [{
'url': 'http://www.vh1.com/video/metal-evolution/full-episodes/progressive-metal/1678612/playlist.jhtml',
'playlist': [
{
'md5': '7827a7505f59633983165bbd2c119b52',
'info_dict': {
'id': '731565',
'ext': 'mp4',
'title': 'Metal Evolution: Ep. 11 Act 1',
'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 12 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.'
}
},
{
'md5': '34fb4b7321c546b54deda2102a61821f',
'info_dict': {
'id': '731567',
'ext': 'mp4',
'title': 'Metal Evolution: Ep. 11 Act 2',
'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.'
}
},
{
'md5': '813f38dba4c1b8647196135ebbf7e048',
'info_dict': {
'id': '731568',
'ext': 'mp4',
'title': 'Metal Evolution: Ep. 11 Act 3',
'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.'
}
},
{
'md5': '51adb72439dfaed11c799115d76e497f',
'info_dict': {
'id': '731569',
'ext': 'mp4',
'title': 'Metal Evolution: Ep. 11 Act 4',
'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.'
}
},
{
'md5': '93d554aaf79320703b73a95288c76a6e',
'info_dict': {
'id': '731570',
'ext': 'mp4',
'title': 'Metal Evolution: Ep. 11 Act 5',
'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.'
}
}
],
'skip': 'Blocked outside the US',
'url': 'http://www.vh1.com/episodes/0umwpq/hip-hop-squares-kent-jones-vs-nick-young-season-1-ep-120',
'info_dict': {
'title': 'Kent Jones vs. Nick Young',
'description': 'Come to Play. Stay to Party. With Mike Epps, TIP, OShea Jackson Jr., T-Pain, Tisha Campbell-Martin and more.',
},
'playlist_mincount': 4,
}, {
# Clip
'url': 'http://www.vh1.com/video/misc/706675/metal-evolution-episode-1-pre-metal-show-clip.jhtml#id=1674118',
'md5': '7d67cf6d9cdc6b4f3d3ac97a55403844',
'url': 'http://www.vh1.com/video-clips/t74mif/scared-famous-scared-famous-extended-preview',
'info_dict': {
'id': '706675',
'id': '0a50c2d2-a86b-4141-9565-911c7e2d0b92',
'ext': 'mp4',
'title': 'Metal Evolution: Episode 1 Pre-Metal Show Clip',
'description': 'The greatest documentary ever made about Heavy Metal begins as our host Sam Dunn travels the globe to seek out the origins and influences that helped create Heavy Metal. Sam speaks to legends like Kirk Hammett, Alice Cooper, Slash, Bill Ward, Geezer Butler, Tom Morello, Ace Frehley, Lemmy Kilmister, Dave Davies, and many many more. This episode is the prologue for the 11 hour series, and Sam goes back to the very beginning to reveal how Heavy Metal was created.'
'title': 'Scared Famous|October 9, 2017|1|NO-EPISODE#|Scared Famous + Extended Preview',
'description': 'md5:eff5551a274c473a29463de40f7b09da',
'upload_date': '20171009',
'timestamp': 1507574700,
},
'skip': 'Blocked outside the US',
}, {
# Short link
'url': 'http://www.vh1.com/video/play.jhtml?id=1678353',
'md5': '853192b87ad978732b67dd8e549b266a',
'info_dict': {
'id': '730355',
'ext': 'mp4',
'title': 'Metal Evolution: Episode 11 Progressive Metal Sneak',
'description': 'In Metal Evolution\'s finale sneak, Sam sits with Michael Giles of King Crimson and gets feedback from Metallica guitarist Kirk Hammett on why the group was influential.'
'params': {
# m3u8 download
'skip_download': True,
},
'skip': 'Blocked outside the US',
}, {
'url': 'http://www.vh1.com/video/macklemore-ryan-lewis/900535/cant-hold-us-ft-ray-dalton.jhtml',
'md5': 'b1bcb5b4380c9d7f544065589432dee7',
'info_dict': {
'id': '900535',
'ext': 'mp4',
'title': 'Macklemore & Ryan Lewis - "Can\'t Hold Us ft. Ray Dalton"',
'description': 'The Heist'
},
'skip': 'Blocked outside the US',
}]
_VALID_URL = r'''(?x)
https?://www\.vh1\.com/video/
(?:
.+?/full-episodes/.+?/(?P<playlist_id>[^/]+)/playlist\.jhtml
|
(?:
play.jhtml\?id=|
misc/.+?/.+?\.jhtml\#id=
)
(?P<video_id>[0-9]+)$
|
[^/]+/(?P<music_id>[0-9]+)/[^/]+?
)
'''
_VALID_URL = r'https?://(?:www\.)?vh1\.com/(?:video-clips|episodes)/(?P<id>[^/?#.]+)'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj.group('music_id'):
id_field = 'vid'
video_id = mobj.group('music_id')
else:
video_id = mobj.group('playlist_id') or mobj.group('video_id')
id_field = 'id'
doc_url = '%s?%s=%s' % (self._FEED_URL, id_field, video_id)
idoc = self._download_xml(
doc_url, video_id,
'Downloading info', transform_source=fix_xml_ampersands)
entries = []
for item in idoc.findall('.//item'):
info = self._get_video_info(item)
if info:
entries.append(info)
return self.playlist_result(entries, playlist_id=video_id)
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
mgid = self._extract_triforce_mgid(webpage)
videos_info = self._get_videos_info(mgid)
return videos_info

View File

@@ -198,7 +198,7 @@ class ViceShowIE(InfoExtractor):
class ViceArticleIE(InfoExtractor):
IE_NAME = 'vice:article'
_VALID_URL = r'https://www.vice.com/[^/]+/article/(?P<id>[^?#]+)'
_VALID_URL = r'https://www\.vice\.com/[^/]+/article/(?P<id>[^?#]+)'
_TESTS = [{
'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah',

View File

@@ -26,7 +26,7 @@ class VideoPremiumIE(InfoExtractor):
webpage_url = 'http://videopremium.tv/' + video_id
webpage = self._download_webpage(webpage_url, video_id)
if re.match(r'^<html><head><script[^>]*>window.location\s*=', webpage):
if re.match(r'^<html><head><script[^>]*>window\.location\s*=', webpage):
# Download again, we need a cookie
webpage = self._download_webpage(
webpage_url, video_id,

Some files were not shown because too many files have changed in this diff Show More