Compare commits

...

240 Commits

Author SHA1 Message Date
Sergey M․
fa3f0fd856 release 2017.10.20 2017-10-20 23:40:25 +07:00
Sergey M․
c9dcd4b0c5 [ChangeLog] Actualize 2017-10-20 23:37:55 +07:00
Alex Seiler
fc5c47d13c [parliamentliveuk] Fix extraction (closes #14524) 2017-10-20 23:31:13 +07:00
Sergey M․
a26a3c6d34 [soundcloud] Update client id (closes #14546) 2017-10-20 21:43:34 +07:00
Felix Yan
382fa456ea [ChangeLog] Fix typo 2017-10-19 23:36:32 +07:00
Alex Seiler
e1d168e592 [servus] Add extractor (closes #14362) 2017-10-19 22:17:20 +07:00
Parmjit Virk
ca1c9f26fa [unity] Add extractor (fixes #14528) 2017-10-19 04:46:06 +07:00
Sergey M․
6f3b4a98c9 [downloader/fragment] Report warning instead of error on inconsistent download state 2017-10-17 22:53:34 +07:00
Remita Amine
fa4bc6e712 [youtube] replace youtube redirect urls in description(fixes #14517) 2017-10-17 10:07:37 +00:00
Remita Amine
6b9cbd023f [pbs] restrict direct video url regex(fixes #14519) 2017-10-17 09:23:11 +00:00
Yen Chi Hsuan
c233003afe [megaphone] Fix deprecated escape sequence 2017-10-17 15:39:06 +08:00
Sergey M․
83fcf19e2d [drtv] Respect preference for direct http formats (#14509) 2017-10-16 05:48:45 +07:00
Sergey M․
acc4ea6237 [eporner] Add support for embed URLs (closes #14507) 2017-10-16 05:11:25 +07:00
Sergey M․
8cc1840ccb [arte] Capture and output error message 2017-10-15 22:12:34 +07:00
Sergey M․
a9ee4f6e49 [downloader/hls] Fix total fragments count when ad fragments exist 2017-10-15 11:03:54 +07:00
Pawit Pornkitprasan
aaab8c5e71 [niconico] Improve uploader metadata extraction robustness (closes #14135) 2017-10-15 10:40:57 +07:00
Sergey M․
7e721e35da release 2017.10.15.1 2017-10-15 06:16:41 +07:00
Sergey M․
bd7e1406b3 [ChangeLog] Actualize 2017-10-15 06:15:37 +07:00
Sergey M․
74c42d9ec3 [downloader/hls] Ignore anvato ad fragments (closes #14496) 2017-10-15 06:13:48 +07:00
Sergey M․
5efaf43c93 [downloader/fragment] Output ad fragment count 2017-10-15 06:13:07 +07:00
Sergey M․
4827270526 [scrippsnetworks:watch] Bypass geo restriction 2017-10-15 06:11:35 +07:00
Sergey M․
ee093a0ea0 [anvato] Add ability to bypass geo restriction 2017-10-15 06:11:02 +07:00
Sergey M․
9bb2c7673e [redditr] Fix extraction for URLs with query (closes #14495) 2017-10-15 03:38:34 +07:00
Sergey M․
715534083d release 2017.10.15 2017-10-15 02:26:58 +07:00
Sergey M․
ee88c1cbc6 [ChangeLog] Actualize 2017-10-15 02:26:10 +07:00
Sergey M․
57eb45b111 [scrippsnetworks:watch] Add support for geniuskitchen.com 2017-10-15 02:01:16 +07:00
Sergey M․
b21ab85088 [scrippsnetworks:watch] Fix extraction (closes #14389) 2017-10-15 01:57:43 +07:00
Sergey M․
210a2720bc [anvato] Process master m3u8 manifests
>>> Individual m3u8 manifests are not always present, e.g. anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:4173834
2017-10-15 01:44:57 +07:00
Sergey M․
685e87b61f [youtube] Fix relative URLs in description 2017-10-14 20:26:52 +07:00
Remita Amine
c9bd503e7d [spike] bypass geo restriction 2017-10-13 08:41:57 +00:00
Remita Amine
94a530c6cb [howstuffworks] add support for more domains 2017-10-12 19:03:47 +00:00
Remita Amine
e650659b94 [infoq] fix http format downloading 2017-10-12 17:39:51 +00:00
Remita Amine
2637fadc38 [generic] fix some of the tests 2017-10-12 16:14:43 +00:00
Remita Amine
50d808f5c9 [common] add support for jwplayer youtube embeds 2017-10-12 16:12:47 +00:00
Remita Amine
7a64c33aee [rtlnl] add support for another type of embeds 2017-10-12 16:09:06 +00:00
Remita Amine
b0def2c297 [onionstudios] add support for bulbs-video embeds 2017-10-12 16:05:25 +00:00
Remita Amine
81ce479f4d [udn] fix extraction 2017-10-12 16:04:41 +00:00
Remita Amine
414e709405 [shahid] fix extraction(fixes #14448) 2017-10-12 09:20:39 +00:00
Yen Chi Hsuan
645ed3e7c9 [ChangeLog] Update after #14471
[skip ci]
2017-10-12 12:12:37 +08:00
nyuszika7h
c0bddd6d65 [kaltura] Ignore Widevine encrypted video (.wvm)
There is currently no public method to decrypt this, and there may be
other streams available that can be downloaded.

Example URL, has `.wvm` and `.mp4` formats:
https://www.voot.com/shows/bigg-boss-s11/11/538936/bigg-boss-extra-dose-arshi-s-quirky-demand/541700
2017-10-12 12:09:58 +08:00
Yen Chi Hsuan
1baba7f4a8 [vh1] Adding coding cookie 2017-10-12 12:02:26 +08:00
Remita Amine
344d1a6794 [vh1] fix extraction(fixes #9613) 2017-10-11 20:52:14 +00:00
Sergey M․
76581082f6 release 2017.10.12 2017-10-12 01:06:28 +07:00
Sergey M․
2f0eb0a68a [ChangeLog] Actualize 2017-10-12 01:05:14 +07:00
Remita Amine
7fee3377dc [steam] fix extraction(fixes #14067) 2017-10-11 17:50:08 +00:00
Sergey M․
ff3f1a62f0 [funk] Add extractor (closes #14464) 2017-10-12 00:44:13 +07:00
Sergey M․
694b61545c [nexx] Add support for shortcuts and relax domain id extraction 2017-10-12 00:41:20 +07:00
Sergey M․
af0f74288d [YoutubeDL] Improve _default_format_spec (closes #14461) 2017-10-11 23:48:05 +07:00
Remita Amine
9e38dbb19c [voxmedia] add support for recode.net(fixes #14173) 2017-10-11 15:50:20 +00:00
Remita Amine
782195a9d4 [once] add support for vmap urls 2017-10-11 15:50:20 +00:00
Sergey M․
26bae2d965 [generic] Add support for channel9 embeds (closes #14469) 2017-10-11 21:59:30 +07:00
Remita Amine
5fe75f976f [tva] fix extraction(fixes #14328) 2017-10-11 14:15:52 +00:00
Remita Amine
4fe4bda287 [tubitv] add support for new url format(fixes #14460) 2017-10-11 11:36:05 +00:00
Remita Amine
cdab1df912 [afreecatv] remove AfreecaTVGlobalIE
the website now show this message
> Global AfreecaTV will be merged and integrated on July 20th, 2017.
Every user around the world are now able to interact with one another on
www.afreecatv.com!
2017-10-11 10:04:46 +00:00
Yen Chi Hsuan
dfc80bdd2e [ChangeLog] Update after #14420 2017-10-11 02:03:00 +08:00
Khang Nguyen
04af3aca04 Remove YoutubeSharedVideoIE https://github.com/rg3/youtube-dl/issues/14303 2017-10-11 02:01:18 +08:00
Jakub Wilk
d0f2d64114 [slideslive] Add extractor (closes #2680) 2017-10-10 23:45:10 +07:00
Yen Chi Hsuan
01c742ecd0 [facebook] Support thumbnails (closes #14416) 2017-10-10 23:20:38 +08:00
Silvan Mosberger
9e71f88105 [vvvvid] Fix typo 2017-10-10 03:48:26 +07:00
Sergey M․
ae5af89079 [hrti:playlist] Relax _VALID_URL 2017-10-09 23:52:39 +07:00
Sergey M․
197224b7a4 Fix some regexes 2017-10-09 23:50:53 +07:00
Sergey M․
8992331621 [wdr] Relax media link regex (closes #14447) 2017-10-08 21:36:50 +07:00
Aleksandar Topuzović
b0dde6686c [hrti] Relax _VALID_URL 2017-10-08 05:40:08 +07:00
Sergey M․
a22ccac1f0 [fox] Delegate to uplynk:preplay (#14147) 2017-10-08 01:34:17 +07:00
Sergey M․
8b561bfc9d [youtube] Add support for hooktube.com (closes #14437) 2017-10-07 21:59:04 +07:00
Sergey M․
8e751a185c release 2017.10.07 2017-10-07 05:02:53 +07:00
Sergey M․
3fc8f5b7c2 [ChangeLog] Actualize 2017-10-07 05:01:38 +07:00
Sergey M․
665f42d8c1 [reddit] Sort formats (closes #14430) 2017-10-07 01:40:00 +07:00
Sergey M․
e952847541 [PULL_REQUEST_TEMPLATE.md] Add explicit entry on flake8 2017-10-07 00:58:19 +07:00
remis
b1a7bf44b9 [lnkgo] Relax _VALID_URL 2017-10-06 23:59:09 +07:00
Jalaz Kumar
2e2a8e97d5 [pornflip] Extend _VALID_URL (closes #14405) 2017-10-06 23:56:31 +07:00
Sergey M․
ac93c09ab2 [xtube] Add support for embedded URLs (closes #14417) 2017-10-06 23:53:32 +07:00
Sergey M․
cd6fc19ed7 [YoutubeDL] Ignore duplicates in --playlist-items
E.g. '--playlist-items 2-4,3-4,3' should result in '[2,3,4]', not '[2,3,4,3,4,3]'
2017-10-06 23:50:34 +07:00
Sergey M․
86a15ed64b [test_YoutubeDL] Add test for #14425 2017-10-06 23:41:28 +07:00
Sergey M․
7e85e8729f [YoutubeDL] Fix out of range --playlist-items for iterable playlists and reduce code duplication (closes #14425) 2017-10-06 23:34:46 +07:00
Sergey M․
6be08ce602 [utils] Use in OnDemandPagedList by default
Not using cache results in redundant network I/O due to downloading the same pages while using --playlist-items n-m
2017-10-06 23:13:53 +07:00
Sergey M․
cf5f6ed5be [xvideos] Add support for embed URLs and improve extraction (closes #14409) 2017-10-05 00:27:24 +07:00
Philipp Hagemeister
6b46285e85 [comedycentral] new shortcut :theopposition for "The Opposition" show 2017-10-04 07:45:13 +02:00
Sergey M․
6e736d86e7 [beeg] Fix extraction (closes #14403) 2017-10-04 04:27:42 +07:00
M.K
c110944fa2 [extractor/common] Fix typo in _parse_mpd_formats 2017-10-04 03:50:27 +07:00
Sergey M․
9524dca3ac [README.md] Use revision bound link to YoutubeDL options (closes #14401) 2017-10-04 02:53:20 +07:00
Jakub Wilk
3e4cedf9e8 [tvn24] Relax _VALID_URL 2017-10-03 23:28:13 +07:00
remitamine
bfd484ccff Merge pull request #14392 from snipem/nbc-fix
Fix for JSON meta data download(closes #13651)
2017-10-03 14:49:55 +00:00
Matthias Küch
b7e14f06a4 Fix for JSON meta data download
Added fixes according to #13651 and user @remitamine
2017-10-03 15:17:28 +02:00
Sergey M․
d2ae7e24e5 [postprocessor/ffmpeg] Convert to opus using libopus (closes #14381) 2017-10-02 04:43:25 +07:00
Sergey M․
544ffb7790 [ketnet] Add support for videos without direct sources (closes #14377) 2017-10-02 04:15:12 +07:00
Sergey M․
117589dfa2 [canvas] Generalize mediazone.vrt.be extractor and rework canvas and een 2017-10-02 04:14:36 +07:00
Sergey M․
839728f5bf [afreecatv] Add support for adult videos (closes #14376) 2017-10-02 03:28:25 +07:00
Sergey M․
fcdd37d053 release 2017.10.01 2017-10-01 21:54:11 +07:00
Sergey M․
1dd126180e [ChangeLog] Actualize 2017-10-01 21:45:56 +07:00
Rafal Borczuch
4e599194d6 [tvp] Add support for new URL schema (closes #14368) 2017-10-01 18:59:00 +07:00
Sergey M․
c5b7014a9c [generic] Add support for single format Video.js embeds (closes #14371) 2017-10-01 07:01:42 +07:00
Sergey M․
c8da40d834 [yahoo] Bypass geo restriction for brightcove (#14210) 2017-10-01 04:49:27 +07:00
Sergey M․
b69ca0ccfc [yahoo] Use extracted brightcove account id (closes #14210) 2017-10-01 04:37:42 +07:00
Giuseppe Fabiano
2c53bd51c6 [rtve:alacarta] Fix extraction (closes #14290) 2017-10-01 03:21:17 +07:00
Sergey M․
3836b02ce8 [YoutubeDL] PEP 8 2017-09-30 22:56:40 +07:00
Sergey M․
fa3fdeb41f [yahoo] Fix some tests 2017-09-30 22:54:22 +07:00
Sergey M․
eb9a15be60 [yahoo] Add support for custom brigthcove embeds (closes #14210) 2017-09-30 22:47:03 +07:00
Sergey M․
3600fd591d [YoutubeDL] Document youtube_include_dash_manifest 2017-09-28 00:46:48 +07:00
Sergey M․
63d990d285 [generic] Add support for Video.js embeds 2017-09-28 00:37:30 +07:00
Timendum
b14b2283a0 [gfycat] Add support for /gifs/detail URLs (closes #14322) 2017-09-27 22:48:47 +07:00
Sergey M․
02d01e15f1 [generic] Fix infinite recursion for twitter:player URLs (closes #14339) 2017-09-26 21:47:18 +07:00
Sergey M․
db96252831 [xhamsterembed] Fix extraction (closes #14308) 2017-09-24 19:23:08 +07:00
Sergey M․
8b389f7e3c Credit the author of multiple generic HTML5 embeds fix 2017-09-24 18:21:38 +07:00
Sergey M․
9fc41bcb6b release 2017.09.24 2017-09-24 00:22:50 +07:00
Sergey M․
10cab6613f [ChangeLog] Actualize 2017-09-24 00:21:34 +07:00
Sergey M․
4d182955a2 [kakao] Fix _VALID_URL 2017-09-24 00:19:27 +07:00
Sergey M․
011da618bd [openload] Fix _load_cookies for python 2.6 2017-09-24 00:12:40 +07:00
Sergey M․
4c54b89e03 Hide experimental phantomjs wrapper 2017-09-24 00:08:27 +07:00
Sergey M․
a87d7b4953 Credit @nbppp2 for americastestkitchen (#13996) 2017-09-23 23:27:28 +07:00
Sergey M․
2f3933aa1e Credit @ishitatsuyuki for mixcloud fix (#14132) 2017-09-23 23:26:35 +07:00
Sergey M․
aab20aabfc Credit @jdong92 for voot (#14059) 2017-09-23 23:23:27 +07:00
Sergey M․
16f54d0751 Credit @codeasashu for voot (#11814) 2017-09-23 23:20:20 +07:00
Sergey M․
07d1344c85 Credit @coreynicholson for vlive:playlist (#13613) 2017-09-23 23:16:27 +07:00
Sergey M․
47b5dfb047 Credit @luboss for joj (#13268) 2017-09-23 23:14:41 +07:00
Sergey M․
e3440d824a [24video] Fix timestamp extraction and make non fatal (#14295) 2017-09-23 07:46:53 +07:00
Sergey M․
136507b39a [24video] Add support for 24video.adult (closes #14295) 2017-09-23 07:41:22 +07:00
Sergey M․
7f4921b38d [heise] PEP 8 2017-09-23 07:28:29 +07:00
Sergey M․
f70ddd4aeb [kakao] Improve (closes #14007) 2017-09-23 07:28:24 +07:00
Namnamseo
1c22d7a7f3 [kakao] Add extractor (closes #12298) 2017-09-23 07:28:19 +07:00
Giuseppe Fabiano
5c1452e8f1 [twitter] Add support for user_id-less URLs (closes #14270) 2017-09-23 06:38:09 +07:00
Sergey M․
4bb58fa118 [americastestkitchen] Improve (closes #13996) 2017-09-23 06:29:20 +07:00
Dan Weber
13de91c9e9 [americastestkitchen] Add extractor (closes #10764) 2017-09-23 06:29:07 +07:00
kayb94
9ce1ac4046 [generic] Fix support for multiple HTML5 videos on one page (closes #14080) 2017-09-23 05:49:48 +07:00
Sergey M․
095774e591 [mixcloud] Improve and simplify (closes #14132) 2017-09-23 05:37:03 +07:00
Tatsuyuki Ishi
2384f5a64e [mixcloud] Fix extraction (closes #14088) 2017-09-23 05:36:57 +07:00
Yen Chi Hsuan
8c2895305d [options] Accept lrc as a subtitle conversion target format (closes #14292) 2017-09-23 02:30:03 +08:00
Sergey M․
8c6919e433 [lynda] Add support for educourse.ga (closes #14286) 2017-09-21 23:00:35 +07:00
Giuseppe Fabiano
f6ff52b473 [beeg] Fix extraction (closes #14275) 2017-09-21 04:05:33 +07:00
Parmjit Virk
12ea5c79fb [nbcsports:vplayer] Correct theplatform URL (closes #13873) 2017-09-21 02:53:06 +07:00
capital-G
3b65a6fbf3 [twitter] Fix duration extraction 2017-09-20 03:58:06 +07:00
Sergey M․
dc76eef092 [tvplay] Bypass geo restriction 2017-09-20 00:00:04 +07:00
Kareem Moussa
8a1a60d173 [devscripts/check-porn] Fix gettestcases import 2017-09-19 22:51:20 +07:00
kayb94
4d8c4b46d5 [heise] Add support for YouTube embeds 2017-09-17 22:46:52 +07:00
Sergey M․
9c2a17f2ce [popcorntv] Add extractor (closes #5914, closes #14211) 2017-09-17 22:19:57 +07:00
Yen Chi Hsuan
4ed2d7b7d1 Fix flake8 issues after #14225 2017-09-17 13:53:04 +08:00
Vijay Singh
8251af63a1 [viki] Update app data (closes #14181) 2017-09-16 22:45:23 +07:00
Windom
790d379e4d [morningstar] Relax _VALID_URL 2017-09-16 22:39:46 +07:00
Yen Chi Hsuan
3869028ffb [utils] Use bytes-like objects in dfxp2srt
This fixes handling of non-UTF8 TTML subtitles

Closes #14191
2017-09-16 12:18:38 +08:00
Yen Chi Hsuan
68d43a61b5 Ignore TTML subtitles 2017-09-16 12:14:48 +08:00
Yen Chi Hsuan
a88d461dff Merge pull request #14225 from Tithen-Firion/openload-phantomjs-method
Openload phantomjs method
2017-09-16 02:28:28 +08:00
Sergey M․
a4245acef8 [noovo] Fix extraction (closes #14214) 2017-09-15 23:12:19 +07:00
Sergey M․
6be44a50ed [dailymotion:playlist] Relax _VALID_URL (closes #14219) 2017-09-15 22:25:38 +07:00
Sergey M․
b763e1d68c [twitch] Add support for go.twitch.tv URLs (closes #14215) 2017-09-15 22:18:38 +07:00
Sergey M․
cbf85239bb [vgtv] Relax _VALID_URL (closes #14223) 2017-09-15 22:13:30 +07:00
Sergey M․
159d304a9f release 2017.09.15 2017-09-15 21:48:06 +07:00
Sergey M․
86e55e317c [ChangeLog] Actualize 2017-09-15 21:45:18 +07:00
Sergey M․
c46680fb2a [condenast] Fix extraction (closes #14196, closes #14207) 2017-09-15 02:01:17 +07:00
Philipp Hagemeister
fad9fc537d [tv4] fix a test URL 2017-09-14 20:47:23 +02:00
Philipp Hagemeister
0732a90579 [orf] Add new extractor for f4m stories 2017-09-14 20:37:46 +02:00
Sergey M․
319fc70676 [tv4] Relax _VALID_URL (closes #14206) 2017-09-14 23:50:19 +07:00
Sergey M․
e7c3e33456 [downloader/fragment] Restart inconsistent incomplete fragment downloads (#13731) 2017-09-14 23:19:53 +07:00
Yen Chi Hsuan
757984af90 Merge pull request #12909 from remitamine/raw-sub
[YoutubeDL] write raw subtitle files
2017-09-13 17:36:40 +08:00
Sergey M․
2f483758bc [animeondemand] Improve and modernize 2017-09-11 04:32:35 +07:00
Sergey M․
018cc61549 [animeondemand] Bypass geo restriction 2017-09-11 04:23:42 +07:00
Sergey M․
2709d9fa28 [animeondemand] Add support for flash videos (closes #9944) 2017-09-11 04:23:42 +07:00
Sergey M․
7dacceae75 release 2017.09.11 2017-09-11 03:30:33 +07:00
Sergey M․
43df248f10 [ChangeLog] Actualize 2017-09-11 03:27:43 +07:00
Sergey M․
f12a6e88b2 [rutube:playlist] Fix suitable (closes #14166) 2017-09-11 03:23:00 +07:00
Sergey M․
806498cf2f release 2017.09.10 2017-09-10 22:16:55 +07:00
Sergey M․
b98339b54b [ChangeLog] Actualize 2017-09-10 22:15:55 +07:00
Sergey M․
bf6ec2fea9 [fox] Fix extraction (#14147) 2017-09-10 22:08:32 +07:00
Sergey M․
c3dd44e085 [rutube] Use bool_or_none 2017-09-10 19:09:27 +07:00
Sergey M․
c7e327c4d4 [utils] Introduce bool_or_none 2017-09-10 19:08:39 +07:00
Sergey M․
48b813748d [rutube] Rework and generalize playlist extractors (closes #13565) 2017-09-10 18:40:33 +07:00
luceatnobis
debed8d759 [rutube:playlist] Add extractor (closes #13534) 2017-09-10 18:40:33 +07:00
kayb94
51aee72d16 [README.md] Clarify how to run extractor specific test cases 2017-09-08 22:13:17 +07:00
Olivier Bilodeau
931edb2ada [radiocanada] Add fallback for title extraction 2017-09-08 21:53:24 +07:00
Sergey M․
5113b69124 [abcnews,chilloutsoze,cracked,vice,vk] Use dedicated YouTube embeds extraction routines 2017-09-06 00:50:25 +07:00
Sergey M․
66c9fa36c1 [youtube] Separate methods for embeds extraction 2017-09-06 00:48:37 +07:00
Sergey M․
c5c9bf0c12 [YoutubeDL] Ensure dir existence for each requested format (closes #14116) 2017-09-05 23:31:34 +07:00
Sergey M․
880fa66f4f [redtube] Fix formats extraction (closes #14122) 2017-09-05 22:45:49 +07:00
Sergey M․
6348671c4a [arte] Relax unavailability check (closes #14112) 2017-09-04 23:08:40 +07:00
Sergey M․
efc57145c1 [manyvids] Improve (closes #14059) 2017-09-03 17:32:23 +07:00
John D
e9b865267a [manyvids] Add support for preview videos (closes #14053) 2017-09-03 17:31:53 +07:00
Sergey M․
bc35f07537 [vidme:user] Make tests only matching (closes #14054) 2017-09-03 17:03:51 +07:00
theychx
0b4a8eb3ac [vidme:user] Relax _VALID_URLs 2017-09-03 17:03:45 +07:00
Sergey M․
c1c1585b31 [bpb] Improve (closes #14086) 2017-09-03 16:43:33 +07:00
Timendum
0cbb841ba9 [bpb] Fix extraction (closes #14043) 2017-09-03 16:39:12 +07:00
Sergey M
d7c7100e3d [soundcloud] Simplify and add test (closes #14093) 2017-09-03 16:29:58 +07:00
Tatsuyuki Ishi
73602bcd0c [soundcloud] Fix download URL with private tracks 2017-09-03 16:28:34 +07:00
Sergey M․
23b2df82c7 [aliexpress:live] Fix issues (closes #13698, closes #13707) 2017-09-03 16:05:31 +07:00
dubber0
503115540d [aliexpress:live] Add extractor 2017-09-03 16:05:00 +07:00
Sergey M․
64f0e30b93 [viidea] Capture and output lecture error message (#14099) 2017-09-02 15:44:49 +07:00
Sergey M․
a3431e1224 [radiocanada] Skip unsupported platforms (closes #14100) 2017-09-02 15:33:54 +07:00
Sergey M․
a2022b0c40 release 2017.09.02 2017-09-02 01:08:32 +07:00
Sergey M․
8681ed7fc8 [ChangeLog] Actualize 2017-09-02 01:04:22 +07:00
Sergey M․
8d81f3e36d [youtube] Force old layout for each webpage (closes #14083) 2017-09-02 00:58:19 +07:00
Sergey M․
7998520933 [youtube] Fix upload date extraction (closes #14065) 2017-08-31 00:47:58 +07:00
Sergey M․
5b4bfbfc3b [charlierose] Add support for episodes (closes #14062) 2017-08-30 23:50:33 +07:00
Sergey M․
53647dfd0a [bbccouk] Add support for w-prefixed ids (closes #14056) 2017-08-30 05:27:56 +07:00
Yen Chi Hsuan
22f65a9efc Merge pull request #14048 from ryandesign/patch-1
Fix build failures with old cp and zip
2017-08-28 11:22:27 +08:00
Ryan Schmidt
c75c384fb6 Fix build failures with old cp and zip 2017-08-27 18:07:09 -05:00
Sergey M․
1b41da488d [googledrive] Extend _VALID_URL (closes #9785) 2017-08-28 00:50:41 +07:00
Sergey M․
fea82c1780 [googledrive] Add support for source format (closes #14046) 2017-08-28 00:39:22 +07:00
Sergey M․
3902cdd0e3 [pornhd] Fix extraction (closes #14005) 2017-08-27 22:37:26 +07:00
Sergey M․
2cfa7cbdd0 release 2017.08.27.1 2017-08-27 06:09:29 +07:00
Sergey M․
cc0412ef91 [ChangeLog] Actualize 2017-08-27 06:06:49 +07:00
Sergey M․
1c9c8de29e [youtube] Fix extraction with --youtube-skip-dash-manifest enabled (closes #14037) 2017-08-27 06:06:39 +07:00
Sergey M․
f031b76065 release 2017.08.27 2017-08-27 04:28:04 +07:00
Sergey M․
62c06c593d [ChangeLog] Actualize 2017-08-27 04:27:19 +07:00
Sergey M․
ff17be3ac9 [extractor/generic] Extract from LD-JSON last of all
Previous sources may contain several formats, e.g. http://tamasha.com/v/PgGZ
2017-08-27 03:31:40 +07:00
Sergey M․
1ed4549942 [extractor/common] Extract format id from label attribute of source tag for HTML5 videos (#14034) 2017-08-27 03:27:05 +07:00
Sergey M․
dd121cc1ca [extractor/common] Extract height from res attribute of source tag for HTML5 videos (closes #14034) 2017-08-27 03:12:56 +07:00
Sergey M․
a3c3a1e128 [http] Rework HTTP downloader
* Simplify code and split into separate routines to facilitate maintaining
* Make retry mechanism work on errors during actual download not only during connection establishment phase
* Retry on ECONNRESET and ETIMEDOUT during reading data from network
* Retry on content too short and various timeout errors
* Show error description on retry
* Closes #506, closes #809, closes #2849, closes #4240, closes #6023, closes #8625, closes #9483
2017-08-27 02:22:30 +07:00
Sergey M․
085d9dd9be [rai] Fix audio formats extraction (closes #14024) 2017-08-26 22:02:49 +07:00
Vijay Singh
151978f38a [mixcloud] Fix extraction (closes #14020) 2017-08-26 19:32:57 +07:00
Sergey M․
c7121fa7b8 [youtube] Fix controversy videos extraction (closes #14027, closes #14029) 2017-08-26 15:38:38 +07:00
Vijay Singh
745968bc72 [mixcloud] Fix extraction (closes #14015) 2017-08-24 22:28:44 +07:00
Sergey M․
df235dbba8 release 2017.08.23 2017-08-23 23:23:13 +07:00
Sergey M․
c4bdc68113 [ChangeLog] Actualize 2017-08-23 23:21:19 +07:00
Sergey M․
5bae33485c [toutv] PEP 8 2017-08-23 22:50:00 +07:00
Sergey M․
0830f3e048 [cbc:watch] Bypass geo-restriction (closes #13993) 2017-08-23 22:45:45 +07:00
Sergey M․
8d7a24aff6 [toutv] Relax DRM check (closes #13994) 2017-08-23 22:28:09 +07:00
Sergey M․
37d9af306a [googledrive] Simplify and carry long lines (#13638) 2017-08-23 00:33:53 +07:00
Sergey M․
e01c3d2ef7 [extractor/common] Introduce _parse_xml 2017-08-23 00:32:41 +07:00
Parmjit Virk
05915e379a [googledrive] Add support for subtitles (fixes #13619) 2017-08-22 23:48:59 +07:00
Yen Chi Hsuan
7b67b60773 Merge pull request #13669 from bmwiedemann/master
[build] Override timestamps in zip file
2017-08-22 21:51:20 +08:00
Sergey M․
8d9c2a681a [pornhub] Relax uploader regex (closes #13906, closes #13975) 2017-08-21 23:06:27 +07:00
Alan Yee
903d4d1625 [README.md] Switch to HTTPS URLs 2017-08-20 23:35:39 +07:00
Luca Steeb
8239c6791a [bandcamp:album] Extract track titles 2017-08-20 23:32:33 +07:00
Sergey M․
b359e977b9 [extractor/common] Make HLS and DASH extraction non fatal in _parse_html5_media_entries (closes #13970) 2017-08-20 14:16:58 +07:00
Bernhard M. Wiedemann
305d99f0bd [build] Override timestamps in zip file
to make build reproducible.
See https://reproducible-builds.org/ for why this is good

Copying files to not interfere with freshness detection.
2017-08-19 21:43:48 +02:00
Sergey M․
d3d45e0a45 [bbccouk] Add support for events URLs (closes #13893) 2017-08-19 23:54:15 +07:00
Yen Chi Hsuan
381ad4f309 [liveleak] Support multi-video pages (closes #6542) 2017-08-19 22:48:00 +08:00
Yen Chi Hsuan
e2481b9b6e [ChangeLog] Fix 2017-08-19 22:28:58 +08:00
Yen Chi Hsuan
09747ba766 [liveleak] Support another liveleak embedding pattern (closes #13336) 2017-08-19 22:28:13 +08:00
Yen Chi Hsuan
f8f18f332f [cda] Fix extraction (closes #13935) 2017-08-19 21:44:47 +08:00
Yen Chi Hsuan
95f3f7c20a [utils] Fix unescapeHTML for misformed string like "&a"" (#13935) 2017-08-19 21:40:53 +08:00
Sergey M․
f5469da9e6 [laola1tv] Add support for tv.ittf.com (closes #13965) 2017-08-19 19:48:20 +07:00
Sergey M․
d14d9d8903 [mixcloud] Fix extraction (closes #13958) 2017-08-18 23:31:42 +07:00
Tithen-Firion
feee8d32e4 [phantomjs] add exe version to debug info 2017-08-03 14:17:25 +02:00
Tithen-Firion
c89267d31a Merge branch 'master' into openload-phantomjs-method 2017-05-04 11:00:06 +02:00
Remita Amine
5ff1bc0cc1 [YoutubeDL] write raw subtitle files 2017-04-29 20:03:03 +01:00
Tithen-Firion
7552f96352 [openload] Add required version 2017-04-29 12:41:57 +02:00
Tithen-Firion
98f9d87381 [phantomjs] Add required version checking 2017-04-29 12:41:42 +02:00
Tithen-Firion
fcace2d1ad [openload] raise not found before executing js 2017-04-29 10:30:45 +02:00
Tithen-Firion
40e41780f1 [phantomjs] add cookie support 2017-04-25 15:12:54 +02:00
Tithen-Firion
da57ebaf84 [openload] separate PhantomJS code from extractor 2017-04-25 01:06:14 +02:00
Tithen-Firion
47e0cef46e [openload] rewrite extractor 2017-04-16 00:34:34 +02:00
148 changed files with 3901 additions and 1466 deletions

View File

@@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.18**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.10.20*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.10.20**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2017.08.18
[debug] youtube-dl version 2017.10.20
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@@ -9,6 +9,7 @@
### Before submitting a *pull request* make sure you have:
- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/rg3/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/rg3/youtube-dl#youtube-dl-coding-conventions) sections
- [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests
- [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8)
### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options:
- [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/)

1
.gitignore vendored
View File

@@ -22,6 +22,7 @@ cover/
updates_key.pem
*.egg-info
*.srt
*.ttml
*.sbv
*.vtt
*.flv

View File

@@ -224,3 +224,10 @@ Giuseppe Fabiano
Örn Guðjónsson
Parmjit Virk
Genki Sky
Ľuboš Katrinec
Corey Nicholson
Ashutosh Chaudhary
John Dong
Tatsuyuki Ishi
Daniel Weber
Kay Bouché

View File

@@ -3,7 +3,7 @@
$ youtube-dl -v <your command line>
[debug] System config: []
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2015.12.06
[debug] Git HEAD: 135392e
@@ -34,7 +34,7 @@ For bug reports, this means that your report should contain the *complete* outpu
If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `http://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `http://www.youtube.com/`) is *not* an example URL.
**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.
### Are you using the latest version?
@@ -70,7 +70,7 @@ It may sound strange, but some bug reports we receive are completely unrelated t
# DEVELOPER INSTRUCTIONS
Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
Most users do not need to build youtube-dl and can [download the builds](https://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
To run youtube-dl as a developer, you don't need to build anything either. Simply execute
@@ -82,6 +82,8 @@ To run the test, simply invoke your favorite test runner, or execute a test file
python test/test_download.py
nosetests
See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
If you want to create a build of youtube-dl yourself, you'll need
* python
@@ -118,7 +120,7 @@ After you have ensured this site is distributing its content legally, you can fo
class YourExtractorIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://yourextractor.com/watch/42',
'url': 'https://yourextractor.com/watch/42',
'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
'info_dict': {
'id': '42',
@@ -149,10 +151,10 @@ After you have ensured this site is distributing its content legally, you can fo
}
```
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
$ git add youtube_dl/extractor/extractors.py
$ git add youtube_dl/extractor/yourextractor.py

267
ChangeLog
View File

@@ -1,3 +1,266 @@
version 2017.10.20
Core
* [downloader/fragment] Report warning instead of error on inconsistent
download state
* [downloader/hls] Fix total fragments count when ad fragments exist
Extractors
* [parliamentliveuk] Fix extraction (#14524)
* [soundcloud] Update client id (#14546)
+ [servus] Add support for servus.com (#14362)
+ [unity] Add support for unity3d.com (#14528)
* [youtube] Replace youtube redirect URLs in description (#14517)
* [pbs] Restrict direct video URL regular expression (#14519)
* [drtv] Respect preference for direct HTTP formats (#14509)
+ [eporner] Add support for embed URLs (#14507)
* [arte] Capture and output error message
* [niconico] Improve uploader metadata extraction robustness (#14135)
version 2017.10.15.1
Core
* [downloader/hls] Ignore anvato ad fragments (#14496)
* [downloader/fragment] Output ad fragment count
Extractors
* [scrippsnetworks:watch] Bypass geo restriction
+ [anvato] Add ability to bypass geo restriction
* [redditr] Fix extraction for URLs with query (#14495)
version 2017.10.15
Core
+ [common] Add support for jwplayer youtube embeds
Extractors
* [scrippsnetworks:watch] Fix extraction (#14389)
* [anvato] Process master m3u8 manifests
* [youtube] Fix relative URLs in description
* [spike] Bypass geo restriction
+ [howstuffworks] Add support for more domains
* [infoq] Fix http format downloading
+ [rtlnl] Add support for another type of embeds
+ [onionstudios] Add support for bulbs-video embeds
* [udn] Fix extraction
* [shahid] Fix extraction (#14448)
* [kaltura] Ignore Widevine encrypted video (.wvm) (#14471)
* [vh1] Fix extraction (#9613)
version 2017.10.12
Core
* [YoutubeDL] Improve _default_format_spec (#14461)
Extractors
* [steam] Fix extraction (#14067)
+ [funk] Add support for funk.net (#14464)
+ [nexx] Add support for shortcuts and relax domain id extraction
+ [voxmedia] Add support for recode.net (#14173)
+ [once] Add support for vmap URLs
+ [generic] Add support for channel9 embeds (#14469)
* [tva] Fix extraction (#14328)
+ [tubitv] Add support for new URL format (#14460)
- [afreecatv:global] Remove extractor
- [youtube:shared] Removed extractor (#14420)
+ [slideslive] Add support for slideslive.com (#2680)
+ [facebook] Support thumbnails (#14416)
* [vvvvid] Fix episode number extraction (#14456)
* [hrti:playlist] Relax URL regular expression
* [wdr] Relax media link regular expression (#14447)
* [hrti] Relax URL regular expression (#14443)
* [fox] Delegate extraction to uplynk:preplay (#14147)
+ [youtube] Add support for hooktube.com (#14437)
version 2017.10.07
Core
* [YoutubeDL] Ignore duplicates in --playlist-items
* [YoutubeDL] Fix out of range --playlist-items for iterable playlists and
reduce code duplication (#14425)
+ [utils] Use cache in OnDemandPagedList by default
* [postprocessor/ffmpeg] Convert to opus using libopus (#14381)
Extractors
* [reddit] Sort formats (#14430)
* [lnkgo] Relax URL regular expression (#14423)
* [pornflip] Extend URL regular expression (#14405, #14406)
+ [xtube] Add support for embed URLs (#14417)
+ [xvideos] Add support for embed URLs and improve extraction (#14409)
* [beeg] Fix extraction (#14403)
* [tvn24] Relax URL regular expression (#14395)
* [nbc] Fix extraction (#13651, #13715, #14137, #14198, #14312, #14314, #14378,
#14392, #14414, #14419, #14431)
+ [ketnet] Add support for videos without direct sources (#14377)
* [canvas] Generalize mediazone.vrt.be extractor and rework canvas and een
+ [afreecatv] Add support for adult videos (#14376)
version 2017.10.01
Core
* [YoutubeDL] Document youtube_include_dash_manifest
Extractors
+ [tvp] Add support for new URL schema (#14368)
+ [generic] Add support for single format Video.js embeds (#14371)
* [yahoo] Bypass geo restriction for brightcove (#14210)
* [yahoo] Use extracted brightcove account id (#14210)
* [rtve:alacarta] Fix extraction (#14290)
+ [yahoo] Add support for custom brigthcove embeds (#14210)
+ [generic] Add support for Video.js embeds
+ [gfycat] Add support for /gifs/detail URLs (#14322)
* [generic] Fix infinite recursion for twitter:player URLs (#14339)
* [xhamsterembed] Fix extraction (#14308)
version 2017.09.24
Core
+ [options] Accept lrc as a subtitle conversion target format (#14292)
* [utils] Fix handling raw TTML subtitles (#14191)
Extractors
* [24video] Fix timestamp extraction and make non fatal (#14295)
+ [24video] Add support for 24video.adult (#14295)
+ [kakao] Add support for tv.kakao.com (#12298, #14007)
+ [twitter] Add support for URLs without user id (#14270)
+ [americastestkitchen] Add support for americastestkitchen.com (#10764,
#13996)
* [generic] Fix support for multiple HTML5 videos on one page (#14080)
* [mixcloud] Fix extraction (#14088, #14132)
+ [lynda] Add support for educourse.ga (#14286)
* [beeg] Fix extraction (#14275)
* [nbcsports:vplayer] Correct theplatform URL (#13873)
* [twitter] Fix duration extraction (#14141)
* [tvplay] Bypass geo restriction
+ [heise] Add support for YouTube embeds (#14109)
+ [popcorntv] Add support for popcorntv.it (#5914, #14211)
* [viki] Update app data (#14181)
* [morningstar] Relax URL regular expression (#14222)
* [openload] Fix extraction (#14225, #14257)
* [noovo] Fix extraction (#14214)
* [dailymotion:playlist] Relax URL regular expression (#14219)
+ [twitch] Add support for go.twitch.tv URLs (#14215)
* [vgtv] Relax URL regular expression (#14223)
version 2017.09.15
Core
* [downloader/fragment] Restart inconsistent incomplete fragment downloads
(#13731)
* [YoutubeDL] Download raw subtitles files (#12909, #14191)
Extractors
* [condenast] Fix extraction (#14196, #14207)
+ [orf] Add support for f4m stories
* [tv4] Relax URL regular expression (#14206)
* [animeondemand] Bypass geo restriction
+ [animeondemand] Add support for flash videos (#9944)
version 2017.09.11
Extractors
* [rutube:playlist] Fix suitable (#14166)
version 2017.09.10
Core
+ [utils] Introduce bool_or_none
* [YoutubeDL] Ensure dir existence for each requested format (#14116)
Extractors
* [fox] Fix extraction (#14147)
* [rutube] Use bool_or_none
* [rutube] Rework and generalize playlist extractors (#13565)
+ [rutube:playlist] Add support for playlists (#13534, #13565)
+ [radiocanada] Add fallback for title extraction (#14145)
* [vk] Use dedicated YouTube embeds extraction routine
* [vice] Use dedicated YouTube embeds extraction routine
* [cracked] Use dedicated YouTube embeds extraction routine
* [chilloutzone] Use dedicated YouTube embeds extraction routine
* [abcnews] Use dedicated YouTube embeds extraction routine
* [youtube] Separate methods for embeds extraction
* [redtube] Fix formats extraction (#14122)
* [arte] Relax unavailability check (#14112)
+ [manyvids] Add support for preview videos from manyvids.com (#14053, #14059)
* [vidme:user] Relax URL regular expression (#14054)
* [bpb] Fix extraction (#14043, #14086)
* [soundcloud] Fix download URL with private tracks (#14093)
* [aliexpress:live] Add support for live.aliexpress.com (#13698, #13707)
* [viidea] Capture and output lecture error message (#14099)
* [radiocanada] Skip unsupported platforms (#14100)
version 2017.09.02
Extractors
* [youtube] Force old layout for each webpage (#14068, #14072, #14074, #14076,
#14077, #14079, #14082, #14083, #14094, #14095, #14096)
* [youtube] Fix upload date extraction (#14065)
+ [charlierose] Add support for episodes (#14062)
+ [bbccouk] Add support for w-prefixed ids (#14056)
* [googledrive] Extend URL regular expression (#9785)
+ [googledrive] Add support for source format (#14046)
* [pornhd] Fix extraction (#14005)
version 2017.08.27.1
Extractors
* [youtube] Fix extraction with --youtube-skip-dash-manifest enabled (#14037)
version 2017.08.27
Core
+ [extractor/common] Extract height and format id for HTML5 videos (#14034)
* [downloader/http] Rework HTTP downloader (#506, #809, #2849, #4240, #6023,
#8625, #9483)
* Simplify code and split into separate routines to facilitate maintaining
* Make retry mechanism work on errors during actual download not only
during connection establishment phase
* Retry on ECONNRESET and ETIMEDOUT during reading data from network
* Retry on content too short
* Show error description on retry
Extractors
* [generic] Lower preference for extraction from LD-JSON
* [rai] Fix audio formats extraction (#14024)
* [youtube] Fix controversy videos extraction (#14027, #14029)
* [mixcloud] Fix extraction (#14015, #14020)
version 2017.08.23
Core
+ [extractor/common] Introduce _parse_xml
* [extractor/common] Make HLS and DASH extraction in_parse_html5_media_entries
non fatal (#13970)
* [utils] Fix unescapeHTML for misformed string like "&a&quot;" (#13935)
Extractors
* [cbc:watch] Bypass geo restriction (#13993)
* [toutv] Relax DRM check (#13994)
+ [googledrive] Add support for subtitles (#13619, #13638)
* [pornhub] Relax uploader regular expression (#13906, #13975)
* [bandcamp:album] Extract track titles (#13962)
+ [bbccouk] Add support for events URLs (#13893)
+ [liveleak] Support multi-video pages (#6542)
+ [liveleak] Support another liveleak embedding pattern (#13336)
* [cda] Fix extraction (#13935)
+ [laola1tv] Add support for tv.ittf.com (#13965)
* [mixcloud] Fix extraction (#13958, #13974, #13980, #14003)
version 2017.08.18
Core
@@ -118,7 +381,7 @@ Extractors
* [youku:show] Fix playlist extraction (#13248)
+ [dispeak] Recognize sevt subdomain (#13276)
* [adn] Improve error reporting (#13663)
* [crunchyroll] Relax series and season regex (#13659)
* [crunchyroll] Relax series and season regular expression (#13659)
+ [spiegel:article] Add support for nexx iframe embeds (#13029)
+ [nexx:embed] Add support for iframe embeds
* [nexx] Improve JS embed extraction
@@ -591,7 +854,7 @@ version 2017.04.14
Core
+ [downloader/hls] Add basic support for EXT-X-BYTERANGE tag (#10955)
+ [adobepass] Improve Comcast and Verison login code (#10803)
+ [adobepass] Improve Comcast and Verizon login code (#10803)
+ [adobepass] Add support for Verizon (#10803)
Extractors

View File

@@ -46,8 +46,15 @@ tar: youtube-dl.tar.gz
pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 youtube-dl.fish
youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
zip --quiet youtube-dl youtube_dl/*.py youtube_dl/*/*.py
zip --quiet --junk-paths youtube-dl youtube_dl/__main__.py
mkdir -p zip
for d in youtube_dl youtube_dl/downloader youtube_dl/extractor youtube_dl/postprocessor ; do \
mkdir -p zip/$$d ;\
cp -pPR $$d/*.py zip/$$d/ ;\
done
touch -t 200001010101 zip/youtube_dl/*.py zip/youtube_dl/*/*.py
mv zip/youtube_dl/__main__.py zip/
cd zip ; zip -q ../youtube-dl youtube_dl/*.py youtube_dl/*/*.py __main__.py
rm -rf zip
echo '#!$(PYTHON)' > youtube-dl
cat youtube-dl.zip >> youtube-dl
rm youtube-dl.zip

View File

@@ -25,7 +25,7 @@ If you do not have curl, you can alternatively use a recent wget:
sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
sudo chmod a+rx /usr/local/bin/youtube-dl
Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`).
Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](https://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`).
You can also use pip:
@@ -33,7 +33,7 @@ You can also use pip:
This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information.
OS X users can install youtube-dl with [Homebrew](http://brew.sh/):
OS X users can install youtube-dl with [Homebrew](https://brew.sh/):
brew install youtube-dl
@@ -427,7 +427,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
syntax. Example: --exec 'adb push {}
/sdcard/Music/ && rm {}'
--convert-subs FORMAT Convert the subtitles to other format
(currently supported: srt|ass|vtt)
(currently supported: srt|ass|vtt|lrc)
# CONFIGURATION
@@ -458,7 +458,7 @@ You can also use `--config-location` if you want to use custom configuration fil
### Authentication with `.netrc` file
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you:
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you:
```
touch $HOME/.netrc
chmod a-rwx,u+rw $HOME/.netrc
@@ -485,7 +485,7 @@ The `-o` option allows users to indicate a template for the output file names.
**tl;dr:** [navigate me to examples](#output-template-examples).
The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operations. Allowed names along with sequence type are:
The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operations. Allowed names along with sequence type are:
- `id` (string): Video identifier
- `title` (string): Video title
@@ -603,7 +603,7 @@ $ youtube-dl -o '%(uploader)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)
$ youtube-dl -u user -p password -o '~/MyVideos/%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s' https://www.udemy.com/java-tutorial/
# Download entire series season keeping each series and each season in separate directory under C:/MyVideos
$ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" http://videomore.ru/kino_v_detalayah/5_sezon/367617
$ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" https://videomore.ru/kino_v_detalayah/5_sezon/367617
# Stream the video being downloaded to stdout
$ youtube-dl -o - BaW_jenozKc
@@ -716,17 +716,17 @@ $ youtube-dl --dateafter 20000101 --datebefore 20091231
### How do I update youtube-dl?
If you've followed [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`).
If you've followed [our manual installation instructions](https://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`).
If you have used pip, a simple `sudo pip install -U youtube-dl` is sufficient to update.
If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to http://yt-dl.org/ to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. You can (and should) complain to your distribution in their bugtracker or support forum.
If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to https://yt-dl.org to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. You can (and should) complain to your distribution in their bugtracker or support forum.
As a last resort, you can also uninstall the version installed by your package manager and follow our manual installation instructions. For that, remove the distribution's package, with a line like
sudo apt-get remove -y youtube-dl
Afterwards, simply follow [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html):
Afterwards, simply follow [our manual installation instructions](https://rg3.github.io/youtube-dl/download.html):
```
sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl
@@ -766,11 +766,11 @@ Apparently YouTube requires you to pass a CAPTCHA test if you download too much.
youtube-dl works fine on its own on most sites. However, if you want to convert video/audio, you'll need [avconv](https://libav.org/) or [ffmpeg](https://www.ffmpeg.org/). On some sites - most notably YouTube - videos can be retrieved in a higher quality format without sound. youtube-dl will detect whether avconv/ffmpeg is present and automatically pick the best option.
Videos or video formats streamed via RTMP protocol can only be downloaded when [rtmpdump](https://rtmpdump.mplayerhq.hu/) is installed. Downloading MMS and RTSP videos requires either [mplayer](http://mplayerhq.hu/) or [mpv](https://mpv.io/) to be installed.
Videos or video formats streamed via RTMP protocol can only be downloaded when [rtmpdump](https://rtmpdump.mplayerhq.hu/) is installed. Downloading MMS and RTSP videos requires either [mplayer](https://mplayerhq.hu/) or [mpv](https://mpv.io/) to be installed.
### I have downloaded a video but how can I play it?
Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org/) or [mplayer](http://www.mplayerhq.hu/).
Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](https://www.videolan.org/) or [mplayer](https://www.mplayerhq.hu/).
### I extracted a video URL with `-g`, but it does not play on another machine / in my web browser.
@@ -845,10 +845,10 @@ Use the `-o` to specify an [output template](#output-template), for example `-o
### How do I download a video starting with a `-`?
Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the options with `--`:
Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the options with `--`:
youtube-dl -- -wNyEUrxzFU
youtube-dl "http://www.youtube.com/watch?v=-wNyEUrxzFU"
youtube-dl "https://www.youtube.com/watch?v=-wNyEUrxzFU"
### How do I pass cookies to youtube-dl?
@@ -862,9 +862,9 @@ Passing cookies to youtube-dl is a good way to workaround login when a particula
### How do I stream directly to media player?
You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe former to latter. For example, streaming to [vlc](http://www.videolan.org/) can be achieved with:
You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe former to latter. For example, streaming to [vlc](https://www.videolan.org/) can be achieved with:
youtube-dl -o - "http://www.youtube.com/watch?v=BaW_jenozKcj" | vlc -
youtube-dl -o - "https://www.youtube.com/watch?v=BaW_jenozKcj" | vlc -
### How do I download only new videos from a playlist?
@@ -884,7 +884,7 @@ When youtube-dl detects an HLS video, it can download it either with the built-i
When youtube-dl knows that one particular downloader works better for a given website, that downloader will be picked. Otherwise, youtube-dl will pick the best downloader for general compatibility, which at the moment happens to be ffmpeg. This choice may change in future versions of youtube-dl, with improvements of the built-in downloader and/or ffmpeg.
In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](http://rg3.github.io/youtube-dl/supportedsites.html) cannot mandate one specific downloader.
In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](https://rg3.github.io/youtube-dl/supportedsites.html) cannot mandate one specific downloader.
If you put either `--hls-prefer-native` or `--hls-prefer-ffmpeg` into your configuration, a different subset of videos will fail to download correctly. Instead, it is much better to [file an issue](https://yt-dl.org/bug) or a pull request which details why the native or the ffmpeg HLS downloader is a better choice for your use case.
@@ -910,7 +910,7 @@ Feel free to bump the issue from time to time by writing a small comment ("Issue
### How can I detect whether a given URL is supported by youtube-dl?
For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from https://example.com/video/1234567 to https://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
@@ -924,7 +924,7 @@ youtube-dl is an open-source project manned by too few volunteers, so we'd rathe
# DEVELOPER INSTRUCTIONS
Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
Most users do not need to build youtube-dl and can [download the builds](https://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
To run youtube-dl as a developer, you don't need to build anything either. Simply execute
@@ -936,6 +936,8 @@ To run the test, simply invoke your favorite test runner, or execute a test file
python test/test_download.py
nosetests
See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
If you want to create a build of youtube-dl yourself, you'll need
* python
@@ -972,7 +974,7 @@ After you have ensured this site is distributing its content legally, you can fo
class YourExtractorIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://yourextractor.com/watch/42',
'url': 'https://yourextractor.com/watch/42',
'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
'info_dict': {
'id': '42',
@@ -1003,10 +1005,10 @@ After you have ensured this site is distributing its content legally, you can fo
}
```
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
$ git add youtube_dl/extractor/extractors.py
$ git add youtube_dl/extractor/yourextractor.py
@@ -1162,10 +1164,10 @@ import youtube_dl
ydl_opts = {}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```
Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L129-L279). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
@@ -1201,19 +1203,19 @@ ydl_opts = {
'progress_hooks': [my_hook],
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```
# BUGS
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](http://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
**Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
```
$ youtube-dl -v <your command line>
[debug] System config: []
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2015.12.06
[debug] Git HEAD: 135392e
@@ -1244,7 +1246,7 @@ For bug reports, this means that your report should contain the *complete* outpu
If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `http://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `http://www.youtube.com/`) is *not* an example URL.
**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.
### Are you using the latest version?

View File

@@ -14,7 +14,7 @@ import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import get_testcases
from test.helper import gettestcases
from youtube_dl.utils import compat_urllib_parse_urlparse
from youtube_dl.utils import compat_urllib_request
@@ -24,7 +24,7 @@ if len(sys.argv) > 1:
else:
METHOD = 'EURISTIC'
for test in get_testcases():
for test in gettestcases():
if METHOD == 'EURISTIC':
try:
webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()

View File

@@ -36,12 +36,13 @@
- **AdultSwim**
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
- **afreecatv**: afreecatv.com
- **afreecatv:global**: afreecatv.com
- **AirMozilla**
- **AliExpressLive**
- **AlJazeera**
- **Allocine**
- **AlphaPorno**
- **AMCNetworks**
- **AmericasTestKitchen**
- **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **AnimeOnDemand**
- **anitube.se**
@@ -128,7 +129,8 @@
- **CamWithHer**
- **canalc2.tv**
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
- **Canvas**: canvas.be and een.be
- **Canvas**
- **CanvasEen**: canvas.be and een.be
- **CarambaTV**
- **CarambaTVPage**
- **CartoonNetwork**
@@ -293,6 +295,7 @@
- **freespeech.org**
- **FreshLive**
- **Funimation**
- **Funk**
- **FunnyOrDie**
- **Fusion**
- **Fux**
@@ -363,6 +366,7 @@
- **IPrima**
- **iqiyi**: 爱奇艺
- **Ir90Tv**
- **ITTF**
- **ITV**
- **ivi**: ivi.ru
- **ivi:compilation**: ivi.ru compilations
@@ -376,6 +380,7 @@
- **Jove**
- **jpopsuki.tv**
- **JWPlatform**
- **Kakao**
- **Kaltura**
- **Kamcord**
- **KanalPlay**: Kanal 5/9/11 Play
@@ -419,6 +424,7 @@
- **limelight:channel_list**
- **LiTV**
- **LiveLeak**
- **LiveLeakEmbed**
- **livestream**
- **livestream:original**
- **LnkGo**
@@ -435,6 +441,7 @@
- **MakerTV**
- **mangomolo:live**
- **mangomolo:video**
- **ManyVids**
- **MatchTV**
- **MDR**: MDR.DE and KiKA
- **media.ccc.de**
@@ -589,6 +596,7 @@
- **Openload**
- **OraTV**
- **orf:fm4**: radio FM4
- **orf:fm4:story**: fm4.orf.at stories
- **orf:iptv**: iptv.ORF.at
- **orf:oe1**: Radio Österreich 1
- **orf:tvthek**: ORF TVthek
@@ -622,6 +630,7 @@
- **Pokemon**
- **PolskieRadio**
- **PolskieRadioCategory**
- **PopcornTV**
- **PornCom**
- **PornerBros**
- **PornFlip**
@@ -699,6 +708,7 @@
- **rutube:embed**: Rutube embedded videos
- **rutube:movie**: Rutube movies
- **rutube:person**: Rutube person videos
- **rutube:playlist**: Rutube playlists
- **RUTV**: RUTV.RU
- **Ruutu**
- **Ruv**
@@ -718,6 +728,7 @@
- **SenateISVP**
- **SendtoNews**
- **ServingSys**
- **Servus**
- **Sexu**
- **Shahid**
- **Shared**: shared.sx
@@ -728,6 +739,7 @@
- **skynewsarabia:video**
- **SkySports**
- **Slideshare**
- **SlidesLive**
- **Slutload**
- **smotri**: Smotri.com
- **smotri:broadcast**: Smotri.com broadcasts
@@ -876,6 +888,7 @@
- **UDNEmbed**: 聯合影音
- **UKTVPlay**
- **Unistra**
- **Unity**
- **uol.com.br**
- **uplynk**
- **uplynk:preplay**
@@ -959,6 +972,7 @@
- **VoiceRepublic**
- **Voot**
- **VoxMedia**
- **VoxMediaVolume**
- **Vporn**
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **Vrak**
@@ -1034,7 +1048,6 @@
- **youtube:search**: YouTube.com searches
- **youtube:search:date**: YouTube.com searches, newest videos first
- **youtube:search_url**: YouTube.com search URLs
- **youtube:shared**
- **youtube:show**: YouTube.com (multi-season) shows
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)

View File

@@ -466,12 +466,18 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL({'simulate': True})
self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best')
ydl = YDL({'is_live': True})
self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
ydl = YDL({'simulate': True, 'is_live': True})
self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best')
ydl = YDL({'outtmpl': '-'})
self.assertEqual(ydl._default_format_spec({}), 'best')
self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
ydl = YDL({})
self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo+bestaudio/best')
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best')
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
class TestYoutubeDL(unittest.TestCase):
@@ -770,6 +776,12 @@ class TestYoutubeDL(unittest.TestCase):
result = get_ids({'playlist_items': '10'})
self.assertEqual(result, [])
result = get_ids({'playlist_items': '3-10'})
self.assertEqual(result, [3, 4])
result = get_ids({'playlist_items': '2-4,3-4,3'})
self.assertEqual(result, [2, 3, 4])
def test_urlopen_no_file_protocol(self):
# see https://github.com/rg3/youtube-dl/issues/8227
ydl = YDL()

View File

@@ -279,6 +279,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unescapeHTML('&#47;'), '/')
self.assertEqual(unescapeHTML('&eacute;'), 'é')
self.assertEqual(unescapeHTML('&#2013266066;'), '&#2013266066;')
self.assertEqual(unescapeHTML('&a&quot;'), '&a"')
# HTML5 entities
self.assertEqual(unescapeHTML('&period;&apos;'), '.\'')
@@ -1063,7 +1064,7 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
<p begin="3" dur="-1">Ignored, three</p>
</div>
</body>
</tt>'''
</tt>'''.encode('utf-8')
srt_data = '''1
00:00:00,000 --> 00:00:01,000
The following line contains Chinese characters and special symbols
@@ -1088,7 +1089,7 @@ Line
<p begin="0" end="1">The first line</p>
</div>
</body>
</tt>'''
</tt>'''.encode('utf-8')
srt_data = '''1
00:00:00,000 --> 00:00:01,000
The first line
@@ -1114,7 +1115,7 @@ The first line
<p style="s1" tts:textDecoration="underline" begin="00:00:09.56" id="p2" end="00:00:12.36"><span style="s2" tts:color="lime">inner<br /> </span>style</p>
</div>
</body>
</tt>'''
</tt>'''.encode('utf-8')
srt_data = '''1
00:00:02,080 --> 00:00:05,839
<font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font>
@@ -1137,6 +1138,26 @@ part 3</font></u>
'''
self.assertEqual(dfxp2srt(dfxp_data_with_style), srt_data)
dfxp_data_non_utf8 = '''<?xml version="1.0" encoding="UTF-16"?>
<tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
<body>
<div xml:lang="en">
<p begin="0" end="1">Line 1</p>
<p begin="1" end="2">第二行</p>
</div>
</body>
</tt>'''.encode('utf-16')
srt_data = '''1
00:00:00,000 --> 00:00:01,000
Line 1
2
00:00:01,000 --> 00:00:02,000
第二行
'''
self.assertEqual(dfxp2srt(dfxp_data_non_utf8), srt_data)
def test_cli_option(self):
self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128'])
self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), [])

View File

@@ -65,6 +65,7 @@ from .utils import (
locked_file,
make_HTTPS_handler,
MaxDownloadsReached,
orderedSet,
PagedList,
parse_filesize,
PerRequestProxyHandler,
@@ -92,6 +93,7 @@ from .utils import (
)
from .cache import Cache
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
from .extractor.openload import PhantomJSwrapper
from .downloader import get_suitable_downloader
from .downloader.rtmp import rtmpdump_version
from .postprocessor import (
@@ -303,6 +305,12 @@ class YoutubeDL(object):
otherwise prefer avconv.
postprocessor_args: A list of additional command-line arguments for the
postprocessor.
The following options are used by the Youtube extractor:
youtube_include_dash_manifest: If True (default), DASH manifests and related
data will be downloaded and processed by extractor.
You can reduce network I/O by disabling it if you don't
care about DASH.
"""
_NUMERIC_FIELDS = set((
@@ -901,15 +909,25 @@ class YoutubeDL(object):
yield int(item)
else:
yield int(string_segment)
playlistitems = iter_playlistitems(playlistitems_str)
playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
ie_entries = ie_result['entries']
def make_playlistitems_entries(list_ie_entries):
num_entries = len(list_ie_entries)
return [
list_ie_entries[i - 1] for i in playlistitems
if -num_entries <= i - 1 < num_entries]
def report_download(num_entries):
self.to_screen(
'[%s] playlist %s: Downloading %d videos' %
(ie_result['extractor'], playlist, num_entries))
if isinstance(ie_entries, list):
n_all_entries = len(ie_entries)
if playlistitems:
entries = [
ie_entries[i - 1] for i in playlistitems
if -n_all_entries <= i - 1 < n_all_entries]
entries = make_playlistitems_entries(ie_entries)
else:
entries = ie_entries[playliststart:playlistend]
n_entries = len(entries)
@@ -927,20 +945,15 @@ class YoutubeDL(object):
entries = ie_entries.getslice(
playliststart, playlistend)
n_entries = len(entries)
self.to_screen(
'[%s] playlist %s: Downloading %d videos' %
(ie_result['extractor'], playlist, n_entries))
report_download(n_entries)
else: # iterable
if playlistitems:
entry_list = list(ie_entries)
entries = [entry_list[i - 1] for i in playlistitems]
entries = make_playlistitems_entries(list(ie_entries))
else:
entries = list(itertools.islice(
ie_entries, playliststart, playlistend))
n_entries = len(entries)
self.to_screen(
'[%s] playlist %s: Downloading %d videos' %
(ie_result['extractor'], playlist, n_entries))
report_download(n_entries)
if self.params.get('playlistreverse', False):
entries = entries[::-1]
@@ -1065,22 +1078,27 @@ class YoutubeDL(object):
return _filter
def _default_format_spec(self, info_dict, download=True):
req_format_list = []
def can_have_partial_formats():
if self.params.get('simulate', False):
return True
if not download:
return True
if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
return False
if info_dict.get('is_live'):
return False
def can_merge():
merger = FFmpegMergerPP(self)
return merger.available and merger.can_merge()
if can_have_partial_formats():
req_format_list.append('bestvideo+bestaudio')
req_format_list.append('best')
def prefer_best():
if self.params.get('simulate', False):
return False
if not download:
return False
if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
return True
if info_dict.get('is_live'):
return True
if not can_merge():
return True
return False
req_format_list = ['bestvideo+bestaudio', 'best']
if prefer_best():
req_format_list.reverse()
return '/'.join(req_format_list)
def build_format_selector(self, format_spec):
@@ -1710,12 +1728,17 @@ class YoutubeDL(object):
if filename is None:
return
try:
dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
if dn and not os.path.exists(dn):
os.makedirs(dn)
except (OSError, IOError) as err:
self.report_error('unable to create directory ' + error_to_compat_str(err))
def ensure_dir_exists(path):
try:
dn = os.path.dirname(path)
if dn and not os.path.exists(dn):
os.makedirs(dn)
return True
except (OSError, IOError) as err:
self.report_error('unable to create directory ' + error_to_compat_str(err))
return False
if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
return
if self.params.get('writedescription', False):
@@ -1758,29 +1781,30 @@ class YoutubeDL(object):
ie = self.get_info_extractor(info_dict['extractor_key'])
for sub_lang, sub_info in subtitles.items():
sub_format = sub_info['ext']
if sub_info.get('data') is not None:
sub_data = sub_info['data']
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
else:
try:
sub_data = ie._download_webpage(
sub_info['url'], info_dict['id'], note=False)
except ExtractorError as err:
self.report_warning('Unable to download subtitle for "%s": %s' %
(sub_lang, error_to_compat_str(err.cause)))
continue
try:
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
if sub_info.get('data') is not None:
try:
# Use newline='' to prevent conversion of newline characters
# See https://github.com/rg3/youtube-dl/issues/10268
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
subfile.write(sub_info['data'])
except (OSError, IOError):
self.report_error('Cannot write subtitles file ' + sub_filename)
return
else:
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
# Use newline='' to prevent conversion of newline characters
# See https://github.com/rg3/youtube-dl/issues/10268
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
subfile.write(sub_data)
except (OSError, IOError):
self.report_error('Cannot write subtitles file ' + sub_filename)
return
try:
sub_data = ie._request_webpage(
sub_info['url'], info_dict['id'], note=False).read()
with io.open(encodeFilename(sub_filename), 'wb') as subfile:
subfile.write(sub_data)
except (ExtractorError, IOError, OSError, ValueError) as err:
self.report_warning('Unable to download subtitle for "%s": %s' %
(sub_lang, error_to_compat_str(err)))
continue
if self.params.get('writeinfojson', False):
infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
@@ -1853,8 +1877,11 @@ class YoutubeDL(object):
for f in requested_formats:
new_info = dict(info_dict)
new_info.update(f)
fname = self.prepare_filename(new_info)
fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
fname = prepend_extension(
self.prepare_filename(new_info),
'f%s' % f['format_id'], new_info['ext'])
if not ensure_dir_exists(fname):
return
downloaded.append(fname)
partial_success = dl(fname, new_info)
success = success and partial_success
@@ -2208,6 +2235,7 @@ class YoutubeDL(object):
exe_versions = FFmpegPostProcessor.get_versions(self)
exe_versions['rtmpdump'] = rtmpdump_version()
exe_versions['phantomjs'] = PhantomJSwrapper._version()
exe_str = ', '.join(
'%s %s' % (exe, v)
for exe, v in sorted(exe_versions.items())

View File

@@ -206,7 +206,7 @@ def _real_main(argv=None):
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']:
parser.error('invalid video recode format specified')
if opts.convertsubtitles is not None:
if opts.convertsubtitles not in ['srt', 'vtt', 'ass']:
if opts.convertsubtitles not in ['srt', 'vtt', 'ass', 'lrc']:
parser.error('invalid subtitle format specified')
if opts.date is not None:

View File

@@ -6,6 +6,7 @@ import collections
import email
import getpass
import io
import itertools
import optparse
import os
import re
@@ -15,7 +16,6 @@ import socket
import struct
import subprocess
import sys
import itertools
import xml.etree.ElementTree
@@ -2898,6 +2898,13 @@ else:
compat_struct_pack = struct.pack
compat_struct_unpack = struct.unpack
try:
from future_builtins import zip as compat_zip
except ImportError: # not 2.6+ or is 3.x
try:
from itertools import izip as compat_zip # < 2.5 or 3.x
except ImportError:
compat_zip = zip
__all__ = [
'compat_HTMLParseError',
@@ -2948,5 +2955,6 @@ __all__ = [
'compat_urlretrieve',
'compat_xml_parse_error',
'compat_xpath',
'compat_zip',
'workaround_optparse_bug9161',
]

View File

@@ -304,11 +304,11 @@ class FileDownloader(object):
"""Report attempt to resume at given byte."""
self.to_screen('[download] Resuming download at byte %s' % resume_len)
def report_retry(self, count, retries):
def report_retry(self, err, count, retries):
"""Report retry in case of HTTP error 5xx"""
self.to_screen(
'[download] Got server HTTP error. Retrying (attempt %d of %s)...'
% (count, self.format_retries(retries)))
'[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...'
% (error_to_compat_str(err), count, self.format_retries(retries)))
def report_file_already_downloaded(self, file_name):
"""Report file has already been fully downloaded."""

View File

@@ -117,9 +117,15 @@ class FragmentFD(FileDownloader):
def _prepare_frag_download(self, ctx):
if 'live' not in ctx:
ctx['live'] = False
if not ctx['live']:
total_frags_str = '%d' % ctx['total_frags']
ad_frags = ctx.get('ad_frags', 0)
if ad_frags:
total_frags_str += ' (not including %d ad)' % ad_frags
else:
total_frags_str = 'unknown (live)'
self.to_screen(
'[%s] Total fragments: %s'
% (self.FD_NAME, ctx['total_frags'] if not ctx['live'] else 'unknown (live)'))
'[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
self.report_destination(ctx['filename'])
dl = HttpQuietDownloader(
self.ydl,
@@ -151,10 +157,15 @@ class FragmentFD(FileDownloader):
if self.__do_ytdl_file(ctx):
if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
self._read_ytdl_file(ctx)
if ctx['fragment_index'] > 0 and resume_len == 0:
self.report_warning(
'Inconsistent state of incomplete fragment download. '
'Restarting from the beginning...')
ctx['fragment_index'] = resume_len = 0
self._write_ytdl_file(ctx)
else:
self._write_ytdl_file(ctx)
if ctx['fragment_index'] > 0:
assert resume_len > 0
assert ctx['fragment_index'] == 0
dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode)

View File

@@ -75,15 +75,30 @@ class HlsFD(FragmentFD):
fd.add_progress_hook(ph)
return fd.real_download(filename, info_dict)
total_frags = 0
def anvato_ad(s):
return s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
media_frags = 0
ad_frags = 0
ad_frag_next = False
for line in s.splitlines():
line = line.strip()
if line and not line.startswith('#'):
total_frags += 1
if not line:
continue
if line.startswith('#'):
if anvato_ad(line):
ad_frags += 1
ad_frag_next = True
continue
if ad_frag_next:
ad_frag_next = False
continue
media_frags += 1
ctx = {
'filename': filename,
'total_frags': total_frags,
'total_frags': media_frags,
'ad_frags': ad_frags,
}
self._prepare_and_start_frag_download(ctx)
@@ -101,10 +116,14 @@ class HlsFD(FragmentFD):
decrypt_info = {'METHOD': 'NONE'}
byte_range = {}
frag_index = 0
ad_frag_next = False
for line in s.splitlines():
line = line.strip()
if line:
if not line.startswith('#'):
if ad_frag_next:
ad_frag_next = False
continue
frag_index += 1
if frag_index <= ctx['fragment_index']:
continue
@@ -175,6 +194,8 @@ class HlsFD(FragmentFD):
'start': sub_range_start,
'end': sub_range_start + int(splitted_byte_range[0]),
}
elif anvato_ad(line):
ad_frag_next = True
self._finish_frag_download(ctx)

View File

@@ -22,8 +22,16 @@ from ..utils import (
class HttpFD(FileDownloader):
def real_download(self, filename, info_dict):
url = info_dict['url']
tmpfilename = self.temp_name(filename)
stream = None
class DownloadContext(dict):
__getattr__ = dict.get
__setattr__ = dict.__setitem__
__delattr__ = dict.__delitem__
ctx = DownloadContext()
ctx.filename = filename
ctx.tmpfilename = self.temp_name(filename)
ctx.stream = None
# Do not include the Accept-Encoding header
headers = {'Youtubedl-no-compression': 'True'}
@@ -38,46 +46,51 @@ class HttpFD(FileDownloader):
if is_test:
request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1))
# Establish possible resume length
if os.path.isfile(encodeFilename(tmpfilename)):
resume_len = os.path.getsize(encodeFilename(tmpfilename))
else:
resume_len = 0
ctx.open_mode = 'wb'
ctx.resume_len = 0
open_mode = 'wb'
if resume_len != 0:
if self.params.get('continuedl', True):
self.report_resuming_byte(resume_len)
request.add_header('Range', 'bytes=%d-' % resume_len)
open_mode = 'ab'
else:
resume_len = 0
if self.params.get('continuedl', True):
# Establish possible resume length
if os.path.isfile(encodeFilename(ctx.tmpfilename)):
ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
count = 0
retries = self.params.get('retries', 0)
while count <= retries:
class SucceedDownload(Exception):
pass
class RetryDownload(Exception):
def __init__(self, source_error):
self.source_error = source_error
def establish_connection():
if ctx.resume_len != 0:
self.report_resuming_byte(ctx.resume_len)
request.add_header('Range', 'bytes=%d-' % ctx.resume_len)
ctx.open_mode = 'ab'
# Establish connection
try:
data = self.ydl.urlopen(request)
ctx.data = self.ydl.urlopen(request)
# When trying to resume, Content-Range HTTP header of response has to be checked
# to match the value of requested Range HTTP header. This is due to a webservers
# that don't support resuming and serve a whole file with no Content-Range
# set in response despite of requested Range (see
# https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
if resume_len > 0:
content_range = data.headers.get('Content-Range')
if ctx.resume_len > 0:
content_range = ctx.data.headers.get('Content-Range')
if content_range:
content_range_m = re.search(r'bytes (\d+)-', content_range)
# Content-Range is present and matches requested Range, resume is possible
if content_range_m and resume_len == int(content_range_m.group(1)):
break
if content_range_m and ctx.resume_len == int(content_range_m.group(1)):
return
# Content-Range is either not present or invalid. Assuming remote webserver is
# trying to send the whole file, resume is not possible, so wiping the local file
# and performing entire redownload
self.report_unable_to_resume()
resume_len = 0
open_mode = 'wb'
break
ctx.resume_len = 0
ctx.open_mode = 'wb'
return
except (compat_urllib_error.HTTPError, ) as err:
if (err.code < 500 or err.code >= 600) and err.code != 416:
# Unexpected HTTP error
@@ -86,15 +99,15 @@ class HttpFD(FileDownloader):
# Unable to resume (requested range not satisfiable)
try:
# Open the connection again without the range header
data = self.ydl.urlopen(basic_request)
content_length = data.info()['Content-Length']
ctx.data = self.ydl.urlopen(basic_request)
content_length = ctx.data.info()['Content-Length']
except (compat_urllib_error.HTTPError, ) as err:
if err.code < 500 or err.code >= 600:
raise
else:
# Examine the reported length
if (content_length is not None and
(resume_len - 100 < int(content_length) < resume_len + 100)):
(ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)):
# The file had already been fully downloaded.
# Explanation to the above condition: in issue #175 it was revealed that
# YouTube sometimes adds or removes a few bytes from the end of the file,
@@ -102,152 +115,184 @@ class HttpFD(FileDownloader):
# I decided to implement a suggested change and consider the file
# completely downloaded if the file size differs less than 100 bytes from
# the one in the hard drive.
self.report_file_already_downloaded(filename)
self.try_rename(tmpfilename, filename)
self.report_file_already_downloaded(ctx.filename)
self.try_rename(ctx.tmpfilename, ctx.filename)
self._hook_progress({
'filename': filename,
'filename': ctx.filename,
'status': 'finished',
'downloaded_bytes': resume_len,
'total_bytes': resume_len,
'downloaded_bytes': ctx.resume_len,
'total_bytes': ctx.resume_len,
})
return True
raise SucceedDownload()
else:
# The length does not match, we start the download over
self.report_unable_to_resume()
resume_len = 0
open_mode = 'wb'
break
except socket.error as e:
if e.errno != errno.ECONNRESET:
ctx.resume_len = 0
ctx.open_mode = 'wb'
return
raise RetryDownload(err)
except socket.error as err:
if err.errno != errno.ECONNRESET:
# Connection reset is no problem, just retry
raise
raise RetryDownload(err)
# Retry
count += 1
if count <= retries:
self.report_retry(count, retries)
def download():
data_len = ctx.data.info().get('Content-length', None)
if count > retries:
self.report_error('giving up after %s retries' % retries)
return False
# Range HTTP header may be ignored/unsupported by a webserver
# (e.g. extractor/scivee.py, extractor/bambuser.py).
# However, for a test we still would like to download just a piece of a file.
# To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
# block size when downloading a file.
if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
data_len = self._TEST_FILE_SIZE
data_len = data.info().get('Content-length', None)
# Range HTTP header may be ignored/unsupported by a webserver
# (e.g. extractor/scivee.py, extractor/bambuser.py).
# However, for a test we still would like to download just a piece of a file.
# To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
# block size when downloading a file.
if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
data_len = self._TEST_FILE_SIZE
if data_len is not None:
data_len = int(data_len) + resume_len
min_data_len = self.params.get('min_filesize')
max_data_len = self.params.get('max_filesize')
if min_data_len is not None and data_len < min_data_len:
self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
return False
if max_data_len is not None and data_len > max_data_len:
self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
return False
byte_counter = 0 + resume_len
block_size = self.params.get('buffersize', 1024)
start = time.time()
# measure time over whole while-loop, so slow_down() and best_block_size() work together properly
now = None # needed for slow_down() in the first loop run
before = start # start measuring
while True:
# Download and write
data_block = data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
byte_counter += len(data_block)
# exit loop when download is finished
if len(data_block) == 0:
break
# Open destination file just in time
if stream is None:
try:
(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
assert stream is not None
filename = self.undo_temp_name(tmpfilename)
self.report_destination(filename)
except (OSError, IOError) as err:
self.report_error('unable to open for writing: %s' % str(err))
if data_len is not None:
data_len = int(data_len) + ctx.resume_len
min_data_len = self.params.get('min_filesize')
max_data_len = self.params.get('max_filesize')
if min_data_len is not None and data_len < min_data_len:
self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
return False
if max_data_len is not None and data_len > max_data_len:
self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
return False
if self.params.get('xattr_set_filesize', False) and data_len is not None:
byte_counter = 0 + ctx.resume_len
block_size = self.params.get('buffersize', 1024)
start = time.time()
# measure time over whole while-loop, so slow_down() and best_block_size() work together properly
now = None # needed for slow_down() in the first loop run
before = start # start measuring
def retry(e):
if ctx.tmpfilename != '-':
ctx.stream.close()
ctx.stream = None
ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
raise RetryDownload(e)
while True:
try:
# Download and write
data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
# socket.timeout is a subclass of socket.error but may not have
# errno set
except socket.timeout as e:
retry(e)
except socket.error as e:
if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT):
raise
retry(e)
byte_counter += len(data_block)
# exit loop when download is finished
if len(data_block) == 0:
break
# Open destination file just in time
if ctx.stream is None:
try:
write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
except (XAttrUnavailableError, XAttrMetadataError) as err:
self.report_error('unable to set filesize xattr: %s' % str(err))
ctx.stream, ctx.tmpfilename = sanitize_open(
ctx.tmpfilename, ctx.open_mode)
assert ctx.stream is not None
ctx.filename = self.undo_temp_name(ctx.tmpfilename)
self.report_destination(ctx.filename)
except (OSError, IOError) as err:
self.report_error('unable to open for writing: %s' % str(err))
return False
try:
stream.write(data_block)
except (IOError, OSError) as err:
if self.params.get('xattr_set_filesize', False) and data_len is not None:
try:
write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
except (XAttrUnavailableError, XAttrMetadataError) as err:
self.report_error('unable to set filesize xattr: %s' % str(err))
try:
ctx.stream.write(data_block)
except (IOError, OSError) as err:
self.to_stderr('\n')
self.report_error('unable to write data: %s' % str(err))
return False
# Apply rate limit
self.slow_down(start, now, byte_counter - ctx.resume_len)
# end measuring of one loop run
now = time.time()
after = now
# Adjust block size
if not self.params.get('noresizebuffer', False):
block_size = self.best_block_size(after - before, len(data_block))
before = after
# Progress message
speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
if data_len is None:
eta = None
else:
eta = self.calc_eta(start, time.time(), data_len - ctx.resume_len, byte_counter - ctx.resume_len)
self._hook_progress({
'status': 'downloading',
'downloaded_bytes': byte_counter,
'total_bytes': data_len,
'tmpfilename': ctx.tmpfilename,
'filename': ctx.filename,
'eta': eta,
'speed': speed,
'elapsed': now - start,
})
if is_test and byte_counter == data_len:
break
if ctx.stream is None:
self.to_stderr('\n')
self.report_error('unable to write data: %s' % str(err))
self.report_error('Did not get any data blocks')
return False
if ctx.tmpfilename != '-':
ctx.stream.close()
# Apply rate limit
self.slow_down(start, now, byte_counter - resume_len)
if data_len is not None and byte_counter != data_len:
err = ContentTooShortError(byte_counter, int(data_len))
if count <= retries:
retry(err)
raise err
# end measuring of one loop run
now = time.time()
after = now
self.try_rename(ctx.tmpfilename, ctx.filename)
# Adjust block size
if not self.params.get('noresizebuffer', False):
block_size = self.best_block_size(after - before, len(data_block))
before = after
# Progress message
speed = self.calc_speed(start, now, byte_counter - resume_len)
if data_len is None:
eta = None
else:
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
# Update file modification time
if self.params.get('updatetime', True):
info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None))
self._hook_progress({
'status': 'downloading',
'downloaded_bytes': byte_counter,
'total_bytes': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'eta': eta,
'speed': speed,
'elapsed': now - start,
'total_bytes': byte_counter,
'filename': ctx.filename,
'status': 'finished',
'elapsed': time.time() - start,
})
if is_test and byte_counter == data_len:
break
return True
if stream is None:
self.to_stderr('\n')
self.report_error('Did not get any data blocks')
return False
if tmpfilename != '-':
stream.close()
while count <= retries:
try:
establish_connection()
download()
return True
except RetryDownload as e:
count += 1
if count <= retries:
self.report_retry(e.source_error, count, retries)
continue
except SucceedDownload:
return True
if data_len is not None and byte_counter != data_len:
raise ContentTooShortError(byte_counter, int(data_len))
self.try_rename(tmpfilename, filename)
# Update file modification time
if self.params.get('updatetime', True):
info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
self._hook_progress({
'downloaded_bytes': byte_counter,
'total_bytes': byte_counter,
'filename': filename,
'status': 'finished',
'elapsed': time.time() - start,
})
return True
self.report_error('giving up after %s retries' % retries)
return False

View File

@@ -7,6 +7,7 @@ import time
from .amp import AMPIE
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..compat import compat_urlparse
@@ -108,9 +109,7 @@ class AbcNewsIE(InfoExtractor):
r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
full_video_url = compat_urlparse.urljoin(url, video_url)
youtube_url = self._html_search_regex(
r'<iframe[^>]+src="(https://www\.youtube\.com/embed/[^"]+)"',
webpage, 'YouTube URL', default=None)
youtube_url = YoutubeIE._extract_url(webpage)
timestamp = None
date_str = self._html_search_regex(
@@ -140,7 +139,7 @@ class AbcNewsIE(InfoExtractor):
}
if youtube_url:
entries = [entry, self.url_result(youtube_url, 'Youtube')]
entries = [entry, self.url_result(youtube_url, ie=YoutubeIE.ie_key())]
return self.playlist_result(entries)
return entry

View File

@@ -131,7 +131,7 @@ class AENetworksIE(AENetworksBaseIE):
r'data-media-url=(["\'])(?P<url>(?:(?!\1).)+?)\1'],
webpage, 'video url', group='url')
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
info = self._parse_theplatform_metadata(theplatform_metadata)
if theplatform_metadata.get('AETN$isBehindWall'):
requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain]

View File

@@ -138,6 +138,23 @@ class AfreecaTVIE(InfoExtractor):
'params': {
'skip_download': True,
},
}, {
# adult video
'url': 'http://vod.afreecatv.com/PLAYER/STATION/26542731',
'info_dict': {
'id': '20171001_F1AE1711_196617479_1',
'ext': 'mp4',
'title': '[생]서아 초심 찾기 방송 (part 1)',
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'BJ서아',
'uploader_id': 'bjdyrksu',
'upload_date': '20171001',
'duration': 3600,
'age_limit': 18,
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
'only_matching': True,
@@ -160,7 +177,15 @@ class AfreecaTVIE(InfoExtractor):
video_xml = self._download_xml(
'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
video_id, query={'nTitleNo': video_id})
video_id, query={
'nTitleNo': video_id,
'partialView': 'SKIP_ADULT',
})
flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
if flag and flag != 'SUCCEED':
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, flag), expected=True)
video_element = video_xml.findall(compat_xpath('./track/video'))[1]
if video_element is None or video_element.text is None:
@@ -246,107 +271,3 @@ class AfreecaTVIE(InfoExtractor):
})
return info
class AfreecaTVGlobalIE(AfreecaTVIE):
IE_NAME = 'afreecatv:global'
_VALID_URL = r'https?://(?:www\.)?afreeca\.tv/(?P<channel_id>\d+)(?:/v/(?P<video_id>\d+))?'
_TESTS = [{
'url': 'http://afreeca.tv/36853014/v/58301',
'info_dict': {
'id': '58301',
'title': 'tryhard top100',
'uploader_id': '36853014',
'uploader': 'makgi Hearthstone Live!',
},
'playlist_count': 3,
}]
def _real_extract(self, url):
channel_id, video_id = re.match(self._VALID_URL, url).groups()
video_type = 'video' if video_id else 'live'
query = {
'pt': 'view',
'bid': channel_id,
}
if video_id:
query['vno'] = video_id
video_data = self._download_json(
'http://api.afreeca.tv/%s/view_%s.php' % (video_type, video_type),
video_id or channel_id, query=query)['channel']
if video_data.get('result') != 1:
raise ExtractorError('%s said: %s' % (self.IE_NAME, video_data['remsg']))
title = video_data['title']
info = {
'thumbnail': video_data.get('thumb'),
'view_count': int_or_none(video_data.get('vcnt')),
'age_limit': int_or_none(video_data.get('grade')),
'uploader_id': channel_id,
'uploader': video_data.get('cname'),
}
if video_id:
entries = []
for i, f in enumerate(video_data.get('flist', [])):
video_key = self.parse_video_key(f.get('key', ''))
f_url = f.get('file')
if not video_key or not f_url:
continue
entries.append({
'id': '%s_%s' % (video_id, video_key.get('part', i + 1)),
'title': title,
'upload_date': video_key.get('upload_date'),
'duration': int_or_none(f.get('length')),
'url': f_url,
'protocol': 'm3u8_native',
'ext': 'mp4',
})
info.update({
'id': video_id,
'title': title,
'duration': int_or_none(video_data.get('length')),
})
if len(entries) > 1:
info['_type'] = 'multi_video'
info['entries'] = entries
elif len(entries) == 1:
i = entries[0].copy()
i.update(info)
info = i
else:
formats = []
for s in video_data.get('strm', []):
s_url = s.get('purl')
if not s_url:
continue
stype = s.get('stype')
if stype == 'HLS':
formats.extend(self._extract_m3u8_formats(
s_url, channel_id, 'mp4', m3u8_id=stype, fatal=False))
elif stype == 'RTMP':
format_id = [stype]
label = s.get('label')
if label:
format_id.append(label)
formats.append({
'format_id': '-'.join(format_id),
'url': s_url,
'tbr': int_or_none(s.get('bps')),
'height': int_or_none(s.get('brt')),
'ext': 'flv',
'rtmp_live': True,
})
self._sort_formats(formats)
info.update({
'id': channel_id,
'title': self._live_title(title),
'is_live': True,
'formats': formats,
})
return info

View File

@@ -0,0 +1,53 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
float_or_none,
try_get,
)
class AliExpressLiveIE(InfoExtractor):
_VALID_URL = r'https?://live\.aliexpress\.com/live/(?P<id>\d+)'
_TEST = {
'url': 'https://live.aliexpress.com/live/2800002704436634',
'md5': 'e729e25d47c5e557f2630eaf99b740a5',
'info_dict': {
'id': '2800002704436634',
'ext': 'mp4',
'title': 'CASIMA7.22',
'thumbnail': r're:http://.*\.jpg',
'uploader': 'CASIMA Official Store',
'timestamp': 1500717600,
'upload_date': '20170722',
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
data = self._parse_json(
self._search_regex(
r'(?s)runParams\s*=\s*({.+?})\s*;?\s*var',
webpage, 'runParams'),
video_id)
title = data['title']
formats = self._extract_m3u8_formats(
data['replyStreamUrl'], video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls')
return {
'id': video_id,
'title': title,
'thumbnail': data.get('coverUrl'),
'uploader': try_get(
data, lambda x: x['followBar']['name'], compat_str),
'timestamp': float_or_none(data.get('startTimeLong'), scale=1000),
'formats': formats,
}

View File

@@ -0,0 +1,85 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
clean_html,
int_or_none,
try_get,
unified_strdate,
)
class AmericasTestKitchenIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.americastestkitchen.com/episode/548-summer-dinner-party',
'md5': 'b861c3e365ac38ad319cfd509c30577f',
'info_dict': {
'id': '1_5g5zua6e',
'title': 'Summer Dinner Party',
'ext': 'mp4',
'description': 'md5:858d986e73a4826979b6a5d9f8f6a1ec',
'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1497285541,
'upload_date': '20170612',
'uploader_id': 'roger.metcalf@americastestkitchen.com',
'release_date': '20170617',
'series': "America's Test Kitchen",
'season_number': 17,
'episode': 'Summer Dinner Party',
'episode_number': 24,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
partner_id = self._search_regex(
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
webpage, 'kaltura partner id')
video_data = self._parse_json(
self._search_regex(
r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
webpage, 'initial context'),
video_id)
ep_data = try_get(
video_data,
(lambda x: x['episodeDetail']['content']['data'],
lambda x: x['videoDetail']['content']['data']), dict)
ep_meta = ep_data.get('full_video', {})
external_id = ep_data.get('external_id') or ep_meta['external_id']
title = ep_data.get('title') or ep_meta.get('title')
description = clean_html(ep_meta.get('episode_description') or ep_data.get(
'description') or ep_meta.get('description'))
thumbnail = try_get(ep_meta, lambda x: x['photo']['image_url'])
release_date = unified_strdate(ep_data.get('aired_at'))
season_number = int_or_none(ep_meta.get('season_number'))
episode = ep_meta.get('title')
episode_number = int_or_none(ep_meta.get('episode_number'))
return {
'_type': 'url_transparent',
'url': 'kaltura:%s:%s' % (partner_id, external_id),
'ie_key': 'Kaltura',
'title': title,
'description': description,
'thumbnail': thumbnail,
'release_date': release_date,
'series': "America's Test Kitchen",
'season_number': season_number,
'episode': episode,
'episode_number': episode_number,
}

View File

@@ -3,16 +3,13 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urlparse,
compat_str,
)
from ..compat import compat_str
from ..utils import (
determine_ext,
extract_attributes,
ExtractorError,
sanitized_Request,
urlencode_postdata,
urljoin,
)
@@ -21,6 +18,8 @@ class AnimeOnDemandIE(InfoExtractor):
_LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
_APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
_NETRC_MACHINE = 'animeondemand'
# German-speaking countries of Europe
_GEO_COUNTRIES = ['AT', 'CH', 'DE', 'LI', 'LU']
_TESTS = [{
# jap, OmU
'url': 'https://www.anime-on-demand.de/anime/161',
@@ -46,6 +45,10 @@ class AnimeOnDemandIE(InfoExtractor):
# Full length film, non-series, ger/jap, Dub/OmU, account required
'url': 'https://www.anime-on-demand.de/anime/185',
'only_matching': True,
}, {
# Flash videos
'url': 'https://www.anime-on-demand.de/anime/12',
'only_matching': True,
}]
def _login(self):
@@ -72,14 +75,13 @@ class AnimeOnDemandIE(InfoExtractor):
'post url', default=self._LOGIN_URL, group='url')
if not post_url.startswith('http'):
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
request = sanitized_Request(
post_url, urlencode_postdata(login_form))
request.add_header('Referer', self._LOGIN_URL)
post_url = urljoin(self._LOGIN_URL, post_url)
response = self._download_webpage(
request, None, 'Logging in as %s' % username)
post_url, None, 'Logging in as %s' % username,
data=urlencode_postdata(login_form), headers={
'Referer': self._LOGIN_URL,
})
if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')):
error = self._search_regex(
@@ -120,10 +122,11 @@ class AnimeOnDemandIE(InfoExtractor):
formats = []
for input_ in re.findall(
r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', html):
r'<input[^>]+class=["\'].*?streamstarter[^>]+>', html):
attributes = extract_attributes(input_)
title = attributes.get('data-dialog-header')
playlist_urls = []
for playlist_key in ('data-playlist', 'data-otherplaylist'):
for playlist_key in ('data-playlist', 'data-otherplaylist', 'data-stream'):
playlist_url = attributes.get(playlist_key)
if isinstance(playlist_url, compat_str) and re.match(
r'/?[\da-zA-Z]+', playlist_url):
@@ -147,19 +150,38 @@ class AnimeOnDemandIE(InfoExtractor):
format_id_list.append(compat_str(num))
format_id = '-'.join(format_id_list)
format_note = ', '.join(filter(None, (kind, lang_note)))
request = sanitized_Request(
compat_urlparse.urljoin(url, playlist_url),
item_id_list = []
if format_id:
item_id_list.append(format_id)
item_id_list.append('videomaterial')
playlist = self._download_json(
urljoin(url, playlist_url), video_id,
'Downloading %s JSON' % ' '.join(item_id_list),
headers={
'X-Requested-With': 'XMLHttpRequest',
'X-CSRF-Token': csrf_token,
'Referer': url,
'Accept': 'application/json, text/javascript, */*; q=0.01',
})
playlist = self._download_json(
request, video_id, 'Downloading %s playlist JSON' % format_id,
fatal=False)
}, fatal=False)
if not playlist:
continue
stream_url = playlist.get('streamurl')
if stream_url:
rtmp = re.search(
r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',
stream_url)
if rtmp:
formats.append({
'url': rtmp.group('url'),
'app': rtmp.group('app'),
'play_path': rtmp.group('playpath'),
'page_url': url,
'player_url': 'https://www.anime-on-demand.de/assets/jwplayer.flash-55abfb34080700304d49125ce9ffb4a6.swf',
'rtmp_real_time': True,
'format_id': 'rtmp',
'ext': 'flv',
})
continue
start_video = playlist.get('startvideo', 0)
playlist = playlist.get('playlist')
if not playlist or not isinstance(playlist, list):
@@ -222,7 +244,7 @@ class AnimeOnDemandIE(InfoExtractor):
f.update({
'id': '%s-%s' % (f['id'], m.group('kind').lower()),
'title': m.group('title'),
'url': compat_urlparse.urljoin(url, m.group('href')),
'url': urljoin(url, m.group('href')),
})
entries.append(f)

View File

@@ -18,6 +18,7 @@ from ..utils import (
int_or_none,
strip_jsonp,
unescapeHTML,
unsmuggle_url,
)
@@ -197,12 +198,16 @@ class AnvatoIE(InfoExtractor):
'tbr': tbr if tbr != 0 else None,
}
if ext == 'm3u8' or media_format in ('m3u8', 'm3u8-variant'):
if tbr is not None:
a_format.update({
'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
'ext': 'mp4',
})
if media_format == 'm3u8' and tbr is not None:
a_format.update({
'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
'ext': 'mp4',
})
elif media_format == 'm3u8-variant' or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
continue
elif ext == 'mp3' or media_format == 'mp3':
a_format['vcodec'] = 'none'
else:
@@ -271,6 +276,9 @@ class AnvatoIE(InfoExtractor):
anvplayer_data['accessKey'], anvplayer_data['video'])
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
mobj = re.match(self._VALID_URL, url)
access_key, video_id = mobj.group('access_key_or_mcp', 'id')
if access_key not in self._ANVACK_TABLE:

View File

@@ -117,7 +117,7 @@ class AppleTrailersIE(InfoExtractor):
continue
formats.append({
'format_id': '%s-%s' % (version, size),
'url': re.sub(r'_(\d+p.mov)', r'_h\1', src),
'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src),
'width': int_or_none(size_data.get('width')),
'height': int_or_none(size_data.get('height')),
'language': version[:2],
@@ -179,7 +179,7 @@ class AppleTrailersIE(InfoExtractor):
formats = []
for format in settings['metadata']['sizes']:
# The src is a file pointing to the real video file
format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src'])
format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src'])
formats.append({
'url': format_url,
'format': format['type'],

View File

@@ -195,7 +195,7 @@ class ARDMediathekIE(InfoExtractor):
title = self._html_search_regex(
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
r'<meta name="dcterms.title" content="(.*?)"/>',
r'<meta name="dcterms\.title" content="(.*?)"/>',
r'<h4 class="headline">(.*?)</h4>'],
webpage, 'title')
description = self._html_search_meta(

View File

@@ -6,6 +6,7 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_str,
compat_urllib_parse_urlparse,
)
from ..utils import (
@@ -15,6 +16,7 @@ from ..utils import (
int_or_none,
NO_DEFAULT,
qualities,
try_get,
unified_strdate,
)
@@ -80,12 +82,15 @@ class ArteTVBaseIE(InfoExtractor):
info = self._download_json(json_url, video_id)
player_info = info['videoJsonPlayer']
vsr = player_info['VSR']
if not vsr and not player_info.get('VRU'):
raise ExtractorError(
'Video %s is not available' % player_info.get('VID') or video_id,
expected=True)
vsr = try_get(player_info, lambda x: x['VSR'], dict)
if not vsr:
error = None
if try_get(player_info, lambda x: x['custom_msg']['type']) == 'error':
error = try_get(
player_info, lambda x: x['custom_msg']['msg'], compat_str)
if not error:
error = 'Video %s is not available' % player_info.get('VID') or video_id
raise ExtractorError(error, expected=True)
upload_date_str = player_info.get('shootingDate')
if not upload_date_str:

View File

@@ -242,7 +242,12 @@ class BandcampAlbumIE(InfoExtractor):
raise ExtractorError('The page doesn\'t contain any tracks')
# Only tracks with duration info have songs
entries = [
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
self.url_result(
compat_urlparse.urljoin(url, t_path),
ie=BandcampIE.ie_key(),
video_title=self._search_regex(
r'<span\b[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)',
elem_content, 'track title', fatal=False))
for elem_content, t_path in track_elements
if self._html_search_meta('duration', elem_content, default=None)]

View File

@@ -29,7 +29,7 @@ from ..compat import (
class BBCCoUkIE(InfoExtractor):
IE_NAME = 'bbc.co.uk'
IE_DESC = 'BBC iPlayer'
_ID_REGEX = r'[pb][\da-z]{7}'
_ID_REGEX = r'[pbw][\da-z]{7}'
_VALID_URL = r'''(?x)
https?://
(?:www\.)?bbc\.co\.uk/
@@ -37,7 +37,8 @@ class BBCCoUkIE(InfoExtractor):
programmes/(?!articles/)|
iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
music/(?:clips|audiovideo/popular)[/#]|
radio/player/
radio/player/|
events/[^/]+/play/[^/]+/
)
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
''' % _ID_REGEX
@@ -232,6 +233,9 @@ class BBCCoUkIE(InfoExtractor):
}, {
'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
'only_matching': True,
}, {
'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
'only_matching': True,
}]
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
@@ -382,7 +386,7 @@ class BBCCoUkIE(InfoExtractor):
m3u8_id=format_id, fatal=False))
if re.search(self._USP_RE, href):
usp_formats = self._extract_m3u8_formats(
re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href),
re.sub(self._USP_RE, r'/\1\.ism/\1\.m3u8', href),
programme_id, ext='mp4', entry_protocol='m3u8_native',
m3u8_id=format_id, fatal=False)
for f in usp_formats:

View File

@@ -9,6 +9,7 @@ from ..compat import (
from ..utils import (
int_or_none,
parse_iso8601,
urljoin,
)
@@ -36,9 +37,11 @@ class BeegIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
cpl_url = self._search_regex(
r'<script[^>]+src=(["\'])(?P<url>(?:https?:)?//static\.beeg\.com/cpl/\d+\.js.*?)\1',
r'<script[^>]+src=(["\'])(?P<url>(?:/static|(?:https?:)?//static\.beeg\.com)/cpl/\d+\.js.*?)\1',
webpage, 'cpl', default=None, group='url')
cpl_url = urljoin(url, cpl_url)
beeg_version, beeg_salt = [None] * 2
if cpl_url:
@@ -54,12 +57,16 @@ class BeegIE(InfoExtractor):
r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg salt',
default=None, group='beeg_salt')
beeg_version = beeg_version or '2000'
beeg_version = beeg_version or '2185'
beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H'
video = self._download_json(
'https://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id),
video_id)
for api_path in ('', 'api.'):
video = self._download_json(
'https://%sbeeg.com/api/v6/%s/video/%s'
% (api_path, beeg_version, video_id), video_id,
fatal=api_path == 'api.')
if video:
break
def split(o, e):
def cut(s, x):

View File

@@ -33,13 +33,18 @@ class BpbIE(InfoExtractor):
title = self._html_search_regex(
r'<h2 class="white">(.*?)</h2>', webpage, 'title')
video_info_dicts = re.findall(
r"({\s*src:\s*'http://film\.bpb\.de/[^}]+})", webpage)
r"({\s*src\s*:\s*'https?://film\.bpb\.de/[^}]+})", webpage)
formats = []
for video_info in video_info_dicts:
video_info = self._parse_json(video_info, video_id, transform_source=js_to_json)
quality = video_info['quality']
video_url = video_info['src']
video_info = self._parse_json(
video_info, video_id, transform_source=js_to_json, fatal=False)
if not video_info:
continue
video_url = video_info.get('src')
if not video_url:
continue
quality = 'high' if '_high' in video_url else 'low'
formats.append({
'url': video_url,
'preference': 10 if quality == 'high' else 0,

View File

@@ -3,24 +3,104 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import float_or_none
from ..utils import (
float_or_none,
strip_or_none,
)
class CanvasIE(InfoExtractor):
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet)/assets/(?P<id>m[dz]-ast-[^/?#&]+)'
_TESTS = [{
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'md5': '90139b746a0a9bd7bb631283f6e2a64e',
'info_dict': {
'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'ext': 'flv',
'title': 'Nachtwacht: De Greystook',
'description': 'md5:1db3f5dc4c7109c821261e7512975be7',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1468.03,
},
'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
}, {
'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
site_id, video_id = mobj.group('site_id'), mobj.group('id')
data = self._download_json(
'https://mediazone.vrt.be/api/v1/%s/assets/%s'
% (site_id, video_id), video_id)
title = data['title']
description = data.get('description')
formats = []
for target in data['targetUrls']:
format_url, format_type = target.get('url'), target.get('type')
if not format_url or not format_type:
continue
if format_type == 'HLS':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id=format_type, fatal=False))
elif format_type == 'HDS':
formats.extend(self._extract_f4m_formats(
format_url, video_id, f4m_id=format_type, fatal=False))
elif format_type == 'MPEG_DASH':
formats.extend(self._extract_mpd_formats(
format_url, video_id, mpd_id=format_type, fatal=False))
elif format_type == 'HSS':
formats.extend(self._extract_ism_formats(
format_url, video_id, ism_id='mss', fatal=False))
else:
formats.append({
'format_id': format_type,
'url': format_url,
})
self._sort_formats(formats)
subtitles = {}
subtitle_urls = data.get('subtitleUrls')
if isinstance(subtitle_urls, list):
for subtitle in subtitle_urls:
subtitle_url = subtitle.get('url')
if subtitle_url and subtitle.get('type') == 'CLOSED':
subtitles.setdefault('nl', []).append({'url': subtitle_url})
return {
'id': video_id,
'display_id': video_id,
'title': title,
'description': description,
'formats': formats,
'duration': float_or_none(data.get('duration'), 1000),
'thumbnail': data.get('posterImageUrl'),
'subtitles': subtitles,
}
class CanvasEenIE(InfoExtractor):
IE_DESC = 'canvas.be and een.be'
_VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
'md5': 'ea838375a547ac787d4064d8c7860a6c',
'md5': 'ed66976748d12350b118455979cca293',
'info_dict': {
'id': 'mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
'display_id': 'de-afspraak-veilt-voor-de-warmste-week',
'ext': 'mp4',
'ext': 'flv',
'title': 'De afspraak veilt voor de Warmste Week',
'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 49.02,
}
},
'expected_warnings': ['is not a supported codec'],
}, {
# with subtitles
'url': 'http://www.canvas.be/video/panorama/2016/pieter-0167',
@@ -40,7 +120,8 @@ class CanvasIE(InfoExtractor):
},
'params': {
'skip_download': True,
}
},
'skip': 'Pagina niet gevonden',
}, {
'url': 'https://www.een.be/sorry-voor-alles/herbekijk-sorry-voor-alles',
'info_dict': {
@@ -54,7 +135,8 @@ class CanvasIE(InfoExtractor):
},
'params': {
'skip_download': True,
}
},
'skip': 'Episode no longer available',
}, {
'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend',
'only_matching': True,
@@ -66,55 +148,21 @@ class CanvasIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
title = (self._search_regex(
title = strip_or_none(self._search_regex(
r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>',
webpage, 'title', default=None) or self._og_search_title(
webpage)).strip()
webpage, default=None))
video_id = self._html_search_regex(
r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', group='id')
data = self._download_json(
'https://mediazone.vrt.be/api/v1/%s/assets/%s'
% (site_id, video_id), display_id)
formats = []
for target in data['targetUrls']:
format_url, format_type = target.get('url'), target.get('type')
if not format_url or not format_type:
continue
if format_type == 'HLS':
formats.extend(self._extract_m3u8_formats(
format_url, display_id, entry_protocol='m3u8_native',
ext='mp4', preference=0, fatal=False, m3u8_id=format_type))
elif format_type == 'HDS':
formats.extend(self._extract_f4m_formats(
format_url, display_id, f4m_id=format_type, fatal=False))
elif format_type == 'MPEG_DASH':
formats.extend(self._extract_mpd_formats(
format_url, display_id, mpd_id=format_type, fatal=False))
else:
formats.append({
'format_id': format_type,
'url': format_url,
})
self._sort_formats(formats)
subtitles = {}
subtitle_urls = data.get('subtitleUrls')
if isinstance(subtitle_urls, list):
for subtitle in subtitle_urls:
subtitle_url = subtitle.get('url')
if subtitle_url and subtitle.get('type') == 'CLOSED':
subtitles.setdefault('nl', []).append({'url': subtitle_url})
r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
group='id')
return {
'_type': 'url_transparent',
'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (site_id, video_id),
'ie_key': CanvasIE.ie_key(),
'id': video_id,
'display_id': display_id,
'title': title,
'description': self._og_search_description(webpage),
'formats': formats,
'duration': float_or_none(data.get('duration'), 1000),
'thumbnail': data.get('posterImageUrl'),
'subtitles': subtitles,
}

View File

@@ -200,6 +200,7 @@ class CBCWatchBaseIE(InfoExtractor):
'media': 'http://search.yahoo.com/mrss/',
'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/',
}
_GEO_COUNTRIES = ['CA']
def _call_api(self, path, video_id):
url = path if path.startswith('http') else self._API_BASE_URL + path
@@ -287,6 +288,11 @@ class CBCWatchBaseIE(InfoExtractor):
class CBCWatchVideoIE(CBCWatchBaseIE):
IE_NAME = 'cbc.ca:watch:video'
_VALID_URL = r'https?://api-cbc\.cloud\.clearleap\.com/cloffice/client/web/play/?\?.*?\bcontentId=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
_TEST = {
# geo-restricted to Canada, bypassable
'url': 'https://api-cbc.cloud.clearleap.com/cloffice/client/web/play/?contentId=3c84472a-1eea-4dee-9267-2655d5055dcf&categoryId=ebc258f5-ee40-4cca-b66b-ba6bd55b7235',
'only_matching': True,
}
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -323,9 +329,10 @@ class CBCWatchIE(CBCWatchBaseIE):
IE_NAME = 'cbc.ca:watch'
_VALID_URL = r'https?://watch\.cbc\.ca/(?:[^/]+/)+(?P<id>[0-9a-f-]+)'
_TESTS = [{
# geo-restricted to Canada, bypassable
'url': 'http://watch.cbc.ca/doc-zone/season-6/customer-disservice/38e815a-009e3ab12e4',
'info_dict': {
'id': '38e815a-009e3ab12e4',
'id': '9673749a-5e77-484c-8b62-a1092a6b5168',
'ext': 'mp4',
'title': 'Customer (Dis)Service',
'description': 'md5:8bdd6913a0fe03d4b2a17ebe169c7c87',
@@ -337,8 +344,8 @@ class CBCWatchIE(CBCWatchBaseIE):
'skip_download': True,
'format': 'bestvideo',
},
'skip': 'Geo-restricted to Canada',
}, {
# geo-restricted to Canada, bypassable
'url': 'http://watch.cbc.ca/arthur/all/1ed4b385-cd84-49cf-95f0-80f004680057',
'info_dict': {
'id': '1ed4b385-cd84-49cf-95f0-80f004680057',
@@ -346,7 +353,6 @@ class CBCWatchIE(CBCWatchBaseIE):
'description': 'Arthur, the sweetest 8-year-old aardvark, and his pals solve all kinds of problems with humour, kindness and teamwork.',
},
'playlist_mincount': 30,
'skip': 'Geo-restricted to Canada',
}]
def _real_extract(self, url):

View File

@@ -124,7 +124,7 @@ class CDAIE(InfoExtractor):
}
def extract_format(page, version):
json_str = self._search_regex(
json_str = self._html_search_regex(
r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
'%s player_json' % version, fatal=False, group='player_data')
if not json_str:

View File

@@ -81,6 +81,12 @@ class Channel9IE(InfoExtractor):
_RSS_URL = 'http://channel9.msdn.com/%s/RSS'
@staticmethod
def _extract_urls(webpage):
return re.findall(
r'<iframe[^>]+src=["\'](https?://channel9\.msdn\.com/(?:[^/]+/)+)player\b',
webpage)
def _extract_list(self, video_id, rss_url=None):
if not rss_url:
rss_url = self._RSS_URL % video_id

View File

@@ -5,7 +5,7 @@ from ..utils import remove_end
class CharlieRoseIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?charlierose\.com/video(?:s|/player)/(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?charlierose\.com/(?:video|episode)(?:s|/player)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://charlierose.com/videos/27996',
'md5': 'fda41d49e67d4ce7c2411fd2c4702e09',
@@ -24,6 +24,9 @@ class CharlieRoseIE(InfoExtractor):
}, {
'url': 'https://charlierose.com/videos/27996',
'only_matching': True,
}, {
'url': 'https://charlierose.com/episodes/30887?autoplay=true',
'only_matching': True,
}]
_PLAYER_BASE = 'https://charlierose.com/video/player/%s'

View File

@@ -5,6 +5,7 @@ import base64
import json
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import (
clean_html,
ExtractorError
@@ -70,11 +71,9 @@ class ChilloutzoneIE(InfoExtractor):
# If nativePlatform is None a fallback mechanism is used (i.e. youtube embed)
if native_platform is None:
youtube_url = self._html_search_regex(
r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
webpage, 'fallback video URL', default=None)
if youtube_url is not None:
return self.url_result(youtube_url, ie='Youtube')
youtube_url = YoutubeIE._extract_url(webpage)
if youtube_url:
return self.url_result(youtube_url, ie=YoutubeIE.ie_key())
# Non Fallback: Decide to use native source (e.g. youtube or vimeo) or
# the own CDN

View File

@@ -120,13 +120,16 @@ class ComedyCentralTVIE(MTVServicesInfoExtractor):
class ComedyCentralShortnameIE(InfoExtractor):
_VALID_URL = r'^:(?P<id>tds|thedailyshow)$'
_VALID_URL = r'^:(?P<id>tds|thedailyshow|theopposition)$'
_TESTS = [{
'url': ':tds',
'only_matching': True,
}, {
'url': ':thedailyshow',
'only_matching': True,
}, {
'url': ':theopposition',
'only_matching': True,
}]
def _real_extract(self, url):
@@ -134,5 +137,6 @@ class ComedyCentralShortnameIE(InfoExtractor):
shortcut_map = {
'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
'theopposition': 'http://www.cc.com/shows/the-opposition-with-jordan-klepper/full-episodes',
}
return self.url_result(shortcut_map[video_id])

View File

@@ -27,6 +27,7 @@ from ..compat import (
compat_urllib_parse_urlencode,
compat_urllib_request,
compat_urlparse,
compat_xml_parse_error,
)
from ..downloader.f4m import remove_encrypted_media
from ..utils import (
@@ -646,15 +647,29 @@ class InfoExtractor(object):
def _download_xml(self, url_or_request, video_id,
note='Downloading XML', errnote='Unable to download XML',
transform_source=None, fatal=True, encoding=None, data=None, headers={}, query={}):
transform_source=None, fatal=True, encoding=None,
data=None, headers={}, query={}):
"""Return the xml as an xml.etree.ElementTree.Element"""
xml_string = self._download_webpage(
url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding, data=data, headers=headers, query=query)
url_or_request, video_id, note, errnote, fatal=fatal,
encoding=encoding, data=data, headers=headers, query=query)
if xml_string is False:
return xml_string
return self._parse_xml(
xml_string, video_id, transform_source=transform_source,
fatal=fatal)
def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
if transform_source:
xml_string = transform_source(xml_string)
return compat_etree_fromstring(xml_string.encode('utf-8'))
try:
return compat_etree_fromstring(xml_string.encode('utf-8'))
except compat_xml_parse_error as ve:
errmsg = '%s: Failed to parse XML ' % video_id
if fatal:
raise ExtractorError(errmsg, cause=ve)
else:
self.report_warning(errmsg + str(ve))
def _download_json(self, url_or_request, video_id,
note='Downloading JSON metadata',
@@ -1905,7 +1920,7 @@ class InfoExtractor(object):
# can't be used at the same time
if '%(Number' in media_template and 's' not in representation_ms_info:
segment_duration = None
if 'total_number' not in representation_ms_info and 'segment_duration':
if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
representation_ms_info['fragments'] = [{
@@ -2123,11 +2138,11 @@ class InfoExtractor(object):
formats = self._extract_m3u8_formats(
full_url, video_id, ext='mp4',
entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id,
preference=preference)
preference=preference, fatal=False)
elif ext == 'mpd':
is_plain_url = False
formats = self._extract_mpd_formats(
full_url, video_id, mpd_id=mpd_id)
full_url, video_id, mpd_id=mpd_id, fatal=False)
else:
is_plain_url = True
formats = [{
@@ -2169,6 +2184,12 @@ class InfoExtractor(object):
f = parse_content_type(source_attributes.get('type'))
is_plain_url, formats = _media_formats(src, media_type, f)
if is_plain_url:
# res attribute is not standard but seen several times
# in the wild
f.update({
'height': int_or_none(source_attributes.get('res')),
'format_id': source_attributes.get('label'),
})
f.update(formats[0])
media_info['formats'].append(f)
else:
@@ -2301,7 +2322,6 @@ class InfoExtractor(object):
formats = self._parse_jwplayer_formats(
video_data['sources'], video_id=this_video_id, m3u8_id=m3u8_id,
mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
self._sort_formats(formats)
subtitles = {}
tracks = video_data.get('tracks')
@@ -2318,16 +2338,25 @@ class InfoExtractor(object):
'url': self._proto_relative_url(track_url)
})
entries.append({
entry = {
'id': this_video_id,
'title': video_data['title'] if require_title else video_data.get('title'),
'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
'description': video_data.get('description'),
'thumbnail': self._proto_relative_url(video_data.get('image')),
'timestamp': int_or_none(video_data.get('pubdate')),
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
'subtitles': subtitles,
'formats': formats,
})
}
# https://github.com/jwplayer/jwplayer/blob/master/src/js/utils/validator.js#L32
if len(formats) == 1 and re.search(r'^(?:http|//).*(?:youtube\.com|youtu\.be)/.+', formats[0]['url']):
entry.update({
'_type': 'url_transparent',
'url': formats[0]['url'],
})
else:
self._sort_formats(formats)
entry['formats'] = formats
entries.append(entry)
if len(entries) == 1:
return entries[0]
else:
@@ -2428,10 +2457,12 @@ class InfoExtractor(object):
self._downloader.report_warning(msg)
return res
def _set_cookie(self, domain, name, value, expire_time=None):
def _set_cookie(self, domain, name, value, expire_time=None, port=None,
path='/', secure=False, discard=False, rest={}, **kwargs):
cookie = compat_cookiejar.Cookie(
0, name, value, None, None, domain, None,
None, '/', True, False, expire_time, '', None, None, None)
0, name, value, port, port is not None, domain, True,
domain.startswith('.'), path, True, secure, expire_time,
discard, None, None, rest)
self._downloader.cookiejar.set_cookie(cookie)
def _get_cookies(self, url):

View File

@@ -116,16 +116,16 @@ class CondeNastIE(InfoExtractor):
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
return self.playlist_result(entries, playlist_title=title)
def _extract_video_params(self, webpage):
query = {}
params = self._search_regex(
r'(?s)var params = {(.+?)}[;,]', webpage, 'player params', default=None)
if params:
query.update({
'videoId': self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id'),
'playerId': self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id'),
'target': self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target'),
})
def _extract_video_params(self, webpage, display_id):
query = self._parse_json(
self._search_regex(
r'(?s)var\s+params\s*=\s*({.+?})[;,]', webpage, 'player params',
default='{}'),
display_id, transform_source=js_to_json, fatal=False)
if query:
query['videoId'] = self._search_regex(
r'(?:data-video-id=|currentVideoId\s*=\s*)["\']([\da-f]+)',
webpage, 'video id', default=None)
else:
params = extract_attributes(self._search_regex(
r'(<[^>]+data-js="video-player"[^>]+>)',
@@ -141,17 +141,27 @@ class CondeNastIE(InfoExtractor):
video_id = params['videoId']
video_info = None
if params.get('playerId'):
info_page = self._download_json(
'http://player.cnevids.com/player/video.js',
video_id, 'Downloading video info', fatal=False, query=params)
if info_page:
video_info = info_page.get('video')
if not video_info:
info_page = self._download_webpage(
'http://player.cnevids.com/player/loader.js',
video_id, 'Downloading loader info', query=params)
else:
# New API path
query = params.copy()
query['embedType'] = 'inline'
info_page = self._download_json(
'http://player.cnevids.com/embed-api.json', video_id,
'Downloading embed info', fatal=False, query=query)
# Old fallbacks
if not info_page:
if params.get('playerId'):
info_page = self._download_json(
'http://player.cnevids.com/player/video.js', video_id,
'Downloading video info', fatal=False, query=params)
if info_page:
video_info = info_page.get('video')
if not video_info:
info_page = self._download_webpage(
'http://player.cnevids.com/player/loader.js',
video_id, 'Downloading loader info', query=params)
if not video_info:
info_page = self._download_webpage(
'https://player.cnevids.com/inline/video/%s.js' % video_id,
video_id, 'Downloading inline info', query={
@@ -215,7 +225,7 @@ class CondeNastIE(InfoExtractor):
if url_type == 'series':
return self._extract_series(url, webpage)
else:
params = self._extract_video_params(webpage)
params = self._extract_video_params(webpage, display_id)
info = self._search_json_ld(
webpage, display_id, fatal=False)
info.update(self._extract_video(params))

View File

@@ -3,6 +3,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import (
parse_iso8601,
str_to_int,
@@ -41,11 +42,9 @@ class CrackedIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
youtube_url = self._search_regex(
r'<iframe[^>]+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"',
webpage, 'youtube url', default=None)
youtube_url = YoutubeIE._extract_url(webpage)
if youtube_url:
return self.url_result(youtube_url, 'Youtube')
return self.url_result(youtube_url, ie=YoutubeIE.ie_key())
video_url = self._html_search_regex(
[r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'],

View File

@@ -235,7 +235,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
# vevo embed
vevo_id = self._search_regex(
r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)',
r'<link rel="video_src" href="[^"]*?vevo\.com[^"]*?video=(?P<id>[\w]*)',
webpage, 'vevo embed', default=None)
if vevo_id:
return self.url_result('vevo:%s' % vevo_id, 'Vevo')
@@ -325,7 +325,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
IE_NAME = 'dailymotion:playlist'
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>[^/?#&]+)'
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
_TESTS = [{

View File

@@ -19,7 +19,7 @@ class DeezerPlaylistIE(InfoExtractor):
'id': '176747451',
'title': 'Best!',
'uploader': 'Anonymous',
'thumbnail': r're:^https?://cdn-images.deezer.com/images/cover/.*\.jpg$',
'thumbnail': r're:^https?://cdn-images\.deezer\.com/images/cover/.*\.jpg$',
},
'playlist_count': 30,
'skip': 'Only available in .de',

View File

@@ -138,6 +138,7 @@ class DRTVIE(InfoExtractor):
'tbr': int_or_none(bitrate),
'ext': link.get('FileFormat'),
'vcodec': 'none' if kind == 'AudioResource' else None,
'preference': preference,
})
subtitles_list = asset.get('SubtitlesList')
if isinstance(subtitles_list, list):

View File

@@ -15,7 +15,7 @@ from ..utils import (
class EpornerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
_VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:hd-porn|embed)/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
_TESTS = [{
'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
'md5': '39d486f046212d8e1b911c52ab4691f8',
@@ -35,6 +35,9 @@ class EpornerIE(InfoExtractor):
}, {
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
'only_matching': True,
}, {
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@@ -31,20 +31,19 @@ from .aenetworks import (
AENetworksIE,
HistoryTopicIE,
)
from .afreecatv import (
AfreecaTVIE,
AfreecaTVGlobalIE,
)
from .afreecatv import AfreecaTVIE
from .airmozilla import AirMozillaIE
from .aljazeera import AlJazeeraIE
from .alphaporno import AlphaPornoIE
from .amcnetworks import AMCNetworksIE
from .americastestkitchen import AmericasTestKitchenIE
from .animeondemand import AnimeOnDemandIE
from .anitube import AnitubeIE
from .anvato import AnvatoIE
from .anysex import AnySexIE
from .aol import AolIE
from .allocine import AllocineIE
from .aliexpress import AliExpressLiveIE
from .aparat import AparatIE
from .appleconnect import AppleConnectIE
from .appletrailers import (
@@ -148,7 +147,10 @@ from .camdemy import (
from .camwithher import CamWithHerIE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .canvas import CanvasIE
from .canvas import (
CanvasIE,
CanvasEenIE,
)
from .carambatv import (
CarambaTVIE,
CarambaTVPageIE,
@@ -379,6 +381,7 @@ from .freesound import FreesoundIE
from .freespeech import FreespeechIE
from .freshlive import FreshLiveIE
from .funimation import FunimationIE
from .funk import FunkIE
from .funnyordie import FunnyOrDieIE
from .fusion import FusionIE
from .fxnetworks import FXNetworksIE
@@ -481,6 +484,7 @@ from .jove import JoveIE
from .joj import JojIE
from .jwplatform import JWPlatformIE
from .jpopsukitv import JpopsukiIE
from .kakao import KakaoIE
from .kaltura import KalturaIE
from .kamcord import KamcordIE
from .kanalplay import KanalPlayIE
@@ -509,6 +513,7 @@ from .la7 import LA7IE
from .laola1tv import (
Laola1TvEmbedIE,
Laola1TvIE,
ITTFIE,
)
from .lci import LCIIE
from .lcp import (
@@ -536,7 +541,10 @@ from .limelight import (
LimelightChannelListIE,
)
from .litv import LiTVIE
from .liveleak import LiveLeakIE
from .liveleak import (
LiveLeakIE,
LiveLeakEmbedIE,
)
from .livestream import (
LivestreamIE,
LivestreamOriginalIE,
@@ -559,6 +567,7 @@ from .mangomolo import (
MangomoloVideoIE,
MangomoloLiveIE,
)
from .manyvids import ManyVidsIE
from .matchtv import MatchTVIE
from .mdr import MDRIE
from .mediaset import MediasetIE
@@ -762,6 +771,7 @@ from .ora import OraTVIE
from .orf import (
ORFTVthekIE,
ORFFM4IE,
ORFFM4StoryIE,
ORFOE1IE,
ORFIPTVIE,
)
@@ -801,6 +811,7 @@ from .polskieradio import (
PolskieRadioIE,
PolskieRadioCategoryIE,
)
from .popcorntv import PopcornTVIE
from .porn91 import Porn91IE
from .porncom import PornComIE
from .pornflip import PornFlipIE
@@ -893,6 +904,7 @@ from .rutube import (
RutubeEmbedIE,
RutubeMovieIE,
RutubePersonIE,
RutubePlaylistIE,
)
from .rutv import RUTVIE
from .ruutu import RuutuIE
@@ -913,6 +925,7 @@ from .seeker import SeekerIE
from .senateisvp import SenateISVPIE
from .sendtonews import SendtoNewsIE
from .servingsys import ServingSysIE
from .servus import ServusIE
from .sexu import SexuIE
from .shahid import ShahidIE
from .shared import (
@@ -929,6 +942,7 @@ from .skynewsarabia import (
)
from .skysports import SkySportsIE
from .slideshare import SlideshareIE
from .slideslive import SlidesLiveIE
from .slutload import SlutloadIE
from .smotri import (
SmotriIE,
@@ -1125,6 +1139,7 @@ from .udn import UDNEmbedIE
from .uktvplay import UKTVPlayIE
from .digiteka import DigitekaIE
from .unistra import UnistraIE
from .unity import UnityIE
from .uol import UOLIE
from .uplynk import (
UplynkIE,
@@ -1232,7 +1247,10 @@ from .vodpl import VODPlIE
from .vodplatform import VODPlatformIE
from .voicerepublic import VoiceRepublicIE
from .voot import VootIE
from .voxmedia import VoxMediaIE
from .voxmedia import (
VoxMediaVolumeIE,
VoxMediaIE,
)
from .vporn import VpornIE
from .vrt import VRTIE
from .vrak import VrakIE
@@ -1331,7 +1349,6 @@ from .youtube import (
YoutubeSearchDateIE,
YoutubeSearchIE,
YoutubeSearchURLIE,
YoutubeSharedVideoIE,
YoutubeShowIE,
YoutubeSubscriptionsIE,
YoutubeTruncatedIDIE,

View File

@@ -67,9 +67,9 @@ class FacebookIE(InfoExtractor):
'uploader': 'Tennis on Facebook',
'upload_date': '20140908',
'timestamp': 1410199200,
}
},
'skip': 'Requires logging in',
}, {
'note': 'Video without discernible title',
'url': 'https://www.facebook.com/video.php?v=274175099429670',
'info_dict': {
'id': '274175099429670',
@@ -78,6 +78,7 @@ class FacebookIE(InfoExtractor):
'uploader': 'Asif Nawab Butt',
'upload_date': '20140506',
'timestamp': 1399398998,
'thumbnail': r're:^https?://.*',
},
'expected_warnings': [
'title'
@@ -94,6 +95,7 @@ class FacebookIE(InfoExtractor):
'upload_date': '20160110',
'timestamp': 1452431627,
},
'skip': 'Requires logging in',
}, {
'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6',
@@ -121,7 +123,11 @@ class FacebookIE(InfoExtractor):
'info_dict': {
'id': '10153664894881749',
'ext': 'mp4',
'title': 'Facebook video #10153664894881749',
'title': 'Average time to confirm recent Supreme Court nominees: 67 days Longest it\'s t...',
'thumbnail': r're:^https?://.*',
'timestamp': 1456259628,
'upload_date': '20160223',
'uploader': 'Barack Obama',
},
}, {
# have 1080P, but only up to 720p in swf params
@@ -130,10 +136,11 @@ class FacebookIE(InfoExtractor):
'info_dict': {
'id': '10155529876156509',
'ext': 'mp4',
'title': 'Holocaust survivor becomes US citizen',
'title': 'She survived the holocaust — and years later, shes getting her citizenship s...',
'timestamp': 1477818095,
'upload_date': '20161030',
'uploader': 'CNN',
'thumbnail': r're:^https?://.*',
},
}, {
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
@@ -158,6 +165,7 @@ class FacebookIE(InfoExtractor):
'timestamp': 1477305000,
'upload_date': '20161024',
'uploader': 'La Guía Del Varón',
'thumbnail': r're:^https?://.*',
},
'params': {
'skip_download': True,
@@ -376,6 +384,7 @@ class FacebookIE(InfoExtractor):
timestamp = int_or_none(self._search_regex(
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
'timestamp', default=None))
thumbnail = self._og_search_thumbnail(webpage)
info_dict = {
'id': video_id,
@@ -383,6 +392,7 @@ class FacebookIE(InfoExtractor):
'formats': formats,
'uploader': uploader,
'timestamp': timestamp,
'thumbnail': thumbnail,
}
return webpage, info_dict

View File

@@ -2,57 +2,113 @@
from __future__ import unicode_literals
from .adobepass import AdobePassIE
from .uplynk import UplynkPreplayIE
from ..compat import compat_str
from ..utils import (
smuggle_url,
update_url_query,
HEADRequest,
int_or_none,
parse_age_limit,
parse_duration,
try_get,
unified_timestamp,
)
class FOXIE(AdobePassIE):
_VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.fox.com/watch/255180355939/7684182528',
_VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[\da-fA-F]+)'
_TESTS = [{
# clip
'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/',
'md5': 'ebd296fcc41dd4b19f8115d8461a3165',
'info_dict': {
'id': '255180355939',
'id': '4b765a60490325103ea69888fb2bd4e8',
'ext': 'mp4',
'title': 'Official Trailer: Gotham',
'description': 'Tracing the rise of the great DC Comics Super-Villains and vigilantes, Gotham reveals an entirely new chapter that has never been told.',
'duration': 129,
'timestamp': 1400020798,
'upload_date': '20140513',
'uploader': 'NEWA-FNG-FOXCOM',
'title': 'Aftermath: Bruce Wayne Develops Into The Dark Knight',
'description': 'md5:549cd9c70d413adb32ce2a779b53b486',
'duration': 102,
'timestamp': 1504291893,
'upload_date': '20170901',
'creator': 'FOX',
'series': 'Gotham',
},
'add_ie': ['ThePlatform'],
}
'params': {
'skip_download': True,
},
}, {
# episode, geo-restricted
'url': 'https://www.fox.com/watch/087036ca7f33c8eb79b08152b4dd75c1/',
'only_matching': True,
}, {
# episode, geo-restricted, tv provided required
'url': 'https://www.fox.com/watch/30056b295fb57f7452aeeb4920bc3024/',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
settings = self._parse_json(self._search_regex(
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
webpage, 'drupal settings'), video_id)
fox_pdk_player = settings['fox_pdk_player']
release_url = fox_pdk_player['release_url']
query = {
'mbr': 'true',
'switch': 'http'
}
if fox_pdk_player.get('access') == 'locked':
ap_p = settings['foxAdobePassProvider']
rating = ap_p.get('videoRating')
if rating == 'n/a':
rating = None
resource = self._get_mvpd_resource('fbc-fox', None, ap_p['videoGUID'], rating)
query['auth'] = self._extract_mvpd_auth(url, video_id, 'fbc-fox', resource)
video = self._download_json(
'https://api.fox.com/fbc-content/v1_4/video/%s' % video_id,
video_id, headers={
'apikey': 'abdcbed02c124d393b39e818a4312055',
'Content-Type': 'application/json',
'Referer': url,
})
info = self._search_json_ld(webpage, video_id, fatal=False)
info.update({
'_type': 'url_transparent',
'ie_key': 'ThePlatform',
'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
title = video['name']
release_url = video['videoRelease']['url']
description = video.get('description')
duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
video.get('duration')) or parse_duration(video.get('duration'))
timestamp = unified_timestamp(video.get('datePublished'))
age_limit = parse_age_limit(video.get('contentRating'))
data = try_get(
video, lambda x: x['trackingData']['properties'], dict) or {}
creator = data.get('brand') or data.get('network') or video.get('network')
series = video.get('seriesName') or data.get(
'seriesName') or data.get('show')
season_number = int_or_none(video.get('seasonNumber'))
episode = video.get('name')
episode_number = int_or_none(video.get('episodeNumber'))
release_year = int_or_none(video.get('releaseYear'))
if data.get('authRequired'):
# TODO: AP
pass
info = {
'id': video_id,
})
'title': title,
'description': description,
'duration': duration,
'timestamp': timestamp,
'age_limit': age_limit,
'creator': creator,
'series': series,
'season_number': season_number,
'episode': episode,
'episode_number': episode_number,
'release_year': release_year,
}
urlh = self._request_webpage(HEADRequest(release_url), video_id)
video_url = compat_str(urlh.geturl())
if UplynkPreplayIE.suitable(video_url):
info.update({
'_type': 'url_transparent',
'url': video_url,
'ie_key': UplynkPreplayIE.ie_key(),
})
else:
m3u8_url = self._download_json(release_url, video_id)['playURL']
formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls')
self._sort_formats(formats)
info['formats'] = formats
return info

View File

@@ -27,7 +27,7 @@ class FreespeechIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
title = mobj.group('title')
webpage = self._download_webpage(url, title)
info_json = self._search_regex(r'jQuery.extend\(Drupal.settings, ({.*?})\);', webpage, 'info')
info_json = self._search_regex(r'jQuery\.extend\(Drupal\.settings, ({.*?})\);', webpage, 'info')
info = json.loads(info_json)
return {

View File

@@ -0,0 +1,43 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from .nexx import NexxIE
from ..utils import extract_attributes
class FunkIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?funk\.net/(?:mix|channel)/(?:[^/]+/)*(?P<id>[^?/#]+)'
_TESTS = [{
'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/0/59d517e741dca10001252574/',
'md5': '4d40974481fa3475f8bccfd20c5361f8',
'info_dict': {
'id': '716599',
'ext': 'mp4',
'title': 'Neue Rechte Welle',
'description': 'md5:a30a53f740ffb6bfd535314c2cc5fb69',
'timestamp': 1501337639,
'upload_date': '20170729',
},
'params': {
'format': 'bestvideo',
'skip_download': True,
},
}, {
'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/0/59d52049999264000182e79d/',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
domain_id = NexxIE._extract_domain_id(webpage) or '741'
nexx_id = extract_attributes(self._search_regex(
r'(<div[^>]id=["\']mediaplayer-funk[^>]+>)',
webpage, 'media player'))['data-id']
return self.url_result(
'nexx:%s:%s' % (domain_id, nexx_id), ie=NexxIE.ie_key(),
video_id=nexx_id)

View File

@@ -105,7 +105,7 @@ class GameSpotIE(OnceIE):
onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri')
if onceux_url:
formats.extend(self._extract_once_formats(re.sub(
r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url).replace('ads/vmap/', '')))
r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url)))
if not formats:
for quality in ['sd', 'hd']:

View File

@@ -22,6 +22,8 @@ from ..utils import (
HEADRequest,
is_html,
js_to_json,
KNOWN_EXTENSIONS,
mimetype2ext,
orderedSet,
sanitized_Request,
smuggle_url,
@@ -99,6 +101,7 @@ from .mediaset import MediasetIE
from .joj import JojIE
from .megaphone import MegaphoneIE
from .vzaar import VzaarIE
from .channel9 import Channel9IE
class GenericIE(InfoExtractor):
@@ -1088,7 +1091,7 @@ class GenericIE(InfoExtractor):
'ext': 'mp4',
'upload_date': '20150212',
'uploader': 'The National Archives UK',
'description': 'md5:a236581cd2449dd2df4f93412f3f01c6',
'description': 'md5:8078af856dca76edc42910b61273dbbf',
'uploader_id': 'NationalArchives08',
'title': 'Webinar: Using Discovery, The National Archives online catalogue',
},
@@ -1104,7 +1107,8 @@ class GenericIE(InfoExtractor):
},
'params': {
'skip_download': True,
}
},
'skip': 'does not contain a video anymore',
},
# Complex jwplayer
{
@@ -1113,6 +1117,7 @@ class GenericIE(InfoExtractor):
'id': 'videos',
'ext': 'mp4',
'title': 'king machine trailer 1',
'description': 'Browse King Machine videos & audio for sweet media. Your eyes will thank you.',
'thumbnail': r're:^https?://.*\.jpg$',
},
},
@@ -1130,13 +1135,42 @@ class GenericIE(InfoExtractor):
'skip_download': True,
}
},
{
# Video.js embed, multiple formats
'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
'info_dict': {
'id': 'yygqldloqIk',
'ext': 'mp4',
'title': 'SolidWorks. Урок 6 Настройка чертежа',
'description': 'md5:baf95267792646afdbf030e4d06b2ab3',
'upload_date': '20130314',
'uploader': 'PROстое3D',
'uploader_id': 'PROstoe3D',
},
'params': {
'skip_download': True,
},
},
{
# Video.js embed, single format
'url': 'https://www.vooplayer.com/v3/watch/watch.php?v=NzgwNTg=',
'info_dict': {
'id': 'watch',
'ext': 'mp4',
'title': 'Step 1 - Good Foundation',
'description': 'md5:d1e7ff33a29fc3eb1673d6c270d344f4',
},
'params': {
'skip_download': True,
},
},
# rtl.nl embed
{
'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
'playlist_mincount': 5,
'info_dict': {
'id': 'aanslagen-kopenhagen',
'title': 'Aanslagen Kopenhagen | RTL Nieuws',
'title': 'Aanslagen Kopenhagen',
}
},
# Zapiks embed
@@ -1268,6 +1302,7 @@ class GenericIE(InfoExtractor):
'params': {
'skip_download': True,
},
'skip': 'This video is unavailable.',
},
# Pladform embed
{
@@ -1281,6 +1316,7 @@ class GenericIE(InfoExtractor):
'duration': 694,
'age_limit': 0,
},
'skip': 'HTTP Error 404: Not Found',
},
# Playwire embed
{
@@ -1301,6 +1337,14 @@ class GenericIE(InfoExtractor):
'id': '518726732',
'ext': 'mp4',
'title': 'Facebook Creates "On This Day" | Crunch Report',
'description': 'Amazon updates Fire TV line, Tesla\'s Model X spotted in the wild',
'timestamp': 1427237531,
'uploader': 'Crunch Report',
'upload_date': '20150324',
},
'params': {
# m3u8 download
'skip_download': True,
},
},
# SVT embed
@@ -1352,16 +1396,20 @@ class GenericIE(InfoExtractor):
'upload_date': '20140107',
'timestamp': 1389118457,
},
'skip': 'Invalid Page URL',
},
# NBC News embed
{
'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
'md5': '1aa589c675898ae6d37a17913cf68d66',
'info_dict': {
'id': '701714499682',
'id': 'x_dtl_oa_LettermanliftPR_160608',
'ext': 'mp4',
'title': 'PREVIEW: On Assignment: David Letterman',
'title': 'David Letterman: A Preview',
'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
'upload_date': '20160609',
'timestamp': 1465431544,
'uploader': 'NBCU-NEWS',
},
},
# UDN embed
@@ -1378,6 +1426,7 @@ class GenericIE(InfoExtractor):
# m3u8 download
'skip_download': True,
},
'expected_warnings': ['Failed to parse JSON Expecting value'],
},
# Ooyala embed
{
@@ -1385,7 +1434,7 @@ class GenericIE(InfoExtractor):
'info_dict': {
'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
'ext': 'mp4',
'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',
'description': 'Index/Match versus VLOOKUP.',
'title': 'This is what separates the Excel masters from the wannabes',
'duration': 191.933,
},
@@ -1423,7 +1472,8 @@ class GenericIE(InfoExtractor):
'upload_date': '20150622',
'uploader': 'Public Sénat',
'uploader_id': 'xa9gza',
}
},
'skip': 'File not found.',
},
# OnionStudios embed
{
@@ -1519,14 +1569,27 @@ class GenericIE(InfoExtractor):
# LiveLeak embed
{
'url': 'http://www.wykop.pl/link/3088787/',
'md5': 'ace83b9ed19b21f68e1b50e844fdf95d',
'md5': '7619da8c820e835bef21a1efa2a0fc71',
'info_dict': {
'id': '874_1459135191',
'ext': 'mp4',
'title': 'Man shows poor quality of new apartment building',
'description': 'The wall is like a sand pile.',
'uploader': 'Lake8737',
}
},
'add_ie': [LiveLeakIE.ie_key()],
},
# Another LiveLeak embed pattern (#13336)
{
'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/',
'info_dict': {
'id': '2eb_1496309988',
'ext': 'mp4',
'title': 'Thief robs place where everyone was armed',
'description': 'md5:694d73ee79e535953cf2488562288eee',
'uploader': 'brazilwtf',
},
'add_ie': [LiveLeakIE.ie_key()],
},
# Duplicated embedded video URLs
{
@@ -1568,22 +1631,6 @@ class GenericIE(InfoExtractor):
},
'add_ie': ['BrightcoveLegacy'],
},
# Nexx embed
{
'url': 'https://www.funk.net/serien/5940e15073f6120001657956/items/593efbb173f6120001657503',
'info_dict': {
'id': '247746',
'ext': 'mp4',
'title': "Yesterday's Jam (OV)",
'description': 'md5:09bc0984723fed34e2581624a84e05f0',
'timestamp': 1492594816,
'upload_date': '20170419',
},
'params': {
'format': 'bestvideo',
'skip_download': True,
},
},
# Facebook <iframe> embed
{
'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
@@ -1866,6 +1913,15 @@ class GenericIE(InfoExtractor):
'title': 'Building A Business Online: Principal Chairs Q & A',
},
},
{
# multiple HTML5 videos on one page
'url': 'https://www.paragon-software.com/home/rk-free/keyscenarios.html',
'info_dict': {
'id': 'keyscenarios',
'title': 'Rescue Kit 14 Free Edition - Getting started',
},
'playlist_count': 4,
}
# {
# # TODO: find another test
# # http://schema.org/VideoObject
@@ -2153,7 +2209,7 @@ class GenericIE(InfoExtractor):
# And then there are the jokers who advertise that they use RTA,
# but actually don't.
AGE_LIMIT_MARKERS = [
r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
]
if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
age_limit = 18
@@ -2215,7 +2271,7 @@ class GenericIE(InfoExtractor):
# Look for embedded rtl.nl player
matches = re.findall(
r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
webpage)
if matches:
return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
@@ -2230,36 +2286,11 @@ class GenericIE(InfoExtractor):
if vid_me_embed_url is not None:
return self.url_result(vid_me_embed_url, 'Vidme')
# Look for embedded YouTube player
matches = re.findall(r'''(?x)
(?:
<iframe[^>]+?src=|
data-video-url=|
<embed[^>]+?src=|
embedSWF\(?:\s*|
<object[^>]+data=|
new\s+SWFObject\(
)
(["\'])
(?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
(?:embed|v|p)/.+?)
\1''', webpage)
if matches:
# Look for YouTube embeds
youtube_urls = YoutubeIE._extract_urls(webpage)
if youtube_urls:
return self.playlist_from_matches(
matches, video_id, video_title, lambda m: unescapeHTML(m[1]))
# Look for lazyYT YouTube embed
matches = re.findall(
r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
if matches:
return self.playlist_from_matches(matches, video_id, video_title, lambda m: unescapeHTML(m))
# Look for Wordpress "YouTube Video Importer" plugin
matches = re.findall(r'''(?x)<div[^>]+
class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
if matches:
return self.playlist_from_matches(matches, video_id, video_title, lambda m: m[-1])
youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key())
matches = DailymotionIE._extract_urls(webpage)
if matches:
@@ -2639,7 +2670,7 @@ class GenericIE(InfoExtractor):
# Look for UDN embeds
mobj = re.search(
r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
if mobj is not None:
return self.url_result(
compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
@@ -2757,9 +2788,9 @@ class GenericIE(InfoExtractor):
self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
# Look for LiveLeak embeds
liveleak_url = LiveLeakIE._extract_url(webpage)
if liveleak_url:
return self.url_result(liveleak_url, 'LiveLeak')
liveleak_urls = LiveLeakIE._extract_urls(webpage)
if liveleak_urls:
return self.playlist_from_matches(liveleak_urls, video_id, video_title)
# Look for 3Q SDN embeds
threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
@@ -2843,6 +2874,11 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches(
vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key())
channel9_urls = Channel9IE._extract_urls(webpage)
if channel9_urls:
return self.playlist_from_matches(
channel9_urls, video_id, video_title, ie=Channel9IE.ie_key())
def merge_dicts(dict1, dict2):
merged = {}
for k, v in dict1.items():
@@ -2858,22 +2894,23 @@ class GenericIE(InfoExtractor):
merged[k] = v
return merged
# Looking for http://schema.org/VideoObject
json_ld = self._search_json_ld(
webpage, video_id, default={}, expected_type='VideoObject')
if json_ld.get('url'):
return merge_dicts(json_ld, info_dict)
# Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
if entries:
for entry in entries:
entry.update({
if len(entries) == 1:
entries[0].update({
'id': video_id,
'title': video_title,
})
else:
for num, entry in enumerate(entries, start=1):
entry.update({
'id': '%s-%s' % (video_id, num),
'title': '%s (%d)' % (video_title, num),
})
for entry in entries:
self._sort_formats(entry['formats'])
return self.playlist_result(entries)
return self.playlist_result(entries, video_id, video_title)
jwplayer_data = self._find_jwplayer_data(
webpage, video_id, transform_source=js_to_json)
@@ -2882,6 +2919,52 @@ class GenericIE(InfoExtractor):
jwplayer_data, video_id, require_title=False, base_url=url)
return merge_dicts(info, info_dict)
# Video.js embed
mobj = re.search(
r'(?s)\bvideojs\s*\(.+?\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;',
webpage)
if mobj is not None:
sources = self._parse_json(
mobj.group(1), video_id, transform_source=js_to_json,
fatal=False) or []
if not isinstance(sources, list):
sources = [sources]
formats = []
for source in sources:
src = source.get('src')
if not src or not isinstance(src, compat_str):
continue
src = compat_urlparse.urljoin(url, src)
src_type = source.get('type')
if isinstance(src_type, compat_str):
src_type = src_type.lower()
ext = determine_ext(src).lower()
if src_type == 'video/youtube':
return self.url_result(src, YoutubeIE.ie_key())
if src_type == 'application/dash+xml' or ext == 'mpd':
formats.extend(self._extract_mpd_formats(
src, video_id, mpd_id='dash', fatal=False))
elif src_type == 'application/x-mpegurl' or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
src, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
else:
formats.append({
'url': src,
'ext': (mimetype2ext(src_type) or
ext if ext in KNOWN_EXTENSIONS else 'mp4'),
})
if formats:
self._sort_formats(formats)
info_dict['formats'] = formats
return info_dict
# Looking for http://schema.org/VideoObject
json_ld = self._search_json_ld(
webpage, video_id, default={}, expected_type='VideoObject')
if json_ld.get('url'):
return merge_dicts(json_ld, info_dict)
def check_video(vurl):
if YoutubeIE.suitable(vurl):
return True
@@ -2969,7 +3052,7 @@ class GenericIE(InfoExtractor):
# be supported by youtube-dl thus this is checked the very last (see
# https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
embed_url = self._html_search_meta('twitter:player', webpage, default=None)
if embed_url:
if embed_url and embed_url != url:
return self.url_result(embed_url)
if not found:

View File

@@ -11,7 +11,7 @@ from ..utils import (
class GfycatIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/)?(?P<id>[^/?#]+)'
_VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/|gifs/detail/)?(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
'info_dict': {
@@ -44,6 +44,9 @@ class GfycatIE(InfoExtractor):
'categories': list,
'age_limit': 0,
}
}, {
'url': 'https://gfycat.com/gifs/detail/UnconsciousLankyIvorygull',
'only_matching': True
}]
def _real_extract(self, url):

View File

@@ -4,26 +4,61 @@ import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
lowercase_escape,
update_url_query,
)
class GoogleDriveIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
_VALID_URL = r'''(?x)
https?://
(?:
(?:docs|drive)\.google\.com/
(?:
(?:uc|open)\?.*?id=|
file/d/
)|
video\.google\.com/get_player\?.*?docid=
)
(?P<id>[a-zA-Z0-9_-]{28,})
'''
_TESTS = [{
'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
'md5': 'd109872761f7e7ecf353fa108c0dbe1e',
'md5': '5c602afbbf2c1db91831f5d82f678554',
'info_dict': {
'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
'ext': 'mp4',
'title': 'Big Buck Bunny.mp4',
'duration': 45,
}
}, {
# video can't be watched anonymously due to view count limit reached,
# but can be downloaded (see https://github.com/rg3/youtube-dl/issues/14046)
'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
'md5': 'bfbd670d03a470bb1e6d4a257adec12e',
'info_dict': {
'id': '0B-vUyvmDLdWDcEt4WjBqcmI2XzQ',
'ext': 'mp4',
'title': 'Annabelle Creation (2017)- Z.V1 [TH].MP4',
}
}, {
# video id is longer than 28 characters
'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
'info_dict': {
'id': '1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ',
'ext': 'mp4',
'title': 'Andreea Banica feat Smiley - Hooky Song (Official Video).mp4',
'duration': 189,
},
'only_matching': True,
}, {
'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
'only_matching': True,
}, {
'url': 'https://drive.google.com/uc?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
'only_matching': True,
}]
_FORMATS_EXT = {
@@ -44,6 +79,13 @@ class GoogleDriveIE(InfoExtractor):
'46': 'webm',
'59': 'mp4',
}
_BASE_URL_CAPTIONS = 'https://drive.google.com/timedtext'
_CAPTIONS_ENTRY_TAG = {
'subtitles': 'track',
'automatic_captions': 'target',
}
_caption_formats_ext = []
_captions_xml = None
@staticmethod
def _extract_url(webpage):
@@ -53,54 +95,183 @@ class GoogleDriveIE(InfoExtractor):
if mobj:
return 'https://drive.google.com/file/d/%s' % mobj.group('id')
def _download_subtitles_xml(self, video_id, subtitles_id, hl):
if self._captions_xml:
return
self._captions_xml = self._download_xml(
self._BASE_URL_CAPTIONS, video_id, query={
'id': video_id,
'vid': subtitles_id,
'hl': hl,
'v': video_id,
'type': 'list',
'tlangs': '1',
'fmts': '1',
'vssids': '1',
}, note='Downloading subtitles XML',
errnote='Unable to download subtitles XML', fatal=False)
if self._captions_xml:
for f in self._captions_xml.findall('format'):
if f.attrib.get('fmt_code') and not f.attrib.get('default'):
self._caption_formats_ext.append(f.attrib['fmt_code'])
def _get_captions_by_type(self, video_id, subtitles_id, caption_type,
origin_lang_code=None):
if not subtitles_id or not caption_type:
return
captions = {}
for caption_entry in self._captions_xml.findall(
self._CAPTIONS_ENTRY_TAG[caption_type]):
caption_lang_code = caption_entry.attrib.get('lang_code')
if not caption_lang_code:
continue
caption_format_data = []
for caption_format in self._caption_formats_ext:
query = {
'vid': subtitles_id,
'v': video_id,
'fmt': caption_format,
'lang': (caption_lang_code if origin_lang_code is None
else origin_lang_code),
'type': 'track',
'name': '',
'kind': '',
}
if origin_lang_code is not None:
query.update({'tlang': caption_lang_code})
caption_format_data.append({
'url': update_url_query(self._BASE_URL_CAPTIONS, query),
'ext': caption_format,
})
captions[caption_lang_code] = caption_format_data
return captions
def _get_subtitles(self, video_id, subtitles_id, hl):
if not subtitles_id or not hl:
return
self._download_subtitles_xml(video_id, subtitles_id, hl)
if not self._captions_xml:
return
return self._get_captions_by_type(video_id, subtitles_id, 'subtitles')
def _get_automatic_captions(self, video_id, subtitles_id, hl):
if not subtitles_id or not hl:
return
self._download_subtitles_xml(video_id, subtitles_id, hl)
if not self._captions_xml:
return
track = self._captions_xml.find('track')
if track is None:
return
origin_lang_code = track.attrib.get('lang_code')
if not origin_lang_code:
return
return self._get_captions_by_type(
video_id, subtitles_id, 'automatic_captions', origin_lang_code)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'http://docs.google.com/file/d/%s' % video_id, video_id)
reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
if reason:
raise ExtractorError(reason)
title = self._search_regex(r'"title"\s*,\s*"([^"]+)', webpage, 'title')
title = self._search_regex(
r'"title"\s*,\s*"([^"]+)', webpage, 'title',
default=None) or self._og_search_title(webpage)
duration = int_or_none(self._search_regex(
r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', default=None))
fmt_stream_map = self._search_regex(
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',')
fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')
resolutions = {}
for fmt in fmt_list:
mobj = re.search(
r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt)
if mobj:
resolutions[mobj.group('format_id')] = (
int(mobj.group('width')), int(mobj.group('height')))
r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds',
default=None))
formats = []
for fmt_stream in fmt_stream_map:
fmt_stream_split = fmt_stream.split('|')
if len(fmt_stream_split) < 2:
continue
format_id, format_url = fmt_stream_split[:2]
f = {
'url': lowercase_escape(format_url),
'format_id': format_id,
'ext': self._FORMATS_EXT[format_id],
}
resolution = resolutions.get(format_id)
if resolution:
f.update({
'width': resolution[0],
'height': resolution[1],
fmt_stream_map = self._search_regex(
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage,
'fmt stream map', default='').split(',')
fmt_list = self._search_regex(
r'"fmt_list"\s*,\s*"([^"]+)', webpage,
'fmt_list', default='').split(',')
if fmt_stream_map and fmt_list:
resolutions = {}
for fmt in fmt_list:
mobj = re.search(
r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt)
if mobj:
resolutions[mobj.group('format_id')] = (
int(mobj.group('width')), int(mobj.group('height')))
for fmt_stream in fmt_stream_map:
fmt_stream_split = fmt_stream.split('|')
if len(fmt_stream_split) < 2:
continue
format_id, format_url = fmt_stream_split[:2]
f = {
'url': lowercase_escape(format_url),
'format_id': format_id,
'ext': self._FORMATS_EXT[format_id],
}
resolution = resolutions.get(format_id)
if resolution:
f.update({
'width': resolution[0],
'height': resolution[1],
})
formats.append(f)
source_url = update_url_query(
'https://drive.google.com/uc', {
'id': video_id,
'export': 'download',
})
urlh = self._request_webpage(
source_url, video_id, note='Requesting source file',
errnote='Unable to request source file', fatal=False)
if urlh:
def add_source_format(src_url):
formats.append({
'url': src_url,
'ext': determine_ext(title, 'mp4').lower(),
'format_id': 'source',
'quality': 1,
})
formats.append(f)
if urlh.headers.get('Content-Disposition'):
add_source_format(source_url)
else:
confirmation_webpage = self._webpage_read_content(
urlh, url, video_id, note='Downloading confirmation page',
errnote='Unable to confirm download', fatal=False)
if confirmation_webpage:
confirm = self._search_regex(
r'confirm=([^&"\']+)', confirmation_webpage,
'confirmation code', fatal=False)
if confirm:
add_source_format(update_url_query(source_url, {
'confirm': confirm,
}))
if not formats:
reason = self._search_regex(
r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
if reason:
raise ExtractorError(reason, expected=True)
self._sort_formats(formats)
hl = self._search_regex(
r'"hl"\s*,\s*"([^"]+)', webpage, 'hl', default=None)
subtitles_id = None
ttsurl = self._search_regex(
r'"ttsurl"\s*,\s*"([^"]+)', webpage, 'ttsurl', default=None)
if ttsurl:
# the video Id for subtitles will be the last value in the ttsurl
# query string
subtitles_id = ttsurl.encode('utf-8').decode(
'unicode_escape').split('=')[-1]
return {
'id': video_id,
'title': title,
'thumbnail': self._og_search_thumbnail(webpage, default=None),
'duration': duration,
'formats': formats,
'subtitles': self.extract_subtitles(video_id, subtitles_id, hl),
'automatic_captions': self.extract_automatic_captions(
video_id, subtitles_id, hl),
}

View File

@@ -61,7 +61,7 @@ class GooglePlusIE(InfoExtractor):
'width': int(width),
'height': int(height),
} for width, height, video_url in re.findall(
r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent.com.*?)"', webpage)]
r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent\.com.*?)"', webpage)]
self._sort_formats(formats)
return {

View File

@@ -2,6 +2,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import (
determine_ext,
int_or_none,
@@ -25,6 +26,22 @@ class HeiseIE(InfoExtractor):
'description': 'md5:c934cbfb326c669c2bcabcbe3d3fcd20',
'thumbnail': r're:^https?://.*/gallery/$',
}
}, {
# YouTube embed
'url': 'http://www.heise.de/newsticker/meldung/Netflix-In-20-Jahren-vom-Videoverleih-zum-TV-Revolutionaer-3814130.html',
'md5': 'e403d2b43fea8e405e88e3f8623909f1',
'info_dict': {
'id': '6kmWbXleKW4',
'ext': 'mp4',
'title': 'NEU IM SEPTEMBER | Netflix',
'description': 'md5:2131f3c7525e540d5fd841de938bd452',
'upload_date': '20170830',
'uploader': 'Netflix Deutschland, Österreich und Schweiz',
'uploader_id': 'netflixdach',
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html',
'only_matching': True,
@@ -40,6 +57,16 @@ class HeiseIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._html_search_meta('fulltitle', webpage, default=None)
if not title or title == "c't":
title = self._search_regex(
r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"',
webpage, 'title')
yt_urls = YoutubeIE._extract_urls(webpage)
if yt_urls:
return self.playlist_from_matches(yt_urls, video_id, title, ie=YoutubeIE.ie_key())
container_id = self._search_regex(
r'<div class="videoplayerjw"[^>]+data-container="([0-9]+)"',
webpage, 'container ID')
@@ -47,12 +74,6 @@ class HeiseIE(InfoExtractor):
r'<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"',
webpage, 'sequenz ID')
title = self._html_search_meta('fulltitle', webpage, default=None)
if not title or title == "c't":
title = self._search_regex(
r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"',
webpage, 'title')
doc = self._download_xml(
'http://www.heise.de/videout/feed', video_id, query={
'container': container_id,

View File

@@ -11,45 +11,20 @@ from ..utils import (
class HowStuffWorksIE(InfoExtractor):
_VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*(?:\d+-)?(?P<id>.+?)-video\.htm'
_VALID_URL = r'https?://[\da-z-]+\.(?:howstuffworks|stuff(?:(?:youshould|theydontwantyouto)know|toblowyourmind|momnevertoldyou)|(?:brain|car)stuffshow|fwthinking|geniusstuff)\.com/(?:[^/]+/)*(?:\d+-)?(?P<id>.+?)-video\.htm'
_TESTS = [
{
'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm',
'url': 'http://www.stufftoblowyourmind.com/videos/optical-illusions-video.htm',
'md5': '76646a5acc0c92bf7cd66751ca5db94d',
'info_dict': {
'id': '450221',
'ext': 'flv',
'title': 'Cool Jobs - Iditarod Musher',
'description': 'Cold sleds, freezing temps and warm dog breath... an Iditarod musher\'s dream. Kasey-Dee Gardner jumps on a sled to find out what the big deal is.',
'display_id': 'cool-jobs-iditarod-musher',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 161,
},
'skip': 'Video broken',
},
{
'url': 'http://adventure.howstuffworks.com/7199-survival-zone-food-and-water-in-the-savanna-video.htm',
'info_dict': {
'id': '453464',
'id': '855410',
'ext': 'mp4',
'title': 'Survival Zone: Food and Water In the Savanna',
'description': 'Learn how to find both food and water while trekking in the African savannah. In this video from the Discovery Channel.',
'display_id': 'survival-zone-food-and-water-in-the-savanna',
'thumbnail': r're:^https?://.*\.jpg$',
'title': 'Your Trickster Brain: Optical Illusions -- Science on the Web',
'description': 'md5:e374ff9561f6833ad076a8cc0a5ab2fb',
},
},
{
'url': 'http://entertainment.howstuffworks.com/arts/2706-sword-swallowing-1-by-dan-meyer-video.htm',
'info_dict': {
'id': '440011',
'ext': 'mp4',
'title': 'Sword Swallowing #1 by Dan Meyer',
'description': 'Video footage (1 of 3) used by permission of the owner Dan Meyer through Sword Swallowers Association International <www.swordswallow.org>',
'display_id': 'sword-swallowing-1-by-dan-meyer',
'thumbnail': r're:^https?://.*\.jpg$',
},
},
{
'url': 'http://shows.howstuffworks.com/stuff-to-blow-your-mind/optical-illusions-video.htm',
'url': 'http://shows.howstuffworks.com/more-shows/why-does-balloon-stick-to-hair-video.htm',
'only_matching': True,
}
]

View File

@@ -104,7 +104,7 @@ class HRTiIE(HRTiBaseIE):
(?:
hrti:(?P<short_id>[0-9]+)|
https?://
hrti\.hrt\.hr/\#/video/show/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?
hrti\.hrt\.hr/(?:\#/)?video/show/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?
)
'''
_TESTS = [{
@@ -129,6 +129,9 @@ class HRTiIE(HRTiBaseIE):
}, {
'url': 'hrti:2181385',
'only_matching': True,
}, {
'url': 'https://hrti.hrt.hr/video/show/3873068/cuvar-dvorca-dramska-serija-14',
'only_matching': True,
}]
def _real_extract(self, url):
@@ -170,7 +173,7 @@ class HRTiIE(HRTiBaseIE):
class HRTiPlaylistIE(HRTiBaseIE):
_VALID_URL = r'https?://hrti.hrt.hr/#/video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?'
_VALID_URL = r'https?://hrti\.hrt\.hr/(?:#/)?video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?'
_TESTS = [{
'url': 'https://hrti.hrt.hr/#/video/list/category/212/ekumena',
'info_dict': {
@@ -182,6 +185,9 @@ class HRTiPlaylistIE(HRTiBaseIE):
}, {
'url': 'https://hrti.hrt.hr/#/video/list/category/212/',
'only_matching': True,
}, {
'url': 'https://hrti.hrt.hr/video/list/category/212/ekumena',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@@ -203,7 +203,7 @@ class PCMagIE(IGNIE):
_VALID_URL = r'https?://(?:www\.)?pcmag\.com/(?P<type>videos|article2)(/.+)?/(?P<name_or_id>.+)'
IE_NAME = 'pcmag'
_EMBED_RE = r'iframe.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content.html?[^"]*url=([^"]+)["&]'
_EMBED_RE = r'iframe\.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content\.html?[^"]*url=([^"]+)["&]'
_TESTS = [{
'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',

View File

@@ -8,7 +8,10 @@ from ..compat import (
compat_urllib_parse_unquote,
compat_urlparse,
)
from ..utils import determine_ext
from ..utils import (
determine_ext,
update_url_query,
)
from .bokecc import BokeCCBaseIE
@@ -68,21 +71,22 @@ class InfoQIE(BokeCCBaseIE):
'play_path': playpath,
}]
def _extract_cookies(self, webpage):
policy = self._search_regex(r'InfoQConstants.scp\s*=\s*\'([^\']+)\'', webpage, 'policy')
signature = self._search_regex(r'InfoQConstants.scs\s*=\s*\'([^\']+)\'', webpage, 'signature')
key_pair_id = self._search_regex(r'InfoQConstants.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id')
return 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % (
policy, signature, key_pair_id)
def _extract_cf_auth(self, webpage):
policy = self._search_regex(r'InfoQConstants\.scp\s*=\s*\'([^\']+)\'', webpage, 'policy')
signature = self._search_regex(r'InfoQConstants\.scs\s*=\s*\'([^\']+)\'', webpage, 'signature')
key_pair_id = self._search_regex(r'InfoQConstants\.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id')
return {
'Policy': policy,
'Signature': signature,
'Key-Pair-Id': key_pair_id,
}
def _extract_http_video(self, webpage):
http_video_url = self._search_regex(r'P\.s\s*=\s*\'([^\']+)\'', webpage, 'video URL')
http_video_url = update_url_query(http_video_url, self._extract_cf_auth(webpage))
return [{
'format_id': 'http_video',
'url': http_video_url,
'http_headers': {
'Cookie': self._extract_cookies(webpage)
},
}]
def _extract_http_audio(self, webpage, video_id):
@@ -91,22 +95,20 @@ class InfoQIE(BokeCCBaseIE):
if not http_audio_url:
return []
cookies_header = {'Cookie': self._extract_cookies(webpage)}
# base URL is found in the Location header in the response returned by
# GET https://www.infoq.com/mp3download.action?filename=... when logged in.
http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url)
http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage))
# audio file seem to be missing some times even if there is a download link
# so probe URL to make sure
if not self._is_valid_url(http_audio_url, video_id, headers=cookies_header):
if not self._is_valid_url(http_audio_url, video_id):
return []
return [{
'format_id': 'http_audio',
'url': http_audio_url,
'vcodec': 'none',
'http_headers': cookies_header,
}]
def _real_extract(self, url):

View File

@@ -30,7 +30,7 @@ class JeuxVideoIE(InfoExtractor):
webpage = self._download_webpage(url, title)
title = self._html_search_meta('name', webpage) or self._og_search_title(webpage)
config_url = self._html_search_regex(
r'data-src(?:set-video)?="(/contenu/medias/video.php.*?)"',
r'data-src(?:set-video)?="(/contenu/medias/video\.php.*?)"',
webpage, 'config URL')
config_url = 'http://www.jeuxvideo.com' + config_url

View File

@@ -0,0 +1,149 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
unified_timestamp,
update_url_query,
)
class KakaoIE(InfoExtractor):
_VALID_URL = r'https?://tv\.kakao\.com/channel/(?P<channel>\d+)/cliplink/(?P<id>\d+)'
_API_BASE = 'http://tv.kakao.com/api/v1/ft/cliplinks'
_TESTS = [{
'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083',
'md5': '702b2fbdeb51ad82f5c904e8c0766340',
'info_dict': {
'id': '301965083',
'ext': 'mp4',
'title': '乃木坂46 バナナマン 「3期生紹介コーナーが始動顔高低差GPも」 『乃木坂工事中』',
'uploader_id': 2671005,
'uploader': '그랑그랑이',
'timestamp': 1488160199,
'upload_date': '20170227',
}
}, {
'url': 'http://tv.kakao.com/channel/2653210/cliplink/300103180',
'md5': 'a8917742069a4dd442516b86e7d66529',
'info_dict': {
'id': '300103180',
'ext': 'mp4',
'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
'uploader_id': 2653210,
'uploader': '쇼 음악중심',
'timestamp': 1485684628,
'upload_date': '20170129',
}
}]
def _real_extract(self, url):
video_id = self._match_id(url)
player_header = {
'Referer': update_url_query(
'http://tv.kakao.com/embed/player/cliplink/%s' % video_id, {
'service': 'kakao_tv',
'autoplay': '1',
'profile': 'HIGH',
'wmode': 'transparent',
})
}
QUERY_COMMON = {
'player': 'monet_html5',
'referer': url,
'uuid': '',
'service': 'kakao_tv',
'section': '',
'dteType': 'PC',
}
query = QUERY_COMMON.copy()
query['fields'] = 'clipLink,clip,channel,hasPlusFriend,-service,-tagList'
impress = self._download_json(
'%s/%s/impress' % (self._API_BASE, video_id),
video_id, 'Downloading video info',
query=query, headers=player_header)
clip_link = impress['clipLink']
clip = clip_link['clip']
title = clip.get('title') or clip_link.get('displayTitle')
tid = impress.get('tid', '')
query = QUERY_COMMON.copy()
query.update({
'tid': tid,
'profile': 'HIGH',
})
raw = self._download_json(
'%s/%s/raw' % (self._API_BASE, video_id),
video_id, 'Downloading video formats info',
query=query, headers=player_header)
formats = []
for fmt in raw.get('outputList', []):
try:
profile_name = fmt['profile']
fmt_url_json = self._download_json(
'%s/%s/raw/videolocation' % (self._API_BASE, video_id),
video_id,
'Downloading video URL for profile %s' % profile_name,
query={
'service': 'kakao_tv',
'section': '',
'tid': tid,
'profile': profile_name
}, headers=player_header, fatal=False)
if fmt_url_json is None:
continue
fmt_url = fmt_url_json['url']
formats.append({
'url': fmt_url,
'format_id': profile_name,
'width': int_or_none(fmt.get('width')),
'height': int_or_none(fmt.get('height')),
'format_note': fmt.get('label'),
'filesize': int_or_none(fmt.get('filesize'))
})
except KeyError:
pass
self._sort_formats(formats)
thumbs = []
for thumb in clip.get('clipChapterThumbnailList', []):
thumbs.append({
'url': thumb.get('thumbnailUrl'),
'id': compat_str(thumb.get('timeInSec')),
'preference': -1 if thumb.get('isDefault') else 0
})
top_thumbnail = clip.get('thumbnailUrl')
if top_thumbnail:
thumbs.append({
'url': top_thumbnail,
'preference': 10,
})
return {
'id': video_id,
'title': title,
'description': clip.get('description'),
'uploader': clip_link.get('channel', {}).get('name'),
'uploader_id': clip_link.get('channelId'),
'thumbnails': thumbs,
'timestamp': unified_timestamp(clip_link.get('createTime')),
'duration': int_or_none(clip.get('duration')),
'view_count': int_or_none(clip.get('playCount')),
'like_count': int_or_none(clip.get('likeCount')),
'comment_count': int_or_none(clip.get('commentCount')),
'formats': formats,
}

View File

@@ -287,6 +287,9 @@ class KalturaIE(InfoExtractor):
# skip for now.
if f.get('fileExt') == 'chun':
continue
# DRM-protected video, cannot be decrypted
if f.get('fileExt') == 'wvm':
continue
if not f.get('fileExt'):
# QT indicates QuickTime; some videos have broken fileExt
if f.get('containerFormat') == 'qt':

View File

@@ -1,5 +1,6 @@
from __future__ import unicode_literals
from .canvas import CanvasIE
from .common import InfoExtractor
@@ -7,7 +8,7 @@ class KetnetIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.ketnet.be/kijken/zomerse-filmpjes',
'md5': 'd907f7b1814ef0fa285c0475d9994ed7',
'md5': '6bdeb65998930251bbd1c510750edba9',
'info_dict': {
'id': 'zomerse-filmpjes',
'ext': 'mp4',
@@ -15,6 +16,20 @@ class KetnetIE(InfoExtractor):
'description': 'Gluur mee met Ghost Rockers op de filmset',
'thumbnail': r're:^https?://.*\.jpg$',
}
}, {
# mzid in playerConfig instead of sources
'url': 'https://www.ketnet.be/kijken/nachtwacht/de-greystook',
'md5': '90139b746a0a9bd7bb631283f6e2a64e',
'info_dict': {
'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'ext': 'flv',
'title': 'Nachtwacht: De Greystook',
'description': 'md5:1db3f5dc4c7109c821261e7512975be7',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1468.03,
},
'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
}, {
'url': 'https://www.ketnet.be/kijken/karrewiet/uitzending-8-september-2016',
'only_matching': True,
@@ -38,6 +53,12 @@ class KetnetIE(InfoExtractor):
'player config'),
video_id)
mzid = config.get('mzid')
if mzid:
return self.url_result(
'https://mediazone.vrt.be/api/v1/ketnet/assets/%s' % mzid,
CanvasIE.ie_key(), video_id=mzid)
title = config['title']
formats = []

View File

@@ -215,3 +215,21 @@ class Laola1TvIE(Laola1TvEmbedIE):
'formats': formats,
'is_live': is_live,
}
class ITTFIE(InfoExtractor):
_VALID_URL = r'https?://tv\.ittf\.com/video/[^/]+/(?P<id>\d+)'
_TEST = {
'url': 'https://tv.ittf.com/video/peng-wang-wei-matsudaira-kenta/951802',
'only_matching': True,
}
def _real_extract(self, url):
return self.url_result(
update_url_query('https://www.laola1.tv/titanplayer.php', {
'videoid': self._match_id(url),
'type': 'V',
'lang': 'en',
'portal': 'int',
'customer': 1024,
}), Laola1TvEmbedIE.ie_key())

View File

@@ -72,15 +72,20 @@ class LiveLeakIE(InfoExtractor):
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.liveleak.com/view?i=677_1439397581',
'info_dict': {
'id': '677_1439397581',
'title': 'Fuel Depot in China Explosion caught on video',
},
'playlist_count': 3,
}]
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<iframe[^>]+src="https?://(?:\w+\.)?liveleak\.com/ll_embed\?(?:.*?)i=(?P<id>[\w_]+)(?:.*)',
def _extract_urls(webpage):
return re.findall(
r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[if]=[\w_]+[^"]+)"',
webpage)
if mobj:
return 'http://www.liveleak.com/view?i=%s' % mobj.group('id')
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -111,23 +116,54 @@ class LiveLeakIE(InfoExtractor):
'age_limit': age_limit,
}
info_dict = entries[0]
for idx, info_dict in enumerate(entries):
for a_format in info_dict['formats']:
if not a_format.get('height'):
a_format['height'] = int_or_none(self._search_regex(
r'([0-9]+)p\.mp4', a_format['url'], 'height label',
default=None))
for a_format in info_dict['formats']:
if not a_format.get('height'):
a_format['height'] = int_or_none(self._search_regex(
r'([0-9]+)p\.mp4', a_format['url'], 'height label',
default=None))
self._sort_formats(info_dict['formats'])
self._sort_formats(info_dict['formats'])
# Don't append entry ID for one-video pages to keep backward compatibility
if len(entries) > 1:
info_dict['id'] = '%s_%s' % (video_id, idx + 1)
else:
info_dict['id'] = video_id
info_dict.update({
'id': video_id,
'title': video_title,
'description': video_description,
'uploader': video_uploader,
'age_limit': age_limit,
'thumbnail': video_thumbnail,
})
info_dict.update({
'title': video_title,
'description': video_description,
'uploader': video_uploader,
'age_limit': age_limit,
'thumbnail': video_thumbnail,
})
return info_dict
return self.playlist_result(entries, video_id, video_title)
class LiveLeakEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[if])=(?P<id>[\w_]+)'
# See generic.py for actual test cases
_TESTS = [{
'url': 'https://www.liveleak.com/ll_embed?i=874_1459135191',
'only_matching': True,
}, {
'url': 'https://www.liveleak.com/ll_embed?f=ab065df993c1',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
kind, video_id = mobj.group('kind', 'id')
if kind == 'f':
webpage = self._download_webpage(url, video_id)
liveleak_url = self._search_regex(
r'logourl\s*:\s*(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
webpage, 'LiveLeak URL', group='url')
elif kind == 'i':
liveleak_url = 'http://www.liveleak.com/view?i=%s' % video_id
return self.url_result(liveleak_url, ie=LiveLeakIE.ie_key())

View File

@@ -338,7 +338,7 @@ class LivestreamOriginalIE(InfoExtractor):
info = {
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'thumbnail': self._search_regex(r'channelLogo.src\s*=\s*"([^"]+)"', webpage, 'thumbnail', None),
'thumbnail': self._search_regex(r'channelLogo\.src\s*=\s*"([^"]+)"', webpage, 'thumbnail', None),
}
video_data = self._download_json(stream_url, content_id)
is_live = video_data.get('isLive')

View File

@@ -11,7 +11,7 @@ from ..utils import (
class LnkGoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?lnkgo\.alfa\.lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)'
_VALID_URL = r'https?://(?:www\.)?lnkgo\.(?:alfa\.)?lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)'
_TESTS = [{
'url': 'http://lnkgo.alfa.lt/visi-video/yra-kaip-yra/ziurek-yra-kaip-yra-162',
'info_dict': {
@@ -42,6 +42,9 @@ class LnkGoIE(InfoExtractor):
'params': {
'skip_download': True, # HLS download
},
}, {
'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai',
'only_matching': True,
}]
_AGE_LIMITS = {
'N-7': 7,

View File

@@ -94,7 +94,7 @@ class LyndaBaseIE(InfoExtractor):
class LyndaIE(LyndaBaseIE):
IE_NAME = 'lynda'
IE_DESC = 'lynda.com videos'
_VALID_URL = r'https?://(?:www\.)?lynda\.com/(?:[^/]+/[^/]+/(?P<course_id>\d+)|player/embed)/(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?(?:lynda\.com|educourse\.ga)/(?:[^/]+/[^/]+/(?P<course_id>\d+)|player/embed)/(?P<id>\d+)'
_TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
@@ -110,6 +110,9 @@ class LyndaIE(LyndaBaseIE):
}, {
'url': 'https://www.lynda.com/player/embed/133770?tr=foo=1;bar=g;fizz=rt&fs=0',
'only_matching': True,
}, {
'url': 'https://educourse.ga/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
'only_matching': True,
}]
def _raise_unavailable(self, video_id):
@@ -253,7 +256,7 @@ class LyndaCourseIE(LyndaBaseIE):
# Course link equals to welcome/introduction video link of same course
# We will recognize it as course link
_VALID_URL = r'https?://(?:www|m)\.lynda\.com/(?P<coursepath>[^/]+/[^/]+/(?P<courseid>\d+))-\d\.html'
_VALID_URL = r'https?://(?:www|m)\.(?:lynda\.com|educourse\.ga)/(?P<coursepath>[^/]+/[^/]+/(?P<courseid>\d+))-\d\.html'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View File

@@ -5,7 +5,7 @@ from .common import InfoExtractor
class MakerTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
_VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer\.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
_TEST = {
'url': 'http://www.maker.tv/video/Fh3QgymL9gsc',
'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',

View File

@@ -22,7 +22,7 @@ class MangomoloBaseIE(InfoExtractor):
format_url = self._html_search_regex(
[
r'file\s*:\s*"(https?://[^"]+?/playlist.m3u8)',
r'file\s*:\s*"(https?://[^"]+?/playlist\.m3u8)',
r'<a[^>]+href="(rtsp://[^"]+)"'
], webpage, 'format url')
formats = self._extract_wowza_formats(

View File

@@ -0,0 +1,48 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import int_or_none
class ManyVidsIE(InfoExtractor):
_VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
_TEST = {
'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/',
'md5': '03f11bb21c52dd12a05be21a5c7dcc97',
'info_dict': {
'id': '133957',
'ext': 'mp4',
'title': 'everthing about me (Preview)',
'view_count': int,
'like_count': int,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(
r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P<url>(?:(?!\1).)+)\1',
webpage, 'video URL', group='url')
title = '%s (Preview)' % self._html_search_regex(
r'<h2[^>]+class="m-a-0"[^>]*>([^<]+)', webpage, 'title')
like_count = int_or_none(self._search_regex(
r'data-likes=["\'](\d+)', webpage, 'like count', default=None))
view_count = int_or_none(self._html_search_regex(
r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage,
'view count', default=None))
return {
'id': video_id,
'title': title,
'view_count': view_count,
'like_count': like_count,
'formats': [{
'url': video_url,
}],
}

View File

@@ -18,7 +18,7 @@ class MegaphoneIE(InfoExtractor):
'id': 'GLT9749789991',
'ext': 'mp3',
'title': '#97 What Kind Of Idiot Gets Phished?',
'thumbnail': 're:^https://.*\.png.*$',
'thumbnail': r're:^https://.*\.png.*$',
'duration': 1776.26375,
'author': 'Reply All',
},

View File

@@ -11,7 +11,7 @@ from ..utils import (
class MeipaiIE(InfoExtractor):
IE_DESC = '美拍'
_VALID_URL = r'https?://(?:www\.)?meipai.com/media/(?P<id>[0-9]+)'
_VALID_URL = r'https?://(?:www\.)?meipai\.com/media/(?P<id>[0-9]+)'
_TESTS = [{
# regular uploaded video
'url': 'http://www.meipai.com/media/531697625',

View File

@@ -12,12 +12,16 @@ from ..compat import (
compat_str,
compat_urllib_parse_unquote,
compat_urlparse,
compat_zip
)
from ..utils import (
clean_html,
ExtractorError,
int_or_none,
OnDemandPagedList,
str_to_int,
try_get,
urljoin,
)
@@ -54,27 +58,12 @@ class MixcloudIE(InfoExtractor):
'only_matching': True,
}]
_keys = [
'return { requestAnimationFrame: function(callback) { callback(); }, innerHeight: 500 };',
'pleasedontdownloadourmusictheartistswontgetpaid',
'window.addEventListener = window.addEventListener || function() {};',
'(function() { return new Date().toLocaleDateString(); })()'
]
_current_key = None
# See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js
def _decrypt_play_info(self, play_info, video_id):
play_info = base64.b64decode(play_info.encode('ascii'))
for num, key in enumerate(self._keys, start=1):
try:
return self._parse_json(
''.join([
compat_chr(compat_ord(ch) ^ compat_ord(key[idx % len(key)]))
for idx, ch in enumerate(play_info)]),
video_id)
except ExtractorError:
if num == len(self._keys):
raise
@staticmethod
def _decrypt_xor_cipher(key, ciphertext):
"""Encrypt/Decrypt XOR cipher. Both ways are possible because it's XOR."""
return ''.join([
compat_chr(compat_ord(ch) ^ compat_ord(k))
for ch, k in compat_zip(ciphertext, itertools.cycle(key))])
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -84,52 +73,119 @@ class MixcloudIE(InfoExtractor):
webpage = self._download_webpage(url, track_id)
if not self._current_key:
js_url = self._search_regex(
r'<script[^>]+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/js2/www_js_4\.[^>]+\.js)',
webpage, 'js url', default=None)
if js_url:
js = self._download_webpage(js_url, track_id, fatal=False)
if js:
key = self._search_regex(
r'player\s*:\s*{.*?\bvalue\s*:\s*(["\'])(?P<key>(?:(?!\1).)+)\1',
js, 'key', default=None, group='key')
if key and isinstance(key, compat_str):
self._keys.insert(0, key)
self._current_key = key
# Legacy path
encrypted_play_info = self._search_regex(
r'm-play-info="([^"]+)"', webpage, 'play info', default=None)
if encrypted_play_info is not None:
# Decode
encrypted_play_info = base64.b64decode(encrypted_play_info)
else:
# New path
full_info_json = self._parse_json(self._html_search_regex(
r'<script id="relay-data" type="text/x-mixcloud">([^<]+)</script>',
webpage, 'play info'), 'play info')
for item in full_info_json:
item_data = try_get(
item, lambda x: x['cloudcast']['data']['cloudcastLookup'],
dict)
if try_get(item_data, lambda x: x['streamInfo']['url']):
info_json = item_data
break
else:
raise ExtractorError('Failed to extract matching stream info')
message = self._html_search_regex(
r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)',
webpage, 'error message', default=None)
encrypted_play_info = self._search_regex(
r'm-play-info="([^"]+)"', webpage, 'play info')
js_url = self._search_regex(
r'<script[^>]+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/(?:js2/www_js_4|js/www)\.[^>]+\.js)',
webpage, 'js url')
js = self._download_webpage(js_url, track_id, 'Downloading JS')
# Known plaintext attack
if encrypted_play_info:
kps = ['{"stream_url":']
kpa_target = encrypted_play_info
else:
kps = ['https://', 'http://']
kpa_target = base64.b64decode(info_json['streamInfo']['url'])
for kp in kps:
partial_key = self._decrypt_xor_cipher(kpa_target, kp)
for quote in ["'", '"']:
key = self._search_regex(
r'{0}({1}[^{0}]*){0}'.format(quote, re.escape(partial_key)),
js, 'encryption key', default=None)
if key is not None:
break
else:
continue
break
else:
raise ExtractorError('Failed to extract encryption key')
play_info = self._decrypt_play_info(encrypted_play_info, track_id)
if encrypted_play_info is not None:
play_info = self._parse_json(self._decrypt_xor_cipher(key, encrypted_play_info), 'play info')
if message and 'stream_url' not in play_info:
raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
song_url = play_info['stream_url']
formats = [{
'format_id': 'normal',
'url': song_url
}]
if message and 'stream_url' not in play_info:
raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
title = self._html_search_regex(r'm-title="([^"]+)"', webpage, 'title')
thumbnail = self._proto_relative_url(self._html_search_regex(
r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', fatal=False))
uploader = self._html_search_regex(
r'm-owner-name="([^"]+)"', webpage, 'uploader', fatal=False)
uploader_id = self._search_regex(
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
description = self._og_search_description(webpage)
view_count = str_to_int(self._search_regex(
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
r'/listeners/?">([0-9,.]+)</a>',
r'(?:m|data)-tooltip=["\']([\d,.]+) plays'],
webpage, 'play count', default=None))
song_url = play_info['stream_url']
else:
title = info_json['name']
thumbnail = urljoin(
'https://thumbnailer.mixcloud.com/unsafe/600x600/',
try_get(info_json, lambda x: x['picture']['urlRoot'], compat_str))
uploader = try_get(info_json, lambda x: x['owner']['displayName'])
uploader_id = try_get(info_json, lambda x: x['owner']['username'])
description = try_get(info_json, lambda x: x['description'])
view_count = int_or_none(try_get(info_json, lambda x: x['plays']))
title = self._html_search_regex(r'm-title="([^"]+)"', webpage, 'title')
thumbnail = self._proto_relative_url(self._html_search_regex(
r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', fatal=False))
uploader = self._html_search_regex(
r'm-owner-name="([^"]+)"', webpage, 'uploader', fatal=False)
uploader_id = self._search_regex(
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
description = self._og_search_description(webpage)
view_count = str_to_int(self._search_regex(
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
r'/listeners/?">([0-9,.]+)</a>',
r'(?:m|data)-tooltip=["\']([\d,.]+) plays'],
webpage, 'play count', default=None))
stream_info = info_json['streamInfo']
formats = []
for url_key in ('url', 'hlsUrl', 'dashUrl'):
format_url = stream_info.get(url_key)
if not format_url:
continue
decrypted = self._decrypt_xor_cipher(key, base64.b64decode(format_url))
if not decrypted:
continue
if url_key == 'hlsUrl':
formats.extend(self._extract_m3u8_formats(
decrypted, track_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
elif url_key == 'dashUrl':
formats.extend(self._extract_mpd_formats(
decrypted, track_id, mpd_id='dash', fatal=False))
else:
formats.append({
'format_id': 'http',
'url': decrypted,
})
self._sort_formats(formats)
return {
'id': track_id,
'title': title,
'url': song_url,
'formats': formats,
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
@@ -235,7 +291,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
functools.partial(
self._tracks_page_func,
'%s/%s' % (user_id, list_type), video_id, 'list of %s' % list_type),
self._PAGE_SIZE, use_cache=True)
self._PAGE_SIZE)
return self.playlist_result(
entries, video_id, '%s (%s)' % (username, list_type), description)

View File

@@ -8,8 +8,8 @@ from .common import InfoExtractor
class MorningstarIE(InfoExtractor):
IE_DESC = 'morningstar.com'
_VALID_URL = r'https?://(?:www\.)?morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)'
_TEST = {
_VALID_URL = r'https?://(?:(?:www|news)\.)morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869',
'md5': '6c0acface7a787aadc8391e4bbf7b0f5',
'info_dict': {
@@ -19,7 +19,10 @@ class MorningstarIE(InfoExtractor):
'description': "Vanguard's Joel Dickson on managing higher tax rates for high-income earners and fund capital-gain distributions in 2013.",
'thumbnail': r're:^https?://.*m(?:orning)?star\.com/.+thumb\.jpg$'
}
}
}, {
'url': 'http://news.morningstar.com/cover/videocenter.aspx?id=825556',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View File

@@ -258,7 +258,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
if mgid is None or ':' not in mgid:
mgid = self._search_regex(
[r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
[r'data-mgid="(.*?)"', r'swfobject\.embedSWF\(".*?(mgid:.*?)"'],
webpage, 'mgid', default=None)
if not mgid:

View File

@@ -160,7 +160,7 @@ class MyVideoIE(InfoExtractor):
else:
video_playpath = ''
video_swfobj = self._search_regex(r'swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
video_swfobj = self._search_regex(r'swfobject\.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
video_swfobj = compat_urllib_parse_unquote(video_swfobj)
video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",

View File

@@ -111,7 +111,7 @@ class NationalGeographicIE(ThePlatformIE, AdobePassIE):
release_url = self._search_regex(
r'video_auth_playlist_url\s*=\s*"([^"]+)"',
webpage, 'release url')
theplatform_path = self._search_regex(r'https?://link.theplatform.com/s/([^?]+)', release_url, 'theplatform path')
theplatform_path = self._search_regex(r'https?://link\.theplatform\.com/s/([^?]+)', release_url, 'theplatform path')
video_id = theplatform_path.split('/')[-1]
query = {
'mbr': 'true',

View File

@@ -43,7 +43,7 @@ class NaverIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
m_id = re.search(r'var rmcPlayer = new nhn\.rmcnmv\.RMCVideoPlayer\("(.+?)", "(.+?)"',
webpage)
if m_id is None:
error = self._html_search_regex(

View File

@@ -122,7 +122,7 @@ class NBAIE(TurnerBaseIE):
playlist_title = self._og_search_title(webpage, fatal=False)
entries = OnDemandPagedList(
functools.partial(self._fetch_page, team, video_id),
self._PAGE_SIZE, use_cache=True)
self._PAGE_SIZE)
return self.playlist_result(entries, team, playlist_title)

View File

@@ -15,7 +15,7 @@ from ..utils import (
class NBCIE(AdobePassIE):
_VALID_URL = r'(?P<permalink>https?://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+))'
_VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+))'
_TESTS = [
{
@@ -72,6 +72,7 @@ class NBCIE(AdobePassIE):
def _real_extract(self, url):
permalink, video_id = re.match(self._VALID_URL, url).groups()
permalink = 'http' + permalink
video_data = self._download_json(
'https://api.nbc.com/v3/videos', video_id, query={
'filter[permalink]': permalink,
@@ -109,10 +110,10 @@ class NBCSportsVPlayerIE(InfoExtractor):
_VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
_TESTS = [{
'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI',
'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI',
'info_dict': {
'id': '9CsDKds0kvHI',
'ext': 'flv',
'ext': 'mp4',
'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
'timestamp': 1426270238,
@@ -120,7 +121,7 @@ class NBCSportsVPlayerIE(InfoExtractor):
'uploader': 'NBCU-SPORTS',
}
}, {
'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z',
'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/_hqLjQ95yx8Z',
'only_matching': True,
}]
@@ -134,7 +135,8 @@ class NBCSportsVPlayerIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
theplatform_url = self._og_search_video_url(webpage)
theplatform_url = self._og_search_video_url(webpage).replace(
'vplayer.nbcsports.com', 'player.theplatform.com')
return self.url_result(theplatform_url, 'ThePlatform')

View File

@@ -18,7 +18,13 @@ from ..utils import (
class NexxIE(InfoExtractor):
_VALID_URL = r'https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/(?P<id>\d+)'
_VALID_URL = r'''(?x)
(?:
https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/|
nexx:(?P<domain_id_s>\d+):
)
(?P<id>\d+)
'''
_TESTS = [{
# movie
'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907',
@@ -62,8 +68,18 @@ class NexxIE(InfoExtractor):
}, {
'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
'only_matching': True,
}, {
'url': 'nexx:748:128907',
'only_matching': True,
}]
@staticmethod
def _extract_domain_id(webpage):
mobj = re.search(
r'<script\b[^>]+\bsrc=["\'](?:https?:)?//require\.nexx(?:\.cloud|cdn\.com)/(?P<id>\d+)',
webpage)
return mobj.group('id') if mobj else None
@staticmethod
def _extract_urls(webpage):
# Reference:
@@ -72,11 +88,8 @@ class NexxIE(InfoExtractor):
entries = []
# JavaScript Integration
mobj = re.search(
r'<script\b[^>]+\bsrc=["\']https?://require\.nexx(?:\.cloud|cdn\.com)/(?P<id>\d+)',
webpage)
if mobj:
domain_id = mobj.group('id')
domain_id = NexxIE._extract_domain_id(webpage)
if domain_id:
for video_id in re.findall(
r'(?is)onPLAYReady.+?_play\.init\s*\(.+?\s*,\s*["\']?(\d+)',
webpage):
@@ -112,7 +125,8 @@ class NexxIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
domain_id, video_id = mobj.group('domain_id', 'id')
domain_id = mobj.group('domain_id') or mobj.group('domain_id_s')
video_id = mobj.group('id')
# Reverse engineered from JS code (see getDeviceID function)
device_id = '%d:%d:%d%d' % (

View File

@@ -40,7 +40,7 @@ class NiconicoIE(InfoExtractor):
'uploader': 'takuya0301',
'uploader_id': '2698420',
'upload_date': '20131123',
'timestamp': 1385182762,
'timestamp': int, # timestamp is unstable
'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
'duration': 33,
'view_count': int,
@@ -115,8 +115,8 @@ class NiconicoIE(InfoExtractor):
'skip': 'Requires an account',
}, {
# "New" HTML5 video
# md5 is unstable
'url': 'http://www.nicovideo.jp/watch/sm31464864',
'md5': '351647b4917660986dc0fa8864085135',
'info_dict': {
'id': 'sm31464864',
'ext': 'mp4',
@@ -124,7 +124,7 @@ class NiconicoIE(InfoExtractor):
'description': 'md5:e52974af9a96e739196b2c1ca72b5feb',
'timestamp': 1498514060,
'upload_date': '20170626',
'uploader': 'ゲス',
'uploader': 'ゲス',
'uploader_id': '40826363',
'thumbnail': r're:https?://.*',
'duration': 198,
@@ -132,6 +132,25 @@ class NiconicoIE(InfoExtractor):
'comment_count': int,
},
'skip': 'Requires an account',
}, {
# Video without owner
'url': 'http://www.nicovideo.jp/watch/sm18238488',
'md5': 'd265680a1f92bdcbbd2a507fc9e78a9e',
'info_dict': {
'id': 'sm18238488',
'ext': 'mp4',
'title': '【実写版】ミュータントタートルズ',
'description': 'md5:15df8988e47a86f9e978af2064bf6d8e',
'timestamp': 1341160408,
'upload_date': '20120701',
'uploader': None,
'uploader_id': None,
'thumbnail': r're:https?://.*',
'duration': 5271,
'view_count': int,
'comment_count': int,
},
'skip': 'Requires an account',
}, {
'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
'only_matching': True,
@@ -395,7 +414,9 @@ class NiconicoIE(InfoExtractor):
webpage_url = get_video_info('watch_url') or url
owner = api_data.get('owner', {})
# Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
# in the JSON, which will cause None to be returned instead of {}.
owner = try_get(api_data, lambda x: x.get('owner'), dict) or {}
uploader_id = get_video_info(['ch_id', 'user_id']) or owner.get('id')
uploader = get_video_info(['ch_name', 'user_nickname']) or owner.get('nickname')

View File

@@ -6,6 +6,7 @@ from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
js_to_json,
smuggle_url,
try_get,
)
@@ -24,8 +25,6 @@ class NoovoIE(InfoExtractor):
'timestamp': 1491399228,
'upload_date': '20170405',
'uploader_id': '618566855001',
'creator': 'vtele',
'view_count': int,
'series': 'RPM+',
},
'params': {
@@ -37,13 +36,11 @@ class NoovoIE(InfoExtractor):
'info_dict': {
'id': '5395865725001',
'title': 'Épisode 13 : Les retrouvailles',
'description': 'md5:336d5ebc5436534e61d16e63ddfca327',
'description': 'md5:888c3330f0c1b4476c5bc99a1c040473',
'ext': 'mp4',
'timestamp': 1492019320,
'upload_date': '20170412',
'uploader_id': '618566855001',
'creator': 'vtele',
'view_count': int,
'series': "L'amour est dans le pré",
'season_number': 5,
'episode': 'Épisode 13',
@@ -58,40 +55,46 @@ class NoovoIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._download_json(
'http://api.noovo.ca/api/v1/pages/single-episode/%s' % video_id,
video_id)['data']
webpage = self._download_webpage(url, video_id)
content = try_get(data, lambda x: x['contents'][0])
bc_url = BrightcoveNewIE._extract_url(self, webpage)
brightcove_id = data.get('brightcoveId') or content['brightcoveId']
data = self._parse_json(
self._search_regex(
r'(?s)dataLayer\.push\(\s*({.+?})\s*\);', webpage, 'data',
default='{}'),
video_id, transform_source=js_to_json, fatal=False)
title = try_get(
data, lambda x: x['video']['nom'],
compat_str) or self._html_search_meta(
'dcterms.Title', webpage, 'title', fatal=True)
description = self._html_search_meta(
('dcterms.Description', 'description'), webpage, 'description')
series = try_get(
data, (
lambda x: x['show']['title'],
lambda x: x['season']['show']['title']),
compat_str)
data, lambda x: x['emission']['nom']) or self._search_regex(
r'<div[^>]+class="banner-card__subtitle h4"[^>]*>([^<]+)',
webpage, 'series', default=None)
episode = None
og = data.get('og')
if isinstance(og, dict) and og.get('type') == 'video.episode':
episode = og.get('title')
season_el = try_get(data, lambda x: x['emission']['saison'], dict) or {}
season = try_get(season_el, lambda x: x['nom'], compat_str)
season_number = int_or_none(try_get(season_el, lambda x: x['numero']))
video = content or data
episode_el = try_get(season_el, lambda x: x['episode'], dict) or {}
episode = try_get(episode_el, lambda x: x['nom'], compat_str)
episode_number = int_or_none(try_get(episode_el, lambda x: x['numero']))
return {
'_type': 'url_transparent',
'ie_key': BrightcoveNewIE.ie_key(),
'url': smuggle_url(
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
{'geo_countries': ['CA']}),
'id': brightcove_id,
'title': video.get('title'),
'creator': video.get('source'),
'view_count': int_or_none(video.get('viewsCount')),
'url': smuggle_url(bc_url, {'geo_countries': ['CA']}),
'title': title,
'description': description,
'series': series,
'season_number': int_or_none(try_get(
data, lambda x: x['season']['seasonNumber'])),
'season': season,
'season_number': season_number,
'episode': episode,
'episode_number': int_or_none(data.get('episodeNumber')),
'episode_number': episode_number,
}

View File

@@ -469,7 +469,7 @@ class SchoolTVIE(NPODataMidEmbedIE):
class HetKlokhuisIE(NPODataMidEmbedIE):
IE_NAME = 'hetklokhuis'
_VALID_URL = r'https?://(?:www\.)?hetklokhuis.nl/[^/]+/\d+/(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:www\.)?hetklokhuis\.nl/[^/]+/\d+/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'http://hetklokhuis.nl/tv-uitzending/3471/Zwaartekrachtsgolven',

View File

@@ -7,7 +7,7 @@ from .common import InfoExtractor
class OnceIE(InfoExtractor):
_VALID_URL = r'https?://.+?\.unicornmedia\.com/now/[^/]+/[^/]+/(?P<domain_id>[^/]+)/(?P<application_id>[^/]+)/(?:[^/]+/)?(?P<media_item_id>[^/]+)/content\.(?:once|m3u8|mp4)'
_VALID_URL = r'https?://.+?\.unicornmedia\.com/now/(?:ads/vmap/)?[^/]+/[^/]+/(?P<domain_id>[^/]+)/(?P<application_id>[^/]+)/(?:[^/]+/)?(?P<media_item_id>[^/]+)/content\.(?:once|m3u8|mp4)'
ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8'
PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4'

View File

@@ -13,11 +13,11 @@ from ..utils import (
class OnionStudiosIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:videos/[^/]+-|embed\?.*\bid=)(?P<id>\d+)(?!-)'
_VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:video(?:s/[^/]+-|/)|embed\?.*\bid=)(?P<id>\d+)(?!-)'
_TESTS = [{
'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
'md5': 'e49f947c105b8a78a675a0ee1bddedfe',
'md5': '719d1f8c32094b8c33902c17bcae5e34',
'info_dict': {
'id': '2937',
'ext': 'mp4',
@@ -29,12 +29,15 @@ class OnionStudiosIE(InfoExtractor):
}, {
'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
'only_matching': True,
}, {
'url': 'http://www.onionstudios.com/video/6139.json',
'only_matching': True,
}]
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/embed.+?)\1', webpage)
r'(?s)<(?:iframe|bulbs-video)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/(?:embed.+?|video/\d+\.json))\1', webpage)
if mobj:
return mobj.group('url')

View File

@@ -1,14 +1,244 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import os
import re
import subprocess
import tempfile
from .common import InfoExtractor
from ..compat import compat_chr
from ..utils import (
determine_ext,
ExtractorError,
from ..compat import (
compat_urlparse,
compat_kwargs,
)
from ..utils import (
check_executable,
determine_ext,
encodeArgument,
ExtractorError,
get_element_by_id,
get_exe_version,
is_outdated_version,
std_headers,
)
def cookie_to_dict(cookie):
cookie_dict = {
'name': cookie.name,
'value': cookie.value,
}
if cookie.port_specified:
cookie_dict['port'] = cookie.port
if cookie.domain_specified:
cookie_dict['domain'] = cookie.domain
if cookie.path_specified:
cookie_dict['path'] = cookie.path
if cookie.expires is not None:
cookie_dict['expires'] = cookie.expires
if cookie.secure is not None:
cookie_dict['secure'] = cookie.secure
if cookie.discard is not None:
cookie_dict['discard'] = cookie.discard
try:
if (cookie.has_nonstandard_attr('httpOnly') or
cookie.has_nonstandard_attr('httponly') or
cookie.has_nonstandard_attr('HttpOnly')):
cookie_dict['httponly'] = True
except TypeError:
pass
return cookie_dict
def cookie_jar_to_list(cookie_jar):
return [cookie_to_dict(cookie) for cookie in cookie_jar]
class PhantomJSwrapper(object):
"""PhantomJS wrapper class
This class is experimental.
"""
_TEMPLATE = r'''
phantom.onError = function(msg, trace) {{
var msgStack = ['PHANTOM ERROR: ' + msg];
if(trace && trace.length) {{
msgStack.push('TRACE:');
trace.forEach(function(t) {{
msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line
+ (t.function ? ' (in function ' + t.function +')' : ''));
}});
}}
console.error(msgStack.join('\n'));
phantom.exit(1);
}};
var page = require('webpage').create();
var fs = require('fs');
var read = {{ mode: 'r', charset: 'utf-8' }};
var write = {{ mode: 'w', charset: 'utf-8' }};
JSON.parse(fs.read("{cookies}", read)).forEach(function(x) {{
phantom.addCookie(x);
}});
page.settings.resourceTimeout = {timeout};
page.settings.userAgent = "{ua}";
page.onLoadStarted = function() {{
page.evaluate(function() {{
delete window._phantom;
delete window.callPhantom;
}});
}};
var saveAndExit = function() {{
fs.write("{html}", page.content, write);
fs.write("{cookies}", JSON.stringify(phantom.cookies), write);
phantom.exit();
}};
page.onLoadFinished = function(status) {{
if(page.url === "") {{
page.setContent(fs.read("{html}", read), "{url}");
}}
else {{
{jscode}
}}
}};
page.open("");
'''
_TMP_FILE_NAMES = ['script', 'html', 'cookies']
@staticmethod
def _version():
return get_exe_version('phantomjs', version_re=r'([0-9.]+)')
def __init__(self, extractor, required_version=None, timeout=10000):
self.exe = check_executable('phantomjs', ['-v'])
if not self.exe:
raise ExtractorError('PhantomJS executable not found in PATH, '
'download it from http://phantomjs.org',
expected=True)
self.extractor = extractor
if required_version:
version = self._version()
if is_outdated_version(version, required_version):
self.extractor._downloader.report_warning(
'Your copy of PhantomJS is outdated, update it to version '
'%s or newer if you encounter any errors.' % required_version)
self.options = {
'timeout': timeout,
}
self._TMP_FILES = {}
for name in self._TMP_FILE_NAMES:
tmp = tempfile.NamedTemporaryFile(delete=False)
tmp.close()
self._TMP_FILES[name] = tmp
def __del__(self):
for name in self._TMP_FILE_NAMES:
try:
os.remove(self._TMP_FILES[name].name)
except:
pass
def _save_cookies(self, url):
cookies = cookie_jar_to_list(self.extractor._downloader.cookiejar)
for cookie in cookies:
if 'path' not in cookie:
cookie['path'] = '/'
if 'domain' not in cookie:
cookie['domain'] = compat_urlparse.urlparse(url).netloc
with open(self._TMP_FILES['cookies'].name, 'wb') as f:
f.write(json.dumps(cookies).encode('utf-8'))
def _load_cookies(self):
with open(self._TMP_FILES['cookies'].name, 'rb') as f:
cookies = json.loads(f.read().decode('utf-8'))
for cookie in cookies:
if cookie['httponly'] is True:
cookie['rest'] = {'httpOnly': None}
if 'expiry' in cookie:
cookie['expire_time'] = cookie['expiry']
self.extractor._set_cookie(**compat_kwargs(cookie))
def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'):
"""
Downloads webpage (if needed) and executes JS
Params:
url: website url
html: optional, html code of website
video_id: video id
note: optional, displayed when downloading webpage
note2: optional, displayed when executing JS
headers: custom http headers
jscode: code to be executed when page is loaded
Returns tuple with:
* downloaded website (after JS execution)
* anything you print with `console.log` (but not inside `page.execute`!)
In most cases you don't need to add any `jscode`.
It is executed in `page.onLoadFinished`.
`saveAndExit();` is mandatory, use it instead of `phantom.exit()`
It is possible to wait for some element on the webpage, for example:
var check = function() {
var elementFound = page.evaluate(function() {
return document.querySelector('#b.done') !== null;
});
if(elementFound)
saveAndExit();
else
window.setTimeout(check, 500);
}
page.evaluate(function(){
document.querySelector('#a').click();
});
check();
"""
if 'saveAndExit();' not in jscode:
raise ExtractorError('`saveAndExit();` not found in `jscode`')
if not html:
html = self.extractor._download_webpage(url, video_id, note=note, headers=headers)
with open(self._TMP_FILES['html'].name, 'wb') as f:
f.write(html.encode('utf-8'))
self._save_cookies(url)
replaces = self.options
replaces['url'] = url
user_agent = headers.get('User-Agent') or std_headers['User-Agent']
replaces['ua'] = user_agent.replace('"', '\\"')
replaces['jscode'] = jscode
for x in self._TMP_FILE_NAMES:
replaces[x] = self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"')
with open(self._TMP_FILES['script'].name, 'wb') as f:
f.write(self._TEMPLATE.format(**replaces).encode('utf-8'))
if video_id is None:
self.extractor.to_screen('%s' % (note2,))
else:
self.extractor.to_screen('%s: %s' % (video_id, note2))
p = subprocess.Popen([
self.exe, '--ssl-protocol=any',
self._TMP_FILES['script'].name
], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, err = p.communicate()
if p.returncode != 0:
raise ExtractorError(
'Executing JS failed\n:' + encodeArgument(err))
with open(self._TMP_FILES['html'].name, 'rb') as f:
html = f.read().decode('utf-8')
self._load_cookies()
return (html, encodeArgument(out))
class OpenloadIE(InfoExtractor):
@@ -58,6 +288,8 @@ class OpenloadIE(InfoExtractor):
'only_matching': True,
}]
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
@staticmethod
def _extract_urls(webpage):
return re.findall(
@@ -66,47 +298,22 @@ class OpenloadIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id)
url = 'https://openload.co/embed/%s/' % video_id
headers = {
'User-Agent': self._USER_AGENT,
}
webpage = self._download_webpage(url, video_id, headers=headers)
if 'File not found' in webpage or 'deleted by the owner' in webpage:
raise ExtractorError('File not found', expected=True)
raise ExtractorError('File not found', expected=True, video_id=video_id)
ol_id = self._search_regex(
'<span[^>]+id="[^"]+"[^>]*>([0-9A-Za-z]+)</span>',
webpage, 'openload ID')
phantom = PhantomJSwrapper(self, required_version='2.0')
webpage, _ = phantom.get(url, html=webpage, video_id=video_id, headers=headers)
decoded = ''
a = ol_id[0:24]
b = []
for i in range(0, len(a), 8):
b.append(int(a[i:i + 8] or '0', 16))
ol_id = ol_id[24:]
j = 0
k = 0
while j < len(ol_id):
c = 128
d = 0
e = 0
f = 0
_more = True
while _more:
if j + 1 >= len(ol_id):
c = 143
f = int(ol_id[j:j + 2] or '0', 16)
j += 2
d += (f & 127) << e
e += 7
_more = f >= c
g = d ^ b[k % 3]
for i in range(4):
char_dec = (g >> 8 * i) & (c + 127)
char = compat_chr(char_dec)
if char != '#':
decoded += char
k += 1
decoded_id = get_element_by_id('streamurl', webpage)
video_url = 'https://openload.co/stream/%s?mime=true'
video_url = video_url % decoded
video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id
title = self._og_search_title(webpage, default=None) or self._search_regex(
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
@@ -114,15 +321,17 @@ class OpenloadIE(InfoExtractor):
'description', webpage, 'title', fatal=True)
entries = self._parse_html5_media_entries(url, webpage, video_id)
subtitles = entries[0]['subtitles'] if entries else None
entry = entries[0] if entries else {}
subtitles = entry.get('subtitles')
info_dict = {
'id': video_id,
'title': title,
'thumbnail': self._og_search_thumbnail(webpage, default=None),
'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
'url': video_url,
# Seems all videos have extensions in their titles
'ext': determine_ext(title, 'mp4'),
'subtitles': subtitles,
'http_headers': headers,
}
return info_dict

View File

@@ -6,14 +6,15 @@ import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
HEADRequest,
unified_strdate,
strip_jsonp,
int_or_none,
float_or_none,
determine_ext,
float_or_none,
HEADRequest,
int_or_none,
orderedSet,
remove_end,
strip_jsonp,
unescapeHTML,
unified_strdate,
)
@@ -307,3 +308,108 @@ class ORFIPTVIE(InfoExtractor):
'upload_date': upload_date,
'formats': formats,
}
class ORFFM4StoryIE(InfoExtractor):
IE_NAME = 'orf:fm4:story'
IE_DESC = 'fm4.orf.at stories'
_VALID_URL = r'https?://fm4\.orf\.at/stories/(?P<id>\d+)'
_TEST = {
'url': 'http://fm4.orf.at/stories/2865738/',
'playlist': [{
'md5': 'e1c2c706c45c7b34cf478bbf409907ca',
'info_dict': {
'id': '547792',
'ext': 'flv',
'title': 'Manu Delago und Inner Tongue live',
'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
'duration': 1748.52,
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20170913',
},
}, {
'md5': 'c6dd2179731f86f4f55a7b49899d515f',
'info_dict': {
'id': '547798',
'ext': 'flv',
'title': 'Manu Delago und Inner Tongue live (2)',
'duration': 1504.08,
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20170913',
'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
},
}],
}
def _real_extract(self, url):
story_id = self._match_id(url)
webpage = self._download_webpage(url, story_id)
entries = []
all_ids = orderedSet(re.findall(r'data-video(?:id)?="(\d+)"', webpage))
for idx, video_id in enumerate(all_ids):
data = self._download_json(
'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id,
video_id)[0]
duration = float_or_none(data['duration'], 1000)
video = data['sources']['q8c']
load_balancer_url = video['loadBalancerUrl']
abr = int_or_none(video.get('audioBitrate'))
vbr = int_or_none(video.get('bitrate'))
fps = int_or_none(video.get('videoFps'))
width = int_or_none(video.get('videoWidth'))
height = int_or_none(video.get('videoHeight'))
thumbnail = video.get('preview')
rendition = self._download_json(
load_balancer_url, video_id, transform_source=strip_jsonp)
f = {
'abr': abr,
'vbr': vbr,
'fps': fps,
'width': width,
'height': height,
}
formats = []
for format_id, format_url in rendition['redirect'].items():
if format_id == 'rtmp':
ff = f.copy()
ff.update({
'url': format_url,
'format_id': format_id,
})
formats.append(ff)
elif determine_ext(format_url) == 'f4m':
formats.extend(self._extract_f4m_formats(
format_url, video_id, f4m_id=format_id))
elif determine_ext(format_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4', m3u8_id=format_id))
else:
continue
self._sort_formats(formats)
title = remove_end(self._og_search_title(webpage), ' - fm4.ORF.at')
if idx >= 1:
# Titles are duplicates, make them unique
title += ' (' + str(idx + 1) + ')'
description = self._og_search_description(webpage)
upload_date = unified_strdate(self._html_search_meta(
'dc.date', webpage, 'upload date'))
entries.append({
'id': video_id,
'title': title,
'description': description,
'duration': duration,
'thumbnail': thumbnail,
'upload_date': upload_date,
'formats': formats,
})
return self.playlist_result(entries)

View File

@@ -11,7 +11,7 @@ class ParliamentLiveUKIE(InfoExtractor):
_TESTS = [{
'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
'info_dict': {
'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
'id': '1_af9nv9ym',
'ext': 'mp4',
'title': 'Home Affairs Committee',
'uploader_id': 'FFMPEG-01',
@@ -28,14 +28,14 @@ class ParliamentLiveUKIE(InfoExtractor):
webpage = self._download_webpage(
'http://vodplayer.parliamentlive.tv/?mid=' + video_id, video_id)
widget_config = self._parse_json(self._search_regex(
r'kWidgetConfig\s*=\s*({.+});',
r'(?s)kWidgetConfig\s*=\s*({.+});',
webpage, 'kaltura widget config'), video_id)
kaltura_url = 'kaltura:%s:%s' % (widget_config['wid'][1:], widget_config['entry_id'])
kaltura_url = 'kaltura:%s:%s' % (
widget_config['wid'][1:], widget_config['entry_id'])
event_title = self._download_json(
'http://parliamentlive.tv/Event/GetShareVideo/' + video_id, video_id)['event']['title']
return {
'_type': 'url_transparent',
'id': video_id,
'title': event_title,
'description': '',
'url': kaltura_url,

View File

@@ -187,7 +187,7 @@ class PBSIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://
(?:
# Direct video URL
(?:%s)/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
(?:%s)/(?:(?:vir|port)alplayer|video)/(?P<id>[0-9]+)(?:[?/]|$) |
# Article with embedded player (or direct video)
(?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
# Player
@@ -367,6 +367,10 @@ class PBSIE(InfoExtractor):
{
'url': 'http://watch.knpb.org/video/2365616055/',
'only_matching': True,
},
{
'url': 'https://player.pbs.org/portalplayer/3004638221/?uid=',
'only_matching': True,
}
]
_ERRORS = {

View File

@@ -0,0 +1,78 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
extract_attributes,
int_or_none,
unified_timestamp,
)
class PopcornTVIE(InfoExtractor):
_VALID_URL = r'https?://[^/]+\.popcorntv\.it/guarda/(?P<display_id>[^/]+)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://animemanga.popcorntv.it/guarda/food-wars-battaglie-culinarie-episodio-01/9183',
'md5': '47d65a48d147caf692ab8562fe630b45',
'info_dict': {
'id': '9183',
'display_id': 'food-wars-battaglie-culinarie-episodio-01',
'ext': 'mp4',
'title': 'Food Wars, Battaglie Culinarie | Episodio 01',
'description': 'md5:b8bea378faae4651d3b34c6e112463d0',
'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1497610857,
'upload_date': '20170616',
'duration': 1440,
'view_count': int,
},
}, {
'url': 'https://cinema.popcorntv.it/guarda/smash-cut/10433',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id, video_id = mobj.group('display_id', 'id')
webpage = self._download_webpage(url, display_id)
m3u8_url = extract_attributes(
self._search_regex(
r'(<link[^>]+itemprop=["\'](?:content|embed)Url[^>]*>)',
webpage, 'content'
))['href']
formats = self._extract_m3u8_formats(
m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
title = self._search_regex(
r'<h1[^>]+itemprop=["\']name[^>]*>([^<]+)', webpage,
'title', default=None) or self._og_search_title(webpage)
description = self._html_search_regex(
r'(?s)<article[^>]+itemprop=["\']description[^>]*>(.+?)</article>',
webpage, 'description', fatal=False)
thumbnail = self._og_search_thumbnail(webpage)
timestamp = unified_timestamp(self._html_search_meta(
'uploadDate', webpage, 'timestamp'))
print(self._html_search_meta(
'duration', webpage))
duration = int_or_none(self._html_search_meta(
'duration', webpage), invscale=60)
view_count = int_or_none(self._html_search_meta(
'interactionCount', webpage, 'view count'))
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'timestamp': timestamp,
'duration': duration,
'view_count': view_count,
'formats': formats,
}

View File

@@ -14,7 +14,7 @@ from ..utils import (
class PornFlipIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z]{11})'
_VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[0-9A-Za-z-]{11})'
_TESTS = [{
'url': 'https://www.pornflip.com/v/wz7DfNhMmep',
'md5': '98c46639849145ae1fd77af532a9278c',
@@ -34,6 +34,12 @@ class PornFlipIE(InfoExtractor):
}, {
'url': 'https://www.pornflip.com/embed/wz7DfNhMmep',
'only_matching': True,
}, {
'url': 'https://www.pornflip.com/v/EkRD6-vS2-s',
'only_matching': True,
}, {
'url': 'https://www.pornflip.com/embed/EkRD6-vS2-s',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@@ -54,7 +54,7 @@ class PornHdIE(InfoExtractor):
r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')
sources = self._parse_json(js_to_json(self._search_regex(
r"(?s)sources'?\s*:\s*(\{.+?\})\s*\}[;,)]",
r"(?s)sources'?\s*[:=]\s*(\{.+?\})",
webpage, 'sources', default='{}')), video_id)
if not sources:
@@ -82,7 +82,8 @@ class PornHdIE(InfoExtractor):
view_count = int_or_none(self._html_search_regex(
r'(\d+) views\s*<', webpage, 'view count', fatal=False))
thumbnail = self._search_regex(
r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage,
'thumbnail', fatal=False, group='url')
return {
'id': video_id,

View File

@@ -186,7 +186,7 @@ class PornHubIE(InfoExtractor):
title, thumbnail, duration = [None] * 3
video_uploader = self._html_search_regex(
r'(?s)From:&nbsp;.+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<',
r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:user|channel)s/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
webpage, 'uploader', fatal=False)
view_count = self._extract_count(

View File

@@ -20,20 +20,37 @@ from ..utils import (
class RadioCanadaIE(InfoExtractor):
IE_NAME = 'radiocanada'
_VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)'
_TEST = {
'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
'info_dict': {
'id': '7184272',
'ext': 'mp4',
'title': 'Le parcours du tireur capté sur vidéo',
'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
'upload_date': '20141023',
_TESTS = [
{
'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
'info_dict': {
'id': '7184272',
'ext': 'mp4',
'title': 'Le parcours du tireur capté sur vidéo',
'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
'upload_date': '20141023',
},
'params': {
# m3u8 download
'skip_download': True,
}
},
'params': {
# m3u8 download
'skip_download': True,
},
}
{
# empty Title
'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7754998/',
'info_dict': {
'id': '7754998',
'ext': 'mp4',
'title': 'letelejournal22h',
'description': 'INTEGRALE WEB 22H-TJ',
'upload_date': '20170720',
},
'params': {
# m3u8 download
'skip_download': True,
},
}
]
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
@@ -59,6 +76,7 @@ class RadioCanadaIE(InfoExtractor):
device_types.append('android')
formats = []
error = None
# TODO: extract f4m formats
# f4m formats can be extracted using flashhd device_type but they produce unplayable file
for device_type in device_types:
@@ -84,8 +102,8 @@ class RadioCanadaIE(InfoExtractor):
if not v_url:
continue
if v_url == 'null':
raise ExtractorError('%s said: %s' % (
self.IE_NAME, xpath_text(v_data, 'message')), expected=True)
error = xpath_text(v_data, 'message')
continue
ext = determine_ext(v_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
@@ -129,6 +147,9 @@ class RadioCanadaIE(InfoExtractor):
formats.extend(self._extract_f4m_formats(
base_url + '/manifest.f4m', video_id,
f4m_id='hds', fatal=False))
if not formats and error:
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, error), expected=True)
self._sort_formats(formats)
subtitles = {}
@@ -141,7 +162,7 @@ class RadioCanadaIE(InfoExtractor):
return {
'id': video_id,
'title': get_meta('Title'),
'title': get_meta('Title') or get_meta('AV-nomEmission'),
'description': get_meta('Description') or get_meta('ShortDescription'),
'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'),
'duration': int_or_none(get_meta('length')),

View File

@@ -345,11 +345,11 @@ class RaiIE(RaiBaseIE):
media_type = media['type']
if 'Audio' in media_type:
relinker_info = {
'formats': {
'formats': [{
'format_id': media.get('formatoAudio'),
'url': media['audioUrl'],
'ext': media.get('formatoAudio'),
}
}]
}
elif 'Video' in media_type:
relinker_info = self._extract_relinker_info(media['mediaUri'], content_id)

Some files were not shown because too many files have changed in this diff Show More