Compare commits

...

237 Commits

Author SHA1 Message Date
2021b650dd release 2017.01.02 2017-01-02 23:55:04 +07:00
b890caaf21 [ChangeLog] Actualize 2017-01-02 23:54:29 +07:00
3783a5ccba [cctv] Relax _VALID_URL 2017-01-02 23:18:44 +07:00
327caf661a [cctv] Do not fallback on video id extracted from URL 2017-01-02 23:00:37 +07:00
ce7ccb1caa [cctv] Improve and merge with cntv (closes #879, closes #6753, closes #8541) 2017-01-02 22:55:24 +07:00
295eac6165 [cntv] Add extractor 2017-01-02 22:55:19 +07:00
d546d4c8e0 Merge pull request #11578 from rpunkfu/patch-2
Update Readme: Set HOME properly in install command
2017-01-02 13:50:26 +08:00
eec45445a8 Update Readme: Set home in sudo pip install
Hi, it's not always a default behaviour, but when you use `sudo pip` you most likely may want to use `-H` flag to set HOME value to directory of the target user :)
2017-01-02 00:22:10 +01:00
7fc06b6a15 [README.md] Update link to available YoutubeDL options 2017-01-01 23:36:52 +07:00
966815e139 [nrktv:episodes] Add support for episodes (#11571) 2017-01-01 21:26:32 +07:00
e5e19379be [ISSUE_TEMPLATE_tmpl.md] Clarify example URLs constraints for site support request 2017-01-01 15:11:49 +07:00
1f766b6e7b [arkena] Add support for video.arkena.com (closes #11568) 2017-01-01 02:46:47 +07:00
dc48a35404 release 2016.12.31 2016-12-31 23:58:41 +07:00
1ea0b727c4 [ChangeLog] Actualize 2016-12-31 23:53:30 +07:00
b6ee45e9fa Improve custom config support (closes #10648) 2016-12-31 23:41:37 +07:00
e66dca5e4a Add option --config-location
A configfile can now be passed to youtube_dl.

undo changes

Raise parser error if file not found, change to user_conf

change metavar hand helptext for --configfile

Fix help for --configfile

Update help for --configfile

Numbering placeholder in configfile error msg

minor fix

Change option --configfile top --config-file

Fix -config-file error
2016-12-31 23:04:16 +07:00
3f1ce16876 [twitch:vod] Improve _VALID_URL (closes #11537) 2016-12-31 22:40:42 +07:00
9a0f999585 [twitch] Added support for player.twitch.tv URLs (closes #11535) 2016-12-31 22:32:49 +07:00
3540fe262f [README.md] Fix spelling and harmonize line length 2016-12-31 22:29:35 +07:00
e186a9ec03 [videa] Add support for videa embeds 2016-12-31 22:05:32 +07:00
69677f3ee2 [videa] Improve and simplify (closes #8181, closes #11133) 2016-12-31 22:05:32 +07:00
e746021577 [videa] Add extractor 2016-12-31 22:05:32 +07:00
490da94edf [devscripts/buildserver] Remove unreachable except block 2016-12-31 19:17:52 +07:00
424ed37ec4 [vk] Fix postlive videos extraction 2016-12-30 04:31:19 +07:00
9cdb0a338d [vk] Extract from playerParams (closes #11555) 2016-12-30 04:21:49 +07:00
6cf261d882 [freevideo] Remove extractor (closes #11515)
Handled by generic extractor
2016-12-30 00:32:23 +07:00
df086e74e2 [showroomlive] Improve (closes #11458) 2016-12-30 00:12:35 +07:00
963bd5ecfc [showroomlive] Add extractor 2016-12-29 23:17:00 +07:00
51378d359e [xhamster] Fix duration extraction (closes #11549) 2016-12-28 23:04:46 +07:00
b63005f5af [rtve:live] Fix extraction (closes #11529) 2016-12-25 04:02:29 +07:00
4606c34e19 [extractor/common] Allow non-lang in subtitles' keys
See 264e77c406
2016-12-25 01:50:50 +08:00
53a664edf4 [brightcove:legacy] Improve embeds detection (closes #11523) 2016-12-24 22:46:27 +07:00
264e77c406 [twitch] Add support for rechat messages (closes #11524) 2016-12-24 22:10:54 +07:00
d1cd7e0ed9 Credit @wader for #11521 2016-12-24 15:00:23 +01:00
846fd69bac [acast] Add test with multiple blings 2016-12-24 14:28:30 +01:00
12da830993 [acast] Fix broken audio URL and timestamp extraction
Before first bling was used now we look for the first bling with
type BlingAudio.

Before publishingDate was a ms unix timestamp now it is iso8601.
2016-12-24 14:28:30 +01:00
e7ac722d62 [README.md] Add missing protocols to format selection section 2016-12-23 22:01:22 +07:00
19f37ce4b1 [README.md] Fix typo 2016-12-23 09:25:39 +07:00
5e77c0b58e release 2016.12.22 2016-12-22 22:52:54 +07:00
ab3091feda [ChangeLog] Actualize 2016-12-22 22:51:51 +07:00
a07588369f [common] improve detection for video only formats and m3u8 manifest(fixes #11507) 2016-12-22 10:02:56 +01:00
f5a723a78a [theplatform] pass geo verification headers to smil request(closes #10146) 2016-12-21 20:59:03 +01:00
f120646f04 [viu] pass geo verification headers to auth request 2016-12-21 20:50:10 +01:00
9c5b5f2115 [rtl2] extract more formats and metadata 2016-12-21 18:46:25 +01:00
ae806db628 [vbox7] Skip malformed JSON-LD (closes #11501) 2016-12-21 22:39:05 +07:00
bfa1073e11 [uplynk] force downloading using hls native downloader(closes #11496) 2016-12-20 19:49:45 +01:00
e029c43bd4 [laola1] add support for another extraction scenario(closes #11460) 2016-12-20 18:22:57 +01:00
90352a8041 release 2016.12.20 2016-12-20 22:39:39 +07:00
1f6a79b0af [ChangeLog] Actualize 2016-12-20 22:37:06 +07:00
3d6761ba92 [vbox7] Fix extraction (closes #11494) 2016-12-20 21:53:51 +07:00
f59d1146c0 [uktvplay] Add new extractor(closes #11027) 2016-12-20 12:52:46 +01:00
b1c357975d [piksel] Add new extractor(closes #11246) 2016-12-20 12:35:03 +01:00
d8c507c9e2 [vimeo] fix extraction for hls formats and add support for dash formats(closes #11490) 2016-12-20 12:35:03 +01:00
7fe1592073 [common] fix dash codec information for mixed videos and fragment url construction(#11490) 2016-12-20 12:35:03 +01:00
8ab7e6c4cc [kaltura] Improve widget ID extraction (closes #11480) 2016-12-20 18:45:52 +08:00
c80db5d398 [nrktv:direkte] Add support for live streams (#11488) 2016-12-19 23:47:45 +07:00
5aaf012a4e [pbs] fix extraction for geo restricted videos(#7095) 2016-12-19 16:27:12 +01:00
954529c10f [brightcove:new] skip widevine classic videos 2016-12-18 21:39:59 +01:00
ed7b333fbf [viu] extract supported hls manifest 2016-12-18 18:24:01 +01:00
723103151e [viu] improve extraction(closes #10607)(closes #11329) 2016-12-18 17:20:53 +01:00
e7b6caef24 [viu] New extractor for viu.com 2016-12-18 17:20:53 +01:00
ec79b1de1c Revert "Credit @pyx for meipai (#10718)"
This reverts commit d5e623aaa1.
2016-12-18 20:56:21 +07:00
f73d7d5074 release 2016.12.18 2016-12-18 19:50:33 +07:00
52a1d48d9f [ChangeLog] Actualize 2016-12-18 19:48:59 +07:00
d5e623aaa1 Credit @pyx for meipai (#10718) 2016-12-18 19:46:57 +07:00
199a47abba [ccma] Add new extractor(closes #11359) 2016-12-18 10:49:10 +01:00
b42a0bf360 [laola1tv] add support embed urls and improve extraction(#11460) 2016-12-17 21:48:45 +01:00
6e416b210c [nbc] fix extraction for msnbc videos(fixes #11466) 2016-12-17 18:11:13 +01:00
04bf59ff64 [extractors] Add missing twitch imports 2016-12-17 23:03:50 +07:00
87a449c1ed [extractor/common] Recognize DASH formats in html5 media entries 2016-12-17 23:03:13 +07:00
93753aad20 [twitch] Adapt to new videos pages schema (closes #11469) 2016-12-17 20:20:23 +07:00
2786818c33 [meipai] Fix regular videos extraction and improve (closes #10718) 2016-12-17 19:42:34 +07:00
9b785768ac [meipai] Add extractor 2016-12-17 19:41:35 +07:00
47c914f995 [ondemandkorea] Fix extraction (closes #10772) 2016-12-17 18:50:12 +07:00
732d116aa7 [jwplatform] Improve duration extraction 2016-12-17 18:50:07 +07:00
a495840d3b [jwplatform] Improve subtitles extraction 2016-12-17 18:50:00 +07:00
b0c65c677f [utils] Improve urljoin 2016-12-17 18:49:55 +07:00
594601f545 [ondemandkorea] Add extractor 2016-12-17 18:49:45 +07:00
0ae9560eea [vporn] Use urljoin for thumbnail 2016-12-16 23:57:51 +07:00
dc1f3a9f20 [vvvvid] do not cache the conn_id 2016-12-16 11:05:46 +01:00
7b1e80792b [vvvvid] Add new extractor(closes #5915) 2016-12-16 09:05:34 +01:00
38be3bc568 release 2016.12.15 2016-12-15 21:16:55 +07:00
d7ef47bffd [ChangeLog] Actualize 2016-12-15 21:15:45 +07:00
5c32a5be95 [openload] Recognize oload.tv URLs (#10408) 2016-12-15 17:51:26 +08:00
30918999f5 [facebook] Recognize .onion URLs (closes #11443) 2016-12-15 01:04:49 +08:00
069f918302 [vlive] Use live titles for live streams 2016-12-14 21:30:33 +07:00
89c63cc5f8 [vlive] Add video params extraction fallback and improve (closes #11375) 2016-12-14 21:05:50 +07:00
577748075b [vlive] Update extraction 2016-12-14 21:05:32 +07:00
67dcbc0add [canvas] extract dash formats 2016-12-13 17:59:22 +01:00
3a40f859b5 [melonvod] Improve (closes #11419) 2016-12-13 02:27:26 +07:00
e34c33614d [utils] Add convenience urljoin 2016-12-13 02:23:49 +07:00
abf3494ac7 [melonvod] Add extractor for vod.melon.com 2016-12-13 02:13:40 +07:00
3c1e9dc4ec release 2016.12.12 2016-12-12 01:44:50 +07:00
62faf9b55e [ChangeLog] Actualize 2016-12-12 01:41:08 +07:00
3530e0d3d9 [dplay] Use Safari user-agent for hls (closes #11418) 2016-12-12 00:58:08 +07:00
fb37eb25d9 [utils] Add common user agents map 2016-12-12 00:49:07 +07:00
d2d2495e16 [facebook] Detect login required error message 2016-12-11 01:40:30 +07:00
19b4900b7b [facebook] Improve video selection (closes #11390) 2016-12-11 01:22:01 +07:00
6ca478d44a [canalplus] Add another video id regex (closes #11399) 2016-12-11 00:45:27 +07:00
655cb545ab [mixcloud] Relax _VALID_URL (closes #11406) 2016-12-10 23:48:18 +07:00
f0b69fa91a [ctvnews] relax _VALID_URL regex(closes #11394) 2016-12-10 17:36:32 +01:00
8821a718cf [common] recognize hls manifests that contain video only formats(#11394) 2016-12-10 17:22:15 +01:00
0d7d9f9404 [rte] improve extraction(closes #10498)(closes #7746) 2016-12-10 16:34:01 +01:00
f41db40596 [prosiebensat1] extract dash formats 2016-12-10 13:29:51 +01:00
68601ef3ac [rts,srgssr] improve extraction for geo restricted videos(fixes #11089)(closes #4989) 2016-12-10 10:47:56 +01:00
18ece70c4d release 2016.12.09 2016-12-09 02:46:18 +07:00
9ed3495eae [ChangeLog] Actualize 2016-12-09 02:41:49 +07:00
6c20a0bb99 [openload] Fix extraction (closes #10408) 2016-12-09 02:15:16 +08:00
f43795e56b [pandoratv] PEP 8 and simplify 2016-12-07 23:50:10 +07:00
7441915b1e [pandoratv] Fix extraction (closes #11023) 2016-12-07 23:46:42 +07:00
283d1c6a8b [telebruxelles] extract all formats and add support for emission urls 2016-12-06 19:01:17 +01:00
875ddd7409 [bloomberg] Add another video id regex (closes #11371) 2016-12-06 00:41:03 +07:00
4afa4ff223 [1tv] Fix video id extraction 2016-12-05 23:28:57 +07:00
3ed81714d8 [fusion] Update ooyala id regex 2016-12-05 22:43:36 +07:00
4bd7d9d4ae [socks] Refine exception model for better error handling
1. ProxyError now inherits from socket.error instead of IOError

The only functions socks.py overrides are connect and connect_ex. In
Python 2.x and Python <= 3.2, socket functions raises socket.error. In
newer Python versions, those functions raises OSError instead. The name
socket.error is preserved as an alias of OSError for backward
compability. To keep socks.py compatible with Python's standard library,
it should raise the same exception as raw sockets.

See PEP 3151 (https://www.python.org/dev/peps/pep-3151/) for more
information about the change in Python 3.3.

2. Raise EOFError instead of IOError when the socket receives less data
than it expects

There's no common convention, but both ftplib and telnetlib raises
EOFError for similar situations. socks.py follows them.

Closes #11355

In #11355, only Python 2 is affected. In Python 3, both socket.error and
IOError are alias of OSError, so AbstractHTTPHandler.do_open correctly
catches the error and thus InfoExtractor._is_valid_url works fine.
2016-12-05 00:43:37 +08:00
9b5288c92a [1tv] Improve extraction and add support for playlists (closes #11335) 2016-12-04 23:35:21 +07:00
8344296619 [socks] Fix error reporting (#11355) 2016-12-03 21:53:41 +08:00
a94e7f4a0c [aenetworks] extract more formats(closes #11321) 2016-12-01 12:15:35 +01:00
d17bfe4095 [thisoldhouse] Recognize /tv-episode/ URLs and update _TESTS
Closes #11271
2016-12-01 14:56:52 +08:00
98b08f94b1 [README.md] Fix typo
Just a minor spelling mistake in the readme
2016-12-01 01:31:21 +07:00
73ec479c7d release 2016.12.01 2016-12-01 00:15:12 +07:00
f150530f4d [ChangeLog] Actualize 2016-12-01 00:13:06 +07:00
4c4765dba2 [soundcloud] Update client id (closes #11327) 2016-11-30 23:17:30 +07:00
f882554815 [comedcycentral] Give /shows/.+/full-episodes URLs to the COmedyCentralFullEpisodesIE 2016-11-30 11:52:19 +01:00
db75f14d8a [ruutu] Detect DRM videos 2016-11-30 04:19:38 +07:00
8b0d3ee64e [liveleak] Simplify and PEP 8 2016-11-29 23:42:19 +07:00
3779d524df [liveleak] Add support for youtube embeds 2016-11-29 23:37:30 +07:00
6303fc8204 [spike] Fix full episodes extraction 2016-11-29 23:06:01 +07:00
cc61fc3934 [comedycentral] Add new extractor for full-episodes
CC seems to have added yet another indirection for full episodes - the mgid is now only in a linked feed.
This may be a little brittle, but it's better than failing outright.
Plus, the current The Daily Show episode now works :)
2016-11-29 10:12:18 +01:00
c2530d3319 [teamfourstar] Simplify _VALID_URL and relax regexes 2016-11-28 23:22:29 +07:00
8953319916 [screenwavemedia] Remove extractor
Rewrite TeamFourStar and Normalboots extractors in terms of JWPlatform
2016-11-28 23:17:56 +07:00
51b1378eed Ignore and clean .swf files
Some videos on NicoNico are swf
2016-11-27 22:01:07 +08:00
2b380fc299 release 2016.11.27 2016-11-27 20:05:32 +07:00
294d4926d7 [ChangeLog] Actualize 2016-11-27 20:04:03 +07:00
83f1481baa [extractor/generic] Add support for webcaster.pro embeds 2016-11-27 19:56:32 +07:00
f25e1c8d8c [webcaster] Add support for webcaster.pro 2016-11-27 19:54:59 +07:00
6901673868 [azubu] Add support for azubu.uol.com.br (closes #11305) 2016-11-27 15:40:28 +07:00
560c8c6ec0 [viki] Prefer hls 2016-11-26 00:14:09 +07:00
9338a0eae3 [viki] Fix rtmp formats extraction (closes #11255) 2016-11-26 00:13:46 +07:00
74394b5e10 [puls4] Relax _VALID_URL (closes #11267) 2016-11-25 23:37:32 +07:00
1db058466d [vevo] Allow video info to fail in tests 2016-11-24 23:10:58 +07:00
e94eeb1dd3 [vevo] Simplify artists extraction 2016-11-24 23:09:35 +07:00
8b27d83e4e vevo: fixing naming when there are featured artists 2016-11-24 23:07:28 +07:00
8eb7b5c3f1 [mitele] Modernize and extract more metadata 2016-11-24 22:43:02 +07:00
b68599ed47 [mitele] Relax _VALID_URL 2016-11-24 21:57:53 +07:00
44444f0d3b [cbslocal] Support newyork.cbslocal.com
Closes #11285
2016-11-24 20:32:17 +08:00
c867adc68c [youtube:playlist] Pass disable_polymer in query (closes #11193, closes #11270) 2016-11-23 23:28:32 +07:00
3b5daf0736 release 2016.11.22 2016-11-22 22:32:16 +07:00
c8f56741dd [ChangeLog] Actualize 2016-11-22 22:29:37 +07:00
868630fbe5 [hellporno] Add support for hellporno.net and improve ext extraction 2016-11-22 22:16:10 +07:00
1d6ae5628f [amcnetworks] Recognize more BBC America URLs
Closes #11263
2016-11-22 20:40:57 +08:00
6334794f2a [funnyordie] Copy formats' metadata from hls and sort formats 2016-11-21 23:46:55 +07:00
4eece8ba57 [funnyordie] Improve extraction 2016-11-21 22:16:26 +07:00
2574721a81 Clean and ignore more file types
ape is another audio codec seen in kuwo. See
https://en.wikipedia.org/wiki/Monkey's_Audio
2016-11-21 12:50:13 +08:00
dbcc4a6b32 [CONTRIBUTING.md] Fix broken links (#11239) 2016-11-21 12:25:19 +08:00
0bb58a208b Merge pull request #11239 from josephfrazier/patch-1
[CONTRIBUTING.md] Fix broken link
2016-11-21 12:24:11 +08:00
dc6a9e4195 [README.md] Update link from generated CONTRIBUTING.md 2016-11-20 11:32:00 -05:00
8f8f182d0b [extractor/generic] Improve limelight embeds support 2016-11-20 02:13:21 +07:00
2176e466e0 Merge branch 'DarkstaIkers-master' 2016-11-20 00:07:35 +08:00
303b38fa84 [ChangeLog] Update for #9028 2016-11-20 00:06:44 +08:00
fb27d0ce5e Merge branch 'master' of https://github.com/DarkstaIkers/youtube-dl into DarkstaIkers-master 2016-11-20 00:05:11 +08:00
0aacd2deb1 [bandcamp] Fix free downloads extraction and extract all formats (closes #11067) 2016-11-19 04:18:21 +07:00
08ec95a6db [ChangeLog] Actualize 2016-11-19 03:10:20 +07:00
df46b19cb8 [toutv] Fix login form regex (closes #11223) 2016-11-19 01:56:31 +07:00
748a462fbe [twitter:card] Relax _VALID_URL (closes #11225) 2016-11-19 01:49:13 +07:00
c131fc3372 [tvanouvelles] Add extractor (closes #10616) 2016-11-18 01:16:33 +07:00
b25459b88a release 2016.11.18 2016-11-18 00:25:24 +07:00
5f75c4a4ad [ChangeLog] Actualize 2016-11-18 00:19:55 +07:00
689f31fde5 [devscripts/create-github-release] Fill release body from ChangeLog (closes #11094) 2016-11-18 00:17:46 +07:00
582be35847 Update coding style after pycodestyle 2.1.0
In pycodestyle 2.1.0, E305 was introduced, which requires two blank
lines after top level declarations, too.

See https://github.com/PyCQA/pycodestyle/issues/400

See also #10689; thanks @stepshal for first mentioning this issue and
initial patches
2016-11-17 19:45:42 +08:00
073d5bf583 [youtube:live] Relax _VALID_URL (closes #11164) 2016-11-16 23:15:19 +07:00
315cb86a95 Merge pull request #11210 from FooBarQuaxx/patch-2
Strip only args urls
2016-11-16 23:29:37 +08:00
b2fc1c4fb9 Add explanatory comment 2016-11-16 18:18:54 +03:00
d76767c90e [ChangeLog] Update after #11122 landed 2016-11-16 20:47:15 +08:00
eceba9f805 Merge pull request #11122 from kasper93/openload
[openload] Fix extraction.
2016-11-16 20:43:19 +08:00
MAA
d755396804 Strip only args urls 2016-11-16 09:00:30 +03:00
58355a3bf1 [vlive] Add test for #11203 2016-11-15 22:11:47 +07:00
49b69ad91c [vlive] Prefer locale over language for subtitles id 2016-11-15 22:07:17 +07:00
6b4dfa2819 release 2016.11.14.1 2016-11-14 02:48:15 +07:00
9f60134a9d [ChangeLog] Actualize 2016-11-14 02:46:12 +07:00
b3d4bd05f9 release 2016.11.14 2016-11-14 02:39:50 +07:00
dbffd00ba9 [ChangeLog] Actualize 2016-11-14 02:37:21 +07:00
50913b8241 [nrk] Improve geo restriction detection 2016-11-13 22:29:36 +07:00
7e08e2cab0 [nrk] Add X-Forwarded-For HTTP header in info dict 2016-11-13 22:28:29 +07:00
690355551c [downoader/fragment,f4m,hls] Add internal support for custom HTTP headers 2016-11-13 22:22:10 +07:00
754e6c8322 [nrk] Workaround geo restriction and improve error messages 2016-11-13 20:54:34 +07:00
e58609b22c [afreecatv] Add support for vod.afreecatv.com (closes #11174) 2016-11-13 06:02:26 +07:00
4ea4c0bb22 [extractor/common] Fix Bandwidth substitution in media template (closes #11175) 2016-11-13 05:43:34 +07:00
577281b0c6 [cda] Fix and improve extraction
Fixes #10929
2016-11-13 01:01:29 +07:00
3d2729514f [plays] Improve extraction and add support for embed URLs 2016-11-12 23:08:05 +07:00
f076d7972c [extractor/common] Improve thumbnail extraction from JSON-LD 2016-11-12 23:01:05 +07:00
cpm
8b1aeadc33 [plays] Fix extraction 2016-11-12 22:59:39 +07:00
95ad9ce573 [openload] Fix extraction.
aadecode code was restored from commit c1decda58c
with some optimizations (2x faster).

Fixes #10408
2016-11-11 15:36:57 +01:00
189935f159 [jsinterp] Fix function calls without arguments. 2016-11-11 15:36:57 +01:00
bc40b3a5ba [eagleplatform] Fix extraction (closes #11160) 2016-11-11 03:26:29 +07:00
3eaaa8abac [audioboom] Recognize /posts/ URLs (closes #11149) 2016-11-10 14:52:34 +08:00
db3367f43e release 2016.11.08.1 2016-11-08 22:30:53 +07:00
6590925c27 [ChangeLog] Actualize 2016-11-08 22:29:16 +07:00
4719af097c [extractors] Add forgotten import for espn:article 2016-11-08 22:27:02 +07:00
9946aa5ccf [franceculture] Fix extraction (closes #11140) 2016-11-08 22:26:33 +07:00
c58e07a7aa release 2016.11.08 2016-11-08 22:11:21 +07:00
f700afa24c [ChangeLog] Actualize 2016-11-08 22:09:03 +07:00
5d47b38cf5 [tmz:article] Fix extraction (closes #11052) 2016-11-08 21:53:41 +08:00
ebc7ab1e23 [espn] Fix extraction (closes #11041) 2016-11-08 00:29:12 +07:00
97726317ac [README.md] Mention HTTP headers and alternative way to obtain cookies and headers in -g FAQ 2016-11-07 23:53:22 +07:00
cb882540e8 [mitele] Fix extraction after website redesign (fixes #10824) 2016-11-07 11:13:59 +01:00
98708e6cbd [ard] Remove age restriction check (closes #11129) 2016-11-06 23:20:15 +07:00
b52c9ef165 [extractor/generic] Improve support for pornhub embeds (closes #11100) 2016-11-06 21:52:00 +07:00
e28ed498e6 [extractor/generic] Add support for redtube embds (closes #11099) 2016-11-06 21:42:41 +07:00
5021ca6c13 [redtube] Add support for embed URLs 2016-11-06 21:39:29 +07:00
37e7a71c6c [extractor/generic] Add support for drtuber embds (closes #11098) 2016-11-06 21:33:51 +07:00
f5c4b06f17 [drtuber] Fix title extraction 2016-11-06 21:29:15 +07:00
519d897049 [drtuber] Add support for embed URLs 2016-11-06 21:28:51 +07:00
b61cd51869 [yahoo] Add test and improve some content id regex 2016-11-06 21:16:33 +07:00
f420902a3b [yahoo] Add another content id regex (closes #11088) 2016-11-06 21:14:15 +07:00
de328af362 [toutv] Relax _VALID_URL (closes #11121) 2016-11-05 03:24:42 +07:00
b30e4c2754 release 2016.11.04 2016-11-04 22:07:54 +07:00
09ffe34b00 [ChangeLog] Actualize 2016-11-04 21:59:42 +07:00
640aff1d0c [anvato] Improve formats extraction 2016-11-04 21:45:24 +07:00
c897af8aac [cbslocal] Update test 2016-11-04 21:33:08 +07:00
f3c705f8ec [fox9] Add extractor (closes #11110) 2016-11-04 21:32:30 +07:00
f93ac1d175 [anvato] Extract more metadata 2016-11-04 21:17:56 +07:00
c4c9b8440c [extractor/common] Tolerate malformed RESOLUTION attribute in m3u8 manifests (closes #11113) 2016-11-04 05:02:31 +07:00
32f2627aed [vodlocker] Add another removed file pattern (closes #11106) 2016-11-03 22:22:40 +07:00
9d64e1dcdc [downloader/ism] Fix typo 2016-11-03 22:15:09 +07:00
10380e55de [downloader/ism] fix AVC Decoder Configuration Record creation in python 3 2016-11-03 16:08:57 +01:00
22979993e7 [vice] add coding cookie 2016-11-03 16:07:22 +01:00
b47ecd0b74 [vzaar] Add new extractor(closes #11093) 2016-11-03 12:50:41 +01:00
3a86b2c51e Ignore and clean .wav files 2016-11-03 18:55:55 +08:00
b811b4c93b [vice] add support for uplynk preplay videos(#11101) 2016-11-03 10:37:07 +01:00
f4dfa9a5ed [tubitv] fix extraction(closes #11061) 2016-11-03 09:04:20 +01:00
3b4b66b50c [shahid] add support for authentication(closes #11091) 2016-11-03 00:44:12 +01:00
4119a96ce5 [extractor/generic] Skip URLs we came from when delegating ISM extraction 2016-11-02 23:43:41 +07:00
26aae56690 [extractor/generic] Improve ISM extraction 2016-11-02 23:34:37 +07:00
4f9cd4d36f [radiocanada] extract subtitle(closes #11096) 2016-11-02 13:55:40 +01:00
cc99a77ac1 [extractor/generic] Add support for ISM manifests 2016-11-02 03:01:13 +07:00
6cbb20bb09 Update crunchyroll.py 2016-03-29 14:26:24 -03:00
150 changed files with 4044 additions and 1261 deletions

View File

@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.02**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.02**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2016.11.02
[debug] youtube-dl version 2017.01.02
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}
@ -50,6 +50,8 @@ $ youtube-dl -v <your command line>
- Single video: https://youtu.be/BaW_jenozKc
- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc
Note that **youtube-dl does not support sites dedicated to [copyright infringement](https://github.com/rg3/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
---
### Description of your *issue*, suggested solution and other information

View File

@ -50,6 +50,8 @@ $ youtube-dl -v <your command line>
- Single video: https://youtu.be/BaW_jenozKc
- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc
Note that **youtube-dl does not support sites dedicated to [copyright infringement](https://github.com/rg3/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
---
### Description of your *issue*, suggested solution and other information

4
.gitignore vendored
View File

@ -30,6 +30,10 @@ updates_key.pem
*.m4v
*.mp3
*.3gp
*.wav
*.ape
*.mkv
*.swf
*.part
*.swp
test/testdata

View File

@ -190,3 +190,4 @@ John Hawkinson
Rich Leeper
Zhong Jianxin
Thor77
Mattias Wadman

View File

@ -58,7 +58,7 @@ We are then presented with a very complicated request when the original problem
Some of our users seem to think there is a limit of issues they can or should open. There is no limit of issues they can or should open. While it may seem appealing to be able to dump all your issues into one ticket, that means that someone who solves one of your issues cannot mark the issue as closed. Typically, reporting a bunch of issues leads to the ticket lingering since nobody wants to attack that behemoth, until someone mercifully splits the issue into multiple ones.
In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, Whitehouse podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of youtube-dl that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service.
In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, White house podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of youtube-dl that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service.
### Is anyone going to need the feature?
@ -92,9 +92,9 @@ If you want to create a build of youtube-dl yourself, you'll need
### Adding support for a new site
If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**.
If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](README.md#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**.
After you have ensured this site is distributing it's content legally, you can follow this quick list (assuming your service is called `yourextractor`):
After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
2. Check out the source code with:
@ -199,7 +199,7 @@ Assume at this point `meta`'s layout is:
}
```
Assume you want to extract `summary` and put it into the resulting info dict as `description`. Since `description` is an optional metafield you should be ready that this key may be missing from the `meta` dict, so that you should extract it like:
Assume you want to extract `summary` and put it into the resulting info dict as `description`. Since `description` is an optional meta field you should be ready that this key may be missing from the `meta` dict, so that you should extract it like:
```python
description = meta.get('summary') # correct

239
ChangeLog
View File

@ -1,3 +1,242 @@
version 2017.01.02
Extractors
* [cctv] Improve extraction (#879, #6753, #8541)
+ [nrktv:episodes] Add support for episodes (#11571)
+ [arkena] Add support for video.arkena.com (#11568)
version 2016.12.31
Core
+ Introduce --config-location option for custom configuration files (#6745,
#10648)
Extractors
+ [twitch] Add support for player.twitch.tv (#11535, #11537)
+ [videa] Add support for videa.hu (#8181, #11133)
* [vk] Fix postlive videos extraction
* [vk] Extract from playerParams (#11555)
- [freevideo] Remove extractor (#11515)
+ [showroomlive] Add support for showroom-live.com (#11458)
* [xhamster] Fix duration extraction (#11549)
* [rtve:live] Fix extraction (#11529)
* [brightcove:legacy] Improve embeds detection (#11523)
+ [twitch] Add support for rechat messages (#11524)
* [acast] Fix audio and timestamp extraction (#11521)
version 2016.12.22
Core
* [extractor/common] Improve detection of video-only formats in m3u8
manifests (#11507)
Extractors
+ [theplatform] Pass geo verification headers to SMIL request (#10146)
+ [viu] Pass geo verification headers to auth request
* [rtl2] Extract more formats and metadata
* [vbox7] Skip malformed JSON-LD (#11501)
* [uplynk] Force downloading using native HLS downloader (#11496)
+ [laola1] Add support for another extraction scenario (#11460)
version 2016.12.20
Core
* [extractor/common] Improve fragment URL construction for DASH media
* [extractor/common] Fix codec information extraction for mixed audio/video
DASH media (#11490)
Extractors
* [vbox7] Fix extraction (#11494)
+ [uktvplay] Add support for uktvplay.uktv.co.uk (#11027)
+ [piksel] Add support for player.piksel.com (#11246)
+ [vimeo] Add support for DASH formats
* [vimeo] Fix extraction for HLS formats (#11490)
* [kaltura] Fix wrong widget ID in some cases (#11480)
+ [nrktv:direkte] Add support for live streams (#11488)
* [pbs] Fix extraction for geo restricted videos (#7095)
* [brightcove:new] Skip widevine classic videos
+ [viu] Add support for viu.com (#10607, #11329)
version 2016.12.18
Core
+ [extractor/common] Recognize DASH formats in html5 media entries
Extractors
+ [ccma] Add support for ccma.cat (#11359)
* [laola1tv] Improve extraction
+ [laola1tv] Add support embed URLs (#11460)
* [nbc] Fix extraction for MSNBC videos (#11466)
* [twitch] Adapt to new videos pages URL schema (#11469)
+ [meipai] Add support for meipai.com (#10718)
* [jwplatform] Improve subtitles and duration extraction
+ [ondemandkorea] Add support for ondemandkorea.com (#10772)
+ [vvvvid] Add support for vvvvid.it (#5915)
version 2016.12.15
Core
+ [utils] Add convenience urljoin
Extractors
+ [openload] Recognize oload.tv URLs (#10408)
+ [facebook] Recognize .onion URLs (#11443)
* [vlive] Fix extraction (#11375, #11383)
+ [canvas] Extract DASH formats
+ [melonvod] Add support for vod.melon.com (#11419)
version 2016.12.12
Core
+ [utils] Add common user agents map
+ [common] Recognize HLS manifests that contain video only formats (#11394)
Extractors
+ [dplay] Use Safari user agent for HLS (#11418)
+ [facebook] Detect login required error message
* [facebook] Improve video selection (#11390)
+ [canalplus] Add another video id pattern (#11399)
* [mixcloud] Relax URL regular expression (#11406)
* [ctvnews] Relax URL regular expression (#11394)
+ [rte] Capture and output error message (#7746, #10498)
+ [prosiebensat1] Add support for DASH formats
* [srgssr] Improve extraction for geo restricted videos (#11089)
* [rts] Improve extraction for geo restricted videos (#4989)
version 2016.12.09
Core
* [socks] Fix error reporting (#11355)
Extractors
* [openload] Fix extraction (#10408)
* [pandoratv] Fix extraction (#11023)
+ [telebruxelles] Add support for emission URLs
* [telebruxelles] Extract all formats
+ [bloomberg] Add another video id regular expression (#11371)
* [fusion] Update ooyala id regular expression (#11364)
+ [1tv] Add support for playlists (#11335)
* [1tv] Improve extraction (#11335)
+ [aenetworks] Extract more formats (#11321)
+ [thisoldhouse] Recognize /tv-episode/ URLs (#11271)
version 2016.12.01
Extractors
* [soundcloud] Update client id (#11327)
* [ruutu] Detect DRM protected videos
+ [liveleak] Add support for youtube embeds (#10688)
* [spike] Fix full episodes support (#11312)
* [comedycentral] Fix full episodes support
* [normalboots] Rewrite in terms of JWPlatform (#11184)
* [teamfourstar] Rewrite in terms of JWPlatform (#11184)
- [screenwavemedia] Remove extractor (#11184)
version 2016.11.27
Extractors
+ [webcaster] Add support for webcaster.pro
+ [azubu] Add support for azubu.uol.com.br (#11305)
* [viki] Prefer hls formats
* [viki] Fix rtmp formats extraction (#11255)
* [puls4] Relax URL regular expression (#11267)
* [vevo] Improve artist extraction (#10911)
* [mitele] Relax URL regular expression and extract more metadata (#11244)
+ [cbslocal] Recognize New York site (#11285)
+ [youtube:playlist] Pass disable_polymer in URL query (#11193)
version 2016.11.22
Extractors
* [hellporno] Fix video extension extraction (#11247)
+ [hellporno] Add support for hellporno.net (#11247)
+ [amcnetworks] Recognize more BBC America URLs (#11263)
* [funnyordie] Improve extraction (#11208)
* [extractor/generic] Improve limelight embeds support
- [crunchyroll] Remove ScaledBorderAndShadow from ASS subtitles (#8207, #9028)
* [bandcamp] Fix free downloads extraction and extract all formats (#11067)
* [twitter:card] Relax URL regular expression (#11225)
+ [tvanouvelles] Add support for tvanouvelles.ca (#10616)
version 2016.11.18
Extractors
* [youtube:live] Relax URL regular expression (#11164)
* [openload] Fix extraction (#10408, #11122)
* [vlive] Prefer locale over language for subtitles id (#11203)
version 2016.11.14.1
Core
+ [downoader/fragment,f4m,hls] Respect HTTP headers from info dict
* [extractor/common] Fix media templates with Bandwidth substitution pattern in
MPD manifests (#11175)
* [extractor/common] Improve thumbnail extraction from JSON-LD
Extractors
+ [nrk] Workaround geo restriction
+ [nrk] Improve error detection and messages
+ [afreecatv] Add support for vod.afreecatv.com (#11174)
* [cda] Fix and improve extraction (#10929, #10936)
* [plays] Fix extraction (#11165)
* [eagleplatform] Fix extraction (#11160)
+ [audioboom] Recognize /posts/ URLs (#11149)
version 2016.11.08.1
Extractors
* [espn:article] Fix support for espn.com articles
* [franceculture] Fix extraction (#11140)
version 2016.11.08
Extractors
* [tmz:article] Fix extraction (#11052)
* [espn] Fix extraction (#11041)
* [mitele] Fix extraction after website redesign (#10824)
- [ard] Remove age restriction check (#11129)
* [generic] Improve support for pornhub.com embeds (#11100)
+ [generic] Add support for redtube.com embeds (#11099)
+ [generic] Add support for drtuber.com embeds (#11098)
+ [redtube] Add support for embed URLs
+ [drtuber] Add support for embed URLs
+ [yahoo] Improve content id extraction (#11088)
* [toutv] Relax URL regular expression (#11121)
version 2016.11.04
Core
* [extractor/common] Tolerate malformed RESOLUTION attribute in m3u8
manifests (#11113)
* [downloader/ism] Fix AVC Decoder Configuration Record
Extractors
+ [fox9] Add support for fox9.com (#11110)
+ [anvato] Extract more metadata and improve formats extraction
* [vodlocker] Improve removed videos detection (#11106)
+ [vzaar] Add support for vzaar.com (#11093)
+ [vice] Add support for uplynk preplay videos (#11101)
* [tubitv] Fix extraction (#11061)
+ [shahid] Add support for authentication (#11091)
+ [radiocanada] Add subtitles support (#11096)
+ [generic] Add support for ISM manifests
version 2016.11.02
Core

View File

@ -1,7 +1,7 @@
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
clean:
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
find . -name "*.pyc" -delete
find . -name "*.class" -delete

View File

@ -29,7 +29,7 @@ Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.ex
You can also use pip:
sudo pip install --upgrade youtube-dl
sudo -H pip install --upgrade youtube-dl
This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information.
@ -44,11 +44,7 @@ Or with [MacPorts](https://www.macports.org/):
Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://rg3.github.io/youtube-dl/download.html).
# DESCRIPTION
**youtube-dl** is a command-line program to download videos from
YouTube.com and a few more sites. It requires the Python interpreter, version
2.6, 2.7, or 3.2+, and it is not platform specific. It should work on
your Unix box, on Windows or on Mac OS X. It is released to the public domain,
which means you can modify it, redistribute it or use it however you like.
**youtube-dl** is a command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on your Unix box, on Windows or on Mac OS X. It is released to the public domain, which means you can modify it, redistribute it or use it however you like.
youtube-dl [OPTIONS] URL [URL...]
@ -84,6 +80,9 @@ which means you can modify it, redistribute it or use it however you like.
configuration in ~/.config/youtube-
dl/config (%APPDATA%/youtube-dl/config.txt
on Windows)
--config-location PATH Location of the configuration file; either
the path to the config or its containing
directory.
--flat-playlist Do not extract the videos of a playlist,
only list them.
--mark-watched Mark videos watched (YouTube only)
@ -187,7 +186,7 @@ which means you can modify it, redistribute it or use it however you like.
of SIZE.
--playlist-reverse Download playlist videos in reverse order
--xattr-set-filesize Set file xattribute ytdl.filesize with
expected filesize (experimental)
expected file size (experimental)
--hls-prefer-native Use the native HLS downloader instead of
ffmpeg
--hls-prefer-ffmpeg Use ffmpeg instead of the native HLS
@ -354,7 +353,7 @@ which means you can modify it, redistribute it or use it however you like.
-u, --username USERNAME Login with this account ID
-p, --password PASSWORD Account password. If this option is left
out, youtube-dl will ask interactively.
-2, --twofactor TWOFACTOR Two-factor auth code
-2, --twofactor TWOFACTOR Two-factor authentication code
-n, --netrc Use .netrc authentication data
--video-password PASSWORD Video password (vimeo, smotri, youku)
@ -638,7 +637,7 @@ Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begin
- `acodec`: Name of the audio codec in use
- `vcodec`: Name of the video codec in use
- `container`: Name of the container format
- `protocol`: The protocol that will be used for the actual download, lower-case. `http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `m3u8`, or `m3u8_native`
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `m3u8`, or `m3u8_native`)
- `format_id`: A short description of the format
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster.
@ -664,7 +663,7 @@ $ youtube-dl -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best'
# Download best format available but not better that 480p
$ youtube-dl -f 'bestvideo[height<=480]+bestaudio/best[height<=480]'
# Download best video only format but no bigger that 50 MB
# Download best video only format but no bigger than 50 MB
$ youtube-dl -f 'best[filesize<50M]'
# Download best format available via direct link over HTTP/HTTPS protocol
@ -744,7 +743,7 @@ Most people asking this question are not aware that youtube-dl now defaults to d
### I get HTTP error 402 when trying to download a video. What's this?
Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl.
Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a web browser to the youtube URL, solving the CAPTCHA, and restart youtube-dl.
### Do I need any other programs?
@ -756,9 +755,9 @@ Videos or video formats streamed via RTMP protocol can only be downloaded when [
Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org/) or [mplayer](http://www.mplayerhq.hu/).
### I extracted a video URL with `-g`, but it does not play on another machine / in my webbrowser.
### I extracted a video URL with `-g`, but it does not play on another machine / in my web browser.
It depends a lot on the service. In many cases, requests for the video (to download/play it) must come from the same IP address and with the same cookies. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used, use `--dump-user-agent` to see the one in use by youtube-dl.
It depends a lot on the service. In many cases, requests for the video (to download/play it) must come from the same IP address and with the same cookies and/or HTTP headers. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used, use `--dump-user-agent` to see the one in use by youtube-dl. You can also get necessary cookies and HTTP headers from JSON output obtained with `--dump-json`.
It may be beneficial to use IPv6; in some cases, the restrictions are only applied to IPv4. Some services (sometimes only for a subset of videos) do not restrict the video URL by IP address, cookie, or user-agent, but these are the exception rather than the rule.
@ -930,9 +929,9 @@ If you want to create a build of youtube-dl yourself, you'll need
### Adding support for a new site
If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**.
If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](README.md#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**.
After you have ensured this site is distributing it's content legally, you can follow this quick list (assuming your service is called `yourextractor`):
After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
2. Check out the source code with:
@ -1037,7 +1036,7 @@ Assume at this point `meta`'s layout is:
}
```
Assume you want to extract `summary` and put it into the resulting info dict as `description`. Since `description` is an optional metafield you should be ready that this key may be missing from the `meta` dict, so that you should extract it like:
Assume you want to extract `summary` and put it into the resulting info dict as `description`. Since `description` is an optional meta field you should be ready that this key may be missing from the `meta` dict, so that you should extract it like:
```python
description = meta.get('summary') # correct
@ -1149,7 +1148,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
```
Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L128-L278). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L129-L279). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
@ -1252,7 +1251,7 @@ We are then presented with a very complicated request when the original problem
Some of our users seem to think there is a limit of issues they can or should open. There is no limit of issues they can or should open. While it may seem appealing to be able to dump all your issues into one ticket, that means that someone who solves one of your issues cannot mark the issue as closed. Typically, reporting a bunch of issues leads to the ticket lingering since nobody wants to attack that behemoth, until someone mercifully splits the issue into multiple ones.
In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, Whitehouse podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of youtube-dl that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service.
In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, White house podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of youtube-dl that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service.
### Is anyone going to need the feature?

View File

@ -25,5 +25,6 @@ def build_completion(opt_parser):
filled_template = template.replace("{{flags}}", " ".join(opts_flag))
f.write(filled_template)
parser = youtube_dl.parseOpts()[0]
build_completion(parser)

View File

@ -424,8 +424,6 @@ class BuildHTTPRequestHandler(compat_http_server.BaseHTTPRequestHandler):
self.send_header('Content-Length', len(msg))
self.end_headers()
self.wfile.write(msg)
except HTTPError as e:
self.send_response(e.code, str(e))
else:
self.send_response(500, 'Unknown build method "%s"' % action)
else:

View File

@ -2,11 +2,13 @@
from __future__ import unicode_literals
import base64
import io
import json
import mimetypes
import netrc
import optparse
import os
import re
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@ -90,16 +92,23 @@ class GitHubReleaser(object):
def main():
parser = optparse.OptionParser(usage='%prog VERSION BUILDPATH')
parser = optparse.OptionParser(usage='%prog CHANGELOG VERSION BUILDPATH')
options, args = parser.parse_args()
if len(args) != 2:
if len(args) != 3:
parser.error('Expected a version and a build directory')
version, build_path = args
changelog_file, version, build_path = args
with io.open(changelog_file, encoding='utf-8') as inf:
changelog = inf.read()
mobj = re.search(r'(?s)version %s\n{2}(.+?)\n{3}' % version, changelog)
body = mobj.group(1) if mobj else ''
releaser = GitHubReleaser()
new_release = releaser.create_release(version, name='youtube-dl %s' % version)
new_release = releaser.create_release(
version, name='youtube-dl %s' % version, body=body)
release_id = new_release['id']
for asset in os.listdir(build_path):

View File

@ -44,5 +44,6 @@ def build_completion(opt_parser):
with open(FISH_COMPLETION_FILE, 'w') as f:
f.write(filled_template)
parser = youtube_dl.parseOpts()[0]
build_completion(parser)

View File

@ -23,6 +23,7 @@ def openssl_encode(algo, key, iv):
out, _ = prog.communicate(secret_msg)
return out
iv = key = [0x20, 0x15] + 14 * [0]
r = openssl_encode('aes-128-cbc', key, iv)

View File

@ -32,5 +32,6 @@ def main():
with open('supportedsites.html', 'w', encoding='utf-8') as sitesf:
sitesf.write(template)
if __name__ == '__main__':
main()

View File

@ -28,5 +28,6 @@ def main():
with io.open(outfile, 'w', encoding='utf-8') as outf:
outf.write(out)
if __name__ == '__main__':
main()

View File

@ -59,6 +59,7 @@ def build_lazy_ie(ie, name):
s += make_valid_template.format(valid_url=ie._make_valid_url())
return s
# find the correct sorting and add the required base classes so that sublcasses
# can be correctly created
classes = _ALL_CLASSES[:-1]

View File

@ -41,5 +41,6 @@ def main():
with io.open(outfile, 'w', encoding='utf-8') as outf:
outf.write(out)
if __name__ == '__main__':
main()

View File

@ -74,5 +74,6 @@ def filter_options(readme):
return ret
if __name__ == '__main__':
main()

View File

@ -110,7 +110,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
ROOT=$(pwd)
python devscripts/create-github-release.py $version "$ROOT/build/$version"
python devscripts/create-github-release.py ChangeLog $version "$ROOT/build/$version"
ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"

View File

@ -44,5 +44,6 @@ def build_completion(opt_parser):
with open(ZSH_COMPLETION_FILE, "w") as f:
f.write(template)
parser = youtube_dl.parseOpts()[0]
build_completion(parser)

View File

@ -131,7 +131,8 @@
- **cbsnews**: CBS News
- **cbsnews:livevideo**: CBS News Live Videos
- **CBSSports**
- **CCTV**
- **CCMA**
- **CCTV**: 央视网
- **CDA**
- **CeskaTelevize**
- **channel9**: Channel 9
@ -158,6 +159,7 @@
- **CollegeRama**
- **ComCarCoff**
- **ComedyCentral**
- **ComedyCentralFullEpisodes**
- **ComedyCentralShortname**
- **ComedyCentralTV**
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
@ -225,6 +227,7 @@
- **EroProfile**
- **Escapist**
- **ESPN**
- **ESPNArticle**
- **EsriVideo**
- **Europa**
- **EveryonesMixtape**
@ -247,6 +250,7 @@
- **FootyRoom**
- **Formula1**
- **FOX**
- **FOX9**
- **Foxgay**
- **foxnews**: Fox News and Fox Business Video
- **foxnews:article**
@ -259,7 +263,6 @@
- **francetvinfo.fr**
- **Freesound**
- **freespeech.org**
- **FreeVideo**
- **Funimation**
- **FunnyOrDie**
- **Fusion**
@ -361,7 +364,8 @@
- **kuwo:singer**: 酷我音乐 - 歌手
- **kuwo:song**: 酷我音乐
- **la7.it**
- **Laola1Tv**
- **laola1tv**
- **laola1tv:embed**
- **LCI**
- **Lcp**
- **LcpPlay**
@ -399,6 +403,8 @@
- **MatchTV**
- **MDR**: MDR.DE and KiKA
- **media.ccc.de**
- **Meipai**: 美拍
- **MelonVOD**
- **META**
- **metacafe**
- **Metacritic**
@ -510,6 +516,8 @@
- **NRKPlaylist**
- **NRKSkole**: NRK Skole
- **NRKTV**: NRK TV and NRK Radio
- **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte
- **NRKTVEpisodes**
- **ntv.ru**
- **Nuvid**
- **NYTimes**
@ -520,6 +528,7 @@
- **Odnoklassniki**
- **OktoberfestTV**
- **on.aol.com**
- **OnDemandKorea**
- **onet.tv**
- **onet.tv:channel**
- **OnionStudios**
@ -543,6 +552,7 @@
- **PhilharmonieDeParis**: Philharmonie de Paris
- **phoenix.de**
- **Photobucket**
- **Piksel**
- **Pinkbike**
- **Pladform**
- **play.fm**
@ -641,7 +651,6 @@
- **Screencast**
- **ScreencastOMatic**
- **ScreenJunkies**
- **ScreenwaveMedia**
- **Seeker**
- **SenateISVP**
- **SendtoNews**
@ -650,6 +659,7 @@
- **Shahid**
- **Shared**: shared.sx
- **ShareSix**
- **ShowRoomLive**
- **Sina**
- **SixPlay**
- **skynewsarabia:article**
@ -713,7 +723,7 @@
- **teachertube:user:collection**: teachertube.com user and collection videos
- **TeachingChannel**
- **Teamcoco**
- **TeamFour**
- **TeamFourStar**
- **TechTalks**
- **techtv.mit.edu**
- **ted**
@ -769,6 +779,8 @@
- **TV2Article**
- **TV3**
- **TV4**: tv4.se and tv4play.se
- **TVANouvelles**
- **TVANouvellesArticle**
- **TVC**
- **TVCArticle**
- **tvigle**: Интернет-телевидение Tvigle.ru
@ -780,10 +792,13 @@
- **Tweakers**
- **twitch:chapter**
- **twitch:clips**
- **twitch:past_broadcasts**
- **twitch:profile**
- **twitch:stream**
- **twitch:video**
- **twitch:videos:all**
- **twitch:videos:highlights**
- **twitch:videos:past-broadcasts**
- **twitch:videos:uploads**
- **twitch:vod**
- **twitter**
- **twitter:amplify**
@ -791,6 +806,7 @@
- **udemy**
- **udemy:course**
- **UDNEmbed**: 聯合影音
- **UKTVPlay**
- **Unistra**
- **uol.com.br**
- **uplynk**
@ -819,6 +835,7 @@
- **ViceShow**
- **Vidbit**
- **Viddler**
- **Videa**
- **video.google:search**: Google Video search
- **video.mit.edu**
- **VideoDetective**
@ -855,6 +872,9 @@
- **Vimple**: Vimple - one-click video hosting
- **Vine**
- **vine:user**
- **Viu**
- **viu:ott**
- **viu:playlist**
- **Vivo**: vivo.sx
- **vk**: VK
- **vk:uservideos**: VK - User's Videos
@ -869,7 +889,9 @@
- **VRT**
- **vube**: Vube.com
- **VuClip**
- **VVVVID**
- **VyboryMos**
- **Vzaar**
- **Walla**
- **washingtonpost**
- **washingtonpost:article**
@ -877,6 +899,8 @@
- **WatchIndianPorn**: Watch Indian Porn
- **WDR**
- **wdr:mobile**
- **Webcaster**
- **WebcasterFeed**
- **WebOfStories**
- **WebOfStoriesPlaylist**
- **WeiqiTV**: WQTV

View File

@ -84,5 +84,6 @@ class TestInfoExtractor(unittest.TestCase):
self.assertRaises(ExtractorError, self.ie._download_json, uri, None)
self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
if __name__ == '__main__':
unittest.main()

View File

@ -51,5 +51,6 @@ class TestAES(unittest.TestCase):
decrypted = (aes_decrypt_text(encrypted, password, 32))
self.assertEqual(decrypted, self.secret_msg)
if __name__ == '__main__':
unittest.main()

View File

@ -60,6 +60,7 @@ def _file_md5(fn):
with open(fn, 'rb') as f:
return hashlib.md5(f.read()).hexdigest()
defs = gettestcases()
@ -217,6 +218,7 @@ def generator(test_case):
return test_template
# And add them to TestDownload
for n, test_case in enumerate(defs):
test_method = generator(test_case)

View File

@ -39,5 +39,6 @@ class TestExecution(unittest.TestCase):
_, stderr = p.communicate()
self.assertFalse(stderr)
if __name__ == '__main__':
unittest.main()

View File

@ -169,5 +169,6 @@ class TestProxy(unittest.TestCase):
# b'xn--fiq228c' is '中文'.encode('idna')
self.assertEqual(response, 'normal: http://xn--fiq228c.tw/')
if __name__ == '__main__':
unittest.main()

View File

@ -43,5 +43,6 @@ class TestIqiyiSDKInterpreter(unittest.TestCase):
ie._login()
self.assertTrue('unable to log in:' in logger.messages[0])
if __name__ == '__main__':
unittest.main()

View File

@ -104,6 +104,14 @@ class TestJSInterpreter(unittest.TestCase):
}''')
self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50])
def test_call(self):
jsi = JSInterpreter('''
function x() { return 2; }
function y(a) { return x() + a; }
function z() { return y(3); }
''')
self.assertEqual(jsi.call_function('z'), 5)
if __name__ == '__main__':
unittest.main()

View File

@ -70,6 +70,7 @@ from youtube_dl.utils import (
lowercase_escape,
url_basename,
base_url,
urljoin,
urlencode_postdata,
urshift,
update_url_query,
@ -445,6 +446,23 @@ class TestUtil(unittest.TestCase):
self.assertEqual(base_url('http://foo.de/bar/baz'), 'http://foo.de/bar/')
self.assertEqual(base_url('http://foo.de/bar/baz?x=z/x/c'), 'http://foo.de/bar/')
def test_urljoin(self):
self.assertEqual(urljoin('http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
self.assertEqual(urljoin('//foo.de/', '/a/b/c.txt'), '//foo.de/a/b/c.txt')
self.assertEqual(urljoin('http://foo.de/', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt')
self.assertEqual(urljoin('http://foo.de', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
self.assertEqual(urljoin('http://foo.de', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt')
self.assertEqual(urljoin('http://foo.de/', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
self.assertEqual(urljoin('http://foo.de/', '//foo.de/a/b/c.txt'), '//foo.de/a/b/c.txt')
self.assertEqual(urljoin(None, 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
self.assertEqual(urljoin(None, '//foo.de/a/b/c.txt'), '//foo.de/a/b/c.txt')
self.assertEqual(urljoin('', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
self.assertEqual(urljoin(['foobar'], 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
self.assertEqual(urljoin('http://foo.de/', None), None)
self.assertEqual(urljoin('http://foo.de/', ''), None)
self.assertEqual(urljoin('http://foo.de/', ['foobar']), None)
self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt')
def test_parse_age_limit(self):
self.assertEqual(parse_age_limit(None), None)
self.assertEqual(parse_age_limit(False), None)
@ -1075,5 +1093,6 @@ The first line
self.assertEqual(get_element_by_class('foo', html), 'nice')
self.assertEqual(get_element_by_class('no-such-class', html), None)
if __name__ == '__main__':
unittest.main()

View File

@ -66,5 +66,6 @@ class TestVerboseOutput(unittest.TestCase):
self.assertTrue(b'-p' in serr)
self.assertTrue(b'secret' not in serr)
if __name__ == '__main__':
unittest.main()

View File

@ -24,6 +24,7 @@ class YoutubeDL(youtube_dl.YoutubeDL):
super(YoutubeDL, self).__init__(*args, **kwargs)
self.to_stderr = self.to_screen
params = get_params({
'writeannotations': True,
'skip_download': True,
@ -74,5 +75,6 @@ class TestAnnotations(unittest.TestCase):
def tearDown(self):
try_rm(ANNOTATIONS_FILE)
if __name__ == '__main__':
unittest.main()

View File

@ -66,5 +66,6 @@ class TestYoutubeLists(unittest.TestCase):
for entry in result['entries']:
self.assertTrue(entry.get('title'))
if __name__ == '__main__':
unittest.main()

View File

@ -114,6 +114,7 @@ def make_tfunc(url, stype, sig_input, expected_sig):
test_func.__name__ = str('test_signature_' + stype + '_' + test_id)
setattr(TestSignature, test_func.__name__, test_func)
for test_spec in _TESTS:
make_tfunc(*test_spec)

View File

@ -95,8 +95,7 @@ def _real_main(argv=None):
write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
except IOError:
sys.exit('ERROR: batch file could not be read')
all_urls = batch_urls + args
all_urls = [url.strip() for url in all_urls]
all_urls = batch_urls + [url.strip() for url in args] # batch_urls are already striped in read_batch_urls
_enc = preferredencoding()
all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
@ -406,7 +405,7 @@ def _real_main(argv=None):
'postprocessor_args': postprocessor_args,
'cn_verification_proxy': opts.cn_verification_proxy,
'geo_verification_proxy': opts.geo_verification_proxy,
'config_location': opts.config_location,
}
with YoutubeDL(ydl_opts) as ydl:
@ -450,4 +449,5 @@ def main(argv=None):
except KeyboardInterrupt:
sys.exit('\nERROR: Interrupted by user')
__all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors']

View File

@ -174,6 +174,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
return plaintext
RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
@ -328,4 +329,5 @@ def inc(data):
break
return data
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']

View File

@ -2491,6 +2491,7 @@ class _TreeBuilder(etree.TreeBuilder):
def doctype(self, name, pubid, system):
pass
if sys.version_info[0] >= 3:
def compat_etree_fromstring(text):
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
@ -2787,6 +2788,7 @@ def workaround_optparse_bug9161():
return real_add_option(self, *bargs, **bkwargs)
optparse.OptionGroup.add_option = _compat_add_option
if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3
compat_get_terminal_size = shutil.get_terminal_size
else:

View File

@ -293,6 +293,7 @@ class FFmpegFD(ExternalFD):
class AVconvFD(FFmpegFD):
pass
_BY_NAME = dict(
(klass.get_basename(), klass)
for name, klass in globals().items()

View File

@ -314,7 +314,8 @@ class F4mFD(FragmentFD):
man_url = info_dict['url']
requested_bitrate = info_dict.get('tbr')
self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
urlh = self.ydl.urlopen(man_url)
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
man_url = urlh.geturl()
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
# (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244
@ -387,7 +388,10 @@ class F4mFD(FragmentFD):
url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query))
frag_filename = '%s-%s' % (ctx['tmpfilename'], name)
try:
success = ctx['dl'].download(frag_filename, {'url': url_parsed.geturl()})
success = ctx['dl'].download(frag_filename, {
'url': url_parsed.geturl(),
'http_headers': info_dict.get('http_headers'),
})
if not success:
return False
(down, frag_sanitized) = sanitize_open(frag_filename, 'rb')

View File

@ -9,6 +9,7 @@ from ..utils import (
error_to_compat_str,
encodeFilename,
sanitize_open,
sanitized_Request,
)
@ -37,6 +38,10 @@ class FragmentFD(FileDownloader):
def report_skip_fragment(self, fragment_name):
self.to_screen('[download] Skipping fragment %s...' % fragment_name)
def _prepare_url(self, info_dict, url):
headers = info_dict.get('http_headers')
return sanitized_Request(url, None, headers) if headers else url
def _prepare_and_start_frag_download(self, ctx):
self._prepare_frag_download(ctx)
self._start_frag_download(ctx)

View File

@ -59,11 +59,15 @@ class HlsFD(FragmentFD):
def real_download(self, filename, info_dict):
man_url = info_dict['url']
self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
manifest = self.ydl.urlopen(man_url).read()
manifest = self.ydl.urlopen(self._prepare_url(info_dict, man_url)).read()
s = manifest.decode('utf-8', 'ignore')
if not self.can_download(s, info_dict):
if info_dict.get('extra_param_to_segment_url'):
self.report_error('pycrypto not found. Please install it.')
return False
self.report_warning(
'hlsnative has detected features it does not support, '
'extraction will be delegated to ffmpeg')
@ -112,7 +116,10 @@ class HlsFD(FragmentFD):
count = 0
while count <= fragment_retries:
try:
success = ctx['dl'].download(frag_filename, {'url': frag_url})
success = ctx['dl'].download(frag_filename, {
'url': frag_url,
'http_headers': info_dict.get('http_headers'),
})
if not success:
return False
down, frag_sanitized = sanitize_open(frag_filename, 'rb')

View File

@ -129,7 +129,7 @@ def write_piff_header(stream, params):
sample_entry_payload += u1616.pack(params['sampling_rate'])
if fourcc == 'AACL':
smaple_entry_box = box(b'mp4a', sample_entry_payload)
sample_entry_box = box(b'mp4a', sample_entry_payload)
else:
sample_entry_payload = sample_entry_payload
sample_entry_payload += u16.pack(0) # pre defined
@ -149,9 +149,7 @@ def write_piff_header(stream, params):
if fourcc in ('H264', 'AVC1'):
sps, pps = codec_private_data.split(u32.pack(1))[1:]
avcc_payload = u8.pack(1) # configuration version
avcc_payload += sps[1] # avc profile indication
avcc_payload += sps[2] # profile compatibility
avcc_payload += sps[3] # avc level indication
avcc_payload += sps[1:4] # avc profile indication + profile compatibility + avc level indication
avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete represenation (1) + reserved (11111) + length size minus one
avcc_payload += u8.pack(1) # reserved (0) + number of sps (0000001)
avcc_payload += u16.pack(len(sps))
@ -160,8 +158,8 @@ def write_piff_header(stream, params):
avcc_payload += u16.pack(len(pps))
avcc_payload += pps
sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record
smaple_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry
stsd_payload += smaple_entry_box
sample_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry
stsd_payload += sample_entry_box
stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box

View File

@ -8,6 +8,7 @@ from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
parse_iso8601,
OnDemandPagedList,
)
@ -15,18 +16,33 @@ from ..utils import (
class ACastIE(InfoExtractor):
IE_NAME = 'acast'
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<channel>[^/]+)/(?P<id>[^/#?]+)'
_TEST = {
_TESTS = [{
# test with one bling
'url': 'https://www.acast.com/condenasttraveler/-where-are-you-taipei-101-taiwan',
'md5': 'ada3de5a1e3a2a381327d749854788bb',
'info_dict': {
'id': '57de3baa-4bb0-487e-9418-2692c1277a34',
'ext': 'mp3',
'title': '"Where Are You?": Taipei 101, Taiwan',
'timestamp': 1196172000000,
'timestamp': 1196172000,
'upload_date': '20071127',
'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e',
'duration': 211,
}
}
}, {
# test with multiple blings
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
'md5': '55c0097badd7095f494c99a172f86501',
'info_dict': {
'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
'ext': 'mp3',
'title': '2. Raggarmordet - Röster ur det förflutna',
'timestamp': 1477346700,
'upload_date': '20161024',
'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4',
'duration': 2797,
}
}]
def _real_extract(self, url):
channel, display_id = re.match(self._VALID_URL, url).groups()
@ -35,11 +51,11 @@ class ACastIE(InfoExtractor):
return {
'id': compat_str(cast_data['id']),
'display_id': display_id,
'url': cast_data['blings'][0]['audio'],
'url': [b['audio'] for b in cast_data['blings'] if b['type'] == 'BlingAudio'][0],
'title': cast_data['name'],
'description': cast_data.get('description'),
'thumbnail': cast_data.get('image'),
'timestamp': int_or_none(cast_data.get('publishingDate')),
'timestamp': parse_iso8601(cast_data.get('publishingDate')),
'duration': int_or_none(cast_data.get('duration')),
}

View File

@ -26,7 +26,7 @@ class AENetworksIE(AENetworksBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)/full-movie)'
_TESTS = [{
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
'md5': '8ff93eb073449f151d6b90c0ae1ef0c7',
'md5': 'a97a65f7e823ae10e9244bc5433d5fe6',
'info_dict': {
'id': '22253814',
'ext': 'mp4',
@ -99,7 +99,7 @@ class AENetworksIE(AENetworksBaseIE):
query = {
'mbr': 'true',
'assetTypes': 'medium_video_s3'
'assetTypes': 'high_video_s3'
}
video_id = self._html_search_meta('aetn:VideoID', webpage)
media_url = self._search_regex(
@ -155,7 +155,7 @@ class HistoryTopicIE(AENetworksBaseIE):
'id': 'world-war-i-history',
'title': 'World War I History',
},
'playlist_mincount': 24,
'playlist_mincount': 23,
}, {
'url': 'http://www.history.com/topics/world-war-i-history/videos',
'only_matching': True,
@ -193,7 +193,8 @@ class HistoryTopicIE(AENetworksBaseIE):
return self.theplatform_url_result(
release_url, video_id, {
'mbr': 'true',
'switch': 'hls'
'switch': 'hls',
'assetTypes': 'high_video_ak',
})
else:
webpage = self._download_webpage(url, topic_id)
@ -203,6 +204,7 @@ class HistoryTopicIE(AENetworksBaseIE):
entries.append(self.theplatform_url_result(
video_attributes['data-release-url'], video_attributes['data-id'], {
'mbr': 'true',
'switch': 'hls'
'switch': 'hls',
'assetTypes': 'high_video_ak',
}))
return self.playlist_result(entries, topic_id, get_element_by_attribute('class', 'show-title', webpage))

View File

@ -11,6 +11,7 @@ from ..compat import (
from ..utils import (
ExtractorError,
int_or_none,
update_url_query,
xpath_element,
xpath_text,
)
@ -18,12 +19,18 @@ from ..utils import (
class AfreecaTVIE(InfoExtractor):
IE_DESC = 'afreecatv.com'
_VALID_URL = r'''(?x)^
https?://(?:(live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
(?:
/app/(?:index|read_ucc_bbs)\.cgi|
/player/[Pp]layer\.(?:swf|html))
\?.*?\bnTitleNo=(?P<id>\d+)'''
_VALID_URL = r'''(?x)
https?://
(?:
(?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
(?:
/app/(?:index|read_ucc_bbs)\.cgi|
/player/[Pp]layer\.(?:swf|html)
)\?.*?\bnTitleNo=|
vod\.afreecatv\.com/PLAYER/STATION/
)
(?P<id>\d+)
'''
_TESTS = [{
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
@ -66,6 +73,9 @@ class AfreecaTVIE(InfoExtractor):
}, {
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
'only_matching': True,
}, {
'url': 'http://vod.afreecatv.com/PLAYER/STATION/15055030',
'only_matching': True,
}]
@staticmethod
@ -83,7 +93,9 @@ class AfreecaTVIE(InfoExtractor):
info_url = compat_urlparse.urlunparse(parsed_url._replace(
netloc='afbbs.afreecatv.com:8080',
path='/api/video/get_video_info.php'))
video_xml = self._download_xml(info_url, video_id)
video_xml = self._download_xml(
update_url_query(info_url, {'nTitleNo': video_id}), video_id)
if xpath_element(video_xml, './track/video/file') is None:
raise ExtractorError('Specified AfreecaTV video does not exist',

View File

@ -10,7 +10,7 @@ from ..utils import (
class AMCNetworksIE(ThePlatformIE):
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?season-\d+/episode-\d+(?:-(?:[^/]+/)?|/))(?P<id>[^/?#]+)'
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?[^/]+/episode-\d+(?:-(?:[^/]+/)?|/))(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
'md5': '',
@ -41,6 +41,9 @@ class AMCNetworksIE(ThePlatformIE):
}, {
'url': 'http://www.ifc.com/movies/chaos',
'only_matching': True,
}, {
'url': 'http://www.bbcamerica.com/shows/doctor-who/full-episodes/the-power-of-the-daleks/episode-01-episode-1-color-version',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@ -157,22 +157,16 @@ class AnvatoIE(InfoExtractor):
video_data_url, video_id, transform_source=strip_jsonp,
data=json.dumps(payload).encode('utf-8'))
def _extract_anvato_videos(self, webpage, video_id):
anvplayer_data = self._parse_json(self._html_search_regex(
r'<script[^>]+data-anvp=\'([^\']+)\'', webpage,
'Anvato player data'), video_id)
video_id = anvplayer_data['video']
access_key = anvplayer_data['accessKey']
def _get_anvato_videos(self, access_key, video_id):
video_data = self._get_video_json(access_key, video_id)
formats = []
for published_url in video_data['published_urls']:
video_url = published_url['embed_url']
media_format = published_url.get('format')
ext = determine_ext(video_url)
if ext == 'smil':
if ext == 'smil' or media_format == 'smil':
formats.extend(self._extract_smil_formats(video_url, video_id))
continue
@ -183,7 +177,7 @@ class AnvatoIE(InfoExtractor):
'tbr': tbr if tbr != 0 else None,
}
if ext == 'm3u8':
if ext == 'm3u8' or media_format in ('m3u8', 'm3u8-variant'):
# Not using _extract_m3u8_formats here as individual media
# playlists are also included in published_urls.
if tbr is None:
@ -194,7 +188,7 @@ class AnvatoIE(InfoExtractor):
'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
'ext': 'mp4',
})
elif ext == 'mp3':
elif ext == 'mp3' or media_format == 'mp3':
a_format['vcodec'] = 'none'
else:
a_format.update({
@ -218,7 +212,19 @@ class AnvatoIE(InfoExtractor):
'formats': formats,
'title': video_data.get('def_title'),
'description': video_data.get('def_description'),
'tags': video_data.get('def_tags', '').split(','),
'categories': video_data.get('categories'),
'thumbnail': video_data.get('thumbnail'),
'timestamp': int_or_none(video_data.get(
'ts_published') or video_data.get('ts_added')),
'uploader': video_data.get('mcp_id'),
'duration': int_or_none(video_data.get('duration')),
'subtitles': subtitles,
}
def _extract_anvato_videos(self, webpage, video_id):
anvplayer_data = self._parse_json(self._html_search_regex(
r'<script[^>]+data-anvp=\'([^\']+)\'', webpage,
'Anvato player data'), video_id)
return self._get_anvato_videos(
anvplayer_data['accessKey'], anvplayer_data['video'])

View File

@ -178,8 +178,6 @@ class ARDMediathekIE(InfoExtractor):
('>Leider liegt eine Störung vor.', 'Video %s is unavailable'),
('>Der gewünschte Beitrag ist nicht mehr verfügbar.<',
'Video %s is no longer available'),
('Diese Sendung ist für Jugendliche unter 12 Jahren nicht geeignet. Der Clip ist deshalb nur von 20 bis 6 Uhr verfügbar.',
'This program is only suitable for those aged 12 and older. Video %s is therefore only available between 8 pm and 6 am.'),
)
for pattern, message in ERRORS:

View File

@ -4,8 +4,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
determine_ext,
ExtractorError,
float_or_none,
int_or_none,
mimetype2ext,
@ -15,7 +17,13 @@ from ..utils import (
class ArkenaIE(InfoExtractor):
_VALID_URL = r'https?://play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P<id>[^/]+)/[^/]+/(?P<account_id>\d+)'
_VALID_URL = r'''(?x)
https?://
(?:
video\.arkena\.com/play2/embed/player\?|
play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P<id>[^/]+)/[^/]+/(?P<account_id>\d+)
)
'''
_TESTS = [{
'url': 'https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411',
'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
@ -37,6 +45,9 @@ class ArkenaIE(InfoExtractor):
}, {
'url': 'http://play.arkena.com/embed/avp/v1/player/media/327336/darkmatter/131064/',
'only_matching': True,
}, {
'url': 'http://video.arkena.com/play2/embed/player?accountId=472718&mediaId=35763b3b-00090078-bf604299&pageStyling=styled',
'only_matching': True,
}]
@staticmethod
@ -53,6 +64,14 @@ class ArkenaIE(InfoExtractor):
video_id = mobj.group('id')
account_id = mobj.group('account_id')
# Handle http://video.arkena.com/play2/embed/player URL
if not video_id:
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
video_id = qs.get('mediaId', [None])[0]
account_id = qs.get('accountId', [None])[0]
if not video_id or not account_id:
raise ExtractorError('Invalid URL', expected=True)
playlist = self._download_json(
'https://play.arkena.com/config/avp/v2/player/media/%s/0/%s/?callbackMethod=_'
% (video_id, account_id),

View File

@ -6,8 +6,8 @@ from ..utils import float_or_none
class AudioBoomIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?audioboom\.com/boos/(?P<id>[0-9]+)'
_TEST = {
_VALID_URL = r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://audioboom.com/boos/4279833-3-09-2016-czaban-hour-3?t=0',
'md5': '63a8d73a055c6ed0f1e51921a10a5a76',
'info_dict': {
@ -19,7 +19,10 @@ class AudioBoomIE(InfoExtractor):
'uploader': 'Steve Czaban',
'uploader_url': 're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio',
}
}
}, {
'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)

View File

@ -11,7 +11,7 @@ from ..utils import (
class AzubuIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?azubu\.tv/[^/]+#!/play/(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?azubu\.(?:tv|uol.com.br)/[^/]+#!/play/(?P<id>\d+)'
_TESTS = [
{
'url': 'http://www.azubu.tv/GSL#!/play/15575/2014-hot6-cup-last-big-match-ro8-day-1',
@ -103,12 +103,15 @@ class AzubuIE(InfoExtractor):
class AzubuLiveIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?azubu\.tv/(?P<id>[^/]+)$'
_VALID_URL = r'https?://(?:www\.)?azubu\.(?:tv|uol.com.br)/(?P<id>[^/]+)$'
_TEST = {
_TESTS = [{
'url': 'http://www.azubu.tv/MarsTVMDLen',
'only_matching': True,
}
}, {
'url': 'http://azubu.uol.com.br/adolfz',
'only_matching': True,
}]
def _real_extract(self, url):
user = self._match_id(url)

View File

@ -1,7 +1,9 @@
from __future__ import unicode_literals
import json
import random
import re
import time
from .common import InfoExtractor
from ..compat import (
@ -12,6 +14,9 @@ from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
parse_filesize,
unescapeHTML,
update_url_query,
)
@ -81,35 +86,68 @@ class BandcampIE(InfoExtractor):
r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
webpage, 'video id')
download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page')
# We get the dictionary of the track from some javascript code
all_info = self._parse_json(self._search_regex(
r'(?sm)items: (.*?),$', download_webpage, 'items'), video_id)
info = all_info[0]
# We pick mp3-320 for now, until format selection can be easily implemented.
mp3_info = info['downloads']['mp3-320']
# If we try to use this url it says the link has expired
initial_url = mp3_info['url']
m_url = re.match(
r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$',
initial_url)
# We build the url we will use to get the final track url
# This url is build in Bandcamp in the script download_bunde_*.js
request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts'))
final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url')
# If we could correctly generate the .rand field the url would be
# in the "download_url" key
final_url = self._proto_relative_url(self._search_regex(
r'"retry_url":"(.+?)"', final_url_webpage, 'final video URL'), 'http:')
download_webpage = self._download_webpage(
download_link, video_id, 'Downloading free downloads page')
blob = self._parse_json(
self._search_regex(
r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
'blob', group='blob'),
video_id, transform_source=unescapeHTML)
info = blob['digital_items'][0]
downloads = info['downloads']
track = info['title']
artist = info.get('artist')
title = '%s - %s' % (artist, track) if artist else track
download_formats = {}
for f in blob['download_formats']:
name, ext = f.get('name'), f.get('file_extension')
if all(isinstance(x, compat_str) for x in (name, ext)):
download_formats[name] = ext.strip('.')
formats = []
for format_id, f in downloads.items():
format_url = f.get('url')
if not format_url:
continue
# Stat URL generation algorithm is reverse engineered from
# download_*_bundle_*.js
stat_url = update_url_query(
format_url.replace('/download/', '/statdownload/'), {
'.rand': int(time.time() * 1000 * random.random()),
})
format_id = f.get('encoding_name') or format_id
stat = self._download_json(
stat_url, video_id, 'Downloading %s JSON' % format_id,
transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1],
fatal=False)
if not stat:
continue
retry_url = stat.get('retry_url')
if not isinstance(retry_url, compat_str):
continue
formats.append({
'url': self._proto_relative_url(retry_url, 'http:'),
'ext': download_formats.get(format_id),
'format_id': format_id,
'format_note': f.get('description'),
'filesize': parse_filesize(f.get('size_mb')),
'vcodec': 'none',
})
self._sort_formats(formats)
return {
'id': video_id,
'title': info['title'],
'ext': 'mp3',
'vcodec': 'none',
'url': final_url,
'title': title,
'thumbnail': info.get('thumb_url'),
'uploader': info.get('artist'),
'artist': artist,
'track': track,
'formats': formats,
}

View File

@ -45,7 +45,8 @@ class BloombergIE(InfoExtractor):
name = self._match_id(url)
webpage = self._download_webpage(url, name)
video_id = self._search_regex(
r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>.+?)\1',
(r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
r'videoId\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
webpage, 'id', group='url', default=None)
if not video_id:
bplayer_data = self._parse_json(self._search_regex(

View File

@ -232,13 +232,16 @@ class BrightcoveLegacyIE(InfoExtractor):
"""Return a list of all Brightcove URLs from the webpage """
url_m = re.search(
r'<meta\s+property=[\'"]og:video[\'"]\s+content=[\'"](https?://(?:secure|c)\.brightcove.com/[^\'"]+)[\'"]',
webpage)
r'''(?x)
<meta\s+
(?:property|itemprop)=([\'"])(?:og:video|embedURL)\1[^>]+
content=([\'"])(?P<url>https?://(?:secure|c)\.brightcove.com/(?:(?!\2).)+)\2
''', webpage)
if url_m:
url = unescapeHTML(url_m.group(1))
url = unescapeHTML(url_m.group('url'))
# Some sites don't add it, we can't download with this url, for example:
# http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
if 'playerKey' in url or 'videoId' in url:
if 'playerKey' in url or 'videoId' in url or 'idVideo' in url:
return [url]
matches = re.findall(
@ -259,7 +262,7 @@ class BrightcoveLegacyIE(InfoExtractor):
url, smuggled_data = unsmuggle_url(url, {})
# Change the 'videoId' and others field to '@videoPlayer'
url = re.sub(r'(?<=[?&])(videoI(d|D)|bctid)', '%40videoPlayer', url)
url = re.sub(r'(?<=[?&])(videoI(d|D)|idVideo|bctid)', '%40videoPlayer', url)
# Change bckey (used by bcove.me urls) to playerKey
url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
mobj = re.match(self._VALID_URL, url)
@ -548,7 +551,7 @@ class BrightcoveNewIE(InfoExtractor):
container = source.get('container')
ext = mimetype2ext(source.get('type'))
src = source.get('src')
if ext == 'ism':
if ext == 'ism' or container == 'WVM':
continue
elif ext == 'm3u8' or container == 'M2TS':
if not src:

View File

@ -105,7 +105,8 @@ class CanalplusIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
[r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)',
r'id=["\']canal_video_player(?P<id>\d+)'],
r'id=["\']canal_video_player(?P<id>\d+)',
r'data-video=["\'](?P<id>\d+)'],
webpage, 'video id', group='id')
info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id)

View File

@ -89,6 +89,9 @@ class CanvasIE(InfoExtractor):
elif format_type == 'HDS':
formats.extend(self._extract_f4m_formats(
format_url, display_id, f4m_id=format_type, fatal=False))
elif format_type == 'MPEG_DASH':
formats.extend(self._extract_mpd_formats(
format_url, display_id, mpd_id=format_type, fatal=False))
else:
formats.append({
'format_id': format_type,

View File

@ -283,11 +283,6 @@ class CBCWatchVideoIE(CBCWatchBaseIE):
formats = self._extract_m3u8_formats(re.sub(r'/([^/]+)/[^/?]+\.m3u8', r'/\1/\1.m3u8', m3u8_url), video_id, 'mp4', fatal=False)
if len(formats) < 2:
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
# Despite metadata in m3u8 all video+audio formats are
# actually video-only (no audio)
for f in formats:
if f.get('acodec') != 'none' and f.get('vcodec') != 'none':
f['acodec'] = 'none'
self._sort_formats(formats)
info = {

View File

@ -4,11 +4,14 @@ from __future__ import unicode_literals
from .anvato import AnvatoIE
from .sendtonews import SendtoNewsIE
from ..compat import compat_urlparse
from ..utils import unified_timestamp
from ..utils import (
parse_iso8601,
unified_timestamp,
)
class CBSLocalIE(AnvatoIE):
_VALID_URL = r'https?://[a-z]+\.cbslocal\.com/\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
_VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P<id>[0-9a-z-]+)'
_TESTS = [{
# Anvato backend
@ -22,6 +25,7 @@ class CBSLocalIE(AnvatoIE):
'thumbnail': 're:^https?://.*',
'timestamp': 1463440500,
'upload_date': '20160516',
'uploader': 'CBS',
'subtitles': {
'en': 'mincount:5',
},
@ -35,6 +39,7 @@ class CBSLocalIE(AnvatoIE):
'Syndication\\Curb.tv',
'Content\\News'
],
'tags': ['CBS 2 News Evening'],
},
}, {
# SendtoNews embed
@ -47,6 +52,31 @@ class CBSLocalIE(AnvatoIE):
# m3u8 download
'skip_download': True,
},
}, {
'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
'info_dict': {
'id': '3580809',
'ext': 'mp4',
'title': 'A Very Blue Anniversary',
'description': 'CBS2s Cindy Hsu has more.',
'thumbnail': 're:^https?://.*',
'timestamp': 1479962220,
'upload_date': '20161124',
'uploader': 'CBS',
'subtitles': {
'en': 'mincount:5',
},
'categories': [
'Stations\\Spoken Word\\WCBSTV',
'Syndication\\AOL',
'Syndication\\MSN',
'Syndication\\NDN',
'Syndication\\Yahoo',
'Content\\News',
'Content\\News\\Local News',
],
'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
},
}]
def _real_extract(self, url):
@ -62,8 +92,11 @@ class CBSLocalIE(AnvatoIE):
info_dict = self._extract_anvato_videos(webpage, display_id)
time_str = self._html_search_regex(
r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False)
timestamp = unified_timestamp(time_str)
r'class="entry-date">([^<]+)<', webpage, 'released date', default=None)
if time_str:
timestamp = unified_timestamp(time_str)
else:
timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage))
info_dict.update({
'display_id': display_id,

View File

@ -0,0 +1,99 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_duration,
parse_iso8601,
clean_html,
)
class CCMAIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/',
'md5': '7296ca43977c8ea4469e719c609b0871',
'info_dict': {
'id': '5630208',
'ext': 'mp4',
'title': 'L\'espot de La Marató de TV3',
'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
'timestamp': 1470918540,
'upload_date': '20160811',
}
}, {
'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
'md5': 'fa3e38f269329a278271276330261425',
'info_dict': {
'id': '943685',
'ext': 'mp3',
'title': 'El Consell de Savis analitza el derbi',
'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
'upload_date': '20171205',
'timestamp': 1512507300,
}
}]
def _real_extract(self, url):
media_type, media_id = re.match(self._VALID_URL, url).groups()
media_data = {}
formats = []
profiles = ['pc'] if media_type == 'audio' else ['mobil', 'pc']
for i, profile in enumerate(profiles):
md = self._download_json('http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
'media': media_type,
'idint': media_id,
'profile': profile,
}, fatal=False)
if md:
media_data = md
media_url = media_data.get('media', {}).get('url')
if media_url:
formats.append({
'format_id': profile,
'url': media_url,
'quality': i,
})
self._sort_formats(formats)
informacio = media_data['informacio']
title = informacio['titol']
durada = informacio.get('durada', {})
duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
timestamp = parse_iso8601(informacio.get('data_emissio', {}).get('utc'))
subtitles = {}
subtitols = media_data.get('subtitols', {})
if subtitols:
sub_url = subtitols.get('url')
if sub_url:
subtitles.setdefault(
subtitols.get('iso') or subtitols.get('text') or 'ca', []).append({
'url': sub_url,
})
thumbnails = []
imatges = media_data.get('imatges', {})
if imatges:
thumbnail_url = imatges.get('url')
if thumbnail_url:
thumbnails = [{
'url': thumbnail_url,
'width': int_or_none(imatges.get('amplada')),
'height': int_or_none(imatges.get('alcada')),
}]
return {
'id': media_id,
'title': title,
'description': clean_html(informacio.get('descripcio')),
'duration': duration,
'timestamp': timestamp,
'thumnails': thumbnails,
'subtitles': subtitles,
'formats': formats,
}

View File

@ -4,27 +4,84 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import float_or_none
from ..compat import compat_str
from ..utils import (
float_or_none,
try_get,
unified_timestamp,
)
class CCTVIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(?:.+?\.)?
(?:
cctv\.(?:com|cn)|
cntv\.cn
)/
(?:
video/[^/]+/(?P<id>[0-9a-f]{32})|
\d{4}/\d{2}/\d{2}/(?P<display_id>VID[0-9A-Za-z]+)
)'''
IE_DESC = '央视网'
_VALID_URL = r'https?://(?:[^/]+)\.(?:cntv|cctv)\.(?:com|cn)/(?:[^/]+/)*?(?P<id>[^/?#&]+?)(?:/index)?(?:\.s?html|[?#&]|$)'
_TESTS = [{
'url': 'http://english.cntv.cn/2016/09/03/VIDEhnkB5y9AgHyIEVphCEz1160903.shtml',
'md5': '819c7b49fc3927d529fb4cd555621823',
'url': 'http://sports.cntv.cn/2016/02/12/ARTIaBRxv4rTT1yWf1frW2wi160212.shtml',
'md5': 'd61ec00a493e09da810bf406a078f691',
'info_dict': {
'id': '454368eb19ad44a1925bf1eb96140a61',
'id': '5ecdbeab623f4973b40ff25f18b174e8',
'ext': 'mp4',
'title': 'Portrait of Real Current Life 09/03/2016 Modern Inventors Part 1',
}
'title': '[NBA]二少联手砍下46分 雷霆主场击败鹈鹕(快讯)',
'description': 'md5:7e14a5328dc5eb3d1cd6afbbe0574e95',
'duration': 98,
'uploader': 'songjunjie',
'timestamp': 1455279956,
'upload_date': '20160212',
},
}, {
'url': 'http://tv.cctv.com/2016/02/05/VIDEUS7apq3lKrHG9Dncm03B160205.shtml',
'info_dict': {
'id': 'efc5d49e5b3b4ab2b34f3a502b73d3ae',
'ext': 'mp4',
'title': '[赛车]“车王”舒马赫恢复情况成谜(快讯)',
'description': '2月4日蒙特泽莫罗透露了关于“车王”舒马赫恢复情况但情况是否属实遭到了质疑。',
'duration': 37,
'uploader': 'shujun',
'timestamp': 1454677291,
'upload_date': '20160205',
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://english.cntv.cn/special/four_comprehensives/index.shtml',
'info_dict': {
'id': '4bb9bb4db7a6471ba85fdeda5af0381e',
'ext': 'mp4',
'title': 'NHnews008 ANNUAL POLITICAL SEASON',
'description': 'Four Comprehensives',
'duration': 60,
'uploader': 'zhangyunlei',
'timestamp': 1425385521,
'upload_date': '20150303',
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://cctv.cntv.cn/lm/tvseries_russian/yilugesanghua/index.shtml',
'info_dict': {
'id': 'b15f009ff45c43968b9af583fc2e04b2',
'ext': 'mp4',
'title': 'Путь,усыпанный космеями Серия 1',
'description': 'Путь, усыпанный космеями',
'duration': 2645,
'uploader': 'renxue',
'timestamp': 1477479241,
'upload_date': '20161026',
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://ent.cntv.cn/2016/01/18/ARTIjprSSJH8DryTVr5Bx8Wb160118.shtml',
'only_matching': True,
}, {
'url': 'http://tv.cntv.cn/video/C39296/e0210d949f113ddfb38d31f00a4e5c44',
'only_matching': True,
}, {
'url': 'http://english.cntv.cn/2016/09/03/VIDEhnkB5y9AgHyIEVphCEz1160903.shtml',
'only_matching': True,
}, {
'url': 'http://tv.cctv.com/2016/09/07/VIDE5C1FnlX5bUywlrjhxXOV160907.shtml',
'only_matching': True,
@ -34,20 +91,63 @@ class CCTVIE(InfoExtractor):
}]
def _real_extract(self, url):
video_id, display_id = re.match(self._VALID_URL, url).groups()
if not video_id:
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
r'(?:fo\.addVariable\("videoCenterId",\s*|guid\s*=\s*)"([0-9a-f]{32})',
webpage, 'video_id')
api_data = self._download_json(
'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=' + video_id, video_id)
m3u8_url = re.sub(r'maxbr=\d+&?', '', api_data['hls_url'])
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_id = self._search_regex(
[r'var\s+guid\s*=\s*["\']([\da-fA-F]+)',
r'videoCenterId["\']\s*,\s*["\']([\da-fA-F]+)',
r'"changePlayer\s*\(\s*["\']([\da-fA-F]+)',
r'"load[Vv]ideo\s*\(\s*["\']([\da-fA-F]+)'],
webpage, 'video id')
data = self._download_json(
'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do', video_id,
query={
'pid': video_id,
'url': url,
'idl': 32,
'idlr': 32,
'modifyed': 'false',
})
title = data['title']
formats = []
video = data.get('video')
if isinstance(video, dict):
for quality, chapters_key in enumerate(('lowChapters', 'chapters')):
video_url = try_get(
video, lambda x: x[chapters_key][0]['url'], compat_str)
if video_url:
formats.append({
'url': video_url,
'format_id': 'http',
'quality': quality,
'preference': -1,
})
hls_url = try_get(data, lambda x: x['hls_url'], compat_str)
if hls_url:
hls_url = re.sub(r'maxbr=\d+&?', '', hls_url)
formats.extend(self._extract_m3u8_formats(
hls_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
self._sort_formats(formats)
uploader = data.get('editer_name')
description = self._html_search_meta('description', webpage)
timestamp = unified_timestamp(data.get('f_pgmtime'))
duration = float_or_none(try_get(video, lambda x: x['totalLength']))
return {
'id': video_id,
'title': api_data['title'],
'formats': self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False),
'duration': float_or_none(api_data.get('video', {}).get('totalLength')),
'title': title,
'description': description,
'uploader': uploader,
'timestamp': timestamp,
'duration': duration,
'formats': formats,
}

View File

@ -5,14 +5,16 @@ import re
from .common import InfoExtractor
from ..utils import (
decode_packed_codes,
ExtractorError,
parse_duration
float_or_none,
int_or_none,
parse_duration,
)
class CDAIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
_BASE_URL = 'http://www.cda.pl/'
_TESTS = [{
'url': 'http://www.cda.pl/video/5749950c',
'md5': '6f844bf51b15f31fae165365707ae970',
@ -21,6 +23,9 @@ class CDAIE(InfoExtractor):
'ext': 'mp4',
'height': 720,
'title': 'Oto dlaczego przed zakrętem należy zwolnić.',
'description': 'md5:269ccd135d550da90d1662651fcb9772',
'thumbnail': 're:^https?://.*\.jpg$',
'average_rating': float,
'duration': 39
}
}, {
@ -30,6 +35,11 @@ class CDAIE(InfoExtractor):
'id': '57413289',
'ext': 'mp4',
'title': 'Lądowanie na lotnisku na Maderze',
'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': 'crash404',
'view_count': int,
'average_rating': float,
'duration': 137
}
}, {
@ -39,31 +49,55 @@ class CDAIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage('http://ebd.cda.pl/0x0/' + video_id, video_id)
self._set_cookie('cda.pl', 'cda.player', 'html5')
webpage = self._download_webpage(
self._BASE_URL + '/video/' + video_id, video_id)
if 'Ten film jest dostępny dla użytkowników premium' in webpage:
raise ExtractorError('This video is only available for premium users.', expected=True)
title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
formats = []
uploader = self._search_regex(r'''(?x)
<(span|meta)[^>]+itemprop=(["\'])author\2[^>]*>
(?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*?
<(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
''', webpage, 'uploader', default=None, group='uploader')
view_count = self._search_regex(
r'Odsłony:(?:\s|&nbsp;)*([0-9]+)', webpage,
'view_count', default=None)
average_rating = self._search_regex(
r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
webpage, 'rating', fatal=False, group='rating_value')
info_dict = {
'id': video_id,
'title': title,
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'uploader': uploader,
'view_count': int_or_none(view_count),
'average_rating': float_or_none(average_rating),
'thumbnail': self._og_search_thumbnail(webpage),
'formats': formats,
'duration': None,
}
def extract_format(page, version):
unpacked = decode_packed_codes(page)
format_url = self._search_regex(
r"(?:file|url)\s*:\s*(\\?[\"'])(?P<url>http.+?)\1", unpacked,
'%s url' % version, fatal=False, group='url')
if not format_url:
json_str = self._search_regex(
r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
'%s player_json' % version, fatal=False, group='player_data')
if not json_str:
return
player_data = self._parse_json(
json_str, '%s player_data' % version, fatal=False)
if not player_data:
return
video = player_data.get('video')
if not video or 'file' not in video:
self.report_warning('Unable to extract %s version information' % version)
return
f = {
'url': format_url,
'url': video['file'],
}
m = re.search(
r'<a[^>]+data-quality="(?P<format_id>[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P<height>[0-9]+)p',
@ -75,9 +109,7 @@ class CDAIE(InfoExtractor):
})
info_dict['formats'].append(f)
if not info_dict['duration']:
info_dict['duration'] = parse_duration(self._search_regex(
r"duration\s*:\s*(\\?[\"'])(?P<duration>.+?)\1",
unpacked, 'duration', fatal=False, group='duration'))
info_dict['duration'] = parse_duration(video.get('duration'))
extract_format(webpage, 'default')
@ -85,7 +117,8 @@ class CDAIE(InfoExtractor):
r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)',
webpage):
webpage = self._download_webpage(
href, video_id, 'Downloading %s version information' % resolution, fatal=False)
self._BASE_URL + href, video_id,
'Downloading %s version information' % resolution, fatal=False)
if not webpage:
# Manually report warning because empty page is returned when
# invalid version is requested.

View File

@ -6,7 +6,7 @@ from .common import InfoExtractor
class ComedyCentralIE(MTVServicesInfoExtractor):
_VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
(video-clips|episodes|cc-studios|video-collections|full-episodes|shows)
(video-clips|episodes|cc-studios|video-collections|shows(?=/[^/]+/(?!full-episodes)))
/(?P<title>.*)'''
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
@ -27,6 +27,40 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
}]
class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor):
_VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
(?:full-episodes|shows(?=/[^/]+/full-episodes))
/(?P<id>[^?]+)'''
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
_TESTS = [{
'url': 'http://www.cc.com/full-episodes/pv391a/the-daily-show-with-trevor-noah-november-28--2016---ryan-speedo-green-season-22-ep-22028',
'info_dict': {
'description': 'Donald Trump is accused of exploiting his president-elect status for personal gain, Cuban leader Fidel Castro dies, and Ryan Speedo Green discusses "Sing for Your Life."',
'title': 'November 28, 2016 - Ryan Speedo Green',
},
'playlist_count': 4,
}, {
'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
'only_matching': True,
}]
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
feed_json = self._search_regex(r'var triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage, 'triforce feeed')
feed = self._parse_json(feed_json, playlist_id)
zones = feed['manifest']['zones']
video_zone = zones['t2_lc_promo1']
feed = self._download_json(video_zone['feed'], playlist_id)
mgid = feed['result']['data']['id']
videos_info = self._get_videos_info(mgid)
return videos_info
class ToshIE(MTVServicesInfoExtractor):
IE_DESC = 'Tosh.0'
_VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)'

View File

@ -59,6 +59,7 @@ from ..utils import (
parse_m3u8_attributes,
extract_attributes,
parse_codecs,
urljoin,
)
@ -188,9 +189,10 @@ class InfoExtractor(object):
uploader_url: Full URL to a personal webpage of the video uploader.
location: Physical location where the video was filmed.
subtitles: The available subtitles as a dictionary in the format
{language: subformats}. "subformats" is a list sorted from
lower to higher preference, each element is a dictionary
with the "ext" entry and one of:
{tag: subformats}. "tag" is usually a language code, and
"subformats" is a list sorted from lower to higher
preference, each element is a dictionary with the "ext"
entry and one of:
* "data": The subtitles file contents
* "url": A URL pointing to the subtitles file
"ext" will be calculated from URL if missing
@ -886,7 +888,7 @@ class InfoExtractor(object):
'url': e.get('contentUrl'),
'title': unescapeHTML(e.get('name')),
'description': unescapeHTML(e.get('description')),
'thumbnail': e.get('thumbnailUrl'),
'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
'duration': parse_duration(e.get('duration')),
'timestamp': unified_timestamp(e.get('uploadDate')),
'filesize': float_or_none(e.get('contentSize')),
@ -1224,6 +1226,7 @@ class InfoExtractor(object):
'protocol': entry_protocol,
'preference': preference,
}]
audio_in_video_stream = {}
last_info = {}
last_media = {}
for line in m3u8_doc.splitlines():
@ -1233,25 +1236,32 @@ class InfoExtractor(object):
media = parse_m3u8_attributes(line)
media_type = media.get('TYPE')
if media_type in ('VIDEO', 'AUDIO'):
group_id = media.get('GROUP-ID')
media_url = media.get('URI')
if media_url:
format_id = []
for v in (media.get('GROUP-ID'), media.get('NAME')):
for v in (group_id, media.get('NAME')):
if v:
format_id.append(v)
formats.append({
f = {
'format_id': '-'.join(format_id),
'url': format_url(media_url),
'language': media.get('LANGUAGE'),
'vcodec': 'none' if media_type == 'AUDIO' else None,
'ext': ext,
'protocol': entry_protocol,
'preference': preference,
})
}
if media_type == 'AUDIO':
f['vcodec'] = 'none'
if group_id and not audio_in_video_stream.get(group_id):
audio_in_video_stream[group_id] = False
formats.append(f)
else:
# When there is no URI in EXT-X-MEDIA let this tag's
# data be used by regular URI lines below
last_media = media
if media_type == 'AUDIO' and group_id:
audio_in_video_stream[group_id] = True
elif line.startswith('#') or not line.strip():
continue
else:
@ -1280,9 +1290,10 @@ class InfoExtractor(object):
}
resolution = last_info.get('RESOLUTION')
if resolution:
width_str, height_str = resolution.split('x')
f['width'] = int(width_str)
f['height'] = int(height_str)
mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
if mobj:
f['width'] = int(mobj.group('width'))
f['height'] = int(mobj.group('height'))
# Unified Streaming Platform
mobj = re.search(
r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url'])
@ -1294,6 +1305,9 @@ class InfoExtractor(object):
'abr': abr,
})
f.update(parse_codecs(last_info.get('CODECS')))
if audio_in_video_stream.get(last_info.get('AUDIO')) is False:
# TODO: update acodec for for audio only formats with the same GROUP-ID
f['acodec'] = 'none'
formats.append(f)
last_info = {}
last_media = {}
@ -1623,11 +1637,6 @@ class InfoExtractor(object):
extract_Initialization(segment_template)
return ms_info
def combine_url(base_url, target_url):
if re.match(r'^https?://', target_url):
return target_url
return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
formats = []
for period in mpd_doc.findall(_add_ns('Period')):
@ -1677,12 +1686,11 @@ class InfoExtractor(object):
'tbr': int_or_none(representation_attrib.get('bandwidth'), 1000),
'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
'fps': int_or_none(representation_attrib.get('frameRate')),
'vcodec': 'none' if content_type == 'audio' else representation_attrib.get('codecs'),
'acodec': 'none' if content_type == 'video' else representation_attrib.get('codecs'),
'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
'format_note': 'DASH %s' % content_type,
'filesize': filesize,
}
f.update(parse_codecs(representation_attrib.get('codecs')))
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
@ -1702,7 +1710,7 @@ class InfoExtractor(object):
representation_ms_info['fragments'] = [{
'url': media_template % {
'Number': segment_number,
'Bandwidth': representation_attrib.get('bandwidth'),
'Bandwidth': int_or_none(representation_attrib.get('bandwidth')),
},
'duration': segment_duration,
} for segment_number in range(
@ -1720,7 +1728,7 @@ class InfoExtractor(object):
def add_segment_url():
segment_url = media_template % {
'Time': segment_time,
'Bandwidth': representation_attrib.get('bandwidth'),
'Bandwidth': int_or_none(representation_attrib.get('bandwidth')),
'Number': segment_number,
}
representation_ms_info['fragments'].append({
@ -1766,7 +1774,7 @@ class InfoExtractor(object):
f['fragments'].append({'url': initialization_url})
f['fragments'].extend(representation_ms_info['fragments'])
for fragment in f['fragments']:
fragment['url'] = combine_url(base_url, fragment['url'])
fragment['url'] = urljoin(base_url, fragment['url'])
try:
existing_format = next(
fo for fo in formats
@ -1880,7 +1888,7 @@ class InfoExtractor(object):
})
return formats
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8'):
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None):
def absolute_url(video_url):
return compat_urlparse.urljoin(base_url, video_url)
@ -1897,11 +1905,16 @@ class InfoExtractor(object):
def _media_formats(src, cur_media_type):
full_url = absolute_url(src)
if determine_ext(full_url) == 'm3u8':
ext = determine_ext(full_url)
if ext == 'm3u8':
is_plain_url = False
formats = self._extract_m3u8_formats(
full_url, video_id, ext='mp4',
entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id)
elif ext == 'mpd':
is_plain_url = False
formats = self._extract_mpd_formats(
full_url, video_id, mpd_id=mpd_id)
else:
is_plain_url = True
formats = [{

View File

@ -236,7 +236,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style']
output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x']
output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y']
output += """ScaledBorderAndShadow: yes
output += """ScaledBorderAndShadow: no
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding

View File

@ -8,7 +8,7 @@ from ..utils import orderedSet
class CTVNewsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
_VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
_TESTS = [{
'url': 'http://www.ctvnews.ca/video?clipId=901995',
'md5': '10deb320dc0ccb8d01d34d12fc2ea672',
@ -40,6 +40,9 @@ class CTVNewsIE(InfoExtractor):
}, {
'url': 'http://www.ctvnews.ca/canadiens-send-p-k-subban-to-nashville-in-blockbuster-trade-1.2967231',
'only_matching': True,
}, {
'url': 'http://vancouverisland.ctvnews.ca/video?clipId=761241',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@ -8,6 +8,7 @@ import time
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
USER_AGENTS,
int_or_none,
update_url_query,
)
@ -102,10 +103,16 @@ class DPlayIE(InfoExtractor):
manifest_url, video_id, ext='mp4',
entry_protocol='m3u8_native', m3u8_id=protocol, fatal=False)
# Sometimes final URLs inside m3u8 are unsigned, let's fix this
# ourselves
# ourselves. Also fragments' URLs are only served signed for
# Safari user agent.
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(manifest_url).query)
for m3u8_format in m3u8_formats:
m3u8_format['url'] = update_url_query(m3u8_format['url'], query)
m3u8_format.update({
'url': update_url_query(m3u8_format['url'], query),
'http_headers': {
'User-Agent': USER_AGENTS['Safari'],
},
})
formats.extend(m3u8_formats)
elif protocol == 'hds':
formats.extend(self._extract_f4m_formats(

View File

@ -10,8 +10,8 @@ from ..utils import (
class DrTuberIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?drtuber\.com/video/(?P<id>\d+)/(?P<display_id>[\w-]+)'
_TEST = {
_VALID_URL = r'https?://(?:www\.)?drtuber\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?'
_TESTS = [{
'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
'md5': '93e680cf2536ad0dfb7e74d94a89facd',
'info_dict': {
@ -25,20 +25,30 @@ class DrTuberIE(InfoExtractor):
'thumbnail': 're:https?://.*\.jpg$',
'age_limit': 18,
}
}
}, {
'url': 'http://www.drtuber.com/embed/489939',
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
return re.findall(
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?drtuber\.com/embed/\d+)',
webpage)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
display_id = mobj.group('display_id') or video_id
webpage = self._download_webpage(url, display_id)
webpage = self._download_webpage(
'http://www.drtuber.com/video/%s' % video_id, display_id)
video_url = self._html_search_regex(
r'<source src="([^"]+)"', webpage, 'video URL')
title = self._html_search_regex(
(r'class="title_watch"[^>]*><p>([^<]+)<',
(r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
r'<p[^>]+class="title_substrate">([^<]+)</p>',
r'<title>([^<]+) - \d+'),
webpage, 'title')

View File

@ -4,11 +4,13 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..compat import (
compat_HTTPError,
compat_str,
)
from ..utils import (
ExtractorError,
int_or_none,
url_basename,
)
@ -77,7 +79,7 @@ class EaglePlatformIE(InfoExtractor):
if status != 200:
raise ExtractorError(' '.join(response['errors']), expected=True)
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'):
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', *args, **kwargs):
try:
response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note)
except ExtractorError as ee:
@ -116,29 +118,38 @@ class EaglePlatformIE(InfoExtractor):
m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
m3u8_formats = self._extract_m3u8_formats(
m3u8_url, video_id,
'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False)
formats.extend(m3u8_formats)
mp4_url = self._get_video_url(
m3u8_formats_dict = {}
for f in m3u8_formats:
if f.get('height') is not None:
m3u8_formats_dict[f['height']] = f
mp4_data = self._download_json(
# Secure mp4 URL is constructed according to Player.prototype.mp4 from
# http://lentaru.media.eagleplatform.com/player/player.js
re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4', secure_m3u8),
video_id, 'Downloading mp4 JSON')
mp4_url_basename = url_basename(mp4_url)
for m3u8_format in m3u8_formats:
mobj = re.search('/([^/]+)/index\.m3u8', m3u8_format['url'])
if mobj:
http_format = m3u8_format.copy()
video_url = mp4_url.replace(mp4_url_basename, mobj.group(1))
if not self._is_valid_url(video_url, video_id):
re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4s', secure_m3u8),
video_id, 'Downloading mp4 JSON', fatal=False)
if mp4_data:
for format_id, format_url in mp4_data.get('data', {}).items():
if not isinstance(format_url, compat_str):
continue
http_format.update({
'url': video_url,
'format_id': m3u8_format['format_id'].replace('hls', 'http'),
'protocol': 'http',
})
formats.append(http_format)
height = int_or_none(format_id)
if height is not None and m3u8_formats_dict.get(height):
f = m3u8_formats_dict[height].copy()
f.update({
'format_id': f['format_id'].replace('hls', 'http'),
'protocol': 'http',
})
else:
f = {
'format_id': 'http-%s' % format_id,
'height': int_or_none(format_id),
}
f['url'] = format_url
formats.append(f)
self._sort_formats(formats)

View File

@ -1,38 +1,117 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import remove_end
from ..compat import compat_str
from ..utils import (
determine_ext,
int_or_none,
unified_timestamp,
)
class ESPNIE(InfoExtractor):
_VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
_VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/video/clip(?:\?.*?\bid=|/_/id/)(?P<id>\d+)'
_TESTS = [{
'url': 'http://espn.go.com/video/clip?id=10365079',
'md5': '60e5d097a523e767d06479335d1bdc58',
'info_dict': {
'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
'id': '10365079',
'ext': 'mp4',
'title': '30 for 30 Shorts: Judging Jewell',
'description': None,
'description': 'md5:39370c2e016cb4ecf498ffe75bef7f0f',
'timestamp': 1390936111,
'upload_date': '20140128',
},
'params': {
'skip_download': True,
},
'add_ie': ['OoyalaExternal'],
}, {
# intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
'url': 'http://espn.go.com/video/clip?id=2743663',
'md5': 'f4ac89b59afc7e2d7dbb049523df6768',
'info_dict': {
'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg',
'id': '2743663',
'ext': 'mp4',
'title': 'Must-See Moments: Best of the MLS season',
'description': 'md5:4c2d7232beaea572632bec41004f0aeb',
'timestamp': 1449446454,
'upload_date': '20151207',
},
'params': {
'skip_download': True,
},
'add_ie': ['OoyalaExternal'],
'expected_warnings': ['Unable to download f4m manifest'],
}, {
'url': 'http://www.espn.com/video/clip?id=10365079',
'only_matching': True,
}, {
'url': 'http://www.espn.com/video/clip/_/id/17989860',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
clip = self._download_json(
'http://api-app.espn.com/v1/video/clips/%s' % video_id,
video_id)['videos'][0]
title = clip['headline']
format_urls = set()
formats = []
def traverse_source(source, base_source_id=None):
for source_id, source in source.items():
if isinstance(source, compat_str):
extract_source(source, base_source_id)
elif isinstance(source, dict):
traverse_source(
source,
'%s-%s' % (base_source_id, source_id)
if base_source_id else source_id)
def extract_source(source_url, source_id=None):
if source_url in format_urls:
return
format_urls.add(source_url)
ext = determine_ext(source_url)
if ext == 'smil':
formats.extend(self._extract_smil_formats(
source_url, video_id, fatal=False))
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
source_url, video_id, f4m_id=source_id, fatal=False))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id=source_id, fatal=False))
else:
formats.append({
'url': source_url,
'format_id': source_id,
})
traverse_source(clip['links']['source'])
self._sort_formats(formats)
description = clip.get('caption') or clip.get('description')
thumbnail = clip.get('thumbnail')
duration = int_or_none(clip.get('duration'))
timestamp = unified_timestamp(clip.get('originalPublishDate'))
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'timestamp': timestamp,
'duration': duration,
'formats': formats,
}
class ESPNArticleIE(InfoExtractor):
_VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
_TESTS = [{
'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
'only_matching': True,
}, {
@ -47,11 +126,12 @@ class ESPNIE(InfoExtractor):
}, {
'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return',
'only_matching': True,
}, {
'url': 'http://www.espn.com/video/clip?id=10365079',
'only_matching': True,
}]
@classmethod
def suitable(cls, url):
return False if ESPNIE.suitable(url) else super(ESPNArticleIE, cls).suitable(url)
def _real_extract(self, url):
video_id = self._match_id(url)
@ -61,23 +141,5 @@ class ESPNIE(InfoExtractor):
r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)',
webpage, 'video id', group='id')
cms = 'espn'
if 'data-source="intl"' in webpage:
cms = 'intl'
player_url = 'https://espn.go.com/video/iframe/twitter/?id=%s&cms=%s' % (video_id, cms)
player = self._download_webpage(
player_url, video_id)
pcode = self._search_regex(
r'["\']pcode=([^"\']+)["\']', player, 'pcode')
title = remove_end(
self._og_search_title(webpage),
'- ESPN Video').strip()
return {
'_type': 'url_transparent',
'url': 'ooyalaexternal:%s:%s:%s' % (cms, video_id, pcode),
'ie_key': 'OoyalaExternal',
'title': title,
}
return self.url_result(
'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key())

View File

@ -150,6 +150,7 @@ from .cbsnews import (
)
from .cbssports import CBSSportsIE
from .ccc import CCCIE
from .ccma import CCMAIE
from .cctv import CCTVIE
from .cda import CDAIE
from .ceskatelevize import CeskaTelevizeIE
@ -180,6 +181,7 @@ from .cnn import (
from .coub import CoubIE
from .collegerama import CollegeRamaIE
from .comedycentral import (
ComedyCentralFullEpisodesIE,
ComedyCentralIE,
ComedyCentralShortnameIE,
ComedyCentralTVIE,
@ -267,7 +269,10 @@ from .engadget import EngadgetIE
from .eporner import EpornerIE
from .eroprofile import EroProfileIE
from .escapist import EscapistIE
from .espn import ESPNIE
from .espn import (
ESPNIE,
ESPNArticleIE,
)
from .esri import EsriVideoIE
from .europa import EuropaIE
from .everyonesmixtape import EveryonesMixtapeIE
@ -296,6 +301,7 @@ from .footyroom import FootyRoomIE
from .formula1 import Formula1IE
from .fourtube import FourTubeIE
from .fox import FOXIE
from .fox9 import FOX9IE
from .foxgay import FoxgayIE
from .foxnews import (
FoxNewsIE,
@ -314,7 +320,6 @@ from .francetv import (
)
from .freesound import FreesoundIE
from .freespeech import FreespeechIE
from .freevideo import FreeVideoIE
from .funimation import FunimationIE
from .funnyordie import FunnyOrDieIE
from .fusion import FusionIE
@ -441,7 +446,10 @@ from .kuwo import (
KuwoMvIE,
)
from .la7 import LA7IE
from .laola1tv import Laola1TvIE
from .laola1tv import (
Laola1TvEmbedIE,
Laola1TvIE,
)
from .lci import LCIIE
from .lcp import (
LcpPlayIE,
@ -493,6 +501,8 @@ from .mangomolo import (
)
from .matchtv import MatchTVIE
from .mdr import MDRIE
from .meipai import MeipaiIE
from .melonvod import MelonVODIE
from .meta import METAIE
from .metacafe import MetacafeIE
from .metacritic import MetacriticIE
@ -644,6 +654,8 @@ from .nrk import (
NRKPlaylistIE,
NRKSkoleIE,
NRKTVIE,
NRKTVDirekteIE,
NRKTVEpisodesIE,
)
from .ntvde import NTVDeIE
from .ntvru import NTVRuIE
@ -656,6 +668,7 @@ from .nzz import NZZIE
from .odatv import OdaTVIE
from .odnoklassniki import OdnoklassnikiIE
from .oktoberfesttv import OktoberfestTVIE
from .ondemandkorea import OnDemandKoreaIE
from .onet import (
OnetIE,
OnetChannelIE,
@ -686,6 +699,7 @@ from .periscope import (
from .philharmoniedeparis import PhilharmonieDeParisIE
from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE
from .piksel import PikselIE
from .pinkbike import PinkbikeIE
from .pladform import PladformIE
from .playfm import PlayFMIE
@ -800,7 +814,6 @@ from .scivee import SciVeeIE
from .screencast import ScreencastIE
from .screencastomatic import ScreencastOMaticIE
from .screenjunkies import ScreenJunkiesIE
from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE
from .seeker import SeekerIE
from .senateisvp import SenateISVPIE
from .sendtonews import SendtoNewsIE
@ -812,6 +825,7 @@ from .shared import (
VivoIE,
)
from .sharesix import ShareSixIE
from .showroomlive import ShowRoomLiveIE
from .sina import SinaIE
from .sixplay import SixPlayIE
from .skynewsarabia import (
@ -893,6 +907,7 @@ from .teachertube import (
)
from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE
from .teamfourstar import TeamFourStarIE
from .techtalks import TechTalksIE
from .ted import TEDIE
from .tele13 import Tele13IE
@ -961,6 +976,10 @@ from .tv2 import (
)
from .tv3 import TV3IE
from .tv4 import TV4IE
from .tvanouvelles import (
TVANouvellesIE,
TVANouvellesArticleIE,
)
from .tvc import (
TVCIE,
TVCArticleIE,
@ -989,7 +1008,10 @@ from .twitch import (
TwitchChapterIE,
TwitchVodIE,
TwitchProfileIE,
TwitchAllVideosIE,
TwitchUploadsIE,
TwitchPastBroadcastsIE,
TwitchHighlightsIE,
TwitchStreamIE,
TwitchClipsIE,
)
@ -1003,6 +1025,7 @@ from .udemy import (
UdemyCourseIE
)
from .udn import UDNEmbedIE
from .uktvplay import UKTVPlayIE
from .digiteka import DigitekaIE
from .unistra import UnistraIE
from .uol import UOLIE
@ -1042,6 +1065,7 @@ from .vice import (
from .viceland import VicelandIE
from .vidbit import VidbitIE
from .viddler import ViddlerIE
from .videa import VideaIE
from .videodetective import VideoDetectiveIE
from .videofyme import VideofyMeIE
from .videomega import VideoMegaIE
@ -1086,6 +1110,11 @@ from .viki import (
VikiIE,
VikiChannelIE,
)
from .viu import (
ViuIE,
ViuPlaylistIE,
ViuOTTIE,
)
from .vk import (
VKIE,
VKUserVideosIE,
@ -1100,7 +1129,9 @@ from .vporn import VpornIE
from .vrt import VRTIE
from .vube import VubeIE
from .vuclip import VuClipIE
from .vvvvid import VVVVIDIE
from .vyborymos import VyboryMosIE
from .vzaar import VzaarIE
from .walla import WallaIE
from .washingtonpost import (
WashingtonPostIE,
@ -1112,6 +1143,10 @@ from .wdr import (
WDRIE,
WDRMobileIE,
)
from .webcaster import (
WebcasterIE,
WebcasterFeedIE,
)
from .webofstories import (
WebOfStoriesIE,
WebOfStoriesPlaylistIE,

View File

@ -27,7 +27,7 @@ class FacebookIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?:
https?://
(?:[\w-]+\.)?facebook\.com/
(?:[\w-]+\.)?(?:facebook\.com|facebookcorewwwi\.onion)/
(?:[^#]*?\#!/)?
(?:
(?:
@ -150,6 +150,9 @@ class FacebookIE(InfoExtractor):
}, {
'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/',
'only_matching': True,
}, {
'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670',
'only_matching': True,
}]
@staticmethod
@ -244,8 +247,10 @@ class FacebookIE(InfoExtractor):
r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id)
for item in server_js_data.get('instances', []):
if item[1][0] == 'VideoConfig':
video_data = item[2][0]['videoData']
break
video_item = item[2][0]
if video_item.get('video_id') == video_id:
video_data = video_item['videoData']
break
if not video_data:
if not fatal_if_no_video:
@ -255,6 +260,8 @@ class FacebookIE(InfoExtractor):
raise ExtractorError(
'The video is not available, Facebook said: "%s"' % m_msg.group(1),
expected=True)
elif '>You must log in to continue' in webpage:
self.raise_login_required()
else:
raise ExtractorError('Cannot parse data')

View File

@ -2,7 +2,10 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..compat import (
compat_str,
compat_urlparse,
)
from ..utils import (
int_or_none,
qualities,
@ -22,8 +25,7 @@ class FirstTVIE(InfoExtractor):
'info_dict': {
'id': '40049',
'ext': 'mp4',
'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015',
'description': 'md5:36a39c1d19618fec57d12efe212a8370',
'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015',
'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
'upload_date': '20150212',
'duration': 2694,
@ -34,8 +36,7 @@ class FirstTVIE(InfoExtractor):
'info_dict': {
'id': '364746',
'ext': 'mp4',
'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016',
'description': 'md5:a242eea0031fd180a4497d52640a9572',
'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016',
'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
'upload_date': '20160407',
'duration': 179,
@ -44,6 +45,17 @@ class FirstTVIE(InfoExtractor):
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.1tv.ru/news/issue/2016-12-01/14:00',
'info_dict': {
'id': '14:00',
'title': 'Выпуск новостей в 14:00 1 декабря 2016 года. Новости. Первый канал',
'description': 'md5:2e921b948f8c1ff93901da78ebdb1dfd',
},
'playlist_count': 13,
}, {
'url': 'http://www.1tv.ru/shows/tochvtoch-supersezon/vystupleniya/evgeniy-dyatlov-vladimir-vysockiy-koni-priveredlivye-toch-v-toch-supersezon-fragment-vypuska-ot-06-11-2016',
'only_matching': True,
}]
def _real_extract(self, url):
@ -51,43 +63,66 @@ class FirstTVIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
playlist_url = compat_urlparse.urljoin(url, self._search_regex(
r'data-playlist-url="([^"]+)', webpage, 'playlist url'))
r'data-playlist-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
webpage, 'playlist url', group='url'))
item = self._download_json(playlist_url, display_id)[0]
video_id = item['id']
quality = qualities(('ld', 'sd', 'hd', ))
formats = []
for f in item.get('mbr', []):
src = f.get('src')
if not src:
continue
fname = f.get('name')
formats.append({
'url': src,
'format_id': fname,
'quality': quality(fname),
parsed_url = compat_urlparse.urlparse(playlist_url)
qs = compat_urlparse.parse_qs(parsed_url.query)
item_ids = qs.get('videos_ids[]') or qs.get('news_ids[]')
items = self._download_json(playlist_url, display_id)
if item_ids:
items = [
item for item in items
if item.get('uid') and compat_str(item['uid']) in item_ids]
else:
items = [items[0]]
entries = []
QUALITIES = ('ld', 'sd', 'hd', )
for item in items:
title = item['title']
quality = qualities(QUALITIES)
formats = []
for f in item.get('mbr', []):
src = f.get('src')
if not src or not isinstance(src, compat_str):
continue
tbr = int_or_none(self._search_regex(
r'_(\d{3,})\.mp4', src, 'tbr', default=None))
formats.append({
'url': src,
'format_id': f.get('name'),
'tbr': tbr,
'quality': quality(f.get('name')),
})
self._sort_formats(formats)
thumbnail = item.get('poster') or self._og_search_thumbnail(webpage)
duration = int_or_none(item.get('duration') or self._html_search_meta(
'video:duration', webpage, 'video duration', fatal=False))
upload_date = unified_strdate(self._html_search_meta(
'ya:ovs:upload_date', webpage, 'upload date', default=None))
entries.append({
'id': compat_str(item.get('id') or item['uid']),
'thumbnail': thumbnail,
'title': title,
'upload_date': upload_date,
'duration': int_or_none(duration),
'formats': formats
})
self._sort_formats(formats)
title = self._html_search_regex(
(r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
r"'title'\s*:\s*'([^']+)'"),
webpage, 'title', default=None) or item['title']
webpage, 'title', default=None) or self._og_search_title(
webpage, default=None)
description = self._html_search_regex(
r'<div class="descr">\s*<div>&nbsp;</div>\s*<p>([^<]*)</p></div>',
webpage, 'description', default=None) or self._html_search_meta(
'description', webpage, 'description')
duration = int_or_none(self._html_search_meta(
'video:duration', webpage, 'video duration', fatal=False))
upload_date = unified_strdate(self._html_search_meta(
'ya:ovs:upload_date', webpage, 'upload date', fatal=False))
'description', webpage, 'description', default=None)
return {
'id': video_id,
'thumbnail': item.get('poster') or self._og_search_thumbnail(webpage),
'title': title,
'description': description,
'upload_date': upload_date,
'duration': int_or_none(duration),
'formats': formats
}
return self.playlist_result(entries, display_id, title, description)

View File

@ -0,0 +1,43 @@
# coding: utf-8
from __future__ import unicode_literals
from .anvato import AnvatoIE
from ..utils import js_to_json
class FOX9IE(AnvatoIE):
_VALID_URL = r'https?://(?:www\.)?fox9\.com/(?:[^/]+/)+(?P<id>\d+)-story'
_TESTS = [{
'url': 'http://www.fox9.com/news/215123287-story',
'md5': 'd6e1b2572c3bab8a849c9103615dd243',
'info_dict': {
'id': '314473',
'ext': 'mp4',
'title': 'Bear climbs tree in downtown Duluth',
'description': 'md5:6a36bfb5073a411758a752455408ac90',
'duration': 51,
'timestamp': 1478123580,
'upload_date': '20161102',
'uploader': 'EPFOX',
'categories': ['News', 'Sports'],
'tags': ['news', 'video'],
},
}, {
'url': 'http://www.fox9.com/news/investigators/214070684-story',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_id = self._parse_json(
self._search_regex(
r'AnvatoPlaylist\s*\(\s*(\[.+?\])\s*\)\s*;',
webpage, 'anvato playlist'),
video_id, transform_source=js_to_json)[0]['video']
return self._get_anvato_videos(
'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b',
video_id)

View File

@ -29,7 +29,7 @@ class FranceCultureIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
video_url = self._search_regex(
r'(?s)<div[^>]+class="[^"]*?title-zone-diffusion[^"]*?"[^>]*>.*?<a[^>]+href="([^"]+)"',
r'(?s)<div[^>]+class="[^"]*?title-zone-diffusion[^"]*?"[^>]*>.*?<button[^>]+data-asset-source="([^"]+)"',
webpage, 'video path')
title = self._og_search_title(webpage)
@ -38,7 +38,7 @@ class FranceCultureIE(InfoExtractor):
'(?s)<div[^>]+class="date"[^>]*>.*?<span[^>]+class="inner"[^>]*>([^<]+)<',
webpage, 'upload date', fatal=False))
thumbnail = self._search_regex(
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+data-pagespeed-(?:lazy|high-res)-src="([^"]+)"',
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+data-dejavu-src="([^"]+)"',
webpage, 'thumbnail', fatal=False)
uploader = self._html_search_regex(
r'(?s)<div id="emission".*?<span class="author">(.*?)</span>',

View File

@ -1,38 +0,0 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import ExtractorError
class FreeVideoIE(InfoExtractor):
_VALID_URL = r'^https?://www.freevideo.cz/vase-videa/(?P<id>[^.]+)\.html(?:$|[?#])'
_TEST = {
'url': 'http://www.freevideo.cz/vase-videa/vysukany-zadecek-22033.html',
'info_dict': {
'id': 'vysukany-zadecek-22033',
'ext': 'mp4',
'title': 'vysukany-zadecek-22033',
'age_limit': 18,
},
'skip': 'Blocked outside .cz',
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage, handle = self._download_webpage_handle(url, video_id)
if '//www.czechav.com/' in handle.geturl():
raise ExtractorError(
'Access to freevideo is blocked from your location',
expected=True)
video_url = self._search_regex(
r'\s+url: "(http://[a-z0-9-]+.cdn.freevideo.cz/stream/.*?/video.mp4)"',
webpage, 'video URL')
return {
'id': video_id,
'url': video_url,
'title': video_id,
'age_limit': 18,
}

View File

@ -28,6 +28,9 @@ class FunnyOrDieIE(InfoExtractor):
'description': 'Please use this to sell something. www.jonlajoie.com',
'thumbnail': 're:^http:.*\.jpg$',
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.funnyordie.com/articles/ebf5e34fc8/10-hours-of-walking-in-nyc-as-a-man',
'only_matching': True,
@ -51,19 +54,45 @@ class FunnyOrDieIE(InfoExtractor):
formats = []
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
m3u8_formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False)
source_formats = list(filter(
lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
m3u8_formats))
bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)[,/]', m3u8_url)]
bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)(?=[,/])', m3u8_url)]
bitrates.sort()
for bitrate in bitrates:
for link in links:
formats.append({
'url': self._proto_relative_url('%s%d.%s' % (link[0], bitrate, link[1])),
'format_id': '%s-%d' % (link[1], bitrate),
'vbr': bitrate,
})
if source_formats:
self._sort_formats(source_formats)
for bitrate, f in zip(bitrates, source_formats or [{}] * len(bitrates)):
for path, ext in links:
ff = f.copy()
if ff:
if ext != 'mp4':
ff = dict(
[(k, v) for k, v in ff.items()
if k in ('height', 'width', 'format_id')])
ff.update({
'format_id': ff['format_id'].replace('hls', ext),
'ext': ext,
'protocol': 'http',
})
else:
ff.update({
'format_id': '%s-%d' % (ext, bitrate),
'vbr': bitrate,
})
ff['url'] = self._proto_relative_url(
'%s%d.%s' % (path, bitrate, ext))
formats.append(ff)
self._check_formats(formats, video_id)
formats.extend(m3u8_formats)
self._sort_formats(
formats, field_preference=('height', 'width', 'tbr', 'format_id'))
subtitles = {}
for src, src_lang in re.findall(r'<track kind="captions" src="([^"]+)" srclang="([^"]+)"', webpage):

View File

@ -29,7 +29,7 @@ class FusionIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
ooyala_code = self._search_regex(
r'data-video-id=(["\'])(?P<code>.+?)\1',
r'data-ooyala-id=(["\'])(?P<code>(?:(?!\1).)+)\1',
webpage, 'ooyala code', group='code')
return OoyalaIE._build_url_result(ooyala_code)

View File

@ -47,6 +47,8 @@ from .svt import SVTIE
from .pornhub import PornHubIE
from .xhamster import XHamsterEmbedIE
from .tnaflix import TNAFlixNetworkEmbedIE
from .drtuber import DrTuberIE
from .redtube import RedTubeIE
from .vimeo import VimeoIE
from .dailymotion import (
DailymotionIE,
@ -54,10 +56,10 @@ from .dailymotion import (
)
from .onionstudios import OnionStudiosIE
from .viewlift import ViewLiftEmbedIE
from .screenwavemedia import ScreenwaveMediaIE
from .mtv import MTVServicesEmbeddedIE
from .pladform import PladformIE
from .videomore import VideomoreIE
from .webcaster import WebcasterFeedIE
from .googledrive import GoogleDriveIE
from .jwplatform import JWPlatformIE
from .digiteka import DigitekaIE
@ -73,6 +75,8 @@ from .facebook import FacebookIE
from .soundcloud import SoundcloudIE
from .vbox7 import Vbox7IE
from .dbtv import DBTVIE
from .piksel import PikselIE
from .videa import VideaIE
class GenericIE(InfoExtractor):
@ -341,10 +345,10 @@ class GenericIE(InfoExtractor):
},
'skip': 'There is a limit of 200 free downloads / month for the test song',
},
# embedded brightcove video
# it also tests brightcove videos that need to set the 'Referer' in the
# http requests
{
# embedded brightcove video
# it also tests brightcove videos that need to set the 'Referer'
# in the http requests
'add_ie': ['BrightcoveLegacy'],
'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
'info_dict': {
@ -358,6 +362,24 @@ class GenericIE(InfoExtractor):
'skip_download': True,
},
},
{
# embedded with itemprop embedURL and video id spelled as `idVideo`
'add_id': ['BrightcoveLegacy'],
'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
'info_dict': {
'id': '5255628253001',
'ext': 'mp4',
'title': 'md5:37c519b1128915607601e75a87995fc0',
'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
'uploader': 'BFM BUSINESS',
'uploader_id': '876450612001',
'timestamp': 1482255315,
'upload_date': '20161220',
},
'params': {
'skip_download': True,
},
},
{
# https://github.com/rg3/youtube-dl/issues/2253
'url': 'http://bcove.me/i6nfkrc3',
@ -970,6 +992,20 @@ class GenericIE(InfoExtractor):
'skip_download': True,
}
},
{
# Kaltura embedded, some fileExt broken (#11480)
'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics',
'info_dict': {
'id': '1_sgtvehim',
'ext': 'mp4',
'title': 'Our "Standard Models" of particle physics and cosmology',
'description': 'md5:67ea74807b8c4fea92a6f38d6d323861',
'timestamp': 1321158993,
'upload_date': '20111113',
'uploader_id': 'kps1',
},
'add_ie': ['Kaltura'],
},
# Eagle.Platform embed (generic URL)
{
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
@ -1187,16 +1223,6 @@ class GenericIE(InfoExtractor):
'duration': 248.667,
},
},
# ScreenwaveMedia embed
{
'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
'md5': '24ace5baba0d35d55c6810b51f34e9e0',
'info_dict': {
'id': 'cinemasnob-55d26273809dd',
'ext': 'mp4',
'title': 'cinemasnob',
},
},
# BrightcoveInPageEmbed embed
{
'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
@ -1397,6 +1423,15 @@ class GenericIE(InfoExtractor):
},
'playlist_mincount': 3,
},
{
# Videa embeds
'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html',
'info_dict': {
'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style',
'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. Style - DVD Talk Forum',
},
'playlist_mincount': 2,
},
# {
# # TODO: find another test
# # http://schema.org/VideoObject
@ -1634,6 +1669,10 @@ class GenericIE(InfoExtractor):
doc = compat_etree_fromstring(webpage.encode('utf-8'))
if doc.tag == 'rss':
return self._extract_rss(url, video_id, doc)
elif doc.tag == 'SmoothStreamingMedia':
info_dict['formats'] = self._parse_ism_formats(doc, url)
self._sort_formats(info_dict['formats'])
return info_dict
elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
smil = self._parse_smil(doc, url, video_id)
self._sort_formats(smil['formats'])
@ -1977,11 +2016,6 @@ class GenericIE(InfoExtractor):
if sportbox_urls:
return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
# Look for embedded PornHub player
pornhub_url = PornHubIE._extract_url(webpage)
if pornhub_url:
return self.url_result(pornhub_url, 'PornHub')
# Look for embedded XHamster player
xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
if xhamster_urls:
@ -1992,6 +2026,21 @@ class GenericIE(InfoExtractor):
if tnaflix_urls:
return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
# Look for embedded PornHub player
pornhub_urls = PornHubIE._extract_urls(webpage)
if pornhub_urls:
return _playlist_from_matches(pornhub_urls, ie=PornHubIE.ie_key())
# Look for embedded DrTuber player
drtuber_urls = DrTuberIE._extract_urls(webpage)
if drtuber_urls:
return _playlist_from_matches(drtuber_urls, ie=DrTuberIE.ie_key())
# Look for embedded RedTube player
redtube_urls = RedTubeIE._extract_urls(webpage)
if redtube_urls:
return _playlist_from_matches(redtube_urls, ie=RedTubeIE.ie_key())
# Look for embedded Tvigle player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
@ -2124,6 +2173,11 @@ class GenericIE(InfoExtractor):
if videomore_url:
return self.url_result(videomore_url)
# Look for Webcaster embeds
webcaster_url = WebcasterFeedIE._extract_url(self, webpage)
if webcaster_url:
return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key())
# Look for Playwire embeds
mobj = re.search(
r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
@ -2190,11 +2244,6 @@ class GenericIE(InfoExtractor):
if jwplatform_url:
return self.url_result(jwplatform_url, 'JWPlatform')
# Look for ScreenwaveMedia embeds
mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
if mobj is not None:
return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
# Look for Digiteka embeds
digiteka_url = DigitekaIE._extract_url(webpage)
if digiteka_url:
@ -2205,6 +2254,11 @@ class GenericIE(InfoExtractor):
if arkena_url:
return self.url_result(arkena_url, ArkenaIE.ie_key())
# Look for Piksel embeds
piksel_url = PikselIE._extract_url(webpage)
if piksel_url:
return self.url_result(piksel_url, PikselIE.ie_key())
# Look for Limelight embeds
mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
if mobj:
@ -2216,6 +2270,16 @@ class GenericIE(InfoExtractor):
return self.url_result('limelight:%s:%s' % (
lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
mobj = re.search(
r'''(?sx)
<object[^>]+class=(["\'])LimelightEmbeddedPlayerFlash\1[^>]*>.*?
<param[^>]+
name=(["\'])flashVars\2[^>]+
value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32})
''', webpage)
if mobj:
return self.url_result('limelight:media:%s' % mobj.group('id'))
# Look for AdobeTVVideo embeds
mobj = re.search(
r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
@ -2304,6 +2368,11 @@ class GenericIE(InfoExtractor):
if dbtv_urls:
return _playlist_from_matches(dbtv_urls, ie=DBTVIE.ie_key())
# Look for Videa embeds
videa_urls = VideaIE._extract_urls(webpage)
if videa_urls:
return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key())
# Looking for http://schema.org/VideoObject
json_ld = self._search_json_ld(
webpage, video_id, default={}, expected_type='VideoObject')
@ -2449,6 +2518,21 @@ class GenericIE(InfoExtractor):
entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
elif ext == 'f4m':
entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
# Just matching .ism/manifest is not enough to be reliably sure
# whether it's actually an ISM manifest or some other streaming
# manifest since there are various streaming URL formats
# possible (see [1]) as well as some other shenanigans like
# .smil/manifest URLs that actually serve an ISM (see [2]) and
# so on.
# Thus the most reasonable way to solve this is to delegate
# to generic extractor in order to look into the contents of
# the manifest itself.
# 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
# 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
entry_info_dict = self.url_result(
smuggle_url(video_url, {'to_generic': True}),
GenericIE.ie_key())
else:
entry_info_dict['url'] = video_url

View File

@ -6,12 +6,13 @@ from .common import InfoExtractor
from ..utils import (
js_to_json,
remove_end,
determine_ext,
)
class HellPornoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?hellporno\.com/videos/(?P<id>[^/]+)'
_TEST = {
_VALID_URL = r'https?://(?:www\.)?hellporno\.(?:com/videos|net/v)/(?P<id>[^/]+)'
_TESTS = [{
'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/',
'md5': '1fee339c610d2049699ef2aa699439f1',
'info_dict': {
@ -22,7 +23,10 @@ class HellPornoIE(InfoExtractor):
'thumbnail': 're:https?://.*\.jpg$',
'age_limit': 18,
}
}
}, {
'url': 'http://hellporno.net/v/186271/',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
@ -38,7 +42,7 @@ class HellPornoIE(InfoExtractor):
video_id = flashvars.get('video_id')
thumbnail = flashvars.get('preview_url')
ext = flashvars.get('postfix', '.mp4')[1:]
ext = determine_ext(flashvars.get('postfix'), 'mp4')
formats = []
for video_url_key in ['video_url', 'video_alt_url']:

View File

@ -11,6 +11,7 @@ from ..utils import (
int_or_none,
js_to_json,
mimetype2ext,
urljoin,
)
@ -110,10 +111,14 @@ class JWPlatformBaseIE(InfoExtractor):
tracks = video_data.get('tracks')
if tracks and isinstance(tracks, list):
for track in tracks:
if track.get('file') and track.get('kind') == 'captions':
subtitles.setdefault(track.get('label') or 'en', []).append({
'url': self._proto_relative_url(track['file'])
})
if track.get('kind') != 'captions':
continue
track_url = urljoin(base_url, track.get('file'))
if not track_url:
continue
subtitles.setdefault(track.get('label') or 'en', []).append({
'url': self._proto_relative_url(track_url)
})
entries.append({
'id': this_video_id,
@ -121,7 +126,7 @@ class JWPlatformBaseIE(InfoExtractor):
'description': video_data.get('description'),
'thumbnail': self._proto_relative_url(video_data.get('image')),
'timestamp': int_or_none(video_data.get('pubdate')),
'duration': float_or_none(jwplayer_data.get('duration')),
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
'subtitles': subtitles,
'formats': formats,
})

View File

@ -107,7 +107,7 @@ class KalturaIE(InfoExtractor):
(?P<q1>['\"])wid(?P=q1)\s*:\s*
(?P<q2>['\"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*
(?P<q4>['\"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4),
(?P<q4>['\"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
""", webpage) or
re.search(
r'''(?xs)
@ -266,6 +266,9 @@ class KalturaIE(InfoExtractor):
# skip for now.
if f.get('fileExt') == 'chun':
continue
if not f.get('fileExt') and f.get('containerFormat') == 'qt':
# QT indicates QuickTime; some videos have broken fileExt
f['fileExt'] = 'mov'
video_url = sign_url(
'%s/flavorId/%s' % (data_url, f['id']))
# audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g

View File

@ -1,25 +1,115 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_urlencode,
compat_urlparse,
)
from ..utils import (
ExtractorError,
sanitized_Request,
unified_strdate,
urlencode_postdata,
xpath_element,
xpath_text,
urljoin,
update_url_query,
)
class Laola1TvEmbedIE(InfoExtractor):
IE_NAME = 'laola1tv:embed'
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/titanplayer\.php\?.*?\bvideoid=(?P<id>\d+)'
_TEST = {
# flashvars.premium = "false";
'url': 'https://www.laola1.tv/titanplayer.php?videoid=708065&type=V&lang=en&portal=int&customer=1024',
'info_dict': {
'id': '708065',
'ext': 'mp4',
'title': 'MA Long CHN - FAN Zhendong CHN',
'uploader': 'ITTF - International Table Tennis Federation',
'upload_date': '20161211',
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
flash_vars = self._search_regex(
r'(?s)flashvars\s*=\s*({.+?});', webpage, 'flash vars')
def get_flashvar(x, *args, **kwargs):
flash_var = self._search_regex(
r'%s\s*:\s*"([^"]+)"' % x,
flash_vars, x, default=None)
if not flash_var:
flash_var = self._search_regex([
r'flashvars\.%s\s*=\s*"([^"]+)"' % x,
r'%s\s*=\s*"([^"]+)"' % x],
webpage, x, *args, **kwargs)
return flash_var
hd_doc = self._download_xml(
'http://www.laola1.tv/server/hd_video.php', video_id, query={
'play': get_flashvar('streamid'),
'partner': get_flashvar('partnerid'),
'portal': get_flashvar('portalid'),
'lang': get_flashvar('sprache'),
'v5ident': '',
})
_v = lambda x, **k: xpath_text(hd_doc, './/video/' + x, **k)
title = _v('title', fatal=True)
token_url = None
premium = get_flashvar('premium', default=None)
if premium:
token_url = update_url_query(
_v('url', fatal=True), {
'timestamp': get_flashvar('timestamp'),
'auth': get_flashvar('auth'),
})
else:
data_abo = urlencode_postdata(
dict((i, v) for i, v in enumerate(_v('req_liga_abos').split(','))))
token_url = self._download_json(
'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access',
video_id, query={
'videoId': _v('id'),
'target': self._search_regex(r'vs_target = (\d+);', webpage, 'vs target'),
'label': _v('label'),
'area': _v('area'),
}, data=data_abo)['data']['stream-access'][0]
token_doc = self._download_xml(
token_url, video_id, 'Downloading token',
headers=self.geo_verification_headers())
token_attrib = xpath_element(token_doc, './/token').attrib
if token_attrib['status'] != '0':
raise ExtractorError(
'Token error: %s' % token_attrib['comment'], expected=True)
formats = self._extract_akamai_formats(
'%s?hdnea=%s' % (token_attrib['url'], token_attrib['auth']),
video_id)
self._sort_formats(formats)
categories_str = _v('meta_sports')
categories = categories_str.split(',') if categories_str else []
is_live = _v('islive') == 'true'
return {
'id': video_id,
'title': self._live_title(title) if is_live else title,
'upload_date': unified_strdate(_v('time_date')),
'uploader': _v('meta_organisation'),
'categories': categories,
'is_live': is_live,
'formats': formats,
}
class Laola1TvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/(?P<kind>[^/]+)/(?P<slug>[^/?#&]+)'
IE_NAME = 'laola1tv'
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
'info_dict': {
@ -67,85 +157,20 @@ class Laola1TvIE(InfoExtractor):
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('slug')
kind = mobj.group('kind')
lang = mobj.group('lang')
portal = mobj.group('portal')
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
if 'Dieser Livestream ist bereits beendet.' in webpage:
raise ExtractorError('This live stream has already finished.', expected=True)
iframe_url = self._search_regex(
iframe_url = urljoin(url, self._search_regex(
r'<iframe[^>]*?id="videoplayer"[^>]*?src="([^"]+)"',
webpage, 'iframe url')
video_id = self._search_regex(
r'videoid=(\d+)', iframe_url, 'video id')
iframe = self._download_webpage(compat_urlparse.urljoin(
url, iframe_url), display_id, 'Downloading iframe')
partner_id = self._search_regex(
r'partnerid\s*:\s*(["\'])(?P<partner_id>.+?)\1',
iframe, 'partner id', group='partner_id')
hd_doc = self._download_xml(
'http://www.laola1.tv/server/hd_video.php?%s'
% compat_urllib_parse_urlencode({
'play': video_id,
'partner': partner_id,
'portal': portal,
'lang': lang,
'v5ident': '',
}), display_id)
_v = lambda x, **k: xpath_text(hd_doc, './/video/' + x, **k)
title = _v('title', fatal=True)
VS_TARGETS = {
'video': '2',
'livestream': '17',
}
req = sanitized_Request(
'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access?%s' %
compat_urllib_parse_urlencode({
'videoId': video_id,
'target': VS_TARGETS.get(kind, '2'),
'label': _v('label'),
'area': _v('area'),
}),
urlencode_postdata(
dict((i, v) for i, v in enumerate(_v('req_liga_abos').split(',')))))
token_url = self._download_json(req, display_id)['data']['stream-access'][0]
token_doc = self._download_xml(token_url, display_id, 'Downloading token')
token_attrib = xpath_element(token_doc, './/token').attrib
token_auth = token_attrib['auth']
if token_auth in ('blocked', 'restricted', 'error'):
raise ExtractorError(
'Token error: %s' % token_attrib['comment'], expected=True)
formats = self._extract_f4m_formats(
'%s?hdnea=%s&hdcore=3.2.0' % (token_attrib['url'], token_auth),
video_id, f4m_id='hds')
self._sort_formats(formats)
categories_str = _v('meta_sports')
categories = categories_str.split(',') if categories_str else []
webpage, 'iframe url'))
return {
'id': video_id,
'_type': 'url',
'display_id': display_id,
'title': title,
'upload_date': unified_strdate(_v('time_date')),
'uploader': _v('meta_organisation'),
'categories': categories,
'is_live': _v('islive') == 'true',
'formats': formats,
'url': iframe_url,
'ie_key': 'Laola1TvEmbed',
}

View File

@ -54,6 +54,22 @@ class LiveLeakIE(InfoExtractor):
'title': 'Crazy Hungarian tourist films close call waterspout in Croatia',
'thumbnail': 're:^https?://.*\.jpg$'
}
}, {
# Covers https://github.com/rg3/youtube-dl/pull/10664#issuecomment-247439521
'url': 'http://m.liveleak.com/view?i=763_1473349649',
'add_ie': ['Youtube'],
'info_dict': {
'id': '763_1473349649',
'ext': 'mp4',
'title': 'Reporters and public officials ignore epidemic of black on asian violence in Sacramento | Colin Flaherty',
'description': 'Colin being the warrior he is and showing the injustice Asians in Sacramento are being subjected to.',
'uploader': 'Ziz',
'upload_date': '20160908',
'uploader_id': 'UCEbta5E_jqlZmEJsriTEtnw'
},
'params': {
'skip_download': True,
},
}]
@staticmethod
@ -87,7 +103,7 @@ class LiveLeakIE(InfoExtractor):
else:
# Maybe an embed?
embed_url = self._search_regex(
r'<iframe[^>]+src="(http://www.prochan.com/embed\?[^"]+)"',
r'<iframe[^>]+src="(https?://(?:www\.)?(?:prochan|youtube)\.com/embed[^"]+)"',
webpage, 'embed URL')
return {
'_type': 'url_transparent',
@ -107,6 +123,7 @@ class LiveLeakIE(InfoExtractor):
'format_note': s.get('label'),
'url': s['file'],
} for i, s in enumerate(sources)]
for i, s in enumerate(sources):
# Removing '.h264_*.mp4' gives the raw video, which is essentially
# the same video without the LiveLeak logo at the top (see

View File

@ -0,0 +1,104 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_duration,
unified_timestamp,
)
class MeipaiIE(InfoExtractor):
IE_DESC = '美拍'
_VALID_URL = r'https?://(?:www\.)?meipai.com/media/(?P<id>[0-9]+)'
_TESTS = [{
# regular uploaded video
'url': 'http://www.meipai.com/media/531697625',
'md5': 'e3e9600f9e55a302daecc90825854b4f',
'info_dict': {
'id': '531697625',
'ext': 'mp4',
'title': '#葉子##阿桑##余姿昀##超級女聲#',
'description': '#葉子##阿桑##余姿昀##超級女聲#',
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 152,
'timestamp': 1465492420,
'upload_date': '20160609',
'view_count': 35511,
'creator': '她她-TATA',
'tags': ['葉子', '阿桑', '余姿昀', '超級女聲'],
}
}, {
# record of live streaming
'url': 'http://www.meipai.com/media/585526361',
'md5': 'ff7d6afdbc6143342408223d4f5fb99a',
'info_dict': {
'id': '585526361',
'ext': 'mp4',
'title': '姿昀和善願 練歌練琴啦😁😁😁',
'description': '姿昀和善願 練歌練琴啦😁😁😁',
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 5975,
'timestamp': 1474311799,
'upload_date': '20160919',
'view_count': 1215,
'creator': '她她-TATA',
}
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._og_search_title(
webpage, default=None) or self._html_search_regex(
r'<title[^>]*>([^<]+)</title>', webpage, 'title')
formats = []
# recorded playback of live streaming
m3u8_url = self._html_search_regex(
r'file:\s*encodeURIComponent\((["\'])(?P<url>(?:(?!\1).)+)\1\)',
webpage, 'm3u8 url', group='url', default=None)
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
if not formats:
# regular uploaded video
video_url = self._search_regex(
r'data-video=(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'video url',
group='url', default=None)
if video_url:
formats.append({
'url': video_url,
'format_id': 'http',
})
timestamp = unified_timestamp(self._og_search_property(
'video:release_date', webpage, 'release date', fatal=False))
tags = self._og_search_property(
'video:tag', webpage, 'tags', default='').split(',')
view_count = int_or_none(self._html_search_meta(
'interactionCount', webpage, 'view count'))
duration = parse_duration(self._html_search_meta(
'duration', webpage, 'duration'))
creator = self._og_search_property(
'video:director', webpage, 'creator', fatal=False)
return {
'id': video_id,
'title': title,
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'duration': duration,
'timestamp': timestamp,
'view_count': view_count,
'creator': creator,
'tags': tags,
'formats': formats,
}

View File

@ -0,0 +1,72 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
urljoin,
)
class MelonVODIE(InfoExtractor):
_VALID_URL = r'https?://vod\.melon\.com/video/detail2\.html?\?.*?mvId=(?P<id>[0-9]+)'
_TEST = {
'url': 'http://vod.melon.com/video/detail2.htm?mvId=50158734',
'info_dict': {
'id': '50158734',
'ext': 'mp4',
'title': "Jessica 'Wonderland' MV Making Film",
'thumbnail': 're:^https?://.*\.jpg$',
'artist': 'Jessica (제시카)',
'upload_date': '20161212',
'duration': 203,
},
'params': {
'skip_download': 'm3u8 download',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
play_info = self._download_json(
'http://vod.melon.com/video/playerInfo.json', video_id,
note='Downloading player info JSON', query={'mvId': video_id})
title = play_info['mvInfo']['MVTITLE']
info = self._download_json(
'http://vod.melon.com/delivery/streamingInfo.json', video_id,
note='Downloading streaming info JSON',
query={
'contsId': video_id,
'contsType': 'VIDEO',
})
stream_info = info['streamingInfo']
formats = self._extract_m3u8_formats(
stream_info['encUrl'], video_id, 'mp4', m3u8_id='hls')
self._sort_formats(formats)
artist_list = play_info.get('artistList')
artist = None
if isinstance(artist_list, list):
artist = ', '.join(
[a['ARTISTNAMEWEBLIST']
for a in artist_list if a.get('ARTISTNAMEWEBLIST')])
thumbnail = urljoin(info.get('staticDomain'), stream_info.get('imgPath'))
duration = int_or_none(stream_info.get('playTime'))
upload_date = stream_info.get('mvSvcOpenDt', '')[:8] or None
return {
'id': video_id,
'title': title,
'artist': artist,
'thumbnail': thumbnail,
'upload_date': upload_date,
'duration': duration,
'formats': formats
}

View File

@ -1,19 +1,20 @@
# coding: utf-8
from __future__ import unicode_literals
import re
import uuid
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse_urlencode,
compat_urlparse,
)
from ..utils import (
get_element_by_attribute,
int_or_none,
remove_start,
extract_attributes,
determine_ext,
smuggle_url,
parse_duration,
)
@ -72,76 +73,133 @@ class MiTeleBaseIE(InfoExtractor):
}
class MiTeleIE(MiTeleBaseIE):
class MiTeleIE(InfoExtractor):
IE_DESC = 'mitele.es'
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/){3}(?P<id>[^/]+)/'
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
_TESTS = [{
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
# MD5 is unstable
'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player',
'info_dict': {
'id': '0NF1jJnxS1Wu3pHrmvFyw2',
'display_id': 'programa-144',
'id': '57b0dfb9c715da65618b4afa',
'ext': 'mp4',
'title': 'Tor, la web invisible',
'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
'series': 'Diario de',
'season': 'La redacción',
'season_number': 14,
'season_id': 'diario_de_t14_11981',
'episode': 'Programa 144',
'episode_number': 3,
'thumbnail': 're:(?i)^https?://.*\.jpg$',
'duration': 2913,
},
'add_ie': ['Ooyala'],
}, {
# no explicit title
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/temporada-6/programa-226/',
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
'info_dict': {
'id': 'eLZSwoEd1S3pVyUm8lc6F',
'display_id': 'programa-226',
'id': '57b0de3dc915da14058b4876',
'ext': 'mp4',
'title': 'Cuarto Milenio - Temporada 6 - Programa 226',
'description': 'md5:50daf9fadefa4e62d9fc866d0c015701',
'title': 'Cuarto Milenio Temporada 6 Programa 226',
'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f',
'series': 'Cuarto Milenio',
'season': 'Temporada 6',
'season_number': 6,
'season_id': 'cuarto_milenio_t06_12715',
'episode': 'Programa 226',
'episode_number': 24,
'thumbnail': 're:(?i)^https?://.*\.jpg$',
'duration': 7312,
'duration': 7313,
},
'params': {
'skip_download': True,
},
'add_ie': ['Ooyala'],
}, {
'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
webpage = self._download_webpage(url, display_id)
gigya_url = self._search_regex(
r'<gigya-api>[^>]*</gigya-api>[^>]*<script\s+src="([^"]*)">[^>]*</script>',
webpage, 'gigya', default=None)
gigya_sc = self._download_webpage(
compat_urlparse.urljoin('http://www.mitele.es/', gigya_url),
video_id, 'Downloading gigya script')
info = self._get_player_info(url, webpage)
# Get a appKey/uuid for getting the session key
appKey_var = self._search_regex(
r'value\s*\(\s*["\']appGridApplicationKey["\']\s*,\s*([0-9a-f]+)',
gigya_sc, 'appKey variable')
appKey = self._search_regex(
r'var\s+%s\s*=\s*["\']([0-9a-f]+)' % appKey_var, gigya_sc, 'appKey')
title = self._search_regex(
r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>',
webpage, 'title', default=None)
session_json = self._download_json(
'https://appgrid-api.cloud.accedo.tv/session',
video_id, 'Downloading session keys', query={
'appKey': appKey,
'uuid': compat_str(uuid.uuid4()),
})
mobj = re.search(r'''(?sx)
class="Destacado-text"[^>]*>.*?<h1>\s*
<span>(?P<series>[^<]+)</span>\s*
<span>(?P<season>[^<]+)</span>\s*
<span>(?P<episode>[^<]+)</span>''', webpage)
series, season, episode = mobj.groups() if mobj else [None] * 3
paths = self._download_json(
'https://appgrid-api.cloud.accedo.tv/metadata/general_configuration,%20web_configuration',
video_id, 'Downloading paths JSON',
query={'sessionKey': compat_str(session_json['sessionKey'])})
if not title:
if mobj:
title = '%s - %s - %s' % (series, season, episode)
else:
title = remove_start(self._search_regex(
r'<title>([^<]+)</title>', webpage, 'title'), 'Ver online ')
ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search']
source = self._download_json(
'http://%s%s%s/docs/%s' % (
ooyala_s['base_url'], ooyala_s['full_path'],
ooyala_s['provider_id'], video_id),
video_id, 'Downloading data JSON', query={
'include_titles': 'Series,Season',
'product_name': 'test',
'format': 'full',
})['hits']['hits'][0]['_source']
info.update({
'display_id': display_id,
embedCode = source['offers'][0]['embed_codes'][0]
titles = source['localizable_titles'][0]
title = titles.get('title_medium') or titles['title_long']
description = titles.get('summary_long') or titles.get('summary_medium')
def get(key1, key2):
value1 = source.get(key1)
if not value1 or not isinstance(value1, list):
return
if not isinstance(value1[0], dict):
return
return value1[0].get(key2)
series = get('localizable_titles_series', 'title_medium')
season = get('localizable_titles_season', 'title_medium')
season_number = int_or_none(source.get('season_number'))
season_id = source.get('season_id')
episode = titles.get('title_sort_name')
episode_number = int_or_none(source.get('episode_number'))
duration = parse_duration(get('videos', 'duration'))
return {
'_type': 'url_transparent',
# for some reason only HLS is supported
'url': smuggle_url('ooyala:' + embedCode, {'supportedformats': 'm3u8'}),
'id': video_id,
'title': title,
'description': get_element_by_attribute('class', 'text', webpage),
'description': description,
'series': series,
'season': season,
'season_number': season_number,
'season_id': season_id,
'episode': episode,
})
return info
'episode_number': episode_number,
'duration': duration,
'thumbnail': get('images', 'url'),
}

View File

@ -22,7 +22,7 @@ from ..utils import (
class MixcloudIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)'
_VALID_URL = r'https?://(?:(?:www|beta|m)\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)'
IE_NAME = 'mixcloud'
_TESTS = [{
@ -51,6 +51,9 @@ class MixcloudIE(InfoExtractor):
'view_count': int,
'like_count': int,
},
}, {
'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
'only_matching': True,
}]
# See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js

View File

@ -78,11 +78,6 @@ class MSNIE(InfoExtractor):
m3u8_formats = self._extract_m3u8_formats(
format_url, display_id, 'mp4',
m3u8_id='hls', fatal=False)
# Despite metadata in m3u8 all video+audio formats are
# actually video-only (no audio)
for f in m3u8_formats:
if f.get('acodec') != 'none' and f.get('vcodec') != 'none':
f['acodec'] = 'none'
formats.extend(m3u8_formats)
else:
formats.append({

View File

@ -13,6 +13,7 @@ from ..utils import (
fix_xml_ampersands,
float_or_none,
HEADRequest,
NO_DEFAULT,
RegexNotFoundError,
sanitized_Request,
strip_or_none,
@ -201,7 +202,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
[self._get_video_info(item) for item in idoc.findall('.//item')],
playlist_title=title, playlist_description=description)
def _extract_mgid(self, webpage):
def _extract_mgid(self, webpage, default=NO_DEFAULT):
try:
# the url can be http://media.mtvnservices.com/fb/{mgid}.swf
# or http://media.mtvnservices.com/{mgid}
@ -221,7 +222,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
sm4_embed = self._html_search_meta(
'sm4:video:embed', webpage, 'sm4 embed', default='')
mgid = self._search_regex(
r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid')
r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=default)
return mgid
def _real_extract(self, url):

View File

@ -9,6 +9,7 @@ from ..utils import (
lowercase_escape,
smuggle_url,
unescapeHTML,
update_url_query,
)
@ -208,7 +209,7 @@ class NBCNewsIE(ThePlatformIE):
'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
'md5': 'af1adfa51312291a017720403826bb64',
'info_dict': {
'id': '269389891880',
'id': 'p_tweet_snow_140529',
'ext': 'mp4',
'title': 'How Twitter Reacted To The Snowden Interview',
'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
@ -232,7 +233,7 @@ class NBCNewsIE(ThePlatformIE):
'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
'md5': '73135a2e0ef819107bbb55a5a9b2a802',
'info_dict': {
'id': '394064451844',
'id': 'nn_netcast_150204',
'ext': 'mp4',
'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
@ -245,7 +246,7 @@ class NBCNewsIE(ThePlatformIE):
'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
'md5': 'a49e173825e5fcd15c13fc297fced39d',
'info_dict': {
'id': '529953347624',
'id': 'x_lon_vwhorn_150922',
'ext': 'mp4',
'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up',
'description': 'md5:c8be487b2d80ff0594c005add88d8351',
@ -258,7 +259,7 @@ class NBCNewsIE(ThePlatformIE):
'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
'md5': '118d7ca3f0bea6534f119c68ef539f71',
'info_dict': {
'id': '669831235788',
'id': 'tdy_al_space_160420',
'ext': 'mp4',
'title': 'See the aurora borealis from space in stunning new NASA video',
'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
@ -271,7 +272,7 @@ class NBCNewsIE(ThePlatformIE):
'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
'info_dict': {
'id': '314487875924',
'id': 'n_hayes_Aimm_140801_272214',
'ext': 'mp4',
'title': 'The chaotic GOP immigration vote',
'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
@ -279,7 +280,6 @@ class NBCNewsIE(ThePlatformIE):
'timestamp': 1406937606,
'upload_date': '20140802',
'uploader': 'NBCU-NEWS',
'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'],
},
},
{
@ -311,28 +311,41 @@ class NBCNewsIE(ThePlatformIE):
else:
# "feature" and "nightly-news" pages use theplatform.com
video_id = mobj.group('mpx_id')
if not video_id.isdigit():
webpage = self._download_webpage(url, video_id)
info = None
bootstrap_json = self._search_regex(
[r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'],
webpage, 'bootstrap json', default=None)
webpage = self._download_webpage(url, video_id)
filter_param = 'byId'
bootstrap_json = self._search_regex(
[r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"',
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);'],
webpage, 'bootstrap json', default=None)
if bootstrap_json:
bootstrap = self._parse_json(
bootstrap_json, video_id, transform_source=unescapeHTML)
info = None
if 'results' in bootstrap:
info = bootstrap['results'][0]['video']
elif 'video' in bootstrap:
info = bootstrap['video']
elif 'msnbcVideoInfo' in bootstrap:
info = bootstrap['msnbcVideoInfo']['meta']
elif 'msnbcThePlatform' in bootstrap:
info = bootstrap['msnbcThePlatform']['videoPlayer']['video']
else:
info = bootstrap
video_id = info['mpxId']
if 'guid' in info:
video_id = info['guid']
filter_param = 'byGuid'
elif 'mpxId' in info:
video_id = info['mpxId']
return {
'_type': 'url_transparent',
'id': video_id,
# http://feed.theplatform.com/f/2E2eJC/nbcnews also works
'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byId=%s' % video_id,
'url': update_url_query('http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews', {filter_param: video_id}),
'ie_key': 'ThePlatformFeed',
}

View File

@ -2,7 +2,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from .screenwavemedia import ScreenwaveMediaIE
from .jwplatform import JWPlatformIE
from ..utils import (
unified_strdate,
@ -25,7 +25,7 @@ class NormalbootsIE(InfoExtractor):
# m3u8 download
'skip_download': True,
},
'add_ie': ['ScreenwaveMedia'],
'add_ie': ['JWPlatform'],
}
def _real_extract(self, url):
@ -39,15 +39,13 @@ class NormalbootsIE(InfoExtractor):
r'<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
webpage, 'date', fatal=False))
screenwavemedia_url = self._html_search_regex(
ScreenwaveMediaIE.EMBED_PATTERN, webpage, 'screenwave URL',
group='url')
jwplatform_url = JWPlatformIE._extract_url(webpage)
return {
'_type': 'url_transparent',
'id': video_id,
'url': screenwavemedia_url,
'ie_key': ScreenwaveMediaIE.ie_key(),
'url': jwplatform_url,
'ie_key': JWPlatformIE.ie_key(),
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),

View File

@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
import random
import re
from .common import InfoExtractor
@ -14,6 +15,25 @@ from ..utils import (
class NRKBaseIE(InfoExtractor):
_faked_ip = None
def _download_webpage_handle(self, *args, **kwargs):
# NRK checks X-Forwarded-For HTTP header in order to figure out the
# origin of the client behind proxy. This allows to bypass geo
# restriction by faking this header's value to some Norway IP.
# We will do so once we encounter any geo restriction error.
if self._faked_ip:
# NB: str is intentional
kwargs.setdefault(str('headers'), {})['X-Forwarded-For'] = self._faked_ip
return super(NRKBaseIE, self)._download_webpage_handle(*args, **kwargs)
def _fake_ip(self):
# Use fake IP from 37.191.128.0/17 in order to workaround geo
# restriction
def octet(lb=0, ub=255):
return random.randint(lb, ub)
self._faked_ip = '37.191.%d.%d' % (octet(128), octet())
def _real_extract(self, url):
video_id = self._match_id(url)
@ -24,8 +44,17 @@ class NRKBaseIE(InfoExtractor):
title = data.get('fullTitle') or data.get('mainTitle') or data['title']
video_id = data.get('id') or video_id
http_headers = {'X-Forwarded-For': self._faked_ip} if self._faked_ip else {}
entries = []
conviva = data.get('convivaStatistics') or {}
live = (data.get('mediaElementType') == 'Live' or
data.get('isLive') is True or conviva.get('isLive'))
def make_title(t):
return self._live_title(t) if live else t
media_assets = data.get('mediaAssets')
if media_assets and isinstance(media_assets, list):
def video_id_and_title(idx):
@ -39,6 +68,13 @@ class NRKBaseIE(InfoExtractor):
if not formats:
continue
self._sort_formats(formats)
# Some f4m streams may not work with hdcore in fragments' URLs
for f in formats:
extra_param = f.get('extra_param_to_segment_url')
if extra_param and 'hdcore' in extra_param:
del f['extra_param_to_segment_url']
entry_id, entry_title = video_id_and_title(num)
duration = parse_duration(asset.get('duration'))
subtitles = {}
@ -50,10 +86,11 @@ class NRKBaseIE(InfoExtractor):
})
entries.append({
'id': asset.get('carrierId') or entry_id,
'title': entry_title,
'title': make_title(entry_title),
'duration': duration,
'subtitles': subtitles,
'formats': formats,
'http_headers': http_headers,
})
if not entries:
@ -64,18 +101,30 @@ class NRKBaseIE(InfoExtractor):
duration = parse_duration(data.get('duration'))
entries = [{
'id': video_id,
'title': title,
'title': make_title(title),
'duration': duration,
'formats': formats,
}]
if not entries:
if data.get('usageRights', {}).get('isGeoBlocked'):
raise ExtractorError(
'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
expected=True)
message_type = data.get('messageType', '')
# Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
if 'IsGeoBlocked' in message_type and not self._faked_ip:
self.report_warning(
'Video is geo restricted, trying to fake IP')
self._fake_ip()
return self._real_extract(url)
MESSAGES = {
'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
'ProgramRightsHasExpired': 'Programmet har gått ut',
'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
}
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, MESSAGES.get(
message_type, message_type)),
expected=True)
conviva = data.get('convivaStatistics') or {}
series = conviva.get('seriesName') or data.get('seriesTitle')
episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
@ -158,7 +207,15 @@ class NRKIE(NRKBaseIE):
class NRKTVIE(NRKBaseIE):
IE_DESC = 'NRK TV and NRK Radio'
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
_EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
_VALID_URL = r'''(?x)
https?://
(?:tv|radio)\.nrk(?:super)?\.no/
(?:serie/[^/]+|program)/
(?![Ee]pisodes)%s
(?:/\d{2}-\d{2}-\d{4})?
(?:\#del=(?P<part_id>\d+))?
''' % _EPISODE_RE
_API_HOST = 'psapi-we.nrk.no'
_TESTS = [{
@ -224,9 +281,43 @@ class NRKTVIE(NRKBaseIE):
}]
class NRKPlaylistIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'
class NRKTVDirekteIE(NRKTVIE):
IE_DESC = 'NRK TV Direkte and NRK Radio Direkte'
_VALID_URL = r'https?://(?:tv|radio)\.nrk\.no/direkte/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://tv.nrk.no/direkte/nrk1',
'only_matching': True,
}, {
'url': 'https://radio.nrk.no/direkte/p1_oslo_akershus',
'only_matching': True,
}]
class NRKPlaylistBaseIE(InfoExtractor):
def _extract_description(self, webpage):
pass
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
entries = [
self.url_result('nrk:%s' % video_id, NRKIE.ie_key())
for video_id in re.findall(self._ITEM_RE, webpage)
]
playlist_title = self. _extract_title(webpage)
playlist_description = self._extract_description(webpage)
return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description)
class NRKPlaylistIE(NRKPlaylistBaseIE):
_VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'
_ITEM_RE = r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"'
_TESTS = [{
'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
'info_dict': {
@ -245,23 +336,28 @@ class NRKPlaylistIE(InfoExtractor):
'playlist_count': 5,
}]
def _real_extract(self, url):
playlist_id = self._match_id(url)
def _extract_title(self, webpage):
return self._og_search_title(webpage, fatal=False)
webpage = self._download_webpage(url, playlist_id)
def _extract_description(self, webpage):
return self._og_search_description(webpage)
entries = [
self.url_result('nrk:%s' % video_id, 'NRK')
for video_id in re.findall(
r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"',
webpage)
]
playlist_title = self._og_search_title(webpage)
playlist_description = self._og_search_description(webpage)
class NRKTVEpisodesIE(NRKPlaylistBaseIE):
_VALID_URL = r'https?://tv\.nrk\.no/program/[Ee]pisodes/[^/]+/(?P<id>\d+)'
_ITEM_RE = r'data-episode=["\']%s' % NRKTVIE._EPISODE_RE
_TESTS = [{
'url': 'https://tv.nrk.no/program/episodes/nytt-paa-nytt/69031',
'info_dict': {
'id': '69031',
'title': 'Nytt på nytt, sesong: 201210',
},
'playlist_count': 4,
}]
return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description)
def _extract_title(self, webpage):
return self._html_search_regex(
r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
class NRKSkoleIE(InfoExtractor):

View File

@ -0,0 +1,60 @@
# coding: utf-8
from __future__ import unicode_literals
from .jwplatform import JWPlatformBaseIE
from ..utils import (
ExtractorError,
js_to_json,
)
class OnDemandKoreaIE(JWPlatformBaseIE):
_VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html'
_TEST = {
'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html',
'info_dict': {
'id': 'ask-us-anything-e43',
'ext': 'mp4',
'title': 'Ask Us Anything : E43',
'thumbnail': 're:^https?://.*\.jpg$',
},
'params': {
'skip_download': 'm3u8 download'
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id, fatal=False)
if not webpage:
# Page sometimes returns captcha page with HTTP 403
raise ExtractorError(
'Unable to access page. You may have been blocked.',
expected=True)
if 'msg_block_01.png' in webpage:
self.raise_geo_restricted(
'This content is not available in your region')
if 'This video is only available to ODK PLUS members.' in webpage:
raise ExtractorError(
'This video is only available to ODK PLUS members.',
expected=True)
title = self._og_search_title(webpage)
jw_config = self._parse_json(
self._search_regex(
r'(?s)jwplayer\(([\'"])(?:(?!\1).)+\1\)\.setup\s*\((?P<options>.+?)\);',
webpage, 'jw config', group='options'),
video_id, transform_source=js_to_json)
info = self._parse_jwplayer_data(
jw_config, video_id, require_title=False, m3u8_id='hls',
base_url=url)
info.update({
'title': title,
'thumbnail': self._og_search_thumbnail(webpage),
})
return info

View File

@ -18,7 +18,7 @@ class OoyalaBaseIE(InfoExtractor):
_CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/'
_AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?'
def _extract(self, content_tree_url, video_id, domain='example.org'):
def _extract(self, content_tree_url, video_id, domain='example.org', supportedformats=None):
content_tree = self._download_json(content_tree_url, video_id)['content_tree']
metadata = content_tree[list(content_tree)[0]]
embed_code = metadata['embed_code']
@ -29,7 +29,7 @@ class OoyalaBaseIE(InfoExtractor):
self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) +
compat_urllib_parse_urlencode({
'domain': domain,
'supportedFormats': 'mp4,rtmp,m3u8,hds',
'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds',
}), video_id)
cur_auth_data = auth_data['authorization_data'][embed_code]
@ -145,8 +145,9 @@ class OoyalaIE(OoyalaBaseIE):
url, smuggled_data = unsmuggle_url(url, {})
embed_code = self._match_id(url)
domain = smuggled_data.get('domain')
supportedformats = smuggled_data.get('supportedformats')
content_tree_url = self._CONTENT_TREE_BASE + 'embed_code/%s/%s' % (embed_code, embed_code)
return self._extract(content_tree_url, embed_code, domain)
return self._extract(content_tree_url, embed_code, domain, supportedformats)
class OoyalaExternalIE(OoyalaBaseIE):

View File

@ -1,11 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals, division
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_chr,
compat_ord,
)
from ..compat import compat_chr
from ..utils import (
determine_ext,
ExtractorError,
@ -13,7 +10,7 @@ from ..utils import (
class OpenloadIE(InfoExtractor):
_VALID_URL = r'https?://openload\.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
_VALID_URL = r'https?://(?:openload\.(?:co|io)|oload\.tv)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
_TESTS = [{
'url': 'https://openload.co/f/kUEfGclsU9o',
@ -54,6 +51,9 @@ class OpenloadIE(InfoExtractor):
# for title and ext
'url': 'https://openload.co/embed/Sxz5sADo82g/',
'only_matching': True,
}, {
'url': 'https://oload.tv/embed/KnG-kKZdcfY/',
'only_matching': True,
}]
def _real_extract(self, url):
@ -63,29 +63,20 @@ class OpenloadIE(InfoExtractor):
if 'File not found' in webpage or 'deleted by the owner' in webpage:
raise ExtractorError('File not found', expected=True)
# The following decryption algorithm is written by @yokrysty and
# declared to be freely used in youtube-dl
# See https://github.com/rg3/youtube-dl/issues/10408
enc_data = self._html_search_regex(
r'<span[^>]*>([^<]+)</span>\s*<span[^>]*>[^<]+</span>\s*<span[^>]+id="streamurl"',
webpage, 'encrypted data')
ol_id = self._search_regex(
'<span[^>]+id="[a-zA-Z0-9]+x"[^>]*>([0-9]+)</span>',
webpage, 'openload ID')
magic = compat_ord(enc_data[-1])
video_url_chars = []
first_two_chars = int(float(ol_id[0:][:2]))
urlcode = ''
num = 2
for idx, c in enumerate(enc_data):
j = compat_ord(c)
if j == magic:
j -= 1
elif j == magic - 1:
j += 1
if j >= 33 and j <= 126:
j = ((j + 14) % 94) + 33
if idx == len(enc_data) - 1:
j += 3
video_url_chars += compat_chr(j)
while num < len(ol_id):
urlcode += compat_chr(int(float(ol_id[num:][:3])) -
first_two_chars * int(float(ol_id[num + 3:][:2])))
num += 5
video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars)
video_url = 'https://openload.co/stream/' + urlcode
title = self._og_search_title(webpage, default=None) or self._search_regex(
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
@ -104,5 +95,4 @@ class OpenloadIE(InfoExtractor):
'ext': determine_ext(title),
'subtitles': subtitles,
}
return info_dict

View File

@ -11,6 +11,7 @@ from ..utils import (
float_or_none,
parse_duration,
str_to_int,
urlencode_postdata,
)
@ -56,6 +57,22 @@ class PandoraTVIE(InfoExtractor):
r'^v(\d+)[Uu]rl$', format_id, 'height', default=None)
if not height:
continue
play_url = self._download_json(
'http://m.pandora.tv/?c=api&m=play_url', video_id,
data=urlencode_postdata({
'prgid': video_id,
'runtime': info.get('runtime'),
'vod_url': format_url,
}),
headers={
'Origin': url,
'Content-Type': 'application/x-www-form-urlencoded',
})
format_url = play_url.get('url')
if not format_url:
continue
formats.append({
'format_id': '%sp' % height,
'url': format_url,

View File

@ -350,6 +350,15 @@ class PBSIE(InfoExtractor):
410: 'This video has expired and is no longer available for online streaming.',
}
def _real_initialize(self):
cookie = (self._download_json(
'http://localization.services.pbs.org/localize/auto/cookie/',
None, headers=self.geo_verification_headers(), fatal=False) or {}).get('cookie')
if cookie:
station = self._search_regex(r'#?s=\["([^"]+)"', cookie, 'station')
if station:
self._set_cookie('.pbs.org', 'pbsol.station', station)
def _extract_webpage(self, url):
mobj = re.match(self._VALID_URL, url)
@ -476,7 +485,8 @@ class PBSIE(InfoExtractor):
redirect_info = self._download_json(
'%s?format=json' % redirect['url'], display_id,
'Downloading %s video url info' % (redirect_id or num))
'Downloading %s video url info' % (redirect_id or num),
headers=self.geo_verification_headers())
if redirect_info['status'] == 'error':
raise ExtractorError(

View File

@ -0,0 +1,106 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
dict_get,
int_or_none,
unescapeHTML,
parse_iso8601,
)
class PikselIE(InfoExtractor):
_VALID_URL = r'https?://player\.piksel\.com/v/(?P<id>[a-z0-9]+)'
_TEST = {
'url': 'http://player.piksel.com/v/nv60p12f',
'md5': 'd9c17bbe9c3386344f9cfd32fad8d235',
'info_dict': {
'id': 'nv60p12f',
'ext': 'mp4',
'title': 'فن الحياة - الحلقة 1',
'description': 'احدث برامج الداعية الاسلامي " مصطفي حسني " فى رمضان 2016علي النهار نور',
'timestamp': 1465231790,
'upload_date': '20160606',
}
}
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//player\.piksel\.com/v/[a-z0-9]+)',
webpage)
if mobj:
return mobj.group('url')
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
app_token = self._search_regex(
r'clientAPI\s*:\s*"([^"]+)"', webpage, 'app token')
response = self._download_json(
'http://player.piksel.com/ws/ws_program/api/%s/mode/json/apiv/5' % app_token,
video_id, query={
'v': video_id
})['response']
failure = response.get('failure')
if failure:
raise ExtractorError(response['failure']['reason'], expected=True)
video_data = response['WsProgramResponse']['program']['asset']
title = video_data['title']
formats = []
m3u8_url = dict_get(video_data, [
'm3u8iPadURL',
'ipadM3u8Url',
'm3u8AndroidURL',
'm3u8iPhoneURL',
'iphoneM3u8Url'])
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
asset_type = dict_get(video_data, ['assetType', 'asset_type'])
for asset_file in video_data.get('assetFiles', []):
# TODO: extract rtmp formats
http_url = asset_file.get('http_url')
if not http_url:
continue
tbr = None
vbr = int_or_none(asset_file.get('videoBitrate'), 1024)
abr = int_or_none(asset_file.get('audioBitrate'), 1024)
if asset_type == 'video':
tbr = vbr + abr
elif asset_type == 'audio':
tbr = abr
format_id = ['http']
if tbr:
format_id.append(compat_str(tbr))
formats.append({
'format_id': '-'.join(format_id),
'url': unescapeHTML(http_url),
'vbr': vbr,
'abr': abr,
'width': int_or_none(asset_file.get('videoWidth')),
'height': int_or_none(asset_file.get('videoHeight')),
'filesize': int_or_none(asset_file.get('filesize')),
'tbr': tbr,
})
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': video_data.get('description'),
'thumbnail': video_data.get('thumbnailUrl'),
'timestamp': parse_iso8601(video_data.get('dateadd')),
'formats': formats,
}

View File

@ -8,30 +8,31 @@ from ..utils import int_or_none
class PlaysTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?plays\.tv/video/(?P<id>[0-9a-f]{18})'
_TEST = {
'url': 'http://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall',
_VALID_URL = r'https?://(?:www\.)?plays\.tv/(?:video|embeds)/(?P<id>[0-9a-f]{18})'
_TESTS = [{
'url': 'https://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall',
'md5': 'dfeac1198506652b5257a62762cec7bc',
'info_dict': {
'id': '56af17f56c95335490',
'ext': 'mp4',
'title': 'When you outplay the Azir wall',
'title': 'Bjergsen - When you outplay the Azir wall',
'description': 'Posted by Bjergsen',
}
}
}, {
'url': 'https://plays.tv/embeds/56af17f56c95335490',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
webpage = self._download_webpage(
'https://plays.tv/video/%s' % video_id, video_id)
info = self._search_json_ld(webpage, video_id,)
title = self._og_search_title(webpage)
content = self._parse_json(
self._search_regex(
r'R\.bindContent\(({.+?})\);', webpage,
'content'), video_id)['content']
mpd_url, sources = re.search(
r'(?s)<video[^>]+data-mpd="([^"]+)"[^>]*>(.+?)</video>',
content).groups()
webpage).groups()
formats = self._extract_mpd_formats(
self._proto_relative_url(mpd_url), video_id, mpd_id='DASH')
for format_id, height, format_url in re.findall(r'<source\s+res="((\d+)h?)"\s+src="([^"]+)"', sources):
@ -42,10 +43,11 @@ class PlaysTVIE(InfoExtractor):
})
self._sort_formats(formats)
return {
info.update({
'id': video_id,
'title': title,
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'thumbnail': info.get('thumbnail') or self._og_search_thumbnail(webpage),
'formats': formats,
}
})
return info

View File

@ -33,7 +33,7 @@ class PornHubIE(InfoExtractor):
(?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)|
(?:www\.)?thumbzilla\.com/video/
)
(?P<id>[0-9a-z]+)
(?P<id>[\da-z]+)
'''
_TESTS = [{
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
@ -96,12 +96,11 @@ class PornHubIE(InfoExtractor):
'only_matching': True,
}]
@classmethod
def _extract_url(cls, webpage):
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/\d+)\1', webpage)
if mobj:
return mobj.group('url')
@staticmethod
def _extract_urls(webpage):
return re.findall(
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/[\da-z]+)',
webpage)
def _extract_count(self, pattern, webpage, name):
return str_to_int(self._search_regex(

View File

@ -85,6 +85,9 @@ class ProSiebenSat1BaseIE(InfoExtractor):
formats.extend(self._extract_m3u8_formats(
source_url, clip_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
elif mimetype == 'application/dash+xml':
formats.extend(self._extract_mpd_formats(
source_url, clip_id, mpd_id='dash', fatal=False))
else:
tbr = fix_bitrate(source['bitrate'])
if protocol in ('rtmp', 'rtmpe'):

Some files were not shown because too many files have changed in this diff Show More