Compare commits
193 Commits
2016.08.24
...
2016.09.11
Author | SHA1 | Date | |
---|---|---|---|
|
0307d6fba6 | ||
|
fc150cba1d | ||
|
d667ab7fad | ||
|
eb87d4545a | ||
|
1c81476cbb | ||
|
bc9186c882 | ||
|
6599c72527 | ||
|
6bb05b32a9 | ||
|
fea74acad8 | ||
|
f01115c933 | ||
|
2cdbc06a1f | ||
|
2cb93afcd8 | ||
|
bfcda07a27 | ||
|
001a5fd3d7 | ||
|
1e35999c1e | ||
|
2512b17493 | ||
|
56c0ead4d3 | ||
|
7324243750 | ||
|
84a18e9b90 | ||
|
b29f842e0e | ||
|
f009fcac0d | ||
|
6c3affcb18 | ||
|
1e19ff2984 | ||
|
c6129feb7f | ||
|
bb5ebd4453 | ||
|
cb9cbd84ed | ||
|
4d5726b0d7 | ||
|
4614ad7b59 | ||
|
b717837190 | ||
|
2abad67e52 | ||
|
ad0e2b3359 | ||
|
37720844f6 | ||
|
6cfcb8ac36 | ||
|
7a979da8cb | ||
|
2fdc7b0e04 | ||
|
010d034fca | ||
|
02e552886f | ||
|
25042f7372 | ||
|
3f612f0767 | ||
|
17bf6e71cc | ||
|
881f35479d | ||
|
89f257d6e5 | ||
|
e78a5428b6 | ||
|
6656a82481 | ||
|
d7e794928d | ||
|
9c27188988 | ||
|
b84d311d53 | ||
|
f87feb4b68 | ||
|
2841bdcebb | ||
|
84b91dd4e3 | ||
|
92c9c2a88b | ||
|
9d54b02bae | ||
|
846d8b76a0 | ||
|
aa3f9fe695 | ||
|
8258f4457c | ||
|
948cd5b72d | ||
|
8d3737cda7 | ||
|
155bc674c4 | ||
|
c33c962adf | ||
|
bdcc046d12 | ||
|
a493f10208 | ||
|
f3eeaacb4e | ||
|
b4d6a85d60 | ||
|
0b36a96212 | ||
|
bc22a79694 | ||
|
340e31ca74 | ||
|
973dee491f | ||
|
1f85029d82 | ||
|
95be19d436 | ||
|
95843da529 | ||
|
abf2c79f95 | ||
|
b49ad71ce1 | ||
|
9127e1533d | ||
|
78e762d23c | ||
|
4809490108 | ||
|
8112bfeaba | ||
|
d9606d9b6c | ||
|
433af6ad30 | ||
|
feaa5ad787 | ||
|
100bd86a68 | ||
|
0def758782 | ||
|
919cf1a62f | ||
|
b29cd56591 | ||
|
622638512b | ||
|
37c7490ac6 | ||
|
091624f9da | ||
|
7e5dc339de | ||
|
4a69fa04e0 | ||
|
2e99cd30c3 | ||
|
25afc2a783 | ||
|
9603b66012 | ||
|
45aab4d30b | ||
|
ed2bfe93aa | ||
|
cdc783510b | ||
|
cf0efe9636 | ||
|
dedb177029 | ||
|
86c3bbbced | ||
|
4b3a607658 | ||
|
3a7d35b982 | ||
|
6496ccb413 | ||
|
3fcce30289 | ||
|
c2b2c7e138 | ||
|
dacb3a864a | ||
|
6066d03db0 | ||
|
6562d34a8c | ||
|
5e9e3d0f6b | ||
|
349fc5c705 | ||
|
2c3e0af93e | ||
|
6150502e47 | ||
|
b207d5ebd4 | ||
|
4191779dcd | ||
|
f97ec8bcb9 | ||
|
8276d3b87a | ||
|
af95ee94b4 | ||
|
8fb6af6bba | ||
|
f6af0f888b | ||
|
e816c9d158 | ||
|
9250181f37 | ||
|
f096ec2625 | ||
|
4c8ab6fd71 | ||
|
05d4612947 | ||
|
746a695b36 | ||
|
165c54e97d | ||
|
2896dd73bc | ||
|
f8fd510eb4 | ||
|
7a3e849f6e | ||
|
196c6ba067 | ||
|
165620e320 | ||
|
4fd350611c | ||
|
263fef43de | ||
|
a249ab83cb | ||
|
f7043ef39c | ||
|
64fc49aba0 | ||
|
245023a861 | ||
|
3c77a54d5d | ||
|
da30a20a4d | ||
|
1fe48afea5 | ||
|
42e05be867 | ||
|
fe45b0e060 | ||
|
a06e1498aa | ||
|
5a80e7b43a | ||
|
3fb2a23029 | ||
|
7be15d4097 | ||
|
cd10b3ea63 | ||
|
547993dcd0 | ||
|
6c9b71bc08 | ||
|
93b8404599 | ||
|
9ba1e1dcc0 | ||
|
b8079a40bc | ||
|
5bc8a73af6 | ||
|
b3eaeded12 | ||
|
ec65b391cb | ||
|
2982514072 | ||
|
98908bcf7c | ||
|
04b32c8f96 | ||
|
40eec6b15c | ||
|
39efc6e3e0 | ||
|
1198fe14a1 | ||
|
71e90766b5 | ||
|
d7aae610f6 | ||
|
92c27a0dbf | ||
|
d181cff685 | ||
|
3b4b82d4ce | ||
|
545ef4f531 | ||
|
906b87cf5f | ||
|
b281aad2dc | ||
|
6b18a24e6e | ||
|
c9de980106 | ||
|
f9b373afda | ||
|
298a120ab7 | ||
|
e3faecde30 | ||
|
a0f071a50d | ||
|
20bad91d76 | ||
|
b54a2da433 | ||
|
dc2c37f316 | ||
|
c1f62dd338 | ||
|
5a3efcd27c | ||
|
4c8f9c2577 | ||
|
f26a298247 | ||
|
ea01cdbf61 | ||
|
6a76b53355 | ||
|
d37708fc86 | ||
|
5c13c28566 | ||
|
f70e9229e6 | ||
|
30afe4aeb2 | ||
|
75fa990dc6 | ||
|
f39ffc5877 | ||
|
07ea9c9b05 | ||
|
073ac1225f | ||
|
0c6422cdd6 | ||
|
08773689f3 | ||
|
0c75abbb7b | ||
|
97653f81b2 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.24.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.24.1**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.11.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.11.1**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.08.24.1
|
||||
[debug] youtube-dl version 2016.09.11.1
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
4
AUTHORS
4
AUTHORS
@@ -181,3 +181,7 @@ Nehal Patel
|
||||
Rob van Bekkum
|
||||
Petr Zvoníček
|
||||
Pratyush Singh
|
||||
Aleksander Nitecki
|
||||
Sebastian Blunt
|
||||
Matěj Cepl
|
||||
Xie Yanbo
|
||||
|
148
ChangeLog
148
ChangeLog
@@ -1,3 +1,151 @@
|
||||
version 2016.09.11.1
|
||||
|
||||
Extractors
|
||||
+ [tube8] Extract categories and tags (#10579)
|
||||
+ [pornhub] Extract categories and tags (#10499)
|
||||
* [openload] Temporary fix (#10408)
|
||||
+ [foxnews] Add support for Fox News articles (#10598)
|
||||
* [viafree] Improve video id extraction (#10615)
|
||||
* [iwara] Fix extraction after relaunch (#10462, #3215)
|
||||
+ [tfo] Add extractor for tfo.org
|
||||
* [lrt] Fix audio extraction (#10566)
|
||||
* [9now] Fix extraction (#10561)
|
||||
+ [canalplus] Add support for c8.fr (#10577)
|
||||
* [newgrounds] Fix uploader extraction (#10584)
|
||||
+ [polskieradio:category] Add support for category lists (#10576)
|
||||
+ [ketnet] Add extractor for ketnet.be (#10343)
|
||||
+ [canvas] Add support for een.be (#10605)
|
||||
+ [telequebec] Add extractor for telequebec.tv (#1999)
|
||||
* [parliamentliveuk] Fix extraction (#9137)
|
||||
|
||||
|
||||
version 2016.09.08
|
||||
|
||||
Extractors
|
||||
+ [jwplatform] Extract height from format label
|
||||
+ [yahoo] Extract Brightcove Legacy Studio embeds (#9345)
|
||||
* [videomore] Fix extraction (#10592)
|
||||
* [foxgay] Fix extraction (#10480)
|
||||
+ [rmcdecouverte] Add extractor for rmcdecouverte.bfmtv.com (#9709)
|
||||
* [gamestar] Fix metadata extraction (#10479)
|
||||
* [puls4] Fix extraction (#10583)
|
||||
+ [cctv] Add extractor for CCTV and CNTV (#8153)
|
||||
+ [lci] Add extractor for lci.fr (#10573)
|
||||
+ [wat] Extract DASH formats
|
||||
+ [viafree] Improve video id detection (#10569)
|
||||
+ [trutv] Add extractor for trutv.com (#10519)
|
||||
+ [nick] Add support for nickelodeon.nl (#10559)
|
||||
+ [abcotvs:clips] Add support for clips.abcotvs.com
|
||||
+ [abcotvs] Add support for ABC Owned Television Stations sites (#9551)
|
||||
+ [miaopai] Add extractor for miaopai.com (#10556)
|
||||
* [gamestar] Fix metadata extraction (#10479)
|
||||
+ [bilibili] Add support for episodes (#10190)
|
||||
+ [tvnoe] Add extractor for tvnoe.cz (#10524)
|
||||
|
||||
|
||||
version 2016.09.04.1
|
||||
|
||||
Core
|
||||
* In DASH downloader if the first segment fails, abort the whole download
|
||||
process to prevent throttling (#10497)
|
||||
+ Add support for --skip-unavailable-fragments and --fragment-retries in
|
||||
hlsnative downloader (#10165, #10448).
|
||||
+ Add support for --skip-unavailable-fragments in DASH downloader
|
||||
+ Introduce --skip-unavailable-fragments option for fragment based downloaders
|
||||
that allows to skip fragments unavailable due to a HTTP error
|
||||
* Fix extraction of video/audio entries with src attribute in
|
||||
_parse_html5_media_entries (#10540)
|
||||
|
||||
Extractors
|
||||
* [theplatform] Relax URL regular expression (#10546)
|
||||
* [youtube:playlist] Extend URL regular expression
|
||||
* [rottentomatoes] Delegate extraction to internetvideoarchive extractor
|
||||
* [internetvideoarchive] Extract all formats
|
||||
* [pornvoisines] Fix extraction (#10469)
|
||||
* [rottentomatoes] Fix extraction (#10467)
|
||||
* [espn] Extend URL regular expression (#10549)
|
||||
* [vimple] Extend URL regular expression (#10547)
|
||||
* [youtube:watchlater] Fix extraction (#10544)
|
||||
* [youjizz] Fix extraction (#10437)
|
||||
+ [foxnews] Add support for FoxNews Insider (#10445)
|
||||
+ [fc2] Recognize Flash player URLs (#10512)
|
||||
|
||||
|
||||
version 2016.09.03
|
||||
|
||||
Core
|
||||
* Restore usage of NAME attribute from EXT-X-MEDIA tag for formats codes in
|
||||
_extract_m3u8_formats (#10522)
|
||||
* Handle semicolon in mimetype2ext
|
||||
|
||||
Extractors
|
||||
+ [youtube] Add support for rental videos' previews (#10532)
|
||||
* [youtube:playlist] Fallback to video extraction for video/playlist URLs when
|
||||
no playlist is actually served (#10537)
|
||||
+ [drtv] Add support for dr.dk/nyheder (#10536)
|
||||
+ [facebook:plugins:video] Add extractor (#10530)
|
||||
+ [go] Add extractor for *.go.com sites
|
||||
* [adobepass] Check for authz_token expiration (#10527)
|
||||
* [nytimes] Improve extraction
|
||||
* [thestar] Fix extraction (#10465)
|
||||
* [glide] Fix extraction (#10478)
|
||||
- [exfm] Remove extractor (#10482)
|
||||
* [youporn] Fix categories and tags extraction (#10521)
|
||||
+ [curiositystream] Add extractor for app.curiositystream.com
|
||||
- [thvideo] Remove extractor (#10464)
|
||||
* [movingimage] Fix for the new site name (#10466)
|
||||
+ [cbs] Add support for once formats (#10515)
|
||||
* [limelight] Skip ism and duplicate manifests
|
||||
+ [porncom] Extract categories and tags (#10510)
|
||||
+ [facebook] Extract timestamp (#10508)
|
||||
+ [yahoo] Extract more formats
|
||||
|
||||
|
||||
version 2016.08.31
|
||||
|
||||
Extractors
|
||||
* [soundcloud] Fix URL regular expression to avoid clashes with sets (#10505)
|
||||
* [bandcamp:album] Fix title extraction (#10455)
|
||||
* [pyvideo] Fix extraction (#10468)
|
||||
+ [ctv] Add support for tsn.ca, bnn.ca and thecomedynetwork.ca (#10016)
|
||||
* [9c9media] Extract more metadata
|
||||
* [9c9media] Fix multiple stacks extraction (#10016)
|
||||
* [adultswim] Improve video info extraction (#10492)
|
||||
* [vodplatform] Improve embed regular expression
|
||||
- [played] Remove extractor (#10470)
|
||||
+ [tbs] Add extractor for tbs.com and tntdrama.com (#10222)
|
||||
+ [cartoonnetwork] Add extractor for cartoonnetwork.com (#10110)
|
||||
* [adultswim] Rework in terms of turner extractor
|
||||
* [cnn] Rework in terms of turner extractor
|
||||
* [nba] Rework in terms of turner extractor
|
||||
+ [turner] Add base extractor for Turner Broadcasting System based sites
|
||||
* [bilibili] Fix extraction (#10375)
|
||||
* [openload] Fix extraction (#10408)
|
||||
|
||||
|
||||
version 2016.08.28
|
||||
|
||||
Core
|
||||
+ Add warning message that ffmpeg doesn't support SOCKS
|
||||
* Improve thumbnail sorting
|
||||
+ Extract formats from #EXT-X-MEDIA tags in _extract_m3u8_formats
|
||||
* Fill IV with leading zeros for IVs shorter than 16 octets in hlsnative
|
||||
+ Add ac-3 to the list of audio codecs in parse_codecs
|
||||
|
||||
Extractors
|
||||
* [periscope:user] Fix extraction (#10453)
|
||||
* [douyutv] Fix extraction (#10153, #10318, #10444)
|
||||
+ [nhk:vod] Add extractor for www3.nhk.or.jp on demand (#4437, #10424)
|
||||
- [trutube] Remove extractor (#10438)
|
||||
+ [usanetwork] Add extractor for usanetwork.com
|
||||
* [crackle] Fix extraction (#10333)
|
||||
* [spankbang] Fix description and uploader extraction (#10339)
|
||||
* [discoverygo] Detect cable provider restricted videos (#10425)
|
||||
+ [cbc] Add support for watch.cbc.ca
|
||||
* [kickstarter] Silence the warning for og:description (#10415)
|
||||
* [mtvservices:embedded] Fix extraction for the new 'edge' player (#10363)
|
||||
|
||||
|
||||
version 2016.08.24.1
|
||||
|
||||
Extractors
|
||||
|
59
README.md
59
README.md
@@ -89,6 +89,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--mark-watched Mark videos watched (YouTube only)
|
||||
--no-mark-watched Do not mark videos watched (YouTube only)
|
||||
--no-color Do not emit color codes in output
|
||||
--abort-on-unavailable-fragment Abort downloading when some fragment is not
|
||||
available
|
||||
|
||||
## Network Options:
|
||||
--proxy URL Use the specified HTTP/HTTPS/SOCKS proxy.
|
||||
@@ -173,7 +175,10 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
-R, --retries RETRIES Number of retries (default is 10), or
|
||||
"infinite".
|
||||
--fragment-retries RETRIES Number of retries for a fragment (default
|
||||
is 10), or "infinite" (DASH only)
|
||||
is 10), or "infinite" (DASH and hlsnative
|
||||
only)
|
||||
--skip-unavailable-fragments Skip unavailable fragments (DASH and
|
||||
hlsnative only)
|
||||
--buffer-size SIZE Size of download buffer (e.g. 1024 or 16K)
|
||||
(default is 1024)
|
||||
--no-resize-buffer Do not automatically adjust the buffer
|
||||
@@ -412,11 +417,19 @@ You can configure youtube-dl by placing any supported command line option to a c
|
||||
|
||||
For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory:
|
||||
```
|
||||
-x
|
||||
--no-mtime
|
||||
--proxy 127.0.0.1:3128
|
||||
-o ~/Movies/%(title)s.%(ext)s
|
||||
# Lines starting with # are comments
|
||||
|
||||
# Always extract audio
|
||||
-x
|
||||
|
||||
# Do not copy the mtime
|
||||
--no-mtime
|
||||
|
||||
# Use this proxy
|
||||
--proxy 127.0.0.1:3128
|
||||
|
||||
# Save all videos under Movies directory in your home directory
|
||||
-o ~/Movies/%(title)s.%(ext)s
|
||||
```
|
||||
|
||||
Note that options in configuration file are just the same options aka switches used in regular command line calls thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`.
|
||||
@@ -730,7 +743,7 @@ Videos or video formats streamed via RTMP protocol can only be downloaded when [
|
||||
|
||||
### I have downloaded a video but how can I play it?
|
||||
|
||||
Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org) or [mplayer](http://www.mplayerhq.hu/).
|
||||
Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org/) or [mplayer](http://www.mplayerhq.hu/).
|
||||
|
||||
### I extracted a video URL with `-g`, but it does not play on another machine / in my webbrowser.
|
||||
|
||||
@@ -812,10 +825,42 @@ Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the opt
|
||||
|
||||
### How do I pass cookies to youtube-dl?
|
||||
|
||||
Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`. Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
||||
Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
|
||||
|
||||
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox).
|
||||
|
||||
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
||||
|
||||
Passing cookies to youtube-dl is a good way to work around login when a particular extractor does not implement it explicitly. Another use case is working around [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare).
|
||||
|
||||
### How do I stream directly to media player?
|
||||
|
||||
You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe former to latter. For example, streaming to [vlc](http://www.videolan.org/) can be achieved with:
|
||||
|
||||
youtube-dl -o - "http://www.youtube.com/watch?v=BaW_jenozKcj" | vlc -
|
||||
|
||||
### How do I download only new videos from a playlist?
|
||||
|
||||
Use the download-archive feature. With this feature you should initially download the complete playlist with `--download-archive /path/to/download/archive/file.txt` that will record identifiers of all the videos in a special file. Each subsequent run with the same `--download-archive` will download only new videos and skip all videos that have been downloaded before. Note that only successful downloads are recorded in the file.
|
||||
|
||||
For example, at first,
|
||||
|
||||
youtube-dl --download-archive archive.txt "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"
|
||||
|
||||
will download the complete `PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re` playlist and create a file `archive.txt`. Each subsequent run will only download new videos if any:
|
||||
|
||||
youtube-dl --download-archive archive.txt "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"
|
||||
|
||||
### Should I add `--hls-prefer-native` into my config?
|
||||
|
||||
When youtube-dl detects an HLS video, it can download it either with the built-in downloader or ffmpeg. Since many HLS streams are slightly invalid and ffmpeg/youtube-dl each handle some invalid cases better than the other, there is an option to switch the downloader if needed.
|
||||
|
||||
When youtube-dl knows that one particular downloader works better for a given website, that downloader will be picked. Otherwise, youtube-dl will pick the best downloader for general compatibility, which at the moment happens to be ffmpeg. This choice may change in future versions of youtube-dl, with improvements of the built-in downloader and/or ffmpeg.
|
||||
|
||||
In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](http://rg3.github.io/youtube-dl/supportedsites.html)) cannot mandate one specific downloader.
|
||||
|
||||
If you put either `--hls-prefer-native` or `--hls-prefer-ffmpeg` into your configuration, a different subset of videos will fail to download correctly. Instead, it is much better to [file an issue](https://yt-dl.org/bug) or a pull request which details why the native or the ffmpeg HLS downloader is a better choice for your use case.
|
||||
|
||||
### Can you add support for this anime video site, or site which shows current movies for free?
|
||||
|
||||
As a matter of policy (as well as legality), youtube-dl does not include support for services that specialize in infringing copyright. As a rule of thumb, if you cannot easily find a video that the service is quite obviously allowed to distribute (i.e. that has been uploaded by the creator, the creator's distributor, or is published under a free license), the service is probably unfit for inclusion to youtube-dl.
|
||||
|
@@ -60,6 +60,9 @@ if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; e
|
||||
if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi
|
||||
if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi
|
||||
|
||||
read -p "Is ChangeLog up to date? (y/n) " -n 1
|
||||
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
|
||||
|
||||
/bin/echo -e "\n### First of all, testing..."
|
||||
make clean
|
||||
if $skip_tests ; then
|
||||
|
@@ -13,13 +13,16 @@
|
||||
- **5min**
|
||||
- **8tracks**
|
||||
- **91porn**
|
||||
- **9c9media**
|
||||
- **9c9media:stack**
|
||||
- **9gag**
|
||||
- **9now.com.au**
|
||||
- **abc.net.au**
|
||||
- **abc.net.au:iview**
|
||||
- **Abc7News**
|
||||
- **abcnews**
|
||||
- **abcnews:video**
|
||||
- **abcotvs**: ABC Owned Television Stations
|
||||
- **abcotvs:clips**
|
||||
- **AcademicEarth:Course**
|
||||
- **acast**
|
||||
- **acast:channel**
|
||||
@@ -115,14 +118,18 @@
|
||||
- **Canvas**
|
||||
- **CarambaTV**
|
||||
- **CarambaTVPage**
|
||||
- **CBC**
|
||||
- **CBCPlayer**
|
||||
- **CartoonNetwork**
|
||||
- **cbc.ca**
|
||||
- **cbc.ca:player**
|
||||
- **cbc.ca:watch**
|
||||
- **cbc.ca:watch:video**
|
||||
- **CBS**
|
||||
- **CBSInteractive**
|
||||
- **CBSLocal**
|
||||
- **CBSNews**: CBS News
|
||||
- **CBSNewsLiveVideo**: CBS News Live Videos
|
||||
- **CBSSports**
|
||||
- **CCTV**
|
||||
- **CDA**
|
||||
- **CeskaTelevize**
|
||||
- **channel9**: Channel 9
|
||||
@@ -166,6 +173,8 @@
|
||||
- **CTVNews**
|
||||
- **culturebox.francetvinfo.fr**
|
||||
- **CultureUnplugged**
|
||||
- **curiositystream**
|
||||
- **curiositystream:collection**
|
||||
- **CWTV**
|
||||
- **DailyMail**
|
||||
- **dailymotion**
|
||||
@@ -218,13 +227,14 @@
|
||||
- **EsriVideo**
|
||||
- **Europa**
|
||||
- **EveryonesMixtape**
|
||||
- **exfm**: ex.fm
|
||||
- **ExpoTV**
|
||||
- **ExtremeTube**
|
||||
- **EyedoTV**
|
||||
- **facebook**
|
||||
- **FacebookPluginsVideo**
|
||||
- **faz.net**
|
||||
- **fc2**
|
||||
- **fc2:embed**
|
||||
- **Fczenit**
|
||||
- **features.aol.com**
|
||||
- **fernsehkritik.tv**
|
||||
@@ -237,7 +247,9 @@
|
||||
- **Formula1**
|
||||
- **FOX**
|
||||
- **Foxgay**
|
||||
- **FoxNews**: Fox News and Fox Business Video
|
||||
- **foxnews**: Fox News and Fox Business Video
|
||||
- **foxnews:article**
|
||||
- **foxnews:insider**
|
||||
- **FoxSports**
|
||||
- **france2.fr:generation-quoi**
|
||||
- **FranceCulture**
|
||||
@@ -266,6 +278,7 @@
|
||||
- **Glide**: Glide mobile video messages (glide.me)
|
||||
- **Globo**
|
||||
- **GloboArticle**
|
||||
- **Go**
|
||||
- **GodTube**
|
||||
- **GodTV**
|
||||
- **Golem**
|
||||
@@ -314,6 +327,7 @@
|
||||
- **ivi**: ivi.ru
|
||||
- **ivi:compilation**: ivi.ru compilations
|
||||
- **ivideon**: Ivideon TV
|
||||
- **Iwara**
|
||||
- **Izlesene**
|
||||
- **JeuxVideo**
|
||||
- **Jove**
|
||||
@@ -327,6 +341,7 @@
|
||||
- **KarriereVideos**
|
||||
- **keek**
|
||||
- **KeezMovies**
|
||||
- **Ketnet**
|
||||
- **KhanAcademy**
|
||||
- **KickStarter**
|
||||
- **KonserthusetPlay**
|
||||
@@ -342,6 +357,7 @@
|
||||
- **kuwo:song**: 酷我音乐
|
||||
- **la7.it**
|
||||
- **Laola1Tv**
|
||||
- **LCI**
|
||||
- **Lcp**
|
||||
- **LcpPlay**
|
||||
- **Le**: 乐视网
|
||||
@@ -380,6 +396,7 @@
|
||||
- **Metacritic**
|
||||
- **Mgoon**
|
||||
- **MGTV**: 芒果TV
|
||||
- **MiaoPai**
|
||||
- **Minhateca**
|
||||
- **MinistryGrid**
|
||||
- **Minoto**
|
||||
@@ -401,6 +418,7 @@
|
||||
- **MovieClips**
|
||||
- **MovieFap**
|
||||
- **Moviezine**
|
||||
- **MovingImage**
|
||||
- **MPORA**
|
||||
- **MSN**
|
||||
- **mtg**: MTG services
|
||||
@@ -448,6 +466,7 @@
|
||||
- **NextMediaActionNews**: 蘋果日報 - 動新聞
|
||||
- **nfb**: National Film Board of Canada
|
||||
- **nfl.com**
|
||||
- **NhkVod**
|
||||
- **nhl.com**
|
||||
- **nhl.com:news**: NHL news
|
||||
- **nhl.com:videocenter**
|
||||
@@ -456,7 +475,6 @@
|
||||
- **nick.de**
|
||||
- **niconico**: ニコニコ動画
|
||||
- **NiconicoPlaylist**
|
||||
- **NineCNineMedia**
|
||||
- **Nintendo**
|
||||
- **njoy**: N-JOY
|
||||
- **njoy:embed**
|
||||
@@ -514,7 +532,6 @@
|
||||
- **Pinkbike**
|
||||
- **Pladform**
|
||||
- **play.fm**
|
||||
- **played.to**
|
||||
- **PlaysTV**
|
||||
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
|
||||
- **Playvid**
|
||||
@@ -526,6 +543,7 @@
|
||||
- **podomatic**
|
||||
- **Pokemon**
|
||||
- **PolskieRadio**
|
||||
- **PolskieRadioCategory**
|
||||
- **PornCom**
|
||||
- **PornHd**
|
||||
- **PornHub**: PornHub and Thumbzilla
|
||||
@@ -566,6 +584,7 @@
|
||||
- **revision3:embed**
|
||||
- **RICE**
|
||||
- **RingTV**
|
||||
- **RMCDecouverte**
|
||||
- **RockstarGames**
|
||||
- **RoosterTeeth**
|
||||
- **RottenTomatoes**
|
||||
@@ -655,7 +674,6 @@
|
||||
- **sr:mediathek**: Saarländischer Rundfunk
|
||||
- **SRGSSR**
|
||||
- **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
|
||||
- **SSA**
|
||||
- **stanfordoc**: Stanford Open ClassRoom
|
||||
- **Steam**
|
||||
- **Stitcher**
|
||||
@@ -672,6 +690,7 @@
|
||||
- **Tagesschau**
|
||||
- **tagesschau:player**
|
||||
- **Tass**
|
||||
- **TBS**
|
||||
- **TDSLifeway**
|
||||
- **teachertube**: teachertube.com videos
|
||||
- **teachertube:user:collection**: teachertube.com user and collection videos
|
||||
@@ -686,9 +705,11 @@
|
||||
- **Telecinco**: telecinco.es, cuatro.com and mediaset.es
|
||||
- **Telegraaf**
|
||||
- **TeleMB**
|
||||
- **TeleQuebec**
|
||||
- **TeleTask**
|
||||
- **Telewebion**
|
||||
- **TF1**
|
||||
- **TFO**
|
||||
- **TheIntercept**
|
||||
- **ThePlatform**
|
||||
- **ThePlatformFeed**
|
||||
@@ -697,8 +718,6 @@
|
||||
- **TheStar**
|
||||
- **ThisAmericanLife**
|
||||
- **ThisAV**
|
||||
- **THVideo**
|
||||
- **THVideoPlaylist**
|
||||
- **tinypic**: tinypic.com videos
|
||||
- **tlc.de**
|
||||
- **TMZ**
|
||||
@@ -712,8 +731,7 @@
|
||||
- **ToypicsUser**: Toypics user profile
|
||||
- **TrailerAddict** (Currently broken)
|
||||
- **Trilulilu**
|
||||
- **trollvids**
|
||||
- **TruTube**
|
||||
- **TruTV**
|
||||
- **Tube8**
|
||||
- **TubiTv**
|
||||
- **tudou**
|
||||
@@ -735,6 +753,7 @@
|
||||
- **TVCArticle**
|
||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||
- **tvland.com**
|
||||
- **TVNoe**
|
||||
- **tvp**: Telewizja Polska
|
||||
- **tvp:embed**: Telewizja Polska
|
||||
- **tvp:series**
|
||||
@@ -758,6 +777,7 @@
|
||||
- **uplynk:preplay**
|
||||
- **Urort**: NRK P3 Urørt
|
||||
- **URPlay**
|
||||
- **USANetwork**
|
||||
- **USAToday**
|
||||
- **ustream**
|
||||
- **ustream:channel**
|
||||
|
@@ -39,6 +39,7 @@ from youtube_dl.utils import (
|
||||
is_html,
|
||||
js_to_json,
|
||||
limit_length,
|
||||
mimetype2ext,
|
||||
ohdave_rsa_encrypt,
|
||||
OnDemandPagedList,
|
||||
orderedSet,
|
||||
@@ -625,6 +626,14 @@ class TestUtil(unittest.TestCase):
|
||||
limit_length('foo bar baz asd', 12).startswith('foo bar'))
|
||||
self.assertTrue('...' in limit_length('foo bar baz asd', 12))
|
||||
|
||||
def test_mimetype2ext(self):
|
||||
self.assertEqual(mimetype2ext(None), None)
|
||||
self.assertEqual(mimetype2ext('video/x-flv'), 'flv')
|
||||
self.assertEqual(mimetype2ext('application/x-mpegURL'), 'm3u8')
|
||||
self.assertEqual(mimetype2ext('text/vtt'), 'vtt')
|
||||
self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt')
|
||||
self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html')
|
||||
|
||||
def test_parse_codecs(self):
|
||||
self.assertEqual(parse_codecs(''), {})
|
||||
self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), {
|
||||
|
@@ -1256,8 +1256,10 @@ class YoutubeDL(object):
|
||||
info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
|
||||
if thumbnails:
|
||||
thumbnails.sort(key=lambda t: (
|
||||
t.get('preference'), t.get('width'), t.get('height'),
|
||||
t.get('id'), t.get('url')))
|
||||
t.get('preference') if t.get('preference') is not None else -1,
|
||||
t.get('width') if t.get('width') is not None else -1,
|
||||
t.get('height') if t.get('height') is not None else -1,
|
||||
t.get('id') if t.get('id') is not None else '', t.get('url')))
|
||||
for i, t in enumerate(thumbnails):
|
||||
t['url'] = sanitize_url(t['url'])
|
||||
if t.get('width') and t.get('height'):
|
||||
|
@@ -318,6 +318,7 @@ def _real_main(argv=None):
|
||||
'nooverwrites': opts.nooverwrites,
|
||||
'retries': opts.retries,
|
||||
'fragment_retries': opts.fragment_retries,
|
||||
'skip_unavailable_fragments': opts.skip_unavailable_fragments,
|
||||
'buffersize': opts.buffersize,
|
||||
'noresizebuffer': opts.noresizebuffer,
|
||||
'continuedl': opts.continue_dl,
|
||||
|
@@ -38,8 +38,10 @@ class DashSegmentsFD(FragmentFD):
|
||||
segments_filenames = []
|
||||
|
||||
fragment_retries = self.params.get('fragment_retries', 0)
|
||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||
|
||||
def append_url_to_file(target_url, tmp_filename, segment_name):
|
||||
def process_segment(segment, tmp_filename, fatal):
|
||||
target_url, segment_name = segment
|
||||
target_filename = '%s-%s' % (tmp_filename, segment_name)
|
||||
count = 0
|
||||
while count <= fragment_retries:
|
||||
@@ -52,26 +54,35 @@ class DashSegmentsFD(FragmentFD):
|
||||
down.close()
|
||||
segments_filenames.append(target_sanitized)
|
||||
break
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
except compat_urllib_error.HTTPError as err:
|
||||
# YouTube may often return 404 HTTP error for a fragment causing the
|
||||
# whole download to fail. However if the same fragment is immediately
|
||||
# retried with the same request data this usually succeeds (1-2 attempts
|
||||
# is usually enough) thus allowing to download the whole file successfully.
|
||||
# So, we will retry all fragments that fail with 404 HTTP error for now.
|
||||
if err.code != 404:
|
||||
raise
|
||||
# Retry fragment
|
||||
# To be future-proof we will retry all fragments that fail with any
|
||||
# HTTP error.
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(segment_name, count, fragment_retries)
|
||||
self.report_retry_fragment(err, segment_name, count, fragment_retries)
|
||||
if count > fragment_retries:
|
||||
if not fatal:
|
||||
self.report_skip_fragment(segment_name)
|
||||
return True
|
||||
self.report_error('giving up after %s fragment retries' % fragment_retries)
|
||||
return False
|
||||
return True
|
||||
|
||||
if initialization_url:
|
||||
append_url_to_file(initialization_url, ctx['tmpfilename'], 'Init')
|
||||
for i, segment_url in enumerate(segment_urls):
|
||||
append_url_to_file(segment_url, ctx['tmpfilename'], 'Seg%d' % i)
|
||||
segments_to_download = [(initialization_url, 'Init')] if initialization_url else []
|
||||
segments_to_download.extend([
|
||||
(segment_url, 'Seg%d' % i)
|
||||
for i, segment_url in enumerate(segment_urls)])
|
||||
|
||||
for i, segment in enumerate(segments_to_download):
|
||||
# In DASH, the first segment contains necessary headers to
|
||||
# generate a valid MP4 file, so always abort for the first segment
|
||||
fatal = i == 0 or not skip_unavailable_fragments
|
||||
if not process_segment(segment, ctx['tmpfilename'], fatal):
|
||||
return False
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
|
@@ -220,6 +220,12 @@ class FFmpegFD(ExternalFD):
|
||||
if proxy:
|
||||
if not re.match(r'^[\da-zA-Z]+://', proxy):
|
||||
proxy = 'http://%s' % proxy
|
||||
|
||||
if proxy.startswith('socks'):
|
||||
self.report_warning(
|
||||
'%s does not support SOCKS proxies. Downloading is likely to fail. '
|
||||
'Consider adding --hls-prefer-native to your command.' % self.get_basename())
|
||||
|
||||
# Since December 2015 ffmpeg supports -http_proxy option (see
|
||||
# http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
|
||||
# We could switch to the following code if we are able to detect version properly
|
||||
|
@@ -6,6 +6,7 @@ import time
|
||||
from .common import FileDownloader
|
||||
from .http import HttpFD
|
||||
from ..utils import (
|
||||
error_to_compat_str,
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
)
|
||||
@@ -22,13 +23,19 @@ class FragmentFD(FileDownloader):
|
||||
|
||||
Available options:
|
||||
|
||||
fragment_retries: Number of times to retry a fragment for HTTP error (DASH only)
|
||||
fragment_retries: Number of times to retry a fragment for HTTP error (DASH
|
||||
and hlsnative only)
|
||||
skip_unavailable_fragments:
|
||||
Skip unavailable fragments (DASH and hlsnative only)
|
||||
"""
|
||||
|
||||
def report_retry_fragment(self, fragment_name, count, retries):
|
||||
def report_retry_fragment(self, err, fragment_name, count, retries):
|
||||
self.to_screen(
|
||||
'[download] Got server HTTP error. Retrying fragment %s (attempt %d of %s)...'
|
||||
% (fragment_name, count, self.format_retries(retries)))
|
||||
'[download] Got server HTTP error: %s. Retrying fragment %s (attempt %d of %s)...'
|
||||
% (error_to_compat_str(err), fragment_name, count, self.format_retries(retries)))
|
||||
|
||||
def report_skip_fragment(self, fragment_name):
|
||||
self.to_screen('[download] Skipping fragment %s...' % fragment_name)
|
||||
|
||||
def _prepare_and_start_frag_download(self, ctx):
|
||||
self._prepare_frag_download(ctx)
|
||||
|
@@ -13,6 +13,7 @@ from .fragment import FragmentFD
|
||||
from .external import FFmpegFD
|
||||
|
||||
from ..compat import (
|
||||
compat_urllib_error,
|
||||
compat_urlparse,
|
||||
compat_struct_pack,
|
||||
)
|
||||
@@ -83,7 +84,14 @@ class HlsFD(FragmentFD):
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
|
||||
fragment_retries = self.params.get('fragment_retries', 0)
|
||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||
test = self.params.get('test', False)
|
||||
|
||||
extra_query = None
|
||||
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
|
||||
if extra_param_to_segment_url:
|
||||
extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
|
||||
i = 0
|
||||
media_sequence = 0
|
||||
decrypt_info = {'METHOD': 'NONE'}
|
||||
@@ -96,15 +104,37 @@ class HlsFD(FragmentFD):
|
||||
line
|
||||
if re.match(r'^https?://', line)
|
||||
else compat_urlparse.urljoin(man_url, line))
|
||||
frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i)
|
||||
if extra_param_to_segment_url:
|
||||
frag_url = update_url_query(frag_url, extra_param_to_segment_url)
|
||||
success = ctx['dl'].download(frag_filename, {'url': frag_url})
|
||||
if not success:
|
||||
frag_name = 'Frag%d' % i
|
||||
frag_filename = '%s-%s' % (ctx['tmpfilename'], frag_name)
|
||||
if extra_query:
|
||||
frag_url = update_url_query(frag_url, extra_query)
|
||||
count = 0
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success = ctx['dl'].download(frag_filename, {'url': frag_url})
|
||||
if not success:
|
||||
return False
|
||||
down, frag_sanitized = sanitize_open(frag_filename, 'rb')
|
||||
frag_content = down.read()
|
||||
down.close()
|
||||
break
|
||||
except compat_urllib_error.HTTPError as err:
|
||||
# Unavailable (possibly temporary) fragments may be served.
|
||||
# First we try to retry then either skip or abort.
|
||||
# See https://github.com/rg3/youtube-dl/issues/10165,
|
||||
# https://github.com/rg3/youtube-dl/issues/10448).
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(err, frag_name, count, fragment_retries)
|
||||
if count > fragment_retries:
|
||||
if skip_unavailable_fragments:
|
||||
i += 1
|
||||
media_sequence += 1
|
||||
self.report_skip_fragment(frag_name)
|
||||
continue
|
||||
self.report_error(
|
||||
'giving up after %s fragment retries' % fragment_retries)
|
||||
return False
|
||||
down, frag_sanitized = sanitize_open(frag_filename, 'rb')
|
||||
frag_content = down.read()
|
||||
down.close()
|
||||
if decrypt_info['METHOD'] == 'AES-128':
|
||||
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
|
||||
frag_content = AES.new(
|
||||
@@ -112,7 +142,7 @@ class HlsFD(FragmentFD):
|
||||
ctx['dest_stream'].write(frag_content)
|
||||
frags_filenames.append(frag_sanitized)
|
||||
# We only download the first fragment during the test
|
||||
if self.params.get('test', False):
|
||||
if test:
|
||||
break
|
||||
i += 1
|
||||
media_sequence += 1
|
||||
@@ -120,12 +150,12 @@ class HlsFD(FragmentFD):
|
||||
decrypt_info = parse_m3u8_attributes(line[11:])
|
||||
if decrypt_info['METHOD'] == 'AES-128':
|
||||
if 'IV' in decrypt_info:
|
||||
decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:])
|
||||
decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
|
||||
if not re.match(r'^https?://', decrypt_info['URI']):
|
||||
decrypt_info['URI'] = compat_urlparse.urljoin(
|
||||
man_url, decrypt_info['URI'])
|
||||
if extra_param_to_segment_url:
|
||||
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_param_to_segment_url)
|
||||
if extra_query:
|
||||
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
|
||||
decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read()
|
||||
elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
|
||||
media_sequence = int(line[22:])
|
||||
|
@@ -100,6 +100,7 @@ class ABCIViewIE(InfoExtractor):
|
||||
IE_NAME = 'abc.net.au:iview'
|
||||
_VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P<id>[^/?#]+)'
|
||||
|
||||
# ABC iview programs are normally available for 14 days only.
|
||||
_TESTS = [{
|
||||
'url': 'http://iview.abc.net.au/programs/gardening-australia/FA1505V024S00',
|
||||
'md5': '979d10b2939101f0d27a06b79edad536',
|
||||
@@ -112,6 +113,7 @@ class ABCIViewIE(InfoExtractor):
|
||||
'uploader_id': 'abc1',
|
||||
'timestamp': 1471719600,
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -12,7 +12,7 @@ from ..compat import compat_urlparse
|
||||
|
||||
class AbcNewsVideoIE(AMPIE):
|
||||
IE_NAME = 'abcnews:video'
|
||||
_VALID_URL = 'http://abcnews.go.com/[^/]+/video/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://abcnews\.go\.com/[^/]+/video/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
|
||||
@@ -49,7 +49,7 @@ class AbcNewsVideoIE(AMPIE):
|
||||
|
||||
class AbcNewsIE(InfoExtractor):
|
||||
IE_NAME = 'abcnews'
|
||||
_VALID_URL = 'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
|
||||
|
@@ -1,13 +1,19 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class Abc7NewsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://abc7news\.com(?:/[^/]+/(?P<display_id>[^/]+))?/(?P<id>\d+)'
|
||||
class ABCOTVSIE(InfoExtractor):
|
||||
IE_NAME = 'abcotvs'
|
||||
IE_DESC = 'ABC Owned Television Stations'
|
||||
_VALID_URL = r'https?://(?:abc(?:7(?:news|ny|chicago)?|11|13|30)|6abc)\.com(?:/[^/]+/(?P<display_id>[^/]+))?/(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://abc7news.com/entertainment/east-bay-museum-celebrates-vintage-synthesizers/472581/',
|
||||
@@ -15,7 +21,7 @@ class Abc7NewsIE(InfoExtractor):
|
||||
'id': '472581',
|
||||
'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
|
||||
'ext': 'mp4',
|
||||
'title': 'East Bay museum celebrates history of synthesized music',
|
||||
'title': 'East Bay museum celebrates vintage synthesizers',
|
||||
'description': 'md5:a4f10fb2f2a02565c1749d4adbab4b10',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'timestamp': 1421123075,
|
||||
@@ -41,7 +47,7 @@ class Abc7NewsIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
m3u8 = self._html_search_meta(
|
||||
'contentURL', webpage, 'm3u8 url', fatal=True)
|
||||
'contentURL', webpage, 'm3u8 url', fatal=True).split('?')[0]
|
||||
|
||||
formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
@@ -66,3 +72,41 @@ class Abc7NewsIE(InfoExtractor):
|
||||
'uploader': uploader,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class ABCOTVSClipsIE(InfoExtractor):
|
||||
IE_NAME = 'abcotvs:clips'
|
||||
_VALID_URL = r'https?://clips\.abcotvs\.com/(?:[^/]+/)*video/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://clips.abcotvs.com/kabc/video/214814',
|
||||
'info_dict': {
|
||||
'id': '214814',
|
||||
'ext': 'mp4',
|
||||
'title': 'SpaceX launch pad explosion destroys rocket, satellite',
|
||||
'description': 'md5:9f186e5ad8f490f65409965ee9c7be1b',
|
||||
'upload_date': '20160901',
|
||||
'timestamp': 1472756695,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json('https://clips.abcotvs.com/vogo/video/getByIds?ids=' + video_id, video_id)['results'][0]
|
||||
title = video_data['title']
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_data['videoURL'].split('?')[0], video_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': video_data.get('thumbnailURL'),
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'timestamp': int_or_none(video_data.get('pubDate')),
|
||||
'formats': formats,
|
||||
}
|
@@ -37,6 +37,10 @@ class AdobePassIE(InfoExtractor):
|
||||
return self._search_regex(
|
||||
'<%s>(.+?)</%s>' % (tag, tag), xml_str, tag)
|
||||
|
||||
def is_expired(token, date_ele):
|
||||
token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele)))
|
||||
return token_expires and token_expires <= int(time.time())
|
||||
|
||||
mvpd_headers = {
|
||||
'ap_42': 'anonymous',
|
||||
'ap_11': 'Linux i686',
|
||||
@@ -47,11 +51,8 @@ class AdobePassIE(InfoExtractor):
|
||||
guid = xml_text(resource, 'guid')
|
||||
requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {}
|
||||
authn_token = requestor_info.get('authn_token')
|
||||
if authn_token:
|
||||
token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(authn_token, 'simpleTokenExpires')))
|
||||
if token_expires and token_expires <= int(time.time()):
|
||||
authn_token = None
|
||||
requestor_info = {}
|
||||
if authn_token and is_expired(authn_token, 'simpleTokenExpires'):
|
||||
authn_token = None
|
||||
if not authn_token:
|
||||
# TODO add support for other TV Providers
|
||||
mso_id = 'DTV'
|
||||
@@ -98,6 +99,8 @@ class AdobePassIE(InfoExtractor):
|
||||
self._downloader.cache.store('mvpd', requestor_id, requestor_info)
|
||||
|
||||
authz_token = requestor_info.get(guid)
|
||||
if authz_token and is_expired(authz_token, 'simpleTokenTTL'):
|
||||
authz_token = None
|
||||
if not authz_token:
|
||||
authorize = self._download_webpage(
|
||||
self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id,
|
||||
|
@@ -3,16 +3,14 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .turner import TurnerBaseIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
xpath_text,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class AdultSwimIE(InfoExtractor):
|
||||
class AdultSwimIE(TurnerBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<is_playlist>playlists/)?(?P<show_path>[^/]+)/(?P<episode_path>[^/?#]+)/?'
|
||||
|
||||
_TESTS = [{
|
||||
@@ -96,7 +94,8 @@ class AdultSwimIE(InfoExtractor):
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -148,7 +147,10 @@ class AdultSwimIE(InfoExtractor):
|
||||
if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
|
||||
video_info = bootstrapped_data['slugged_video']
|
||||
if not video_info:
|
||||
video_info = bootstrapped_data.get('heroMetadata', {}).get('trailer').get('video')
|
||||
video_info = bootstrapped_data.get(
|
||||
'heroMetadata', {}).get('trailer', {}).get('video')
|
||||
if not video_info:
|
||||
video_info = bootstrapped_data.get('onlineOriginals', [None])[0]
|
||||
if not video_info:
|
||||
raise ExtractorError('Unable to find video info')
|
||||
|
||||
@@ -162,70 +164,38 @@ class AdultSwimIE(InfoExtractor):
|
||||
elif video_info.get('videoPlaybackID'):
|
||||
segment_ids = [video_info['videoPlaybackID']]
|
||||
else:
|
||||
raise ExtractorError(
|
||||
'This video is only available via cable service provider subscription that'
|
||||
' is not currently supported. You may want to use --cookies.'
|
||||
if video_info.get('auth') is True else 'Unable to find stream or clips',
|
||||
expected=True)
|
||||
if video_info.get('auth') is True:
|
||||
raise ExtractorError(
|
||||
'This video is only available via cable service provider subscription that'
|
||||
' is not currently supported. You may want to use --cookies.', expected=True)
|
||||
else:
|
||||
raise ExtractorError('Unable to find stream or clips')
|
||||
|
||||
episode_id = video_info['id']
|
||||
episode_title = video_info['title']
|
||||
episode_description = video_info['description']
|
||||
episode_duration = video_info.get('duration')
|
||||
episode_description = video_info.get('description')
|
||||
episode_duration = int_or_none(video_info.get('duration'))
|
||||
view_count = int_or_none(video_info.get('views'))
|
||||
|
||||
entries = []
|
||||
for part_num, segment_id in enumerate(segment_ids):
|
||||
segment_url = 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=desktop' % segment_id
|
||||
|
||||
segement_info = self._extract_cvp_info(
|
||||
'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=desktop' % segment_id,
|
||||
segment_id, {
|
||||
'secure': {
|
||||
'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big',
|
||||
'tokenizer_src': 'http://www.adultswim.com/astv/mvpd/processors/services/token_ipadAdobe.do',
|
||||
},
|
||||
})
|
||||
segment_title = '%s - %s' % (show_title, episode_title)
|
||||
if len(segment_ids) > 1:
|
||||
segment_title += ' Part %d' % (part_num + 1)
|
||||
|
||||
idoc = self._download_xml(
|
||||
segment_url, segment_title,
|
||||
'Downloading segment information', 'Unable to download segment information')
|
||||
|
||||
segment_duration = float_or_none(
|
||||
xpath_text(idoc, './/trt', 'segment duration').strip())
|
||||
|
||||
formats = []
|
||||
file_els = idoc.findall('.//files/file') or idoc.findall('./files/file')
|
||||
|
||||
unique_urls = []
|
||||
unique_file_els = []
|
||||
for file_el in file_els:
|
||||
media_url = file_el.text
|
||||
if not media_url or determine_ext(media_url) == 'f4m':
|
||||
continue
|
||||
if file_el.text not in unique_urls:
|
||||
unique_urls.append(file_el.text)
|
||||
unique_file_els.append(file_el)
|
||||
|
||||
for file_el in unique_file_els:
|
||||
bitrate = file_el.attrib.get('bitrate')
|
||||
ftype = file_el.attrib.get('type')
|
||||
media_url = file_el.text
|
||||
if determine_ext(media_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, segment_title, 'mp4', preference=0,
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': '%s_%s' % (bitrate, ftype),
|
||||
'url': file_el.text.strip(),
|
||||
# The bitrate may not be a number (for example: 'iphone')
|
||||
'tbr': int(bitrate) if bitrate.isdigit() else None,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
segement_info.update({
|
||||
'id': segment_id,
|
||||
'title': segment_title,
|
||||
'formats': formats,
|
||||
'duration': segment_duration,
|
||||
'description': episode_description
|
||||
'description': episode_description,
|
||||
})
|
||||
entries.append(segement_info)
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
@@ -234,5 +204,6 @@ class AdultSwimIE(InfoExtractor):
|
||||
'entries': entries,
|
||||
'title': '%s - %s' % (show_title, episode_title),
|
||||
'description': episode_description,
|
||||
'duration': episode_duration
|
||||
'duration': episode_duration,
|
||||
'view_count': view_count,
|
||||
}
|
||||
|
@@ -238,7 +238,7 @@ class ARDMediathekIE(InfoExtractor):
|
||||
|
||||
|
||||
class ARDIE(InfoExtractor):
|
||||
_VALID_URL = '(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
|
||||
_VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
|
||||
'md5': 'd216c3a86493f9322545e045ddc3eb35',
|
||||
|
@@ -162,6 +162,15 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
'uploader_id': 'dotscale',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
}, {
|
||||
# with escaped quote in title
|
||||
'url': 'https://jstrecords.bandcamp.com/album/entropy-ep',
|
||||
'info_dict': {
|
||||
'title': '"Entropy" EP',
|
||||
'uploader_id': 'jstrecords',
|
||||
'id': 'entropy-ep',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -176,8 +185,11 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
entries = [
|
||||
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
|
||||
for t_path in tracks_paths]
|
||||
title = self._search_regex(
|
||||
r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False)
|
||||
title = self._html_search_regex(
|
||||
r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
|
||||
webpage, 'title', fatal=False)
|
||||
if title:
|
||||
title = title.replace(r'\"', '"')
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'uploader_id': uploader_id,
|
||||
|
@@ -2,7 +2,6 @@ from __future__ import unicode_literals
|
||||
|
||||
from .mtv import MTVServicesInfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
|
||||
|
||||
class BetIE(MTVServicesInfoExtractor):
|
||||
@@ -53,9 +52,9 @@ class BetIE(MTVServicesInfoExtractor):
|
||||
_FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player"
|
||||
|
||||
def _get_feed_query(self, uri):
|
||||
return compat_urllib_parse_urlencode({
|
||||
return {
|
||||
'uuid': uri,
|
||||
})
|
||||
}
|
||||
|
||||
def _extract_mgid(self, webpage):
|
||||
return self._search_regex(r'data-uri="([^"]+)', webpage, 'mgid')
|
||||
|
@@ -1,33 +1,27 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import calendar
|
||||
import datetime
|
||||
import hashlib
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_str,
|
||||
compat_parse_qs,
|
||||
compat_xml_parse_error,
|
||||
)
|
||||
from ..compat import compat_parse_qs
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
xpath_text,
|
||||
unified_timestamp,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class BiliBiliIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/v/)(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bilibili.tv/video/av1074402/',
|
||||
'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e',
|
||||
'info_dict': {
|
||||
'id': '1554319',
|
||||
'id': '1074402',
|
||||
'ext': 'mp4',
|
||||
'title': '【金坷垃】金泡沫',
|
||||
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
|
||||
@@ -41,24 +35,28 @@ class BiliBiliIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.bilibili.com/video/av1041170/',
|
||||
'info_dict': {
|
||||
'id': '1507019',
|
||||
'id': '1041170',
|
||||
'ext': 'mp4',
|
||||
'title': '【BD1080P】刀语【诸神&异域】',
|
||||
'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~',
|
||||
'duration': 3382.259,
|
||||
'timestamp': 1396530060,
|
||||
'upload_date': '20140403',
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
'uploader': '枫叶逝去',
|
||||
'uploader_id': '520116',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.bilibili.com/video/av4808130/',
|
||||
'info_dict': {
|
||||
'id': '7802182',
|
||||
'id': '4808130',
|
||||
'ext': 'mp4',
|
||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||
'duration': 1493.995,
|
||||
'timestamp': 1464564180,
|
||||
'upload_date': '20160529',
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
'uploader': '喜欢拉面',
|
||||
'uploader_id': '151066',
|
||||
},
|
||||
@@ -66,100 +64,95 @@ class BiliBiliIE(InfoExtractor):
|
||||
# Missing upload time
|
||||
'url': 'http://www.bilibili.com/video/av1867637/',
|
||||
'info_dict': {
|
||||
'id': '2880301',
|
||||
'id': '1867637',
|
||||
'ext': 'mp4',
|
||||
'title': '【HDTV】【喜剧】岳父岳母真难当 (2014)【法国票房冠军】',
|
||||
'description': '一个信奉天主教的法国旧式传统资产阶级家庭中有四个女儿。三个女儿却分别找了阿拉伯、犹太、中国丈夫,老夫老妻唯独期盼剩下未嫁的小女儿能找一个信奉天主教的法国白人,结果没想到小女儿找了一位非裔黑人……【这次应该不会跳帧了】',
|
||||
'duration': 5760.0,
|
||||
'uploader': '黑夜为猫',
|
||||
'uploader_id': '610729',
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
},
|
||||
'params': {
|
||||
# Just to test metadata extraction
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['upload time'],
|
||||
}, {
|
||||
'url': 'http://bangumi.bilibili.com/anime/v/40068',
|
||||
'md5': '08d539a0884f3deb7b698fb13ba69696',
|
||||
'info_dict': {
|
||||
'id': '40068',
|
||||
'ext': 'mp4',
|
||||
'duration': 1402.357,
|
||||
'title': '混沌武士 : 第7集 四面楚歌 A Risky Racket',
|
||||
'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
|
||||
'thumbnail': 're:^http?://.+\.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
# BiliBili blocks keys from time to time. The current key is extracted from
|
||||
# the Android client
|
||||
# TODO: find the sign algorithm used in the flash player
|
||||
_APP_KEY = '86385cdc024c0f6c'
|
||||
_APP_KEY = '6f90a59ac58a4123'
|
||||
_BILIBILI_KEY = '0bfd84cc3940035173f35e6777508326'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
params = compat_parse_qs(self._search_regex(
|
||||
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
|
||||
r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
|
||||
webpage, 'player parameters'))
|
||||
cid = params['cid'][0]
|
||||
|
||||
info_xml_str = self._download_webpage(
|
||||
'http://interface.bilibili.com/v_cdn_play',
|
||||
cid, query={'appkey': self._APP_KEY, 'cid': cid},
|
||||
note='Downloading video info page')
|
||||
|
||||
err_msg = None
|
||||
durls = None
|
||||
info_xml = None
|
||||
try:
|
||||
info_xml = compat_etree_fromstring(info_xml_str.encode('utf-8'))
|
||||
except compat_xml_parse_error:
|
||||
info_json = self._parse_json(info_xml_str, video_id, fatal=False)
|
||||
err_msg = (info_json or {}).get('error_text')
|
||||
if 'anime/v' not in url:
|
||||
cid = compat_parse_qs(self._search_regex(
|
||||
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
|
||||
r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
|
||||
webpage, 'player parameters'))['cid'][0]
|
||||
else:
|
||||
err_msg = xpath_text(info_xml, './message')
|
||||
js = self._download_json(
|
||||
'http://bangumi.bilibili.com/web_api/get_source', video_id,
|
||||
data=urlencode_postdata({'episode_id': video_id}),
|
||||
headers={'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'})
|
||||
cid = js['result']['cid']
|
||||
|
||||
if info_xml is not None:
|
||||
durls = info_xml.findall('./durl')
|
||||
if not durls:
|
||||
if err_msg:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, err_msg), expected=True)
|
||||
else:
|
||||
raise ExtractorError('No videos found!')
|
||||
payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid)
|
||||
sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
|
||||
|
||||
video_info = self._download_json(
|
||||
'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign),
|
||||
video_id, note='Downloading video info page')
|
||||
|
||||
entries = []
|
||||
|
||||
for durl in durls:
|
||||
size = xpath_text(durl, ['./filesize', './size'])
|
||||
for idx, durl in enumerate(video_info['durl']):
|
||||
formats = [{
|
||||
'url': durl.find('./url').text,
|
||||
'filesize': int_or_none(size),
|
||||
'url': durl['url'],
|
||||
'filesize': int_or_none(durl['size']),
|
||||
}]
|
||||
for backup_url in durl.findall('./backup_url/url'):
|
||||
for backup_url in durl['backup_url']:
|
||||
formats.append({
|
||||
'url': backup_url.text,
|
||||
'url': backup_url,
|
||||
# backup URLs have lower priorities
|
||||
'preference': -2 if 'hd.mp4' in backup_url.text else -3,
|
||||
'preference': -2 if 'hd.mp4' in backup_url else -3,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
'id': '%s_part%s' % (cid, xpath_text(durl, './order')),
|
||||
'duration': int_or_none(xpath_text(durl, './length'), 1000),
|
||||
'id': '%s_part%s' % (video_id, idx),
|
||||
'duration': float_or_none(durl.get('length'), 1000),
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
|
||||
description = self._html_search_meta('description', webpage)
|
||||
datetime_str = self._html_search_regex(
|
||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False)
|
||||
timestamp = None
|
||||
if datetime_str:
|
||||
timestamp = calendar.timegm(datetime.datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M').timetuple())
|
||||
timestamp = unified_timestamp(self._html_search_regex(
|
||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False))
|
||||
thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)
|
||||
|
||||
# TODO 'view_count' requires deobfuscating Javascript
|
||||
info = {
|
||||
'id': compat_str(cid),
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'thumbnail': self._html_search_meta('thumbnailUrl', webpage),
|
||||
'duration': float_or_none(xpath_text(info_xml, './timelength'), scale=1000),
|
||||
'thumbnail': thumbnail,
|
||||
'duration': float_or_none(video_info.get('timelength'), scale=1000),
|
||||
}
|
||||
|
||||
uploader_mobj = re.search(
|
||||
|
@@ -23,6 +23,7 @@ class CanalplusIE(InfoExtractor):
|
||||
(?:(?:www|m)\.)?canalplus\.fr|
|
||||
(?:www\.)?piwiplus\.fr|
|
||||
(?:www\.)?d8\.tv|
|
||||
(?:www\.)?c8\.fr|
|
||||
(?:www\.)?d17\.tv|
|
||||
(?:www\.)?itele\.fr
|
||||
)/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?|
|
||||
@@ -35,6 +36,7 @@ class CanalplusIE(InfoExtractor):
|
||||
'canalplus': 'cplus',
|
||||
'piwiplus': 'teletoon',
|
||||
'd8': 'd8',
|
||||
'c8': 'd8',
|
||||
'd17': 'd17',
|
||||
'itele': 'itele',
|
||||
}
|
||||
|
@@ -1,11 +1,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none
|
||||
|
||||
|
||||
class CanvasIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?canvas\.be/video/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
|
||||
'md5': 'ea838375a547ac787d4064d8c7860a6c',
|
||||
@@ -38,22 +40,42 @@ class CanvasIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.een.be/sorry-voor-alles/herbekijk-sorry-voor-alles',
|
||||
'info_dict': {
|
||||
'id': 'mz-ast-11a587f8-b921-4266-82e2-0bce3e80d07f',
|
||||
'display_id': 'herbekijk-sorry-voor-alles',
|
||||
'ext': 'mp4',
|
||||
'title': 'Herbekijk Sorry voor alles',
|
||||
'description': 'md5:8bb2805df8164e5eb95d6a7a29dc0dd3',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 3788.06,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
site_id, display_id = mobj.group('site_id'), mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
title = self._search_regex(
|
||||
title = (self._search_regex(
|
||||
r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>',
|
||||
webpage, 'title', default=None) or self._og_search_title(webpage)
|
||||
webpage, 'title', default=None) or self._og_search_title(
|
||||
webpage)).strip()
|
||||
|
||||
video_id = self._html_search_regex(
|
||||
r'data-video=(["\'])(?P<id>.+?)\1', webpage, 'video id', group='id')
|
||||
|
||||
data = self._download_json(
|
||||
'https://mediazone.vrt.be/api/v1/canvas/assets/%s' % video_id, display_id)
|
||||
'https://mediazone.vrt.be/api/v1/%s/assets/%s'
|
||||
% (site_id, video_id), display_id)
|
||||
|
||||
formats = []
|
||||
for target in data['targetUrls']:
|
||||
|
36
youtube_dl/extractor/cartoonnetwork.py
Normal file
36
youtube_dl/extractor/cartoonnetwork.py
Normal file
@@ -0,0 +1,36 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .turner import TurnerBaseIE
|
||||
|
||||
|
||||
class CartoonNetworkIE(TurnerBaseIE):
    """Extractor for clip and episode pages on cartoonnetwork.com.

    The page embeds either a video id or a title id in a ``_cnglobal``
    JavaScript assignment; that id is passed on to the CVP playlist
    service through ``_extract_cvp_info``.
    """

    _VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html'
    _TEST = {
        'url': 'http://www.cartoonnetwork.com/video/teen-titans-go/starfire-the-cat-lady-clip.html',
        'info_dict': {
            'id': '8a250ab04ed07e6c014ef3f1e2f9016c',
            'ext': 'mp4',
            'title': 'Starfire the Cat Lady',
            'description': 'Robin decides to become a cat so that Starfire will finally love him.',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Find the CVP id on the page and delegate to the CVP extraction.

        ``url`` is a page URL matching ``_VALID_URL``. Returns whatever
        ``_extract_cvp_info`` returns for the resolved playlist URL.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The page assigns either _cnglobal.cvpVideoId or _cnglobal.cvpTitleId.
        # Going through _search_regex (instead of calling .groups() on a raw
        # re.search result) avoids an AttributeError on None when the
        # assignment is missing from the page.
        id_pattern = r"_cnglobal\.cvp(Video|Title)Id\s*=\s*'([^']+)';"
        id_type = self._search_regex(
            id_pattern, webpage, 'video id type', group=1)
        video_id = self._search_regex(
            id_pattern, webpage, 'video id', group=2)

        # A "Video" id is sent as id=..., a "Title" id as titleId=...
        query = ('id' if id_type == 'Video' else 'titleId') + '=' + video_id
        return self._extract_cvp_info(
            'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?networkName=CN2&' + query, video_id, {
                'secure': {
                    'media_src': 'http://androidhls-secure.cdn.turner.com/toon/big',
                    'tokenizer_src': 'http://www.cartoonnetwork.com/cntv/mvpd/processors/services/token_ipadAdobe.do',
                },
            })
|
@@ -9,10 +9,19 @@ from ..utils import (
|
||||
js_to_json,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
xpath_text,
|
||||
xpath_element,
|
||||
xpath_with_ns,
|
||||
find_xpath_attr,
|
||||
parse_iso8601,
|
||||
parse_age_limit,
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class CBCIE(InfoExtractor):
|
||||
IE_NAME = 'cbc.ca'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
# with mediaId
|
||||
@@ -114,6 +123,7 @@ class CBCIE(InfoExtractor):
|
||||
|
||||
|
||||
class CBCPlayerIE(InfoExtractor):
|
||||
IE_NAME = 'cbc.ca:player'
|
||||
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cbc.ca/player/play/2683190193',
|
||||
@@ -167,3 +177,165 @@ class CBCPlayerIE(InfoExtractor):
|
||||
}),
|
||||
'id': video_id,
|
||||
}
|
||||
|
||||
|
||||
class CBCWatchBaseIE(InfoExtractor):
    """Common base for the watch.cbc.ca extractors.

    Keeps the device credentials that are sent with every API request and
    provides the helper that converts an RSS feed returned by the API into
    a playlist result.
    """

    # Device credentials; populated by _real_initialize
    _device_id = None
    _device_token = None
    _API_BASE_URL = 'https://api-cbc.cloud.clearleap.com/cloffice/client/'
    # Prefix -> namespace URI, used to expand namespaced tag names
    _NS_MAP = {
        'media': 'http://search.yahoo.com/mrss/',
        'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/',
    }

    def _call_api(self, path, video_id):
        """Download an API document as XML and return its root element.

        ``path`` may be an absolute URL (anything starting with ``http``)
        or a path relative to ``_API_BASE_URL``. Raises ExtractorError when
        the response carries a ``userMessage`` or ``systemMessage``.
        """
        if path.startswith('http'):
            api_url = path
        else:
            api_url = self._API_BASE_URL + path
        device_headers = {
            'X-Clearleap-DeviceId': self._device_id,
            'X-Clearleap-DeviceToken': self._device_token,
        }
        response = self._download_xml(api_url, video_id, headers=device_headers)
        message = xpath_text(response, 'userMessage') or xpath_text(response, 'systemMessage')
        if message:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, message))
        return response

    def _real_initialize(self):
        """Make sure a device id and token are available.

        Tries, in order: values already set, the 'cbcwatch'/'device' cache
        entry, and finally a fresh registration whose result is cached.
        """
        if self._device_id and self._device_token:
            return

        cached = self._downloader.cache.load('cbcwatch', 'device') or {}
        cached_id, cached_token = cached.get('id'), cached.get('token')
        self._device_id, self._device_token = cached_id, cached_token
        if self._device_id and self._device_token:
            return

        # Nothing usable in the cache: register a new web device
        registration = self._download_xml(
            self._API_BASE_URL + 'device/register',
            None, data=b'<device><type>web</type></device>')
        self._device_id = xpath_text(registration, 'deviceId', fatal=True)
        self._device_token = xpath_text(registration, 'deviceToken', fatal=True)
        self._downloader.cache.store(
            'cbcwatch', 'device', {
                'id': self._device_id,
                'token': self._device_token,
            })

    def _parse_rss_feed(self, rss):
        """Convert an ``rss`` element into a playlist result.

        Every ``item`` of the feed's ``channel`` becomes a url_transparent
        entry pointing at its media content URL, to be handled by the
        'CBCWatchVideo' extractor. ``guid``, ``title``, ``media:group`` and
        ``media:content`` are required for each item.
        """
        channel = xpath_element(rss, 'channel', fatal=True)

        def _ns(path):
            # Expand "prefix:tag" using the class namespace map
            return xpath_with_ns(path, self._NS_MAP)

        playlist_entries = []
        for feed_item in channel.findall('item'):
            item_guid = xpath_text(feed_item, 'guid', fatal=True)
            item_title = xpath_text(feed_item, 'title', fatal=True)

            group = xpath_element(feed_item, _ns('media:group'), fatal=True)
            media_content = xpath_element(group, _ns('media:content'), fatal=True)
            stream_url = media_content.attrib['url']

            # Thumbnails without a URL are dropped
            thumbnails = [{
                'id': thumb.get('profile'),
                'url': thumb.get('url'),
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            } for thumb in group.findall(_ns('media:thumbnail')) if thumb.get('url')]

            # The release date is carried by a media:credit element with
            # role="releaseDate"
            credit = find_xpath_attr(
                feed_item, _ns('media:credit'), 'role', 'releaseDate')
            if credit is None:
                release_timestamp = None
            else:
                release_timestamp = parse_iso8601(credit.text)

            playlist_entries.append({
                '_type': 'url_transparent',
                'url': stream_url,
                'id': item_guid,
                'title': item_title,
                'description': xpath_text(feed_item, 'description'),
                'timestamp': release_timestamp,
                'duration': int_or_none(media_content.get('duration')),
                'age_limit': parse_age_limit(xpath_text(feed_item, _ns('media:rating'))),
                'episode': xpath_text(feed_item, _ns('clearleap:episode')),
                'episode_number': int_or_none(xpath_text(feed_item, _ns('clearleap:episodeInSeason'))),
                'series': xpath_text(feed_item, _ns('clearleap:series')),
                'season_number': int_or_none(xpath_text(feed_item, _ns('clearleap:season'))),
                'thumbnails': thumbnails,
                'ie_key': 'CBCWatchVideo',
            })

        playlist_id = xpath_text(channel, 'guid')
        playlist_title = xpath_text(channel, 'title')
        playlist_description = xpath_text(channel, 'description')
        return self.playlist_result(
            playlist_entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
|
||||
class CBCWatchVideoIE(CBCWatchBaseIE):
    """Extractor for the API "play" URLs that feed entries point at."""

    IE_NAME = 'cbc.ca:watch:video'
    _VALID_URL = r'https?://api-cbc\.cloud\.clearleap\.com/cloffice/client/web/play/?\?.*?\bcontentId=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'

    def _real_extract(self, url):
        """Return an info dict for a single play URL.

        Formats come from the m3u8 URL in the API response. When the
        response also embeds an ``rss`` feed with at least one item, that
        item's metadata is merged into the result.
        """
        video_id = self._match_id(url)
        result = self._call_api(url, video_id)

        m3u8_url = xpath_text(result, 'url', fatal=True)
        # First try a playlist named after its parent directory; if that
        # gives fewer than two formats, fall back to the URL as returned.
        formats = self._extract_m3u8_formats(re.sub(r'/([^/]+)/[^/?]+\.m3u8', r'/\1/\1.m3u8', m3u8_url), video_id, 'mp4', fatal=False)
        if len(formats) < 2:
            formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
        # Despite metadata in m3u8 all video+audio formats are
        # actually video-only (no audio)
        for f in formats:
            if f.get('acodec') != 'none' and f.get('vcodec') != 'none':
                f['acodec'] = 'none'
        self._sort_formats(formats)

        info = {
            'id': video_id,
            'title': video_id,
            'formats': formats,
        }

        rss = xpath_element(result, 'rss')
        # Truth-testing an Element checks its child count and is
        # deprecated, so test presence and children explicitly. A childless
        # rss element is still skipped, exactly as before.
        if rss is not None and len(rss) > 0:
            entries = self._parse_rss_feed(rss)['entries']
            # A feed without items simply contributes no extra metadata
            # instead of failing with IndexError.
            if entries:
                info.update(entries[0])
                # The entry is a url_transparent reference back to this
                # extractor; drop the redirect keys so the result is final.
                for key in ('url', '_type', 'ie_key'):
                    info.pop(key, None)
        return info
|
||||
|
||||
|
||||
class CBCWatchIE(CBCWatchBaseIE):
    """Extractor for watch.cbc.ca pages, resolved through the browse API."""

    IE_NAME = 'cbc.ca:watch'
    _VALID_URL = r'https?://watch\.cbc\.ca/(?:[^/]+/)+(?P<id>[0-9a-f-]+)'
    _TESTS = [{
        'url': 'http://watch.cbc.ca/doc-zone/season-6/customer-disservice/38e815a-009e3ab12e4',
        'info_dict': {
            'id': '38e815a-009e3ab12e4',
            'ext': 'mp4',
            'title': 'Customer (Dis)Service',
            'description': 'md5:8bdd6913a0fe03d4b2a17ebe169c7c87',
            'upload_date': '20160219',
            'timestamp': 1455840000,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
            'format': 'bestvideo',
        },
        'skip': 'Geo-restricted to Canada',
    }, {
        'url': 'http://watch.cbc.ca/arthur/all/1ed4b385-cd84-49cf-95f0-80f004680057',
        'info_dict': {
            'id': '1ed4b385-cd84-49cf-95f0-80f004680057',
            'title': 'Arthur',
            'description': 'Arthur, the sweetest 8-year-old aardvark, and his pals solve all kinds of problems with humour, kindness and teamwork.',
        },
        'playlist_mincount': 30,
        'skip': 'Geo-restricted to Canada',
    }]

    def _real_extract(self, url):
        """Fetch the browse feed for the id in ``url`` and return it as a playlist."""
        content_id = self._match_id(url)
        browse_path = 'web/browse/' + content_id
        feed = self._call_api(browse_path, content_id)
        return self._parse_rss_feed(feed)
|
||||
|
@@ -51,7 +51,7 @@ class CBSIE(CBSBaseIE):
|
||||
path = 'dJ5BDC/media/guid/2198311517/' + guid
|
||||
smil_url = 'http://link.theplatform.com/s/%s?mbr=true' % path
|
||||
formats, subtitles = self._extract_theplatform_smil(smil_url + '&manifest=m3u', guid)
|
||||
for r in ('HLS&formats=M3U', 'RTMP', 'WIFI', '3G'):
|
||||
for r in ('OnceURL&formats=M3U', 'HLS&formats=M3U', 'RTMP', 'WIFI', '3G'):
|
||||
try:
|
||||
tp_formats, _ = self._extract_theplatform_smil(smil_url + '&assetTypes=' + r, guid, 'Downloading %s SMIL data' % r.split('&')[0])
|
||||
formats.extend(tp_formats)
|
||||
|
53
youtube_dl/extractor/cctv.py
Normal file
53
youtube_dl/extractor/cctv.py
Normal file
@@ -0,0 +1,53 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none
|
||||
|
||||
|
||||
class CCTVIE(InfoExtractor):
    """Extractor for cctv.com / cctv.cn / cntv.cn video pages.

    Two URL shapes are accepted: ``video/<x>/<32-hex id>`` URLs that carry
    the video id directly, and dated ``YYYY/MM/DD/VID...`` URLs whose video
    id has to be read from the page.
    """

    _VALID_URL = r'''(?x)https?://(?:.+?\.)?
        (?:
            cctv\.(?:com|cn)|
            cntv\.cn
        )/
        (?:
            video/[^/]+/(?P<id>[0-9a-f]{32})|
            \d{4}/\d{2}/\d{2}/(?P<display_id>VID[0-9A-Za-z]+)
        )'''
    _TESTS = [{
        'url': 'http://english.cntv.cn/2016/09/03/VIDEhnkB5y9AgHyIEVphCEz1160903.shtml',
        'md5': '819c7b49fc3927d529fb4cd555621823',
        'info_dict': {
            'id': '454368eb19ad44a1925bf1eb96140a61',
            'ext': 'mp4',
            'title': 'Portrait of Real Current Life 09/03/2016 Modern Inventors Part 1',
        }
    }, {
        'url': 'http://tv.cctv.com/2016/09/07/VIDE5C1FnlX5bUywlrjhxXOV160907.shtml',
        'only_matching': True,
    }, {
        'url': 'http://tv.cntv.cn/video/C39296/95cfac44cabd3ddc4a9438780a4e5c44',
        'only_matching': True
    }]

    def _real_extract(self, url):
        """Return an info dict (id, title, formats, duration) for ``url``."""
        video_id, display_id = re.match(self._VALID_URL, url).groups()
        if not video_id:
            # Dated URLs only provide a display id; the 32-hex-digit video
            # id is embedded in the page.
            webpage = self._download_webpage(url, display_id)
            video_id = self._search_regex(
                r'(?:fo\.addVariable\("videoCenterId",\s*|guid\s*=\s*)"([0-9a-f]{32})',
                webpage, 'video_id')
        api_data = self._download_json(
            'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=' + video_id, video_id)
        # Strip the maxbr query parameter from the HLS URL
        m3u8_url = re.sub(r'maxbr=\d+&?', '', api_data['hls_url'])
        title = api_data['title']

        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False)
        # Sort formats like the other extractors do, so that format
        # selection does not depend on playlist order.
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            # "video" may be absent or null in the API response
            'duration': float_or_none((api_data.get('video') or {}).get('totalLength')),
        }
|
@@ -3,14 +3,11 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
url_basename,
|
||||
)
|
||||
from .turner import TurnerBaseIE
|
||||
from ..utils import url_basename
|
||||
|
||||
|
||||
class CNNIE(InfoExtractor):
|
||||
class CNNIE(TurnerBaseIE):
|
||||
_VALID_URL = r'''(?x)https?://(?:(?P<sub_domain>edition|www|money)\.)?cnn\.com/(?:video/(?:data/.+?|\?)/)?videos?/
|
||||
(?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))'''
|
||||
|
||||
@@ -25,6 +22,7 @@ class CNNIE(InfoExtractor):
|
||||
'duration': 135,
|
||||
'upload_date': '20130609',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29',
|
||||
'md5': 'b5cc60c60a3477d185af8f19a2a26f4e',
|
||||
@@ -34,7 +32,8 @@ class CNNIE(InfoExtractor):
|
||||
'title': "Student's epic speech stuns new freshmen",
|
||||
'description': "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"",
|
||||
'upload_date': '20130821',
|
||||
}
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html',
|
||||
'md5': 'f14d02ebd264df951feb2400e2c25a1b',
|
||||
@@ -44,7 +43,8 @@ class CNNIE(InfoExtractor):
|
||||
'title': 'Nashville Ep. 1: Hand crafted skateboards',
|
||||
'description': 'md5:e7223a503315c9f150acac52e76de086',
|
||||
'upload_date': '20141222',
|
||||
}
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html',
|
||||
'md5': '52a515dc1b0f001cd82e4ceda32be9d1',
|
||||
@@ -54,7 +54,11 @@ class CNNIE(InfoExtractor):
|
||||
'title': '5 stunning stats about Netflix',
|
||||
'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.',
|
||||
'upload_date': '20160819',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
|
||||
'only_matching': True,
|
||||
@@ -79,72 +83,21 @@ class CNNIE(InfoExtractor):
|
||||
},
|
||||
}
|
||||
|
||||
def _extract_timestamp(self, video_data):
|
||||
# TODO: fix timestamp extraction
|
||||
return None
|
||||
|
||||
def _real_extract(self, url):
|
||||
sub_domain, path, page_title = re.match(self._VALID_URL, url).groups()
|
||||
if sub_domain not in ('money', 'edition'):
|
||||
sub_domain = 'edition'
|
||||
config = self._CONFIG[sub_domain]
|
||||
info_url = config['data_src'] % path
|
||||
info = self._download_xml(info_url, page_title)
|
||||
|
||||
formats = []
|
||||
rex = re.compile(r'''(?x)
|
||||
(?P<width>[0-9]+)x(?P<height>[0-9]+)
|
||||
(?:_(?P<bitrate>[0-9]+)k)?
|
||||
''')
|
||||
for f in info.findall('files/file'):
|
||||
video_url = config['media_src'] + f.text.strip()
|
||||
fdct = {
|
||||
'format_id': f.attrib['bitrate'],
|
||||
'url': video_url,
|
||||
}
|
||||
|
||||
mf = rex.match(f.attrib['bitrate'])
|
||||
if mf:
|
||||
fdct['width'] = int(mf.group('width'))
|
||||
fdct['height'] = int(mf.group('height'))
|
||||
fdct['tbr'] = int_or_none(mf.group('bitrate'))
|
||||
else:
|
||||
mf = rex.search(f.text)
|
||||
if mf:
|
||||
fdct['width'] = int(mf.group('width'))
|
||||
fdct['height'] = int(mf.group('height'))
|
||||
fdct['tbr'] = int_or_none(mf.group('bitrate'))
|
||||
else:
|
||||
mi = re.match(r'ios_(audio|[0-9]+)$', f.attrib['bitrate'])
|
||||
if mi:
|
||||
if mi.group(1) == 'audio':
|
||||
fdct['vcodec'] = 'none'
|
||||
fdct['ext'] = 'm4a'
|
||||
else:
|
||||
fdct['tbr'] = int(mi.group(1))
|
||||
|
||||
formats.append(fdct)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = [{
|
||||
'height': int(t.attrib['height']),
|
||||
'width': int(t.attrib['width']),
|
||||
'url': t.text,
|
||||
} for t in info.findall('images/image')]
|
||||
|
||||
metas_el = info.find('metas')
|
||||
upload_date = (
|
||||
metas_el.attrib.get('version') if metas_el is not None else None)
|
||||
|
||||
duration_el = info.find('length')
|
||||
duration = parse_duration(duration_el.text)
|
||||
|
||||
return {
|
||||
'id': info.attrib['id'],
|
||||
'title': info.find('headline').text,
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'description': info.find('description').text,
|
||||
'duration': duration,
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
return self._extract_cvp_info(
|
||||
config['data_src'] % path, page_title, {
|
||||
'default': {
|
||||
'media_src': config['media_src'],
|
||||
}
|
||||
})
|
||||
|
||||
|
||||
class CNNBlogsIE(InfoExtractor):
|
||||
@@ -159,6 +112,7 @@ class CNNBlogsIE(InfoExtractor):
|
||||
'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.',
|
||||
'upload_date': '20140209',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
'add_ie': ['CNN'],
|
||||
}
|
||||
|
||||
@@ -181,9 +135,10 @@ class CNNArticleIE(InfoExtractor):
|
||||
'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn',
|
||||
'ext': 'mp4',
|
||||
'title': 'Obama: Cyberattack not an act of war',
|
||||
'description': 'md5:51ce6750450603795cad0cdfbd7d05c5',
|
||||
'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b',
|
||||
'upload_date': '20141221',
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
'add_ie': ['CNN'],
|
||||
}
|
||||
|
||||
|
@@ -1163,13 +1163,6 @@ class InfoExtractor(object):
|
||||
m3u8_id=None, note=None, errnote=None,
|
||||
fatal=True, live=False):
|
||||
|
||||
formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
|
||||
|
||||
format_url = lambda u: (
|
||||
u
|
||||
if re.match(r'^https?://', u)
|
||||
else compat_urlparse.urljoin(m3u8_url, u))
|
||||
|
||||
res = self._download_webpage_handle(
|
||||
m3u8_url, video_id,
|
||||
note=note or 'Downloading m3u8 information',
|
||||
@@ -1180,6 +1173,13 @@ class InfoExtractor(object):
|
||||
m3u8_doc, urlh = res
|
||||
m3u8_url = urlh.geturl()
|
||||
|
||||
formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
|
||||
|
||||
format_url = lambda u: (
|
||||
u
|
||||
if re.match(r'^https?://', u)
|
||||
else compat_urlparse.urljoin(m3u8_url, u))
|
||||
|
||||
# We should try extracting formats only from master playlists [1], i.e.
|
||||
# playlists that describe available qualities. On the other hand media
|
||||
# playlists [2] should be returned as is since they contain just the media
|
||||
@@ -1201,27 +1201,44 @@ class InfoExtractor(object):
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
}]
|
||||
last_info = None
|
||||
last_media = None
|
||||
last_info = {}
|
||||
last_media = {}
|
||||
for line in m3u8_doc.splitlines():
|
||||
if line.startswith('#EXT-X-STREAM-INF:'):
|
||||
last_info = parse_m3u8_attributes(line)
|
||||
elif line.startswith('#EXT-X-MEDIA:'):
|
||||
last_media = parse_m3u8_attributes(line)
|
||||
media = parse_m3u8_attributes(line)
|
||||
media_type = media.get('TYPE')
|
||||
if media_type in ('VIDEO', 'AUDIO'):
|
||||
media_url = media.get('URI')
|
||||
if media_url:
|
||||
format_id = []
|
||||
for v in (media.get('GROUP-ID'), media.get('NAME')):
|
||||
if v:
|
||||
format_id.append(v)
|
||||
formats.append({
|
||||
'format_id': '-'.join(format_id),
|
||||
'url': format_url(media_url),
|
||||
'language': media.get('LANGUAGE'),
|
||||
'vcodec': 'none' if media_type == 'AUDIO' else None,
|
||||
'ext': ext,
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
})
|
||||
else:
|
||||
# When there is no URI in EXT-X-MEDIA let this tag's
|
||||
# data be used by regular URI lines below
|
||||
last_media = media
|
||||
elif line.startswith('#') or not line.strip():
|
||||
continue
|
||||
else:
|
||||
if last_info is None:
|
||||
formats.append({'url': format_url(line)})
|
||||
continue
|
||||
tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
|
||||
tbr = int_or_none(last_info.get('AVERAGE-BANDWIDTH') or last_info.get('BANDWIDTH'), scale=1000)
|
||||
format_id = []
|
||||
if m3u8_id:
|
||||
format_id.append(m3u8_id)
|
||||
last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') not in ('SUBTITLES', 'CLOSED-CAPTIONS') else None
|
||||
# Despite specification does not mention NAME attribute for
|
||||
# EXT-X-STREAM-INF it still sometimes may be present
|
||||
stream_name = last_info.get('NAME') or last_media_name
|
||||
stream_name = last_info.get('NAME') or last_media.get('NAME')
|
||||
# Bandwidth of live streams may differ over time thus making
|
||||
# format_id unpredictable. So it's better to keep provided
|
||||
# format_id intact.
|
||||
@@ -1252,11 +1269,9 @@ class InfoExtractor(object):
|
||||
'abr': abr,
|
||||
})
|
||||
f.update(parse_codecs(last_info.get('CODECS')))
|
||||
if last_media is not None:
|
||||
f['m3u8_media'] = last_media
|
||||
last_media = None
|
||||
formats.append(f)
|
||||
last_info = {}
|
||||
last_media = {}
|
||||
return formats
|
||||
|
||||
@staticmethod
|
||||
@@ -1734,7 +1749,7 @@ class InfoExtractor(object):
|
||||
media_attributes = extract_attributes(media_tag)
|
||||
src = media_attributes.get('src')
|
||||
if src:
|
||||
_, formats = _media_formats(src)
|
||||
_, formats = _media_formats(src, media_type)
|
||||
media_info['formats'].extend(formats)
|
||||
media_info['thumbnail'] = media_attributes.get('poster')
|
||||
if media_content:
|
||||
|
@@ -1,5 +1,5 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import unicode_literals, division
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
@@ -8,12 +8,22 @@ from ..utils import int_or_none
|
||||
class CrackleIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:crackle:|https?://(?:www\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.crackle.com/the-art-of-more/2496419',
|
||||
'url': 'http://www.crackle.com/comedians-in-cars-getting-coffee/2498934',
|
||||
'info_dict': {
|
||||
'id': '2496419',
|
||||
'id': '2498934',
|
||||
'ext': 'mp4',
|
||||
'title': 'Heavy Lies the Head',
|
||||
'description': 'md5:bb56aa0708fe7b9a4861535f15c3abca',
|
||||
'title': 'Everybody Respects A Bloody Nose',
|
||||
'description': 'Jerry is kaffeeklatsching in L.A. with funnyman J.B. Smoove (Saturday Night Live, Real Husbands of Hollywood). They’re headed for brew at 10 Speed Coffee in a 1964 Studebaker Avanti.',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'duration': 906,
|
||||
'series': 'Comedians In Cars Getting Coffee',
|
||||
'season_number': 8,
|
||||
'episode_number': 4,
|
||||
'subtitles': {
|
||||
'en-US': [{
|
||||
'ext': 'ttml',
|
||||
}]
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@@ -21,12 +31,8 @@ class CrackleIE(InfoExtractor):
|
||||
}
|
||||
}
|
||||
|
||||
# extracted from http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx
|
||||
_SUBTITLE_SERVER = 'http://web-us-az.crackle.com'
|
||||
_UPLYNK_OWNER_ID = 'e8773f7770a44dbd886eee4fca16a66b'
|
||||
_THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614'
|
||||
|
||||
# extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx
|
||||
_THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614'
|
||||
_MEDIA_FILE_SLOTS = {
|
||||
'c544.flv': {
|
||||
'width': 544,
|
||||
@@ -48,16 +54,21 @@ class CrackleIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
config_doc = self._download_xml(
|
||||
'http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx?site=16',
|
||||
video_id, 'Downloading config')
|
||||
|
||||
item = self._download_xml(
|
||||
'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id,
|
||||
video_id).find('i')
|
||||
title = item.attrib['t']
|
||||
|
||||
thumbnail = None
|
||||
subtitles = {}
|
||||
formats = self._extract_m3u8_formats(
|
||||
'http://content.uplynk.com/ext/%s/%s.m3u8' % (self._UPLYNK_OWNER_ID, video_id),
|
||||
'http://content.uplynk.com/ext/%s/%s.m3u8' % (config_doc.attrib['strUplynkOwnerId'], video_id),
|
||||
video_id, 'mp4', m3u8_id='hls', fatal=None)
|
||||
thumbnail = None
|
||||
path = item.attrib.get('p')
|
||||
if path:
|
||||
thumbnail = self._THUMBNAIL_TEMPLATE % path
|
||||
@@ -76,7 +87,7 @@ class CrackleIE(InfoExtractor):
|
||||
if locale not in subtitles:
|
||||
subtitles[locale] = []
|
||||
subtitles[locale] = [{
|
||||
'url': '%s/%s%s_%s.xml' % (self._SUBTITLE_SERVER, path, locale, v),
|
||||
'url': '%s/%s%s_%s.xml' % (config_doc.attrib['strSubtitleServer'], path, locale, v),
|
||||
'ext': 'ttml',
|
||||
}]
|
||||
self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
|
||||
@@ -85,7 +96,7 @@ class CrackleIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': item.attrib.get('d'),
|
||||
'duration': int(item.attrib.get('r'), 16) if item.attrib.get('r') else None,
|
||||
'duration': int(item.attrib.get('r'), 16) / 1000 if item.attrib.get('r') else None,
|
||||
'series': item.attrib.get('sn'),
|
||||
'season_number': int_or_none(item.attrib.get('se')),
|
||||
'episode_number': int_or_none(item.attrib.get('ep')),
|
||||
|
@@ -1,11 +1,13 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class CTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ctv\.ca/video/player\?vid=(?P<id>[0-9.]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>ctv|tsn|bnn|thecomedynetwork)\.ca/.*?(?:\bvid=|-vid|~|%7E)(?P<id>[0-9.]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ctv.ca/video/player?vid=706966',
|
||||
'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
|
||||
@@ -18,13 +20,27 @@ class CTVIE(InfoExtractor):
|
||||
'timestamp': 1442624700,
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 404'],
|
||||
}, {
|
||||
'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.tsn.ca/video/expectations-high-for-milos-raonic-at-us-open~939549',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bnn.ca/video/berman-s-call-part-two-viewer-questions~939654',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.ctv.ca/YourMorning/Video/S1E6-Monday-August-29-2016-vid938009',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
domain, video_id = re.match(self._VALID_URL, url).groups()
|
||||
if domain == 'thecomedynetwork':
|
||||
domain = 'comedy'
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': '9c9media:ctv_web:%s' % video_id,
|
||||
'url': '9c9media:%s_web:%s' % (domain, video_id),
|
||||
'ie_key': 'NineCNineMedia',
|
||||
}
|
||||
|
120
youtube_dl/extractor/curiositystream.py
Normal file
120
youtube_dl/extractor/curiositystream.py
Normal file
@@ -0,0 +1,120 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
urlencode_postdata,
|
||||
compat_str,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class CuriosityStreamBaseIE(InfoExtractor):
    """Common base for the CuriosityStream extractors.

    Handles the optional login, authenticated API calls, API error
    reporting and the conversion of a media record into an info dict.
    """

    _NETRC_MACHINE = 'curiositystream'
    # Set by _real_initialize after a successful login
    _auth_token = None
    _API_BASE_URL = 'https://api.curiositystream.com/v1/'

    def _handle_errors(self, result):
        """Raise an expected ExtractorError if ``result`` carries an error message."""
        message = result.get('error', {}).get('message')
        if not message:
            return
        # The message may be a dict; its values are joined into one string
        if isinstance(message, dict):
            message = ', '.join(message.values())
        raise ExtractorError(
            '%s said: %s' % (self.IE_NAME, message), expected=True)

    def _call_api(self, path, video_id):
        """Request ``path`` from the API and return the response's ``data`` member.

        The auth token is sent in ``X-Auth-Token`` when one is available.
        """
        request_headers = (
            {'X-Auth-Token': self._auth_token} if self._auth_token else {})
        response = self._download_json(
            self._API_BASE_URL + path, video_id, headers=request_headers)
        self._handle_errors(response)
        return response['data']

    def _real_initialize(self):
        """Log in and store the auth token when credentials are configured."""
        email, password = self._get_login_info()
        if email is None:
            # No credentials given: continue without logging in
            return
        login_form = urlencode_postdata({
            'email': email,
            'password': password,
        })
        response = self._download_json(
            self._API_BASE_URL + 'login', None, data=login_form)
        self._handle_errors(response)
        self._auth_token = response['message']['auth_token']

    def _extract_media_info(self, media):
        """Build a url_transparent info dict from a ``media`` record.

        ``id``, ``limelight_media_id`` and ``title`` are required keys; the
        actual streams are resolved by the 'LimelightMedia' extractor.
        """
        video_id = compat_str(media['id'])
        limelight_media_id = media['limelight_media_id']
        title = media['title']

        subtitles = {}
        for caption in media.get('closed_captions', []):
            caption_url = caption.get('file')
            if caption_url:
                # Fall back from language code to language name to 'en'
                caption_lang = caption.get('code') or caption.get('language') or 'en'
                subtitles.setdefault(caption_lang, []).append({
                    'url': caption_url,
                })

        # Prefer the largest available image
        thumbnail = media.get('image_large') or media.get('image_medium') or media.get('image_small')

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': 'limelight:media:' + limelight_media_id,
            'title': title,
            'description': media.get('description'),
            'thumbnail': thumbnail,
            'duration': int_or_none(media.get('duration')),
            'tags': media.get('tags'),
            'subtitles': subtitles,
            'ie_key': 'LimelightMedia',
        }
|
||||
|
||||
|
||||
class CuriosityStreamIE(CuriosityStreamBaseIE):
    """Extractor for single videos on app.curiositystream.com."""

    IE_NAME = 'curiositystream'
    _VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)'
    _TEST = {
        'url': 'https://app.curiositystream.com/video/2',
        'md5': 'a0074c190e6cddaf86900b28d3e9ee7a',
        'info_dict': {
            'id': '2',
            'ext': 'mp4',
            'title': 'How Did You Develop The Internet?',
            'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
            'timestamp': 1448388615,
            'upload_date': '20151124',
        }
    }

    def _real_extract(self, url):
        """Fetch the media record for the id in ``url`` and convert it to an info dict."""
        media_id = self._match_id(url)
        media_record = self._call_api('media/' + media_id, media_id)
        return self._extract_media_info(media_record)
|
||||
|
||||
|
||||
class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
    """Extractor for collections on app.curiositystream.com."""

    IE_NAME = 'curiositystream:collection'
    _VALID_URL = r'https?://app\.curiositystream\.com/collection/(?P<id>\d+)'
    _TEST = {
        'url': 'https://app.curiositystream.com/collection/2',
        'info_dict': {
            'id': '2',
            'title': 'Curious Minds: The Internet',
            'description': 'How is the internet shaping our lives in the 21st Century?',
        },
        'playlist_mincount': 17,
    }

    def _real_extract(self, url):
        """Return a playlist with one entry per media record of the collection."""
        collection_id = self._match_id(url)
        collection = self._call_api(
            'collections/' + collection_id, collection_id)
        # A collection without a "media" member yields an empty playlist
        entries = [
            self._extract_media_info(media_record)
            for media_record in collection.get('media', [])]
        playlist_title = collection.get('title')
        playlist_description = collection.get('description')
        return self.playlist_result(
            entries, collection_id, playlist_title, playlist_description)
|
@@ -394,7 +394,7 @@ class DailymotionUserIE(DailymotionPlaylistIE):
|
||||
|
||||
|
||||
class DailymotionCloudIE(DailymotionBaseInfoExtractor):
|
||||
_VALID_URL_PREFIX = r'http://api\.dmcloud\.net/(?:player/)?embed/'
|
||||
_VALID_URL_PREFIX = r'https?://api\.dmcloud\.net/(?:player/)?embed/'
|
||||
_VALID_URL = r'%s[^/]+/(?P<id>[^/?]+)' % _VALID_URL_PREFIX
|
||||
_VALID_EMBED_URL = r'%s[^/]+/[^\'"]+' % _VALID_URL_PREFIX
|
||||
|
||||
|
@@ -7,6 +7,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
unescapeHTML,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
@@ -53,7 +54,14 @@ class DiscoveryGoIE(InfoExtractor):
|
||||
|
||||
title = video['name']
|
||||
|
||||
stream = video['stream']
|
||||
stream = video.get('stream')
|
||||
if not stream:
|
||||
if video.get('authenticated') is True:
|
||||
raise ExtractorError(
|
||||
'This video is only available via cable service provider subscription that'
|
||||
' is not currently supported. You may want to use --cookies.', expected=True)
|
||||
else:
|
||||
raise ExtractorError('Unable to find stream')
|
||||
STREAM_URL_SUFFIX = 'streamUrl'
|
||||
formats = []
|
||||
for stream_kind in ('', 'hds'):
|
||||
|
@@ -3,9 +3,17 @@ from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import time
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (ExtractorError, unescapeHTML)
|
||||
from ..compat import (compat_str, compat_basestring)
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class DouyuTVIE(InfoExtractor):
|
||||
@@ -21,7 +29,6 @@ class DouyuTVIE(InfoExtractor):
|
||||
'description': 're:.*m7show@163\.com.*',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': '7师傅',
|
||||
'uploader_id': '431925',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
@@ -37,7 +44,6 @@ class DouyuTVIE(InfoExtractor):
|
||||
'description': 'md5:746a2f7a253966a06755a912f0acc0d2',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'douyu小漠',
|
||||
'uploader_id': '3769985',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
@@ -54,7 +60,6 @@ class DouyuTVIE(InfoExtractor):
|
||||
'description': 're:.*m7show@163\.com.*',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': '7师傅',
|
||||
'uploader_id': '431925',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
@@ -65,6 +70,10 @@ class DouyuTVIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
# Decompile core.swf in webpage by ffdec "Search SWFs in memory". core.swf
|
||||
# is encrypted originally, but ffdec can dump memory to get the decrypted one.
|
||||
_API_KEY = 'A12Svb&%1UUmf@hC'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
@@ -75,74 +84,56 @@ class DouyuTVIE(InfoExtractor):
|
||||
room_id = self._html_search_regex(
|
||||
r'"room_id"\s*:\s*(\d+),', page, 'room id')
|
||||
|
||||
config = None
|
||||
# Douyu API sometimes returns error "Unable to load the requested class: eticket_redis_cache"
|
||||
# Retry with different parameters - same parameters cause same errors
|
||||
for i in range(5):
|
||||
prefix = 'room/%s?aid=android&client_sys=android&time=%d' % (
|
||||
room_id, int(time.time()))
|
||||
auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest()
|
||||
room = self._download_json(
|
||||
'http://m.douyu.com/html5/live?roomId=%s' % room_id, video_id,
|
||||
note='Downloading room info')['data']
|
||||
|
||||
config_page = self._download_webpage(
|
||||
'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth),
|
||||
video_id)
|
||||
try:
|
||||
config = self._parse_json(config_page, video_id, fatal=False)
|
||||
except ExtractorError:
|
||||
# Wait some time before retrying to get a different time() value
|
||||
self._sleep(1, video_id, msg_template='%(video_id)s: Error occurs. '
|
||||
'Waiting for %(timeout)s seconds before retrying')
|
||||
continue
|
||||
else:
|
||||
break
|
||||
if config is None:
|
||||
raise ExtractorError('Unable to fetch API result')
|
||||
|
||||
data = config['data']
|
||||
|
||||
error_code = config.get('error', 0)
|
||||
if error_code is not 0:
|
||||
error_desc = 'Server reported error %i' % error_code
|
||||
if isinstance(data, (compat_str, compat_basestring)):
|
||||
error_desc += ': ' + data
|
||||
raise ExtractorError(error_desc, expected=True)
|
||||
|
||||
show_status = data.get('show_status')
|
||||
# 1 = live, 2 = offline
|
||||
if show_status == '2':
|
||||
if room.get('show_status') == '2':
|
||||
raise ExtractorError('Live stream is offline', expected=True)
|
||||
|
||||
tt = compat_str(int(time.time() / 60))
|
||||
did = uuid.uuid4().hex.upper()
|
||||
|
||||
sign_content = ''.join((room_id, did, self._API_KEY, tt))
|
||||
sign = hashlib.md5((sign_content).encode('utf-8')).hexdigest()
|
||||
|
||||
flv_data = compat_urllib_parse_urlencode({
|
||||
'cdn': 'ws',
|
||||
'rate': '0',
|
||||
'tt': tt,
|
||||
'did': did,
|
||||
'sign': sign,
|
||||
})
|
||||
|
||||
video_info = self._download_json(
|
||||
'http://www.douyu.com/lapi/live/getPlay/%s' % room_id, video_id,
|
||||
data=flv_data, note='Downloading video info',
|
||||
headers={'Content-Type': 'application/x-www-form-urlencoded'})
|
||||
|
||||
error_code = video_info.get('error', 0)
|
||||
if error_code is not 0:
|
||||
raise ExtractorError(
|
||||
'Live stream is offline', expected=True)
|
||||
'%s reported error %i' % (self.IE_NAME, error_code),
|
||||
expected=True)
|
||||
|
||||
base_url = data['rtmp_url']
|
||||
live_path = data['rtmp_live']
|
||||
base_url = video_info['data']['rtmp_url']
|
||||
live_path = video_info['data']['rtmp_live']
|
||||
|
||||
title = self._live_title(unescapeHTML(data['room_name']))
|
||||
description = data.get('show_details')
|
||||
thumbnail = data.get('room_src')
|
||||
video_url = '%s/%s' % (base_url, live_path)
|
||||
|
||||
uploader = data.get('nickname')
|
||||
uploader_id = data.get('owner_uid')
|
||||
|
||||
multi_formats = data.get('rtmp_multi_bitrate')
|
||||
if not isinstance(multi_formats, dict):
|
||||
multi_formats = {}
|
||||
multi_formats['live'] = live_path
|
||||
|
||||
formats = [{
|
||||
'url': '%s/%s' % (base_url, format_path),
|
||||
'format_id': format_id,
|
||||
'preference': 1 if format_id == 'live' else 0,
|
||||
} for format_id, format_path in multi_formats.items()]
|
||||
self._sort_formats(formats)
|
||||
title = self._live_title(unescapeHTML(room['room_name']))
|
||||
description = room.get('notice')
|
||||
thumbnail = room.get('room_src')
|
||||
uploader = room.get('nickname')
|
||||
|
||||
return {
|
||||
'id': room_id,
|
||||
'display_id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'formats': formats,
|
||||
'is_live': True,
|
||||
}
|
||||
|
@@ -4,26 +4,45 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class DRTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://www.dr.dk/tv/se/boern/ultra/panisk-paske/panisk-paske-5',
|
||||
'md5': 'dc515a9ab50577fa14cc4e4b0265168f',
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
|
||||
'md5': '25e659cccc9a2ed956110a299fdf5983',
|
||||
'info_dict': {
|
||||
'id': 'panisk-paske-5',
|
||||
'id': 'klassen-darlig-taber-10',
|
||||
'ext': 'mp4',
|
||||
'title': 'Panisk Påske (5)',
|
||||
'description': 'md5:ca14173c5ab24cd26b0fcc074dff391c',
|
||||
'timestamp': 1426984612,
|
||||
'upload_date': '20150322',
|
||||
'duration': 1455,
|
||||
'title': 'Klassen - Dårlig taber (10)',
|
||||
'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa',
|
||||
'timestamp': 1471991907,
|
||||
'upload_date': '20160823',
|
||||
'duration': 606.84,
|
||||
},
|
||||
}
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang',
|
||||
'md5': '2c37175c718155930f939ef59952474a',
|
||||
'info_dict': {
|
||||
'id': 'christiania-pusher-street-ryddes-drdkrjpo',
|
||||
'ext': 'mp4',
|
||||
'title': 'LIVE Christianias rydning af Pusher Street er i gang',
|
||||
'description': '- Det er det fedeste, der er sket i 20 år, fortæller christianit til DR Nyheder.',
|
||||
'timestamp': 1472800279,
|
||||
'upload_date': '20160902',
|
||||
'duration': 131.4,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -35,7 +54,8 @@ class DRTVIE(InfoExtractor):
|
||||
'Video %s is not available' % video_id, expected=True)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-(?:material-identifier|episode-slug)="([^"]+)"',
|
||||
(r'data-(?:material-identifier|episode-slug)="([^"]+)"',
|
||||
r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'),
|
||||
webpage, 'video id')
|
||||
|
||||
programcard = self._download_json(
|
||||
@@ -43,9 +63,12 @@ class DRTVIE(InfoExtractor):
|
||||
video_id, 'Downloading video JSON')
|
||||
data = programcard['Data'][0]
|
||||
|
||||
title = data['Title']
|
||||
description = data['Description']
|
||||
timestamp = parse_iso8601(data['CreatedTime'])
|
||||
title = remove_end(self._og_search_title(
|
||||
webpage, default=None), ' | TV | DR') or data['Title']
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or data.get('Description')
|
||||
|
||||
timestamp = parse_iso8601(data.get('CreatedTime'))
|
||||
|
||||
thumbnail = None
|
||||
duration = None
|
||||
@@ -56,16 +79,18 @@ class DRTVIE(InfoExtractor):
|
||||
subtitles = {}
|
||||
|
||||
for asset in data['Assets']:
|
||||
if asset['Kind'] == 'Image':
|
||||
thumbnail = asset['Uri']
|
||||
elif asset['Kind'] == 'VideoResource':
|
||||
duration = asset['DurationInMilliseconds'] / 1000.0
|
||||
restricted_to_denmark = asset['RestrictedToDenmark']
|
||||
spoken_subtitles = asset['Target'] == 'SpokenSubtitles'
|
||||
for link in asset['Links']:
|
||||
uri = link['Uri']
|
||||
target = link['Target']
|
||||
format_id = target
|
||||
if asset.get('Kind') == 'Image':
|
||||
thumbnail = asset.get('Uri')
|
||||
elif asset.get('Kind') == 'VideoResource':
|
||||
duration = float_or_none(asset.get('DurationInMilliseconds'), 1000)
|
||||
restricted_to_denmark = asset.get('RestrictedToDenmark')
|
||||
spoken_subtitles = asset.get('Target') == 'SpokenSubtitles'
|
||||
for link in asset.get('Links', []):
|
||||
uri = link.get('Uri')
|
||||
if not uri:
|
||||
continue
|
||||
target = link.get('Target')
|
||||
format_id = target or ''
|
||||
preference = None
|
||||
if spoken_subtitles:
|
||||
preference = -1
|
||||
@@ -76,8 +101,8 @@ class DRTVIE(InfoExtractor):
|
||||
video_id, preference, f4m_id=format_id))
|
||||
elif target == 'HLS':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
uri, video_id, 'mp4', preference=preference,
|
||||
m3u8_id=format_id))
|
||||
uri, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
preference=preference, m3u8_id=format_id))
|
||||
else:
|
||||
bitrate = link.get('Bitrate')
|
||||
if bitrate:
|
||||
@@ -85,7 +110,7 @@ class DRTVIE(InfoExtractor):
|
||||
formats.append({
|
||||
'url': uri,
|
||||
'format_id': format_id,
|
||||
'tbr': bitrate,
|
||||
'tbr': int_or_none(bitrate),
|
||||
'ext': link.get('FileFormat'),
|
||||
})
|
||||
subtitles_list = asset.get('SubtitlesList')
|
||||
@@ -94,12 +119,18 @@ class DRTVIE(InfoExtractor):
|
||||
'Danish': 'da',
|
||||
}
|
||||
for subs in subtitles_list:
|
||||
lang = subs['Language']
|
||||
subtitles[LANGS.get(lang, lang)] = [{'url': subs['Uri'], 'ext': 'vtt'}]
|
||||
if not subs.get('Uri'):
|
||||
continue
|
||||
lang = subs.get('Language') or 'da'
|
||||
subtitles.setdefault(LANGS.get(lang, lang), []).append({
|
||||
'url': subs['Uri'],
|
||||
'ext': mimetype2ext(subs.get('MimeType')) or 'vtt'
|
||||
})
|
||||
|
||||
if not formats and restricted_to_denmark:
|
||||
raise ExtractorError(
|
||||
'Unfortunately, DR is not allowed to show this program outside Denmark.', expected=True)
|
||||
self.raise_geo_restricted(
|
||||
'Unfortunately, DR is not allowed to show this program outside Denmark.',
|
||||
expected=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@@ -5,7 +5,7 @@ from ..utils import remove_end
|
||||
|
||||
|
||||
class ESPNIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://espn.go.com/video/clip?id=10365079',
|
||||
'md5': '60e5d097a523e767d06479335d1bdc58',
|
||||
@@ -47,6 +47,9 @@ class ESPNIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.espn.com/video/clip?id=10365079',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -1,58 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class ExfmIE(InfoExtractor):
|
||||
IE_NAME = 'exfm'
|
||||
IE_DESC = 'ex.fm'
|
||||
_VALID_URL = r'https?://(?:www\.)?ex\.fm/song/(?P<id>[^/]+)'
|
||||
_SOUNDCLOUD_URL = r'http://(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://ex.fm/song/eh359',
|
||||
'md5': 'e45513df5631e6d760970b14cc0c11e7',
|
||||
'info_dict': {
|
||||
'id': '44216187',
|
||||
'ext': 'mp3',
|
||||
'title': 'Test House "Love Is Not Enough" (Extended Mix) DeadJournalist Exclusive',
|
||||
'uploader': 'deadjournalist',
|
||||
'upload_date': '20120424',
|
||||
'description': 'Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive',
|
||||
},
|
||||
'note': 'Soundcloud song',
|
||||
'skip': 'The site is down too often',
|
||||
},
|
||||
{
|
||||
'url': 'http://ex.fm/song/wddt8',
|
||||
'md5': '966bd70741ac5b8570d8e45bfaed3643',
|
||||
'info_dict': {
|
||||
'id': 'wddt8',
|
||||
'ext': 'mp3',
|
||||
'title': 'Safe and Sound',
|
||||
'uploader': 'Capital Cities',
|
||||
},
|
||||
'skip': 'The site is down too often',
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
song_id = mobj.group('id')
|
||||
info_url = 'http://ex.fm/api/v3/song/%s' % song_id
|
||||
info = self._download_json(info_url, song_id)['song']
|
||||
song_url = info['url']
|
||||
if re.match(self._SOUNDCLOUD_URL, song_url) is not None:
|
||||
self.to_screen('Soundcloud song detected')
|
||||
return self.url_result(song_url.replace('/stream', ''), 'Soundcloud')
|
||||
return {
|
||||
'id': song_id,
|
||||
'url': song_url,
|
||||
'ext': 'mp3',
|
||||
'title': info['title'],
|
||||
'thumbnail': info['image']['large'],
|
||||
'uploader': info['artist'],
|
||||
'view_count': info['loved_count'],
|
||||
}
|
@@ -5,11 +5,14 @@ from .abc import (
|
||||
ABCIE,
|
||||
ABCIViewIE,
|
||||
)
|
||||
from .abc7news import Abc7NewsIE
|
||||
from .abcnews import (
|
||||
AbcNewsIE,
|
||||
AbcNewsVideoIE,
|
||||
)
|
||||
from .abcotvs import (
|
||||
ABCOTVSIE,
|
||||
ABCOTVSClipsIE,
|
||||
)
|
||||
from .academicearth import AcademicEarthCourseIE
|
||||
from .acast import (
|
||||
ACastIE,
|
||||
@@ -127,9 +130,12 @@ from .carambatv import (
|
||||
CarambaTVIE,
|
||||
CarambaTVPageIE,
|
||||
)
|
||||
from .cartoonnetwork import CartoonNetworkIE
|
||||
from .cbc import (
|
||||
CBCIE,
|
||||
CBCPlayerIE,
|
||||
CBCWatchVideoIE,
|
||||
CBCWatchIE,
|
||||
)
|
||||
from .cbs import CBSIE
|
||||
from .cbslocal import CBSLocalIE
|
||||
@@ -140,6 +146,7 @@ from .cbsnews import (
|
||||
)
|
||||
from .cbssports import CBSSportsIE
|
||||
from .ccc import CCCIE
|
||||
from .cctv import CCTVIE
|
||||
from .cda import CDAIE
|
||||
from .ceskatelevize import CeskaTelevizeIE
|
||||
from .channel9 import Channel9IE
|
||||
@@ -191,6 +198,10 @@ from .ctsnews import CtsNewsIE
|
||||
from .ctv import CTVIE
|
||||
from .ctvnews import CTVNewsIE
|
||||
from .cultureunplugged import CultureUnpluggedIE
|
||||
from .curiositystream import (
|
||||
CuriosityStreamIE,
|
||||
CuriosityStreamCollectionIE,
|
||||
)
|
||||
from .cwtv import CWTVIE
|
||||
from .dailymail import DailyMailIE
|
||||
from .dailymotion import (
|
||||
@@ -254,13 +265,18 @@ from .espn import ESPNIE
|
||||
from .esri import EsriVideoIE
|
||||
from .europa import EuropaIE
|
||||
from .everyonesmixtape import EveryonesMixtapeIE
|
||||
from .exfm import ExfmIE
|
||||
from .expotv import ExpoTVIE
|
||||
from .extremetube import ExtremeTubeIE
|
||||
from .eyedotv import EyedoTVIE
|
||||
from .facebook import FacebookIE
|
||||
from .facebook import (
|
||||
FacebookIE,
|
||||
FacebookPluginsVideoIE,
|
||||
)
|
||||
from .faz import FazIE
|
||||
from .fc2 import FC2IE
|
||||
from .fc2 import (
|
||||
FC2IE,
|
||||
FC2EmbedIE,
|
||||
)
|
||||
from .fczenit import FczenitIE
|
||||
from .firstpost import FirstpostIE
|
||||
from .firsttv import FirstTVIE
|
||||
@@ -275,7 +291,11 @@ from .formula1 import Formula1IE
|
||||
from .fourtube import FourTubeIE
|
||||
from .fox import FOXIE
|
||||
from .foxgay import FoxgayIE
|
||||
from .foxnews import FoxNewsIE
|
||||
from .foxnews import (
|
||||
FoxNewsIE,
|
||||
FoxNewsArticleIE,
|
||||
FoxNewsInsiderIE,
|
||||
)
|
||||
from .foxsports import FoxSportsIE
|
||||
from .franceculture import FranceCultureIE
|
||||
from .franceinter import FranceInterIE
|
||||
@@ -312,6 +332,7 @@ from .globo import (
|
||||
GloboIE,
|
||||
GloboArticleIE,
|
||||
)
|
||||
from .go import GoIE
|
||||
from .godtube import GodTubeIE
|
||||
from .godtv import GodTVIE
|
||||
from .golem import GolemIE
|
||||
@@ -375,6 +396,7 @@ from .ivi import (
|
||||
IviCompilationIE
|
||||
)
|
||||
from .ivideon import IvideonIE
|
||||
from .iwara import IwaraIE
|
||||
from .izlesene import IzleseneIE
|
||||
from .jeuxvideo import JeuxVideoIE
|
||||
from .jove import JoveIE
|
||||
@@ -387,6 +409,7 @@ from .kankan import KankanIE
|
||||
from .karaoketv import KaraoketvIE
|
||||
from .karrierevideos import KarriereVideosIE
|
||||
from .keezmovies import KeezMoviesIE
|
||||
from .ketnet import KetnetIE
|
||||
from .khanacademy import KhanAcademyIE
|
||||
from .kickstarter import KickStarterIE
|
||||
from .keek import KeekIE
|
||||
@@ -405,6 +428,7 @@ from .kuwo import (
|
||||
)
|
||||
from .la7 import LA7IE
|
||||
from .laola1tv import Laola1TvIE
|
||||
from .lci import LCIIE
|
||||
from .lcp import (
|
||||
LcpPlayIE,
|
||||
LcpIE,
|
||||
@@ -455,6 +479,7 @@ from .metacafe import MetacafeIE
|
||||
from .metacritic import MetacriticIE
|
||||
from .mgoon import MgoonIE
|
||||
from .mgtv import MGTVIE
|
||||
from .miaopai import MiaoPaiIE
|
||||
from .microsoftvirtualacademy import (
|
||||
MicrosoftVirtualAcademyIE,
|
||||
MicrosoftVirtualAcademyCourseIE,
|
||||
@@ -483,6 +508,7 @@ from .motherless import MotherlessIE
|
||||
from .motorsport import MotorsportIE
|
||||
from .movieclips import MovieClipsIE
|
||||
from .moviezine import MoviezineIE
|
||||
from .movingimage import MovingImageIE
|
||||
from .msn import MSNIE
|
||||
from .mtv import (
|
||||
MTVIE,
|
||||
@@ -539,6 +565,7 @@ from .nextmedia import (
|
||||
)
|
||||
from .nfb import NFBIE
|
||||
from .nfl import NFLIE
|
||||
from .nhk import NhkVodIE
|
||||
from .nhl import (
|
||||
NHLVideocenterIE,
|
||||
NHLNewsIE,
|
||||
@@ -550,7 +577,10 @@ from .nick import (
|
||||
NickDeIE,
|
||||
)
|
||||
from .niconico import NiconicoIE, NiconicoPlaylistIE
|
||||
from .ninecninemedia import NineCNineMediaIE
|
||||
from .ninecninemedia import (
|
||||
NineCNineMediaStackIE,
|
||||
NineCNineMediaIE,
|
||||
)
|
||||
from .ninegag import NineGagIE
|
||||
from .ninenow import NineNowIE
|
||||
from .nintendo import NintendoIE
|
||||
@@ -632,7 +662,6 @@ from .phoenix import PhoenixIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .pinkbike import PinkbikeIE
|
||||
from .pladform import PladformIE
|
||||
from .played import PlayedIE
|
||||
from .playfm import PlayFMIE
|
||||
from .plays import PlaysTVIE
|
||||
from .playtvak import PlaytvakIE
|
||||
@@ -644,7 +673,10 @@ from .pluralsight import (
|
||||
)
|
||||
from .podomatic import PodomaticIE
|
||||
from .pokemon import PokemonIE
|
||||
from .polskieradio import PolskieRadioIE
|
||||
from .polskieradio import (
|
||||
PolskieRadioIE,
|
||||
PolskieRadioCategoryIE,
|
||||
)
|
||||
from .porn91 import Porn91IE
|
||||
from .porncom import PornComIE
|
||||
from .pornhd import PornHdIE
|
||||
@@ -698,6 +730,7 @@ from .revision3 import (
|
||||
)
|
||||
from .rice import RICEIE
|
||||
from .ringtv import RingTVIE
|
||||
from .rmcdecouverte import RMCDecouverteIE
|
||||
from .ro220 import Ro220IE
|
||||
from .rockstargames import RockstarGamesIE
|
||||
from .roosterteeth import RoosterTeethIE
|
||||
@@ -800,7 +833,6 @@ from .srgssr import (
|
||||
SRGSSRPlayIE,
|
||||
)
|
||||
from .srmediathek import SRMediathekIE
|
||||
from .ssa import SSAIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .steam import SteamIE
|
||||
from .streamable import StreamableIE
|
||||
@@ -820,6 +852,7 @@ from .tagesschau import (
|
||||
TagesschauIE,
|
||||
)
|
||||
from .tass import TassIE
|
||||
from .tbs import TBSIE
|
||||
from .tdslifeway import TDSLifewayIE
|
||||
from .teachertube import (
|
||||
TeacherTubeIE,
|
||||
@@ -834,10 +867,12 @@ from .telebruxelles import TeleBruxellesIE
|
||||
from .telecinco import TelecincoIE
|
||||
from .telegraaf import TelegraafIE
|
||||
from .telemb import TeleMBIE
|
||||
from .telequebec import TeleQuebecIE
|
||||
from .teletask import TeleTaskIE
|
||||
from .telewebion import TelewebionIE
|
||||
from .testurl import TestURLIE
|
||||
from .tf1 import TF1IE
|
||||
from .tfo import TFOIE
|
||||
from .theintercept import TheInterceptIE
|
||||
from .theplatform import (
|
||||
ThePlatformIE,
|
||||
@@ -862,16 +897,11 @@ from .tnaflix import (
|
||||
MovieFapIE,
|
||||
)
|
||||
from .toggle import ToggleIE
|
||||
from .thvideo import (
|
||||
THVideoIE,
|
||||
THVideoPlaylistIE
|
||||
)
|
||||
from .toutv import TouTvIE
|
||||
from .toypics import ToypicsUserIE, ToypicsIE
|
||||
from .traileraddict import TrailerAddictIE
|
||||
from .trilulilu import TriluliluIE
|
||||
from .trollvids import TrollvidsIE
|
||||
from .trutube import TruTubeIE
|
||||
from .trutv import TruTVIE
|
||||
from .tube8 import Tube8IE
|
||||
from .tubitv import TubiTvIE
|
||||
from .tudou import (
|
||||
@@ -901,6 +931,7 @@ from .tvc import (
|
||||
)
|
||||
from .tvigle import TvigleIE
|
||||
from .tvland import TVLandIE
|
||||
from .tvnoe import TVNoeIE
|
||||
from .tvp import (
|
||||
TVPEmbedIE,
|
||||
TVPIE,
|
||||
@@ -945,6 +976,7 @@ from .uplynk import (
|
||||
)
|
||||
from .urort import UrortIE
|
||||
from .urplay import URPlayIE
|
||||
from .usanetwork import USANetworkIE
|
||||
from .usatoday import USATodayIE
|
||||
from .ustream import UstreamIE, UstreamChannelIE
|
||||
from .ustudio import (
|
||||
|
@@ -15,6 +15,7 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
error_to_compat_str,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
limit_length,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
@@ -62,6 +63,8 @@ class FacebookIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam',
|
||||
'uploader': 'Tennis on Facebook',
|
||||
'upload_date': '20140908',
|
||||
'timestamp': 1410199200,
|
||||
}
|
||||
}, {
|
||||
'note': 'Video without discernible title',
|
||||
@@ -71,6 +74,8 @@ class FacebookIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Facebook video #274175099429670',
|
||||
'uploader': 'Asif Nawab Butt',
|
||||
'upload_date': '20140506',
|
||||
'timestamp': 1399398998,
|
||||
},
|
||||
'expected_warnings': [
|
||||
'title'
|
||||
@@ -78,12 +83,14 @@ class FacebookIE(InfoExtractor):
|
||||
}, {
|
||||
'note': 'Video with DASH manifest',
|
||||
'url': 'https://www.facebook.com/video.php?v=957955867617029',
|
||||
'md5': '54706e4db4f5ad58fbad82dde1f1213f',
|
||||
'md5': 'b2c28d528273b323abe5c6ab59f0f030',
|
||||
'info_dict': {
|
||||
'id': '957955867617029',
|
||||
'ext': 'mp4',
|
||||
'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...',
|
||||
'uploader': 'Demy de Zeeuw',
|
||||
'upload_date': '20160110',
|
||||
'timestamp': 1452431627,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
|
||||
@@ -306,12 +313,16 @@ class FacebookIE(InfoExtractor):
|
||||
if not video_title:
|
||||
video_title = 'Facebook video #%s' % video_id
|
||||
uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
|
||||
'timestamp', default=None))
|
||||
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'formats': formats,
|
||||
'uploader': uploader,
|
||||
'timestamp': timestamp,
|
||||
}
|
||||
|
||||
return webpage, info_dict
|
||||
@@ -340,3 +351,32 @@ class FacebookIE(InfoExtractor):
|
||||
self._VIDEO_PAGE_TEMPLATE % video_id,
|
||||
video_id, fatal_if_no_video=True)
|
||||
return info_dict
|
||||
|
||||
|
||||
class FacebookPluginsVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/plugins/video\.php\?.*?\bhref=(?P<id>https.+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fgov.sg%2Fvideos%2F10154383743583686%2F&show_text=0&width=560',
|
||||
'md5': '5954e92cdfe51fe5782ae9bda7058a07',
|
||||
'info_dict': {
|
||||
'id': '10154383743583686',
|
||||
'ext': 'mp4',
|
||||
'title': 'What to do during the haze?',
|
||||
'uploader': 'Gov.sg',
|
||||
'upload_date': '20160826',
|
||||
'timestamp': 1472184808,
|
||||
},
|
||||
'add_ie': [FacebookIE.ie_key()],
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fvideo.php%3Fv%3D10204634152394104',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/plugins/video.php?href=https://www.facebook.com/gov.sg/videos/10154383743583686/&show_text=0&width=560',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self.url_result(
|
||||
compat_urllib_parse_unquote(self._match_id(url)),
|
||||
FacebookIE.ie_key())
|
||||
|
@@ -1,10 +1,12 @@
|
||||
#! -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
@@ -16,7 +18,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class FC2IE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://video\.fc2\.com/(?:[^/]+/)*content/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
|
||||
IE_NAME = 'fc2'
|
||||
_NETRC_MACHINE = 'fc2'
|
||||
_TESTS = [{
|
||||
@@ -75,12 +77,17 @@ class FC2IE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
self._login()
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
self._downloader.cookiejar.clear_session_cookies() # must clear
|
||||
self._login()
|
||||
webpage = None
|
||||
if not url.startswith('fc2:'):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
self._downloader.cookiejar.clear_session_cookies() # must clear
|
||||
self._login()
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
title = 'FC2 video %s' % video_id
|
||||
thumbnail = None
|
||||
if webpage is not None:
|
||||
title = self._og_search_title(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
refer = url.replace('/content/', '/a/content/') if '/a/content/' not in url else url
|
||||
|
||||
mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest()
|
||||
@@ -113,3 +120,41 @@ class FC2IE(InfoExtractor):
|
||||
'ext': 'flv',
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
|
||||
class FC2EmbedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video\.fc2\.com/flv2\.swf\?(?P<query>.+)'
|
||||
IE_NAME = 'fc2:embed'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://video.fc2.com/flv2.swf?t=201404182936758512407645&i=20130316kwishtfitaknmcgd76kjd864hso93htfjcnaogz629mcgfs6rbfk0hsycma7shkf85937cbchfygd74&i=201403223kCqB3Ez&d=2625&sj=11&lang=ja&rel=1&from=11&cmt=1&tk=TlRBM09EQTNNekU9&tl=プリズン・ブレイク%20S1-01%20マイケル%20【吹替】',
|
||||
'md5': 'b8aae5334cb691bdb1193a88a6ab5d5a',
|
||||
'info_dict': {
|
||||
'id': '201403223kCqB3Ez',
|
||||
'ext': 'flv',
|
||||
'title': 'プリズン・ブレイク S1-01 マイケル 【吹替】',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
query = compat_parse_qs(mobj.group('query'))
|
||||
|
||||
video_id = query['i'][-1]
|
||||
title = query.get('tl', ['FC2 video %s' % video_id])[0]
|
||||
|
||||
sj = query.get('sj', [None])[0]
|
||||
thumbnail = None
|
||||
if sj:
|
||||
# See thumbnailImagePath() in ServerConst.as of flv2.swf
|
||||
thumbnail = 'http://video%s-thumbnail.fc2.com/up/pic/%s.jpg' % (
|
||||
sj, '/'.join((video_id[:6], video_id[6:8], video_id[-2], video_id[-1], video_id)))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': FC2IE.ie_key(),
|
||||
'url': 'fc2:%s' % video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
@@ -1,18 +1,24 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
get_element_by_id,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class FoxgayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml'
|
||||
_TEST = {
|
||||
'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml',
|
||||
'md5': '80d72beab5d04e1655a56ad37afe6841',
|
||||
'md5': '344558ccfea74d33b7adbce22e577f54',
|
||||
'info_dict': {
|
||||
'id': '2582',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:6122f7ae0fc6b21ebdf59c5e083ce25a',
|
||||
'description': 'md5:5e51dc4405f1fd315f7927daed2ce5cf',
|
||||
'title': 'Fuck Turkish-style',
|
||||
'description': 'md5:6ae2d9486921891efe89231ace13ffdf',
|
||||
'age_limit': 18,
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
},
|
||||
@@ -22,27 +28,35 @@ class FoxgayIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>(?P<title>.*?)</title>',
|
||||
webpage, 'title', fatal=False)
|
||||
description = self._html_search_regex(
|
||||
r'<div class="ico_desc"><h2>(?P<description>.*?)</h2>',
|
||||
webpage, 'description', fatal=False)
|
||||
title = remove_end(self._html_search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title'), ' - Foxgay.com')
|
||||
description = get_element_by_id('inf_tit', webpage)
|
||||
|
||||
# The default user-agent with foxgay cookies leads to pages without videos
|
||||
self._downloader.cookiejar.clear('.foxgay.com')
|
||||
# Find the URL for the iFrame which contains the actual video.
|
||||
iframe_url = self._html_search_regex(
|
||||
r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1', webpage,
|
||||
'video frame', group='url')
|
||||
iframe = self._download_webpage(
|
||||
self._html_search_regex(r'iframe src="(?P<frame>.*?)"', webpage, 'video frame'),
|
||||
video_id)
|
||||
video_url = self._html_search_regex(
|
||||
r"v_path = '(?P<vid>http://.*?)'", iframe, 'url')
|
||||
thumb_url = self._html_search_regex(
|
||||
r"t_path = '(?P<thumb>http://.*?)'", iframe, 'thumbnail', fatal=False)
|
||||
iframe_url, video_id, headers={'User-Agent': 'curl/7.50.1'},
|
||||
note='Downloading video frame')
|
||||
video_data = self._parse_json(self._search_regex(
|
||||
r'video_data\s*=\s*([^;]+);', iframe, 'video data'), video_id)
|
||||
|
||||
formats = [{
|
||||
'url': source,
|
||||
'height': resolution,
|
||||
} for source, resolution in zip(
|
||||
video_data['sources'], video_data.get('resolutions', itertools.repeat(None)))]
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'formats': formats,
|
||||
'description': description,
|
||||
'thumbnail': thumb_url,
|
||||
'thumbnail': video_data.get('act_vid', {}).get('thumb'),
|
||||
'age_limit': 18,
|
||||
}
|
||||
|
@@ -3,11 +3,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .amp import AMPIE
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class FoxNewsIE(AMPIE):
|
||||
IE_NAME = 'foxnews'
|
||||
IE_DESC = 'Fox News and Fox Business Video'
|
||||
_VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?P<host>video\.(?:insider\.)?fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips',
|
||||
@@ -49,6 +51,11 @@ class FoxNewsIE(AMPIE):
|
||||
'url': 'http://video.foxbusiness.com/v/4442309889001',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# From http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words
|
||||
'url': 'http://video.insider.foxnews.com/v/video-embed.html?video_id=5099377331001&autoplay=true&share_url=http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words&share_title=Student%20Group:%20Saying%20%27Politically%20Correct,%27%20%27Trash%27%20and%20%27Lame%27%20Is%20Offensive&share=true',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -58,3 +65,76 @@ class FoxNewsIE(AMPIE):
|
||||
'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id))
|
||||
info['id'] = video_id
|
||||
return info
|
||||
|
||||
|
||||
class FoxNewsArticleIE(InfoExtractor):
    """Extractor for foxnews.com article pages that embed a Fox News video.

    The article page itself is only scanned for the embedded video ID; the
    actual extraction is delegated to FoxNewsIE.
    """

    _VALID_URL = r'https?://(?:www\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)'
    IE_NAME = 'foxnews:article'

    _TEST = {
        'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html',
        'md5': '62aa5a781b308fdee212ebb6f33ae7ef',
        'info_dict': {
            'id': '5116295019001',
            'ext': 'mp4',
            'title': 'Trump and Clinton asked to defend positions on Iraq War',
            'description': 'Veterans react on \'The Kelly File\'',
            'timestamp': 1473299755,
            'upload_date': '20160908',
        },
    }

    def _real_extract(self, url):
        """Return a url_result pointing at the video embedded in the article."""
        article_slug = self._match_id(url)
        page = self._download_webpage(url, article_slug)

        # The player markup carries the video ID in a quoted
        # data-video-id attribute (either quote style is accepted).
        embedded_id = self._html_search_regex(
            r'data-video-id=([\'"])(?P<id>[^\'"]+)\1',
            page, 'video ID', group='id')

        target_url = 'http://video.foxnews.com/v/' + embedded_id
        return self.url_result(target_url, FoxNewsIE.ie_key())
|
||||
|
||||
|
||||
class FoxNewsInsiderIE(InfoExtractor):
    """Extractor for insider.foxnews.com article pages.

    The page's ``embedUrl`` meta value is handed to FoxNewsIE as a
    ``url_transparent`` result, so the title and description found on the
    article page are returned alongside it.
    """

    _VALID_URL = r'https?://insider\.foxnews\.com/([^/]+/)+(?P<id>[a-z-]+)'
    IE_NAME = 'foxnews:insider'

    _TEST = {
        'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words',
        'md5': 'a10c755e582d28120c62749b4feb4c0c',
        'info_dict': {
            'id': '5099377331001',
            'display_id': 'univ-wisconsin-student-group-pushing-silence-certain-words',
            'ext': 'mp4',
            'title': 'Student Group: Saying \'Politically Correct,\' \'Trash\' and \'Lame\' Is Offensive',
            'description': 'Is campus censorship getting out of control?',
            'timestamp': 1472168725,
            'upload_date': '20160825',
            # Raw string: '\.' is not a valid escape in a normal string
            # literal; the resulting value is unchanged.
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': [FoxNewsIE.ie_key()],
    }

    def _real_extract(self, url):
        """Build a url_transparent result that defers to FoxNewsIE."""
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        # URL of the embedded player, taken from the page's embedUrl meta.
        embed_url = self._html_search_meta('embedUrl', webpage, 'embed URL')

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)

        return {
            '_type': 'url_transparent',
            'ie_key': FoxNewsIE.ie_key(),
            'url': embed_url,
            'display_id': display_id,
            'title': title,
            'description': description,
        }
|
||||
|
@@ -1,14 +1,10 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
@@ -21,8 +17,9 @@ class GameStarIE(InfoExtractor):
|
||||
'id': '76110',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hobbit 3: Die Schlacht der Fünf Heere - Teaser-Trailer zum dritten Teil',
|
||||
'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den vollständigen Trailer an.',
|
||||
'thumbnail': 'http://images.gamestar.de/images/idgwpgsgp/bdb/2494525/600x.jpg',
|
||||
'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den...',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'timestamp': 1406542020,
|
||||
'upload_date': '20140728',
|
||||
'duration': 17
|
||||
}
|
||||
@@ -32,41 +29,27 @@ class GameStarIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
og_title = self._og_search_title(webpage)
|
||||
title = re.sub(r'\s*- Video (bei|-) GameStar\.de$', '', og_title)
|
||||
|
||||
url = 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id
|
||||
|
||||
description = self._og_search_description(webpage).strip()
|
||||
|
||||
thumbnail = self._proto_relative_url(
|
||||
self._og_search_thumbnail(webpage), scheme='http:')
|
||||
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'<span style="float:left;font-size:11px;">Datum: ([0-9]+\.[0-9]+\.[0-9]+) ',
|
||||
webpage, 'upload_date', fatal=False))
|
||||
|
||||
duration = parse_duration(self._html_search_regex(
|
||||
r' Länge: ([0-9]+:[0-9]+)</span>', webpage, 'duration',
|
||||
fatal=False))
|
||||
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
r' Zuschauer: ([0-9\.]+) ', webpage,
|
||||
'view_count', fatal=False))
|
||||
# TODO: there are multiple ld+json objects in the webpage,
|
||||
# while _search_json_ld finds only the first one
|
||||
json_ld = self._parse_json(self._search_regex(
|
||||
r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>[^<]+VideoObject[^<]+)</script>',
|
||||
webpage, 'JSON-LD', group='json_ld'), video_id)
|
||||
info_dict = self._json_ld(json_ld, video_id)
|
||||
info_dict['title'] = remove_end(info_dict['title'], ' - GameStar')
|
||||
|
||||
view_count = json_ld.get('interactionCount')
|
||||
comment_count = int_or_none(self._html_search_regex(
|
||||
r'>Kommentieren \(([0-9]+)\)</a>', webpage, 'comment_count',
|
||||
r'([0-9]+) Kommentare</span>', webpage, 'comment_count',
|
||||
fatal=False))
|
||||
|
||||
return {
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': url,
|
||||
'ext': 'mp4',
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count
|
||||
}
|
||||
})
|
||||
|
||||
return info_dict
|
||||
|
@@ -104,7 +104,8 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'expected_warnings': [
|
||||
'URL could be a direct video link, returning it as such.'
|
||||
]
|
||||
],
|
||||
'skip': 'URL invalid',
|
||||
},
|
||||
# Direct download with broken HEAD
|
||||
{
|
||||
@@ -268,7 +269,8 @@ class GenericIE(InfoExtractor):
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': 'video gone',
|
||||
},
|
||||
# m3u8 served with Content-Type: text/plain
|
||||
{
|
||||
@@ -283,7 +285,8 @@ class GenericIE(InfoExtractor):
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': 'video gone',
|
||||
},
|
||||
# google redirect
|
||||
{
|
||||
@@ -368,6 +371,7 @@ class GenericIE(InfoExtractor):
|
||||
'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
|
||||
},
|
||||
'add_ie': ['BrightcoveLegacy'],
|
||||
'skip': 'video gone',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.championat.com/video/football/v/87/87499.html',
|
||||
@@ -421,6 +425,7 @@ class GenericIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'movie expired',
|
||||
},
|
||||
# embed.ly video
|
||||
{
|
||||
@@ -448,6 +453,8 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
|
||||
'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
|
||||
},
|
||||
# HEAD requests lead to endless 301, while GET is OK
|
||||
'expected_warnings': ['301'],
|
||||
},
|
||||
# RUTV embed
|
||||
{
|
||||
@@ -522,6 +529,9 @@ class GenericIE(InfoExtractor):
|
||||
'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
# This forum does not allow <iframe> syntaxes anymore
|
||||
# Now HTML tags are displayed as-is
|
||||
'skip': 'No videos on this page',
|
||||
},
|
||||
# Embedded TED video
|
||||
{
|
||||
@@ -570,7 +580,8 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'Requires rtmpdump'
|
||||
}
|
||||
},
|
||||
'skip': 'video gone',
|
||||
},
|
||||
# francetv embed
|
||||
{
|
||||
@@ -2232,11 +2243,11 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Look for VODPlatform embeds
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vod-platform\.net/embed/[^/?#]+)',
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(
|
||||
self._proto_relative_url(unescapeHTML(mobj.group(1))), 'VODPlatform')
|
||||
self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
|
||||
|
||||
# Look for Instagram embeds
|
||||
instagram_embed_url = InstagramIE._extract_embed_url(webpage)
|
||||
|
@@ -2,7 +2,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
||||
class GlideIE(InfoExtractor):
|
||||
@@ -14,10 +13,8 @@ class GlideIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'UZF8zlmuQbe4mr+7dCiQ0w==',
|
||||
'ext': 'mp4',
|
||||
'title': 'Damon Timm\'s Glide message',
|
||||
'title': "Damon's Glide message",
|
||||
'thumbnail': 're:^https?://.*?\.cloudfront\.net/.*\.jpg$',
|
||||
'uploader': 'Damon Timm',
|
||||
'upload_date': '20140919',
|
||||
}
|
||||
}
|
||||
|
||||
@@ -27,7 +24,8 @@ class GlideIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.+?)</title>', webpage, 'title')
|
||||
r'<title>(.+?)</title>', webpage,
|
||||
'title', default=None) or self._og_search_title(webpage)
|
||||
video_url = self._proto_relative_url(self._search_regex(
|
||||
r'<source[^>]+src=(["\'])(?P<url>.+?)\1',
|
||||
webpage, 'video URL', default=None,
|
||||
@@ -36,18 +34,10 @@ class GlideIE(InfoExtractor):
|
||||
r'<img[^>]+id=["\']video-thumbnail["\'][^>]+src=(["\'])(?P<url>.+?)\1',
|
||||
webpage, 'thumbnail url', default=None,
|
||||
group='url')) or self._og_search_thumbnail(webpage)
|
||||
uploader = self._search_regex(
|
||||
r'<div[^>]+class=["\']info-name["\'][^>]*>([^<]+)',
|
||||
webpage, 'uploader', fatal=False)
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'<div[^>]+class="info-date"[^>]*>([^<]+)',
|
||||
webpage, 'upload date', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
|
@@ -19,7 +19,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class GloboIE(InfoExtractor):
|
||||
_VALID_URL = '(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})'
|
||||
_VALID_URL = r'(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})'
|
||||
|
||||
_API_URL_TEMPLATE = 'http://api.globovideos.com/videos/%s/playlist'
|
||||
_SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=17.0.0.132&resource_id=%s'
|
||||
@@ -396,7 +396,7 @@ class GloboIE(InfoExtractor):
|
||||
|
||||
|
||||
class GloboArticleIE(InfoExtractor):
|
||||
_VALID_URL = 'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/]+)(?:\.html)?'
|
||||
_VALID_URL = r'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/]+)(?:\.html)?'
|
||||
|
||||
_VIDEOID_REGEXES = [
|
||||
r'\bdata-video-id=["\'](\d{7,})',
|
||||
|
101
youtube_dl/extractor/go.py
Normal file
101
youtube_dl/extractor/go.py
Normal file
@@ -0,0 +1,101 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
determine_ext,
|
||||
parse_age_limit,
|
||||
)
|
||||
|
||||
|
||||
class GoIE(InfoExtractor):
    """Extractor for videos hosted on the branded *.go.com sites.

    The brand sub-domain selects a brand code (see ``_BRANDS``) that is
    required to build the metadata API URL.
    """

    # Brand sub-domain -> brand code used in the contents API URL.
    _BRANDS = {
        'abc': '001',
        'freeform': '002',
        'watchdisneychannel': '004',
        'watchdisneyjunior': '008',
        'watchdisneyxd': '009',
    }
    # The sub-domain is mandatory: _real_extract looks it up in _BRANDS, so a
    # URL without a known brand could only fail with a KeyError on None.
    _VALID_URL = r'https?://(?P<sub_domain>%s)\.go\.com/.*?vdka(?P<id>\w+)' % '|'.join(sorted(_BRANDS))
    _TESTS = [{
        'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
        'info_dict': {
            'id': '0_g86w5onx',
            'ext': 'mp4',
            'title': 'Sneak Peek: Language Arts',
            'description': 'md5:7dcdab3b2d17e5217c953256af964e9c',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://abc.go.com/shows/after-paradise/video/most-recent/vdka3335601',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the video's JSON metadata and build the info dict.

        Formats come from the ``assets`` list, subtitles from
        ``closedcaption`` and thumbnails from ``thumbnails``; entries
        without a ``value`` URL are skipped.
        """
        sub_domain, video_id = re.match(self._VALID_URL, url).groups()
        video_data = self._download_json(
            'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (self._BRANDS[sub_domain], video_id),
            video_id)['video'][0]
        title = video_data['title']

        formats = []
        for asset in video_data.get('assets', {}).get('asset', []):
            asset_url = asset.get('value')
            if not asset_url:
                continue
            format_id = asset.get('format')
            ext = determine_ext(asset_url)
            if ext == 'm3u8':
                # HLS manifests expand into several formats; a failing
                # manifest is non-fatal so other assets can still be used.
                formats.extend(self._extract_m3u8_formats(
                    asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False))
            else:
                formats.append({
                    'format_id': format_id,
                    'url': asset_url,
                    'ext': ext,
                })
        self._sort_formats(formats)

        subtitles = {}
        for cc in video_data.get('closedcaption', {}).get('src', []):
            cc_url = cc.get('value')
            if not cc_url:
                continue
            ext = determine_ext(cc_url)
            # Caption files with an .xml extension are reported as TTML.
            if ext == 'xml':
                ext = 'ttml'
            subtitles.setdefault(cc.get('lang'), []).append({
                'url': cc_url,
                'ext': ext,
            })

        thumbnails = []
        for thumbnail in video_data.get('thumbnails', {}).get('thumbnail', []):
            thumbnail_url = thumbnail.get('value')
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(thumbnail.get('width')),
                'height': int_or_none(thumbnail.get('height')),
            })

        return {
            'id': video_id,
            'title': title,
            'description': video_data.get('longdescription') or video_data.get('description'),
            # Scaled down by 1000 -- presumably the API reports
            # milliseconds; TODO confirm.
            'duration': int_or_none(video_data.get('duration', {}).get('value'), 1000),
            'age_limit': parse_age_limit(video_data.get('tvrating', {}).get('rating')),
            'episode_number': int_or_none(video_data.get('episodenumber')),
            'series': video_data.get('show', {}).get('title'),
            'season_number': int_or_none(video_data.get('season', {}).get('num')),
            'thumbnails': thumbnails,
            'formats': formats,
            'subtitles': subtitles,
        }
|
@@ -48,13 +48,23 @@ class InternetVideoArchiveIE(InfoExtractor):
|
||||
# There are multiple videos in the playlist while only the first one
|
||||
# matches the video played in browsers
|
||||
video_info = configuration['playlist'][0]
|
||||
title = video_info['title']
|
||||
|
||||
formats = []
|
||||
for source in video_info['sources']:
|
||||
file_url = source['file']
|
||||
if determine_ext(file_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
file_url, video_id, ext='mp4', m3u8_id='hls'))
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
file_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
|
||||
if m3u8_formats:
|
||||
formats.extend(m3u8_formats)
|
||||
file_url = m3u8_formats[0]['url']
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
file_url.replace('.m3u8', '.f4m'),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
file_url.replace('.m3u8', '.mpd'),
|
||||
video_id, mpd_id='dash', fatal=False))
|
||||
else:
|
||||
a_format = {
|
||||
'url': file_url,
|
||||
@@ -70,7 +80,6 @@ class InternetVideoArchiveIE(InfoExtractor):
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = video_info['title']
|
||||
description = video_info.get('description')
|
||||
thumbnail = video_info.get('image')
|
||||
else:
|
||||
|
77
youtube_dl/extractor/iwara.py
Normal file
77
youtube_dl/extractor/iwara.py
Normal file
@@ -0,0 +1,77 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlparse
|
||||
from ..utils import remove_end
|
||||
|
||||
|
||||
class IwaraIE(InfoExtractor):
    """Extractor for iwara.tv / ecchi.iwara.tv video pages.

    Pages with an HTML5 media element are extracted directly; otherwise
    the first iframe on the page is returned as a url_transparent result.
    """

    _VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos/(?P<id>[a-zA-Z0-9]+)'
    _TESTS = [{
        'url': 'http://iwara.tv/videos/amVwUl1EHpAD9RD',
        'md5': '1d53866b2c514b23ed69e4352fdc9839',
        'info_dict': {
            'id': 'amVwUl1EHpAD9RD',
            'ext': 'mp4',
            'title': '【MMD R-18】ガールフレンド carry_me_off',
            'age_limit': 18,
        },
    }, {
        'url': 'http://ecchi.iwara.tv/videos/Vb4yf2yZspkzkBO',
        'md5': '7e5f1f359cd51a027ba4a7b7710a50f0',
        'info_dict': {
            'id': '0B1LvuHnL-sRFNXB1WHNqbGw4SXc',
            'ext': 'mp4',
            'title': '[3D Hentai] Kyonyu Ã\x97 Genkai Ã\x97 Emaki Shinobi Girls.mp4',
            'age_limit': 18,
        },
        'add_ie': ['GoogleDrive'],
    }, {
        'url': 'http://www.iwara.tv/videos/nawkaumd6ilezzgq',
        'md5': '1d85f1e5217d2791626cff5ec83bb189',
        'info_dict': {
            'id': '6liAP9s2Ojc',
            'ext': 'mp4',
            'age_limit': 0,
            'title': '[MMD] Do It Again Ver.2 [1080p 60FPS] (Motion,Camera,Wav+DL)',
            'description': 'md5:590c12c0df1443d833fbebe05da8c47a',
            'upload_date': '20160910',
            'uploader': 'aMMDsork',
            'uploader_id': 'UCVOFyOSCyFkXTYYHITtqB7A',
        },
        'add_ie': ['Youtube'],
    }]

    def _real_extract(self, url):
        """Extract the hosted video, or defer to the page's iframe."""
        video_id = self._match_id(url)

        webpage, handle = self._download_webpage_handle(url, video_id)

        # The age limit is decided by the host of the final URL reported by
        # the response handle, not by the URL that was requested.
        final_host = compat_urllib_parse_urlparse(handle.geturl()).hostname
        # ecchi is 'sexy' in Japanese
        if final_host.split('.')[0] == 'ecchi':
            age_limit = 18
        else:
            age_limit = 0

        media_entries = self._parse_html5_media_entries(url, webpage, video_id)

        if media_entries:
            page_title = self._html_search_regex(
                r'<title>([^<]+)</title>', webpage, 'title')

            result = media_entries[0]
            result.update({
                'id': video_id,
                'title': remove_end(page_title, ' | Iwara'),
                'age_limit': age_limit,
            })
            return result

        # No HTML5 media on the page: hand the first iframe's URL on.
        embedded_url = self._html_search_regex(
            r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1',
            webpage, 'iframe URL', group='url')
        return {
            '_type': 'url_transparent',
            'url': embedded_url,
            'age_limit': age_limit,
        }
|
@@ -63,10 +63,17 @@ class JWPlatformBaseIE(InfoExtractor):
|
||||
'ext': ext,
|
||||
})
|
||||
else:
|
||||
height = int_or_none(source.get('height'))
|
||||
if height is None:
|
||||
# Often no height is provided but there is a label in
|
||||
# format like 1080p.
|
||||
height = int_or_none(self._search_regex(
|
||||
r'^(\d{3,})[pP]$', source.get('label') or '',
|
||||
'height', default=None))
|
||||
a_format = {
|
||||
'url': source_url,
|
||||
'width': int_or_none(source.get('width')),
|
||||
'height': int_or_none(source.get('height')),
|
||||
'height': height,
|
||||
'ext': ext,
|
||||
}
|
||||
if source_url.startswith('rtmp'):
|
||||
|
@@ -5,7 +5,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class KaraoketvIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www.karaoketv.co.il/[^/]+/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://www\.karaoketv\.co\.il/[^/]+/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.karaoketv.co.il/%D7%A9%D7%99%D7%A8%D7%99_%D7%A7%D7%A8%D7%99%D7%95%D7%A7%D7%99/58356/%D7%90%D7%99%D7%96%D7%95%D7%9F',
|
||||
'info_dict': {
|
||||
|
52
youtube_dl/extractor/ketnet.py
Normal file
52
youtube_dl/extractor/ketnet.py
Normal file
@@ -0,0 +1,52 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class KetnetIE(InfoExtractor):
    """Extractor for ketnet.be video pages.

    All metadata and the HLS manifest URL are read from the
    ``playerConfig`` JavaScript object embedded in the page.
    """

    _VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.ketnet.be/kijken/zomerse-filmpjes',
        'md5': 'd907f7b1814ef0fa285c0475d9994ed7',
        'info_dict': {
            'id': 'zomerse-filmpjes',
            'ext': 'mp4',
            'title': 'Gluur mee op de filmset en op Pennenzakkenrock',
            'description': 'Gluur mee met Ghost Rockers op de filmset',
            # Raw string: '\.' is not a valid escape in a normal string
            # literal; the resulting value is unchanged.
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }, {
        'url': 'https://www.ketnet.be/kijken/karrewiet/uitzending-8-september-2016',
        'only_matching': True,
    }, {
        'url': 'https://www.ketnet.be/achter-de-schermen/sien-repeteert-voor-stars-for-life',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Parse the embedded player config and return the info dict."""
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # playerConfig is assigned an object literal terminated by ';';
        # (?s) lets the literal span multiple lines.
        config = self._parse_json(
            self._search_regex(
                r'(?s)playerConfig\s*=\s*({.+?})\s*;', webpage,
                'player config'),
            video_id)

        title = config['title']

        formats = self._extract_m3u8_formats(
            config['source']['hls'], video_id, 'mp4',
            entry_protocol='m3u8_native', m3u8_id='hls')
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': config.get('description'),
            'thumbnail': config.get('image'),
            'series': config.get('program'),
            'episode': config.get('episode'),
            'formats': formats,
        }
|
@@ -37,7 +37,6 @@ class KickStarterIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Power Drive 2000',
|
||||
},
|
||||
'expected_warnings': ['OpenGraph description'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -67,6 +66,6 @@ class KickStarterIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
'description': self._og_search_description(webpage, default=None),
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
@@ -18,31 +18,20 @@ from ..utils import (
|
||||
class KUSIIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.kusi.com/story/31183873/turko-files-case-closed-put-on-hold',
|
||||
'md5': 'f926e7684294cf8cb7bdf8858e1b3988',
|
||||
'url': 'http://www.kusi.com/story/32849881/turko-files-refused-to-help-it-aint-right',
|
||||
'md5': '4e76ce8e53660ce9697d06c0ba6fc47d',
|
||||
'info_dict': {
|
||||
'id': '12203019',
|
||||
'id': '12689020',
|
||||
'ext': 'mp4',
|
||||
'title': 'Turko Files: Case Closed! & Put On Hold!',
|
||||
'duration': 231.0,
|
||||
'upload_date': '20160210',
|
||||
'timestamp': 1455087571,
|
||||
'title': "Turko Files: Refused to Help, It Ain't Right!",
|
||||
'duration': 223.586,
|
||||
'upload_date': '20160826',
|
||||
'timestamp': 1472233118,
|
||||
'thumbnail': 're:^https?://.*\.jpg$'
|
||||
},
|
||||
}, {
|
||||
'url': 'http://kusi.com/video?clipId=12203019',
|
||||
'info_dict': {
|
||||
'id': '12203019',
|
||||
'ext': 'mp4',
|
||||
'title': 'Turko Files: Case Closed! & Put On Hold!',
|
||||
'duration': 231.0,
|
||||
'upload_date': '20160210',
|
||||
'timestamp': 1455087571,
|
||||
'thumbnail': 're:^https?://.*\.jpg$'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # Same as previous one
|
||||
},
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
24
youtube_dl/extractor/lci.py
Normal file
24
youtube_dl/extractor/lci.py
Normal file
@@ -0,0 +1,24 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class LCIIE(InfoExtractor):
    """Extractor for lci.fr article pages; delegates to the Wat extractor."""

    _VALID_URL = r'https?://(?:www\.)?lci\.fr/[^/]+/[\w-]+-(?P<id>\d+)\.html'
    _TEST = {
        'url': 'http://www.lci.fr/international/etats-unis-a-j-62-hillary-clinton-reste-sans-voix-2001679.html',
        'md5': '2fdb2538b884d4d695f9bd2bde137e6c',
        'info_dict': {
            'id': '13244802',
            'ext': 'mp4',
            'title': 'Hillary Clinton et sa quinte de toux, en plein meeting',
            'description': 'md5:a4363e3a960860132f8124b62f4a01c9',
        }
    }

    def _real_extract(self, url):
        """Find the page's data-watid value and return a 'wat:' url_result."""
        article_id = self._match_id(url)
        page = self._download_webpage(url, article_id)
        # Numeric ID taken from the data-watid attribute in the page markup.
        wat_id = self._search_regex(
            r'data-watid=[\'"](\d+)', page, 'wat id')
        wat_url = 'wat:' + wat_id
        return self.url_result(wat_url, 'Wat', wat_id)
|
@@ -34,11 +34,12 @@ class LimelightBaseIE(InfoExtractor):
|
||||
def _extract_info(self, streams, mobile_urls, properties):
|
||||
video_id = properties['media_id']
|
||||
formats = []
|
||||
|
||||
urls = []
|
||||
for stream in streams:
|
||||
stream_url = stream.get('url')
|
||||
if not stream_url or stream.get('drmProtected'):
|
||||
if not stream_url or stream.get('drmProtected') or stream_url in urls:
|
||||
continue
|
||||
urls.append(stream_url)
|
||||
ext = determine_ext(stream_url)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
@@ -58,9 +59,11 @@ class LimelightBaseIE(InfoExtractor):
|
||||
format_id = 'rtmp'
|
||||
if stream.get('videoBitRate'):
|
||||
format_id += '-%d' % int_or_none(stream['videoBitRate'])
|
||||
http_url = 'http://%s/%s' % (rtmp.group('host').replace('csl.', 'cpl.'), rtmp.group('playpath')[4:])
|
||||
urls.append(http_url)
|
||||
http_fmt = fmt.copy()
|
||||
http_fmt.update({
|
||||
'url': 'http://%s/%s' % (rtmp.group('host').replace('csl.', 'cpl.'), rtmp.group('playpath')[4:]),
|
||||
'url': http_url,
|
||||
'format_id': format_id.replace('rtmp', 'http'),
|
||||
})
|
||||
formats.append(http_fmt)
|
||||
@@ -76,8 +79,9 @@ class LimelightBaseIE(InfoExtractor):
|
||||
for mobile_url in mobile_urls:
|
||||
media_url = mobile_url.get('mobileUrl')
|
||||
format_id = mobile_url.get('targetMediaPlatform')
|
||||
if not media_url or format_id == 'Widevine':
|
||||
if not media_url or format_id in ('Widevine', 'SmoothStreaming') or media_url in urls:
|
||||
continue
|
||||
urls.append(media_url)
|
||||
ext = determine_ext(media_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
|
@@ -1,8 +1,11 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
remove_end,
|
||||
@@ -12,8 +15,10 @@ from ..utils import (
|
||||
class LRTIE(InfoExtractor):
|
||||
IE_NAME = 'lrt.lt'
|
||||
_VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/irasas/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# m3u8 download
|
||||
'url': 'http://www.lrt.lt/mediateka/irasas/54391/',
|
||||
'md5': 'fe44cf7e4ab3198055f2c598fc175cb0',
|
||||
'info_dict': {
|
||||
'id': '54391',
|
||||
'ext': 'mp4',
|
||||
@@ -23,20 +28,45 @@ class LRTIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 download
|
||||
}, {
|
||||
# direct mp3 download
|
||||
'url': 'http://www.lrt.lt/mediateka/irasas/1013074524/',
|
||||
'md5': '389da8ca3cad0f51d12bed0c844f6a0a',
|
||||
'info_dict': {
|
||||
'id': '1013074524',
|
||||
'ext': 'mp3',
|
||||
'title': 'Kita tema 2016-09-05 15:05',
|
||||
'description': 'md5:1b295a8fc7219ed0d543fc228c931fb5',
|
||||
'duration': 3008,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = remove_end(self._og_search_title(webpage), ' - LRT')
|
||||
m3u8_url = self._search_regex(
|
||||
r'file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*location\.hash\.substring\(1\)',
|
||||
webpage, 'm3u8 url', group='url')
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
|
||||
|
||||
formats = []
|
||||
for _, file_url in re.findall(
|
||||
r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
|
||||
ext = determine_ext(file_url)
|
||||
if ext not in ('m3u8', 'mp3'):
|
||||
continue
|
||||
# mp3 served as m3u8 produces stuttered media file
|
||||
if ext == 'm3u8' and '.mp3' in file_url:
|
||||
continue
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
file_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
fatal=False))
|
||||
elif ext == 'mp3':
|
||||
formats.append({
|
||||
'url': file_url,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
40
youtube_dl/extractor/miaopai.py
Normal file
40
youtube_dl/extractor/miaopai.py
Normal file
@@ -0,0 +1,40 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class MiaoPaiIE(InfoExtractor):
    """Extractor for miaopai.com "show" pages.

    The page is requested with an iPad User-Agent and the video is taken
    from the first HTML5 media entry found in the returned markup.
    """

    _VALID_URL = r'https?://(?:www\.)?miaopai\.com/show/(?P<id>[-A-Za-z0-9~_]+)'

    # User-Agent header sent with the page request.
    _USER_AGENT_IPAD = 'Mozilla/5.0 (iPad; CPU OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1'

    _TEST = {
        'url': 'http://www.miaopai.com/show/n~0hO7sfV1nBEw4Y29-Hqg__.htm',
        'md5': '095ed3f1cd96b821add957bdc29f845b',
        'info_dict': {
            'id': 'n~0hO7sfV1nBEw4Y29-Hqg__',
            'ext': 'mp4',
            'title': '西游记音乐会的秒拍视频',
            'thumbnail': 're:^https?://.*/n~0hO7sfV1nBEw4Y29-Hqg___m.jpg',
        }
    }

    def _real_extract(self, url):
        """Download the page as an iPad client and return the first video."""
        video_id = self._match_id(url)
        request_headers = {'User-Agent': self._USER_AGENT_IPAD}
        webpage = self._download_webpage(
            url, video_id, headers=request_headers)

        page_title = self._html_search_regex(
            r'<title>([^<]+)</title>', webpage, 'title')
        # The thumbnail is optional: it is read from the data-url attribute
        # of the <div> whose class list contains "video_img".
        thumb_url = self._html_search_regex(
            r'<div[^>]+class=(?P<q1>[\'"]).*\bvideo_img\b.*(?P=q1)[^>]+data-url=(?P<q2>[\'"])(?P<url>[^\'"]+)(?P=q2)',
            webpage, 'thumbnail', fatal=False, group='url')

        media_entries = self._parse_html5_media_entries(url, webpage, video_id)
        result = media_entries[0]
        result.update({
            'id': video_id,
            'title': page_title,
            'thumbnail': thumb_url,
        })
        return result
|
@@ -35,7 +35,8 @@ class MoeVideoIE(InfoExtractor):
|
||||
'height': 360,
|
||||
'duration': 179,
|
||||
'filesize': 17822500,
|
||||
}
|
||||
},
|
||||
'skip': 'Video has been removed',
|
||||
},
|
||||
{
|
||||
'url': 'http://playreplay.net/video/77107.7f325710a627383d40540d8e991a',
|
||||
|
@@ -7,22 +7,19 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class SSAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://ssa\.nls\.uk/film/(?P<id>\d+)'
|
||||
class MovingImageIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://movingimage\.nls\.uk/film/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://ssa.nls.uk/film/3561',
|
||||
'url': 'http://movingimage.nls.uk/film/3561',
|
||||
'md5': '4caa05c2b38453e6f862197571a7be2f',
|
||||
'info_dict': {
|
||||
'id': '3561',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'SHETLAND WOOL',
|
||||
'description': 'md5:c5afca6871ad59b4271e7704fe50ab04',
|
||||
'duration': 900,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -30,10 +27,9 @@ class SSAIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
streamer = self._search_regex(
|
||||
r"'streamer'\s*,\S*'(rtmp[^']+)'", webpage, 'streamer')
|
||||
play_path = self._search_regex(
|
||||
r"'file'\s*,\s*'([^']+)'", webpage, 'file').rpartition('.')[0]
|
||||
formats = self._extract_m3u8_formats(
|
||||
self._html_search_regex(r'file\s*:\s*"([^"]+)"', webpage, 'm3u8 manifest URL'),
|
||||
video_id, ext='mp4', entry_protocol='m3u8_native')
|
||||
|
||||
def search_field(field_name, fatal=False):
|
||||
return self._search_regex(
|
||||
@@ -44,13 +40,11 @@ class SSAIE(InfoExtractor):
|
||||
description = unescapeHTML(search_field('Description'))
|
||||
duration = parse_duration(search_field('Running time'))
|
||||
thumbnail = self._search_regex(
|
||||
r"'image'\s*,\s*'([^']+)'", webpage, 'thumbnails', fatal=False)
|
||||
r"image\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': streamer,
|
||||
'play_path': play_path,
|
||||
'ext': 'flv',
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
@@ -4,7 +4,6 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_str,
|
||||
compat_xpath,
|
||||
)
|
||||
@@ -14,12 +13,13 @@ from ..utils import (
|
||||
fix_xml_ampersands,
|
||||
float_or_none,
|
||||
HEADRequest,
|
||||
RegexNotFoundError,
|
||||
sanitized_Request,
|
||||
strip_or_none,
|
||||
timeconvert,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
url_basename,
|
||||
RegexNotFoundError,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
@@ -36,6 +36,11 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
def _id_from_uri(uri):
|
||||
return uri.split(':')[-1]
|
||||
|
||||
@staticmethod
|
||||
def _remove_template_parameter(url):
|
||||
# Remove the templates, like &device={device}
|
||||
return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url)
|
||||
|
||||
# This was originally implemented for ComedyCentral, but it also works here
|
||||
@classmethod
|
||||
def _transform_rtmp_url(cls, rtmp_video_url):
|
||||
@@ -117,9 +122,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
video_id = self._id_from_uri(uri)
|
||||
self.report_extraction(video_id)
|
||||
content_el = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content')))
|
||||
mediagen_url = content_el.attrib['url']
|
||||
# Remove the templates, like &device={device}
|
||||
mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', mediagen_url)
|
||||
mediagen_url = self._remove_template_parameter(content_el.attrib['url'])
|
||||
if 'acceptMethods' not in mediagen_url:
|
||||
mediagen_url += '&' if '?' in mediagen_url else '?'
|
||||
mediagen_url += 'acceptMethods=fms'
|
||||
@@ -178,12 +181,12 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
data = {'uri': uri}
|
||||
if self._LANG:
|
||||
data['lang'] = self._LANG
|
||||
return compat_urllib_parse_urlencode(data)
|
||||
return data
|
||||
|
||||
def _get_videos_info(self, uri):
|
||||
video_id = self._id_from_uri(uri)
|
||||
feed_url = self._get_feed_url(uri)
|
||||
info_url = feed_url + '?' + self._get_feed_query(uri)
|
||||
info_url = update_url_query(feed_url, self._get_feed_query(uri))
|
||||
return self._get_videos_info_from_url(info_url, video_id)
|
||||
|
||||
def _get_videos_info_from_url(self, url, video_id):
|
||||
@@ -256,13 +259,9 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
|
||||
|
||||
def _get_feed_url(self, uri):
|
||||
video_id = self._id_from_uri(uri)
|
||||
site_id = uri.replace(video_id, '')
|
||||
config_url = ('http://media.mtvnservices.com/pmt-arc/e1/players/{0}/'
|
||||
'context52/config.xml'.format(site_id))
|
||||
config_doc = self._download_xml(config_url, video_id)
|
||||
feed_node = config_doc.find('.//feed')
|
||||
feed_url = feed_node.text.strip().split('?')[0]
|
||||
return feed_url
|
||||
config = self._download_json(
|
||||
'http://media.mtvnservices.com/pmt/e1/access/index.html?uri=%s&configtype=edge' % uri, video_id)
|
||||
return self._remove_template_parameter(config['feedWithQueryParams'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@@ -13,7 +13,7 @@ class MyVidsterIE(InfoExtractor):
|
||||
'id': '3685814',
|
||||
'title': 'md5:7d8427d6d02c4fbcef50fe269980c749',
|
||||
'upload_date': '20141027',
|
||||
'uploader_id': 'utkualp',
|
||||
'uploader': 'utkualp',
|
||||
'ext': 'mp4',
|
||||
'age_limit': 18,
|
||||
},
|
||||
|
@@ -1,25 +1,20 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import functools
|
||||
import os.path
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .turner import TurnerBaseIE
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
OnDemandPagedList,
|
||||
parse_duration,
|
||||
remove_start,
|
||||
xpath_text,
|
||||
xpath_attr,
|
||||
)
|
||||
|
||||
|
||||
class NBAIE(InfoExtractor):
|
||||
class NBAIE(TurnerBaseIE):
|
||||
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?P<path>(?:[^/]+/)+(?P<id>[^?]*?))/?(?:/index\.html)?(?:\?.*)?$'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
|
||||
@@ -44,28 +39,30 @@ class NBAIE(InfoExtractor):
|
||||
'url': 'http://watch.nba.com/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
|
||||
'md5': 'b2b39b81cf28615ae0c3360a3f9668c4',
|
||||
'info_dict': {
|
||||
'id': '0041400301-cle-atl-recap',
|
||||
'id': 'channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hawks vs. Cavaliers Game 1',
|
||||
'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',
|
||||
'duration': 228,
|
||||
'timestamp': 1432134543,
|
||||
'upload_date': '20150520',
|
||||
}
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'http://www.nba.com/clippers/news/doc-rivers-were-not-trading-blake',
|
||||
'info_dict': {
|
||||
'id': '1455672027478-Doc_Feb16_720',
|
||||
'id': 'teams/clippers/2016/02/17/1455672027478-Doc_Feb16_720.mov-297324',
|
||||
'ext': 'mp4',
|
||||
'title': 'Practice: Doc Rivers - 2/16/16',
|
||||
'description': 'Head Coach Doc Rivers addresses the media following practice.',
|
||||
'upload_date': '20160217',
|
||||
'upload_date': '20160216',
|
||||
'timestamp': 1455672000,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
|
||||
'info_dict': {
|
||||
@@ -80,7 +77,7 @@ class NBAIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
|
||||
'info_dict': {
|
||||
'id': 'Wigginsmp4',
|
||||
'id': 'teams/timberwolves/2014/12/12/Wigginsmp4-3462601',
|
||||
'ext': 'mp4',
|
||||
'title': 'Shootaround Access - Dec. 12 | Andrew Wiggins',
|
||||
'description': 'Wolves rookie Andrew Wiggins addresses the media after Friday\'s shootaround.',
|
||||
@@ -92,6 +89,7 @@ class NBAIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}]
|
||||
|
||||
_PAGE_SIZE = 30
|
||||
@@ -145,53 +143,12 @@ class NBAIE(InfoExtractor):
|
||||
if path.startswith('video/teams'):
|
||||
path = 'video/channels/proxy/' + path[6:]
|
||||
|
||||
video_info = self._download_xml('http://www.nba.com/%s.xml' % path, video_id)
|
||||
video_id = os.path.splitext(xpath_text(video_info, 'slug'))[0]
|
||||
title = xpath_text(video_info, 'headline')
|
||||
description = xpath_text(video_info, 'description')
|
||||
duration = parse_duration(xpath_text(video_info, 'length'))
|
||||
timestamp = int_or_none(xpath_attr(video_info, 'dateCreated', 'uts'))
|
||||
|
||||
thumbnails = []
|
||||
for image in video_info.find('images'):
|
||||
thumbnails.append({
|
||||
'id': image.attrib.get('cut'),
|
||||
'url': image.text,
|
||||
'width': int_or_none(image.attrib.get('width')),
|
||||
'height': int_or_none(image.attrib.get('height')),
|
||||
return self._extract_cvp_info(
|
||||
'http://www.nba.com/%s.xml' % path, video_id, {
|
||||
'default': {
|
||||
'media_src': 'http://nba.cdn.turner.com/nba/big',
|
||||
},
|
||||
'm3u8': {
|
||||
'media_src': 'http://nbavod-f.akamaihd.net',
|
||||
},
|
||||
})
|
||||
|
||||
formats = []
|
||||
for video_file in video_info.findall('.//file'):
|
||||
video_url = video_file.text
|
||||
if video_url.startswith('/'):
|
||||
continue
|
||||
if video_url.endswith('.m3u8'):
|
||||
formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
|
||||
elif video_url.endswith('.f4m'):
|
||||
formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.1.1', video_id, f4m_id='hds', fatal=False))
|
||||
else:
|
||||
key = video_file.attrib.get('bitrate')
|
||||
format_info = {
|
||||
'format_id': key,
|
||||
'url': video_url,
|
||||
}
|
||||
mobj = re.search(r'(\d+)x(\d+)(?:_(\d+))?', key)
|
||||
if mobj:
|
||||
format_info.update({
|
||||
'width': int(mobj.group(1)),
|
||||
'height': int(mobj.group(2)),
|
||||
'tbr': int_or_none(mobj.group(3)),
|
||||
})
|
||||
formats.append(format_info)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -1,15 +1,12 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class NewgroundsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:audio/listen|portal/view)/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.newgrounds.com/audio/listen/549479',
|
||||
'url': 'https://www.newgrounds.com/audio/listen/549479',
|
||||
'md5': 'fe6033d297591288fa1c1f780386f07a',
|
||||
'info_dict': {
|
||||
'id': '549479',
|
||||
@@ -18,7 +15,7 @@ class NewgroundsIE(InfoExtractor):
|
||||
'uploader': 'Burn7',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.newgrounds.com/portal/view/673111',
|
||||
'url': 'https://www.newgrounds.com/portal/view/673111',
|
||||
'md5': '3394735822aab2478c31b1004fe5e5bc',
|
||||
'info_dict': {
|
||||
'id': '673111',
|
||||
@@ -29,24 +26,20 @@ class NewgroundsIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
music_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, music_id)
|
||||
media_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, media_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>([^>]+)</title>', webpage, 'title')
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
[r',"artist":"([^"]+)",', r'[\'"]owner[\'"]\s*:\s*[\'"]([^\'"]+)[\'"],'],
|
||||
webpage, 'uploader')
|
||||
r'Author\s*<a[^>]+>([^<]+)', webpage, 'uploader', fatal=False)
|
||||
|
||||
music_url_json_string = self._html_search_regex(
|
||||
r'({"url":"[^"]+"),', webpage, 'music url') + '}'
|
||||
music_url_json = json.loads(music_url_json_string)
|
||||
music_url = music_url_json['url']
|
||||
music_url = self._parse_json(self._search_regex(
|
||||
r'"url":("[^"]+"),', webpage, ''), media_id)
|
||||
|
||||
return {
|
||||
'id': music_id,
|
||||
'id': media_id,
|
||||
'title': title,
|
||||
'url': music_url,
|
||||
'uploader': uploader,
|
||||
|
50
youtube_dl/extractor/nhk.py
Normal file
50
youtube_dl/extractor/nhk.py
Normal file
@@ -0,0 +1,50 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class NhkVodIE(InfoExtractor):
    # Resolves NHK World VOD pages to the Ooyala embed they contain.
    _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/en/vod/(?P<id>.+?)\.html'
    _TEST = {
        # Videos available only for a limited period of time. Visit
        # http://www3.nhk.or.jp/nhkworld/en/vod/ for working samples.
        'url': 'http://www3.nhk.or.jp/nhkworld/en/vod/tokyofashion/20160815.html',
        'info_dict': {
            'id': 'A1bnNiNTE6nY3jLllS-BIISfcC_PpvF5',
            'ext': 'flv',
            'title': 'TOKYO FASHION EXPRESS - The Kimono as Global Fashion',
            'description': 'md5:db338ee6ce8204f415b754782f819824',
            'series': 'TOKYO FASHION EXPRESS',
            'episode': 'The Kimono as Global Fashion',
        },
        'skip': 'Videos available only for a limited period of time',
    }

    def _real_extract(self, url):
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)

        # The embed code is the only mandatory piece of data; every metadata
        # lookup below falls back to None when its pattern is absent.
        embed_code = self._search_regex(
            r'nw_vod_ooplayer\([^,]+,\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
            webpage, 'ooyala embed code', group='id')

        episode = self._search_regex(
            r'<div[^>]+class=["\']episode-detail["\']>\s*<h\d+>([^<]+)',
            webpage, 'title', default=None)
        description = self._html_search_regex(
            r'(?s)<p[^>]+class=["\']description["\'][^>]*>(.+?)</p>',
            webpage, 'description', default=None)
        series = self._search_regex(
            r'<h2[^>]+class=["\']detail-top-player-title[^>]+><a[^>]+>([^<]+)',
            webpage, 'series', default=None)

        # Prefix the episode title with the series name only when both exist.
        if series and episode:
            full_title = '%s - %s' % (series, episode)
        else:
            full_title = episode

        # Hand the actual extraction over to the Ooyala extractor while
        # keeping the metadata scraped from this page.
        return {
            '_type': 'url_transparent',
            'ie_key': 'Ooyala',
            'url': 'ooyala:%s' % embed_code,
            'title': full_title,
            'description': description,
            'series': series,
            'episode': episode,
        }
|
@@ -2,7 +2,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .mtv import MTVServicesInfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
from ..utils import update_url_query
|
||||
|
||||
|
||||
@@ -59,10 +58,10 @@ class NickIE(MTVServicesInfoExtractor):
|
||||
}]
|
||||
|
||||
def _get_feed_query(self, uri):
|
||||
return compat_urllib_parse_urlencode({
|
||||
return {
|
||||
'feed': 'nick_arc_player_prime',
|
||||
'mgid': uri,
|
||||
})
|
||||
}
|
||||
|
||||
def _extract_mgid(self, webpage):
|
||||
return self._search_regex(r'data-contenturi="([^"]+)', webpage, 'mgid')
|
||||
@@ -70,13 +69,16 @@ class NickIE(MTVServicesInfoExtractor):
|
||||
|
||||
class NickDeIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'nick.de'
|
||||
_VALID_URL = r'https?://(?:www\.)?nick\.de/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:nick\.de|nickelodeon\.nl)/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.nick.de/shows/342-icarly',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.nickelodeon.nl/shows/474-spongebob/videos/17403-een-kijkje-in-de-keuken-met-sandy-van-binnenuit',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -4,40 +4,36 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
parse_duration,
|
||||
ExtractorError
|
||||
float_or_none,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class NineCNineMediaIE(InfoExtractor):
|
||||
_VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)'
|
||||
class NineCNineMediaBaseIE(InfoExtractor):
|
||||
_API_BASE_TEMPLATE = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/'
|
||||
|
||||
|
||||
class NineCNineMediaStackIE(NineCNineMediaBaseIE):
|
||||
IE_NAME = '9c9media:stack'
|
||||
_VALID_URL = r'9c9media:stack:(?P<destination_code>[^:]+):(?P<content_id>\d+):(?P<content_package>\d+):(?P<id>\d+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
destination_code, video_id = re.match(self._VALID_URL, url).groups()
|
||||
api_base_url = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/' % (destination_code, video_id)
|
||||
content = self._download_json(api_base_url, video_id, query={
|
||||
'$include': '[contentpackages]',
|
||||
})
|
||||
title = content['Name']
|
||||
if len(content['ContentPackages']) > 1:
|
||||
raise ExtractorError('multiple content packages')
|
||||
content_package = content['ContentPackages'][0]
|
||||
stacks_base_url = api_base_url + 'contentpackages/%s/stacks/' % content_package['Id']
|
||||
stacks = self._download_json(stacks_base_url, video_id)['Items']
|
||||
if len(stacks) > 1:
|
||||
raise ExtractorError('multiple stacks')
|
||||
stack = stacks[0]
|
||||
stack_base_url = '%s%s/manifest.' % (stacks_base_url, stack['Id'])
|
||||
destination_code, content_id, package_id, stack_id = re.match(self._VALID_URL, url).groups()
|
||||
stack_base_url_template = self._API_BASE_TEMPLATE + 'contentpackages/%s/stacks/%s/manifest.'
|
||||
stack_base_url = stack_base_url_template % (destination_code, content_id, package_id, stack_id)
|
||||
|
||||
formats = []
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stack_base_url + 'm3u8', video_id, 'mp4',
|
||||
stack_base_url + 'm3u8', stack_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
stack_base_url + 'f4m', video_id,
|
||||
stack_base_url + 'f4m', stack_id,
|
||||
f4m_id='hds', fatal=False))
|
||||
mp4_url = self._download_webpage(stack_base_url + 'pd', video_id, fatal=False)
|
||||
mp4_url = self._download_webpage(stack_base_url + 'pd', stack_id, fatal=False)
|
||||
if mp4_url:
|
||||
formats.append({
|
||||
'url': mp4_url,
|
||||
@@ -46,10 +42,86 @@ class NineCNineMediaIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': content.get('Desc') or content.get('ShortDesc'),
|
||||
'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
|
||||
'duration': parse_duration(content.get('BroadcastTime')),
|
||||
'id': stack_id,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class NineCNineMediaIE(NineCNineMediaBaseIE):
    # Resolves a "9c9media:<destination>:<content id>" pseudo-URL into a
    # multi_video result with one url_transparent entry per stack; each entry
    # is delegated to NineCNineMediaStackIE via its '9c9media:stack:' URL.
    IE_NAME = '9c9media'
    _VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)'

    def _real_extract(self, url):
        destination_code, content_id = re.match(self._VALID_URL, url).groups()
        api_base_url = self._API_BASE_TEMPLATE % (destination_code, content_id)
        content = self._download_json(api_base_url, content_id, query={
            '$include': '[Media,Season,ContentPackages]',
        })
        title = content['Name']
        if len(content['ContentPackages']) > 1:
            raise ExtractorError('multiple content packages')
        content_package = content['ContentPackages'][0]
        package_id = content_package['Id']
        content_package_url = api_base_url + 'contentpackages/%s/' % package_id
        # Re-fetch the package on its own URL; the 'Constraints' data checked
        # below is read from this second response.
        content_package = self._download_json(content_package_url, content_id)

        # `x.get(key) or {}` instead of `x.get(key, {})`: the default of
        # dict.get only applies to a missing key, so a JSON null would
        # otherwise surface as None and break the chained lookup.
        constraints = content_package.get('Constraints') or {}
        security = constraints.get('Security') or {}
        if security.get('Type') == 'adobe-drm':
            raise ExtractorError('This video is DRM protected.', expected=True)

        stacks = self._download_json(content_package_url + 'stacks/', package_id)['Items']
        multistacks = len(stacks) > 1

        thumbnails = []
        for image in content.get('Images') or []:
            image_url = image.get('Url')
            if not image_url:
                continue
            thumbnails.append({
                'url': image_url,
                'width': int_or_none(image.get('Width')),
                'height': int_or_none(image.get('Height')),
            })

        tags, categories = [], []
        for source_name, container in (('Tags', tags), ('Genres', categories)):
            for e in content.get(source_name) or []:
                e_name = e.get('Name')
                if not e_name:
                    continue
                container.append(e_name)

        description = content.get('Desc') or content.get('ShortDesc')
        season = content.get('Season') or {}
        media = content.get('Media') or {}
        # Metadata shared by every stack entry.
        base_info = {
            'description': description,
            'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
            'episode_number': int_or_none(content.get('Episode')),
            'season': season.get('Name'),
            # Normalised like episode_number above.
            'season_number': int_or_none(season.get('Number')),
            'season_id': season.get('Id'),
            'series': media.get('Name'),
            'tags': tags,
            'categories': categories,
            # Previously collected but never returned.
            'thumbnails': thumbnails,
        }

        entries = []
        for stack in stacks:
            stack_id = compat_str(stack['Id'])
            entry = {
                '_type': 'url_transparent',
                'url': '9c9media:stack:%s:%s:%s:%s' % (destination_code, content_id, package_id, stack_id),
                'id': stack_id,
                # Only disambiguate the title when there are several stacks.
                'title': '%s_part%s' % (title, stack['Name']) if multistacks else title,
                'duration': float_or_none(stack.get('Duration')),
                'ie_key': 'NineCNineMediaStack',
            }
            entry.update(base_info)
            entries.append(entry)

        return {
            '_type': 'multi_video',
            'id': content_id,
            'title': title,
            'description': description,
            'entries': entries,
        }
|
||||
|
@@ -44,7 +44,20 @@ class NineNowIE(InfoExtractor):
|
||||
page_data = self._parse_json(self._search_regex(
|
||||
r'window\.__data\s*=\s*({.*?});', webpage,
|
||||
'page data'), display_id)
|
||||
common_data = page_data.get('episode', {}).get('episode') or page_data.get('clip', {}).get('clip')
|
||||
|
||||
for kind in ('episode', 'clip'):
|
||||
current_key = page_data.get(kind, {}).get(
|
||||
'current%sKey' % kind.capitalize())
|
||||
if not current_key:
|
||||
continue
|
||||
cache = page_data.get(kind, {}).get('%sCache' % kind, {})
|
||||
if not cache:
|
||||
continue
|
||||
common_data = (cache.get(current_key) or list(cache.values())[0])[kind]
|
||||
break
|
||||
else:
|
||||
raise ExtractorError('Unable to find video data')
|
||||
|
||||
video_data = common_data['video']
|
||||
|
||||
if video_data.get('drm'):
|
||||
|
@@ -1,26 +1,37 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import hmac
|
||||
import hashlib
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
mimetype2ext,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
class NYTimesBaseIE(InfoExtractor):
|
||||
_SECRET = b'pX(2MbU2);4N{7J8)>YwKRJ+/pQ3JkiU2Q^V>mFYv6g6gYvt6v'
|
||||
|
||||
def _extract_video_from_id(self, video_id):
|
||||
video_data = self._download_json(
|
||||
'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
# Authorization generation algorithm is reverse engineered from `signer` in
|
||||
# http://graphics8.nytimes.com/video/vhs/vhs-2.x.min.js
|
||||
path = '/svc/video/api/v3/video/' + video_id
|
||||
hm = hmac.new(self._SECRET, (path + ':vhs').encode(), hashlib.sha512).hexdigest()
|
||||
video_data = self._download_json('http://www.nytimes.com' + path, video_id, 'Downloading video JSON', headers={
|
||||
'Authorization': 'NYTV ' + base64.b64encode(hm.encode()).decode(),
|
||||
'X-NYTV': 'vhs',
|
||||
}, fatal=False)
|
||||
if not video_data:
|
||||
video_data = self._download_json(
|
||||
'http://www.nytimes.com/svc/video/api/v2/video/' + video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
title = video_data['headline']
|
||||
description = video_data.get('summary')
|
||||
duration = float_or_none(video_data.get('duration'), 1000)
|
||||
|
||||
uploader = video_data.get('byline')
|
||||
publication_date = video_data.get('publication_date')
|
||||
timestamp = parse_iso8601(publication_date[:-8]) if publication_date else None
|
||||
|
||||
def get_file_size(file_size):
|
||||
if isinstance(file_size, int):
|
||||
@@ -28,35 +39,59 @@ class NYTimesBaseIE(InfoExtractor):
|
||||
elif isinstance(file_size, dict):
|
||||
return int(file_size.get('value', 0))
|
||||
else:
|
||||
return 0
|
||||
return None
|
||||
|
||||
formats = [
|
||||
{
|
||||
'url': video['url'],
|
||||
'format_id': video.get('type'),
|
||||
'vcodec': video.get('video_codec'),
|
||||
'width': int_or_none(video.get('width')),
|
||||
'height': int_or_none(video.get('height')),
|
||||
'filesize': get_file_size(video.get('fileSize')),
|
||||
} for video in video_data['renditions'] if video.get('url')
|
||||
]
|
||||
urls = []
|
||||
formats = []
|
||||
for video in video_data.get('renditions', []):
|
||||
video_url = video.get('url')
|
||||
format_id = video.get('type')
|
||||
if not video_url or format_id == 'thumbs' or video_url in urls:
|
||||
continue
|
||||
urls.append(video_url)
|
||||
ext = mimetype2ext(video.get('mimetype')) or determine_ext(video_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id=format_id or 'hls', fatal=False))
|
||||
elif ext == 'mpd':
|
||||
continue
|
||||
# formats.extend(self._extract_mpd_formats(
|
||||
# video_url, video_id, format_id or 'dash', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'vcodec': video.get('videoencoding') or video.get('video_codec'),
|
||||
'width': int_or_none(video.get('width')),
|
||||
'height': int_or_none(video.get('height')),
|
||||
'filesize': get_file_size(video.get('file_size') or video.get('fileSize')),
|
||||
'tbr': int_or_none(video.get('bitrate'), 1000),
|
||||
'ext': ext,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = [
|
||||
{
|
||||
'url': 'http://www.nytimes.com/%s' % image['url'],
|
||||
thumbnails = []
|
||||
for image in video_data.get('images', []):
|
||||
image_url = image.get('url')
|
||||
if not image_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': 'http://www.nytimes.com/' + image_url,
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
} for image in video_data.get('images', []) if image.get('url')
|
||||
]
|
||||
})
|
||||
|
||||
publication_date = video_data.get('publication_date')
|
||||
timestamp = parse_iso8601(publication_date[:-8]) if publication_date else None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'description': video_data.get('summary'),
|
||||
'timestamp': timestamp,
|
||||
'uploader': uploader,
|
||||
'duration': duration,
|
||||
'uploader': video_data.get('byline'),
|
||||
'duration': float_or_none(video_data.get('duration'), 1000),
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
@@ -67,7 +102,7 @@ class NYTimesIE(NYTimesBaseIE):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
|
||||
'md5': '18a525a510f942ada2720db5f31644c0',
|
||||
'md5': 'd665342765db043f7e225cff19df0f2d',
|
||||
'info_dict': {
|
||||
'id': '100000002847155',
|
||||
'ext': 'mov',
|
||||
|
@@ -90,7 +90,7 @@ class OnetBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class OnetIE(OnetBaseIE):
|
||||
_VALID_URL = 'https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)'
|
||||
IE_NAME = 'onet.tv'
|
||||
|
||||
_TEST = {
|
||||
|
@@ -55,10 +55,12 @@ class OpenloadIE(InfoExtractor):
|
||||
|
||||
video_url_chars = []
|
||||
|
||||
for c in enc_data:
|
||||
for idx, c in enumerate(enc_data):
|
||||
j = compat_ord(c)
|
||||
if j >= 33 and j <= 126:
|
||||
j = ((j + 14) % 94) + 33
|
||||
if idx == len(enc_data) - 1:
|
||||
j += 3
|
||||
video_url_chars += compat_chr(j)
|
||||
|
||||
video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars)
|
||||
|
@@ -1,53 +1,40 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class ParliamentLiveUKIE(InfoExtractor):
|
||||
IE_NAME = 'parliamentlive.tv'
|
||||
IE_DESC = 'UK parliament videos'
|
||||
_VALID_URL = r'https?://www\.parliamentlive\.tv/Main/Player\.aspx\?(?:[^&]+&)*?meetingId=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.parliamentlive.tv/Main/Player.aspx?meetingId=15121&player=windowsmedia',
|
||||
'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
|
||||
'info_dict': {
|
||||
'id': '15121',
|
||||
'ext': 'asf',
|
||||
'title': 'hoc home affairs committee, 18 mar 2014.pm',
|
||||
'description': 'md5:033b3acdf83304cd43946b2d5e5798d1',
|
||||
'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
|
||||
'ext': 'mp4',
|
||||
'title': 'Home Affairs Committee',
|
||||
'uploader_id': 'FFMPEG-01',
|
||||
'timestamp': 1422696664,
|
||||
'upload_date': '20150131',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # Requires mplayer (mms)
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
asx_url = self._html_search_regex(
|
||||
r'embed.*?src="([^"]+)" name="MediaPlayer"', webpage,
|
||||
'metadata URL')
|
||||
asx = self._download_xml(asx_url, video_id, 'Downloading ASX metadata')
|
||||
video_url = asx.find('.//REF').attrib['HREF']
|
||||
|
||||
title = self._search_regex(
|
||||
r'''(?x)player\.setClipDetails\(
|
||||
(?:(?:[0-9]+|"[^"]+"),\s*){2}
|
||||
"([^"]+",\s*"[^"]+)"
|
||||
''',
|
||||
webpage, 'title').replace('", "', ', ')
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<span id="MainContentPlaceHolder_CaptionsBlock_WitnessInfo">(.*?)</span>',
|
||||
webpage, 'description')
|
||||
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
'http://vodplayer.parliamentlive.tv/?mid=' + video_id, video_id)
|
||||
widget_config = self._parse_json(self._search_regex(
|
||||
r'kWidgetConfig\s*=\s*({.+});',
|
||||
webpage, 'kaltura widget config'), video_id)
|
||||
kaltura_url = 'kaltura:%s:%s' % (widget_config['wid'][1:], widget_config['entry_id'])
|
||||
event_title = self._download_json(
|
||||
'http://parliamentlive.tv/Event/GetShareVideo/' + video_id, video_id)['event']['title']
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'ext': 'asf',
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'title': event_title,
|
||||
'description': '',
|
||||
'url': kaltura_url,
|
||||
'ie_key': 'Kaltura',
|
||||
}
|
||||
|
@@ -8,7 +8,14 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class PeriscopeIE(InfoExtractor):
|
||||
class PeriscopeBaseIE(InfoExtractor):
|
||||
def _call_api(self, method, query, item_id):
|
||||
return self._download_json(
|
||||
'https://api.periscope.tv/api/v2/%s' % method,
|
||||
item_id, query=query)
|
||||
|
||||
|
||||
class PeriscopeIE(PeriscopeBaseIE):
|
||||
IE_DESC = 'Periscope'
|
||||
IE_NAME = 'periscope'
|
||||
_VALID_URL = r'https?://(?:www\.)?periscope\.tv/[^/]+/(?P<id>[^/?#]+)'
|
||||
@@ -34,14 +41,11 @@ class PeriscopeIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _call_api(self, method, value):
|
||||
return self._download_json(
|
||||
'https://api.periscope.tv/api/v2/%s?broadcast_id=%s' % (method, value), value)
|
||||
|
||||
def _real_extract(self, url):
|
||||
token = self._match_id(url)
|
||||
|
||||
broadcast_data = self._call_api('getBroadcastPublic', token)
|
||||
broadcast_data = self._call_api(
|
||||
'getBroadcastPublic', {'broadcast_id': token}, token)
|
||||
broadcast = broadcast_data['broadcast']
|
||||
status = broadcast['status']
|
||||
|
||||
@@ -61,7 +65,8 @@ class PeriscopeIE(InfoExtractor):
|
||||
'url': broadcast[image],
|
||||
} for image in ('image_url', 'image_url_small') if broadcast.get(image)]
|
||||
|
||||
stream = self._call_api('getAccessPublic', token)
|
||||
stream = self._call_api(
|
||||
'getAccessPublic', {'broadcast_id': token}, token)
|
||||
|
||||
formats = []
|
||||
for format_id in ('replay', 'rtmp', 'hls', 'https_hls'):
|
||||
@@ -88,7 +93,7 @@ class PeriscopeIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class PeriscopeUserIE(InfoExtractor):
|
||||
class PeriscopeUserIE(PeriscopeBaseIE):
|
||||
_VALID_URL = r'https?://www\.periscope\.tv/(?P<id>[^/]+)/?$'
|
||||
IE_DESC = 'Periscope user videos'
|
||||
IE_NAME = 'periscope:user'
|
||||
@@ -106,26 +111,34 @@ class PeriscopeUserIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id = self._match_id(url)
|
||||
user_name = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, user_id)
|
||||
webpage = self._download_webpage(url, user_name)
|
||||
|
||||
data_store = self._parse_json(
|
||||
unescapeHTML(self._search_regex(
|
||||
r'data-store=(["\'])(?P<data>.+?)\1',
|
||||
webpage, 'data store', default='{}', group='data')),
|
||||
user_id)
|
||||
user_name)
|
||||
|
||||
user = data_store.get('User', {}).get('user', {})
|
||||
title = user.get('display_name') or user.get('username')
|
||||
user = list(data_store['UserCache']['users'].values())[0]['user']
|
||||
user_id = user['id']
|
||||
session_id = data_store['SessionToken']['broadcastHistory']['token']['session_id']
|
||||
|
||||
broadcasts = self._call_api(
|
||||
'getUserBroadcastsPublic',
|
||||
{'user_id': user_id, 'session_id': session_id},
|
||||
user_name)['broadcasts']
|
||||
|
||||
broadcast_ids = [
|
||||
broadcast['id'] for broadcast in broadcasts if broadcast.get('id')]
|
||||
|
||||
title = user.get('display_name') or user.get('username') or user_name
|
||||
description = user.get('description')
|
||||
|
||||
broadcast_ids = (data_store.get('UserBroadcastHistory', {}).get('broadcastIds') or
|
||||
data_store.get('BroadcastCache', {}).get('broadcastIds', []))
|
||||
|
||||
entries = [
|
||||
self.url_result(
|
||||
'https://www.periscope.tv/%s/%s' % (user_id, broadcast_id))
|
||||
'https://www.periscope.tv/%s/%s' % (user_name, broadcast_id))
|
||||
for broadcast_id in broadcast_ids]
|
||||
|
||||
return self.playlist_result(entries, user_id, title, description)
|
||||
|
@@ -1,60 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import os.path
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class PlayedIE(InfoExtractor):
|
||||
IE_NAME = 'played.to'
|
||||
_VALID_URL = r'https?://(?:www\.)?played\.to/(?P<id>[a-zA-Z0-9_-]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://played.to/j2f2sfiiukgt',
|
||||
'md5': 'c2bd75a368e82980e7257bf500c00637',
|
||||
'info_dict': {
|
||||
'id': 'j2f2sfiiukgt',
|
||||
'ext': 'flv',
|
||||
'title': 'youtube-dl_test_video.mp4',
|
||||
},
|
||||
'skip': 'Removed for copyright infringement.', # oh wow
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
orig_webpage = self._download_webpage(url, video_id)
|
||||
|
||||
m_error = re.search(
|
||||
r'(?s)Reason for deletion:.*?<b class="err"[^>]*>(?P<msg>[^<]+)</b>', orig_webpage)
|
||||
if m_error:
|
||||
raise ExtractorError(m_error.group('msg'), expected=True)
|
||||
|
||||
data = self._hidden_inputs(orig_webpage)
|
||||
|
||||
self._sleep(2, video_id)
|
||||
|
||||
post = urlencode_postdata(data)
|
||||
headers = {
|
||||
b'Content-Type': b'application/x-www-form-urlencoded',
|
||||
}
|
||||
req = sanitized_Request(url, post, headers)
|
||||
webpage = self._download_webpage(
|
||||
req, video_id, note='Downloading video page ...')
|
||||
|
||||
title = os.path.splitext(data['fname'])[0]
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'file: "?(.+?)",', webpage, 'video URL')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
}
|
@@ -1,14 +1,17 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse
|
||||
)
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
unified_timestamp,
|
||||
@@ -97,3 +100,81 @@ class PolskieRadioIE(InfoExtractor):
|
||||
description = strip_or_none(self._og_search_description(webpage))
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
||||
|
||||
|
||||
class PolskieRadioCategoryIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+(?:,[^/]+)?/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.polskieradio.pl/7/5102,HISTORIA-ZYWA',
|
||||
'info_dict': {
|
||||
'id': '5102',
|
||||
'title': 'HISTORIA ŻYWA',
|
||||
},
|
||||
'playlist_mincount': 38,
|
||||
}, {
|
||||
'url': 'http://www.polskieradio.pl/7/4807',
|
||||
'info_dict': {
|
||||
'id': '4807',
|
||||
'title': 'Vademecum 1050. rocznicy Chrztu Polski'
|
||||
},
|
||||
'playlist_mincount': 5
|
||||
}, {
|
||||
'url': 'http://www.polskieradio.pl/7/129,Sygnaly-dnia?ref=source',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'http://www.polskieradio.pl/37,RedakcjaKatolicka/4143,Kierunek-Krakow',
|
||||
'info_dict': {
|
||||
'id': '4143',
|
||||
'title': 'Kierunek Kraków',
|
||||
},
|
||||
'playlist_mincount': 61
|
||||
}, {
|
||||
'url': 'http://www.polskieradio.pl/10,czworka/214,muzyka',
|
||||
'info_dict': {
|
||||
'id': '214',
|
||||
'title': 'Muzyka',
|
||||
},
|
||||
'playlist_mincount': 61
|
||||
}, {
|
||||
'url': 'http://www.polskieradio.pl/7,Jedynka/5102,HISTORIA-ZYWA',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.polskieradio.pl/8,Dwojka/196,Publicystyka',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if PolskieRadioIE.suitable(url) else super(PolskieRadioCategoryIE, cls).suitable(url)
|
||||
|
||||
def _entries(self, url, page, category_id):
|
||||
content = page
|
||||
for page_num in itertools.count(2):
|
||||
for a_entry, entry_id in re.findall(
|
||||
r'(?s)<article[^>]+>.*?(<a[^>]+href=["\']/\d+/\d+/Artykul/(\d+)[^>]+>).*?</article>',
|
||||
content):
|
||||
entry = extract_attributes(a_entry)
|
||||
href = entry.get('href')
|
||||
if not href:
|
||||
continue
|
||||
yield self.url_result(
|
||||
compat_urlparse.urljoin(url, href), PolskieRadioIE.ie_key(),
|
||||
entry_id, entry.get('title'))
|
||||
mobj = re.search(
|
||||
r'<div[^>]+class=["\']next["\'][^>]*>\s*<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
content)
|
||||
if not mobj:
|
||||
break
|
||||
next_url = compat_urlparse.urljoin(url, mobj.group('url'))
|
||||
content = self._download_webpage(
|
||||
next_url, category_id, 'Downloading page %s' % page_num)
|
||||
|
||||
def _real_extract(self, url):
|
||||
category_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, category_id)
|
||||
title = self._html_search_regex(
|
||||
r'<title>([^<]+) - [^<]+ - [^<]+</title>',
|
||||
webpage, 'title', fatal=False)
|
||||
return self.playlist_result(
|
||||
self._entries(url, webpage, category_id),
|
||||
category_id, title)
|
||||
|
@@ -26,6 +26,8 @@ class PornComIE(InfoExtractor):
|
||||
'duration': 551,
|
||||
'view_count': int,
|
||||
'age_limit': 18,
|
||||
'categories': list,
|
||||
'tags': list,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://se.porn.com/videos/marsha-may-rides-seth-on-top-of-his-thick-cock-2658067',
|
||||
@@ -75,7 +77,14 @@ class PornComIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'class=["\']views["\'][^>]*><p>([\d,.]+)', webpage, 'view count'))
|
||||
r'class=["\']views["\'][^>]*><p>([\d,.]+)', webpage,
|
||||
'view count', fatal=False))
|
||||
|
||||
def extract_list(kind):
|
||||
s = self._search_regex(
|
||||
r'(?s)<p[^>]*>%s:(.+?)</p>' % kind.capitalize(),
|
||||
webpage, kind, fatal=False)
|
||||
return re.findall(r'<a[^>]+>([^<]+)</a>', s or '')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -86,4 +95,6 @@ class PornComIE(InfoExtractor):
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
'age_limit': 18,
|
||||
'categories': extract_list('categories'),
|
||||
'tags': extract_list('tags'),
|
||||
}
|
||||
|
@@ -15,6 +15,7 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
orderedSet,
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
@@ -48,6 +49,8 @@ class PornHubIE(InfoExtractor):
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'age_limit': 18,
|
||||
'tags': list,
|
||||
'categories': list,
|
||||
},
|
||||
}, {
|
||||
# non-ASCII title
|
||||
@@ -63,6 +66,8 @@ class PornHubIE(InfoExtractor):
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'age_limit': 18,
|
||||
'tags': list,
|
||||
'categories': list,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -183,6 +188,15 @@ class PornHubIE(InfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
page_params = self._parse_json(self._search_regex(
|
||||
r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})',
|
||||
webpage, 'page parameters', group='data', default='{}'),
|
||||
video_id, transform_source=js_to_json, fatal=False)
|
||||
tags = categories = None
|
||||
if page_params:
|
||||
tags = page_params.get('tags', '').split(',')
|
||||
categories = page_params.get('categories', '').split(',')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'uploader': video_uploader,
|
||||
@@ -195,6 +209,8 @@ class PornHubIE(InfoExtractor):
|
||||
'comment_count': comment_count,
|
||||
'formats': formats,
|
||||
'age_limit': 18,
|
||||
'tags': tags,
|
||||
'categories': categories,
|
||||
}
|
||||
|
||||
|
||||
|
@@ -2,7 +2,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import random
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -13,61 +12,69 @@ from ..utils import (
|
||||
|
||||
|
||||
class PornoVoisinesIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?pornovoisines\.com/showvideo/(?P<id>\d+)/(?P<display_id>[^/]+)'
|
||||
|
||||
_VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \
|
||||
'/static/media/video/transcoded/%s-640x360-1000-trscded.mp4'
|
||||
|
||||
_SERVER_NUMBERS = (1, 2)
|
||||
_VALID_URL = r'https?://(?:www\.)?pornovoisines\.com/videos/show/(?P<id>\d+)/(?P<display_id>[^/.]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.pornovoisines.com/showvideo/1285/recherche-appartement/',
|
||||
'md5': '5ac670803bc12e9e7f9f662ce64cf1d1',
|
||||
'url': 'http://www.pornovoisines.com/videos/show/919/recherche-appartement.html',
|
||||
'md5': '6f8aca6a058592ab49fe701c8ba8317b',
|
||||
'info_dict': {
|
||||
'id': '1285',
|
||||
'id': '919',
|
||||
'display_id': 'recherche-appartement',
|
||||
'ext': 'mp4',
|
||||
'title': 'Recherche appartement',
|
||||
'description': 'md5:819ea0b785e2a04667a1a01cdc89594e',
|
||||
'description': 'md5:fe10cb92ae2dd3ed94bb4080d11ff493',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'upload_date': '20140925',
|
||||
'duration': 120,
|
||||
'view_count': int,
|
||||
'average_rating': float,
|
||||
'categories': ['Débutantes', 'Scénario', 'Sodomie'],
|
||||
'categories': ['Débutante', 'Débutantes', 'Scénario', 'Sodomie'],
|
||||
'age_limit': 18,
|
||||
'subtitles': {
|
||||
'fr': [{
|
||||
'ext': 'vtt',
|
||||
}]
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def build_video_url(cls, num):
|
||||
return cls._VIDEO_URL_TEMPLATE % (random.choice(cls._SERVER_NUMBERS), num)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
settings_url = self._download_json(
|
||||
'http://www.pornovoisines.com/api/video/%s/getsettingsurl/' % video_id,
|
||||
video_id, note='Getting settings URL')['video_settings_url']
|
||||
settings = self._download_json(settings_url, video_id)['data']
|
||||
|
||||
formats = []
|
||||
for kind, data in settings['variants'].items():
|
||||
if kind == 'HLS':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
data, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls'))
|
||||
elif kind == 'MP4':
|
||||
for item in data:
|
||||
formats.append({
|
||||
'url': item['url'],
|
||||
'height': item.get('height'),
|
||||
'bitrate': item.get('bitrate'),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self.build_video_url(video_id)
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h1>(.+?)</h1>', webpage, 'title', flags=re.DOTALL)
|
||||
description = self._html_search_regex(
|
||||
r'<article id="descriptif">(.+?)</article>',
|
||||
webpage, 'description', fatal=False, flags=re.DOTALL)
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
r'<div id="mediaspace%s">\s*<img src="/?([^"]+)"' % video_id,
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
if thumbnail:
|
||||
thumbnail = 'http://www.pornovoisines.com/%s' % thumbnail
|
||||
# The webpage has a bug - there's no space between "thumb" and src=
|
||||
thumbnail = self._html_search_regex(
|
||||
r'<img[^>]+class=([\'"])thumb\1[^>]*src=([\'"])(?P<url>[^"]+)\2',
|
||||
webpage, 'thumbnail', fatal=False, group='url')
|
||||
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'Publié le ([\d-]+)', webpage, 'upload date', fatal=False))
|
||||
duration = int_or_none(self._search_regex(
|
||||
'Durée (\d+)', webpage, 'duration', fatal=False))
|
||||
r'Le\s*<b>([\d/]+)', webpage, 'upload date', fatal=False))
|
||||
duration = settings.get('main', {}).get('duration')
|
||||
view_count = int_or_none(self._search_regex(
|
||||
r'(\d+) vues', webpage, 'view count', fatal=False))
|
||||
average_rating = self._search_regex(
|
||||
@@ -75,15 +82,19 @@ class PornoVoisinesIE(InfoExtractor):
|
||||
if average_rating:
|
||||
average_rating = float_or_none(average_rating.replace(',', '.'))
|
||||
|
||||
categories = self._html_search_meta(
|
||||
'keywords', webpage, 'categories', fatal=False)
|
||||
categories = self._html_search_regex(
|
||||
r'(?s)Catégories\s*:\s*<b>(.+?)</b>', webpage, 'categories', fatal=False)
|
||||
if categories:
|
||||
categories = [category.strip() for category in categories.split(',')]
|
||||
|
||||
subtitles = {'fr': [{
|
||||
'url': subtitle,
|
||||
} for subtitle in settings.get('main', {}).get('vtt_tracks', {}).values()]}
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
@@ -93,4 +104,5 @@ class PornoVoisinesIE(InfoExtractor):
|
||||
'average_rating': average_rating,
|
||||
'categories': categories,
|
||||
'age_limit': 18,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
@@ -15,7 +15,111 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class ProSiebenSat1IE(InfoExtractor):
|
||||
class ProSiebenSat1BaseIE(InfoExtractor):
|
||||
def _extract_video_info(self, url, clip_id):
|
||||
client_location = url
|
||||
|
||||
video = self._download_json(
|
||||
'http://vas.sim-technik.de/vas/live/v2/videos',
|
||||
clip_id, 'Downloading videos JSON', query={
|
||||
'access_token': self._TOKEN,
|
||||
'client_location': client_location,
|
||||
'client_name': self._CLIENT_NAME,
|
||||
'ids': clip_id,
|
||||
})[0]
|
||||
|
||||
if video.get('is_protected') is True:
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
duration = float_or_none(video.get('duration'))
|
||||
source_ids = [compat_str(source['id']) for source in video['sources']]
|
||||
|
||||
client_id = self._SALT[:2] + sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
|
||||
|
||||
sources = self._download_json(
|
||||
'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id,
|
||||
clip_id, 'Downloading sources JSON', query={
|
||||
'access_token': self._TOKEN,
|
||||
'client_id': client_id,
|
||||
'client_location': client_location,
|
||||
'client_name': self._CLIENT_NAME,
|
||||
})
|
||||
server_id = sources['server_id']
|
||||
|
||||
def fix_bitrate(bitrate):
|
||||
bitrate = int_or_none(bitrate)
|
||||
if not bitrate:
|
||||
return None
|
||||
return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
|
||||
|
||||
formats = []
|
||||
for source_id in source_ids:
|
||||
client_id = self._SALT[:2] + sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
|
||||
urls = self._download_json(
|
||||
'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id,
|
||||
clip_id, 'Downloading urls JSON', fatal=False, query={
|
||||
'access_token': self._TOKEN,
|
||||
'client_id': client_id,
|
||||
'client_location': client_location,
|
||||
'client_name': self._CLIENT_NAME,
|
||||
'server_id': server_id,
|
||||
'source_ids': source_id,
|
||||
})
|
||||
if not urls:
|
||||
continue
|
||||
if urls.get('status_code') != 0:
|
||||
raise ExtractorError('This video is unavailable', expected=True)
|
||||
urls_sources = urls['sources']
|
||||
if isinstance(urls_sources, dict):
|
||||
urls_sources = urls_sources.values()
|
||||
for source in urls_sources:
|
||||
source_url = source.get('url')
|
||||
if not source_url:
|
||||
continue
|
||||
protocol = source.get('protocol')
|
||||
mimetype = source.get('mimetype')
|
||||
if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
source_url, clip_id, f4m_id='hds', fatal=False))
|
||||
elif mimetype == 'application/x-mpegURL':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, clip_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
tbr = fix_bitrate(source['bitrate'])
|
||||
if protocol in ('rtmp', 'rtmpe'):
|
||||
mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
|
||||
if not mobj:
|
||||
continue
|
||||
path = mobj.group('path')
|
||||
mp4colon_index = path.rfind('mp4:')
|
||||
app = path[:mp4colon_index]
|
||||
play_path = path[mp4colon_index:]
|
||||
formats.append({
|
||||
'url': '%s/%s' % (mobj.group('url'), app),
|
||||
'app': app,
|
||||
'play_path': play_path,
|
||||
'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
|
||||
'page_url': 'http://www.prosieben.de',
|
||||
'tbr': tbr,
|
||||
'ext': 'flv',
|
||||
'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''),
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'tbr': tbr,
|
||||
'format_id': 'http%s' % ('-%d' % tbr if tbr else ''),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||
IE_NAME = 'prosiebensat1'
|
||||
IE_DESC = 'ProSiebenSat.1 Digital'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany|7tv)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P<id>.+)'
|
||||
@@ -188,6 +292,9 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
},
|
||||
]
|
||||
|
||||
_TOKEN = 'prosieben'
|
||||
_SALT = '01!8d8F_)r9]4s[qeuXfP%'
|
||||
_CLIENT_NAME = 'kolibri-2.0.19-splec4'
|
||||
_CLIPID_REGEXES = [
|
||||
r'"clip_id"\s*:\s+"(\d+)"',
|
||||
r'clipid: "(\d+)"',
|
||||
@@ -234,123 +341,22 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
def _extract_clip(self, url, webpage):
|
||||
clip_id = self._html_search_regex(
|
||||
self._CLIPID_REGEXES, webpage, 'clip id')
|
||||
|
||||
access_token = 'prosieben'
|
||||
client_name = 'kolibri-2.0.19-splec4'
|
||||
client_location = url
|
||||
|
||||
video = self._download_json(
|
||||
'http://vas.sim-technik.de/vas/live/v2/videos',
|
||||
clip_id, 'Downloading videos JSON', query={
|
||||
'access_token': access_token,
|
||||
'client_location': client_location,
|
||||
'client_name': client_name,
|
||||
'ids': clip_id,
|
||||
})[0]
|
||||
|
||||
if video.get('is_protected') is True:
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
duration = float_or_none(video.get('duration'))
|
||||
source_ids = [compat_str(source['id']) for source in video['sources']]
|
||||
|
||||
g = '01!8d8F_)r9]4s[qeuXfP%'
|
||||
client_id = g[:2] + sha1(''.join([clip_id, g, access_token, client_location, g, client_name]).encode('utf-8')).hexdigest()
|
||||
|
||||
sources = self._download_json(
|
||||
'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id,
|
||||
clip_id, 'Downloading sources JSON', query={
|
||||
'access_token': access_token,
|
||||
'client_id': client_id,
|
||||
'client_location': client_location,
|
||||
'client_name': client_name,
|
||||
})
|
||||
server_id = sources['server_id']
|
||||
|
||||
title = self._html_search_regex(self._TITLE_REGEXES, webpage, 'title')
|
||||
|
||||
def fix_bitrate(bitrate):
|
||||
bitrate = int_or_none(bitrate)
|
||||
if not bitrate:
|
||||
return None
|
||||
return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
|
||||
|
||||
formats = []
|
||||
for source_id in source_ids:
|
||||
client_id = g[:2] + sha1(''.join([g, clip_id, access_token, server_id, client_location, source_id, g, client_name]).encode('utf-8')).hexdigest()
|
||||
urls = self._download_json(
|
||||
'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id,
|
||||
clip_id, 'Downloading urls JSON', fatal=False, query={
|
||||
'access_token': access_token,
|
||||
'client_id': client_id,
|
||||
'client_location': client_location,
|
||||
'client_name': client_name,
|
||||
'server_id': server_id,
|
||||
'source_ids': source_id,
|
||||
})
|
||||
if not urls:
|
||||
continue
|
||||
if urls.get('status_code') != 0:
|
||||
raise ExtractorError('This video is unavailable', expected=True)
|
||||
urls_sources = urls['sources']
|
||||
if isinstance(urls_sources, dict):
|
||||
urls_sources = urls_sources.values()
|
||||
for source in urls_sources:
|
||||
source_url = source.get('url')
|
||||
if not source_url:
|
||||
continue
|
||||
protocol = source.get('protocol')
|
||||
mimetype = source.get('mimetype')
|
||||
if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
source_url, clip_id, f4m_id='hds', fatal=False))
|
||||
elif mimetype == 'application/x-mpegURL':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, clip_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
tbr = fix_bitrate(source['bitrate'])
|
||||
if protocol in ('rtmp', 'rtmpe'):
|
||||
mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
|
||||
if not mobj:
|
||||
continue
|
||||
path = mobj.group('path')
|
||||
mp4colon_index = path.rfind('mp4:')
|
||||
app = path[:mp4colon_index]
|
||||
play_path = path[mp4colon_index:]
|
||||
formats.append({
|
||||
'url': '%s/%s' % (mobj.group('url'), app),
|
||||
'app': app,
|
||||
'play_path': play_path,
|
||||
'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
|
||||
'page_url': 'http://www.prosieben.de',
|
||||
'tbr': tbr,
|
||||
'ext': 'flv',
|
||||
'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''),
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'tbr': tbr,
|
||||
'format_id': 'http%s' % ('-%d' % tbr if tbr else ''),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = self._extract_video_info(url, clip_id)
|
||||
description = self._html_search_regex(
|
||||
self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None))
|
||||
|
||||
return {
|
||||
info.update({
|
||||
'id': clip_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
})
|
||||
return info
|
||||
|
||||
def _extract_playlist(self, url, webpage):
|
||||
playlist_id = self._html_search_regex(
|
||||
|
@@ -1,88 +1,51 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .prosiebensat1 import ProSiebenSat1BaseIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
compat_str,
|
||||
)
|
||||
|
||||
|
||||
class Puls4IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?puls4\.com/video/[^/]+/play/(?P<id>[0-9]+)'
|
||||
class Puls4IE(ProSiebenSat1BaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?puls4\.com/(?P<id>(?:[^/]+/)*?videos/[^?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.puls4.com/video/pro-und-contra/play/2716816',
|
||||
'md5': '49f6a6629747eeec43cef6a46b5df81d',
|
||||
'url': 'http://www.puls4.com/2-minuten-2-millionen/staffel-3/videos/2min2miotalk/Tobias-Homberger-von-myclubs-im-2min2miotalk-118118',
|
||||
'md5': 'fd3c6b0903ac72c9d004f04bc6bb3e03',
|
||||
'info_dict': {
|
||||
'id': '2716816',
|
||||
'ext': 'mp4',
|
||||
'title': 'Pro und Contra vom 23.02.2015',
|
||||
'description': 'md5:293e44634d9477a67122489994675db6',
|
||||
'duration': 2989,
|
||||
'upload_date': '20150224',
|
||||
'id': '118118',
|
||||
'ext': 'flv',
|
||||
'title': 'Tobias Homberger von myclubs im #2min2miotalk',
|
||||
'description': 'md5:f9def7c5e8745d6026d8885487d91955',
|
||||
'upload_date': '20160830',
|
||||
'uploader': 'PULS_4',
|
||||
},
|
||||
'skip': 'Only works from Germany',
|
||||
}, {
|
||||
'url': 'http://www.puls4.com/video/kult-spielfilme/play/1298106',
|
||||
'md5': '6a48316c8903ece8dab9b9a7bf7a59ec',
|
||||
'info_dict': {
|
||||
'id': '1298106',
|
||||
'ext': 'mp4',
|
||||
'title': 'Lucky Fritz',
|
||||
},
|
||||
'skip': 'Only works from Germany',
|
||||
}]
|
||||
_TOKEN = 'puls4'
|
||||
_SALT = '01!kaNgaiNgah1Ie4AeSha'
|
||||
_CLIENT_NAME = ''
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
error_message = self._html_search_regex(
|
||||
r'<div[^>]+class="message-error"[^>]*>(.+?)</div>',
|
||||
webpage, 'error message', default=None)
|
||||
if error_message:
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, error_message), expected=True)
|
||||
|
||||
real_url = self._html_search_regex(
|
||||
r'\"fsk-button\".+?href=\"([^"]+)',
|
||||
webpage, 'fsk_button', default=None)
|
||||
if real_url:
|
||||
webpage = self._download_webpage(real_url, video_id)
|
||||
|
||||
player = self._search_regex(
|
||||
r'p4_video_player(?:_iframe)?\("video_\d+_container"\s*,(.+?)\);\s*\}',
|
||||
webpage, 'player')
|
||||
|
||||
player_json = self._parse_json(
|
||||
'[%s]' % player, video_id,
|
||||
transform_source=lambda s: s.replace('undefined,', ''))
|
||||
|
||||
formats = None
|
||||
result = None
|
||||
|
||||
for v in player_json:
|
||||
if isinstance(v, list) and not formats:
|
||||
formats = [{
|
||||
'url': f['url'],
|
||||
'format': 'hd' if f.get('hd') else 'sd',
|
||||
'width': int_or_none(f.get('size_x')),
|
||||
'height': int_or_none(f.get('size_y')),
|
||||
'tbr': int_or_none(f.get('bitrate')),
|
||||
} for f in v]
|
||||
self._sort_formats(formats)
|
||||
elif isinstance(v, dict) and not result:
|
||||
result = {
|
||||
'id': video_id,
|
||||
'title': v['videopartname'].strip(),
|
||||
'description': v.get('videotitle'),
|
||||
'duration': int_or_none(v.get('videoduration') or v.get('episodeduration')),
|
||||
'upload_date': unified_strdate(v.get('clipreleasetime')),
|
||||
'uploader': v.get('channel'),
|
||||
}
|
||||
|
||||
result['formats'] = formats
|
||||
|
||||
return result
|
||||
path = self._match_id(url)
|
||||
content_path = self._download_json(
|
||||
'http://www.puls4.com/api/json-fe/page/' + path, path)['content'][0]['url']
|
||||
media = self._download_json(
|
||||
'http://www.puls4.com' + content_path,
|
||||
content_path)['mediaCurrent']
|
||||
player_content = media['playerContent']
|
||||
info = self._extract_video_info(url, player_content['id'])
|
||||
info.update({
|
||||
'id': compat_str(media['objectId']),
|
||||
'title': player_content['title'],
|
||||
'description': media.get('description'),
|
||||
'thumbnail': media.get('previewLink'),
|
||||
'upload_date': unified_strdate(media.get('date')),
|
||||
'duration': parse_duration(player_content.get('duration')),
|
||||
'episode': player_content.get('episodePartName'),
|
||||
'show': media.get('channel'),
|
||||
'season_id': player_content.get('seasonId'),
|
||||
'uploader': player_content.get('sourceCompany'),
|
||||
})
|
||||
return info
|
||||
|
@@ -1,59 +1,72 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import os
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class PyvideoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
|
||||
_VALID_URL = r'https?://(?:www\.)?pyvideo\.org/(?P<category>[^/]+)/(?P<id>[^/?#&.]+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
|
||||
'md5': '520915673e53a5c5d487c36e0c4d85b5',
|
||||
'info_dict': {
|
||||
'id': '24_4WWkSmNo',
|
||||
'ext': 'webm',
|
||||
'title': 'Become a logging expert in 30 minutes',
|
||||
'description': 'md5:9665350d466c67fb5b1598de379021f7',
|
||||
'upload_date': '20130320',
|
||||
'uploader': 'Next Day Video',
|
||||
'uploader_id': 'NextDayVideo',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
_TESTS = [{
|
||||
'url': 'http://pyvideo.org/pycon-us-2013/become-a-logging-expert-in-30-minutes.html',
|
||||
'info_dict': {
|
||||
'id': 'become-a-logging-expert-in-30-minutes',
|
||||
},
|
||||
{
|
||||
'url': 'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
|
||||
'md5': '5fe1c7e0a8aa5570330784c847ff6d12',
|
||||
'info_dict': {
|
||||
'id': '2542',
|
||||
'ext': 'm4v',
|
||||
'title': 'Gloriajw-SpotifyWithErikBernhardsson182',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
'url': 'http://pyvideo.org/pygotham-2012/gloriajw-spotifywitherikbernhardsson182m4v.html',
|
||||
'md5': '5fe1c7e0a8aa5570330784c847ff6d12',
|
||||
'info_dict': {
|
||||
'id': '2542',
|
||||
'ext': 'm4v',
|
||||
'title': 'Gloriajw-SpotifyWithErikBernhardsson182.m4v',
|
||||
},
|
||||
]
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
category = mobj.group('category')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
entries = []
|
||||
|
||||
m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage)
|
||||
if m_youtube is not None:
|
||||
return self.url_result(m_youtube.group(1), 'Youtube')
|
||||
data = self._download_json(
|
||||
'https://raw.githubusercontent.com/pyvideo/data/master/%s/videos/%s.json'
|
||||
% (category, video_id), video_id, fatal=False)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<div class="section">\s*<h3(?:\s+class="[^"]*"[^>]*)?>([^>]+?)</h3>',
|
||||
webpage, 'title', flags=re.DOTALL)
|
||||
video_url = self._search_regex(
|
||||
[r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'],
|
||||
webpage, 'video url', flags=re.DOTALL)
|
||||
if data:
|
||||
for video in data['videos']:
|
||||
video_url = video.get('url')
|
||||
if video_url:
|
||||
if video.get('type') == 'youtube':
|
||||
entries.append(self.url_result(video_url, 'Youtube'))
|
||||
else:
|
||||
entries.append({
|
||||
'id': compat_str(data.get('id') or video_id),
|
||||
'url': video_url,
|
||||
'title': data['title'],
|
||||
'description': data.get('description') or data.get('summary'),
|
||||
'thumbnail': data.get('thumbnail_url'),
|
||||
'duration': int_or_none(data.get('duration')),
|
||||
})
|
||||
else:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._og_search_title(webpage)
|
||||
media_urls = self._search_regex(
|
||||
r'(?s)Media URL:(.+?)</li>', webpage, 'media urls')
|
||||
for m in re.finditer(
|
||||
r'<a[^>]+href=(["\'])(?P<url>http.+?)\1', media_urls):
|
||||
media_url = m.group('url')
|
||||
if re.match(r'https?://www\.youtube\.com/watch\?v=.*', media_url):
|
||||
entries.append(self.url_result(media_url, 'Youtube'))
|
||||
else:
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
'url': media_url,
|
||||
'title': title,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': os.path.splitext(title)[0],
|
||||
'url': video_url,
|
||||
}
|
||||
return self.playlist_result(entries, video_id)
|
||||
|
39
youtube_dl/extractor/rmcdecouverte.py
Normal file
39
youtube_dl/extractor/rmcdecouverte.py
Normal file
@@ -0,0 +1,39 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .brightcove import BrightcoveLegacyIE
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
|
||||
class RMCDecouverteIE(InfoExtractor):
    """Extractor for RMC Découverte replay pages hosted on bfmtv.com.

    The page itself is only used to locate the embedded Brightcove player;
    the actual extraction is delegated to the ``BrightcoveNew`` extractor.
    """

    _VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/mediaplayer-replay.*?\bid=(?P<id>\d+)'

    _TEST = {
        'url': 'http://rmcdecouverte.bfmtv.com/mediaplayer-replay/?id=1430&title=LES%20HEROS%20DU%2088e%20ETAGE',
        'info_dict': {
            'id': '5111223049001',
            'ext': 'mp4',
            'title': ': LES HEROS DU 88e ETAGE',
            'description': 'Découvrez comment la bravoure de deux hommes dans la Tour Nord du World Trade Center a sauvé la vie d\'innombrables personnes le 11 septembre 2001.',
            'uploader_id': '1969646226001',
            'upload_date': '20160904',
            'timestamp': 1472951103,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
        'skip': 'Only works from France',
    }
    # New-style Brightcove player URL; ``%s`` is filled with the video id.
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1969646226001/default_default/index.html?videoId=%s'

    def _real_extract(self, url):
        """Return a ``url_result`` pointing at the Brightcove video embedded in *url*.

        The Brightcove video id is read from the ``@videoPlayer`` query
        parameter of the legacy Brightcove URL found in the page.
        """
        page_id = self._match_id(url)
        html = self._download_webpage(url, page_id)

        legacy_url = BrightcoveLegacyIE._extract_brightcove_url(html)
        legacy_query = compat_parse_qs(compat_urlparse.urlparse(legacy_url).query)
        bc_video_id = legacy_query['@videoPlayer'][0]

        return self.url_result(
            self.BRIGHTCOVE_URL_TEMPLATE % bc_video_id,
            'BrightcoveNew', bc_video_id)
|
@@ -1,7 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from .internetvideoarchive import InternetVideoArchiveIE
|
||||
|
||||
|
||||
@@ -11,21 +10,23 @@ class RottenTomatoesIE(InfoExtractor):
|
||||
_TEST = {
|
||||
'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/',
|
||||
'info_dict': {
|
||||
'id': '613340',
|
||||
'id': '11028566',
|
||||
'ext': 'mp4',
|
||||
'title': 'Toy Story 3',
|
||||
'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
og_video = self._og_search_video_url(webpage)
|
||||
query = compat_urlparse.urlparse(og_video).query
|
||||
iva_id = self._search_regex(r'publishedid=(\d+)', webpage, 'internet video archive id')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': InternetVideoArchiveIE._build_xml_url(query),
|
||||
'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?domain=www.videodetective.com&customerid=69249&playerid=641&publishedid=' + iva_id,
|
||||
'ie_key': InternetVideoArchiveIE.ie_key(),
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
}
|
||||
|
@@ -88,7 +88,7 @@ class RutubeIE(InfoExtractor):
|
||||
class RutubeEmbedIE(InfoExtractor):
|
||||
IE_NAME = 'rutube:embed'
|
||||
IE_DESC = 'Rutube embedded videos'
|
||||
_VALID_URL = 'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
|
||||
|
@@ -32,7 +32,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)^(?:https?://)?
|
||||
(?:(?:(?:www\.|m\.)?soundcloud\.com/
|
||||
(?P<uploader>[\w\d-]+)/
|
||||
(?!(?:tracks|sets(?:/[^/?#]+)?|reposts|likes|spotlight)/?(?:$|[?#]))
|
||||
(?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
|
||||
(?P<title>[\w\d-]+)/?
|
||||
(?P<token>[^?]+?)?(?:[?].*)?$)
|
||||
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
|
||||
@@ -265,6 +265,9 @@ class SoundcloudSetIE(SoundcloudIE):
|
||||
'title': 'The Royal Concept EP',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
}, {
|
||||
'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -35,6 +35,7 @@ class SouthParkEsIE(SouthParkIE):
|
||||
'description': 'Cartman Consigue Una Sonda Anal',
|
||||
},
|
||||
'playlist_count': 4,
|
||||
'skip': 'Geo-restricted',
|
||||
}]
|
||||
|
||||
|
||||
|
@@ -14,7 +14,7 @@ class SpankBangIE(InfoExtractor):
|
||||
'id': '3vvn',
|
||||
'ext': 'mp4',
|
||||
'title': 'fantasy solo',
|
||||
'description': 'dillion harper masturbates on a bed',
|
||||
'description': 'Watch fantasy solo free HD porn video - 05 minutes - dillion harper masturbates on a bed free adult movies.',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'silly2587',
|
||||
'age_limit': 18,
|
||||
@@ -44,12 +44,10 @@ class SpankBangIE(InfoExtractor):
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title')
|
||||
description = self._search_regex(
|
||||
r'class="desc"[^>]*>([^<]+)',
|
||||
webpage, 'description', default=None)
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
uploader = self._search_regex(
|
||||
r'class="user"[^>]*>([^<]+)',
|
||||
r'class="user"[^>]*><img[^>]+>([^<]+)',
|
||||
webpage, 'uploader', fatal=False)
|
||||
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
@@ -103,7 +103,7 @@ class SpiegelIE(InfoExtractor):
|
||||
|
||||
|
||||
class SpiegelArticleIE(InfoExtractor):
|
||||
_VALID_URL = 'https?://www\.spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html'
|
||||
_VALID_URL = r'https?://www\.spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html'
|
||||
IE_NAME = 'Spiegel:Article'
|
||||
IE_DESC = 'Articles on spiegel.de'
|
||||
_TESTS = [{
|
||||
|
59
youtube_dl/extractor/tbs.py
Normal file
59
youtube_dl/extractor/tbs.py
Normal file
@@ -0,0 +1,59 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .turner import TurnerBaseIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class TBSIE(TurnerBaseIE):
    """Extractor for video pages on tbs.com and tntdrama.com."""

    _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/videos/(?:[^/]+/)+(?P<id>[^/?#]+)\.html'
    _TESTS = [{
        'url': 'http://www.tbs.com/videos/people-of-earth/season-1/extras/2007318/theatrical-trailer.html',
        'md5': '9e61d680e2285066ade7199e6408b2ee',
        'info_dict': {
            'id': '2007318',
            'ext': 'mp4',
            'title': 'Theatrical Trailer',
            'description': 'Catch the latest comedy from TBS, People of Earth, premiering Halloween night--Monday, October 31, at 9/8c.',
        }
    }, {
        'url': 'http://www.tntdrama.com/videos/good-behavior/season-1/extras/1538823/you-better-run.html',
        'md5': 'ce53c6ead5e9f3280b4ad2031a6fab56',
        'info_dict': {
            'id': '1538823',
            'ext': 'mp4',
            'title': 'You Better Run',
            'description': 'Letty Raines must figure out what she\'s running toward while running away from her past. Good Behavior premieres November 15 at 9/8c.',
        }
    }]

    def _real_extract(self, url):
        """Extract the video described by the ``page-video`` element of *url*.

        Raises ExtractorError (expected) when the page marks the video as
        requiring provider authentication.
        """
        mobj = re.match(self._VALID_URL, url)
        domain = mobj.group('site')
        display_id = mobj.group('id')
        # First three letters of the domain: 'tbs' or 'tnt'.
        site = domain[:3]

        webpage = self._download_webpage(url, display_id)
        player_tag = self._search_regex(
            r'(<[^>]+id="page-video"[^>]*>)', webpage, 'video params')
        video_params = extract_attributes(player_tag)

        if video_params.get('isAuthRequired') == 'true':
            raise ExtractorError(
                'This video is only available via cable service provider subscription that'
                ' is not currently supported.', expected=True)

        # Prefer the clip id; otherwise the title id is mandatory.
        clip_id = video_params.get('clipid')
        query = ('id=' + clip_id) if clip_id else ('titleId=' + video_params['titleid'])

        cvp_url = 'http://www.%s.com/service/cvpXml?%s' % (domain, query)
        path_data = {
            'default': {
                'media_src': 'http://ht.cdn.turner.com/%s/big' % site,
            },
            'secure': {
                'media_src': 'http://androidhls-secure.cdn.turner.com/%s/big' % site,
                'tokenizer_src': 'http://www.%s.com/video/processors/services/token_ipadAdobe.do' % domain,
            },
        }
        return self._extract_cvp_info(cvp_url, display_id, path_data)
|
36
youtube_dl/extractor/telequebec.py
Normal file
36
youtube_dl/extractor/telequebec.py
Normal file
@@ -0,0 +1,36 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class TeleQuebecIE(InfoExtractor):
    """Extractor for zonevideo.telequebec.tv media pages.

    Metadata comes from the Télé-Québec media API; playback is delegated to
    the ``LimelightMedia`` extractor through a ``url_transparent`` result.
    """

    _VALID_URL = r'https?://zonevideo\.telequebec\.tv/media/(?P<id>\d+)'
    _TEST = {
        'url': 'http://zonevideo.telequebec.tv/media/20984/le-couronnement-de-new-york/couronnement-de-new-york',
        'md5': 'fe95a0957e5707b1b01f5013e725c90f',
        'info_dict': {
            'id': '20984',
            'ext': 'mp4',
            'title': 'Le couronnement de New York',
            'description': 'md5:f5b3d27a689ec6c1486132b2d687d432',
            'upload_date': '20160220',
            'timestamp': 1455965438,
        }
    }

    def _real_extract(self, url):
        """Return a ``url_transparent`` info dict for the media at *url*.

        ``title`` and the Limelight ``sourceId`` are mandatory (a missing key
        raises KeyError); description and duration are optional.
        """
        media_id = self._match_id(url)
        media_data = self._download_json(
            'https://mnmedias.api.telequebec.tv/api/v2/media/' + media_id,
            media_id)['media']

        # The description is optional metadata: tolerate a missing key, a
        # null value, an empty list or a non-dict first entry instead of
        # aborting the whole extraction. A ``dict.get`` default alone would
        # only cover the missing-key case.
        description = None
        descriptions = media_data.get('descriptions')
        if isinstance(descriptions, list) and descriptions:
            first_description = descriptions[0]
            if isinstance(first_description, dict):
                description = first_description.get('text')

        return {
            '_type': 'url_transparent',
            'id': media_id,
            'url': 'limelight:media:' + media_data['streamInfo']['sourceId'],
            'title': media_data['title'],
            'description': description,
            # The API reports milliseconds; scale down by 1000.
            'duration': int_or_none(media_data.get('durationInMilliseconds'), 1000),
            'ie_key': 'LimelightMedia',
        }
|
53
youtube_dl/extractor/tfo.py
Normal file
53
youtube_dl/extractor/tfo.py
Normal file
@@ -0,0 +1,53 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class TFOIE(InfoExtractor):
    """Extractor for tfo.org video pages.

    Metadata comes from the site's ``get_infos`` API; playback is delegated
    to the ``LimelightMedia`` extractor through a ``url_transparent`` result.
    """

    _VALID_URL = r'https?://(?:www\.)?tfo\.org/(?:en|fr)/(?:[^/]+/){2}(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.tfo.org/en/universe/tfo-247/100463871/video-game-hackathon',
        'md5': '47c987d0515561114cf03d1226a9d4c7',
        'info_dict': {
            'id': '100463871',
            'ext': 'mp4',
            'title': 'Video Game Hackathon',
            'description': 'md5:558afeba217c6c8d96c60e5421795c07',
            'upload_date': '20160212',
            'timestamp': 1455310233,
        }
    }

    def _real_extract(self, url):
        """Return a ``url_transparent`` info dict for the video at *url*.

        Raises ExtractorError (expected) with the API's message when the
        response reports ``success == 0``.
        """
        video_id = self._match_id(url)

        # Hit the front page first; the 'tfo-session' cookie read below is
        # presumably set by this request -- TODO confirm.
        self._request_webpage(HEADRequest('http://www.tfo.org/'), video_id)

        payload = json.dumps({
            'product_id': video_id,
        }).encode()
        session_token = self._get_cookies('http://www.tfo.org/')['tfo-session'].value

        infos = self._download_json(
            'http://www.tfo.org/api/web/video/get_infos', video_id,
            data=payload, headers={
                'X-tfo-session': session_token,
            })
        if infos.get('success') == 0:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, infos['msg']), expected=True)

        video_data = infos['data']
        info = {
            '_type': 'url_transparent',
            'id': video_id,
            'url': 'limelight:media:' + video_data['llid'],
            'title': video_data['title'],
            'description': video_data.get('description'),
            'series': video_data.get('collection'),
            'season_number': int_or_none(video_data.get('season')),
            'episode_number': int_or_none(video_data.get('episode')),
            'duration': int_or_none(video_data.get('duration')),
            'ie_key': 'LimelightMedia',
        }
        return info
|
@@ -96,7 +96,7 @@ class ThePlatformBaseIE(OnceIE):
|
||||
class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
|
||||
(?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
|
||||
(?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)?|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
|
||||
|theplatform:)(?P<id>[^/\?&]+)'''
|
||||
|
||||
_TESTS = [{
|
||||
@@ -116,6 +116,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
# from http://www.cnet.com/videos/tesla-model-s-a-second-step-towards-a-cleaner-motoring-future/
|
||||
'url': 'http://link.theplatform.com/s/kYEXFC/22d_qsQ6MIRT',
|
||||
|
@@ -2,8 +2,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .brightcove import BrightcoveLegacyIE
|
||||
from ..compat import compat_parse_qs
|
||||
|
||||
|
||||
class TheStarIE(InfoExtractor):
|
||||
@@ -30,6 +28,9 @@ class TheStarIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
|
||||
brightcove_id = compat_parse_qs(brightcove_legacy_url)['@videoPlayer'][0]
|
||||
return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
|
||||
brightcove_id = self._search_regex(
|
||||
r'mainartBrightcoveVideoId["\']?\s*:\s*["\']?(\d+)',
|
||||
webpage, 'brightcove id')
|
||||
return self.url_result(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||
'BrightcoveNew', brightcove_id)
|
||||
|
@@ -1,84 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate
|
||||
)
|
||||
|
||||
|
||||
class THVideoIE(InfoExtractor):
    """Extractor for single thvideo.tv videos (desktop and mobile URLs)."""

    _VALID_URL = r'https?://(?:www\.)?thvideo\.tv/(?:v/th|mobile\.php\?cid=)(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://thvideo.tv/v/th1987/',
        'md5': 'fa107b1f73817e325e9433505a70db50',
        'info_dict': {
            'id': '1987',
            'ext': 'mp4',
            'title': '【动画】秘封活动记录 ~ The Sealed Esoteric History.分镜稿预览',
            'display_id': 'th1987',
            'thumbnail': 'http://thvideo.tv/uploadfile/2014/0722/20140722013459856.jpg',
            'description': '社团京都幻想剧团的第一个东方二次同人动画作品「秘封活动记录 ~ The Sealed Esoteric History.」 本视频是该动画第一期的分镜草稿...',
            'upload_date': '20140722'
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video at *url*.

        The media URL is taken from the mobile player page; title, thumbnail,
        description and upload date are taken from the main video page.
        """
        video_id = self._match_id(url)

        # The mobile player page exposes the direct <source> link.
        mobile_page = self._download_webpage(
            'http://thvideo.tv/mobile.php?cid=%s-0' % (video_id),
            video_id, note='Downloading video source page')
        media_url = self._html_search_regex(
            r'<source src="(.*?)" type', mobile_page, 'video url')

        # The main page carries the metadata.
        main_page = self._download_webpage(
            'http://thvideo.tv/v/th%s' % (video_id), video_id)

        return {
            'id': video_id,
            'ext': 'mp4',
            'url': media_url,
            'title': self._og_search_title(main_page),
            'display_id': 'th%s' % video_id,
            'thumbnail': self._og_search_thumbnail(main_page),
            'description': self._og_search_description(main_page),
            # Non-fatal: the publication date is optional.
            'upload_date': unified_strdate(self._html_search_regex(
                r'span itemprop="datePublished" content="(.*?)">', main_page,
                'upload date', fatal=False)),
        }
|
||||
|
||||
|
||||
class THVideoPlaylistIE(InfoExtractor):
    """Extractor for thvideo.tv ``mylist`` playlists."""

    # 'https?' (optional 's'), not 'http?' (optional 'p'): the latter never
    # matches https URLs.
    _VALID_URL = r'https?://(?:www\.)?thvideo\.tv/mylist(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://thvideo.tv/mylist2',
        'info_dict': {
            'id': '2',
            'title': '幻想万華鏡',
        },
        'playlist_mincount': 23,
    }

    def _real_extract(self, url):
        """Return a playlist result with one ``THVideo`` entry per listed video.

        The playlist title is optional (non-fatal lookup).
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)
        list_title = self._html_search_regex(
            r'<h1 class="show_title">(.*?)<b id', webpage, 'playlist title',
            fatal=False)

        # Literal dots in the host are escaped so they match only '.'.
        video_ids = re.findall(
            r'<dd><a href="http://thvideo\.tv/v/th(\d+)/" target=', webpage)
        entries = [
            self.url_result('http://thvideo.tv/v/th' + video_id, 'THVideo')
            for video_id in video_ids]

        return self.playlist_result(entries, playlist_id, list_title)
|
@@ -1,10 +1,14 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .brightcove import BrightcoveLegacyIE
|
||||
from ..compat import compat_parse_qs
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
|
||||
class TlcDeIE(InfoExtractor):
|
||||
@@ -35,5 +39,5 @@ class TlcDeIE(InfoExtractor):
|
||||
title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, title)
|
||||
brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
|
||||
brightcove_id = compat_parse_qs(brightcove_legacy_url)['@videoPlayer'][0]
|
||||
brightcove_id = compat_parse_qs(compat_urlparse.urlparse(brightcove_legacy_url).query)['@videoPlayer'][0]
|
||||
return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
|
||||
|
@@ -10,6 +10,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
unescapeHTML,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
@@ -80,7 +81,8 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
|
||||
|
||||
if not cfg_url:
|
||||
inputs = self._hidden_inputs(webpage)
|
||||
cfg_url = 'https://cdn-fck.tnaflix.com/tnaflix/%s.fid?key=%s' % (inputs['vkey'], inputs['nkey'])
|
||||
cfg_url = ('https://cdn-fck.tnaflix.com/tnaflix/%s.fid?key=%s&VID=%s&premium=1&vip=1&alpha'
|
||||
% (inputs['vkey'], inputs['nkey'], video_id))
|
||||
|
||||
cfg_xml = self._download_xml(
|
||||
cfg_url, display_id, 'Downloading metadata',
|
||||
@@ -89,7 +91,7 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
|
||||
formats = []
|
||||
|
||||
def extract_video_url(vl):
|
||||
return re.sub('speed=\d+', 'speed=', vl.text)
|
||||
return re.sub('speed=\d+', 'speed=', unescapeHTML(vl.text))
|
||||
|
||||
video_link = cfg_xml.find('./videoLink')
|
||||
if video_link is not None:
|
||||
@@ -201,7 +203,7 @@ class TNAFlixIE(TNAFlixNetworkBaseIE):
|
||||
_TESTS = [{
|
||||
# anonymous uploader, no categories
|
||||
'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
|
||||
'md5': '7e569419fe6d69543d01e6be22f5f7c4',
|
||||
'md5': 'ecf3498417d09216374fc5907f9c6ec0',
|
||||
'info_dict': {
|
||||
'id': '553878',
|
||||
'display_id': 'Carmella-Decesare-striptease',
|
||||
@@ -215,11 +217,11 @@ class TNAFlixIE(TNAFlixNetworkBaseIE):
|
||||
}, {
|
||||
# non-anonymous uploader, categories
|
||||
'url': 'https://www.tnaflix.com/teen-porn/Educational-xxx-video/video6538',
|
||||
'md5': 'fcba2636572895aba116171a899a5658',
|
||||
'md5': '0f5d4d490dbfd117b8607054248a07c0',
|
||||
'info_dict': {
|
||||
'id': '6538',
|
||||
'display_id': 'Educational-xxx-video',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Educational xxx video',
|
||||
'description': 'md5:b4fab8f88a8621c8fabd361a173fe5b8',
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
|
@@ -1,36 +0,0 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .nuevo import NuevoBaseIE
|
||||
|
||||
|
||||
class TrollvidsIE(NuevoBaseIE):
    """Extractor for trollvids.com videos, built on the shared Nuevo player config."""

    _VALID_URL = r'https?://(?:www\.)?trollvids\.com/video/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
    IE_NAME = 'trollvids'
    _TEST = {
        'url': 'http://trollvids.com/video/2349002/%E3%80%90MMD-R-18%E3%80%91%E3%82%AC%E3%83%BC%E3%83%AB%E3%83%95%E3%83%AC%E3%83%B3%E3%83%89-carrymeoff',
        'md5': '1d53866b2c514b23ed69e4352fdc9839',
        'info_dict': {
            'id': '2349002',
            'ext': 'mp4',
            'title': '【MMD R-18】ガールフレンド carry_me_off',
            'age_limit': 18,
            'duration': 216.78,
        },
    }

    def _real_extract(self, url):
        """Return the Nuevo-derived info dict, extended with display id and age limit."""
        match = re.match(self._VALID_URL, url)
        video_id = match.group('id')
        display_id = match.group('display_id')

        config_url = 'http://trollvids.com/nuevo/player/config.php?v=%s' % video_id
        info = self._extract_nuevo(config_url, video_id)

        # Every result is tagged with a fixed age limit of 18.
        info.update(display_id=display_id, age_limit=18)
        return info
|
@@ -1,26 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .nuevo import NuevoBaseIE
|
||||
|
||||
|
||||
class TruTubeIE(NuevoBaseIE):
    """Extractor for trutube.tv video and embed URLs, built on the Nuevo player config."""

    _VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-',
        'md5': 'c5b6e301b0a2040b074746cbeaa26ca1',
        'info_dict': {
            'id': '14880',
            'ext': 'flv',
            'title': 'Ramses II - Proven To Be A Red Headed Caucasoid',
            'thumbnail': 're:^http:.*\.jpg$',
        }
    }, {
        'url': 'https://trutube.tv/nuevo/player/embed.php?v=14880',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict built from the Nuevo player config for *url*."""
        video_id = self._match_id(url)
        config_url = 'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id
        return self._extract_nuevo(config_url, video_id)
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user