Compare commits
265 Commits
2015.08.06
...
2015.09.03
Author | SHA1 | Date | |
---|---|---|---|
|
bd6742137f | ||
|
e8dcfa3d69 | ||
|
88720ed09b | ||
|
1e804244d0 | ||
|
198492bbf0 | ||
|
8f9d522f62 | ||
|
cbae233aba | ||
|
b17ca9c945 | ||
|
ebf4ca39ba | ||
|
e5e78797e6 | ||
|
080997b808 | ||
|
77306e8b97 | ||
|
6917d2a2f0 | ||
|
36c15522c1 | ||
|
804c343a4f | ||
|
cd5d75427e | ||
|
5ddc127da6 | ||
|
f859695b49 | ||
|
cb3d2eb9e9 | ||
|
33eae08f04 | ||
|
aa3f98677d | ||
|
fffccaaf41 | ||
|
cdc8d0c373 | ||
|
39955b0451 | ||
|
52dfb7ffe2 | ||
|
93462856e1 | ||
|
fcd9e423ec | ||
|
db8f2bfd99 | ||
|
55801fc76e | ||
|
d3d89c3256 | ||
|
8875b3d572 | ||
|
aabc2be693 | ||
|
c9afb51cea | ||
|
c0a656876c | ||
|
17a647630b | ||
|
c88e118b3c | ||
|
ae6a802106 | ||
|
b184f94413 | ||
|
ee3ec091f4 | ||
|
ef49b59053 | ||
|
1f8125805e | ||
|
efd712c69b | ||
|
109a4156e1 | ||
|
678d33295b | ||
|
5e58956d0a | ||
|
e276fd2cb3 | ||
|
9b22cb10c4 | ||
|
8ca31a0e05 | ||
|
20149a5da1 | ||
|
054d43bb11 | ||
|
65488b820c | ||
|
c3c9f87954 | ||
|
56f447be9f | ||
|
79fa9db0da | ||
|
071c10137b | ||
|
a4962b80d6 | ||
|
5307c33232 | ||
|
1b660cce12 | ||
|
8df8c278b6 | ||
|
d7e8264517 | ||
|
f11c316347 | ||
|
f62e02c24f | ||
|
70113c38c9 | ||
|
3d8132f5e2 | ||
|
39affb5aa4 | ||
|
a882c5f474 | ||
|
61a7ff1622 | ||
|
42e7373bd3 | ||
|
e269d3ae7d | ||
|
e7ddaef5bd | ||
|
62984e4584 | ||
|
3c53455d15 | ||
|
bbb43a39fd | ||
|
43e7d3c945 | ||
|
2f72e83bbd | ||
|
57179b4ca1 | ||
|
4bc8eec4eb | ||
|
baf510bf8c | ||
|
6d53cdd6ce | ||
|
ebbf078c7d | ||
|
95e431e9ec | ||
|
11addc50ff | ||
|
e4df2f98cc | ||
|
e7c14660d3 | ||
|
90076b6172 | ||
|
28b83495d8 | ||
|
551c7837ac | ||
|
59e6acc757 | ||
|
9990c960f2 | ||
|
2006a06eff | ||
|
2b6bda1ed8 | ||
|
468083d2f5 | ||
|
483fc223bb | ||
|
66ce97024d | ||
|
8c97f81943 | ||
|
d7c1630570 | ||
|
5e1a5ac8de | ||
|
9eb4ab6ad9 | ||
|
4932a817a0 | ||
|
5d003e29b1 | ||
|
dc95bd503e | ||
|
f738dd7b7c | ||
|
f908b74fa3 | ||
|
c687ac745b | ||
|
912e0b7e46 | ||
|
03bc7237ad | ||
|
dd565ac1ad | ||
|
5cdefc4625 | ||
|
ce00af8767 | ||
|
51047444aa | ||
|
aa6cd05ed8 | ||
|
dac14bf311 | ||
|
05fe2594e4 | ||
|
26e1c3514f | ||
|
22c83245c5 | ||
|
7900aede14 | ||
|
f877c6ae5a | ||
|
ca681f7041 | ||
|
a01da8bbf8 | ||
|
f3a65d9636 | ||
|
559f4c550f | ||
|
03c635a4b5 | ||
|
34a4cd0a34 | ||
|
3b9b32f404 | ||
|
9c724a9802 | ||
|
7a6e8a1b17 | ||
|
369c12e038 | ||
|
0fa5795b85 | ||
|
c00c7c0af0 | ||
|
cbaed4bb5e | ||
|
f74a7348f6 | ||
|
8626b23e4e | ||
|
0086874277 | ||
|
7fc18d9309 | ||
|
974f1a385a | ||
|
6900b4f6f5 | ||
|
d90e4bdb74 | ||
|
276c989772 | ||
|
ea99110d24 | ||
|
221a59fe6f | ||
|
eaa5646483 | ||
|
041bc3adc5 | ||
|
e64b756943 | ||
|
201ea3ee8e | ||
|
9303ce3e69 | ||
|
06c085ab6e | ||
|
c576ef1e7c | ||
|
11bed5827d | ||
|
fab83e2456 | ||
|
1d25e9d173 | ||
|
9c21f22923 | ||
|
3aa697f993 | ||
|
8b9848ac56 | ||
|
8b8c1093b6 | ||
|
d0d6c097fc | ||
|
6be5e46994 | ||
|
45694b504a | ||
|
41dbc50f9c | ||
|
4d2ad866f3 | ||
|
3cafca04aa | ||
|
594f51b859 | ||
|
fb56131dd9 | ||
|
a34e19629c | ||
|
3c12a027d4 | ||
|
cb28e03386 | ||
|
7393746da2 | ||
|
6828c809e4 | ||
|
28479149cc | ||
|
237c03c8ea | ||
|
e73c85cb23 | ||
|
b6b2711298 | ||
|
3b7130439a | ||
|
2c919adb74 | ||
|
60231c65b9 | ||
|
f196047832 | ||
|
240ca32e57 | ||
|
fa37c26c4d | ||
|
d7dbfc7cc1 | ||
|
d9ab5262b1 | ||
|
fb124e3741 | ||
|
479bf783d2 | ||
|
f0f3a6c99d | ||
|
f57b7835e2 | ||
|
1df3186e0e | ||
|
0b7c27828d | ||
|
0a19d4ccd6 | ||
|
9f3da13860 | ||
|
bf812ef714 | ||
|
b1ac38fadc | ||
|
fb0d12c6cb | ||
|
34952f09e1 | ||
|
34a7de2970 | ||
|
0ff827419e | ||
|
b29440aee6 | ||
|
11b5605815 | ||
|
844587669e | ||
|
f6c3664d71 | ||
|
c5864a8ce6 | ||
|
27c7114af6 | ||
|
0791ac1b44 | ||
|
1de5cd3ba5 | ||
|
729accb482 | ||
|
942acef594 | ||
|
fb2f339fec | ||
|
98044462b1 | ||
|
0dcb318f62 | ||
|
f32143469f | ||
|
3a30508b94 | ||
|
e0b9d78fab | ||
|
8d6765cf48 | ||
|
12bb392a0f | ||
|
08df685fe7 | ||
|
c8d1be772d | ||
|
887e9bc7b5 | ||
|
9f2e7c2f34 | ||
|
d7bb8884af | ||
|
464e792496 | ||
|
18c3281f9e | ||
|
8e2b1be127 | ||
|
b61b7787cb | ||
|
b465083f45 | ||
|
154655a85a | ||
|
59e89e62d7 | ||
|
d5d7bdaeb5 | ||
|
b2f82948ee | ||
|
428e4e4a85 | ||
|
1e83741c9a | ||
|
621d6a9516 | ||
|
3550821fb4 | ||
|
5b0c40da24 | ||
|
e0ac521438 | ||
|
c29458f3ec | ||
|
bf94d763ba | ||
|
8a37aa1517 | ||
|
f3d24df6f2 | ||
|
fd5d8270dc | ||
|
be612d9e0c | ||
|
4a7434d0b0 | ||
|
ad2141be2d | ||
|
f94639fadf | ||
|
89faae660f | ||
|
0f422256d6 | ||
|
acc1adbe7a | ||
|
8002ac9e0a | ||
|
6d30cf04db | ||
|
3be3c622dc | ||
|
cd6b555e19 | ||
|
d41d04c0f5 | ||
|
17712eeb19 | ||
|
41c3a5a7be | ||
|
8765222d22 | ||
|
645f814544 | ||
|
308cfe0ab3 | ||
|
e5e8d20a3a | ||
|
a107193e4b | ||
|
dfaba1ab95 | ||
|
a62fd1af27 | ||
|
1a117a7728 | ||
|
3c07a729a6 | ||
|
84c0ed50a5 | ||
|
02c126a7c2 | ||
|
114ed20e64 | ||
|
9d681c2bb3 | ||
|
3af1fac7b0 | ||
|
233c1c0e76 |
@@ -5,9 +5,7 @@ python:
|
||||
- "3.2"
|
||||
- "3.3"
|
||||
- "3.4"
|
||||
before_install:
|
||||
- sudo apt-get update -qq
|
||||
- sudo apt-get install -yqq rtmpdump
|
||||
sudo: false
|
||||
script: nosetests test --verbose
|
||||
notifications:
|
||||
email:
|
||||
|
3
AUTHORS
3
AUTHORS
@@ -137,3 +137,6 @@ Zach Bruggeman
|
||||
Tjark Saul
|
||||
slangangular
|
||||
Behrouz Abbasi
|
||||
ngld
|
||||
nyuszika7h
|
||||
Shaun Walbridge
|
||||
|
@@ -125,7 +125,7 @@ If you want to add support for a new site, you can follow this quick list (assum
|
||||
```
|
||||
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
||||
7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want.
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want.
|
||||
8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8).
|
||||
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||
|
||||
|
20
README.md
20
README.md
@@ -34,7 +34,7 @@ You can also use pip:
|
||||
|
||||
sudo pip install youtube-dl
|
||||
|
||||
Alternatively, refer to the developer instructions below for how to check out and work with the git repository. For further options, including PGP signatures, see https://rg3.github.io/youtube-dl/download.html .
|
||||
Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see https://rg3.github.io/youtube-dl/download.html .
|
||||
|
||||
# DESCRIPTION
|
||||
**youtube-dl** is a small command-line program to download videos from
|
||||
@@ -108,7 +108,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--playlist-reverse Download playlist videos in reverse order
|
||||
--xattr-set-filesize Set file xattribute ytdl.filesize with expected filesize (experimental)
|
||||
--hls-prefer-native Use the native HLS downloader instead of ffmpeg (experimental)
|
||||
--external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,curl,httpie,wget
|
||||
--external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,axel,curl,httpie,wget
|
||||
--external-downloader-args ARGS Give these arguments to the external downloader
|
||||
|
||||
## Filesystem Options:
|
||||
@@ -207,7 +207,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
-p, --password PASSWORD Account password. If this option is left out, youtube-dl will ask interactively.
|
||||
-2, --twofactor TWOFACTOR Two-factor auth code
|
||||
-n, --netrc Use .netrc authentication data
|
||||
--video-password PASSWORD Video password (vimeo, smotri)
|
||||
--video-password PASSWORD Video password (vimeo, smotri, youku)
|
||||
|
||||
## Post-processing Options:
|
||||
-x, --extract-audio Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)
|
||||
@@ -236,7 +236,14 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
|
||||
# CONFIGURATION
|
||||
|
||||
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<user name>\youtube-dl.conf`.
|
||||
You can configure youtube-dl by placing any supported command line option in a configuration file. On Linux, the system-wide configuration file is located at `/etc/youtube-dl.conf` and the user-wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user-wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, and use a proxy:
|
||||
```
|
||||
--extract-audio
|
||||
--no-mtime
|
||||
--proxy 127.0.0.1:3128
|
||||
```
|
||||
|
||||
You can use `--ignore-config` if you want to disable the configuration file for a particular youtube-dl run.
|
||||
|
||||
### Authentication with `.netrc` file ###
|
||||
|
||||
@@ -272,6 +279,7 @@ The `-o` option allows users to indicate a template for the output file names. T
|
||||
- `autonumber`: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero.
|
||||
- `playlist`: The name or the id of the playlist that contains the video.
|
||||
- `playlist_index`: The index of the video in the playlist, a five-digit number.
|
||||
- `format_id`: The sequence will be replaced by the format code specified by `--format`.
|
||||
|
||||
The current default template is `%(title)s-%(id)s.%(ext)s`.
|
||||
|
||||
@@ -544,7 +552,7 @@ If you want to add support for a new site, you can follow this quick list (assum
|
||||
```
|
||||
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
||||
7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want.
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want.
|
||||
8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8).
|
||||
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||
|
||||
@@ -572,7 +580,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
||||
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
|
||||
```
|
||||
|
||||
Most likely, you'll want to use various options. For a list of what can be done, have a look at [youtube_dl/YoutubeDL.py](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L69). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
|
||||
Most likely, you'll want to use various options. For a list of what can be done, have a look at [youtube_dl/YoutubeDL.py](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L117-L265). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
|
||||
|
||||
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
|
||||
|
||||
|
@@ -86,7 +86,7 @@
|
||||
- **chirbit:profile**
|
||||
- **Cinchcast**
|
||||
- **Cinemassacre**
|
||||
- **clipfish**
|
||||
- **Clipfish**
|
||||
- **cliphunter**
|
||||
- **Clipsyndicate**
|
||||
- **Cloudy**
|
||||
@@ -116,6 +116,7 @@
|
||||
- **DailymotionCloud**
|
||||
- **daum.net**
|
||||
- **DBTV**
|
||||
- **DCN**
|
||||
- **DctpTv**
|
||||
- **DeezerPlaylist**
|
||||
- **defense.gouv.fr**
|
||||
@@ -149,6 +150,7 @@
|
||||
- **EroProfile**
|
||||
- **Escapist**
|
||||
- **ESPN** (Currently broken)
|
||||
- **EsriVideo**
|
||||
- **EveryonesMixtape**
|
||||
- **exfm**: ex.fm
|
||||
- **ExpoTV**
|
||||
@@ -164,7 +166,7 @@
|
||||
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
||||
- **FootyRoom**
|
||||
- **Foxgay**
|
||||
- **FoxNews**
|
||||
- **FoxNews**: Fox News and Fox Business Video
|
||||
- **FoxSports**
|
||||
- **france2.fr:generation-quoi**
|
||||
- **FranceCulture**
|
||||
@@ -218,7 +220,10 @@
|
||||
- **imdb**: Internet Movie Database trailers
|
||||
- **imdb:list**: Internet Movie Database lists
|
||||
- **Imgur**
|
||||
- **ImgurAlbum**
|
||||
- **Ina**
|
||||
- **Indavideo**
|
||||
- **IndavideoEmbed**
|
||||
- **InfoQ**
|
||||
- **Instagram**
|
||||
- **instagram:user**: Instagram user profile
|
||||
@@ -297,13 +302,16 @@
|
||||
- **Moviezine**
|
||||
- **movshare**: MovShare
|
||||
- **MPORA**
|
||||
- **MSNBC**
|
||||
- **MTV**
|
||||
- **mtv.de**
|
||||
- **mtviggy.com**
|
||||
- **mtvservices:embedded**
|
||||
- **MuenchenTV**: münchen.tv
|
||||
- **MusicPlayOn**
|
||||
- **MusicVault**
|
||||
- **muzu.tv**
|
||||
- **Mwave**
|
||||
- **MySpace**
|
||||
- **MySpace:album**
|
||||
- **MySpass**
|
||||
@@ -351,7 +359,6 @@
|
||||
- **NowTV**
|
||||
- **nowvideo**: NowVideo
|
||||
- **npo**: npo.nl and ntr.nl
|
||||
- **npo**: npo.nl and ntr.nl
|
||||
- **npo.nl:live**
|
||||
- **npo.nl:radio**
|
||||
- **npo.nl:radio:fragment**
|
||||
@@ -377,6 +384,7 @@
|
||||
- **parliamentlive.tv**: UK parliament videos
|
||||
- **Patreon**
|
||||
- **PBS**
|
||||
- **Periscope**: Periscope
|
||||
- **PhilharmonieDeParis**: Philharmonie de Paris
|
||||
- **Phoenix**
|
||||
- **Photobucket**
|
||||
@@ -385,8 +393,11 @@
|
||||
- **PlanetaPlay**
|
||||
- **play.fm**
|
||||
- **played.to**
|
||||
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
|
||||
- **Playvid**
|
||||
- **Playwire**
|
||||
- **pluralsight**
|
||||
- **pluralsight:course**
|
||||
- **plus.google**: Google Plus
|
||||
- **pluzz.francetv.fr**
|
||||
- **podomatic**
|
||||
@@ -406,6 +417,7 @@
|
||||
- **qqmusic:playlist**: QQ音乐 - 歌单
|
||||
- **qqmusic:singer**: QQ音乐 - 歌手
|
||||
- **qqmusic:toplist**: QQ音乐 - 排行榜
|
||||
- **Quickscope**: Quick Scope
|
||||
- **QuickVid**
|
||||
- **R7**
|
||||
- **radio.de**
|
||||
@@ -430,6 +442,7 @@
|
||||
- **rtve.es:alacarta**: RTVE a la carta
|
||||
- **rtve.es:infantil**: RTVE infantil
|
||||
- **rtve.es:live**: RTVE.es live streams
|
||||
- **RTVNH**
|
||||
- **RUHD**
|
||||
- **rutube**: Rutube videos
|
||||
- **rutube:channel**: Rutube channels
|
||||
@@ -453,7 +466,8 @@
|
||||
- **ServingSys**
|
||||
- **Sexu**
|
||||
- **SexyKarma**: Sexy Karma and Watch Indian Porn
|
||||
- **Shared**
|
||||
- **Shahid**
|
||||
- **Shared**: shared.sx and vivo.sx
|
||||
- **ShareSix**
|
||||
- **Sina**
|
||||
- **Slideshare**
|
||||
@@ -518,6 +532,7 @@
|
||||
- **ted**
|
||||
- **TeleBruxelles**
|
||||
- **telecinco.es**
|
||||
- **Telegraaf**
|
||||
- **TeleMB**
|
||||
- **TeleTask**
|
||||
- **TenPlay**
|
||||
@@ -525,6 +540,7 @@
|
||||
- **TF1**
|
||||
- **TheOnion**
|
||||
- **ThePlatform**
|
||||
- **ThePlatformFeed**
|
||||
- **TheSixtyOne**
|
||||
- **ThisAmericanLife**
|
||||
- **ThisAV**
|
||||
@@ -590,7 +606,6 @@
|
||||
- **Viddler**
|
||||
- **video.google:search**: Google Video search
|
||||
- **video.mit.edu**
|
||||
- **VideoBam**
|
||||
- **VideoDetective**
|
||||
- **videofy.me**
|
||||
- **videolectures.net**
|
||||
@@ -621,6 +636,7 @@
|
||||
- **Vodlocker**
|
||||
- **VoiceRepublic**
|
||||
- **Vporn**
|
||||
- **vpro**: npo.nl and ntr.nl
|
||||
- **VRT**
|
||||
- **vube**: Vube.com
|
||||
- **VuClip**
|
||||
|
@@ -133,8 +133,8 @@ def expect_info_dict(self, got_dict, expected_dict):
|
||||
elif isinstance(expected, compat_str) and expected.startswith('mincount:'):
|
||||
got = got_dict.get(info_field)
|
||||
self.assertTrue(
|
||||
isinstance(got, list),
|
||||
'Expected field %s to be a list, but it is of type %s' % (
|
||||
isinstance(got, (list, dict)),
|
||||
'Expected field %s to be a list or a dict, but it is of type %s' % (
|
||||
info_field, type(got).__name__))
|
||||
expected_num = int(expected.partition(':')[2])
|
||||
assertGreaterEqual(
|
||||
@@ -160,7 +160,7 @@ def expect_info_dict(self, got_dict, expected_dict):
|
||||
# Are checkable fields missing from the test case definition?
|
||||
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
|
||||
for key, value in got_dict.items()
|
||||
if value and key in ('id', 'title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
|
||||
if value and key in ('id', 'title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location', 'age_limit'))
|
||||
missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
|
||||
if missing_keys:
|
||||
def _repr(v):
|
||||
|
@@ -136,7 +136,9 @@ def generator(test_case):
|
||||
# We're not using .download here since that is just a shim
|
||||
# for outside error handling, and returns the exit code
|
||||
# instead of the result dict.
|
||||
res_dict = ydl.extract_info(test_case['url'])
|
||||
res_dict = ydl.extract_info(
|
||||
test_case['url'],
|
||||
force_generic_extractor=params.get('force_generic_extractor', False))
|
||||
except (DownloadError, ExtractorError) as err:
|
||||
# Check if the exception is not a network related one
|
||||
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
|
||||
|
@@ -25,6 +25,7 @@ from youtube_dl.extractor import (
|
||||
RaiIE,
|
||||
VikiIE,
|
||||
ThePlatformIE,
|
||||
ThePlatformFeedIE,
|
||||
RTVEALaCartaIE,
|
||||
FunnyOrDieIE,
|
||||
)
|
||||
@@ -307,6 +308,18 @@ class TestThePlatformSubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b')
|
||||
|
||||
|
||||
class TestThePlatformFeedSubtitles(BaseTestSubtitles):
|
||||
url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'
|
||||
IE = ThePlatformFeedIE
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['en']))
|
||||
self.assertEqual(md5(subtitles['en']), '48649a22e82b2da21c9a67a395eedade')
|
||||
|
||||
|
||||
class TestRtveSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/'
|
||||
IE = RTVEALaCartaIE
|
||||
|
@@ -2009,7 +2009,7 @@ class YoutubeDL(object):
|
||||
(info_dict['extractor'], info_dict['id'], thumb_display_id))
|
||||
try:
|
||||
uf = self.urlopen(t['url'])
|
||||
with open(thumb_filename, 'wb') as thumbf:
|
||||
with open(encodeFilename(thumb_filename), 'wb') as thumbf:
|
||||
shutil.copyfileobj(uf, thumbf)
|
||||
self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
|
||||
(info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
|
||||
|
@@ -45,11 +45,13 @@ class ExternalFD(FileDownloader):
|
||||
def supports(cls, info_dict):
|
||||
return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
|
||||
|
||||
def _source_address(self, command_option):
|
||||
source_address = self.params.get('source_address')
|
||||
if source_address is None:
|
||||
def _option(self, command_option, param):
|
||||
param = self.params.get(param)
|
||||
if param is None:
|
||||
return []
|
||||
return [command_option, source_address]
|
||||
if isinstance(param, bool):
|
||||
return [command_option]
|
||||
return [command_option, param]
|
||||
|
||||
def _configuration_args(self, default=[]):
|
||||
ex_args = self.params.get('external_downloader_args')
|
||||
@@ -77,7 +79,17 @@ class CurlFD(ExternalFD):
|
||||
cmd = [self.exe, '--location', '-o', tmpfilename]
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
cmd += self._source_address('--interface')
|
||||
cmd += self._option('--interface', 'source_address')
|
||||
cmd += self._configuration_args()
|
||||
cmd += ['--', info_dict['url']]
|
||||
return cmd
|
||||
|
||||
|
||||
class AxelFD(ExternalFD):
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = [self.exe, '-o', tmpfilename]
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['-H', '%s: %s' % (key, val)]
|
||||
cmd += self._configuration_args()
|
||||
cmd += ['--', info_dict['url']]
|
||||
return cmd
|
||||
@@ -88,7 +100,9 @@ class WgetFD(ExternalFD):
|
||||
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
cmd += self._source_address('--bind-address')
|
||||
cmd += self._option('--bind-address', 'source_address')
|
||||
cmd += self._option('--proxy', 'proxy')
|
||||
cmd += self._option('--no-check-certificate', 'nocheckcertificate')
|
||||
cmd += self._configuration_args()
|
||||
cmd += ['--', info_dict['url']]
|
||||
return cmd
|
||||
@@ -105,7 +119,8 @@ class Aria2cFD(ExternalFD):
|
||||
cmd += ['--out', os.path.basename(tmpfilename)]
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
cmd += self._source_address('--interface')
|
||||
cmd += self._option('--interface', 'source_address')
|
||||
cmd += self._option('--all-proxy', 'proxy')
|
||||
cmd += ['--', info_dict['url']]
|
||||
return cmd
|
||||
|
||||
|
@@ -13,6 +13,8 @@ from ..compat import (
|
||||
compat_urllib_error,
|
||||
)
|
||||
from ..utils import (
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
struct_pack,
|
||||
struct_unpack,
|
||||
xpath_text,
|
||||
@@ -343,18 +345,19 @@ class F4mFD(FragmentFD):
|
||||
success = ctx['dl'].download(frag_filename, {'url': url})
|
||||
if not success:
|
||||
return False
|
||||
with open(frag_filename, 'rb') as down:
|
||||
down_data = down.read()
|
||||
reader = FlvReader(down_data)
|
||||
while True:
|
||||
_, box_type, box_data = reader.read_box_info()
|
||||
if box_type == b'mdat':
|
||||
dest_stream.write(box_data)
|
||||
break
|
||||
(down, frag_sanitized) = sanitize_open(frag_filename, 'rb')
|
||||
down_data = down.read()
|
||||
down.close()
|
||||
reader = FlvReader(down_data)
|
||||
while True:
|
||||
_, box_type, box_data = reader.read_box_info()
|
||||
if box_type == b'mdat':
|
||||
dest_stream.write(box_data)
|
||||
break
|
||||
if live:
|
||||
os.remove(frag_filename)
|
||||
os.remove(encodeFilename(frag_sanitized))
|
||||
else:
|
||||
frags_filenames.append(frag_filename)
|
||||
frags_filenames.append(frag_sanitized)
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
if live and (err.code == 404 or err.code == 410):
|
||||
# We didn't keep up with the live window. Continue
|
||||
@@ -375,6 +378,6 @@ class F4mFD(FragmentFD):
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
for frag_file in frags_filenames:
|
||||
os.remove(frag_file)
|
||||
os.remove(encodeFilename(frag_file))
|
||||
|
||||
return True
|
||||
|
@@ -35,6 +35,7 @@ class FragmentFD(FileDownloader):
|
||||
'quiet': True,
|
||||
'noprogress': True,
|
||||
'ratelimit': self.params.get('ratelimit', None),
|
||||
'retries': self.params.get('retries', 0),
|
||||
'test': self.params.get('test', False),
|
||||
}
|
||||
)
|
||||
|
@@ -12,6 +12,7 @@ from ..postprocessor.ffmpeg import FFmpegPostProcessor
|
||||
from ..utils import (
|
||||
encodeArgument,
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
)
|
||||
|
||||
|
||||
@@ -32,6 +33,8 @@ class HlsFD(FileDownloader):
|
||||
for opt in (ffpp.executable, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
|
||||
args.append(encodeFilename(tmpfilename, True))
|
||||
|
||||
self._debug_cmd(args)
|
||||
|
||||
retval = subprocess.call(args)
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
@@ -87,13 +90,13 @@ class NativeHlsFD(FragmentFD):
|
||||
success = ctx['dl'].download(frag_filename, {'url': frag_url})
|
||||
if not success:
|
||||
return False
|
||||
with open(frag_filename, 'rb') as down:
|
||||
ctx['dest_stream'].write(down.read())
|
||||
frags_filenames.append(frag_filename)
|
||||
down, frag_sanitized = sanitize_open(frag_filename, 'rb')
|
||||
ctx['dest_stream'].write(down.read())
|
||||
frags_filenames.append(frag_sanitized)
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
for frag_file in frags_filenames:
|
||||
os.remove(frag_file)
|
||||
os.remove(encodeFilename(frag_file))
|
||||
|
||||
return True
|
||||
|
@@ -118,6 +118,7 @@ from .dailymotion import (
|
||||
)
|
||||
from .daum import DaumIE
|
||||
from .dbtv import DBTVIE
|
||||
from .dcn import DCNIE
|
||||
from .dctp import DctpTvIE
|
||||
from .deezer import DeezerPlaylistIE
|
||||
from .dfb import DFBIE
|
||||
@@ -157,6 +158,7 @@ from .eporner import EpornerIE
|
||||
from .eroprofile import EroProfileIE
|
||||
from .escapist import EscapistIE
|
||||
from .espn import ESPNIE
|
||||
from .esri import EsriVideoIE
|
||||
from .everyonesmixtape import EveryonesMixtapeIE
|
||||
from .exfm import ExfmIE
|
||||
from .expotv import ExpoTVIE
|
||||
@@ -239,8 +241,15 @@ from .imdb import (
|
||||
ImdbIE,
|
||||
ImdbListIE
|
||||
)
|
||||
from .imgur import ImgurIE
|
||||
from .imgur import (
|
||||
ImgurIE,
|
||||
ImgurAlbumIE,
|
||||
)
|
||||
from .ina import InaIE
|
||||
from .indavideo import (
|
||||
IndavideoIE,
|
||||
IndavideoEmbedIE,
|
||||
)
|
||||
from .infoq import InfoQIE
|
||||
from .instagram import InstagramIE, InstagramUserIE
|
||||
from .internetvideoarchive import InternetVideoArchiveIE
|
||||
@@ -334,11 +343,13 @@ from .mtv import (
|
||||
MTVIE,
|
||||
MTVServicesEmbeddedIE,
|
||||
MTVIggyIE,
|
||||
MTVDEIE,
|
||||
)
|
||||
from .muenchentv import MuenchenTVIE
|
||||
from .musicplayon import MusicPlayOnIE
|
||||
from .musicvault import MusicVaultIE
|
||||
from .muzu import MuzuTVIE
|
||||
from .mwave import MwaveIE
|
||||
from .myspace import MySpaceIE, MySpaceAlbumIE
|
||||
from .myspass import MySpassIE
|
||||
from .myvi import MyviIE
|
||||
@@ -352,6 +363,7 @@ from .nbc import (
|
||||
NBCNewsIE,
|
||||
NBCSportsIE,
|
||||
NBCSportsVPlayerIE,
|
||||
MSNBCIE,
|
||||
)
|
||||
from .ndr import (
|
||||
NDRIE,
|
||||
@@ -431,6 +443,10 @@ from .orf import (
|
||||
from .parliamentliveuk import ParliamentLiveUKIE
|
||||
from .patreon import PatreonIE
|
||||
from .pbs import PBSIE
|
||||
from .periscope import (
|
||||
PeriscopeIE,
|
||||
QuickscopeIE,
|
||||
)
|
||||
from .philharmoniedeparis import PhilharmonieDeParisIE
|
||||
from .phoenix import PhoenixIE
|
||||
from .photobucket import PhotobucketIE
|
||||
@@ -439,8 +455,13 @@ from .planetaplay import PlanetaPlayIE
|
||||
from .pladform import PladformIE
|
||||
from .played import PlayedIE
|
||||
from .playfm import PlayFMIE
|
||||
from .playtvak import PlaytvakIE
|
||||
from .playvid import PlayvidIE
|
||||
from .playwire import PlaywireIE
|
||||
from .pluralsight import (
|
||||
PluralsightIE,
|
||||
PluralsightCourseIE,
|
||||
)
|
||||
from .podomatic import PodomaticIE
|
||||
from .porn91 import Porn91IE
|
||||
from .pornhd import PornHdIE
|
||||
@@ -486,6 +507,7 @@ from .rtl2 import RTL2IE
|
||||
from .rtp import RTPIE
|
||||
from .rts import RTSIE
|
||||
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
|
||||
from .rtvnh import RTVNHIE
|
||||
from .ruhd import RUHDIE
|
||||
from .rutube import (
|
||||
RutubeIE,
|
||||
@@ -512,6 +534,7 @@ from .senateisvp import SenateISVPIE
|
||||
from .servingsys import ServingSysIE
|
||||
from .sexu import SexuIE
|
||||
from .sexykarma import SexyKarmaIE
|
||||
from .shahid import ShahidIE
|
||||
from .shared import SharedIE
|
||||
from .sharesix import ShareSixIE
|
||||
from .sina import SinaIE
|
||||
@@ -591,6 +614,7 @@ from .techtalks import TechTalksIE
|
||||
from .ted import TEDIE
|
||||
from .telebruxelles import TeleBruxellesIE
|
||||
from .telecinco import TelecincoIE
|
||||
from .telegraaf import TelegraafIE
|
||||
from .telemb import TeleMBIE
|
||||
from .teletask import TeleTaskIE
|
||||
from .tenplay import TenPlayIE
|
||||
@@ -598,7 +622,10 @@ from .testurl import TestURLIE
|
||||
from .testtube import TestTubeIE
|
||||
from .tf1 import TF1IE
|
||||
from .theonion import TheOnionIE
|
||||
from .theplatform import ThePlatformIE
|
||||
from .theplatform import (
|
||||
ThePlatformIE,
|
||||
ThePlatformFeedIE,
|
||||
)
|
||||
from .thesixtyone import TheSixtyOneIE
|
||||
from .thisamericanlife import ThisAmericanLifeIE
|
||||
from .thisav import ThisAVIE
|
||||
@@ -682,7 +709,6 @@ from .vgtv import (
|
||||
from .vh1 import VH1IE
|
||||
from .vice import ViceIE
|
||||
from .viddler import ViddlerIE
|
||||
from .videobam import VideoBamIE
|
||||
from .videodetective import VideoDetectiveIE
|
||||
from .videolecturesnet import VideoLecturesNetIE
|
||||
from .videofyme import VideofyMeIE
|
||||
|
@@ -1,16 +1,20 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
js_to_json,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class ABCIE(InfoExtractor):
|
||||
IE_NAME = 'abc.net.au'
|
||||
_VALID_URL = r'http://www\.abc\.net\.au/news/[^/]+/[^/]+/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
|
||||
'md5': 'cb3dd03b18455a661071ee1e28344d9f',
|
||||
'info_dict': {
|
||||
@@ -19,22 +23,47 @@ class ABCIE(InfoExtractor):
|
||||
'title': 'Australia to help staff Ebola treatment centre in Sierra Leone',
|
||||
'description': 'md5:809ad29c67a05f54eb41f2a105693a67',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.abc.net.au/news/2015-08-17/warren-entsch-introduces-same-sex-marriage-bill/6702326',
|
||||
'md5': 'db2a5369238b51f9811ad815b69dc086',
|
||||
'info_dict': {
|
||||
'id': 'NvqvPeNZsHU',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20150816',
|
||||
'uploader': 'ABC News (Australia)',
|
||||
'description': 'Government backbencher Warren Entsch introduces a cross-party sponsored bill to legalise same-sex marriage, saying the bill is designed to promote "an inclusive Australia, not a divided one.". Read more here: http://ab.co/1Mwc6ef',
|
||||
'uploader_id': 'NewsOnABC',
|
||||
'title': 'Marriage Equality: Warren Entsch introduces same sex marriage bill',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
urls_info_json = self._search_regex(
|
||||
r'inlineVideoData\.push\((.*?)\);', webpage, 'video urls',
|
||||
flags=re.DOTALL)
|
||||
urls_info = json.loads(urls_info_json.replace('\'', '"'))
|
||||
mobj = re.search(
|
||||
r'inline(?P<type>Video|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
|
||||
webpage)
|
||||
if mobj is None:
|
||||
raise ExtractorError('Unable to extract video urls')
|
||||
|
||||
urls_info = self._parse_json(
|
||||
mobj.group('json_data'), video_id, transform_source=js_to_json)
|
||||
|
||||
if not isinstance(urls_info, list):
|
||||
urls_info = [urls_info]
|
||||
|
||||
if mobj.group('type') == 'YouTube':
|
||||
return self.playlist_result([
|
||||
self.url_result(url_info['url']) for url_info in urls_info])
|
||||
|
||||
formats = [{
|
||||
'url': url_info['url'],
|
||||
'width': int(url_info['width']),
|
||||
'height': int(url_info['height']),
|
||||
'tbr': int(url_info['bitrate']),
|
||||
'filesize': int(url_info['filesize']),
|
||||
'width': int_or_none(url_info.get('width')),
|
||||
'height': int_or_none(url_info.get('height')),
|
||||
'tbr': int_or_none(url_info.get('bitrate')),
|
||||
'filesize': int_or_none(url_info.get('filesize')),
|
||||
} for url_info in urls_info]
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@@ -18,6 +18,7 @@ class BreakIE(InfoExtractor):
|
||||
'id': '2468056',
|
||||
'ext': 'mp4',
|
||||
'title': 'When Girls Act Like D-Bags',
|
||||
'age_limit': 13,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.break.com/video/ugc/baby-flex-2773063',
|
||||
|
@@ -6,6 +6,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
from .bliptv import BlipTVIE
|
||||
from .screenwavemedia import ScreenwaveMediaIE
|
||||
|
||||
|
||||
class CinemassacreIE(InfoExtractor):
|
||||
@@ -83,10 +84,10 @@ class CinemassacreIE(InfoExtractor):
|
||||
|
||||
playerdata_url = self._search_regex(
|
||||
[
|
||||
r'src="(http://(?:player2\.screenwavemedia\.com|player\.screenwavemedia\.com/play)/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
|
||||
r'<iframe[^>]+src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
|
||||
ScreenwaveMediaIE.EMBED_PATTERN,
|
||||
r'<iframe[^>]+src="(?P<url>(?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
|
||||
],
|
||||
webpage, 'player data URL', default=None)
|
||||
webpage, 'player data URL', default=None, group='url')
|
||||
if not playerdata_url:
|
||||
playerdata_url = BlipTVIE._extract_url(webpage)
|
||||
if not playerdata_url:
|
||||
|
@@ -1,53 +1,68 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_duration,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_iso8601,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class ClipfishIE(InfoExtractor):
|
||||
IE_NAME = 'clipfish'
|
||||
|
||||
_VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
|
||||
_VALID_URL = r'https?://(?:www\.)?clipfish\.de/(?:[^/]+/)+video/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
|
||||
'md5': '2521cd644e862936cf2e698206e47385',
|
||||
'md5': '79bc922f3e8a9097b3d68a93780fd475',
|
||||
'info_dict': {
|
||||
'id': '3966754',
|
||||
'ext': 'mp4',
|
||||
'title': 'FIFA 14 - E3 2013 Trailer',
|
||||
'timestamp': 1370938118,
|
||||
'upload_date': '20130611',
|
||||
'duration': 82,
|
||||
},
|
||||
'skip': 'Blocked in the US'
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group(1)
|
||||
video_id = self._match_id(url)
|
||||
|
||||
info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
|
||||
(video_id, int(time.time())))
|
||||
doc = self._download_xml(
|
||||
info_url, video_id, note='Downloading info page')
|
||||
title = doc.find('title').text
|
||||
video_url = doc.find('filename').text
|
||||
if video_url is None:
|
||||
xml_bytes = xml.etree.ElementTree.tostring(doc)
|
||||
raise ExtractorError('Cannot find video URL in document %r' %
|
||||
xml_bytes)
|
||||
thumbnail = doc.find('imageurl').text
|
||||
duration = parse_duration(doc.find('duration').text)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_info = self._parse_json(
|
||||
js_to_json(self._html_search_regex(
|
||||
'(?s)videoObject\s*=\s*({.+?});', webpage, 'video object')),
|
||||
video_id)
|
||||
|
||||
formats = []
|
||||
for video_url in re.findall(r'var\s+videourl\s*=\s*"([^"]+)"', webpage):
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'm3u8':
|
||||
formats.append({
|
||||
'url': video_url.replace('de.hls.fra.clipfish.de', 'hls.fra.clipfish.de'),
|
||||
'ext': 'mp4',
|
||||
'format_id': 'hls',
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': ext,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = remove_end(self._og_search_title(webpage), ' - Video')
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
duration = int_or_none(video_info.get('length'))
|
||||
timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage, 'upload date'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
}
|
||||
|
@@ -15,9 +15,11 @@ import xml.etree.ElementTree
|
||||
from ..compat import (
|
||||
compat_cookiejar,
|
||||
compat_cookies,
|
||||
compat_getpass,
|
||||
compat_HTTPError,
|
||||
compat_http_client,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
@@ -37,6 +39,9 @@ from ..utils import (
|
||||
RegexNotFoundError,
|
||||
sanitize_filename,
|
||||
unescapeHTML,
|
||||
url_basename,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
)
|
||||
|
||||
|
||||
@@ -200,8 +205,8 @@ class InfoExtractor(object):
|
||||
There must be a key "entries", which is a list, an iterable, or a PagedList
|
||||
object, each element of which is a valid dictionary by this specification.
|
||||
|
||||
Additionally, playlists can have "title" and "id" attributes with the same
|
||||
semantics as videos (see above).
|
||||
Additionally, playlists can have "title", "description" and "id" attributes
|
||||
with the same semantics as videos (see above).
|
||||
|
||||
|
||||
_type "multi_video" indicates that there are multiple videos that
|
||||
@@ -505,6 +510,12 @@ class InfoExtractor(object):
|
||||
"""Report attempt to log in."""
|
||||
self.to_screen('Logging in')
|
||||
|
||||
@staticmethod
|
||||
def raise_login_required(msg='This video is only available for registered users'):
|
||||
raise ExtractorError(
|
||||
'%s. Use --username and --password or --netrc to provide account credentials.' % msg,
|
||||
expected=True)
|
||||
|
||||
# Methods for following #608
|
||||
@staticmethod
|
||||
def url_result(url, ie=None, video_id=None, video_title=None):
|
||||
@@ -606,7 +617,7 @@ class InfoExtractor(object):
|
||||
|
||||
return (username, password)
|
||||
|
||||
def _get_tfa_info(self):
|
||||
def _get_tfa_info(self, note='two-factor verification code'):
|
||||
"""
|
||||
Get the two-factor authentication info
|
||||
TODO - asking the user will be required for sms/phone verify
|
||||
@@ -620,7 +631,7 @@ class InfoExtractor(object):
|
||||
if downloader_params.get('twofactor', None) is not None:
|
||||
return downloader_params['twofactor']
|
||||
|
||||
return None
|
||||
return compat_getpass('Type %s and press [Return]: ' % note)
|
||||
|
||||
# Helper functions for extracting OpenGraph info
|
||||
@staticmethod
|
||||
@@ -636,7 +647,7 @@ class InfoExtractor(object):
|
||||
@staticmethod
|
||||
def _meta_regex(prop):
|
||||
return r'''(?isx)<meta
|
||||
(?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
|
||||
(?=[^>]+(?:itemprop|name|property|id|http-equiv)=(["\']?)%s\1)
|
||||
[^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
|
||||
|
||||
def _og_search_property(self, prop, html, name=None, **kargs):
|
||||
@@ -720,16 +731,18 @@ class InfoExtractor(object):
|
||||
|
||||
@staticmethod
|
||||
def _hidden_inputs(html):
|
||||
return dict([
|
||||
(input.group('name'), input.group('value')) for input in re.finditer(
|
||||
r'''(?x)
|
||||
<input\s+
|
||||
type=(?P<q_hidden>["\'])hidden(?P=q_hidden)\s+
|
||||
name=(?P<q_name>["\'])(?P<name>.+?)(?P=q_name)\s+
|
||||
(?:id=(?P<q_id>["\']).+?(?P=q_id)\s+)?
|
||||
value=(?P<q_value>["\'])(?P<value>.*?)(?P=q_value)
|
||||
''', html)
|
||||
])
|
||||
hidden_inputs = {}
|
||||
for input in re.findall(r'<input([^>]+)>', html):
|
||||
if not re.search(r'type=(["\'])hidden\1', input):
|
||||
continue
|
||||
name = re.search(r'name=(["\'])(?P<value>.+?)\1', input)
|
||||
if not name:
|
||||
continue
|
||||
value = re.search(r'value=(["\'])(?P<value>.*?)\1', input)
|
||||
if not value:
|
||||
continue
|
||||
hidden_inputs[name.group('value')] = value.group('value')
|
||||
return hidden_inputs
|
||||
|
||||
def _form_hidden_inputs(self, form_id, html):
|
||||
form = self._search_regex(
|
||||
@@ -978,69 +991,221 @@ class InfoExtractor(object):
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
# TODO: improve extraction
|
||||
def _extract_smil_formats(self, smil_url, video_id, fatal=True):
|
||||
smil = self._download_xml(
|
||||
smil_url, video_id, 'Downloading SMIL file',
|
||||
'Unable to download SMIL file', fatal=fatal)
|
||||
@staticmethod
|
||||
def _xpath_ns(path, namespace=None):
|
||||
if not namespace:
|
||||
return path
|
||||
out = []
|
||||
for c in path.split('/'):
|
||||
if not c or c == '.':
|
||||
out.append(c)
|
||||
else:
|
||||
out.append('{%s}%s' % (namespace, c))
|
||||
return '/'.join(out)
|
||||
|
||||
def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None):
|
||||
smil = self._download_smil(smil_url, video_id, fatal=fatal)
|
||||
|
||||
if smil is False:
|
||||
assert not fatal
|
||||
return []
|
||||
|
||||
base = smil.find('./head/meta').get('base')
|
||||
namespace = self._parse_smil_namespace(smil)
|
||||
|
||||
return self._parse_smil_formats(
|
||||
smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
|
||||
|
||||
def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None):
|
||||
smil = self._download_smil(smil_url, video_id, fatal=fatal)
|
||||
if smil is False:
|
||||
return {}
|
||||
return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params)
|
||||
|
||||
def _download_smil(self, smil_url, video_id, fatal=True):
|
||||
return self._download_xml(
|
||||
smil_url, video_id, 'Downloading SMIL file',
|
||||
'Unable to download SMIL file', fatal=fatal)
|
||||
|
||||
def _parse_smil(self, smil, smil_url, video_id, f4m_params=None):
|
||||
namespace = self._parse_smil_namespace(smil)
|
||||
|
||||
formats = self._parse_smil_formats(
|
||||
smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
|
||||
subtitles = self._parse_smil_subtitles(smil, namespace=namespace)
|
||||
|
||||
video_id = os.path.splitext(url_basename(smil_url))[0]
|
||||
title = None
|
||||
description = None
|
||||
for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
|
||||
name = meta.attrib.get('name')
|
||||
content = meta.attrib.get('content')
|
||||
if not name or not content:
|
||||
continue
|
||||
if not title and name == 'title':
|
||||
title = content
|
||||
elif not description and name in ('description', 'abstract'):
|
||||
description = content
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title or video_id,
|
||||
'description': description,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _parse_smil_namespace(self, smil):
|
||||
return self._search_regex(
|
||||
r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None)
|
||||
|
||||
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
|
||||
base = smil_url
|
||||
for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
|
||||
b = meta.get('base') or meta.get('httpBase')
|
||||
if b:
|
||||
base = b
|
||||
break
|
||||
|
||||
formats = []
|
||||
rtmp_count = 0
|
||||
if smil.findall('./body/seq/video'):
|
||||
video = smil.findall('./body/seq/video')[0]
|
||||
fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
|
||||
formats.extend(fmts)
|
||||
else:
|
||||
for video in smil.findall('./body/switch/video'):
|
||||
fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count)
|
||||
formats.extend(fmts)
|
||||
http_count = 0
|
||||
|
||||
videos = smil.findall(self._xpath_ns('.//video', namespace))
|
||||
for video in videos:
|
||||
src = video.get('src')
|
||||
if not src:
|
||||
continue
|
||||
|
||||
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
||||
filesize = int_or_none(video.get('size') or video.get('fileSize'))
|
||||
width = int_or_none(video.get('width'))
|
||||
height = int_or_none(video.get('height'))
|
||||
proto = video.get('proto')
|
||||
ext = video.get('ext')
|
||||
src_ext = determine_ext(src)
|
||||
streamer = video.get('streamer') or base
|
||||
|
||||
if proto == 'rtmp' or streamer.startswith('rtmp'):
|
||||
rtmp_count += 1
|
||||
formats.append({
|
||||
'url': streamer,
|
||||
'play_path': src,
|
||||
'ext': 'flv',
|
||||
'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
|
||||
'tbr': bitrate,
|
||||
'filesize': filesize,
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
if transform_rtmp_url:
|
||||
streamer, src = transform_rtmp_url(streamer, src)
|
||||
formats[-1].update({
|
||||
'url': streamer,
|
||||
'play_path': src,
|
||||
})
|
||||
continue
|
||||
|
||||
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
|
||||
|
||||
if proto == 'm3u8' or src_ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src_url, video_id, ext or 'mp4', m3u8_id='hls'))
|
||||
continue
|
||||
|
||||
if src_ext == 'f4m':
|
||||
f4m_url = src_url
|
||||
if not f4m_params:
|
||||
f4m_params = {
|
||||
'hdcore': '3.2.0',
|
||||
'plugin': 'flowplayer-3.2.0.1',
|
||||
}
|
||||
f4m_url += '&' if '?' in f4m_url else '?'
|
||||
f4m_url += compat_urllib_parse.urlencode(f4m_params)
|
||||
formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds'))
|
||||
continue
|
||||
|
||||
if src_url.startswith('http'):
|
||||
http_count += 1
|
||||
formats.append({
|
||||
'url': src_url,
|
||||
'ext': ext or src_ext or 'flv',
|
||||
'format_id': 'http-%d' % (bitrate or http_count),
|
||||
'tbr': bitrate,
|
||||
'filesize': filesize,
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
continue
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return formats
|
||||
|
||||
def _parse_smil_video(self, video, video_id, base, rtmp_count):
|
||||
src = video.get('src')
|
||||
if not src:
|
||||
return [], rtmp_count
|
||||
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
||||
width = int_or_none(video.get('width'))
|
||||
height = int_or_none(video.get('height'))
|
||||
proto = video.get('proto')
|
||||
if not proto:
|
||||
if base:
|
||||
if base.startswith('rtmp'):
|
||||
proto = 'rtmp'
|
||||
elif base.startswith('http'):
|
||||
proto = 'http'
|
||||
ext = video.get('ext')
|
||||
if proto == 'm3u8':
|
||||
return self._extract_m3u8_formats(src, video_id, ext), rtmp_count
|
||||
elif proto == 'rtmp':
|
||||
rtmp_count += 1
|
||||
streamer = video.get('streamer') or base
|
||||
return ([{
|
||||
'url': streamer,
|
||||
'play_path': src,
|
||||
'ext': 'flv',
|
||||
'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
|
||||
'tbr': bitrate,
|
||||
'width': width,
|
||||
'height': height,
|
||||
}], rtmp_count)
|
||||
elif proto.startswith('http'):
|
||||
return ([{
|
||||
'url': base + src,
|
||||
'ext': ext or 'flv',
|
||||
'tbr': bitrate,
|
||||
'width': width,
|
||||
'height': height,
|
||||
}], rtmp_count)
|
||||
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
|
||||
subtitles = {}
|
||||
for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
|
||||
src = textstream.get('src')
|
||||
if not src:
|
||||
continue
|
||||
ext = textstream.get('ext') or determine_ext(src)
|
||||
if not ext:
|
||||
type_ = textstream.get('type')
|
||||
SUBTITLES_TYPES = {
|
||||
'text/vtt': 'vtt',
|
||||
'text/srt': 'srt',
|
||||
'application/smptett+xml': 'tt',
|
||||
}
|
||||
if type_ in SUBTITLES_TYPES:
|
||||
ext = SUBTITLES_TYPES[type_]
|
||||
lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or textstream.get('lang') or subtitles_lang
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': src,
|
||||
'ext': ext,
|
||||
})
|
||||
return subtitles
|
||||
|
||||
def _extract_xspf_playlist(self, playlist_url, playlist_id, fatal=True):
|
||||
xspf = self._download_xml(
|
||||
playlist_url, playlist_id, 'Downloading xpsf playlist',
|
||||
'Unable to download xspf manifest', fatal=fatal)
|
||||
if xspf is False:
|
||||
return []
|
||||
return self._parse_xspf(xspf, playlist_id)
|
||||
|
||||
def _parse_xspf(self, playlist, playlist_id):
|
||||
NS_MAP = {
|
||||
'xspf': 'http://xspf.org/ns/0/',
|
||||
's1': 'http://static.streamone.nl/player/ns/0',
|
||||
}
|
||||
|
||||
entries = []
|
||||
for track in playlist.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
|
||||
title = xpath_text(
|
||||
track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id)
|
||||
description = xpath_text(
|
||||
track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description')
|
||||
thumbnail = xpath_text(
|
||||
track, xpath_with_ns('./xspf:image', NS_MAP), 'thumbnail')
|
||||
duration = float_or_none(
|
||||
xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000)
|
||||
|
||||
formats = [{
|
||||
'url': location.text,
|
||||
'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
|
||||
'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
|
||||
'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
|
||||
} for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))]
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
'id': playlist_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
})
|
||||
return entries
|
||||
|
||||
def _live_title(self, name):
|
||||
""" Generate the title for a live video """
|
||||
@@ -1120,6 +1285,23 @@ class InfoExtractor(object):
|
||||
def _get_subtitles(self, *args, **kwargs):
|
||||
raise NotImplementedError("This method must be implemented by subclasses")
|
||||
|
||||
@staticmethod
|
||||
def _merge_subtitle_items(subtitle_list1, subtitle_list2):
|
||||
""" Merge subtitle items for one language. Items with duplicated URLs
|
||||
will be dropped. """
|
||||
list1_urls = set([item['url'] for item in subtitle_list1])
|
||||
ret = list(subtitle_list1)
|
||||
ret.extend([item for item in subtitle_list2 if item['url'] not in list1_urls])
|
||||
return ret
|
||||
|
||||
@classmethod
|
||||
def _merge_subtitles(cls, subtitle_dict1, subtitle_dict2):
|
||||
""" Merge two subtitle dictionaries, language by language. """
|
||||
ret = dict(subtitle_dict1)
|
||||
for lang in subtitle_dict2:
|
||||
ret[lang] = cls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang])
|
||||
return ret
|
||||
|
||||
def extract_automatic_captions(self, *args, **kwargs):
|
||||
if (self._downloader.params.get('writeautomaticsub', False) or
|
||||
self._downloader.params.get('listsubtitles')):
|
||||
|
@@ -14,11 +14,13 @@ from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
bytes_to_intlist,
|
||||
intlist_to_bytes,
|
||||
remove_end,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
)
|
||||
@@ -235,7 +237,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
webpage_url = 'http://www.' + mobj.group('url')
|
||||
|
||||
webpage = self._download_webpage(webpage_url, video_id, 'Downloading webpage')
|
||||
note_m = self._html_search_regex(r'<div class="showmedia-trailer-notice">(.+?)</div>', webpage, 'trailer-notice', default='')
|
||||
note_m = self._html_search_regex(
|
||||
r'<div class="showmedia-trailer-notice">(.+?)</div>',
|
||||
webpage, 'trailer-notice', default='')
|
||||
if note_m:
|
||||
raise ExtractorError(note_m)
|
||||
|
||||
@@ -245,6 +249,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
if msg.get('type') == 'error':
|
||||
raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True)
|
||||
|
||||
if 'To view this, please log in to verify you are 18 or older.' in webpage:
|
||||
self.raise_login_required()
|
||||
|
||||
video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, 'video_title', flags=re.DOTALL)
|
||||
video_title = re.sub(r' {2,}', ' ', video_title)
|
||||
video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='')
|
||||
@@ -279,6 +286,20 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
stream_info = streamdata.find('./{default}preload/stream_info')
|
||||
video_url = stream_info.find('./host').text
|
||||
video_play_path = stream_info.find('./file').text
|
||||
|
||||
if '.fplive.net/' in video_url:
|
||||
video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
|
||||
parsed_video_url = compat_urlparse.urlparse(video_url)
|
||||
direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
|
||||
netloc='v.lvlt.crcdn.net',
|
||||
path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_play_path.split(':')[-1])))
|
||||
if self._is_valid_url(direct_video_url, video_id, video_format):
|
||||
formats.append({
|
||||
'url': direct_video_url,
|
||||
'format_id': video_format,
|
||||
})
|
||||
continue
|
||||
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'play_path': video_play_path,
|
||||
|
84
youtube_dl/extractor/dcn.py
Normal file
84
youtube_dl/extractor/dcn.py
Normal file
@@ -0,0 +1,84 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class DCNIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/.+|show/\d+/.+?)/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.dcndigital.ae/#/show/199074/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375/6887',
|
||||
'info_dict':
|
||||
{
|
||||
'id': '17375',
|
||||
'ext': 'mp4',
|
||||
'title': 'رحلة العمر : الحلقة 1',
|
||||
'description': 'md5:0156e935d870acb8ef0a66d24070c6d6',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 2041,
|
||||
'timestamp': 1227504126,
|
||||
'upload_date': '20081124',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id,
|
||||
headers={'Origin': 'http://www.dcndigital.ae'})
|
||||
|
||||
video = self._download_json(request, video_id)
|
||||
title = video.get('title_en') or video['title_ar']
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?'
|
||||
+ compat_urllib_parse.urlencode({
|
||||
'id': video['id'],
|
||||
'user_id': video['user_id'],
|
||||
'signature': video['signature'],
|
||||
'countries': 'Q0M=',
|
||||
'filter': 'DENY',
|
||||
}), video_id)
|
||||
|
||||
m3u8_url = self._html_search_regex(r'file:\s*"([^"]+)', webpage, 'm3u8 url')
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
|
||||
rtsp_url = self._search_regex(
|
||||
r'<a[^>]+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False)
|
||||
if rtsp_url:
|
||||
formats.append({
|
||||
'url': rtsp_url,
|
||||
'format_id': 'rtsp',
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
img = video.get('img')
|
||||
thumbnail = 'http://admin.mangomolo.com/analytics/%s' % img if img else None
|
||||
duration = int_or_none(video.get('duration'))
|
||||
description = video.get('description_en') or video.get('description_ar')
|
||||
timestamp = parse_iso8601(video.get('create_time') or video.get('update_time'), ' ')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
}
|
@@ -1,10 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
parse_duration,
|
||||
)
|
||||
from ..utils import parse_duration
|
||||
|
||||
|
||||
class DHMIE(InfoExtractor):
|
||||
@@ -34,24 +31,14 @@ class DHMIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
playlist_url = self._search_regex(
|
||||
r"file\s*:\s*'([^']+)'", webpage, 'playlist url')
|
||||
|
||||
playlist = self._download_xml(playlist_url, video_id)
|
||||
|
||||
track = playlist.find(
|
||||
'./{http://xspf.org/ns/0/}trackList/{http://xspf.org/ns/0/}track')
|
||||
|
||||
video_url = xpath_text(
|
||||
track, './{http://xspf.org/ns/0/}location',
|
||||
'video url', fatal=True)
|
||||
thumbnail = xpath_text(
|
||||
track, './{http://xspf.org/ns/0/}image',
|
||||
'thumbnail')
|
||||
entries = self._extract_xspf_playlist(playlist_url, playlist_id)
|
||||
|
||||
title = self._search_regex(
|
||||
[r'dc:title="([^"]+)"', r'<title> »([^<]+)</title>'],
|
||||
@@ -63,11 +50,10 @@ class DHMIE(InfoExtractor):
|
||||
r'<em>Length\s*</em>\s*:\s*</strong>([^<]+)',
|
||||
webpage, 'duration', default=None))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
entries[0].update({
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
})
|
||||
|
||||
return self.playlist_result(entries, playlist_id)
|
||||
|
@@ -9,8 +9,8 @@ from ..utils import qualities
|
||||
|
||||
|
||||
class DumpertIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dumpert\.nl/mediabase/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?dumpert\.nl/(?:mediabase|embed)/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
|
||||
'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
|
||||
'info_dict': {
|
||||
@@ -20,11 +20,15 @@ class DumpertIE(InfoExtractor):
|
||||
'description': 'Niet schrikken hoor',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.dumpert.nl/embed/6675421/dc440fe7/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
url = 'https://www.dumpert.nl/mediabase/' + video_id
|
||||
req = compat_urllib_request.Request(url)
|
||||
req.add_header('Cookie', 'nsfw=1; cpc=10')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
@@ -71,8 +71,7 @@ class EroProfileIE(InfoExtractor):
|
||||
|
||||
m = re.search(r'You must be logged in to view this video\.', webpage)
|
||||
if m:
|
||||
raise ExtractorError(
|
||||
'This video requires login. Please specify a username and password and try again.', expected=True)
|
||||
self.raise_login_required('This video requires login')
|
||||
|
||||
video_id = self._search_regex(
|
||||
[r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
|
||||
|
74
youtube_dl/extractor/esri.py
Normal file
74
youtube_dl/extractor/esri.py
Normal file
@@ -0,0 +1,74 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_filesize,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class EsriVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video\.esri\.com/watch/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'https://video.esri.com/watch/1124/arcgis-online-_dash_-developing-applications',
|
||||
'md5': 'd4aaf1408b221f1b38227a9bbaeb95bc',
|
||||
'info_dict': {
|
||||
'id': '1124',
|
||||
'ext': 'mp4',
|
||||
'title': 'ArcGIS Online - Developing Applications',
|
||||
'description': 'Jeremy Bartley demonstrates how to develop applications with ArcGIS Online.',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 185,
|
||||
'upload_date': '20120419',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
formats = []
|
||||
for width, height, content in re.findall(
|
||||
r'(?s)<li><strong>(\d+)x(\d+):</strong>(.+?)</li>', webpage):
|
||||
for video_url, ext, filesize in re.findall(
|
||||
r'<a[^>]+href="([^"]+)">([^<]+) \(([^<]+)\)</a>', content):
|
||||
formats.append({
|
||||
'url': compat_urlparse.urljoin(url, video_url),
|
||||
'ext': ext.lower(),
|
||||
'format_id': '%s-%s' % (ext.lower(), height),
|
||||
'width': int(width),
|
||||
'height': int(height),
|
||||
'filesize_approx': parse_filesize(filesize),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_meta('title', webpage, 'title')
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, 'description', fatal=False)
|
||||
|
||||
thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail', fatal=False)
|
||||
if thumbnail:
|
||||
thumbnail = re.sub(r'_[st]\.jpg$', '_x.jpg', thumbnail)
|
||||
|
||||
duration = int_or_none(self._search_regex(
|
||||
[r'var\s+videoSeconds\s*=\s*(\d+)', r"'duration'\s*:\s*(\d+)"],
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
upload_date = unified_strdate(self._html_search_meta(
|
||||
'last-modified', webpage, 'upload date', fatal=None))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'upload_date': upload_date,
|
||||
'formats': formats
|
||||
}
|
@@ -15,7 +15,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class FC2IE(InfoExtractor):
|
||||
_VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)?content/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)*content/(?P<id>[^/]+)'
|
||||
IE_NAME = 'fc2'
|
||||
_NETRC_MACHINE = 'fc2'
|
||||
_TESTS = [{
|
||||
@@ -37,6 +37,9 @@ class FC2IE(InfoExtractor):
|
||||
'password': '(snip)',
|
||||
'skip': 'requires actual password'
|
||||
}
|
||||
}, {
|
||||
'url': 'http://video.fc2.com/en/a/content/20130926eZpARwsF',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
@@ -80,13 +83,13 @@ class FC2IE(InfoExtractor):
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
refer = url.replace('/content/', '/a/content/')
|
||||
refer = url.replace('/content/', '/a/content/') if '/a/content/' not in url else url
|
||||
|
||||
mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest()
|
||||
|
||||
info_url = (
|
||||
"http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&".
|
||||
format(video_id, mimi, compat_urllib_request.quote(refer, safe='').replace('.', '%2E')))
|
||||
format(video_id, mimi, compat_urllib_request.quote(refer, safe=b'').replace('.', '%2E')))
|
||||
|
||||
info_webpage = self._download_webpage(
|
||||
info_url, video_id, note='Downloading info page')
|
||||
|
@@ -30,6 +30,10 @@ class FolketingetIE(InfoExtractor):
|
||||
'upload_date': '20141120',
|
||||
'duration': 3960,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -32,6 +32,7 @@ class FourTubeIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'categories': list,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -1,5 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
@@ -8,7 +10,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class FoxNewsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video\.foxnews\.com/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
|
||||
IE_DESC = 'Fox News and Fox Business Video'
|
||||
_VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips',
|
||||
@@ -42,13 +45,19 @@ class FoxNewsIE(InfoExtractor):
|
||||
'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://video.foxbusiness.com/v/4442309889001',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
host = mobj.group('host')
|
||||
|
||||
video = self._download_json(
|
||||
'http://video.foxnews.com/v/feed/video/%s.js?template=fox' % video_id, video_id)
|
||||
'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id), video_id)
|
||||
|
||||
item = video['channel']['item']
|
||||
title = item['title']
|
||||
|
@@ -78,9 +78,14 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = info['titre']
|
||||
subtitle = info.get('sous_titre')
|
||||
if subtitle:
|
||||
title += ' - %s' % subtitle
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info['titre'],
|
||||
'title': title,
|
||||
'description': clean_html(info['synopsis']),
|
||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
|
||||
'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
|
||||
@@ -214,15 +219,15 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||
},
|
||||
# france5
|
||||
{
|
||||
'url': 'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
|
||||
'md5': '78f0f4064f9074438e660785bbf2c5d9',
|
||||
'url': 'http://www.france5.fr/emissions/c-a-dire/videos/quels_sont_les_enjeux_de_cette_rentree_politique__31-08-2015_908948?onglet=tous&page=1',
|
||||
'md5': 'f6c577df3806e26471b3d21631241fd0',
|
||||
'info_dict': {
|
||||
'id': '108961659',
|
||||
'id': '123327454',
|
||||
'ext': 'flv',
|
||||
'title': 'C à dire ?!',
|
||||
'description': 'md5:1a4aeab476eb657bf57c4ff122129f81',
|
||||
'upload_date': '20140915',
|
||||
'timestamp': 1410795000,
|
||||
'title': 'C à dire ?! - Quels sont les enjeux de cette rentrée politique ?',
|
||||
'description': 'md5:4a0d5cb5dce89d353522a84462bae5a4',
|
||||
'upload_date': '20150831',
|
||||
'timestamp': 1441035120,
|
||||
},
|
||||
},
|
||||
# franceo
|
||||
|
@@ -53,7 +53,7 @@ class FunnyOrDieIE(InfoExtractor):
|
||||
for bitrate in bitrates:
|
||||
for link in links:
|
||||
formats.append({
|
||||
'url': '%s%d.%s' % (link[0], bitrate, link[1]),
|
||||
'url': self._proto_relative_url('%s%d.%s' % (link[0], bitrate, link[1])),
|
||||
'format_id': '%s-%d' % (link[1], bitrate),
|
||||
'vbr': bitrate,
|
||||
})
|
||||
|
@@ -48,6 +48,7 @@ from .vimeo import VimeoIE
|
||||
from .dailymotion import DailymotionCloudIE
|
||||
from .onionstudios import OnionStudiosIE
|
||||
from .snagfilms import SnagFilmsEmbedIE
|
||||
from .screenwavemedia import ScreenwaveMediaIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -130,6 +131,89 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
|
||||
}
|
||||
},
|
||||
# SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
|
||||
{
|
||||
'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
|
||||
'info_dict': {
|
||||
'id': 'smil',
|
||||
'ext': 'mp4',
|
||||
'title': 'Automatics, robotics and biocybernetics',
|
||||
'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
|
||||
'formats': 'mincount:16',
|
||||
'subtitles': 'mincount:1',
|
||||
},
|
||||
'params': {
|
||||
'force_generic_extractor': True,
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
|
||||
{
|
||||
'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
|
||||
'info_dict': {
|
||||
'id': 'hds',
|
||||
'ext': 'flv',
|
||||
'title': 'hds',
|
||||
'formats': 'mincount:1',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# SMIL from https://www.restudy.dk/video/play/id/1637
|
||||
{
|
||||
'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
|
||||
'info_dict': {
|
||||
'id': 'video_1637',
|
||||
'ext': 'flv',
|
||||
'title': 'video_1637',
|
||||
'formats': 'mincount:3',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
|
||||
{
|
||||
'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
|
||||
'info_dict': {
|
||||
'id': 'smil-service',
|
||||
'ext': 'flv',
|
||||
'title': 'smil-service',
|
||||
'formats': 'mincount:1',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
|
||||
{
|
||||
'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
|
||||
'info_dict': {
|
||||
'id': '4719370',
|
||||
'ext': 'mp4',
|
||||
'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
|
||||
'formats': 'mincount:3',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
|
||||
{
|
||||
'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
|
||||
'info_dict': {
|
||||
'id': 'mZlp2ctYIUEB',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tikibad ontruimd wegens brand',
|
||||
'description': 'md5:05ca046ff47b931f9b04855015e163a4',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 33,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# google redirect
|
||||
{
|
||||
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
|
||||
@@ -236,6 +320,19 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
},
|
||||
{
|
||||
# ooyala video embedded with http://player.ooyala.com/iframe.js
|
||||
'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
|
||||
'info_dict': {
|
||||
'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
|
||||
'ext': 'mp4',
|
||||
'title': '"Steve Jobs: Man in the Machine" trailer',
|
||||
'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# multiple ooyala embeds on SBN network websites
|
||||
{
|
||||
'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
|
||||
@@ -905,6 +1002,16 @@ class GenericIE(InfoExtractor):
|
||||
'description': 'New experience with Acrobat DC',
|
||||
'duration': 248.667,
|
||||
},
|
||||
},
|
||||
# ScreenwaveMedia embed
|
||||
{
|
||||
'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
|
||||
'md5': '24ace5baba0d35d55c6810b51f34e9e0',
|
||||
'info_dict': {
|
||||
'id': 'cinemasnob-55d26273809dd',
|
||||
'ext': 'mp4',
|
||||
'title': 'cinemasnob',
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
@@ -1110,11 +1217,15 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
self.report_extraction(video_id)
|
||||
|
||||
# Is it an RSS feed?
|
||||
# Is it an RSS feed, a SMIL file or a XSPF playlist?
|
||||
try:
|
||||
doc = parse_xml(webpage)
|
||||
if doc.tag == 'rss':
|
||||
return self._extract_rss(url, video_id, doc)
|
||||
elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
|
||||
return self._parse_smil(doc, url, video_id)
|
||||
elif doc.tag == '{http://xspf.org/ns/0/}playlist':
|
||||
return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
|
||||
except compat_xml_parse_error:
|
||||
pass
|
||||
|
||||
@@ -1320,7 +1431,7 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for Ooyala videos
|
||||
mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
|
||||
mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
|
||||
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
|
||||
re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
|
||||
re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
|
||||
@@ -1618,6 +1729,11 @@ class GenericIE(InfoExtractor):
|
||||
if snagfilms_url:
|
||||
return self.url_result(snagfilms_url)
|
||||
|
||||
# Look for ScreenwaveMedia embeds
|
||||
mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
|
||||
|
||||
# Look for AdobeTVVideo embeds
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
|
||||
@@ -1716,7 +1832,8 @@ class GenericIE(InfoExtractor):
|
||||
# here's a fun little line of code for you:
|
||||
video_id = os.path.splitext(video_id)[0]
|
||||
|
||||
if determine_ext(video_url) == 'smil':
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'smil':
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
'formats': self._extract_smil_formats(video_url, video_id),
|
||||
@@ -1724,6 +1841,8 @@ class GenericIE(InfoExtractor):
|
||||
'title': video_title,
|
||||
'age_limit': age_limit,
|
||||
})
|
||||
elif ext == 'xspf':
|
||||
return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
|
||||
else:
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
|
@@ -13,6 +13,7 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -359,13 +360,8 @@ class GloboIE(InfoExtractor):
|
||||
self._API_URL_TEMPLATE % video_id, video_id)['videos'][0]
|
||||
|
||||
title = video['title']
|
||||
duration = float_or_none(video['duration'], 1000)
|
||||
like_count = video['likes']
|
||||
uploader = video['channel']
|
||||
uploader_id = video['channel_id']
|
||||
|
||||
formats = []
|
||||
|
||||
for resource in video['resources']:
|
||||
resource_id = resource.get('_id')
|
||||
if not resource_id:
|
||||
@@ -407,6 +403,11 @@ class GloboIE(InfoExtractor):
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = float_or_none(video.get('duration'), 1000)
|
||||
like_count = int_or_none(video.get('likes'))
|
||||
uploader = video.get('channel')
|
||||
uploader_id = video.get('channel_id')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
|
@@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class ImgurIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!gallery)(?P<id>[a-zA-Z0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://i.imgur.com/A61SaA1.gifv',
|
||||
@@ -97,3 +97,28 @@ class ImgurIE(InfoExtractor):
|
||||
'description': self._og_search_description(webpage),
|
||||
'title': self._og_search_title(webpage),
|
||||
}
|
||||
|
||||
|
||||
class ImgurAlbumIE(InfoExtractor):
    """Playlist extractor for imgur gallery (album) URLs."""

    _VALID_URL = r'https?://(?:i\.)?imgur\.com/gallery/(?P<id>[a-zA-Z0-9]+)'

    _TEST = {
        'url': 'http://imgur.com/gallery/Q95ko',
        'info_dict': {
            'id': 'Q95ko',
        },
        'playlist_count': 25,
    }

    def _real_extract(self, url):
        """Return a playlist with one URL entry per album image that has a hash."""
        album_id = self._match_id(url)

        listing_url = 'http://imgur.com/gallery/%s/album_images/hit.json?all=true' % album_id
        listing = self._download_json(listing_url, album_id)

        entries = []
        for image in listing['data']['images']:
            image_hash = image.get('hash')
            # Images without a hash cannot be turned into a page URL.
            if image_hash:
                entries.append(self.url_result('http://imgur.com/%s' % image_hash))

        return self.playlist_result(entries, album_id)
|
||||
|
142
youtube_dl/extractor/indavideo.py
Normal file
142
youtube_dl/extractor/indavideo.py
Normal file
@@ -0,0 +1,142 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class IndavideoEmbedIE(InfoExtractor):
    """Extractor for indavideo.hu player URLs (HTML player and SWF player forms)."""

    _VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)'
    _TESTS = [{
        'url': 'http://indavideo.hu/player/video/1bdc3c6d80/',
        'md5': 'f79b009c66194acacd40712a6778acfa',
        'info_dict': {
            'id': '1837039',
            'ext': 'mp4',
            'title': 'Cicatánc',
            'description': '',
            'thumbnail': 're:^https?://.*\.jpg$',
            'uploader': 'cukiajanlo',
            'uploader_id': '83729',
            'timestamp': 1439193826,
            'upload_date': '20150810',
            'duration': 72,
            'age_limit': 0,
            'tags': ['tánc', 'cica', 'cuki', 'cukiajanlo', 'newsroom'],
        },
    }, {
        'url': 'http://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1',
        'only_matching': True,
    }, {
        'url': 'http://assets.indavideo.hu/swf/player.swf?v=fe25e500&vID=1bdc3c6d80&autostart=1&hide=1&i=1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the player JSON for the video and build its info dict.

        The id in the URL is the player hash; the returned ``id`` is the
        ``id`` field of the JSON when present, otherwise the hash.
        """
        video_id = self._match_id(url)

        video = self._download_json(
            'http://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id,
            video_id)['data']

        title = video['title']

        # Collect candidate URLs from 'video_files' plus the single
        # 'video_file'. Work on a copy so the decoded response is not
        # mutated, skip missing values (the previous `if video:` check was
        # always true and appended None), and de-duplicate while keeping
        # order so the prefix derived below is deterministic.
        candidates = list(video.get('video_files') or [])
        candidates.append(video.get('video_file'))
        video_urls = []
        for candidate in candidates:
            if candidate and candidate not in video_urls:
                video_urls.append(candidate)

        # 'flv_files' holds bare file names; they are resolved against the
        # directory of the first known video URL, so without any URL there
        # is nothing to resolve them against.
        if video_urls:
            video_prefix = video_urls[0].rsplit('/', 1)[0]
            for flv_file in video.get('flv_files') or []:
                flv_url = '%s/%s' % (video_prefix, flv_file)
                if flv_url not in video_urls:
                    video_urls.append(flv_url)

        formats = [{
            'url': video_url,
            # The regex yields a string; store the height as an integer.
            'height': int_or_none(self._search_regex(
                r'\.(\d{3,4})\.mp4$', video_url, 'height', default=None)),
        } for video_url in video_urls]
        # NOTE(review): with no URLs at all this relies on _sort_formats to
        # report the missing formats - confirm against the base class.
        self._sort_formats(formats)

        timestamp = video.get('date')
        if timestamp:
            # upload date is in CEST
            timestamp = parse_iso8601(timestamp + ' +0200', ' ')

        thumbnails = [{
            'url': self._proto_relative_url(thumbnail)
        } for thumbnail in video.get('thumbnails', [])]

        tags = [tag['title'] for tag in video.get('tags', [])]

        return {
            'id': video.get('id') or video_id,
            'title': title,
            'description': video.get('description'),
            'thumbnails': thumbnails,
            'uploader': video.get('user_name'),
            'uploader_id': video.get('user_id'),
            'timestamp': timestamp,
            'duration': int_or_none(video.get('length')),
            'age_limit': parse_age_limit(video.get('age_limit')),
            'tags': tags,
            'formats': formats,
        }
|
||||
|
||||
|
||||
class IndavideoIE(InfoExtractor):
    """Extractor for indavideo.hu video pages; delegates to IndavideoEmbed."""

    _VALID_URL = r'https?://(?:.+?\.)?indavideo\.hu/video/(?P<id>[^/#?]+)'
    _TESTS = [{
        'url': 'http://indavideo.hu/video/Vicces_cica_1',
        'md5': '8c82244ba85d2a2310275b318eb51eac',
        'info_dict': {
            'id': '1335611',
            'display_id': 'Vicces_cica_1',
            'ext': 'mp4',
            'title': 'Vicces cica',
            'description': 'Játszik a tablettel. :D',
            'thumbnail': 're:^https?://.*\.jpg$',
            'uploader': 'Jet_Pack',
            'uploader_id': '491217',
            'timestamp': 1390821212,
            'upload_date': '20140127',
            'duration': 7,
            'age_limit': 0,
            'tags': ['vicces', 'macska', 'cica', 'ügyes', 'nevetés', 'játszik', 'Cukiság', 'Jet_Pack'],
        },
    }, {
        'url': 'http://index.indavideo.hu/video/2015_0728_beregszasz',
        'only_matching': True,
    }, {
        'url': 'http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko',
        'only_matching': True,
    }, {
        'url': 'http://erotika.indavideo.hu/video/Amator_tini_punci',
        'only_matching': True,
    }, {
        'url': 'http://film.indavideo.hu/video/f_hrom_nagymamm_volt',
        'only_matching': True,
    }, {
        'url': 'http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Find the embed URL on the video page and hand it to IndavideoEmbed."""
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)

        # The page advertises its player through a <link rel="video_src"> tag.
        player_url = self._search_regex(
            r'<link[^>]+rel="video_src"[^>]+href="(.+?)"', page, 'embed url')

        result = {
            '_type': 'url_transparent',
            'ie_key': 'IndavideoEmbed',
        }
        result['url'] = player_url
        result['display_id'] = display_id
        return result
|
@@ -201,7 +201,7 @@ class IqiyiIE(InfoExtractor):
|
||||
return raw_data
|
||||
|
||||
def get_enc_key(self, swf_url, video_id):
|
||||
enc_key = '8e29ab5666d041c3a1ea76e06dabdffb'
|
||||
enc_key = '3601ba290e4f4662848c710e2122007e' # last update at 2015-08-10 for Zombie
|
||||
return enc_key
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -13,12 +13,24 @@ from ..utils import (
|
||||
|
||||
class KalturaIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:kaltura:|
|
||||
https?://(:?(?:www|cdnapisec)\.)?kaltura\.com/index\.php/kwidget/(?:[^/]+/)*?wid/_
|
||||
)(?P<partner_id>\d+)
|
||||
(?::|
|
||||
/(?:[^/]+/)*?entry_id/
|
||||
)(?P<id>[0-9a-z_]+)'''
|
||||
(?:
|
||||
kaltura:(?P<partner_id_s>\d+):(?P<id_s>[0-9a-z_]+)|
|
||||
https?://
|
||||
(:?(?:www|cdnapisec)\.)?kaltura\.com/
|
||||
(?:
|
||||
(?:
|
||||
# flash player
|
||||
index\.php/kwidget/
|
||||
(?:[^/]+/)*?wid/_(?P<partner_id>\d+)/
|
||||
(?:[^/]+/)*?entry_id/(?P<id>[0-9a-z_]+)|
|
||||
# html5 player
|
||||
html5/html5lib/
|
||||
(?:[^/]+/)*?entry_id/(?P<id_html5>[0-9a-z_]+)
|
||||
.*\?.*\bwid=_(?P<partner_id_html5>\d+)
|
||||
)
|
||||
)
|
||||
)
|
||||
'''
|
||||
_API_BASE = 'http://cdnapi.kaltura.com/api_v3/index.php?'
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -43,6 +55,10 @@ class KalturaIE(InfoExtractor):
|
||||
'url': 'https://cdnapisec.kaltura.com/index.php/kwidget/wid/_557781/uiconf_id/22845202/entry_id/1_plr1syf3',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.30.2/mwEmbedFrame.php/p/1337/uiconf_id/20540612/entry_id/1_sf5ovm7u?wid=_243342',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _kaltura_api_call(self, video_id, actions, *args, **kwargs):
|
||||
@@ -105,9 +121,9 @@ class KalturaIE(InfoExtractor):
|
||||
video_id, actions, note='Downloading video info JSON')
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
partner_id, entry_id = mobj.group('partner_id'), mobj.group('id')
|
||||
partner_id = mobj.group('partner_id_s') or mobj.group('partner_id') or mobj.group('partner_id_html5')
|
||||
entry_id = mobj.group('id_s') or mobj.group('id') or mobj.group('id_html5')
|
||||
|
||||
info, source_data = self._get_video_info(entry_id, partner_id)
|
||||
|
||||
@@ -126,7 +142,7 @@ class KalturaIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'id': entry_id,
|
||||
'title': info['name'],
|
||||
'formats': formats,
|
||||
'description': info.get('description'),
|
||||
|
@@ -4,7 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class KontrTubeIE(InfoExtractor):
|
||||
@@ -34,33 +37,28 @@ class KontrTubeIE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
url, display_id, 'Downloading page')
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
video_url = self._search_regex(
|
||||
r"video_url\s*:\s*'(.+?)/?',", webpage, 'video URL')
|
||||
thumbnail = self._html_search_regex(
|
||||
r"preview_url\s*:\s*'(.+?)/?',", webpage, 'video thumbnail', fatal=False)
|
||||
thumbnail = self._search_regex(
|
||||
r"preview_url\s*:\s*'(.+?)/?',", webpage, 'thumbnail', fatal=False)
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.+?)</title>', webpage, 'video title')
|
||||
r'(?s)<h2>(.+?)</h2>', webpage, 'title')
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, 'video description')
|
||||
'description', webpage, 'description')
|
||||
|
||||
mobj = re.search(
|
||||
r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
|
||||
webpage)
|
||||
duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
|
||||
duration = self._search_regex(
|
||||
r'Длительность: <em>([^<]+)</em>', webpage, 'duration', fatal=False)
|
||||
if duration:
|
||||
duration = parse_duration(duration.replace('мин', 'min').replace('сек', 'sec'))
|
||||
|
||||
view_count = self._html_search_regex(
|
||||
r'<div class="col_2">Просмотров: <span>(\d+)</span></div>',
|
||||
view_count = self._search_regex(
|
||||
r'Просмотров: <em>([^<]+)</em>',
|
||||
webpage, 'view count', fatal=False)
|
||||
if view_count:
|
||||
view_count = int_or_none(view_count.replace(' ', ''))
|
||||
|
||||
comment_count = None
|
||||
comment_str = self._html_search_regex(
|
||||
r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count', fatal=False)
|
||||
if comment_str.startswith('комментариев нет'):
|
||||
comment_count = 0
|
||||
else:
|
||||
mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str)
|
||||
if mobj:
|
||||
comment_count = mobj.group('total')
|
||||
comment_count = int_or_none(self._search_regex(
|
||||
r'Комментарии \((\d+)\)<', webpage, ' comment count', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -25,6 +25,9 @@ class KrasViewIE(InfoExtractor):
|
||||
'duration': 27,
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'Not accessible from Travis CI server',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -8,9 +8,9 @@ from ..utils import unified_strdate
|
||||
|
||||
|
||||
class LibsynIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'(?P<mainurl>https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+))'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/',
|
||||
'md5': '443360ee1b58007bc3dcf09b41d093bb',
|
||||
'info_dict': {
|
||||
@@ -19,12 +19,24 @@ class LibsynIE(InfoExtractor):
|
||||
'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
|
||||
'description': 'md5:601cb790edd05908957dae8aaa866465',
|
||||
'upload_date': '20150220',
|
||||
'thumbnail': 're:^https?://.*',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://html5-player.libsyn.com/embed/episode/id/3727166/height/75/width/200/theme/standard/direction/no/autoplay/no/autonext/no/thumbnail/no/preload/no/no_addthis/no/',
|
||||
'md5': '6c5cb21acd622d754d3b1a92b582ce42',
|
||||
'info_dict': {
|
||||
'id': '3727166',
|
||||
'ext': 'mp3',
|
||||
'title': 'Clients From Hell Podcast - How a Sex Toy Company Kickstarted my Freelance Career',
|
||||
'upload_date': '20150818',
|
||||
'thumbnail': 're:^https?://.*',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
m = re.match(self._VALID_URL, url)
|
||||
video_id = m.group('id')
|
||||
url = m.group('mainurl')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
formats = [{
|
||||
@@ -32,20 +44,18 @@ class LibsynIE(InfoExtractor):
|
||||
} for media_url in set(re.findall('var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))]
|
||||
|
||||
podcast_title = self._search_regex(
|
||||
r'<h2>([^<]+)</h2>', webpage, 'title')
|
||||
r'<h2>([^<]+)</h2>', webpage, 'podcast title', default=None)
|
||||
episode_title = self._search_regex(
|
||||
r'<h3>([^<]+)</h3>', webpage, 'title', default=None)
|
||||
r'(?:<div class="episode-title">|<h3>)([^<]+)</', webpage, 'episode title')
|
||||
|
||||
title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'<div id="info_text_body">(.+?)</div>', webpage,
|
||||
'description', fatal=False)
|
||||
|
||||
'description', default=None)
|
||||
thumbnail = self._search_regex(
|
||||
r'<img[^>]+class="info-show-icon"[^>]+src="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
|
||||
release_date = unified_strdate(self._search_regex(
|
||||
r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False))
|
||||
|
||||
|
@@ -11,6 +11,7 @@ from ..compat import (
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
@@ -70,6 +71,15 @@ class LyndaBaseIE(InfoExtractor):
|
||||
'Confirming log in and log out from another device')
|
||||
|
||||
if all(not re.search(p, login_page) for p in ('isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')):
|
||||
if 'login error' in login_page:
|
||||
mobj = re.search(
|
||||
r'(?s)<h1[^>]+class="topmost">(?P<title>[^<]+)</h1>\s*<div>(?P<description>.+?)</div>',
|
||||
login_page)
|
||||
if mobj:
|
||||
raise ExtractorError(
|
||||
'lynda returned error: %s - %s'
|
||||
% (mobj.group('title'), clean_html(mobj.group('description'))),
|
||||
expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
|
||||
@@ -108,9 +118,7 @@ class LyndaIE(LyndaBaseIE):
|
||||
'lynda returned error: %s' % video_json['Message'], expected=True)
|
||||
|
||||
if video_json['HasAccess'] is False:
|
||||
raise ExtractorError(
|
||||
'Video %s is only available for members. '
|
||||
% video_id + self._ACCOUNT_CREDENTIALS_HINT, expected=True)
|
||||
self.raise_login_required('Video %s is only available for members' % video_id)
|
||||
|
||||
video_id = compat_str(video_json['ID'])
|
||||
duration = video_json['DurationInSeconds']
|
||||
|
@@ -25,6 +25,7 @@ class MailRuIE(InfoExtractor):
|
||||
'uploader_id': 'sonypicturesrus@mail.ru',
|
||||
'duration': 184,
|
||||
},
|
||||
'skip': 'Not accessible from Travis CI server',
|
||||
},
|
||||
{
|
||||
'url': 'http://my.mail.ru/corp/hitech/video/news_hi-tech_mail_ru/1263.html',
|
||||
@@ -39,6 +40,7 @@ class MailRuIE(InfoExtractor):
|
||||
'uploader_id': 'hitech@corp.mail.ru',
|
||||
'duration': 245,
|
||||
},
|
||||
'skip': 'Not accessible from Travis CI server',
|
||||
},
|
||||
]
|
||||
|
||||
|
@@ -9,7 +9,10 @@ from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import ExtractorError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
remove_start,
|
||||
)
|
||||
|
||||
|
||||
class MonikerIE(InfoExtractor):
|
||||
@@ -24,6 +27,14 @@ class MonikerIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl test video',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://allmyvideos.net/embed-jih3nce3x6wn',
|
||||
'md5': '710883dee1bfc370ecf9fa6a89307c88',
|
||||
'info_dict': {
|
||||
'id': 'jih3nce3x6wn',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl test video',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://vidspot.net/l2ngsmhs8ci5',
|
||||
'md5': '710883dee1bfc370ecf9fa6a89307c88',
|
||||
@@ -38,7 +49,10 @@ class MonikerIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
orig_video_id = self._match_id(url)
|
||||
video_id = remove_start(orig_video_id, 'embed-')
|
||||
url = url.replace(orig_video_id, video_id)
|
||||
assert re.match(self._VALID_URL, url) is not None
|
||||
orig_webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if '>File Not Found<' in orig_webpage:
|
||||
|
@@ -67,7 +67,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
return [{'url': url, 'ext': 'mp4'}]
|
||||
|
||||
def _extract_video_formats(self, mdoc, mtvn_id):
|
||||
if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None:
|
||||
if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4|copyright_error\.flv(?:\?geo\b.+?)?)$', mdoc.find('.//src').text) is not None:
|
||||
if mtvn_id is not None and self._MOBILE_TEMPLATE is not None:
|
||||
self.to_screen('The normal version is not available from your '
|
||||
'country, trying with the mobile version')
|
||||
@@ -114,7 +114,8 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
# Remove the templates, like &device={device}
|
||||
mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', mediagen_url)
|
||||
if 'acceptMethods' not in mediagen_url:
|
||||
mediagen_url += '&acceptMethods=fms'
|
||||
mediagen_url += '&' if '?' in mediagen_url else '?'
|
||||
mediagen_url += 'acceptMethods=fms'
|
||||
|
||||
mediagen_doc = self._download_xml(mediagen_url, video_id,
|
||||
'Downloading video urls')
|
||||
@@ -141,7 +142,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
if title_el is None:
|
||||
title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
|
||||
if title_el is None:
|
||||
title_el = itemdoc.find('.//title')
|
||||
title_el = itemdoc.find('.//title') or itemdoc.find('./title')
|
||||
if title_el.text is None:
|
||||
title_el = None
|
||||
|
||||
@@ -174,8 +175,11 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
if self._LANG:
|
||||
info_url += 'lang=%s&' % self._LANG
|
||||
info_url += data
|
||||
return self._get_videos_info_from_url(info_url, video_id)
|
||||
|
||||
def _get_videos_info_from_url(self, url, video_id):
|
||||
idoc = self._download_xml(
|
||||
info_url, video_id,
|
||||
url, video_id,
|
||||
'Downloading info', transform_source=fix_xml_ampersands)
|
||||
return self.playlist_result(
|
||||
[self._get_video_info(item) for item in idoc.findall('.//item')])
|
||||
@@ -288,3 +292,65 @@ class MTVIggyIE(MTVServicesInfoExtractor):
|
||||
}
|
||||
}
|
||||
_FEED_URL = 'http://all.mtvworldverticals.com/feed-xml/'
|
||||
|
||||
|
||||
class MTVDEIE(MTVServicesInfoExtractor):
    """Extractor for artist, show and news pages on mtv.de.

    The page embeds a ``window.pagePlaylist`` JSON array. The entry whose
    ``id`` equals the numeric id from the URL (or the sole entry, for pages
    that only carry one) provides an ``mrss`` feed URL, which is passed to
    ``_get_videos_info_from_url``.
    """
    IE_NAME = 'mtv.de'
    _VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:artists|shows|news)/(?:[^/]+/)*(?P<id>\d+)-[^/#?]+/*(?:[#?].*)?$'
    _TESTS = [{
        'url': 'http://www.mtv.de/artists/10571-cro/videos/61131-traum',
        'info_dict': {
            'id': 'music_video-a50bc5f0b3aa4b3190aa',
            'ext': 'mp4',
            'title': 'MusicVideo_cro-traum',
            'description': 'Cro - Traum',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }, {
        # mediagen URL without query (e.g. http://videos.mtvnn.com/mediagen/e865da714c166d18d6f80893195fcb97)
        'url': 'http://www.mtv.de/shows/933-teen-mom-2/staffeln/5353/folgen/63565-enthullungen',
        'info_dict': {
            'id': 'local_playlist-f5ae778b9832cc837189',
            'ext': 'mp4',
            'title': 'Episode_teen-mom-2_shows_season-5_episode-1_full-episode_part1',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }, {
        # single video in pagePlaylist with different id
        'url': 'http://www.mtv.de/news/77491-mtv-movies-spotlight-pixels-teil-3',
        'info_dict': {
            'id': 'local_playlist-4e760566473c4c8c5344',
            'ext': 'mp4',
            'title': 'Article_mtv-movies-spotlight-pixels-teil-3_short-clips_part1',
            'description': 'MTV Movies Supercut',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Locate the playlist entry for ``url`` and extract it via its MRSS feed.

        Returns ``None`` when the playlist has several entries and none of
        them carries the id taken from the URL.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        playlist_json = self._search_regex(
            r'window\.pagePlaylist\s*=\s*(\[.+?\]);\n', webpage, 'page playlist')
        playlist = self._parse_json(playlist_json, video_id)

        if len(playlist) == 1:
            # news pages contain single video in playlist with different id
            entry = playlist[0]
        else:
            entry = next(
                (candidate for candidate in playlist
                 if candidate.get('id') and compat_str(candidate['id']) == video_id),
                None)
            if entry is None:
                return None

        return self._get_videos_info_from_url(entry['mrss'], video_id)
|
||||
|
58
youtube_dl/extractor/mwave.py
Normal file
58
youtube_dl/extractor/mwave.py
Normal file
@@ -0,0 +1,58 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class MwaveIE(InfoExtractor):
    """Extractor for video detail pages on mwave.interest.me."""
    _VALID_URL = r'https?://mwave\.interest\.me/mnettv/videodetail\.m\?searchVideoDetailVO\.clip_id=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=168859',
        'md5': 'c930e27b7720aaa3c9d0018dfc8ff6cc',
        'info_dict': {
            'id': '168859',
            'ext': 'flv',
            'title': '[M COUNTDOWN] SISTAR - SHAKE IT',
            # Raw string: "\." is not a valid escape in a normal string literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'M COUNTDOWN',
            'duration': 206,
            'view_count': int,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for a clip from the site's VOD info JSON.

        Each entry of ``vod_info['cdn']`` points at a second JSON document
        whose ``fileurl`` is treated as an f4m manifest URL. Entries without
        a ``url`` or ``fileurl`` are skipped.
        """
        video_id = self._match_id(url)

        # The query string must read "&sectorid=": a literal "&sect" is liable
        # to be decoded as the HTML entity for the section sign, which
        # corrupts the request URL.
        vod_info = self._download_json(
            'http://mwave.interest.me/onair/vod_info.m?vodtype=CL&sectorid=&endinfo=Y&id=%s' % video_id,
            video_id, 'Download vod JSON')

        formats = []
        for num, cdn_info in enumerate(vod_info['cdn']):
            stream_url = cdn_info.get('url')
            if not stream_url:
                continue
            # Fall back to the positional index when the CDN entry is unnamed.
            stream_name = cdn_info.get('name') or compat_str(num)
            f4m_stream = self._download_json(
                stream_url, video_id,
                'Download %s stream JSON' % stream_name)
            f4m_url = f4m_stream.get('fileurl')
            if not f4m_url:
                continue
            formats.extend(
                self._extract_f4m_formats(f4m_url + '&hdcore=3.0.3', video_id, f4m_id=stream_name))
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': vod_info['title'],
            'thumbnail': vod_info.get('cover'),
            'uploader': vod_info.get('program_title'),
            'duration': parse_duration(vod_info.get('time')),
            'view_count': int_or_none(vod_info.get('hit')),
            'formats': formats,
        }
|
@@ -236,3 +236,28 @@ class NBCNewsIE(InfoExtractor):
|
||||
'url': info['videoAssets'][-1]['publicUrl'],
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
||||
|
||||
|
||||
class MSNBCIE(InfoExtractor):
    """Resolve msnbc.com watch pages to the embed URL named in their metadata."""
    # https URLs redirect to corresponding http ones
    _VALID_URL = r'http://www\.msnbc\.com/[^/]+/watch/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
        'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
        'info_dict': {
            'id': 'n_hayes_Aimm_140801_272214',
            'ext': 'mp4',
            'title': 'The chaotic GOP immigration vote',
            'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
            'thumbnail': 're:^https?://.*\.jpg$',
            'timestamp': 1406937606,
            'upload_date': '20140802',
            'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'],
        },
    }

    def _real_extract(self, url):
        """Download the watch page and delegate to the URL in its ``embedURL`` meta tag."""
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)
        # No ie_key is given, so the result is matched against all extractors.
        return self.url_result(self._html_search_meta('embedURL', page))
|
||||
|
@@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class NowTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nowtv\.de/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<id>.+?)/(?:player|preview)'
|
||||
_VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<id>.+?)/(?:player|preview)'
|
||||
|
||||
_TESTS = [{
|
||||
# rtl
|
||||
@@ -127,10 +127,19 @@ class NowTVIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.nowtv.de/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/preview',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.nowtv.at/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/preview?return=/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.nowtv.de/rtl2/echtzeit/list/aktuell/schnelles-geld-am-ende-der-welt/player',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
display_id_split = display_id.split('/')
|
||||
if len(display_id) > 2:
|
||||
display_id = '/'.join((display_id_split[0], display_id_split[-1]))
|
||||
|
||||
info = self._download_json(
|
||||
'https://api.nowtv.de/v3/movies/%s?fields=id,title,free,geoblocked,articleLong,articleShort,broadcastStartDate,seoUrl,duration,format,files' % display_id,
|
||||
|
@@ -407,6 +407,7 @@ class NPORadioFragmentIE(InfoExtractor):
|
||||
|
||||
|
||||
class VPROIE(NPOIE):
|
||||
IE_NAME = 'vpro'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html'
|
||||
|
||||
_TESTS = [
|
||||
|
@@ -12,19 +12,21 @@ from ..utils import (
|
||||
|
||||
|
||||
class OdnoklassnikiIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:odnoklassniki|ok)\.ru/(?:video|web-api/video/moviePlayer)/(?P<id>[\d-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:odnoklassniki|ok)\.ru/(?:video|web-api/video/moviePlayer)/(?P<id>[\d-]+)'
|
||||
_TESTS = [{
|
||||
# metadata in JSON
|
||||
'url': 'http://ok.ru/video/20079905452',
|
||||
'md5': '8e24ad2da6f387948e7a7d44eb8668fe',
|
||||
'md5': '6ba728d85d60aa2e6dd37c9e70fdc6bc',
|
||||
'info_dict': {
|
||||
'id': '20079905452',
|
||||
'ext': 'mp4',
|
||||
'title': 'Культура меняет нас (прекрасный ролик!))',
|
||||
'duration': 100,
|
||||
'upload_date': '20141207',
|
||||
'uploader_id': '330537914540',
|
||||
'uploader': 'Виталий Добровольский',
|
||||
'like_count': int,
|
||||
'age_limit': 0,
|
||||
},
|
||||
}, {
|
||||
# metadataUrl
|
||||
@@ -35,13 +37,33 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Девушка без комплексов ...',
|
||||
'duration': 191,
|
||||
'upload_date': '20150518',
|
||||
'uploader_id': '534380003155',
|
||||
'uploader': 'Андрей Мещанинов',
|
||||
'uploader': '☭ Андрей Мещанинов ☭',
|
||||
'like_count': int,
|
||||
'age_limit': 0,
|
||||
},
|
||||
}, {
|
||||
# YouTube embed (metadataUrl, provider == USER_YOUTUBE)
|
||||
'url': 'http://ok.ru/video/64211978996595-1',
|
||||
'md5': '5d7475d428845cd2e13bae6f1a992278',
|
||||
'info_dict': {
|
||||
'id': '64211978996595-1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Космическая среда от 26 августа 2015',
|
||||
'description': 'md5:848eb8b85e5e3471a3a803dae1343ed0',
|
||||
'duration': 440,
|
||||
'upload_date': '20150826',
|
||||
'uploader_id': '750099571',
|
||||
'uploader': 'Алина П',
|
||||
'age_limit': 0,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.ok.ru/video/20648036891',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -52,7 +74,8 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
|
||||
player = self._parse_json(
|
||||
unescapeHTML(self._search_regex(
|
||||
r'data-attributes="([^"]+)"', webpage, 'player')),
|
||||
r'data-options=(?P<quote>["\'])(?P<player>{.+?%s.+?})(?P=quote)' % video_id,
|
||||
webpage, 'player', group='player')),
|
||||
video_id)
|
||||
|
||||
flashvars = player['flashvars']
|
||||
@@ -85,16 +108,7 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
|
||||
like_count = int_or_none(metadata.get('likeCount'))
|
||||
|
||||
quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd'))
|
||||
|
||||
formats = [{
|
||||
'url': f['url'],
|
||||
'ext': 'mp4',
|
||||
'format_id': f['name'],
|
||||
'quality': quality(f['name']),
|
||||
} for f in metadata['videos']]
|
||||
|
||||
return {
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
@@ -104,5 +118,24 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
'uploader_id': uploader_id,
|
||||
'like_count': like_count,
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
if metadata.get('provider') == 'USER_YOUTUBE':
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': movie['contentId'],
|
||||
})
|
||||
return info
|
||||
|
||||
quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd'))
|
||||
|
||||
formats = [{
|
||||
'url': f['url'],
|
||||
'ext': 'mp4',
|
||||
'format_id': f['name'],
|
||||
'quality': quality(f['name']),
|
||||
} for f in metadata['videos']]
|
||||
self._sort_formats(formats)
|
||||
|
||||
info['formats'] = formats
|
||||
return info
|
||||
|
@@ -92,6 +92,7 @@ class PBSIE(InfoExtractor):
|
||||
'duration': 3172,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'upload_date': '20140122',
|
||||
'age_limit': 10,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
|
99
youtube_dl/extractor/periscope.py
Normal file
99
youtube_dl/extractor/periscope.py
Normal file
@@ -0,0 +1,99 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import parse_iso8601
|
||||
|
||||
|
||||
class PeriscopeIE(InfoExtractor):
    """Extractor for periscope.tv broadcast pages (``/w/<token>``)."""
    IE_DESC = 'Periscope'
    _VALID_URL = r'https?://(?:www\.)?periscope\.tv/w/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
        'md5': '65b57957972e503fcbbaeed8f4fa04ca',
        'info_dict': {
            'id': '56102209',
            'ext': 'mp4',
            'title': 'Bec Boop - 🚠✈️🇬🇧 Fly above #London in Emirates Air Line cable car at night 🇬🇧✈️🚠 #BoopScope 🎀💗',
            'timestamp': 1438978559,
            'upload_date': '20150807',
            'uploader': 'Bec Boop',
            'uploader_id': '1465763',
        },
        'skip': 'Expires in 24 hours',
    }

    def _call_api(self, method, token):
        """Call the public v2 API ``method`` for ``token`` and return the parsed JSON."""
        return self._download_json(
            'https://api.periscope.tv/api/v2/%s?token=%s' % (method, token), token)

    def _real_extract(self, url):
        """Collect broadcast metadata and stream URLs for the token in ``url``.

        Metadata comes from ``getBroadcastPublic`` and stream URLs from
        ``getAccessPublic``. A missing or null ``state`` / ``user`` in the
        API response is tolerated rather than raising AttributeError.
        """
        token = self._match_id(url)

        broadcast_data = self._call_api('getBroadcastPublic', token)
        broadcast = broadcast_data['broadcast']
        status = broadcast['status']

        # "user" may be absent or null; normalise it so the fallbacks are safe.
        user = broadcast_data.get('user') or {}
        uploader = broadcast.get('user_display_name') or user.get('display_name')
        uploader_id = broadcast.get('user_id') or user.get('id')

        title = '%s - %s' % (uploader, status) if uploader else status
        # Guard against a missing state instead of calling .lower() on None.
        state = (broadcast.get('state') or '').lower()
        if state == 'running':
            title = self._live_title(title)
        timestamp = parse_iso8601(broadcast.get('created_at'))

        thumbnails = [{
            'url': broadcast[image],
        } for image in ('image_url', 'image_url_small') if broadcast.get(image)]

        stream = self._call_api('getAccessPublic', token)

        formats = []
        for format_id in ('replay', 'rtmp', 'hls', 'https_hls'):
            video_url = stream.get(format_id + '_url')
            if not video_url:
                continue
            f = {
                'url': video_url,
                # Record which API field the URL came from.
                'format_id': format_id,
                'ext': 'flv' if format_id == 'rtmp' else 'mp4',
            }
            if format_id != 'rtmp':
                # Every non-rtmp URL is treated as HLS; the native downloader
                # is only selected for broadcasts that have ended.
                f['protocol'] = 'm3u8_native' if state == 'ended' else 'm3u8'
            formats.append(f)
        self._sort_formats(formats)

        return {
            'id': broadcast.get('id') or token,
            'title': title,
            'timestamp': timestamp,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'thumbnails': thumbnails,
            'formats': formats,
        }
|
||||
|
||||
|
||||
class QuickscopeIE(InfoExtractor):
    """Resolve watchonperiscope.com broadcast pages to their Periscope share URL."""
    IE_DESC = 'Quick Scope'
    _VALID_URL = r'https?://watchonperiscope\.com/broadcast/(?P<id>\d+)'
    _TEST = {
        'url': 'https://watchonperiscope.com/broadcast/56180087',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """POST the broadcast id to ``accessChannel`` and hand ``share_url`` to Periscope."""
        broadcast_id = self._match_id(url)

        form_fields = {
            'broadcast_id': broadcast_id,
            'entry_ticket': '',
            'from_push': 'false',
            'uses_sessions': 'true',
        }
        payload = compat_urllib_parse.urlencode(form_fields).encode('utf-8')
        # Supplying a data payload turns the request into a POST.
        request = compat_urllib_request.Request(
            'https://watchonperiscope.com/api/accessChannel', payload)

        channel = self._download_json(request, broadcast_id)
        return self.url_result(channel['share_url'], 'Periscope')
|
181
youtube_dl/extractor/playtvak.py
Normal file
181
youtube_dl/extractor/playtvak.py
Normal file
@@ -0,0 +1,181 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class PlaytvakIE(InfoExtractor):
    """Extractor for videos and live streams on playtvak.cz, idnes.cz, lidovky.cz and metro.cz."""
    IE_DESC = 'Playtvak.cz, iDNES.cz and Lidovky.cz'
    _VALID_URL = r'https?://(?:.+?\.)?(?:playtvak|idnes|lidovky|metro)\.cz/.*\?(?:c|idvideo)=(?P<id>[^&]+)'
    _TESTS = [{
        'url': 'http://www.playtvak.cz/vyzente-vosy-a-srsne-ze-zahrady-dn5-/hodinovy-manzel.aspx?c=A150730_150323_hodinovy-manzel_kuko',
        'md5': '4525ae312c324b4be2f4603cc78ceb4a',
        'info_dict': {
            'id': 'A150730_150323_hodinovy-manzel_kuko',
            'ext': 'mp4',
            'title': 'Vyžeňte vosy a sršně ze zahrady',
            'description': 'md5:f93d398691044d303bc4a3de62f3e976',
            'thumbnail': 're:(?i)^https?://.*\.(?:jpg|png)$',
            'duration': 279,
            'timestamp': 1438732860,
            'upload_date': '20150805',
            'is_live': False,
        }
    }, {  # live video test
        'url': 'http://slowtv.playtvak.cz/planespotting-0pr-/planespotting.aspx?c=A150624_164934_planespotting_cat',
        'info_dict': {
            'id': 'A150624_164934_planespotting_cat',
            'ext': 'flv',
            'title': 're:^Přímý přenos iDNES.cz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': 'Sledujte provoz na ranveji Letiště Václava Havla v Praze',
            'thumbnail': 're:(?i)^https?://.*\.(?:jpg|png)$',
            'is_live': True,
        },
        'params': {
            'skip_download': True,  # requires rtmpdump
        },
    }, {  # idnes.cz
        'url': 'http://zpravy.idnes.cz/pes-zavreny-v-aute-rozbijeni-okynek-v-aute-fj5-/domaci.aspx?c=A150809_104116_domaci_pku',
        'md5': '819832ba33cd7016e58a6658577fe289',
        'info_dict': {
            'id': 'A150809_104116_domaci_pku',
            'ext': 'mp4',
            'title': 'Zavřeli jsme mraženou pizzu do auta. Upekla se',
            'description': 'md5:01e73f02329e2e5760bd5eed4d42e3c2',
            'thumbnail': 're:(?i)^https?://.*\.(?:jpg|png)$',
            'duration': 39,
            'timestamp': 1438969140,
            'upload_date': '20150807',
            'is_live': False,
        }
    }, {  # lidovky.cz
        'url': 'http://www.lidovky.cz/dalsi-demonstrace-v-praze-o-migraci-duq-/video.aspx?c=A150808_214044_ln-video_ELE',
        'md5': 'c7209ac4ba9d234d4ad5bab7485bcee8',
        'info_dict': {
            'id': 'A150808_214044_ln-video_ELE',
            'ext': 'mp4',
            'title': 'Táhni! Demonstrace proti imigrantům budila emoce',
            'description': 'md5:97c81d589a9491fbfa323c9fa3cca72c',
            'thumbnail': 're:(?i)^https?://.*\.(?:jpg|png)$',
            'timestamp': 1439052180,
            'upload_date': '20150808',
            'is_live': False,
        }
    }, {  # metro.cz
        'url': 'http://www.metro.cz/video-pod-billboardem-se-na-vltavske-roztocil-kolotoc-deti-vozil-jen-par-hodin-1hx-/metro-extra.aspx?c=A141111_173251_metro-extra_row',
        'md5': '84fc1deedcac37b7d4a6ccae7c716668',
        'info_dict': {
            'id': 'A141111_173251_metro-extra_row',
            'ext': 'mp4',
            'title': 'Recesisté udělali z billboardu kolotoč',
            'description': 'md5:7369926049588c3989a66c9c1a043c4c',
            'thumbnail': 're:(?i)^https?://.*\.(?:jpg|png)$',
            'timestamp': 1415725500,
            'upload_date': '20141111',
            'is_live': False,
        }
    }, {
        'url': 'http://www.playtvak.cz/embed.aspx?idvideo=V150729_141549_play-porad_kuko',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract formats and metadata from the player info document named in the page.

        The page passes an info URL to ``Misc.videoFLV``; that URL is
        re-requested with ``reklama=0`` and ``type=js``, and the first item
        of type ``video`` or ``stream`` in the response is used.

        Raises ExtractorError when the response contains no such item.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        info_url = self._html_search_regex(
            r'Misc\.videoFLV\(\s*{\s*data\s*:\s*"([^"]+)"', webpage, 'info url')

        # Rebuild the info URL with the two query parameters overridden.
        url_parts = compat_urlparse.urlparse(info_url)
        query = compat_urlparse.parse_qs(url_parts.query)
        query['reklama'] = ['0']
        query['type'] = ['js']
        info_url = compat_urlparse.urlunparse(
            url_parts._replace(query=compat_urllib_parse.urlencode(query, True)))

        # Only the text between the first "{" and the last "}" is parsed as JSON.
        json_info = self._download_json(
            info_url, video_id,
            transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])

        item = next(
            (candidate for candidate in json_info['items']
             if candidate.get('type') in ('video', 'stream')),
            None)
        if not item:
            raise ExtractorError('No suitable stream found')

        quality = qualities(('low', 'middle', 'high'))

        formats = []
        for fmt in item['video']:
            video_url = fmt.get('file')
            if not video_url:
                continue

            format_ = fmt['format']
            format_id = '%s_%s' % (format_, fmt['quality'])
            preference = None

            if format_ == 'rtmp':
                ext = 'flv'
            elif format_ == 'apple':
                ext = 'mp4'
                # Some streams have mp3 audio which does not play
                # well with ffmpeg filter aac_adtstoasc
                preference = -1
            elif format_ in ('mp4', 'webm'):
                ext = format_
            else:
                # 'adobe': f4m manifest fails with 404 in 80% of requests.
                # Other formats are not supported yet.
                continue

            formats.append({
                'url': video_url,
                'ext': ext,
                'format_id': format_id,
                'quality': quality(fmt.get('quality')),
                'preference': preference,
            })
        self._sort_formats(formats)

        is_live = item['type'] == 'stream'
        title = item['title']
        if is_live:
            title = self._live_title(title)

        description = self._og_search_description(webpage, default=None)
        if not description:
            description = self._html_search_meta(
                'description', webpage, 'description')

        # Duration and publication time are only read for non-live items.
        if is_live:
            duration = None
            timestamp = None
        else:
            duration = int_or_none(item.get('length'))
            timestamp = item.get('published')
            if timestamp:
                # The last five characters are dropped before parsing.
                timestamp = parse_iso8601(timestamp[:-5])

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': item.get('image'),
            'duration': duration,
            'timestamp': timestamp,
            'is_live': is_live,
            'formats': formats,
        }
|
207
youtube_dl/extractor/pluralsight.py
Normal file
207
youtube_dl/extractor/pluralsight.py
Normal file
@@ -0,0 +1,207 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class PluralsightIE(InfoExtractor):
    """Extractor for single clips in the Pluralsight training player (login required)."""
    IE_NAME = 'pluralsight'
    _VALID_URL = r'https?://(?:www\.)?pluralsight\.com/training/player\?author=(?P<author>[^&]+)&name=(?P<name>[^&]+)(?:&mode=live)?&clip=(?P<clip>\d+)&course=(?P<course>[^&]+)'
    _LOGIN_URL = 'https://www.pluralsight.com/id/'
    _NETRC_MACHINE = 'pluralsight'

    _TEST = {
        'url': 'http://www.pluralsight.com/training/player?author=mike-mckeown&name=hosting-sql-server-windows-azure-iaas-m7-mgmt&mode=live&clip=3&course=hosting-sql-server-windows-azure-iaas',
        'md5': '4d458cf5cf4c593788672419a8dd4cf8',
        'info_dict': {
            'id': 'hosting-sql-server-windows-azure-iaas-m7-mgmt-04',
            'ext': 'mp4',
            'title': 'Management of SQL Server - Demo Monitoring',
            'duration': 338,
        },
        'skip': 'Requires pluralsight account credentials',
    }

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _login(self):
        """Submit the login form with the configured credentials.

        Raises ExtractorError (expected) when the response contains a
        field-validation error message.
        """
        username, password = self._get_login_info()
        if username is None:
            self.raise_login_required('Pluralsight account is required')

        login_page = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login page')

        # Start from the form's hidden inputs and add the credentials.
        login_form = self._hidden_inputs(login_page)
        login_form['Username'] = username.encode('utf-8')
        login_form['Password'] = password.encode('utf-8')

        post_url = self._search_regex(
            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
            'post url', default=self._LOGIN_URL, group='url')
        if not post_url.startswith('http'):
            # Relative form action: resolve it against the login page URL.
            post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)

        encoded_form = compat_urllib_parse.urlencode(login_form).encode('utf-8')
        request = compat_urllib_request.Request(post_url, encoded_form)
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')

        response = self._download_webpage(
            request, None, 'Logging in as %s' % username)

        error = self._search_regex(
            r'<span[^>]+class="field-validation-error"[^>]*>([^<]+)</span>',
            response, 'error message', default=None)
        if error:
            raise ExtractorError('Unable to login: %s' % error, expected=True)

    def _real_extract(self, url):
        """Resolve the clip named in ``url`` and request a URL for each allowed format.

        Raises ExtractorError when the page's module collection has no clip
        whose ``clipIndex`` matches the ``clip`` URL parameter inside the
        module named by ``name``.
        """
        mobj = re.match(self._VALID_URL, url)
        author, name, clip_id, course = mobj.group(
            'author', 'name', 'clip', 'course')

        display_id = '%s-%s' % (name, clip_id)

        webpage = self._download_webpage(url, display_id)

        collection_json = self._search_regex(
            r'moduleCollection\s*:\s*new\s+ModuleCollection\((\[.+?\])\s*,\s*\$rootScope\)',
            webpage, 'modules')
        collection = self._parse_json(collection_json, display_id)

        module = None
        clip = None
        for candidate_module in collection:
            if candidate_module.get('moduleName') != name:
                continue
            module = candidate_module
            for candidate_clip in candidate_module.get('clips', []):
                clip_index = candidate_clip.get('clipIndex')
                if clip_index is not None and compat_str(clip_index) == clip_id:
                    clip = candidate_clip
                    break

        if not clip:
            raise ExtractorError('Unable to resolve clip')

        QUALITIES = {
            'low': {'width': 640, 'height': 480},
            'medium': {'width': 848, 'height': 640},
            'high': {'width': 1024, 'height': 768},
        }

        ALLOWED_QUALITIES = (
            ('webm', ('high',)),
            ('mp4', ('low', 'medium', 'high',)),
        )

        # Flatten the (ext, qualities) table; request order is unchanged.
        combinations = [
            (ext, quality)
            for ext, allowed in ALLOWED_QUALITIES
            for quality in allowed]

        formats = []
        for ext, quality in combinations:
            f = QUALITIES[quality].copy()
            format_id = '%s-%s' % (ext, quality)
            clip_post = {
                'a': author,
                'cap': 'false',
                'cn': clip_id,
                'course': course,
                'lc': 'en',
                'm': name,
                'mt': ext,
                'q': '%dx%d' % (f['width'], f['height']),
            }
            request = compat_urllib_request.Request(
                'http://www.pluralsight.com/training/Player/ViewClip',
                json.dumps(clip_post).encode('utf-8'))
            request.add_header('Content-Type', 'application/json;charset=utf-8')
            # The response body is used verbatim as the clip URL; a failed
            # request only drops this one format (fatal=False).
            clip_url = self._download_webpage(
                request, display_id, 'Downloading %s URL' % format_id, fatal=False)
            if not clip_url:
                continue
            f['url'] = clip_url
            f['ext'] = ext
            f['format_id'] = format_id
            formats.append(f)
        self._sort_formats(formats)

        # TODO: captions
        # http://www.pluralsight.com/training/Player/ViewClip + cap = true
        # or
        # http://www.pluralsight.com/training/Player/Captions
        # { a = author, cn = clip_id, lc = end, m = name }

        duration = (
            int_or_none(clip.get('duration')) or
            parse_duration(clip.get('formattedDuration')))

        return {
            'id': clip['clipName'],
            'title': '%s - %s' % (module['title'], clip['title']),
            'duration': duration,
            'creator': author,
            'formats': formats
        }
|
||||
|
||||
|
||||
class PluralsightCourseIE(InfoExtractor):
    """Playlist extractor that expands a Pluralsight course page into its clips."""
    IE_NAME = 'pluralsight:course'
    _VALID_URL = r'https?://(?:www\.)?pluralsight\.com/courses/(?P<id>[^/]+)'
    _TEST = {
        # Free course from Pluralsight Starter Subscription for Microsoft TechNet
        # https://offers.pluralsight.com/technet?loc=zTS3z&prod=zOTprodz&tech=zOttechz&prog=zOTprogz&type=zSOz&media=zOTmediaz&country=zUSz
        'url': 'http://www.pluralsight.com/courses/hosting-sql-server-windows-azure-iaas',
        'info_dict': {
            'id': 'hosting-sql-server-windows-azure-iaas',
            'title': 'Hosting SQL Server in Microsoft Azure IaaS Fundamentals',
            'description': 'md5:61b37e60f21c4b2f91dc621a977d0986',
        },
        'playlist_count': 31,
    }

    def _real_extract(self, url):
        """Return a playlist with one Pluralsight player URL per clip of the course.

        Clips without ``playerParameters`` are left out.
        """
        course_id = self._match_id(url)

        # TODO: PSM cookie

        course = self._download_json(
            'http://www.pluralsight.com/data/course/%s' % course_id,
            course_id, 'Downloading course JSON')

        title = course['title']
        description = course.get('description') or course.get('shortDescription')

        course_data = self._download_json(
            'http://www.pluralsight.com/data/course/content/%s' % course_id,
            course_id, 'Downloading course data JSON')

        # Gather the player query string of every clip, module by module.
        player_queries = (
            clip.get('playerParameters')
            for module in course_data
            for clip in module.get('clips', []))
        entries = [
            self.url_result(
                'http://www.pluralsight.com/training/player?%s' % query,
                'Pluralsight')
            for query in player_queries if query]

        return self.playlist_result(entries, course_id, title, description)
|
@@ -22,6 +22,7 @@ class Porn91IE(InfoExtractor):
|
||||
'title': '18岁大一漂亮学妹,水嫩性感,再爽一次!',
|
||||
'ext': 'mp4',
|
||||
'duration': 431,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -68,4 +69,5 @@ class Porn91IE(InfoExtractor):
|
||||
'url': video_url,
|
||||
'duration': duration,
|
||||
'comment_count': comment_count,
|
||||
'age_limit': self._rta_search(webpage),
|
||||
}
|
||||
|
@@ -1,6 +1,7 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
@@ -8,22 +9,28 @@ class RTL2IE(InfoExtractor):
|
||||
_VALID_URL = r'http?://(?:www\.)?rtl2\.de/[^?#]*?/(?P<id>[^?#/]*?)(?:$|/(?:$|[?#]))'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0',
|
||||
'md5': 'bfcc179030535b08dc2b36b469b5adc7',
|
||||
'info_dict': {
|
||||
'id': 'folge-203-0',
|
||||
'ext': 'f4v',
|
||||
'title': 'GRIP sucht den Sommerkönig',
|
||||
'description': 'Matthias, Det und Helge treten gegeneinander an.'
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/',
|
||||
'md5': 'ffcd517d2805b57ce11a58a2980c2b02',
|
||||
'info_dict': {
|
||||
'id': '21040-anna-erwischt-alex',
|
||||
'ext': 'mp4',
|
||||
'title': 'Anna erwischt Alex!',
|
||||
'description': 'Anna ist Alex\' Tochter bei Köln 50667.'
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -34,12 +41,18 @@ class RTL2IE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
vico_id = self._html_search_regex(
|
||||
r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
|
||||
vivi_id = self._html_search_regex(
|
||||
r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
|
||||
mobj = re.search(
|
||||
r'<div[^>]+data-collection="(?P<vico_id>\d+)"[^>]+data-video="(?P<vivi_id>\d+)"',
|
||||
webpage)
|
||||
if mobj:
|
||||
vico_id = mobj.group('vico_id')
|
||||
vivi_id = mobj.group('vivi_id')
|
||||
else:
|
||||
vico_id = self._html_search_regex(
|
||||
r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
|
||||
vivi_id = self._html_search_regex(
|
||||
r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
|
||||
info_url = 'http://www.rtl2.de/video/php/get_video.php?vico_id=' + vico_id + '&vivi_id=' + vivi_id
|
||||
webpage = self._download_webpage(info_url, '')
|
||||
|
||||
info = self._download_json(info_url, video_id)
|
||||
video_info = info['video']
|
||||
|
@@ -18,6 +18,10 @@ class RTPIE(InfoExtractor):
|
||||
'description': 'As paixões musicais de António Cartaxo e António Macedo',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
|
||||
'only_matching': True,
|
||||
|
@@ -6,7 +6,7 @@ import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..compat import compat_urllib_request, compat_urlparse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
@@ -102,7 +102,9 @@ class RTVEALaCartaIE(InfoExtractor):
|
||||
if info['state'] == 'DESPU':
|
||||
raise ExtractorError('The video is no longer available', expected=True)
|
||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
|
||||
png = self._download_webpage(png_url, video_id, 'Downloading url information')
|
||||
png_request = compat_urllib_request.Request(png_url)
|
||||
png_request.add_header('Referer', url)
|
||||
png = self._download_webpage(png_request, video_id, 'Downloading url information')
|
||||
video_url = _decrypt_url(png)
|
||||
if not video_url.endswith('.f4m'):
|
||||
auth_url = video_url.replace(
|
||||
|
47
youtube_dl/extractor/rtvnh.py
Normal file
47
youtube_dl/extractor/rtvnh.py
Normal file
@@ -0,0 +1,47 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class RTVNHIE(InfoExtractor):
    """Extractor for video pages on rtvnh.nl (URLs of the form /video/<id>)."""

    _VALID_URL = r'https?://(?:www\.)?rtvnh\.nl/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.rtvnh.nl/video/131946',
        'md5': '6e1d0ab079e2a00b6161442d3ceacfc1',
        'info_dict': {
            'id': '131946',
            'ext': 'mp4',
            'title': 'Grote zoektocht in zee bij Zandvoort naar vermiste vrouw',
            # Raw string: same value as before, without the invalid "\." escape.
            'thumbnail': r're:^https?:.*\.jpg$'
        }
    }

    def _real_extract(self, url):
        """Build the info dict for a single rtvnh.nl video.

        Downloads the JSON metadata for the video, collects formats from the
        SMIL document and from the ``source.fb`` entries of the metadata, and
        returns id, title, thumbnail and formats.

        Raises:
            ExtractorError: when the metadata ``status`` field is not 200.
        """
        video_id = self._match_id(url)

        meta = self._parse_json(self._download_webpage(
            'http://www.rtvnh.nl/video/json?m=' + video_id, video_id), video_id)

        status = meta.get('status')
        if status != 200:
            # Use %s, not %d: ``status`` comes from .get() and may be None or
            # a non-numeric value, which would turn this intended
            # ExtractorError into a TypeError while formatting the message.
            raise ExtractorError(
                '%s returned error code %s' % (self.IE_NAME, status), expected=True)

        # The SMIL download is non-fatal; the metadata entries below are
        # processed regardless.
        formats = self._extract_smil_formats(
            'http://www.rtvnh.nl/video/smil?m=' + video_id, video_id, fatal=False)

        for item in meta['source']['fb']:
            item_type = item.get('type')
            if item_type == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    item['file'], video_id, ext='mp4', entry_protocol='m3u8_native'))
            elif item_type == '':
                # Entries with an empty type are used as plain direct URLs.
                formats.append({'url': item['file']})

        return {
            'id': video_id,
            'title': meta['title'].strip(),
            'thumbnail': meta.get('image'),
            'formats': formats
        }
|
@@ -30,6 +30,7 @@ class RutubeIE(InfoExtractor):
|
||||
'uploader': 'NTDRussian',
|
||||
'uploader_id': '29790',
|
||||
'upload_date': '20131016',
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {
|
||||
# It requires ffmpeg (m3u8 download)
|
||||
|
@@ -20,7 +20,6 @@ from ..utils import (
|
||||
class SafariBaseIE(InfoExtractor):
|
||||
_LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
|
||||
_SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
|
||||
_ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to supply credentials for safaribooksonline.com'
|
||||
_NETRC_MACHINE = 'safari'
|
||||
|
||||
_API_BASE = 'https://www.safaribooksonline.com/api/v1/book'
|
||||
@@ -37,9 +36,7 @@ class SafariBaseIE(InfoExtractor):
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
raise ExtractorError(
|
||||
self._ACCOUNT_CREDENTIALS_HINT,
|
||||
expected=True)
|
||||
self.raise_login_required('safaribooksonline.com account is required')
|
||||
|
||||
headers = std_headers
|
||||
if 'Referer' not in headers:
|
||||
|
@@ -1,6 +1,8 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
@@ -10,8 +12,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class ScreenwaveMediaIE(InfoExtractor):
|
||||
_VALID_URL = r'http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)'
|
||||
|
||||
_VALID_URL = r'https?://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=(?P<id>[A-Za-z0-9-]+)'
|
||||
EMBED_PATTERN = r'src=(["\'])(?P<url>(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=.+?)\1'
|
||||
_TESTS = [{
|
||||
'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
|
||||
'only_matching': True,
|
||||
@@ -31,34 +33,57 @@ class ScreenwaveMediaIE(InfoExtractor):
|
||||
'http://player.screenwavemedia.com/player.js',
|
||||
video_id, 'Downloading playerconfig webpage')
|
||||
|
||||
videoserver = self._search_regex(r"\[ipaddress\]\s*=>\s*([\d\.]+)", playerdata, 'videoserver')
|
||||
videoserver = self._search_regex(r'SWMServer\s*=\s*"([\d\.]+)"', playerdata, 'videoserver')
|
||||
|
||||
sources = self._parse_json(
|
||||
js_to_json(
|
||||
self._search_regex(
|
||||
r"sources\s*:\s*(\[[^\]]+?\])", playerconfig,
|
||||
'sources',
|
||||
).replace(
|
||||
"' + thisObj.options.videoserver + '",
|
||||
videoserver
|
||||
).replace(
|
||||
"' + playerVidId + '",
|
||||
video_id
|
||||
re.sub(
|
||||
r'(?s)/\*.*?\*/', '',
|
||||
self._search_regex(
|
||||
r"sources\s*:\s*(\[[^\]]+?\])", playerconfig,
|
||||
'sources',
|
||||
).replace(
|
||||
"' + thisObj.options.videoserver + '",
|
||||
videoserver
|
||||
).replace(
|
||||
"' + playerVidId + '",
|
||||
video_id
|
||||
)
|
||||
)
|
||||
),
|
||||
video_id
|
||||
video_id, fatal=False
|
||||
)
|
||||
|
||||
# Fallback to hardcoded sources if JS changes again
|
||||
if not sources:
|
||||
self.report_warning('Falling back to a hardcoded list of streams')
|
||||
sources = [{
|
||||
'file': 'http://%s/vod/%s_%s.mp4' % (videoserver, video_id, format_id),
|
||||
'type': 'mp4',
|
||||
'label': format_label,
|
||||
} for format_id, format_label in (
|
||||
('low', '144p Low'), ('med', '160p Med'), ('high', '360p High'), ('hd1', '720p HD1'))]
|
||||
sources.append({
|
||||
'file': 'http://%s/vod/smil:%s.smil/playlist.m3u8' % (videoserver, video_id),
|
||||
'type': 'hls',
|
||||
})
|
||||
|
||||
formats = []
|
||||
for source in sources:
|
||||
if source['type'] == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(source['file'], video_id))
|
||||
else:
|
||||
file_ = source.get('file')
|
||||
if not file_:
|
||||
continue
|
||||
format_label = source.get('label')
|
||||
format_id = self._search_regex(
|
||||
r'_(.+?)\.[^.]+$', file_, 'format id', default=None)
|
||||
height = int_or_none(self._search_regex(
|
||||
r'^(\d+)[pP]', format_label, 'height', default=None))
|
||||
formats.append({
|
||||
'url': source['file'],
|
||||
'format_id': format_id,
|
||||
'format': format_label,
|
||||
'ext': source.get('type'),
|
||||
'height': height,
|
||||
|
@@ -29,6 +29,7 @@ class SexyKarmaIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'categories': list,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.sexykarma.com/gonewild/video/pot-pixie-tribute-8Id6EZPbuHf.html',
|
||||
|
107
youtube_dl/extractor/shahid.py
Normal file
107
youtube_dl/extractor/shahid.py
Normal file
@@ -0,0 +1,107 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class ShahidIE(InfoExtractor):
    """Extractor for episode pages on shahid.mbc.net."""

    _VALID_URL = r'https?://shahid\.mbc\.net/ar/episode/(?P<id>\d+)/?'
    _TESTS = [{
        'url': 'https://shahid.mbc.net/ar/episode/90574/%D8%A7%D9%84%D9%85%D9%84%D9%83-%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%84%D9%87-%D8%A7%D9%84%D8%A5%D9%86%D8%B3%D8%A7%D9%86-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-3.html',
        'info_dict': {
            'id': '90574',
            'ext': 'm3u8',
            'title': 'الملك عبدالله الإنسان الموسم 1 كليب 3',
            'description': 'الفيلم الوثائقي - الملك عبد الله الإنسان',
            'duration': 2972,
            'timestamp': 1422057420,
            'upload_date': '20150123',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }, {
        # shahid plus subscriber only
        'url': 'https://shahid.mbc.net/ar/episode/90511/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1.html',
        'only_matching': True
    }]

    def _handle_error(self, response):
        """Raise ExtractorError if ``response`` is a dict with a truthy 'error'.

        Non-dict responses are ignored. The message joins the values of the
        'error' mapping with newlines.
        """
        if not isinstance(response, dict):
            return
        error = response.get('error')
        if error:
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())),
                expected=True)

    def _download_json(self, url, video_id, note='Downloading JSON metadata'):
        """Download JSON, return its 'data' member, raising on reported errors."""
        response = super(ShahidIE, self)._download_json(url, video_id, note)['data']
        self._handle_error(response)
        return response

    def _real_extract(self, url):
        """Build the info dict for a single episode.

        Reads optional overrides for the API parameters from the page's
        ``flashvars`` object, fetches the player JSON for the stream URL and
        the video JSON for the metadata.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # Defaults, used when the page does not provide a value in flashvars.
        api_vars = {
            'id': video_id,
            'type': 'player',
            'url': 'http://api.shahid.net/api/v1_1',
            'playerType': 'episode',
        }

        flashvars = self._search_regex(
            r'var\s+flashvars\s*=\s*({[^}]+})', webpage, 'flashvars', default=None)
        if flashvars:
            # Iterate over a snapshot of the keys since values are reassigned.
            for key in list(api_vars):
                value = self._search_regex(
                    r'\b%s\s*:\s*(?P<q>["\'])(?P<value>.+?)(?P=q)' % key,
                    flashvars, key, default=None, group='value')
                if value:
                    api_vars[key] = value

        player = self._download_json(
            'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-%s.type-%s.html'
            % (video_id, api_vars['type']), video_id, 'Downloading player JSON')

        formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4')

        # Keep the urlencoded query as text: encoding it to bytes and then
        # interpolating with %s puts the literal "b'...'" repr into the URL
        # on Python 3.
        query = compat_urllib_parse.urlencode({
            'apiKey': 'sh@hid0nlin3',
            'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
        })
        video = self._download_json(
            '%s/%s/%s?%s' % (
                api_vars['url'], api_vars['playerType'], api_vars['id'], query),
            video_id, 'Downloading video JSON')

        # The metadata is nested under a key named after the player type.
        video = video[api_vars['playerType']]

        title = video['title']
        description = video.get('description')
        thumbnail = video.get('thumbnailUrl')
        duration = int_or_none(video.get('duration'))
        timestamp = parse_iso8601(video.get('referenceDate'))
        categories = [
            category['name']
            for category in video.get('genres', []) if 'name' in category]

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'timestamp': timestamp,
            'categories': categories,
            'formats': formats,
        }
|
@@ -14,17 +14,28 @@ from ..utils import (
|
||||
|
||||
|
||||
class SharedIE(InfoExtractor):
|
||||
_VALID_URL = r'http://shared\.sx/(?P<id>[\da-z]{10})'
|
||||
IE_DESC = 'shared.sx and vivo.sx'
|
||||
_VALID_URL = r'http://(?:shared|vivo)\.sx/(?P<id>[\da-z]{10})'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://shared.sx/0060718775',
|
||||
'md5': '106fefed92a8a2adb8c98e6a0652f49b',
|
||||
'info_dict': {
|
||||
'id': '0060718775',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bmp4',
|
||||
'filesize': 1720110,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://vivo.sx/d7ddda0e78',
|
||||
'md5': '15b3af41be0b4fe01f4df075c2678b2c',
|
||||
'info_dict': {
|
||||
'id': 'd7ddda0e78',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chicken',
|
||||
'filesize': 528031,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@@ -330,10 +330,7 @@ class SmotriBroadcastIE(InfoExtractor):
|
||||
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
raise ExtractorError(
|
||||
'Erotic broadcasts allowed only for registered users, '
|
||||
'use --username and --password options to provide account credentials.',
|
||||
expected=True)
|
||||
self.raise_login_required('Erotic broadcasts allowed only for registered users')
|
||||
|
||||
login_form = {
|
||||
'login-hint53': '1',
|
||||
|
@@ -309,7 +309,7 @@ class SoundcloudUserIE(SoundcloudIE):
|
||||
'id': '114582580',
|
||||
'title': 'The Akashic Chronicler (All)',
|
||||
},
|
||||
'playlist_mincount': 112,
|
||||
'playlist_mincount': 111,
|
||||
}, {
|
||||
'url': 'https://soundcloud.com/the-akashic-chronicler/tracks',
|
||||
'info_dict': {
|
||||
@@ -330,14 +330,14 @@ class SoundcloudUserIE(SoundcloudIE):
|
||||
'id': '114582580',
|
||||
'title': 'The Akashic Chronicler (Reposts)',
|
||||
},
|
||||
'playlist_mincount': 9,
|
||||
'playlist_mincount': 7,
|
||||
}, {
|
||||
'url': 'https://soundcloud.com/the-akashic-chronicler/likes',
|
||||
'info_dict': {
|
||||
'id': '114582580',
|
||||
'title': 'The Akashic Chronicler (Likes)',
|
||||
},
|
||||
'playlist_mincount': 333,
|
||||
'playlist_mincount': 321,
|
||||
}, {
|
||||
'url': 'https://soundcloud.com/grynpyret/spotlight',
|
||||
'info_dict': {
|
||||
|
@@ -16,8 +16,9 @@ from ..aes import aes_decrypt_text
|
||||
|
||||
|
||||
class SpankwireIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<id>[0-9]+)/?)'
|
||||
_TESTS = [{
|
||||
# download URL pattern: */<height>P_<tbr>K_<video_id>.mp4
|
||||
'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
|
||||
'md5': '8bbfde12b101204b39e4b9fe7eb67095',
|
||||
'info_dict': {
|
||||
@@ -30,14 +31,27 @@ class SpankwireIE(InfoExtractor):
|
||||
'upload_date': '20070507',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# download URL pattern: */mp4_<format_id>_<video_id>.mp4
|
||||
'url': 'http://www.spankwire.com/Titcums-Compiloation-I/video1921551/',
|
||||
'md5': '09b3c20833308b736ae8902db2f8d7e6',
|
||||
'info_dict': {
|
||||
'id': '1921551',
|
||||
'ext': 'mp4',
|
||||
'title': 'Titcums Compiloation I',
|
||||
'description': 'cum on tits',
|
||||
'uploader': 'dannyh78999',
|
||||
'uploader_id': '3056053',
|
||||
'upload_date': '20150822',
|
||||
'age_limit': 18,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('videoid')
|
||||
url = 'http://www.' + mobj.group('url')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
req = compat_urllib_request.Request(url)
|
||||
req = compat_urllib_request.Request('http://www.' + mobj.group('url'))
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
@@ -54,7 +68,7 @@ class SpankwireIE(InfoExtractor):
|
||||
r'by:\s*<a [^>]*>(.+?)</a>',
|
||||
webpage, 'uploader', fatal=False)
|
||||
uploader_id = self._html_search_regex(
|
||||
r'by:\s*<a href="/Profile\.aspx\?.*?UserId=(\d+).*?"',
|
||||
r'by:\s*<a href="/(?:user/viewProfile|Profile\.aspx)\?.*?UserId=(\d+).*?"',
|
||||
webpage, 'uploader id', fatal=False)
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'</a> on (.+?) at \d+:\d+',
|
||||
@@ -67,9 +81,10 @@ class SpankwireIE(InfoExtractor):
|
||||
r'<span\s+id="spCommentCount"[^>]*>([\d,\.]+)</span>',
|
||||
webpage, 'comment count', fatal=False))
|
||||
|
||||
video_urls = list(map(
|
||||
compat_urllib_parse_unquote,
|
||||
re.findall(r'playerData\.cdnPath[0-9]{3,}\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage)))
|
||||
videos = re.findall(
|
||||
r'playerData\.cdnPath([0-9]{3,})\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage)
|
||||
heights = [int(video[0]) for video in videos]
|
||||
video_urls = list(map(compat_urllib_parse_unquote, [video[1] for video in videos]))
|
||||
if webpage.find('flashvars\.encrypted = "true"') != -1:
|
||||
password = self._search_regex(
|
||||
r'flashvars\.video_title = "([^"]+)',
|
||||
@@ -79,21 +94,22 @@ class SpankwireIE(InfoExtractor):
|
||||
video_urls))
|
||||
|
||||
formats = []
|
||||
for video_url in video_urls:
|
||||
for height, video_url in zip(heights, video_urls):
|
||||
path = compat_urllib_parse_urlparse(video_url).path
|
||||
format = path.split('/')[4].split('_')[:2]
|
||||
resolution, bitrate_str = format
|
||||
format = "-".join(format)
|
||||
height = int(resolution.rstrip('Pp'))
|
||||
tbr = int(bitrate_str.rstrip('Kk'))
|
||||
formats.append({
|
||||
_, quality = path.split('/')[4].split('_')[:2]
|
||||
f = {
|
||||
'url': video_url,
|
||||
'resolution': resolution,
|
||||
'format': format,
|
||||
'tbr': tbr,
|
||||
'height': height,
|
||||
'format_id': format,
|
||||
})
|
||||
}
|
||||
tbr = self._search_regex(r'^(\d+)[Kk]$', quality, 'tbr', default=None)
|
||||
if tbr:
|
||||
f.update({
|
||||
'tbr': int(tbr),
|
||||
'format_id': '%dp' % height,
|
||||
})
|
||||
else:
|
||||
f['format_id'] = quality
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
@@ -38,10 +38,12 @@ class SportDeutschlandIE(InfoExtractor):
|
||||
'upload_date': '20140825',
|
||||
'description': 'md5:60a20536b57cee7d9a4ec005e8687504',
|
||||
'timestamp': 1408976060,
|
||||
'duration': 2732,
|
||||
'title': 'Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen: Herren Einzel, Wei Lee vs. Keun Lee',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'view_count': int,
|
||||
'categories': ['Li-Ning Badminton WM 2014'],
|
||||
|
||||
}
|
||||
}]
|
||||
|
||||
@@ -50,7 +52,7 @@ class SportDeutschlandIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
sport_id = mobj.group('sport')
|
||||
|
||||
api_url = 'http://splink.tv/api/permalinks/%s/%s' % (
|
||||
api_url = 'http://proxy.vidibusdynamic.net/sportdeutschland.tv/api/permalinks/%s/%s?access_token=true' % (
|
||||
sport_id, video_id)
|
||||
req = compat_urllib_request.Request(api_url, headers={
|
||||
'Accept': 'application/vnd.vidibus.v2.html+json',
|
||||
@@ -58,12 +60,11 @@ class SportDeutschlandIE(InfoExtractor):
|
||||
})
|
||||
data = self._download_json(req, video_id)
|
||||
|
||||
categories = list(data.get('section', {}).get('tags', {}).values())
|
||||
asset = data['asset']
|
||||
assets_info = self._download_json(asset['url'], video_id)
|
||||
categories = [data['section']['title']]
|
||||
|
||||
formats = []
|
||||
smil_url = assets_info['video']
|
||||
smil_url = asset['video']
|
||||
if '.smil' in smil_url:
|
||||
m3u8_url = smil_url.replace('.smil', '.m3u8')
|
||||
formats.extend(
|
||||
@@ -91,6 +92,7 @@ class SportDeutschlandIE(InfoExtractor):
|
||||
'title': asset['title'],
|
||||
'thumbnail': asset.get('image'),
|
||||
'description': asset.get('teaser'),
|
||||
'duration': asset.get('duration'),
|
||||
'categories': categories,
|
||||
'view_count': asset.get('views'),
|
||||
'rtmp_live': asset.get('live'),
|
||||
|
@@ -6,7 +6,7 @@ from .mitele import MiTeleIE
|
||||
|
||||
class TelecincoIE(MiTeleIE):
|
||||
IE_NAME = 'telecinco.es'
|
||||
_VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/(?:[^/]+/)?(?P<id>.*?)\.html'
|
||||
_VALID_URL = r'https?://www\.telecinco\.es/(?:[^/]+/)+(?P<id>.+?)\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
|
||||
@@ -23,4 +23,7 @@ class TelecincoIE(MiTeleIE):
|
||||
}, {
|
||||
'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.telecinco.es/espanasinirmaslejos/Espana-gran-destino-turistico_2_1240605043.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
35
youtube_dl/extractor/telegraaf.py
Normal file
35
youtube_dl/extractor/telegraaf.py
Normal file
@@ -0,0 +1,35 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import remove_end
|
||||
|
||||
|
||||
class TelegraafIE(InfoExtractor):
    """Extractor for telegraaf.nl/tv pages, returned as a playlist."""

    _VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/tv/(?:[^/]+/)+(?P<id>\d+)/[^/]+\.html'
    _TEST = {
        'url': 'http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html',
        'md5': '83245a9779bcc4a24454bfd53c65b6dc',
        'info_dict': {
            'id': '24353229',
            'ext': 'mp4',
            'title': 'Tikibad ontruimd wegens brand',
            'description': 'md5:05ca046ff47b931f9b04855015e163a4',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 33,
        },
    }

    def _real_extract(self, url):
        """Return a playlist result built from the page's XSPF playlist.

        The playlist URL is taken from the ``iframe.loadPlayer('...')`` call
        in the page; title and description come from the Open Graph tags,
        with a trailing ' - VIDEO' removed from the title.
        """
        page_id = self._match_id(url)
        html = self._download_webpage(url, page_id)

        xspf_url = self._search_regex(
            r"iframe\.loadPlayer\('([^']+)'", html, 'player')
        # Fetch the playlist before reading the Open Graph tags, as before.
        items = self._extract_xspf_playlist(xspf_url, page_id)

        return self.playlist_result(
            items,
            page_id,
            remove_end(self._og_search_title(html), ' - VIDEO'),
            self._og_search_description(html))
|
@@ -1,7 +1,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
import time
|
||||
import hmac
|
||||
import binascii
|
||||
@@ -10,7 +10,8 @@ import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
@@ -18,12 +19,69 @@ from ..utils import (
|
||||
xpath_with_ns,
|
||||
unsmuggle_url,
|
||||
int_or_none,
|
||||
url_basename,
|
||||
float_or_none,
|
||||
)
|
||||
|
||||
_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
|
||||
default_ns = 'http://www.w3.org/2005/SMIL21/Language'
|
||||
_x = lambda p: xpath_with_ns(p, {'smil': default_ns})
|
||||
|
||||
|
||||
class ThePlatformIE(InfoExtractor):
|
||||
class ThePlatformBaseIE(InfoExtractor):
    """Shared SMIL and metadata helpers for thePlatform extractors."""

    def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
        """Download a SMIL document and return ``(formats, subtitles)``.

        Raises:
            ExtractorError: when the document contains a ``ref`` element
                titled 'Geographic Restriction' or 'Expired'; the element's
                'abstract' attribute is used as the message.
        """
        meta = self._download_xml(smil_url, video_id, note=note)

        # Only the first matching <ref> is reported.
        for ref in meta.findall(_x('.//smil:ref')):
            if ref.attrib.get('title') in ('Geographic Restriction', 'Expired'):
                raise ExtractorError(ref.attrib['abstract'], expected=True)

        formats = self._parse_smil_formats(
            meta, smil_url, video_id, namespace=default_ns,
            # the parameters are from syfy.com, other sites may use others,
            # they also work for nbc.com
            f4m_params={'g': 'UXWGVKRWHFSP', 'hdcore': '3.0.3'},
            transform_rtmp_url=lambda streamer, src: (streamer, 'mp4:' + src))

        for _format in formats:
            # URLs whose extension is 'once' are reported as mp4.
            if determine_ext(_format['url']) == 'once':
                _format['ext'] = 'mp4'

        self._sort_formats(formats)

        subtitles = self._parse_smil_subtitles(meta, default_ns)

        return formats, subtitles

    def get_metadata(self, path, video_id):
        """Fetch the ``format=preview`` JSON for ``path`` and return metadata.

        Returns a dict with 'title', 'subtitles', 'description', 'thumbnail'
        and 'duration'. 'title', 'description' and 'defaultThumbnailUrl' are
        read as required keys of the preview JSON.
        """
        info_url = 'http://link.theplatform.com/s/%s?format=preview' % path
        info = self._download_json(info_url, video_id)

        subtitles = {}
        captions = info.get('captions')
        if isinstance(captions, list):
            for caption in captions:
                lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
                if not src:
                    # A caption without a source URL cannot be downloaded.
                    continue
                # Append instead of assigning so that several captions in the
                # same language do not overwrite each other.
                subtitles.setdefault(lang, []).append({
                    'ext': 'srt' if mime == 'text/srt' else 'ttml',
                    'url': src,
                })

        return {
            'title': info['title'],
            'subtitles': subtitles,
            'description': info['description'],
            'thumbnail': info['defaultThumbnailUrl'],
            # NOTE(review): the 1000 presumably scales milliseconds to
            # seconds - confirm against int_or_none's signature.
            'duration': int_or_none(info.get('duration'), 1000),
        }
|
||||
|
||||
|
||||
class ThePlatformIE(ThePlatformBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
|
||||
(?:(?P<media>(?:[^/]+/)+select/media/)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
|
||||
@@ -67,6 +125,20 @@ class ThePlatformIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://player.theplatform.com/p/2E2eJC/nbcNewsOffsite?guid=tdy_or_siri_150701',
|
||||
'md5': '734f3790fb5fc4903da391beeebc4836',
|
||||
'info_dict': {
|
||||
'id': 'tdy_or_siri_150701',
|
||||
'ext': 'mp4',
|
||||
'title': 'iPhone Siri’s sassy response to a math question has people talking',
|
||||
'description': 'md5:a565d1deadd5086f3331d57298ec6333',
|
||||
'duration': 83.0,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'timestamp': 1435752600,
|
||||
'upload_date': '20150701',
|
||||
'categories': ['Today/Shows/Orange Room', 'Today/Sections/Money', 'Today/Topics/Tech', "Today/Topics/Editor's picks"],
|
||||
},
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -101,6 +173,24 @@ class ThePlatformIE(InfoExtractor):
|
||||
path += '/media'
|
||||
path += '/' + video_id
|
||||
|
||||
qs_dict = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||
if 'guid' in qs_dict:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
scripts = re.findall(r'<script[^>]+src="([^"]+)"', webpage)
|
||||
feed_id = None
|
||||
# feed id usually locates in the last script.
|
||||
# Seems there's no pattern for the interested script filename, so
|
||||
# I try one by one
|
||||
for script in reversed(scripts):
|
||||
feed_script = self._download_webpage(script, video_id, 'Downloading feed script')
|
||||
feed_id = self._search_regex(r'defaultFeedId\s*:\s*"([^"]+)"', feed_script, 'default feed id', default=None)
|
||||
if feed_id is not None:
|
||||
break
|
||||
if feed_id is None:
|
||||
raise ExtractorError('Unable to find feed id')
|
||||
return self.url_result('http://feed.theplatform.com/f/%s/%s?byGuid=%s' % (
|
||||
provider_id, feed_id, qs_dict['guid'][0]))
|
||||
|
||||
if smuggled_data.get('force_smil_url', False):
|
||||
smil_url = url
|
||||
elif mobj.group('config'):
|
||||
@@ -108,7 +198,11 @@ class ThePlatformIE(InfoExtractor):
|
||||
config_url = config_url.replace('swf/', 'config/')
|
||||
config_url = config_url.replace('onsite/', 'onsite/config/')
|
||||
config = self._download_json(config_url, video_id, 'Downloading config')
|
||||
smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
|
||||
if 'releaseUrl' in config:
|
||||
release_url = config['releaseUrl']
|
||||
else:
|
||||
release_url = 'http://link.theplatform.com/s/%s?mbr=true' % path
|
||||
smil_url = release_url + '&format=SMIL&formats=MPEG4&manifest=f4m'
|
||||
else:
|
||||
smil_url = 'http://link.theplatform.com/s/%s/meta.smil?format=smil&mbr=true' % path
|
||||
|
||||
@@ -116,95 +210,85 @@ class ThePlatformIE(InfoExtractor):
|
||||
if sig:
|
||||
smil_url = self._sign_url(smil_url, sig['key'], sig['secret'])
|
||||
|
||||
meta = self._download_xml(smil_url, video_id)
|
||||
try:
|
||||
error_msg = next(
|
||||
n.attrib['abstract']
|
||||
for n in meta.findall(_x('.//smil:ref'))
|
||||
if n.attrib.get('title') == 'Geographic Restriction' or n.attrib.get('title') == 'Expired')
|
||||
except StopIteration:
|
||||
pass
|
||||
else:
|
||||
raise ExtractorError(error_msg, expected=True)
|
||||
formats, subtitles = self._extract_theplatform_smil(smil_url, video_id)
|
||||
|
||||
info_url = 'http://link.theplatform.com/s/%s?format=preview' % path
|
||||
info_json = self._download_webpage(info_url, video_id)
|
||||
info = json.loads(info_json)
|
||||
|
||||
subtitles = {}
|
||||
captions = info.get('captions')
|
||||
if isinstance(captions, list):
|
||||
for caption in captions:
|
||||
lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
|
||||
subtitles[lang] = [{
|
||||
'ext': 'srt' if mime == 'text/srt' else 'ttml',
|
||||
'url': src,
|
||||
}]
|
||||
|
||||
head = meta.find(_x('smil:head'))
|
||||
body = meta.find(_x('smil:body'))
|
||||
|
||||
f4m_node = body.find(_x('smil:seq//smil:video'))
|
||||
if f4m_node is None:
|
||||
f4m_node = body.find(_x('smil:seq/smil:video'))
|
||||
if f4m_node is not None and '.f4m' in f4m_node.attrib['src']:
|
||||
f4m_url = f4m_node.attrib['src']
|
||||
if 'manifest.f4m?' not in f4m_url:
|
||||
f4m_url += '?'
|
||||
# the parameters are from syfy.com, other sites may use others,
|
||||
# they also work for nbc.com
|
||||
f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3'
|
||||
formats = self._extract_f4m_formats(f4m_url, video_id)
|
||||
else:
|
||||
formats = []
|
||||
switch = body.find(_x('smil:switch'))
|
||||
if switch is None:
|
||||
switch = body.find(_x('smil:par//smil:switch'))
|
||||
if switch is None:
|
||||
switch = body.find(_x('smil:par/smil:switch'))
|
||||
if switch is None:
|
||||
switch = body.find(_x('smil:par'))
|
||||
if switch is not None:
|
||||
base_url = head.find(_x('smil:meta')).attrib['base']
|
||||
for f in switch.findall(_x('smil:video')):
|
||||
attr = f.attrib
|
||||
width = int_or_none(attr.get('width'))
|
||||
height = int_or_none(attr.get('height'))
|
||||
vbr = int_or_none(attr.get('system-bitrate'), 1000)
|
||||
format_id = '%dx%d_%dk' % (width, height, vbr)
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': base_url,
|
||||
'play_path': 'mp4:' + attr['src'],
|
||||
'ext': 'flv',
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vbr': vbr,
|
||||
})
|
||||
else:
|
||||
switch = body.find(_x('smil:seq//smil:switch'))
|
||||
if switch is None:
|
||||
switch = body.find(_x('smil:seq/smil:switch'))
|
||||
for f in switch.findall(_x('smil:video')):
|
||||
attr = f.attrib
|
||||
vbr = int_or_none(attr.get('system-bitrate'), 1000)
|
||||
ext = determine_ext(attr['src'])
|
||||
if ext == 'once':
|
||||
ext = 'mp4'
|
||||
formats.append({
|
||||
'format_id': compat_str(vbr),
|
||||
'url': attr['src'],
|
||||
'vbr': vbr,
|
||||
'ext': ext,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
ret = self.get_metadata(path, video_id)
|
||||
combined_subtitles = self._merge_subtitles(ret.get('subtitles', {}), subtitles)
|
||||
ret.update({
|
||||
'id': video_id,
|
||||
'title': info['title'],
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
'description': info['description'],
|
||||
'thumbnail': info['defaultThumbnailUrl'],
|
||||
'duration': int_or_none(info.get('duration'), 1000),
|
||||
}
|
||||
'subtitles': combined_subtitles,
|
||||
})
|
||||
|
||||
return ret
|
||||
|
||||
|
||||
class ThePlatformFeedIE(ThePlatformBaseIE):
    """Extractor for feed.theplatform.com URLs that select an entry via ``byGuid``.

    The feed JSON is requested for the GUID, every ``media$content`` item of
    the first returned entry is resolved through its SMIL document, and the
    collected formats/subtitles are merged into the dictionary returned by
    ``get_metadata``.
    """

    _URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&byGuid=%s'
    _VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*byGuid=(?P<id>[a-zA-Z0-9_]+)'
    _TEST = {
        # From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207
        'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207',
        'md5': '22d2b84f058d3586efcd99e57d59d314',
        'info_dict': {
            'id': 'n_hardball_5biden_140207',
            'ext': 'mp4',
            'title': 'The Biden factor: will Joe run in 2016?',
            'description': 'Could Vice President Joe Biden be preparing a 2016 campaign? Mark Halperin and Sam Stein weigh in.',
            # Raw string: same value as before, but without relying on the
            # invalid escape sequence '\.' in a normal string literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20140208',
            'timestamp': 1391824260,
            'duration': 467.0,
            'categories': ['MSNBC/Issues/Democrats', 'MSNBC/Issues/Elections/Election 2016'],
        },
    }

    def _real_extract(self, url):
        """Build the info dictionary for the feed entry addressed by *url*.

        The returned dictionary is the result of ``get_metadata`` updated with
        the id, formats, subtitles, thumbnails, duration, timestamp and
        categories gathered from the feed entry.
        """
        mobj = re.match(self._VALID_URL, url)

        video_id = mobj.group('id')
        provider_id = mobj.group('provider_id')
        feed_id = mobj.group('feed_id')

        # Re-request the feed in a canonical form (JSON, filtered by GUID only).
        real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, video_id)
        feed = self._download_json(real_url, video_id)
        entry = feed['entries'][0]

        formats = []
        subtitles = {}
        first_video_id = None
        duration = None
        for item in entry['media$content']:
            smil_url = item['plfile$url'] + '&format=SMIL&Tracking=true&Embedded=true&formats=MPEG4,F4M'
            cur_video_id = url_basename(smil_url)
            # NOTE(review): the nesting of the duration assignment under this
            # ``if`` is reconstructed (the source view lost its indentation);
            # confirm against the repository.
            if first_video_id is None:
                first_video_id = cur_video_id
                duration = float_or_none(item.get('plfile$duration'))
            cur_formats, cur_subtitles = self._extract_theplatform_smil(smil_url, video_id, 'Downloading SMIL data for %s' % cur_video_id)
            formats.extend(cur_formats)
            subtitles = self._merge_subtitles(subtitles, cur_subtitles)

        self._sort_formats(formats)

        thumbnails = [{
            'url': thumbnail['plfile$url'],
            'width': int_or_none(thumbnail.get('plfile$width')),
            'height': int_or_none(thumbnail.get('plfile$height')),
        } for thumbnail in entry.get('media$thumbnails', [])]

        timestamp = int_or_none(entry.get('media$availableDate'), scale=1000)
        categories = [item['media$name'] for item in entry.get('media$categories', [])]

        # Metadata is looked up under the id of the first media item.
        ret = self.get_metadata('%s/%s' % (provider_id, first_video_id), video_id)
        # Tolerate metadata without a 'subtitles' key instead of raising
        # KeyError; an absent key simply contributes no subtitles.
        subtitles = self._merge_subtitles(subtitles, ret.get('subtitles', {}))
        ret.update({
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'duration': duration,
            'timestamp': timestamp,
            'categories': categories,
        })

        return ret
|
||||
|
@@ -60,9 +60,7 @@ class TubiTvIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
if re.search(r"<(?:DIV|div) class='login-required-screen'>", webpage):
|
||||
raise ExtractorError(
|
||||
'This video requires login, use --username and --password '
|
||||
'options to provide account credentials.', expected=True)
|
||||
self.raise_login_required('This video requires login')
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
|
@@ -104,6 +104,7 @@ class TVPlayIE(InfoExtractor):
|
||||
'duration': 1492,
|
||||
'timestamp': 1330522854,
|
||||
'upload_date': '20120229',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
|
@@ -1,19 +1,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
)
|
||||
|
||||
|
||||
class TweakersIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tweakers\.net/video/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://tweakers.net/video/9926/new-nintendo-3ds-xl-op-alle-fronten-beter.html',
|
||||
'md5': '1b5afa817403bb5baa08359dca31e6df',
|
||||
'md5': '3147e4ddad366f97476a93863e4557c8',
|
||||
'info_dict': {
|
||||
'id': '9926',
|
||||
'ext': 'mp4',
|
||||
@@ -25,41 +19,7 @@ class TweakersIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
playlist = self._download_xml(
|
||||
'https://tweakers.net/video/s1playlist/%s/playlist.xspf' % video_id,
|
||||
video_id)
|
||||
|
||||
NS_MAP = {
|
||||
'xspf': 'http://xspf.org/ns/0/',
|
||||
's1': 'http://static.streamone.nl/player/ns/0',
|
||||
}
|
||||
|
||||
track = playlist.find(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP))
|
||||
|
||||
title = xpath_text(
|
||||
track, xpath_with_ns('./xspf:title', NS_MAP), 'title')
|
||||
description = xpath_text(
|
||||
track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description')
|
||||
thumbnail = xpath_text(
|
||||
track, xpath_with_ns('./xspf:image', NS_MAP), 'thumbnail')
|
||||
duration = float_or_none(
|
||||
xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'),
|
||||
1000)
|
||||
|
||||
formats = [{
|
||||
'url': location.text,
|
||||
'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
|
||||
'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
|
||||
'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
|
||||
} for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
playlist_id = self._match_id(url)
|
||||
entries = self._extract_xspf_playlist(
|
||||
'https://tweakers.net/video/s1playlist/%s/playlist.xspf' % playlist_id, playlist_id)
|
||||
return self.playlist_result(entries, playlist_id)
|
||||
|
@@ -12,9 +12,11 @@ from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
)
|
||||
@@ -26,7 +28,7 @@ class TwitchBaseIE(InfoExtractor):
|
||||
_API_BASE = 'https://api.twitch.tv'
|
||||
_USHER_BASE = 'http://usher.twitch.tv'
|
||||
_LOGIN_URL = 'https://secure.twitch.tv/login'
|
||||
_LOGIN_POST_URL = 'https://passport.twitch.tv/authorize'
|
||||
_LOGIN_POST_URL = 'https://passport.twitch.tv/authentications/new'
|
||||
_NETRC_MACHINE = 'twitch'
|
||||
|
||||
def _handle_error(self, response):
|
||||
@@ -69,8 +71,15 @@ class TwitchBaseIE(InfoExtractor):
|
||||
'password': password.encode('utf-8'),
|
||||
})
|
||||
|
||||
post_url = self._search_regex(
|
||||
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
|
||||
'post url', default=self._LOGIN_POST_URL, group='url')
|
||||
|
||||
if not post_url.startswith('http'):
|
||||
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
self._LOGIN_POST_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
||||
post_url, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
||||
request.add_header('Referer', self._LOGIN_URL)
|
||||
response = self._download_webpage(
|
||||
request, None, 'Logging in as %s' % username)
|
||||
@@ -132,14 +141,14 @@ class TwitchItemBaseIE(TwitchBaseIE):
|
||||
def _extract_info(self, info):
|
||||
return {
|
||||
'id': info['_id'],
|
||||
'title': info['title'],
|
||||
'description': info['description'],
|
||||
'duration': info['length'],
|
||||
'thumbnail': info['preview'],
|
||||
'uploader': info['channel']['display_name'],
|
||||
'uploader_id': info['channel']['name'],
|
||||
'timestamp': parse_iso8601(info['recorded_at']),
|
||||
'view_count': info['views'],
|
||||
'title': info.get('title') or 'Untitled Broadcast',
|
||||
'description': info.get('description'),
|
||||
'duration': int_or_none(info.get('length')),
|
||||
'thumbnail': info.get('preview'),
|
||||
'uploader': info.get('channel', {}).get('display_name'),
|
||||
'uploader_id': info.get('channel', {}).get('name'),
|
||||
'timestamp': parse_iso8601(info.get('recorded_at')),
|
||||
'view_count': int_or_none(info.get('views')),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -187,7 +196,7 @@ class TwitchVodIE(TwitchItemBaseIE):
|
||||
_ITEM_TYPE = 'vod'
|
||||
_ITEM_SHORTCUT = 'v'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s',
|
||||
'info_dict': {
|
||||
'id': 'v6528877',
|
||||
@@ -206,7 +215,26 @@ class TwitchVodIE(TwitchItemBaseIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# Untitled broadcast (title is None)
|
||||
'url': 'http://www.twitch.tv/belkao_o/v/11230755',
|
||||
'info_dict': {
|
||||
'id': 'v11230755',
|
||||
'ext': 'mp4',
|
||||
'title': 'Untitled Broadcast',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 1638,
|
||||
'timestamp': 1439746708,
|
||||
'upload_date': '20150816',
|
||||
'uploader': 'BelkAO_o',
|
||||
'uploader_id': 'belkao_o',
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
item_id = self._match_id(url)
|
||||
|
@@ -70,9 +70,7 @@ class UdemyIE(InfoExtractor):
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
raise ExtractorError(
|
||||
'Udemy account is required, use --username and --password options to provide account credentials.',
|
||||
expected=True)
|
||||
self.raise_login_required('Udemy account is required')
|
||||
|
||||
login_popup = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Downloading login popup')
|
||||
|
@@ -1,81 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class VideoBamIE(InfoExtractor):
    """Extractor for videobam.com video pages."""

    _VALID_URL = r'http://(?:www\.)?videobam\.com/(?:videos/download/)?(?P<id>[a-zA-Z]+)'

    _TESTS = [
        {
            'url': 'http://videobam.com/OiJQM',
            'md5': 'db471f27763a531f10416a0c58b5a1e0',
            'info_dict': {
                'id': 'OiJQM',
                'ext': 'mp4',
                'title': 'Is Alcohol Worse Than Ecstasy?',
                'description': 'md5:d25b96151515c91debc42bfbb3eb2683',
                'uploader': 'frihetsvinge',
            },
        },
        {
            'url': 'http://videobam.com/pqLvq',
            'md5': 'd9a565b5379a99126ef94e1d7f9a383e',
            'note': 'HD video',
            'info_dict': {
                'id': 'pqLvq',
                'ext': 'mp4',
                'title': '_',
            }
        },
    ]

    def _real_extract(self, url):
        """Return the info dictionary for the video page addressed by *url*.

        Formats come from the ``low``/``high`` URLs embedded in the page; when
        neither is present, the ``player_config`` playlist is used instead.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        webpage = self._download_webpage(
            'http://videobam.com/%s' % video_id, video_id, 'Downloading page')

        formats = []
        # The position in this sequence doubles as the format preference.
        for preference, format_id in enumerate(['low', 'high']):
            found = re.search(r"%s: '(?P<url>[^']+)'" % format_id, webpage)
            if found:
                formats.append({
                    'url': found.group('url'),
                    'ext': 'mp4',
                    'format_id': format_id,
                    'preference': preference,
                })

        if not formats:
            # Fall back to the playlist items flagged with 'autoPlay'.
            config_json = self._html_search_regex(
                r'var player_config = ({.+?});', webpage, 'player config')
            player_config = json.loads(config_json)
            formats = [
                {'url': entry['url'], 'ext': 'mp4'}
                for entry in player_config['playlist']
                if 'autoPlay' in entry
            ]

        self._sort_formats(formats)

        # The dict values are evaluated top to bottom, i.e. in the same order
        # the individual page lookups have always been performed.
        return {
            'id': video_id,
            'title': self._og_search_title(webpage, default='_', fatal=False),
            'description': self._og_search_description(webpage, default=None),
            'thumbnail': self._og_search_thumbnail(webpage),
            'uploader': self._html_search_regex(
                r'Upload by ([^<]+)</a>', webpage, 'uploader', fatal=False, default=None),
            'view_count': int_or_none(self._html_search_regex(
                r'<strong>Views:</strong> (\d+) ', webpage, 'view count', fatal=False)),
            'formats': formats,
            'age_limit': 18,
        }
|
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class VideoLecturesNetIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/'
|
||||
_VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/*(?:[#?].*)?$'
|
||||
IE_NAME = 'videolectures.net'
|
||||
|
||||
_TEST = {
|
||||
|
@@ -29,6 +29,7 @@ from ..utils import (
|
||||
class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
_NETRC_MACHINE = 'vimeo'
|
||||
_LOGIN_REQUIRED = False
|
||||
_LOGIN_URL = 'https://vimeo.com/log_in'
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
@@ -37,21 +38,25 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
|
||||
return
|
||||
self.report_login()
|
||||
login_url = 'https://vimeo.com/log_in'
|
||||
webpage = self._download_webpage(login_url, None, False)
|
||||
token = self._search_regex(r'xsrft":"(.*?)"', webpage, 'login token')
|
||||
webpage = self._download_webpage(self._LOGIN_URL, None, False)
|
||||
token = self._extract_xsrft(webpage)
|
||||
data = urlencode_postdata({
|
||||
'action': 'login',
|
||||
'email': username,
|
||||
'password': password,
|
||||
'action': 'login',
|
||||
'service': 'vimeo',
|
||||
'token': token,
|
||||
})
|
||||
login_request = compat_urllib_request.Request(login_url, data)
|
||||
login_request = compat_urllib_request.Request(self._LOGIN_URL, data)
|
||||
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
login_request.add_header('Cookie', 'xsrft=%s' % token)
|
||||
login_request.add_header('Referer', self._LOGIN_URL)
|
||||
self._download_webpage(login_request, None, False, 'Wrong login info')
|
||||
|
||||
def _extract_xsrft(self, webpage):
    """Return the value of the ``xsrft`` token embedded in *webpage*.

    The token is matched as ``xsrft`` followed by ``=`` or ``:`` and a value
    enclosed in matching single or double quotes.
    """
    xsrft_re = r'xsrft\s*[=:]\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)'
    return self._search_regex(xsrft_re, webpage, 'login token', group='xsrft')
|
||||
|
||||
|
||||
class VimeoIE(VimeoBaseInfoExtractor):
|
||||
"""Information extractor for vimeo.com."""
|
||||
@@ -193,7 +198,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
password = self._downloader.params.get('videopassword', None)
|
||||
if password is None:
|
||||
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
|
||||
token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token')
|
||||
token = self._extract_xsrft(webpage)
|
||||
data = urlencode_postdata({
|
||||
'password': password,
|
||||
'token': token,
|
||||
@@ -203,7 +208,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
url = url.replace('http://', 'https://')
|
||||
password_request = compat_urllib_request.Request(url + '/password', data)
|
||||
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
password_request.add_header('Cookie', 'xsrft=%s' % token)
|
||||
password_request.add_header('Referer', url)
|
||||
return self._download_webpage(
|
||||
password_request, video_id,
|
||||
'Verifying the password', 'Wrong password')
|
||||
@@ -422,10 +427,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class VimeoChannelIE(InfoExtractor):
|
||||
class VimeoChannelIE(VimeoBaseInfoExtractor):
|
||||
IE_NAME = 'vimeo:channel'
|
||||
_VALID_URL = r'https://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
|
||||
_MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
|
||||
_TITLE = None
|
||||
_TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
|
||||
_TESTS = [{
|
||||
'url': 'https://vimeo.com/channels/tributes',
|
||||
@@ -440,7 +446,7 @@ class VimeoChannelIE(InfoExtractor):
|
||||
return '%s/videos/page:%d/' % (base_url, pagenum)
|
||||
|
||||
def _extract_list_title(self, webpage):
|
||||
return self._html_search_regex(self._TITLE_RE, webpage, 'list title')
|
||||
return self._TITLE or self._html_search_regex(self._TITLE_RE, webpage, 'list title')
|
||||
|
||||
def _login_list_password(self, page_url, list_id, webpage):
|
||||
login_form = self._search_regex(
|
||||
@@ -453,7 +459,7 @@ class VimeoChannelIE(InfoExtractor):
|
||||
if password is None:
|
||||
raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)
|
||||
fields = self._hidden_inputs(login_form)
|
||||
token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token')
|
||||
token = self._extract_xsrft(webpage)
|
||||
fields['token'] = token
|
||||
fields['password'] = password
|
||||
post = urlencode_postdata(fields)
|
||||
@@ -499,7 +505,7 @@ class VimeoChannelIE(InfoExtractor):
|
||||
|
||||
class VimeoUserIE(VimeoChannelIE):
|
||||
IE_NAME = 'vimeo:user'
|
||||
_VALID_URL = r'https://vimeo\.com/(?![0-9]+(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)'
|
||||
_VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)'
|
||||
_TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
|
||||
_TESTS = [{
|
||||
'url': 'https://vimeo.com/nkistudio/videos',
|
||||
@@ -603,14 +609,14 @@ class VimeoReviewIE(InfoExtractor):
|
||||
return self.url_result(player_url, 'Vimeo', video_id)
|
||||
|
||||
|
||||
class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
|
||||
class VimeoWatchLaterIE(VimeoChannelIE):
|
||||
IE_NAME = 'vimeo:watchlater'
|
||||
IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)'
|
||||
_VALID_URL = r'https://vimeo\.com/home/watchlater|:vimeowatchlater'
|
||||
_VALID_URL = r'https://vimeo\.com/(?:home/)?watchlater|:vimeowatchlater'
|
||||
_TITLE = 'Watch Later'
|
||||
_LOGIN_REQUIRED = True
|
||||
_TITLE_RE = r'href="/home/watchlater".*?>(.*?)<'
|
||||
_TESTS = [{
|
||||
'url': 'https://vimeo.com/home/watchlater',
|
||||
'url': 'https://vimeo.com/watchlater',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@@ -626,7 +632,7 @@ class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
|
||||
return request
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self._extract_videos('watchlater', 'https://vimeo.com/home/watchlater')
|
||||
return self._extract_videos('watchlater', 'https://vimeo.com/watchlater')
|
||||
|
||||
|
||||
class VimeoLikesIE(InfoExtractor):
|
||||
|
@@ -1,18 +1,38 @@
|
||||
# coding=utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
)
|
||||
|
||||
|
||||
class YandexMusicBaseIE(InfoExtractor):
|
||||
class YandexMusicTrackIE(InfoExtractor):
|
||||
IE_NAME = 'yandexmusic:track'
|
||||
IE_DESC = 'Яндекс.Музыка - Трек'
|
||||
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://music.yandex.ru/album/540508/track/4878838',
|
||||
'md5': 'f496818aa2f60b6c0062980d2e00dc20',
|
||||
'info_dict': {
|
||||
'id': '4878838',
|
||||
'ext': 'mp3',
|
||||
'title': 'Carlo Ambrosio - Gypsy Eyes 1',
|
||||
'filesize': 4628061,
|
||||
'duration': 193.04,
|
||||
}
|
||||
}
|
||||
|
||||
def _get_track_url(self, storage_dir, track_id):
|
||||
data = self._download_json(
|
||||
'http://music.yandex.ru/api/v1.5/handlers/api-jsonp.jsx?action=getTrackSrc&p=download-info/%s'
|
||||
@@ -35,24 +55,6 @@ class YandexMusicBaseIE(InfoExtractor):
|
||||
'duration': float_or_none(track.get('durationMs'), 1000),
|
||||
}
|
||||
|
||||
|
||||
class YandexMusicTrackIE(YandexMusicBaseIE):
|
||||
IE_NAME = 'yandexmusic:track'
|
||||
IE_DESC = 'Яндекс.Музыка - Трек'
|
||||
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://music.yandex.ru/album/540508/track/4878838',
|
||||
'md5': 'f496818aa2f60b6c0062980d2e00dc20',
|
||||
'info_dict': {
|
||||
'id': '4878838',
|
||||
'ext': 'mp3',
|
||||
'title': 'Carlo Ambrosio - Gypsy Eyes 1',
|
||||
'filesize': 4628061,
|
||||
'duration': 193.04,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
album_id, track_id = mobj.group('album_id'), mobj.group('id')
|
||||
@@ -64,7 +66,15 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
|
||||
return self._get_track_info(track)
|
||||
|
||||
|
||||
class YandexMusicAlbumIE(YandexMusicBaseIE):
|
||||
class YandexMusicPlaylistBaseIE(InfoExtractor):
    """Shared helper for extractors that turn track lists into playlists."""

    def _build_playlist(self, tracks):
        """Return one ``url_result`` entry per track that has album data.

        Tracks whose ``albums`` value is missing, empty or not a list are
        left out; for the others the first album's id and the track id are
        used to form the track URL.
        """
        entries = []
        for track in tracks:
            albums = track.get('albums')
            if not albums or not isinstance(albums, list):
                continue
            track_url = 'http://music.yandex.ru/album/%s/track/%s' % (albums[0]['id'], track['id'])
            entries.append(self.url_result(track_url))
        return entries
|
||||
|
||||
|
||||
class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
|
||||
IE_NAME = 'yandexmusic:album'
|
||||
IE_DESC = 'Яндекс.Музыка - Альбом'
|
||||
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)'
|
||||
@@ -85,7 +95,7 @@ class YandexMusicAlbumIE(YandexMusicBaseIE):
|
||||
'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id,
|
||||
album_id, 'Downloading album JSON')
|
||||
|
||||
entries = [self._get_track_info(track) for track in album['volumes'][0]]
|
||||
entries = self._build_playlist(album['volumes'][0])
|
||||
|
||||
title = '%s - %s' % (album['artists'][0]['name'], album['title'])
|
||||
year = album.get('year')
|
||||
@@ -95,12 +105,12 @@ class YandexMusicAlbumIE(YandexMusicBaseIE):
|
||||
return self.playlist_result(entries, compat_str(album['id']), title)
|
||||
|
||||
|
||||
class YandexMusicPlaylistIE(YandexMusicBaseIE):
|
||||
class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
|
||||
IE_NAME = 'yandexmusic:playlist'
|
||||
IE_DESC = 'Яндекс.Музыка - Плейлист'
|
||||
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/users/[^/]+/playlists/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
|
||||
'info_dict': {
|
||||
'id': '1245',
|
||||
@@ -108,20 +118,54 @@ class YandexMusicPlaylistIE(YandexMusicBaseIE):
|
||||
'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
|
||||
},
|
||||
'playlist_count': 6,
|
||||
}
|
||||
}, {
|
||||
# playlist exceeding the limit of 150 tracks shipped with webpage (see
|
||||
# https://github.com/rg3/youtube-dl/issues/6666)
|
||||
'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036',
|
||||
'info_dict': {
|
||||
'id': '1036',
|
||||
'title': 'Музыка 90-х',
|
||||
},
|
||||
'playlist_count': 310,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
playlist = self._parse_json(
|
||||
mu = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+Mu\s*=\s*({.+?});\s*</script>', webpage, 'player'),
|
||||
playlist_id)['pageData']['playlist']
|
||||
playlist_id)
|
||||
|
||||
entries = [self._get_track_info(track) for track in playlist['tracks']]
|
||||
playlist = mu['pageData']['playlist']
|
||||
tracks, track_ids = playlist['tracks'], playlist['trackIds']
|
||||
|
||||
# tracks dictionary shipped with webpage is limited to 150 tracks,
|
||||
# missing tracks should be retrieved manually.
|
||||
if len(tracks) < len(track_ids):
|
||||
present_track_ids = set([compat_str(track['id']) for track in tracks if track.get('id')])
|
||||
missing_track_ids = set(map(compat_str, track_ids)) - set(present_track_ids)
|
||||
request = compat_urllib_request.Request(
|
||||
'https://music.yandex.ru/handlers/track-entries.jsx',
|
||||
compat_urllib_parse.urlencode({
|
||||
'entries': ','.join(missing_track_ids),
|
||||
'lang': mu.get('settings', {}).get('lang', 'en'),
|
||||
'external-domain': 'music.yandex.ru',
|
||||
'overembed': 'false',
|
||||
'sign': mu.get('authData', {}).get('user', {}).get('sign'),
|
||||
'strict': 'true',
|
||||
}).encode('utf-8'))
|
||||
request.add_header('Referer', url)
|
||||
request.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||
|
||||
missing_tracks = self._download_json(
|
||||
request, playlist_id, 'Downloading missing tracks JSON', fatal=False)
|
||||
if missing_tracks:
|
||||
tracks.extend(missing_tracks)
|
||||
|
||||
return self.playlist_result(
|
||||
entries, compat_str(playlist_id),
|
||||
self._build_playlist(tracks),
|
||||
compat_str(playlist_id),
|
||||
playlist['title'], playlist.get('description'))
|
||||
|
@@ -49,6 +49,17 @@ class YoukuIE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 13,
|
||||
'skip': 'Available in China only',
|
||||
}, {
|
||||
'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html',
|
||||
'note': 'Video protected with password',
|
||||
'info_dict': {
|
||||
'id': 'XNjA1NzA2Njgw',
|
||||
'title': '邢義田复旦讲座之想象中的胡人—从“左衽孔子”说起',
|
||||
},
|
||||
'playlist_count': 19,
|
||||
'params': {
|
||||
'videopassword': '100600',
|
||||
},
|
||||
}]
|
||||
|
||||
def construct_video_urls(self, data1, data2):
|
||||
@@ -185,9 +196,15 @@ class YoukuIE(InfoExtractor):
|
||||
raw_data = self._download_json(req, video_id, note=note)
|
||||
return raw_data['data'][0]
|
||||
|
||||
video_password = self._downloader.params.get('videopassword', None)
|
||||
|
||||
# request basic data
|
||||
basic_data_url = 'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id
|
||||
if video_password:
|
||||
basic_data_url += '?password=%s' % video_password
|
||||
|
||||
data1 = retrieve_data(
|
||||
'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id,
|
||||
basic_data_url,
|
||||
'Downloading JSON metadata 1')
|
||||
data2 = retrieve_data(
|
||||
'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % video_id,
|
||||
|
@@ -33,6 +33,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
remove_start,
|
||||
smuggle_url,
|
||||
str_to_int,
|
||||
unescapeHTML,
|
||||
@@ -46,7 +47,7 @@ from ..utils import (
|
||||
class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
"""Provide base functions for Youtube extractors"""
|
||||
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
|
||||
_TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor'
|
||||
_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
|
||||
_NETRC_MACHINE = 'youtube'
|
||||
# If True it will raise an error if no login info is provided
|
||||
_LOGIN_REQUIRED = False
|
||||
@@ -128,40 +129,24 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
# Two-Factor
|
||||
# TODO add SMS and phone call support - these require making a request and then prompting the user
|
||||
|
||||
if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', login_results) is not None:
|
||||
tfa_code = self._get_tfa_info()
|
||||
if re.search(r'(?i)<form[^>]* id="challenge"', login_results) is not None:
|
||||
tfa_code = self._get_tfa_info('2-step verification code')
|
||||
|
||||
if tfa_code is None:
|
||||
self._downloader.report_warning('Two-factor authentication required. Provide it with --twofactor <code>')
|
||||
self._downloader.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)')
|
||||
if not tfa_code:
|
||||
self._downloader.report_warning(
|
||||
'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
|
||||
'(Note that only TOTP (Google Authenticator App) codes work at this time.)')
|
||||
return False
|
||||
|
||||
# Unlike the first login form, secTok and timeStmp are both required for the TFA form
|
||||
tfa_code = remove_start(tfa_code, 'G-')
|
||||
|
||||
match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
|
||||
if match is None:
|
||||
self._downloader.report_warning('Failed to get secTok - did the page structure change?')
|
||||
secTok = match.group(1)
|
||||
match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
|
||||
if match is None:
|
||||
self._downloader.report_warning('Failed to get timeStmp - did the page structure change?')
|
||||
timeStmp = match.group(1)
|
||||
tfa_form_strs = self._form_hidden_inputs('challenge', login_results)
|
||||
|
||||
tfa_form_strs = {
|
||||
'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
|
||||
'smsToken': '',
|
||||
'smsUserPin': tfa_code,
|
||||
'smsVerifyPin': 'Verify',
|
||||
tfa_form_strs.update({
|
||||
'Pin': tfa_code,
|
||||
'TrustDevice': 'on',
|
||||
})
|
||||
|
||||
'PersistentCookie': 'yes',
|
||||
'checkConnection': '',
|
||||
'checkedDomains': 'youtube',
|
||||
'pstMsg': '1',
|
||||
'secTok': secTok,
|
||||
'timeStmp': timeStmp,
|
||||
'service': 'youtube',
|
||||
'hl': 'en_US',
|
||||
}
|
||||
tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in tfa_form_strs.items())
|
||||
tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii')
|
||||
|
||||
@@ -173,8 +158,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
if tfa_results is False:
|
||||
return False
|
||||
|
||||
if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None:
|
||||
self._downloader.report_warning('Two-factor code expired. Please try again, or use a one-use backup code instead.')
|
||||
if re.search(r'(?i)<form[^>]* id="challenge"', tfa_results) is not None:
|
||||
self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
|
||||
return False
|
||||
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
|
||||
self._downloader.report_warning('unable to log in - did the page structure change?')
|
||||
@@ -213,11 +198,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|(?: # or the v= param in all its forms
|
||||
(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
|
||||
(?:\?|\#!?) # the params delimiter ? or # or #!
|
||||
(?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
|
||||
(?:.*?&)?? # any other preceding param (like /?s=tuff&v=xxxx)
|
||||
v=
|
||||
)
|
||||
))
|
||||
|youtu\.be/ # just youtu.be/xxxx
|
||||
|(?:
|
||||
youtu\.be| # just youtu.be/xxxx
|
||||
vid\.plus # or vid.plus/xxxx
|
||||
)/
|
||||
|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
|
||||
)
|
||||
)? # all until now is optional -> you can pass the naked ID
|
||||
@@ -365,6 +353,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'description': 'md5:64249768eec3bc4276236606ea996373',
|
||||
'uploader': 'justintimberlakeVEVO',
|
||||
'uploader_id': 'justintimberlakeVEVO',
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -380,6 +369,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader_id': 'setindia'
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&v=UxxajLWwzqY',
|
||||
'note': 'Use the first video ID in the URL',
|
||||
'info_dict': {
|
||||
'id': 'BaW_jenozKc',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl test video "\'/\\ä↭𝕐',
|
||||
'uploader': 'Philipp Hagemeister',
|
||||
'uploader_id': 'phihag',
|
||||
'upload_date': '20121002',
|
||||
'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
|
||||
'categories': ['Science & Technology'],
|
||||
'tags': ['youtube-dl'],
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
|
||||
'note': '256k DASH audio (format 141) via DASH manifest',
|
||||
@@ -421,7 +430,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'id': 'nfWlot6h_JM',
|
||||
'ext': 'm4a',
|
||||
'title': 'Taylor Swift - Shake It Off',
|
||||
'description': 'md5:2acfda1b285bdd478ccec22f9918199d',
|
||||
'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
|
||||
'uploader': 'TaylorSwiftVEVO',
|
||||
'uploader_id': 'TaylorSwiftVEVO',
|
||||
'upload_date': '20140818',
|
||||
@@ -455,6 +464,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader': 'The Witcher',
|
||||
'uploader_id': 'WitcherGame',
|
||||
'upload_date': '20140605',
|
||||
'age_limit': 18,
|
||||
},
|
||||
},
|
||||
# Age-gate video with encrypted signature
|
||||
@@ -468,6 +478,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader': 'LloydVEVO',
|
||||
'uploader_id': 'LloydVEVO',
|
||||
'upload_date': '20110629',
|
||||
'age_limit': 18,
|
||||
},
|
||||
},
|
||||
# video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
|
||||
@@ -492,7 +503,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'lqQg6PlCWgI',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20120731',
|
||||
'upload_date': '20120724',
|
||||
'uploader_id': 'olympic',
|
||||
'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
|
||||
'uploader': 'Olympics',
|
||||
@@ -521,7 +532,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'url': 'qEJwOuvDf7I',
|
||||
'info_dict': {
|
||||
'id': 'qEJwOuvDf7I',
|
||||
'ext': 'mp4',
|
||||
'ext': 'webm',
|
||||
'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
|
||||
'description': '',
|
||||
'upload_date': '20150404',
|
||||
@@ -616,6 +627,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://vid.plus/FlRa-iH7PGw',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
@@ -645,7 +660,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
def _extract_signature_function(self, video_id, player_url, example_sig):
|
||||
id_m = re.match(
|
||||
r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
|
||||
r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?)?\.(?P<ext>[a-z]+)$',
|
||||
player_url)
|
||||
if not id_m:
|
||||
raise ExtractorError('Cannot identify player %r' % player_url)
|
||||
@@ -1228,7 +1243,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
|
||||
if 'rtmpe%3Dyes' in encoded_url_map:
|
||||
raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
|
||||
url_map = {}
|
||||
formats = []
|
||||
for url_data_str in encoded_url_map.split(','):
|
||||
url_data = compat_parse_qs(url_data_str)
|
||||
if 'itag' not in url_data or 'url' not in url_data:
|
||||
@@ -1274,7 +1289,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
player_desc = 'flash player %s' % player_version
|
||||
else:
|
||||
player_version = self._search_regex(
|
||||
r'html5player-([^/]+?)(?:/html5player)?\.js',
|
||||
r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
|
||||
player_url,
|
||||
'html5 player', fatal=False)
|
||||
player_desc = 'html5 player %s' % player_version
|
||||
@@ -1288,8 +1303,50 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
url += '&signature=' + signature
|
||||
if 'ratebypass' not in url:
|
||||
url += '&ratebypass=yes'
|
||||
url_map[format_id] = url
|
||||
formats = _map_to_format_list(url_map)
|
||||
|
||||
# Some itags are not included in DASH manifest thus corresponding formats will
|
||||
# lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
|
||||
# Trying to extract metadata from url_encoded_fmt_stream_map entry.
|
||||
mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
|
||||
width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
|
||||
dct = {
|
||||
'format_id': format_id,
|
||||
'url': url,
|
||||
'player_url': player_url,
|
||||
'filesize': int_or_none(url_data.get('clen', [None])[0]),
|
||||
'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
|
||||
'width': width,
|
||||
'height': height,
|
||||
'fps': int_or_none(url_data.get('fps', [None])[0]),
|
||||
'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
|
||||
}
|
||||
type_ = url_data.get('type', [None])[0]
|
||||
if type_:
|
||||
type_split = type_.split(';')
|
||||
kind_ext = type_split[0].split('/')
|
||||
if len(kind_ext) == 2:
|
||||
kind, ext = kind_ext
|
||||
dct['ext'] = ext
|
||||
if kind in ('audio', 'video'):
|
||||
codecs = None
|
||||
for mobj in re.finditer(
|
||||
r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
|
||||
if mobj.group('key') == 'codecs':
|
||||
codecs = mobj.group('val')
|
||||
break
|
||||
if codecs:
|
||||
codecs = codecs.split(',')
|
||||
if len(codecs) == 2:
|
||||
acodec, vcodec = codecs[0], codecs[1]
|
||||
else:
|
||||
acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
|
||||
dct.update({
|
||||
'acodec': acodec,
|
||||
'vcodec': vcodec,
|
||||
})
|
||||
if format_id in self._formats:
|
||||
dct.update(self._formats[format_id])
|
||||
formats.append(dct)
|
||||
elif video_info.get('hlsvp'):
|
||||
manifest_url = video_info['hlsvp'][0]
|
||||
url_map = self._extract_from_m3u8(manifest_url, video_id)
|
||||
@@ -1754,7 +1811,7 @@ class YoutubeSearchURLIE(InfoExtractor):
|
||||
r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')
|
||||
|
||||
part_codes = re.findall(
|
||||
r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
|
||||
r'(?s)<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*>(.*?)</h3>', result_code)
|
||||
entries = []
|
||||
for part_code in part_codes:
|
||||
part_title = self._html_search_regex(
|
||||
|
@@ -320,7 +320,7 @@ def parseOpts(overrideArguments=None):
|
||||
authentication.add_option(
|
||||
'--video-password',
|
||||
dest='videopassword', metavar='PASSWORD',
|
||||
help='Video password (vimeo, smotri)')
|
||||
help='Video password (vimeo, smotri, youku)')
|
||||
|
||||
video_format = optparse.OptionGroup(parser, 'Video Format Options')
|
||||
video_format.add_option(
|
||||
|
@@ -587,6 +587,11 @@ class ContentTooShortError(Exception):
|
||||
|
||||
|
||||
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
|
||||
# Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
|
||||
# expected HTTP responses to meet HTTP/1.0 or later (see also
|
||||
# https://github.com/rg3/youtube-dl/issues/6727)
|
||||
if sys.version_info < (3, 0):
|
||||
kwargs['strict'] = True
|
||||
hc = http_class(*args, **kwargs)
|
||||
source_address = ydl_handler._params.get('source_address')
|
||||
if source_address is not None:
|
||||
|
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2015.08.06.1'
|
||||
__version__ = '2015.09.03'
|
||||
|
Reference in New Issue
Block a user