Compare commits
360 Commits
2016.01.14
...
2016.02.13
Author | SHA1 | Date | |
---|---|---|---|
35ced3985a | |||
3e18700d45 | |||
f9f49d87c2 | |||
6863631c26 | |||
9d939cec48 | |||
4c77d3f52a | |||
7be747b921 | |||
bb20526b64 | |||
bcbb1b08b2 | |||
3d98f97c64 | |||
c349456ef6 | |||
5a4905924d | |||
b826035dd5 | |||
a7cab4d039 | |||
fc3810f6d1 | |||
3dc71d82ce | |||
9c7b38981c | |||
8b85ac3fd9 | |||
81e1c4e2fc | |||
388ae76b52 | |||
b67d63149d | |||
28280e8ded | |||
6b3fbd3425 | |||
a7ab46375b | |||
b14d5e26f6 | |||
9a61dfba0c | |||
154c209e2d | |||
d1ea5e171f | |||
a1188d0ed0 | |||
47d205a646 | |||
80f772c28a | |||
f817d9bec1 | |||
e2effb08a4 | |||
7fcea295c5 | |||
cc799437ea | |||
89d23f37f2 | |||
b92071ef00 | |||
47246ae26c | |||
9c15869c28 | |||
51e9094f4a | |||
5e3a6fec33 | |||
d413095f7e | |||
1bedf4de06 | |||
3967a761f4 | |||
b081350bd9 | |||
16f1430ba6 | |||
085ad71157 | |||
35972ba172 | |||
3834d3e35c | |||
8d0a2a2a4e | |||
11c0339bec | |||
915dd77783 | |||
b6bfa6fb79 | |||
f070197bd7 | |||
5a7699bb2e | |||
8628d26f38 | |||
8411229bd5 | |||
72b9ebc65d | |||
3b799ca14c | |||
0474512e30 | |||
f0905c6ec3 | |||
86296ad2cd | |||
52f5889f77 | |||
81e0b4f2d1 | |||
cbecc9b903 | |||
b8b465af3e | |||
59b35c6745 | |||
7032833011 | |||
f406c78785 | |||
f326b5837a | |||
5dd4b3468f | |||
d4f8e83404 | |||
7b8b007cd9 | |||
3547d26587 | |||
7e62c2eb6d | |||
56401e1e5f | |||
860db2d508 | |||
4b8874975c | |||
bd6b6f6622 | |||
4340727e6c | |||
3ceccade87 | |||
28ad7df65d | |||
79a3508579 | |||
1b840245bd | |||
6a3828fddd | |||
91cb6b5065 | |||
0826a0b555 | |||
bcbbb98bfe | |||
66159b38aa | |||
23d17e4beb | |||
d97b0e3241 | |||
eb2533ec4c | |||
b7b365067f | |||
86e284e028 | |||
d9e543b680 | |||
c773c232d8 | |||
58ae24336a | |||
7d3a035ee0 | |||
e06e75c7e7 | |||
593e0f43b4 | |||
008ab0f814 | |||
3f7e8750d4 | |||
f1ed3acae5 | |||
920d21b9d3 | |||
2fb35d1c28 | |||
09be85b8dd | |||
eadc3ccd50 | |||
255732f0d3 | |||
53c269c6fd | |||
675d001633 | |||
58be922079 | |||
c84d3a557d | |||
d577c79632 | |||
6ad2b01e14 | |||
fd3a1f3d60 | |||
87de7069b9 | |||
6fba62c87a | |||
f14be22816 | |||
1df4141196 | |||
fae45ede08 | |||
4e0cff2a50 | |||
9c74423510 | |||
5976e7ab57 | |||
a1a22572fb | |||
c11875b328 | |||
8ff648e4f9 | |||
1bac34556f | |||
0436157b95 | |||
ae0db349c1 | |||
08411970d5 | |||
dc724e0c8b | |||
0a5d1ec706 | |||
58250eff2b | |||
11a4efc505 | |||
7537b35fb8 | |||
33cc74eeeb | |||
f021acee49 | |||
abe694ca95 | |||
b286f201a8 | |||
bd93a12e85 | |||
92769650fa | |||
dc4fe5c6d7 | |||
566bda51f2 | |||
f63757ec35 | |||
7a0ed06909 | |||
9934fe76be | |||
a8aad21001 | |||
d055bf91cc | |||
0e1b1a011d | |||
eab3c2895c | |||
163da6a484 | |||
324916d11a | |||
3ccb0655c1 | |||
e04398e397 | |||
231ea2a3bb | |||
b99d88c6a1 | |||
189d72d5fd | |||
a7aab0c23e | |||
a69bee4762 | |||
9acd33094d | |||
8e7aad2075 | |||
ce5879fa14 | |||
7b7507d6e1 | |||
14823decf3 | |||
673fb82e65 | |||
181cf24bc0 | |||
89f2602880 | |||
db9b1dbcd9 | |||
e881c4bcab | |||
670ad51ade | |||
eb6fc7d32a | |||
ed1a390583 | |||
809e1857c5 | |||
7c38af48b9 | |||
60ad3eb970 | |||
a7685b3a6b | |||
8f1fddc816 | |||
1bf996fa5c | |||
248ae880b6 | |||
2d2fa82d17 | |||
c94678957f | |||
16f38a699f | |||
a6c2c24479 | |||
b8c9926c0a | |||
df374b5222 | |||
5ea1eb78f5 | |||
5d2c0fd9ba | |||
0803753fea | |||
2c2f1efdcd | |||
b323e1707d | |||
09104e9930 | |||
5fa1702ca6 | |||
17b598d30c | |||
53be8894e4 | |||
c3deacd562 | |||
8ab3fe81d8 | |||
2f0a33d8a3 | |||
05d0d131a7 | |||
c140629995 | |||
7d106a65ca | |||
0179f6a830 | |||
830afe85dc | |||
8bf39420b4 | |||
71d08b3e29 | |||
06ffa33485 | |||
874e05975b | |||
f5d30d521c | |||
e047922be0 | |||
83ab8a79cc | |||
350cf045d8 | |||
68a0ea15b4 | |||
2b4f5e68d1 | |||
055f417278 | |||
70029bc348 | |||
cf57433bbd | |||
1ac6e794cb | |||
a853427427 | |||
50e989e263 | |||
10e6ed9341 | |||
38c84acae5 | |||
29f46c2bee | |||
39c10a2b6e | |||
b913348d5f | |||
2b14cb566f | |||
b0df5223be | |||
ed7cd1e859 | |||
f125d9115b | |||
a9d5f12fec | |||
7f32e5dc35 | |||
c3111ab34f | |||
9339774af2 | |||
b0d21deda9 | |||
fab6f0e65b | |||
b6c33fd544 | |||
fb4b345800 | |||
af9c2a07ae | |||
ab180fc648 | |||
682f8c43b5 | |||
f693213567 | |||
9165d6bab9 | |||
2975fe1a7b | |||
de691a498d | |||
2e6e742c3c | |||
e9bd0f772b | |||
77f785076f | |||
94278f7202 | |||
a0d8d704df | |||
f6861ec96f | |||
f733b05302 | |||
6fa73386cb | |||
5ca01bb9e4 | |||
1ca59daca9 | |||
594c4d79a5 | |||
1f16b958b1 | |||
4c0d13df9b | |||
b2c6528baf | |||
ea17820432 | |||
1257b049bc | |||
b969813548 | |||
10677ece81 | |||
d570746e45 | |||
4fcd9d147d | |||
9c54ae3387 | |||
24114fee74 | |||
220ee33f2b | |||
4118cc02c1 | |||
32d77eeb04 | |||
032f232626 | |||
4d318be195 | |||
6b45f9aba2 | |||
1e10d02fec | |||
51290d8457 | |||
582f4f834e | |||
e87d98b0dd | |||
383496e65e | |||
4519c1f43c | |||
a616f65471 | |||
1f78ed189a | |||
7dde358adc | |||
27b83249c9 | |||
56aa074538 | |||
9d90e7de03 | |||
7d4d9c526a | |||
fe6856b059 | |||
a54fbf2ca6 | |||
d8024aebe5 | |||
8652bd22f1 | |||
f15a9ca301 | |||
65ced034b8 | |||
bec30224ff | |||
0428106da3 | |||
73e7442456 | |||
26de1bba83 | |||
e0690782b8 | |||
8fff4f61e5 | |||
10defdd06a | |||
485139c15c | |||
b605ebb609 | |||
aecfcd4e59 | |||
942d46196f | |||
78be2eca7c | |||
1fa2b9841d | |||
9fbd0822aa | |||
e323cf3ff3 | |||
8ceabd4df3 | |||
a8776b107b | |||
096b533982 | |||
dae503afaa | |||
b39eab7f94 | |||
e5a66240c0 | |||
e0ef13ddeb | |||
855f90fa6f | |||
614db89ae3 | |||
1358b94163 | |||
350e02d40d | |||
0b26ba3fc8 | |||
3a0a78731b | |||
6be16ed24b | |||
b555942428 | |||
b2dca40d81 | |||
15870bbd01 | |||
10d33b3473 | |||
ac25992bc7 | |||
30783c442d | |||
a50a8003a0 | |||
315bdae00a | |||
2ddfd26f1b | |||
f3ed5df611 | |||
b4e44234bc | |||
4ca2a3cf3c | |||
33d2fc2f64 | |||
27a95f51aa | |||
a78d6a9bb1 | |||
567f9a5809 | |||
3a421c724f | |||
34dd81c03a | |||
b3f502cdb9 | |||
587dfd44a4 | |||
52767c1ba0 | |||
014b5c59d8 | |||
fad7a336a1 | |||
ffbc0baf72 | |||
345f12196c | |||
5769b68bc0 | |||
4e2743abd9 | |||
be2d40a58a | |||
81549898c0 | |||
0baedd1851 | |||
6b559c2fbc | |||
986986064e | |||
4654c1d016 | |||
163e8369b0 | |||
5cc9c5dfa8 | |||
fbd90643cb | |||
30e2f2d76f | |||
4240d50496 | |||
6240b0a278 | |||
e37afbe0b8 | |||
e1a0bfdffe | |||
5f432ac8f5 |
5
AUTHORS
5
AUTHORS
@ -152,3 +152,8 @@ Evan Lu
|
||||
flatgreen
|
||||
Brian Foley
|
||||
Vignesh Venkat
|
||||
Tom Gijselinck
|
||||
Founder Fang
|
||||
Andrew Alexeyew
|
||||
Saso Bezlaj
|
||||
Erwin de Haan
|
||||
|
175
README.md
175
README.md
@ -173,6 +173,10 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
expected filesize (experimental)
|
||||
--hls-prefer-native Use the native HLS downloader instead of
|
||||
ffmpeg (experimental)
|
||||
--hls-use-mpegts Use the mpegts container for HLS videos,
|
||||
allowing to play the video while
|
||||
downloading (some players may not be able
|
||||
to play it)
|
||||
--external-downloader COMMAND Use the specified external downloader.
|
||||
Currently supports
|
||||
aria2c,axel,curl,httpie,wget
|
||||
@ -339,8 +343,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
preference, for example: "srt" or
|
||||
"ass/srt/best"
|
||||
--sub-lang LANGS Languages of the subtitles to download
|
||||
(optional) separated by commas, use IETF
|
||||
language tags like 'en,pt'
|
||||
(optional) separated by commas, use --list-
|
||||
subs for available language tags
|
||||
|
||||
## Authentication Options:
|
||||
-u, --username USERNAME Login with this account ID
|
||||
@ -438,41 +442,172 @@ On Windows you may also need to setup the `%HOME%` environment variable manually
|
||||
|
||||
The `-o` option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a lowercase S. Allowed names are:
|
||||
|
||||
- `id`: The sequence will be replaced by the video identifier.
|
||||
- `url`: The sequence will be replaced by the video URL.
|
||||
- `uploader`: The sequence will be replaced by the nickname of the person who uploaded the video.
|
||||
- `upload_date`: The sequence will be replaced by the upload date in YYYYMMDD format.
|
||||
- `title`: The sequence will be replaced by the video title.
|
||||
- `ext`: The sequence will be replaced by the appropriate extension (like flv or mp4).
|
||||
- `epoch`: The sequence will be replaced by the Unix epoch when creating the file.
|
||||
- `autonumber`: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero.
|
||||
- `playlist`: The sequence will be replaced by the name or the id of the playlist that contains the video.
|
||||
- `playlist_index`: The sequence will be replaced by the index of the video in the playlist padded with leading zeros according to the total length of the playlist.
|
||||
- `format_id`: The sequence will be replaced by the format code specified by `--format`.
|
||||
- `duration`: The sequence will be replaced by the length of the video in seconds.
|
||||
- `id`: Video identifier
|
||||
- `title`: Video title
|
||||
- `url`: Video URL
|
||||
- `ext`: Video filename extension
|
||||
- `alt_title`: A secondary title of the video
|
||||
- `display_id`: An alternative identifier for the video
|
||||
- `uploader`: Full name of the video uploader
|
||||
- `creator`: The main artist who created the video
|
||||
- `release_date`: The date (YYYYMMDD) when the video was released
|
||||
- `timestamp`: UNIX timestamp of the moment the video became available
|
||||
- `upload_date`: Video upload date (YYYYMMDD)
|
||||
- `uploader_id`: Nickname or id of the video uploader
|
||||
- `location`: Physical location where the video was filmed
|
||||
- `duration`: Length of the video in seconds
|
||||
- `view_count`: How many users have watched the video on the platform
|
||||
- `like_count`: Number of positive ratings of the video
|
||||
- `dislike_count`: Number of negative ratings of the video
|
||||
- `repost_count`: Number of reposts of the video
|
||||
- `average_rating`: Average rating given by users, the scale used depends on the webpage
|
||||
- `comment_count`: Number of comments on the video
|
||||
- `age_limit`: Age restriction for the video (years)
|
||||
- `format`: A human-readable description of the format
|
||||
- `format_id`: Format code specified by `--format`
|
||||
- `format_note`: Additional info about the format
|
||||
- `width`: Width of the video
|
||||
- `height`: Height of the video
|
||||
- `resolution`: Textual description of width and height
|
||||
- `tbr`: Average bitrate of audio and video in KBit/s
|
||||
- `abr`: Average audio bitrate in KBit/s
|
||||
- `acodec`: Name of the audio codec in use
|
||||
- `asr`: Audio sampling rate in Hertz
|
||||
- `vbr`: Average video bitrate in KBit/s
|
||||
- `fps`: Frame rate
|
||||
- `vcodec`: Name of the video codec in use
|
||||
- `container`: Name of the container format
|
||||
- `filesize`: The number of bytes, if known in advance
|
||||
- `filesize_approx`: An estimate for the number of bytes
|
||||
- `protocol`: The protocol that will be used for the actual download
|
||||
- `extractor`: Name of the extractor
|
||||
- `extractor_key`: Key name of the extractor
|
||||
- `epoch`: Unix epoch when creating the file
|
||||
- `autonumber`: Five-digit number that will be increased with each download, starting at zero
|
||||
- `playlist`: Name or id of the playlist that contains the video
|
||||
- `playlist_index`: Index of the video in the playlist padded with leading zeros according to the total length of the playlist
|
||||
|
||||
Available for the video that belongs to some logical chapter or section:
|
||||
- `chapter`: Name or title of the chapter the video belongs to
|
||||
- `chapter_number`: Number of the chapter the video belongs to
|
||||
- `chapter_id`: Id of the chapter the video belongs to
|
||||
|
||||
Available for the video that is an episode of some series or programme:
|
||||
- `series`: Title of the series or programme the video episode belongs to
|
||||
- `season`: Title of the season the video episode belongs to
|
||||
- `season_number`: Number of the season the video episode belongs to
|
||||
- `season_id`: Id of the season the video episode belongs to
|
||||
- `episode`: Title of the video episode
|
||||
- `episode_number`: Number of the video episode within a season
|
||||
- `episode_id`: Id of the video episode
|
||||
|
||||
Each aforementioned sequence, when referenced in an output template, will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor; such sequences will be replaced with `NA`.
|
||||
|
||||
For example, for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj`, this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
|
||||
|
||||
The output template can also contain an arbitrary hierarchical path, e.g. `-o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s'`, which will result in downloading each video into a directory corresponding to this path template. Any missing directory will be automatically created for you.
|
||||
|
||||
To use a literal percent sign in an output template, use `%%`. To output to stdout use `-o -`.
|
||||
|
||||
The current default template is `%(title)s-%(id)s.%(ext)s`.
|
||||
|
||||
In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
|
||||
|
||||
Examples (note on Windows you may need to use double quotes instead of single):
|
||||
|
||||
```bash
|
||||
$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc
|
||||
$ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc
|
||||
youtube-dl test video ''_ä↭𝕐.mp4 # All kinds of weird characters
|
||||
$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
|
||||
|
||||
$ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc --restrict-filenames
|
||||
youtube-dl_test_video_.mp4 # A simple file name
|
||||
|
||||
# Download YouTube playlist videos in separate directory indexed by video order in a playlist
|
||||
$ youtube-dl -o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re
|
||||
|
||||
# Download Udemy course keeping each chapter in separate directory under MyVideos directory in your home
|
||||
$ youtube-dl -u user -p password -o '~/MyVideos/%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s' https://www.udemy.com/java-tutorial/
|
||||
|
||||
# Download entire series season keeping each series and each season in separate directory under C:/MyVideos
|
||||
$ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" http://videomore.ru/kino_v_detalayah/5_sezon/367617
|
||||
|
||||
# Stream the video being downloaded to stdout
|
||||
$ youtube-dl -o - BaW_jenozKc
|
||||
```
|
||||
|
||||
# FORMAT SELECTION
|
||||
|
||||
By default youtube-dl tries to download the best quality, but sometimes you may want to download in a different format.
|
||||
The simplest case is requesting a specific format, for example `-f 22`. You can get the list of available formats using `--list-formats`, you can also use a file extension (currently it supports aac, m4a, mp3, mp4, ogg, wav, webm) or the special names `best`, `bestvideo`, `bestaudio` and `worst`.
|
||||
By default youtube-dl tries to download the best available quality, i.e. if you want the best quality you **don't need** to pass any special options, youtube-dl will guess it for you by **default**.
|
||||
|
||||
If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes, as in `-f 22/17/18`. You can also filter the video results by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`). This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a question mark (?) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. Use commas to download multiple formats, such as `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`. You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv), for example `-f bestvideo+bestaudio`. Format selectors can also be grouped using parentheses, for example if you want to download the best mp4 and webm formats with a height lower than 480 you can use `-f '(mp4,webm)[height<480]'`.
|
||||
But sometimes you may want to download in a different format, for example when you are on a slow or intermittent connection. The key mechanism for achieving this is so-called *format selection*, with which you can explicitly specify the desired format, select formats based on some criterion or criteria, set up precedence and much more.
|
||||
|
||||
Since the end of April 2015 and version 2015.04.26 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see #5447, #5456). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading the best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some dash formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
|
||||
The general syntax for format selection is `--format FORMAT` or shorter `-f FORMAT` where `FORMAT` is a *selector expression*, i.e. an expression that describes the format or formats you would like to download.
|
||||
|
||||
The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for a particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific.
|
||||
|
||||
You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file.
|
||||
|
||||
You can also use special names to select particular edge case formats:
|
||||
- `best`: Select best quality format represented by single file with video and audio
|
||||
- `worst`: Select worst quality format represented by single file with video and audio
|
||||
- `bestvideo`: Select best quality video only format (e.g. DASH video), may not be available
|
||||
- `worstvideo`: Select worst quality video only format, may not be available
|
||||
- `bestaudio`: Select best quality audio only format, may not be available
|
||||
- `worstaudio`: Select worst quality audio only format, may not be available
|
||||
|
||||
For example, to download worst quality video only format you can use `-f worstvideo`.
|
||||
|
||||
If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes. Note that slash is left-associative, i.e. formats on the left hand side are preferred, for example `-f 22/17/18` will download format 22 if it's available, otherwise it will download format 17 if it's available, otherwise it will download format 18 if it's available, otherwise it will complain that no suitable formats are available for download.
|
||||
|
||||
If you want to download several formats of the same video, use a comma as a separator, e.g. `-f 22,17,18` will download all three of these formats, provided they are available. Or a more sophisticated example combined with the precedence feature: `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`.
|
||||
|
||||
You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`).
|
||||
|
||||
The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `>=`, `=` (equals), `!=` (not equals):
|
||||
- `filesize`: The number of bytes, if known in advance
|
||||
- `width`: Width of the video, if known
|
||||
- `height`: Height of the video, if known
|
||||
- `tbr`: Average bitrate of audio and video in KBit/s
|
||||
- `abr`: Average audio bitrate in KBit/s
|
||||
- `vbr`: Average video bitrate in KBit/s
|
||||
- `asr`: Audio sampling rate in Hertz
|
||||
- `fps`: Frame rate
|
||||
|
||||
Filtering also works for the comparisons `=` (equals), `!=` (not equals), `^=` (begins with), `$=` (ends with), `*=` (contains) and the following string meta fields:
|
||||
- `ext`: File extension
|
||||
- `acodec`: Name of the audio codec in use
|
||||
- `vcodec`: Name of the video codec in use
|
||||
- `container`: Name of the container format
|
||||
- `protocol`: The protocol that will be used for the actual download, lower-case. `http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `m3u8`, or `m3u8_native`
|
||||
|
||||
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by the particular extractor, i.e. the metadata offered by the video hoster.
|
||||
|
||||
Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s.
|
||||
|
||||
You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv to be installed), for example `-f bestvideo+bestaudio` will download the best video only format and the best audio only format and mux them together with ffmpeg/avconv.
|
||||
|
||||
Format selectors can also be grouped using parentheses, for example if you want to download the best mp4 and webm formats with a height lower than 480 you can use `-f '(mp4,webm)[height<480]'`.
|
||||
|
||||
Since the end of April 2015 and version 2015.04.26 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see #5447, #5456). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading the best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some DASH formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
|
||||
|
||||
If you want to preserve the old format selection behavior (prior to youtube-dl 2015.04.26), i.e. you want to download the best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](#configuration) in order not to type it every time you run youtube-dl.
|
||||
|
||||
Examples (note on Windows you may need to use double quotes instead of single):
|
||||
```bash
|
||||
# Download best mp4 format available or any other best if no mp4 available
|
||||
$ youtube-dl -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best'
|
||||
|
||||
# Download best format available but not better than 480p
|
||||
$ youtube-dl -f 'bestvideo[height<=480]+bestaudio/best[height<=480]'
|
||||
|
||||
# Download best video only format but no bigger than 50 MB
|
||||
$ youtube-dl -f 'best[filesize<50M]'
|
||||
|
||||
# Download best format available via direct link over HTTP/HTTPS protocol
|
||||
$ youtube-dl -f '(bestvideo+bestaudio/best)[protocol^=http]'
|
||||
```
|
||||
|
||||
|
||||
# VIDEO SELECTION
|
||||
|
||||
Videos can be filtered by their upload date using the options `--date`, `--datebefore` or `--dateafter`. They accept dates in two formats:
|
||||
|
@ -24,6 +24,7 @@
|
||||
- **AdobeTVShow**
|
||||
- **AdobeTVVideo**
|
||||
- **AdultSwim**
|
||||
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
|
||||
- **Aftonbladet**
|
||||
- **AirMozilla**
|
||||
- **AlJazeera**
|
||||
@ -42,6 +43,7 @@
|
||||
- **ARD:mediathek**
|
||||
- **arte.tv**
|
||||
- **arte.tv:+7**
|
||||
- **arte.tv:cinema**
|
||||
- **arte.tv:concert**
|
||||
- **arte.tv:creative**
|
||||
- **arte.tv:ddc**
|
||||
@ -53,6 +55,7 @@
|
||||
- **audiomack**
|
||||
- **audiomack:album**
|
||||
- **Azubu**
|
||||
- **AzubuLive**
|
||||
- **BaiduVideo**: 百度视频
|
||||
- **bambuser**
|
||||
- **bambuser:channel**
|
||||
@ -85,8 +88,12 @@
|
||||
- **CamdemyFolder**
|
||||
- **canalc2.tv**
|
||||
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
||||
- **Canvas**
|
||||
- **CBC**
|
||||
- **CBCPlayer**
|
||||
- **CBS**
|
||||
- **CBSNews**: CBS News
|
||||
- **CBSNewsLiveVideo**: CBS News Live Videos
|
||||
- **CBSSports**
|
||||
- **CeskaTelevize**
|
||||
- **channel9**: Channel 9
|
||||
@ -115,6 +122,7 @@
|
||||
- **ComedyCentralShows**: The Daily Show / The Colbert Report
|
||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||
- **Cracked**
|
||||
- **Crackle**
|
||||
- **Criterion**
|
||||
- **CrooksAndLiars**
|
||||
- **Crunchyroll**
|
||||
@ -122,12 +130,16 @@
|
||||
- **CSpan**: C-SPAN
|
||||
- **CtsNews**: 華視新聞
|
||||
- **culturebox.francetvinfo.fr**
|
||||
- **CultureUnplugged**
|
||||
- **CWTV**
|
||||
- **dailymotion**
|
||||
- **dailymotion:playlist**
|
||||
- **dailymotion:user**
|
||||
- **DailymotionCloud**
|
||||
- **daum.net**
|
||||
- **daum.net:clip**
|
||||
- **daum.net:playlist**
|
||||
- **daum.net:user**
|
||||
- **DBTV**
|
||||
- **DCN**
|
||||
- **dcn:live**
|
||||
@ -138,6 +150,7 @@
|
||||
- **defense.gouv.fr**
|
||||
- **democracynow**
|
||||
- **DHM**: Filmarchiv - Deutsches Historisches Museum
|
||||
- **Digiteka**
|
||||
- **Discovery**
|
||||
- **Dotsub**
|
||||
- **DouyuTV**: 斗鱼
|
||||
@ -174,6 +187,7 @@
|
||||
- **ExpoTV**
|
||||
- **ExtremeTube**
|
||||
- **facebook**
|
||||
- **facebook:post**
|
||||
- **faz.net**
|
||||
- **fc2**
|
||||
- **Fczenit**
|
||||
@ -229,7 +243,6 @@
|
||||
- **Helsinki**: helsinki.fi
|
||||
- **HentaiStigma**
|
||||
- **HistoricFilms**
|
||||
- **History**
|
||||
- **hitbox**
|
||||
- **hitbox:live**
|
||||
- **HornBunny**
|
||||
@ -252,7 +265,7 @@
|
||||
- **Instagram**
|
||||
- **instagram:user**: Instagram user profile
|
||||
- **InternetVideoArchive**
|
||||
- **IPrima** (Currently broken)
|
||||
- **IPrima**
|
||||
- **iqiyi**: 爱奇艺
|
||||
- **Ir90Tv**
|
||||
- **ivi**: ivi.ru
|
||||
@ -273,6 +286,7 @@
|
||||
- **KeezMovies**
|
||||
- **KhanAcademy**
|
||||
- **KickStarter**
|
||||
- **KonserthusetPlay**
|
||||
- **kontrtube**: KontrTube.ru - Труба зовёт
|
||||
- **KrasView**: Красвью
|
||||
- **Ku6**
|
||||
@ -285,7 +299,9 @@
|
||||
- **la7.tv**
|
||||
- **Laola1Tv**
|
||||
- **Lecture2Go**
|
||||
- **Lemonde**
|
||||
- **Letv**: 乐视网
|
||||
- **LetvCloud**: 乐视云
|
||||
- **LetvPlaylist**
|
||||
- **LetvTv**
|
||||
- **Libsyn**
|
||||
@ -298,6 +314,7 @@
|
||||
- **livestream**
|
||||
- **livestream:original**
|
||||
- **LnkGo**
|
||||
- **LoveHomePorn**
|
||||
- **lrt.lt**
|
||||
- **lynda**: lynda.com videos
|
||||
- **lynda:course**: lynda.com online courses
|
||||
@ -306,6 +323,7 @@
|
||||
- **mailru**: Видео@Mail.Ru
|
||||
- **MakerTV**
|
||||
- **Malemotion**
|
||||
- **MatchTV**
|
||||
- **MDR**: MDR.DE and KiKA
|
||||
- **media.ccc.de**
|
||||
- **metacafe**
|
||||
@ -430,6 +448,7 @@
|
||||
- **PlanetaPlay**
|
||||
- **play.fm**
|
||||
- **played.to**
|
||||
- **PlaysTV**
|
||||
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
|
||||
- **Playvid**
|
||||
- **Playwire**
|
||||
@ -484,6 +503,7 @@
|
||||
- **rtve.es:live**: RTVE.es live streams
|
||||
- **RTVNH**
|
||||
- **RUHD**
|
||||
- **RulePorn**
|
||||
- **rutube**: Rutube videos
|
||||
- **rutube:channel**: Rutube channels
|
||||
- **rutube:embed**: Rutube embedded videos
|
||||
@ -497,6 +517,7 @@
|
||||
- **Sapo**: SAPO Vídeos
|
||||
- **savefrom.net**
|
||||
- **SBS**: sbs.com.au
|
||||
- **schooltv**
|
||||
- **SciVee**
|
||||
- **screen.yahoo:search**: Yahoo screen search
|
||||
- **Screencast**
|
||||
@ -600,6 +621,7 @@
|
||||
- **ToypicsUser**: Toypics user profile
|
||||
- **TrailerAddict** (Currently broken)
|
||||
- **Trilulilu**
|
||||
- **trollvids**
|
||||
- **TruTube**
|
||||
- **Tube8**
|
||||
- **TubiTv**
|
||||
@ -638,7 +660,6 @@
|
||||
- **udemy**
|
||||
- **udemy:course**
|
||||
- **UDNEmbed**: 聯合影音
|
||||
- **Ultimedia**
|
||||
- **Unistra**
|
||||
- **Urort**: NRK P3 Urørt
|
||||
- **ustream**
|
||||
@ -665,7 +686,9 @@
|
||||
- **VideoPremium**
|
||||
- **VideoTt**: video.tt - Your True Tube (Currently broken)
|
||||
- **videoweed**: VideoWeed
|
||||
- **Vidme**
|
||||
- **vidme**
|
||||
- **vidme:user**
|
||||
- **vidme:user:likes**
|
||||
- **Vidzi**
|
||||
- **vier**
|
||||
- **vier:videos**
|
||||
@ -705,6 +728,7 @@
|
||||
- **WebOfStories**
|
||||
- **WebOfStoriesPlaylist**
|
||||
- **Weibo**
|
||||
- **WeiqiTV**: WQTV
|
||||
- **wholecloud**: WholeCloud
|
||||
- **Wimp**
|
||||
- **Wistia**
|
||||
@ -756,3 +780,4 @@
|
||||
- **ZDFChannel**
|
||||
- **zingmp3:album**: mp3.zing.vn albums
|
||||
- **zingmp3:song**: mp3.zing.vn songs
|
||||
- **ZippCast**
|
||||
|
@ -12,8 +12,9 @@ import copy
|
||||
|
||||
from test.helper import FakeYDL, assertRegexpMatches
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.compat import compat_str
|
||||
from youtube_dl.compat import compat_str, compat_urllib_error
|
||||
from youtube_dl.extractor import YoutubeIE
|
||||
from youtube_dl.extractor.common import InfoExtractor
|
||||
from youtube_dl.postprocessor.common import PostProcessor
|
||||
from youtube_dl.utils import ExtractorError, match_filter_func
|
||||
|
||||
@ -221,6 +222,16 @@ class TestFormatSelection(unittest.TestCase):
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'dash-video-low')
|
||||
|
||||
formats = [
|
||||
{'format_id': 'vid-vcodec-dot', 'ext': 'mp4', 'preference': 1, 'vcodec': 'avc1.123456', 'acodec': 'none', 'url': TEST_URL},
|
||||
]
|
||||
info_dict = _make_result(formats)
|
||||
|
||||
ydl = YDL({'format': 'bestvideo[vcodec=avc1.123456]'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot')
|
||||
|
||||
def test_youtube_format_selection(self):
|
||||
order = [
|
||||
'38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '36', '17', '13',
|
||||
@ -237,6 +248,17 @@ class TestFormatSelection(unittest.TestCase):
|
||||
|
||||
def format_info(f_id):
    # Work on a copy so the assignments below do not touch the entry
    # stored in YoutubeIE._formats.
    info = YoutubeIE._formats[f_id].copy()

    # XXX: In real cases InfoExtractor._parse_mpd_formats() fills up 'acodec'
    # and 'vcodec', while in tests such information is incomplete since
    # commit a6c2c24479e5f4827ceb06f64d855329c0a6f593
    # test_YoutubeDL.test_youtube_format_selection is broken without
    # this fix
    has_audio_codec = 'acodec' in info
    has_video_codec = 'vcodec' in info
    if has_audio_codec and not has_video_codec:
        # Only an audio codec is declared: mark the video side as absent.
        info['vcodec'] = 'none'
    elif has_video_codec and not has_audio_codec:
        # Only a video codec is declared: mark the audio side as absent.
        info['acodec'] = 'none'

    info.update({'format_id': f_id, 'url': 'url:' + f_id})
    return info
|
||||
@ -631,6 +653,47 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
result = get_ids({'playlist_items': '10'})
|
||||
self.assertEqual(result, [])
|
||||
|
||||
def test_urlopen_no_file_protocol(self):
    # Opening a file:// URL through YDL.urlopen must raise URLError;
    # see https://github.com/rg3/youtube-dl/issues/8227
    local_file_url = 'file:///etc/passwd'
    downloader = YDL()
    self.assertRaises(
        compat_urllib_error.URLError, downloader.urlopen, local_file_url)
|
||||
|
||||
def test_do_not_override_ie_key_in_url_transparent(self):
    # Three dummy extractors form a chain:
    #   foo1: --(url_transparent, ie_key Foo2)--> foo2:
    #   foo2: --(url, ie_key Foo3)--------------> foo3:
    #   foo3: returns the final result built around TEST_URL.
    # The test passes when the entry that ends up downloaded is the one
    # produced by Foo3IE.

    class Foo1IE(InfoExtractor):
        _VALID_URL = r'foo1:'

        def _real_extract(self, url):
            return {
                '_type': 'url_transparent',
                'url': 'foo2:',
                'ie_key': 'Foo2',
            }

    class Foo2IE(InfoExtractor):
        _VALID_URL = r'foo2:'

        def _real_extract(self, url):
            return {
                '_type': 'url',
                'url': 'foo3:',
                'ie_key': 'Foo3',
            }

    class Foo3IE(InfoExtractor):
        _VALID_URL = r'foo3:'

        def _real_extract(self, url):
            return _make_result([{'url': TEST_URL}])

    ydl = YDL()
    # Register the extractors in chain order.
    for ie_class in (Foo1IE, Foo2IE, Foo3IE):
        ydl.add_info_extractor(ie_class(ydl))

    ydl.extract_info('foo1:')
    self.assertEqual(ydl.downloaded_info_dicts[0]['url'], TEST_URL)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -56,7 +56,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
|
||||
|
||||
def test_youtube_user_matching(self):
|
||||
self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
|
||||
self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
|
||||
|
||||
def test_youtube_feeds(self):
|
||||
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater'])
|
||||
|
@ -21,7 +21,7 @@ from youtube_dl.extractor import (
|
||||
NPOIE,
|
||||
ComedyCentralIE,
|
||||
NRKTVIE,
|
||||
RaiIE,
|
||||
RaiTVIE,
|
||||
VikiIE,
|
||||
ThePlatformIE,
|
||||
ThePlatformFeedIE,
|
||||
@ -65,16 +65,16 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(len(subtitles.keys()), 13)
|
||||
self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
|
||||
self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
|
||||
for lang in ['it', 'fr', 'de']:
|
||||
self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
|
||||
self.assertEqual(md5(subtitles['it']), '6d752b98c31f1cf8d597050c7a2cb4b5')
|
||||
for lang in ['fr', 'de']:
|
||||
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
|
||||
|
||||
def test_youtube_subtitles_sbv_format(self):
|
||||
def test_youtube_subtitles_ttml_format(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['subtitlesformat'] = 'sbv'
|
||||
self.DL.params['subtitlesformat'] = 'ttml'
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b')
|
||||
self.assertEqual(md5(subtitles['en']), 'e306f8c42842f723447d9f63ad65df54')
|
||||
|
||||
def test_youtube_subtitles_vtt_format(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
@ -260,7 +260,7 @@ class TestNRKSubtitles(BaseTestSubtitles):
|
||||
|
||||
class TestRaiSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
|
||||
IE = RaiIE
|
||||
IE = RaiTVIE
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
|
30
test/test_update.py
Normal file
30
test/test_update.py
Normal file
@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
import json
|
||||
from youtube_dl.update import rsa_verify
|
||||
|
||||
|
||||
class TestUpdate(unittest.TestCase):
    def test_rsa_verify(self):
        # Check rsa_verify() against the versions.json fixture stored next
        # to this test file.
        # Public key as a (modulus, exponent) pair.
        UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
        fixture_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'versions.json')
        with open(fixture_path, 'rb') as f:
            # Decode explicitly: a bare .decode() falls back to the
            # interpreter default codec (ASCII on Python 2), whereas the
            # payload is re-encoded as UTF-8 below.
            versions_info = json.loads(f.read().decode('utf-8'))
        # The signature is stored inside the document it signs, so it is
        # removed before the remaining data is serialized for checking.
        signature = versions_info.pop('signature')
        self.assertTrue(rsa_verify(
            json.dumps(versions_info, sort_keys=True).encode('utf-8'),
            signature, UPDATES_RSA_KEY))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@ -22,6 +22,7 @@ from youtube_dl.utils import (
|
||||
DateRange,
|
||||
detect_exe_version,
|
||||
determine_ext,
|
||||
dict_get,
|
||||
encode_compat_str,
|
||||
encodeFilename,
|
||||
escape_rfc3986,
|
||||
@ -450,6 +451,28 @@ class TestUtil(unittest.TestCase):
|
||||
data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
|
||||
self.assertTrue(isinstance(data, bytes))
|
||||
|
||||
def test_dict_get(self):
    # Entries whose values are all falsy.
    falsy_entries = {
        'none': None,
        'false': False,
        'zero': 0,
        'empty_string': '',
        'empty_list': [],
    }
    d = dict(falsy_entries)
    d['a'] = 42

    # (extra positional arguments for dict_get after the dict, expected)
    lookups = (
        (('a',), 42),
        (('b',), None),
        (('b', 42), 42),
        ((('a', ),), 42),
        ((('b', 'a', ),), 42),
        ((('b', 'c', 'a', 'd', ),), 42),
        ((('b', 'c', ),), None),
        ((('b', 'c', ), 42), 42),
    )
    for extra_args, expected in lookups:
        self.assertEqual(dict_get(d, *extra_args), expected)

    # A falsy value yields None by default and is returned as-is when
    # skip_false_values=False is passed.
    for key, false_value in falsy_entries.items():
        candidates = ('b', 'c', key, )
        self.assertEqual(dict_get(d, candidates), None)
        self.assertEqual(
            dict_get(d, candidates, skip_false_values=False), false_value)
|
||||
|
||||
def test_encode_compat_str(self):
|
||||
self.assertEqual(encode_compat_str(b'\xd1\x82\xd0\xb5\xd1\x81\xd1\x82', 'utf-8'), 'тест')
|
||||
self.assertEqual(encode_compat_str('тест', 'utf-8'), 'тест')
|
||||
@ -471,6 +494,10 @@ class TestUtil(unittest.TestCase):
|
||||
d = json.loads(stripped)
|
||||
self.assertEqual(d, {'STATUS': 'OK'})
|
||||
|
||||
stripped = strip_jsonp('ps.embedHandler({"status": "success"});')
|
||||
d = json.loads(stripped)
|
||||
self.assertEqual(d, {'status': 'success'})
|
||||
|
||||
def test_uppercase_escape(self):
|
||||
self.assertEqual(uppercase_escape('aä'), 'aä')
|
||||
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
|
||||
|
@ -34,7 +34,7 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
# TODO find a > 100 (paginating?) videos course
|
||||
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
entries = result['entries']
|
||||
entries = list(result['entries'])
|
||||
self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
||||
self.assertEqual(len(entries), 25)
|
||||
self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
||||
|
34
test/versions.json
Normal file
34
test/versions.json
Normal file
@ -0,0 +1,34 @@
|
||||
{
|
||||
"latest": "2013.01.06",
|
||||
"signature": "72158cdba391628569ffdbea259afbcf279bbe3d8aeb7492690735dc1cfa6afa754f55c61196f3871d429599ab22f2667f1fec98865527b32632e7f4b3675a7ef0f0fbe084d359256ae4bba68f0d33854e531a70754712f244be71d4b92e664302aa99653ee4df19800d955b6c4149cd2b3f24288d6e4b40b16126e01f4c8ce6",
|
||||
"versions": {
|
||||
"2013.01.02": {
|
||||
"bin": [
|
||||
"http://youtube-dl.org/downloads/2013.01.02/youtube-dl",
|
||||
"f5b502f8aaa77675c4884938b1e4871ebca2611813a0c0e74f60c0fbd6dcca6b"
|
||||
],
|
||||
"exe": [
|
||||
"http://youtube-dl.org/downloads/2013.01.02/youtube-dl.exe",
|
||||
"75fa89d2ce297d102ff27675aa9d92545bbc91013f52ec52868c069f4f9f0422"
|
||||
],
|
||||
"tar": [
|
||||
"http://youtube-dl.org/downloads/2013.01.02/youtube-dl-2013.01.02.tar.gz",
|
||||
"6a66d022ac8e1c13da284036288a133ec8dba003b7bd3a5179d0c0daca8c8196"
|
||||
]
|
||||
},
|
||||
"2013.01.06": {
|
||||
"bin": [
|
||||
"http://youtube-dl.org/downloads/2013.01.06/youtube-dl",
|
||||
"64b6ed8865735c6302e836d4d832577321b4519aa02640dc508580c1ee824049"
|
||||
],
|
||||
"exe": [
|
||||
"http://youtube-dl.org/downloads/2013.01.06/youtube-dl.exe",
|
||||
"58609baf91e4389d36e3ba586e21dab882daaaee537e4448b1265392ae86ff84"
|
||||
],
|
||||
"tar": [
|
||||
"http://youtube-dl.org/downloads/2013.01.06/youtube-dl-2013.01.06.tar.gz",
|
||||
"fe77ab20a95d980ed17a659aa67e371fdd4d656d19c4c7950e7b720b0c2f1a86"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
@ -46,6 +46,7 @@ from .utils import (
|
||||
DateRange,
|
||||
DEFAULT_OUTTMPL,
|
||||
determine_ext,
|
||||
determine_protocol,
|
||||
DownloadError,
|
||||
encode_compat_str,
|
||||
encodeFilename,
|
||||
@ -262,7 +263,7 @@ class YoutubeDL(object):
|
||||
the downloader (see youtube_dl/downloader/common.py):
|
||||
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
|
||||
noresizebuffer, retries, continuedl, noprogress, consoletitle,
|
||||
xattr_set_filesize, external_downloader_args.
|
||||
xattr_set_filesize, external_downloader_args, hls_use_mpegts.
|
||||
|
||||
The following options are used by the post processors:
|
||||
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
|
||||
@ -706,7 +707,6 @@ class YoutubeDL(object):
|
||||
It will also download the videos if 'download'.
|
||||
Returns the resolved ie_result.
|
||||
"""
|
||||
|
||||
result_type = ie_result.get('_type', 'video')
|
||||
|
||||
if result_type in ('url', 'url_transparent'):
|
||||
@ -735,7 +735,7 @@ class YoutubeDL(object):
|
||||
|
||||
force_properties = dict(
|
||||
(k, v) for k, v in ie_result.items() if v is not None)
|
||||
for f in ('_type', 'url'):
|
||||
for f in ('_type', 'url', 'ie_key'):
|
||||
if f in force_properties:
|
||||
del force_properties[f]
|
||||
new_result = info.copy()
|
||||
@ -898,11 +898,14 @@ class YoutubeDL(object):
|
||||
STR_OPERATORS = {
|
||||
'=': operator.eq,
|
||||
'!=': operator.ne,
|
||||
'^=': lambda attr, value: attr.startswith(value),
|
||||
'$=': lambda attr, value: attr.endswith(value),
|
||||
'*=': lambda attr, value: value in attr,
|
||||
}
|
||||
str_operator_rex = re.compile(r'''(?x)
|
||||
\s*(?P<key>ext|acodec|vcodec|container|protocol)
|
||||
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
|
||||
\s*(?P<value>[a-zA-Z0-9_-]+)
|
||||
\s*(?P<value>[a-zA-Z0-9._-]+)
|
||||
\s*$
|
||||
''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
|
||||
m = str_operator_rex.search(filter_spec)
|
||||
@ -1244,6 +1247,12 @@ class YoutubeDL(object):
|
||||
except (ValueError, OverflowError, OSError):
|
||||
pass
|
||||
|
||||
# Auto generate title fields corresponding to the *_number fields when missing
|
||||
# in order to always have clean titles. This is very common for TV series.
|
||||
for field in ('chapter', 'season', 'episode'):
|
||||
if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
|
||||
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
|
||||
|
||||
subtitles = info_dict.get('subtitles')
|
||||
if subtitles:
|
||||
for _, subtitle in subtitles.items():
|
||||
@ -1279,6 +1288,9 @@ class YoutubeDL(object):
|
||||
|
||||
if format.get('format_id') is None:
|
||||
format['format_id'] = compat_str(i)
|
||||
else:
|
||||
# Sanitize format_id from characters used in format selector expression
|
||||
format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id'])
|
||||
format_id = format['format_id']
|
||||
if format_id not in formats_dict:
|
||||
formats_dict[format_id] = []
|
||||
@ -1300,6 +1312,10 @@ class YoutubeDL(object):
|
||||
# Automatically determine file extension if missing
|
||||
if 'ext' not in format:
|
||||
format['ext'] = determine_ext(format['url']).lower()
|
||||
# Automatically determine protocol if missing (useful for format
|
||||
# selection purposes)
|
||||
if 'protocol' not in format:
|
||||
format['protocol'] = determine_protocol(format)
|
||||
# Add HTTP headers, so that external programs can use them from the
|
||||
# json output
|
||||
full_format_info = info_dict.copy()
|
||||
@ -1325,7 +1341,6 @@ class YoutubeDL(object):
|
||||
if req_format is None:
|
||||
req_format_list = []
|
||||
if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
|
||||
info_dict['extractor'] in ['youtube', 'ted'] and
|
||||
not info_dict.get('is_live')):
|
||||
merger = FFmpegMergerPP(self)
|
||||
if merger.available and merger.can_merge():
|
||||
@ -1782,7 +1797,7 @@ class YoutubeDL(object):
|
||||
else:
|
||||
res = '%sp' % format['height']
|
||||
elif format.get('width') is not None:
|
||||
res = '?x%d' % format['width']
|
||||
res = '%dx?' % format['width']
|
||||
else:
|
||||
res = default
|
||||
return res
|
||||
@ -1986,8 +2001,19 @@ class YoutubeDL(object):
|
||||
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
|
||||
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
|
||||
data_handler = compat_urllib_request_DataHandler()
|
||||
|
||||
# When passing our own FileHandler instance, build_opener won't add the
|
||||
# default FileHandler and allows us to disable the file protocol, which
|
||||
# can be used for malicious purposes (see
|
||||
# https://github.com/rg3/youtube-dl/issues/8227)
|
||||
file_handler = compat_urllib_request.FileHandler()
|
||||
|
||||
def file_open(*args, **kwargs):
|
||||
raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
|
||||
file_handler.file_open = file_open
|
||||
|
||||
opener = compat_urllib_request.build_opener(
|
||||
proxy_handler, https_handler, cookie_processor, ydlh, data_handler)
|
||||
proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
|
||||
|
||||
# Delete the default user-agent header, which would otherwise apply in
|
||||
# cases where our custom HTTP handler doesn't come into play
|
||||
|
@ -369,6 +369,7 @@ def _real_main(argv=None):
|
||||
'no_color': opts.no_color,
|
||||
'ffmpeg_location': opts.ffmpeg_location,
|
||||
'hls_prefer_native': opts.hls_prefer_native,
|
||||
'hls_use_mpegts': opts.hls_use_mpegts,
|
||||
'external_downloader_args': external_downloader_args,
|
||||
'postprocessor_args': postprocessor_args,
|
||||
'cn_verification_proxy': opts.cn_verification_proxy,
|
||||
|
@ -433,7 +433,7 @@ if sys.version_info < (3, 0) and sys.platform == 'win32':
|
||||
else:
|
||||
compat_getpass = getpass.getpass
|
||||
|
||||
# Old 2.6 and 2.7 releases require kwargs to be bytes
|
||||
# Python < 2.6.5 requires kwargs to be bytes
|
||||
try:
|
||||
def _testfunc(x):
|
||||
pass
|
||||
|
@ -45,6 +45,7 @@ class FileDownloader(object):
|
||||
(experimental)
|
||||
external_downloader_args: A list of additional command-line arguments for the
|
||||
external downloader.
|
||||
hls_use_mpegts: Use the mpegts container for HLS videos.
|
||||
|
||||
Subclasses of this one must re-define the real_download method.
|
||||
"""
|
||||
@ -295,7 +296,7 @@ class FileDownloader(object):
|
||||
|
||||
def report_retry(self, count, retries):
|
||||
"""Report retry in case of HTTP error 5xx"""
|
||||
self.to_screen('[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
|
||||
self.to_screen('[download] Got server HTTP error. Retrying (attempt %d of %.0f)...' % (count, retries))
|
||||
|
||||
def report_file_already_downloaded(self, file_name):
|
||||
"""Report file has already been fully downloaded."""
|
||||
|
@ -40,9 +40,10 @@ class DashSegmentsFD(FileDownloader):
|
||||
return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
|
||||
|
||||
with open(tmpfilename, 'wb') as outf:
|
||||
append_url_to_file(
|
||||
outf, combine_url(base_url, info_dict['initialization_url']),
|
||||
'initialization segment')
|
||||
if info_dict.get('initialization_url'):
|
||||
append_url_to_file(
|
||||
outf, combine_url(base_url, info_dict['initialization_url']),
|
||||
'initialization segment')
|
||||
for i, segment_url in enumerate(segment_urls):
|
||||
segment_len = append_url_to_file(
|
||||
outf, combine_url(base_url, segment_url),
|
||||
|
@ -273,15 +273,21 @@ class F4mFD(FragmentFD):
|
||||
return fragments_list
|
||||
|
||||
def _parse_bootstrap_node(self, node, base_url):
|
||||
if node.text is None:
|
||||
# Sometimes non empty inline bootstrap info can be specified along
|
||||
# with bootstrap url attribute (e.g. dummy inline bootstrap info
|
||||
# contains whitespace characters in [1]). We will prefer bootstrap
|
||||
# url over inline bootstrap info when present.
|
||||
# 1. http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m
|
||||
bootstrap_url = node.get('url')
|
||||
if bootstrap_url:
|
||||
bootstrap_url = compat_urlparse.urljoin(
|
||||
base_url, node.attrib['url'])
|
||||
base_url, bootstrap_url)
|
||||
boot_info = self._get_bootstrap_from_url(bootstrap_url)
|
||||
else:
|
||||
bootstrap_url = None
|
||||
bootstrap = base64.b64decode(node.text.encode('ascii'))
|
||||
boot_info = read_bootstrap_info(bootstrap)
|
||||
return (boot_info, bootstrap_url)
|
||||
return boot_info, bootstrap_url
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
man_url = info_dict['url']
|
||||
@ -316,7 +322,8 @@ class F4mFD(FragmentFD):
|
||||
metadata = None
|
||||
|
||||
fragments_list = build_fragments_list(boot_info)
|
||||
if self.params.get('test', False):
|
||||
test = self.params.get('test', False)
|
||||
if test:
|
||||
# We only download the first fragment
|
||||
fragments_list = fragments_list[:1]
|
||||
total_frags = len(fragments_list)
|
||||
@ -326,6 +333,7 @@ class F4mFD(FragmentFD):
|
||||
ctx = {
|
||||
'filename': filename,
|
||||
'total_frags': total_frags,
|
||||
'live': live,
|
||||
}
|
||||
|
||||
self._prepare_frag_download(ctx)
|
||||
@ -380,7 +388,7 @@ class F4mFD(FragmentFD):
|
||||
else:
|
||||
raise
|
||||
|
||||
if not fragments_list and live and bootstrap_url:
|
||||
if not fragments_list and not test and live and bootstrap_url:
|
||||
fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
|
||||
total_frags += len(fragments_list)
|
||||
if fragments_list and (fragments_list[0][1] > frag_i + 1):
|
||||
|
@ -26,7 +26,11 @@ class FragmentFD(FileDownloader):
|
||||
self._start_frag_download(ctx)
|
||||
|
||||
def _prepare_frag_download(self, ctx):
|
||||
self.to_screen('[%s] Total fragments: %d' % (self.FD_NAME, ctx['total_frags']))
|
||||
if 'live' not in ctx:
|
||||
ctx['live'] = False
|
||||
self.to_screen(
|
||||
'[%s] Total fragments: %s'
|
||||
% (self.FD_NAME, ctx['total_frags'] if not ctx['live'] else 'unknown (live)'))
|
||||
self.report_destination(ctx['filename'])
|
||||
dl = HttpQuietDownloader(
|
||||
self.ydl,
|
||||
@ -74,14 +78,14 @@ class FragmentFD(FileDownloader):
|
||||
if s['status'] not in ('downloading', 'finished'):
|
||||
return
|
||||
|
||||
frag_total_bytes = s.get('total_bytes') or 0
|
||||
|
||||
estimated_size = (
|
||||
(ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) /
|
||||
(state['frag_index'] + 1) * total_frags)
|
||||
time_now = time.time()
|
||||
state['total_bytes_estimate'] = estimated_size
|
||||
state['elapsed'] = time_now - start
|
||||
frag_total_bytes = s.get('total_bytes') or 0
|
||||
if not ctx['live']:
|
||||
estimated_size = (
|
||||
(ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) /
|
||||
(state['frag_index'] + 1) * total_frags)
|
||||
state['total_bytes_estimate'] = estimated_size
|
||||
|
||||
if s['status'] == 'finished':
|
||||
state['frag_index'] += 1
|
||||
@ -91,9 +95,10 @@ class FragmentFD(FileDownloader):
|
||||
else:
|
||||
frag_downloaded_bytes = s['downloaded_bytes']
|
||||
state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
|
||||
state['eta'] = self.calc_eta(
|
||||
start, time_now, estimated_size,
|
||||
state['downloaded_bytes'])
|
||||
if not ctx['live']:
|
||||
state['eta'] = self.calc_eta(
|
||||
start, time_now, estimated_size,
|
||||
state['downloaded_bytes'])
|
||||
state['speed'] = s.get('speed')
|
||||
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
|
||||
self._hook_progress(state)
|
||||
|
@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
from .common import FileDownloader
|
||||
from .fragment import FragmentFD
|
||||
@ -39,14 +40,29 @@ class HlsFD(FileDownloader):
|
||||
'-headers',
|
||||
''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
|
||||
|
||||
args += ['-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc']
|
||||
args += ['-i', url, '-c', 'copy']
|
||||
if self.params.get('hls_use_mpegts', False):
|
||||
args += ['-f', 'mpegts']
|
||||
else:
|
||||
args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
|
||||
|
||||
args = [encodeArgument(opt) for opt in args]
|
||||
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
|
||||
|
||||
self._debug_cmd(args)
|
||||
|
||||
retval = subprocess.call(args, stdin=subprocess.PIPE)
|
||||
proc = subprocess.Popen(args, stdin=subprocess.PIPE)
|
||||
try:
|
||||
retval = proc.wait()
|
||||
except KeyboardInterrupt:
|
||||
# subprocess.run would send the SIGKILL signal to ffmpeg and the
|
||||
# mp4 file couldn't be played, but if we ask ffmpeg to quit it
|
||||
# produces a file that is playable (this is mostly useful for live
|
||||
# streams). Note that Windows is not affected and produces playable
|
||||
# files (see https://github.com/rg3/youtube-dl/issues/8300).
|
||||
if sys.platform != 'win32':
|
||||
proc.communicate(b'q')
|
||||
raise
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
|
||||
|
@ -15,6 +15,7 @@ from .adobetv import (
|
||||
AdobeTVVideoIE,
|
||||
)
|
||||
from .adultswim import AdultSwimIE
|
||||
from .aenetworks import AENetworksIE
|
||||
from .aftonbladet import AftonbladetIE
|
||||
from .airmozilla import AirMozillaIE
|
||||
from .aljazeera import AlJazeeraIE
|
||||
@ -41,6 +42,7 @@ from .arte import (
|
||||
ArteTVCreativeIE,
|
||||
ArteTVConcertIE,
|
||||
ArteTVFutureIE,
|
||||
ArteTVCinemaIE,
|
||||
ArteTVDDCIE,
|
||||
ArteTVEmbedIE,
|
||||
)
|
||||
@ -48,7 +50,7 @@ from .atresplayer import AtresPlayerIE
|
||||
from .atttechchannel import ATTTechChannelIE
|
||||
from .audimedia import AudiMediaIE
|
||||
from .audiomack import AudiomackIE, AudiomackAlbumIE
|
||||
from .azubu import AzubuIE
|
||||
from .azubu import AzubuIE, AzubuLiveIE
|
||||
from .baidu import BaiduVideoIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||
@ -86,8 +88,16 @@ from .camdemy import (
|
||||
)
|
||||
from .canalplus import CanalplusIE
|
||||
from .canalc2 import Canalc2IE
|
||||
from .canvas import CanvasIE
|
||||
from .cbc import (
|
||||
CBCIE,
|
||||
CBCPlayerIE,
|
||||
)
|
||||
from .cbs import CBSIE
|
||||
from .cbsnews import CBSNewsIE
|
||||
from .cbsnews import (
|
||||
CBSNewsIE,
|
||||
CBSNewsLiveVideoIE,
|
||||
)
|
||||
from .cbssports import CBSSportsIE
|
||||
from .ccc import CCCIE
|
||||
from .ceskatelevize import CeskaTelevizeIE
|
||||
@ -120,6 +130,7 @@ from .comcarcoff import ComCarCoffIE
|
||||
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||
from .condenast import CondeNastIE
|
||||
from .cracked import CrackedIE
|
||||
from .crackle import CrackleIE
|
||||
from .criterion import CriterionIE
|
||||
from .crooksandliars import CrooksAndLiarsIE
|
||||
from .crunchyroll import (
|
||||
@ -128,6 +139,8 @@ from .crunchyroll import (
|
||||
)
|
||||
from .cspan import CSpanIE
|
||||
from .ctsnews import CtsNewsIE
|
||||
from .cultureunplugged import CultureUnpluggedIE
|
||||
from .cwtv import CWTVIE
|
||||
from .dailymotion import (
|
||||
DailymotionIE,
|
||||
DailymotionPlaylistIE,
|
||||
@ -137,6 +150,8 @@ from .dailymotion import (
|
||||
from .daum import (
|
||||
DaumIE,
|
||||
DaumClipIE,
|
||||
DaumPlaylistIE,
|
||||
DaumUserIE,
|
||||
)
|
||||
from .dbtv import DBTVIE
|
||||
from .dcn import (
|
||||
@ -191,7 +206,10 @@ from .everyonesmixtape import EveryonesMixtapeIE
|
||||
from .exfm import ExfmIE
|
||||
from .expotv import ExpoTVIE
|
||||
from .extremetube import ExtremeTubeIE
|
||||
from .facebook import FacebookIE
|
||||
from .facebook import (
|
||||
FacebookIE,
|
||||
FacebookPostIE,
|
||||
)
|
||||
from .faz import FazIE
|
||||
from .fc2 import FC2IE
|
||||
from .fczenit import FczenitIE
|
||||
@ -262,7 +280,6 @@ from .hellporno import HellPornoIE
|
||||
from .helsinki import HelsinkiIE
|
||||
from .hentaistigma import HentaiStigmaIE
|
||||
from .historicfilms import HistoricFilmsIE
|
||||
from .history import HistoryIE
|
||||
from .hitbox import HitboxIE, HitboxLiveIE
|
||||
from .hornbunny import HornBunnyIE
|
||||
from .hotnewhiphop import HotNewHipHopIE
|
||||
@ -316,6 +333,7 @@ from .keezmovies import KeezMoviesIE
|
||||
from .khanacademy import KhanAcademyIE
|
||||
from .kickstarter import KickStarterIE
|
||||
from .keek import KeekIE
|
||||
from .konserthusetplay import KonserthusetPlayIE
|
||||
from .kontrtube import KontrTubeIE
|
||||
from .krasview import KrasViewIE
|
||||
from .ku6 import Ku6IE
|
||||
@ -330,10 +348,12 @@ from .kuwo import (
|
||||
from .la7 import LA7IE
|
||||
from .laola1tv import Laola1TvIE
|
||||
from .lecture2go import Lecture2GoIE
|
||||
from .lemonde import LemondeIE
|
||||
from .letv import (
|
||||
LetvIE,
|
||||
LetvTvIE,
|
||||
LetvPlaylistIE
|
||||
LetvPlaylistIE,
|
||||
LetvCloudIE,
|
||||
)
|
||||
from .libsyn import LibsynIE
|
||||
from .lifenews import (
|
||||
@ -352,6 +372,7 @@ from .livestream import (
|
||||
LivestreamShortenerIE,
|
||||
)
|
||||
from .lnkgo import LnkGoIE
|
||||
from .lovehomeporn import LoveHomePornIE
|
||||
from .lrt import LRTIE
|
||||
from .lynda import (
|
||||
LyndaIE,
|
||||
@ -362,6 +383,7 @@ from .macgamestore import MacGameStoreIE
|
||||
from .mailru import MailRuIE
|
||||
from .makertv import MakerTVIE
|
||||
from .malemotion import MalemotionIE
|
||||
from .matchtv import MatchTVIE
|
||||
from .mdr import MDRIE
|
||||
from .metacafe import MetacafeIE
|
||||
from .metacritic import MetacriticIE
|
||||
@ -472,6 +494,7 @@ from .npo import (
|
||||
NPOLiveIE,
|
||||
NPORadioIE,
|
||||
NPORadioFragmentIE,
|
||||
SchoolTVIE,
|
||||
VPROIE,
|
||||
WNLIE
|
||||
)
|
||||
@ -515,6 +538,7 @@ from .planetaplay import PlanetaPlayIE
|
||||
from .pladform import PladformIE
|
||||
from .played import PlayedIE
|
||||
from .playfm import PlayFMIE
|
||||
from .plays import PlaysTVIE
|
||||
from .playtvak import PlaytvakIE
|
||||
from .playvid import PlayvidIE
|
||||
from .playwire import PlaywireIE
|
||||
@ -574,6 +598,7 @@ from .rts import RTSIE
|
||||
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
|
||||
from .rtvnh import RTVNHIE
|
||||
from .ruhd import RUHDIE
|
||||
from .ruleporn import RulePornIE
|
||||
from .rutube import (
|
||||
RutubeIE,
|
||||
RutubeChannelIE,
|
||||
@ -720,6 +745,7 @@ from .toutv import TouTvIE
|
||||
from .toypics import ToypicsUserIE, ToypicsIE
|
||||
from .traileraddict import TrailerAddictIE
|
||||
from .trilulilu import TriluliluIE
|
||||
from .trollvids import TrollvidsIE
|
||||
from .trutube import TruTubeIE
|
||||
from .tube8 import Tube8IE
|
||||
from .tubitv import TubiTvIE
|
||||
@ -774,7 +800,7 @@ from .udemy import (
|
||||
UdemyCourseIE
|
||||
)
|
||||
from .udn import UDNEmbedIE
|
||||
from .ultimedia import UltimediaIE
|
||||
from .digiteka import DigitekaIE
|
||||
from .unistra import UnistraIE
|
||||
from .urort import UrortIE
|
||||
from .ustream import UstreamIE, UstreamChannelIE
|
||||
@ -803,7 +829,11 @@ from .videomore import (
|
||||
)
|
||||
from .videopremium import VideoPremiumIE
|
||||
from .videott import VideoTtIE
|
||||
from .vidme import VidmeIE
|
||||
from .vidme import (
|
||||
VidmeIE,
|
||||
VidmeUserIE,
|
||||
VidmeUserLikesIE,
|
||||
)
|
||||
from .vidzi import VidziIE
|
||||
from .vier import VierIE, VierVideosIE
|
||||
from .viewster import ViewsterIE
|
||||
@ -853,6 +883,7 @@ from .webofstories import (
|
||||
WebOfStoriesPlaylistIE,
|
||||
)
|
||||
from .weibo import WeiboIE
|
||||
from .weiqitv import WeiqiTVIE
|
||||
from .wimp import WimpIE
|
||||
from .wistia import WistiaIE
|
||||
from .worldstarhiphop import WorldStarHipHopIE
|
||||
@ -913,6 +944,7 @@ from .zingmp3 import (
|
||||
ZingMp3SongIE,
|
||||
ZingMp3AlbumIE,
|
||||
)
|
||||
from .zippcast import ZippCastIE
|
||||
|
||||
_ALL_CLASSES = [
|
||||
klass
|
||||
|
@ -8,11 +8,7 @@ from ..compat import compat_str
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class ACastBaseIE(InfoExtractor):
|
||||
_API_BASE_URL = 'https://www.acast.com/api/'
|
||||
|
||||
|
||||
class ACastIE(ACastBaseIE):
|
||||
class ACastIE(InfoExtractor):
|
||||
IE_NAME = 'acast'
|
||||
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<channel>[^/]+)/(?P<id>[^/#?]+)'
|
||||
_TEST = {
|
||||
@ -23,14 +19,19 @@ class ACastIE(ACastBaseIE):
|
||||
'ext': 'mp3',
|
||||
'title': '"Where Are You?": Taipei 101, Taiwan',
|
||||
'timestamp': 1196172000000,
|
||||
'description': 'md5:0c5d8201dfea2b93218ea986c91eee6e',
|
||||
'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e',
|
||||
'duration': 211,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel, display_id = re.match(self._VALID_URL, url).groups()
|
||||
cast_data = self._download_json(self._API_BASE_URL + 'channels/%s/acasts/%s/playback' % (channel, display_id), display_id)
|
||||
|
||||
embed_page = self._download_webpage(
|
||||
re.sub('(?:www\.)?acast\.com', 'embedcdn.acast.com', url), display_id)
|
||||
cast_data = self._parse_json(self._search_regex(
|
||||
r'window\[\'acast/queries\'\]\s*=\s*([^;]+);', embed_page, 'acast data'),
|
||||
display_id)['GetAcast/%s/%s' % (channel, display_id)]
|
||||
|
||||
return {
|
||||
'id': compat_str(cast_data['id']),
|
||||
@ -44,7 +45,7 @@ class ACastIE(ACastBaseIE):
|
||||
}
|
||||
|
||||
|
||||
class ACastChannelIE(ACastBaseIE):
|
||||
class ACastChannelIE(InfoExtractor):
|
||||
IE_NAME = 'acast:channel'
|
||||
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<id>[^/#?]+)'
|
||||
_TEST = {
|
||||
@ -56,6 +57,7 @@ class ACastChannelIE(ACastBaseIE):
|
||||
},
|
||||
'playlist_mincount': 20,
|
||||
}
|
||||
_API_BASE_URL = 'https://www.acast.com/api/'
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
|
66
youtube_dl/extractor/aenetworks.py
Normal file
66
youtube_dl/extractor/aenetworks.py
Normal file
@ -0,0 +1,66 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
|
||||
|
||||
class AENetworksIE(InfoExtractor):
|
||||
IE_NAME = 'aenetworks'
|
||||
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
|
||||
'info_dict': {
|
||||
'id': 'g12m5Gyt3fdR',
|
||||
'ext': 'mp4',
|
||||
'title': "Bet You Didn't Know: Valentine's Day",
|
||||
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'expected_warnings': ['JSON-LD'],
|
||||
}, {
|
||||
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
|
||||
'info_dict': {
|
||||
'id': 'eg47EERs_JsZ',
|
||||
'ext': 'mp4',
|
||||
'title': "Winter Is Coming",
|
||||
'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}, {
|
||||
'url': 'http://www.aetv.com/shows/duck-dynasty/video/inlawful-entry',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'http://www.fyi.tv/shows/tiny-house-nation/videos/207-sq-ft-minnesota-prairie-cottage',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'http://www.mylifetime.com/shows/project-runway-junior/video/season-1/episode-6/superstar-clients',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url_re = [
|
||||
r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id,
|
||||
r"media_url\s*=\s*'([^']+)'"
|
||||
]
|
||||
video_url = self._search_regex(video_url_re, webpage, 'video url')
|
||||
|
||||
info = self._search_json_ld(webpage, video_id, fatal=False)
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url(video_url, {'sig': {'key': 'crazyjava', 'secret': 's3cr3t'}}),
|
||||
})
|
||||
return info
|
@ -8,6 +8,8 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
qualities,
|
||||
unescapeHTML,
|
||||
xpath_element,
|
||||
)
|
||||
|
||||
|
||||
@ -31,7 +33,7 @@ class AllocineIE(InfoExtractor):
|
||||
'id': '19540403',
|
||||
'ext': 'mp4',
|
||||
'title': 'Planes 2 Bande-annonce VF',
|
||||
'description': 'md5:eeaffe7c2d634525e21159b93acf3b1e',
|
||||
'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
@ -41,7 +43,7 @@ class AllocineIE(InfoExtractor):
|
||||
'id': '19544709',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dragons 2 - Bande annonce finale VF',
|
||||
'description': 'md5:71742e3a74b0d692c7fce0dd2017a4ac',
|
||||
'description': 'md5:601d15393ac40f249648ef000720e7e3',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
@ -59,14 +61,18 @@ class AllocineIE(InfoExtractor):
|
||||
if typ == 'film':
|
||||
video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id')
|
||||
else:
|
||||
player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player')
|
||||
|
||||
player_data = json.loads(player)
|
||||
video_id = compat_str(player_data['refMedia'])
|
||||
player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player', default=None)
|
||||
if player:
|
||||
player_data = json.loads(player)
|
||||
video_id = compat_str(player_data['refMedia'])
|
||||
else:
|
||||
model = self._search_regex(r'data-model="([^"]+)">', webpage, 'data model')
|
||||
model_data = self._parse_json(unescapeHTML(model), display_id)
|
||||
video_id = compat_str(model_data['id'])
|
||||
|
||||
xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id)
|
||||
|
||||
video = xml.find('.//AcVisionVideo').attrib
|
||||
video = xpath_element(xml, './/AcVisionVideo').attrib
|
||||
quality = qualities(['ld', 'md', 'hd'])
|
||||
|
||||
formats = []
|
||||
|
@ -1,11 +1,9 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .nuevo import NuevoBaseIE
|
||||
|
||||
|
||||
class AnitubeIE(InfoExtractor):
|
||||
class AnitubeIE(NuevoBaseIE):
|
||||
IE_NAME = 'anitube.se'
|
||||
_VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)'
|
||||
|
||||
@ -22,38 +20,11 @@ class AnitubeIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
key = self._search_regex(
|
||||
r'src=["\']https?://[^/]+/embed/([A-Za-z0-9_-]+)', webpage, 'key')
|
||||
|
||||
config_xml = self._download_xml(
|
||||
'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key)
|
||||
|
||||
video_title = config_xml.find('title').text
|
||||
thumbnail = config_xml.find('image').text
|
||||
duration = float(config_xml.find('duration').text)
|
||||
|
||||
formats = []
|
||||
video_url = config_xml.find('file')
|
||||
if video_url is not None:
|
||||
formats.append({
|
||||
'format_id': 'sd',
|
||||
'url': video_url.text,
|
||||
})
|
||||
video_url = config_xml.find('filehd')
|
||||
if video_url is not None:
|
||||
formats.append({
|
||||
'format_id': 'hd',
|
||||
'url': video_url.text,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats
|
||||
}
|
||||
return self._extract_nuevo(
|
||||
'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, video_id)
|
||||
|
@ -13,6 +13,7 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
get_element_by_attribute,
|
||||
int_or_none,
|
||||
NO_DEFAULT,
|
||||
qualities,
|
||||
)
|
||||
|
||||
@ -93,9 +94,18 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
json_url = self._html_search_regex(
|
||||
patterns, webpage, 'json vp url', default=None)
|
||||
if not json_url:
|
||||
iframe_url = self._html_search_regex(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
|
||||
webpage, 'iframe url', group='url')
|
||||
def find_iframe_url(webpage, default=NO_DEFAULT):
|
||||
return self._html_search_regex(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
|
||||
webpage, 'iframe url', group='url', default=default)
|
||||
|
||||
iframe_url = find_iframe_url(webpage, None)
|
||||
if not iframe_url:
|
||||
embed_url = self._html_search_regex(
|
||||
r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url')
|
||||
player = self._download_json(
|
||||
embed_url, video_id, 'Downloading player page')
|
||||
iframe_url = find_iframe_url(player['html'])
|
||||
json_url = compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
|
||||
return self._extract_from_json_url(json_url, video_id, lang)
|
||||
@ -199,25 +209,19 @@ class ArteTVCreativeIE(ArteTVPlus7IE):
|
||||
|
||||
class ArteTVFutureIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:future'
|
||||
_VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de)/(thema|sujet)/.*?#article-anchor-(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de)/(?P<id>.+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081',
|
||||
_TESTS = [{
|
||||
'url': 'http://future.arte.tv/fr/info-sciences/les-ecrevisses-aussi-sont-anxieuses',
|
||||
'info_dict': {
|
||||
'id': '5201',
|
||||
'id': '050940-028-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'Les champignons au secours de la planète',
|
||||
'upload_date': '20131101',
|
||||
'title': 'Les écrevisses aussi peuvent être anxieuses',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
anchor_id, lang = self._extract_url_info(url)
|
||||
webpage = self._download_webpage(url, anchor_id)
|
||||
row = self._search_regex(
|
||||
r'(?s)id="%s"[^>]*>.+?(<div[^>]*arte_vp_url[^>]*>)' % anchor_id,
|
||||
webpage, 'row')
|
||||
return self._extract_from_webpage(row, anchor_id, lang)
|
||||
}, {
|
||||
'url': 'http://future.arte.tv/fr/la-science-est-elle-responsable',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class ArteTVDDCIE(ArteTVPlus7IE):
|
||||
@ -255,6 +259,23 @@ class ArteTVConcertIE(ArteTVPlus7IE):
|
||||
}
|
||||
|
||||
|
||||
class ArteTVCinemaIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:cinema'
|
||||
_VALID_URL = r'https?://cinema\.arte\.tv/(?P<lang>de|fr)/(?P<id>.+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://cinema.arte.tv/de/node/38291',
|
||||
'md5': '6b275511a5107c60bacbeeda368c3aa1',
|
||||
'info_dict': {
|
||||
'id': '055876-000_PWA12025-D',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tod auf dem Nil',
|
||||
'upload_date': '20160122',
|
||||
'description': 'md5:7f749bbb77d800ef2be11d54529b96bc',
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
class ArteTVEmbedIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:embed'
|
||||
_VALID_URL = r'''(?x)
|
||||
|
@ -132,11 +132,6 @@ class AtresPlayerIE(InfoExtractor):
|
||||
})
|
||||
formats.append(format_info)
|
||||
|
||||
m3u8_url = player.get('urlVideoHls')
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, episode_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
|
||||
timestamp = int_or_none(self._download_webpage(
|
||||
self._TIME_API_URL,
|
||||
video_id, 'Downloading timestamp', fatal=False), 1000, time.time())
|
||||
|
@ -3,7 +3,11 @@ from __future__ import unicode_literals
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
class AzubuIE(InfoExtractor):
|
||||
@ -91,3 +95,37 @@ class AzubuIE(InfoExtractor):
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class AzubuLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www.azubu.tv/(?P<id>[^/]+)$'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.azubu.tv/MarsTVMDLen',
|
||||
'only_matching': True,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
user = self._match_id(url)
|
||||
|
||||
info = self._download_json(
|
||||
'http://api.azubu.tv/public/modules/last-video/{0}/info'.format(user),
|
||||
user)['data']
|
||||
if info['type'] != 'STREAM':
|
||||
raise ExtractorError('{0} is not streaming live'.format(user), expected=True)
|
||||
|
||||
req = sanitized_Request(
|
||||
'https://edge-elb.api.brightcove.com/playback/v1/accounts/3361910549001/videos/ref:' + info['reference_id'])
|
||||
req.add_header('Accept', 'application/json;pk=BCpkADawqM1gvI0oGWg8dxQHlgT8HkdE2LnAlWAZkOlznO39bSZX726u4JqnDsK3MDXcO01JxXK2tZtJbgQChxgaFzEVdHRjaDoxaOu8hHOO8NYhwdxw9BzvgkvLUlpbDNUuDoc4E4wxDToV')
|
||||
bc_info = self._download_json(req, user)
|
||||
m3u8_url = next(source['src'] for source in bc_info['sources'] if source['container'] == 'M2TS')
|
||||
formats = self._extract_m3u8_formats(m3u8_url, user, ext='mp4')
|
||||
|
||||
return {
|
||||
'id': info['id'],
|
||||
'title': self._live_title(info['title']),
|
||||
'uploader_id': user,
|
||||
'formats': formats,
|
||||
'is_live': True,
|
||||
'thumbnail': bc_info['poster'],
|
||||
}
|
||||
|
@ -193,6 +193,19 @@ class BBCCoUkIE(InfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# compact player (https://github.com/rg3/youtube-dl/issues/8147)
|
||||
'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
|
||||
'info_dict': {
|
||||
'id': 'p028bfkj',
|
||||
'ext': 'flv',
|
||||
'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
|
||||
'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
|
||||
'only_matching': True,
|
||||
@ -482,9 +495,11 @@ class BBCCoUkIE(InfoExtractor):
|
||||
if programme_id:
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
|
||||
r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>', webpage, 'title')
|
||||
(r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>',
|
||||
r'<div[^>]+class="info"[^>]*>\s*<h1>(.+?)</h1>'), webpage, 'title')
|
||||
description = self._search_regex(
|
||||
r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
|
||||
(r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
|
||||
r'<div[^>]+class="info_+synopsis"[^>]*>([^<]+)</div>'),
|
||||
webpage, 'description', default=None)
|
||||
if not description:
|
||||
description = self._html_search_meta('description', webpage)
|
||||
@ -718,19 +733,10 @@ class BBCIE(BBCCoUkIE):
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
timestamp = None
|
||||
playlist_title = None
|
||||
playlist_description = None
|
||||
|
||||
ld = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)<script type="application/ld\+json">(.+?)</script>',
|
||||
webpage, 'ld json', default='{}'),
|
||||
playlist_id, fatal=False)
|
||||
if ld:
|
||||
timestamp = parse_iso8601(ld.get('datePublished'))
|
||||
playlist_title = ld.get('headline')
|
||||
playlist_description = ld.get('articleBody')
|
||||
json_ld_info = self._search_json_ld(webpage, playlist_id, default=None)
|
||||
timestamp = json_ld_info.get('timestamp')
|
||||
playlist_title = json_ld_info.get('title')
|
||||
playlist_description = json_ld_info.get('description')
|
||||
|
||||
if not timestamp:
|
||||
timestamp = parse_iso8601(self._search_regex(
|
||||
|
@ -1,7 +1,13 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
class BpbIE(InfoExtractor):
|
||||
@ -10,7 +16,8 @@ class BpbIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
|
||||
'md5': '0792086e8e2bfbac9cdf27835d5f2093',
|
||||
# md5 fails in Python 2.6 due to buggy server response and wrong handling of urllib2
|
||||
'md5': 'c4f84c8a8044ca9ff68bb8441d300b3f',
|
||||
'info_dict': {
|
||||
'id': '297',
|
||||
'ext': 'mp4',
|
||||
@ -25,13 +32,26 @@ class BpbIE(InfoExtractor):
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h2 class="white">(.*?)</h2>', webpage, 'title')
|
||||
video_url = self._html_search_regex(
|
||||
r'(http://film\.bpb\.de/player/dokument_[0-9]+\.mp4)',
|
||||
webpage, 'video URL')
|
||||
video_info_dicts = re.findall(
|
||||
r"({\s*src:\s*'http://film\.bpb\.de/[^}]+})", webpage)
|
||||
|
||||
formats = []
|
||||
for video_info in video_info_dicts:
|
||||
video_info = self._parse_json(video_info, video_id, transform_source=js_to_json)
|
||||
quality = video_info['quality']
|
||||
video_url = video_info['src']
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'preference': 10 if quality == 'high' else 0,
|
||||
'format_note': quality,
|
||||
'format_id': '%s-%s' % (quality, determine_ext(video_url)),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
}
|
||||
|
65
youtube_dl/extractor/canvas.py
Normal file
65
youtube_dl/extractor/canvas.py
Normal file
@ -0,0 +1,65 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none
|
||||
|
||||
|
||||
class CanvasIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?canvas\.be/video/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
|
||||
'md5': 'ea838375a547ac787d4064d8c7860a6c',
|
||||
'info_dict': {
|
||||
'id': 'mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
|
||||
'display_id': 'de-afspraak-veilt-voor-de-warmste-week',
|
||||
'ext': 'mp4',
|
||||
'title': 'De afspraak veilt voor de Warmste Week',
|
||||
'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 49.02,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
title = self._search_regex(
|
||||
r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>',
|
||||
webpage, 'title', default=None) or self._og_search_title(webpage)
|
||||
|
||||
video_id = self._html_search_regex(
|
||||
r'data-video=(["\'])(?P<id>.+?)\1', webpage, 'video id', group='id')
|
||||
|
||||
data = self._download_json(
|
||||
'https://mediazone.vrt.be/api/v1/canvas/assets/%s' % video_id, display_id)
|
||||
|
||||
formats = []
|
||||
for target in data['targetUrls']:
|
||||
format_url, format_type = target.get('url'), target.get('type')
|
||||
if not format_url or not format_type:
|
||||
continue
|
||||
if format_type == 'HLS':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, display_id, entry_protocol='m3u8_native',
|
||||
ext='mp4', preference=0, fatal=False, m3u8_id=format_type))
|
||||
elif format_type == 'HDS':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url, display_id, f4m_id=format_type, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': format_type,
|
||||
'url': format_url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
'formats': formats,
|
||||
'duration': float_or_none(data.get('duration'), 1000),
|
||||
'thumbnail': data.get('posterImageUrl'),
|
||||
}
|
113
youtube_dl/extractor/cbc.py
Normal file
113
youtube_dl/extractor/cbc.py
Normal file
@ -0,0 +1,113 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json
|
||||
|
||||
|
||||
class CBCIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
# with mediaId
|
||||
'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs',
|
||||
'info_dict': {
|
||||
'id': '2682904050',
|
||||
'ext': 'flv',
|
||||
'title': 'Don Cherry – All-Stars',
|
||||
'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.',
|
||||
'timestamp': 1454475540,
|
||||
'upload_date': '20160203',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# with clipId
|
||||
'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
|
||||
'info_dict': {
|
||||
'id': '2487345465',
|
||||
'ext': 'flv',
|
||||
'title': 'Robin Williams freestyles on 90 Minutes Live',
|
||||
'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.',
|
||||
'upload_date': '19700101',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# multiple iframes
|
||||
'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '2680832926',
|
||||
'ext': 'flv',
|
||||
'title': 'An Eagle\'s-Eye View Off Burrard Bridge',
|
||||
'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.',
|
||||
'upload_date': '19700101',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '2658915080',
|
||||
'ext': 'flv',
|
||||
'title': 'Fly like an eagle!',
|
||||
'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower',
|
||||
'upload_date': '19700101',
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
player_init = self._search_regex(
|
||||
r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage, 'player init',
|
||||
default=None)
|
||||
if player_init:
|
||||
player_info = self._parse_json(player_init, display_id, js_to_json)
|
||||
media_id = player_info.get('mediaId')
|
||||
if not media_id:
|
||||
clip_id = player_info['clipId']
|
||||
media_id = self._download_json(
|
||||
'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
|
||||
clip_id)['entries'][0]['id'].split('/')[-1]
|
||||
return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
|
||||
else:
|
||||
entries = [self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)]
|
||||
return self.playlist_result(entries)
|
||||
|
||||
|
||||
class CBCPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.cbc.ca/player/play/2683190193',
|
||||
'info_dict': {
|
||||
'id': '2683190193',
|
||||
'ext': 'flv',
|
||||
'title': 'Gerry Runs a Sweat Shop',
|
||||
'description': 'md5:b457e1c01e8ff408d9d801c1c2cd29b0',
|
||||
'timestamp': 1455067800,
|
||||
'upload_date': '20160210',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
'http://feed.theplatform.com/f/ExhSPC/vms_5akSXx4Ng_Zn?byGuid=%s' % video_id,
|
||||
'ThePlatformFeed', video_id)
|
@ -1,16 +1,14 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import remove_start
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import parse_duration
|
||||
|
||||
|
||||
class CBSNewsIE(InfoExtractor):
|
||||
class CBSNewsIE(ThePlatformIE):
|
||||
IE_DESC = 'CBS News'
|
||||
_VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:[^/]+/)+(?P<id>[\da-z_-]+)'
|
||||
_VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@ -31,59 +29,54 @@ class CBSNewsIE(InfoExtractor):
|
||||
'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
|
||||
'info_dict': {
|
||||
'id': 'fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 205,
|
||||
'subtitles': {
|
||||
'en': [{
|
||||
'ext': 'ttml',
|
||||
}],
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_info = json.loads(self._html_search_regex(
|
||||
video_info = self._parse_json(self._html_search_regex(
|
||||
r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
|
||||
webpage, 'video JSON info'))
|
||||
webpage, 'video JSON info'), video_id)
|
||||
|
||||
item = video_info['item'] if 'item' in video_info else video_info
|
||||
title = item.get('articleTitle') or item.get('hed')
|
||||
duration = item.get('duration')
|
||||
thumbnail = item.get('mediaImage') or item.get('thumbnail')
|
||||
|
||||
subtitles = {}
|
||||
if 'mpxRefId' in video_info:
|
||||
subtitles['en'] = [{
|
||||
'ext': 'ttml',
|
||||
'url': 'http://www.cbsnews.com/videos/captions/%s.adb_xml' % video_info['mpxRefId'],
|
||||
}]
|
||||
|
||||
formats = []
|
||||
for format_id in ['RtmpMobileLow', 'RtmpMobileHigh', 'Hls', 'RtmpDesktop']:
|
||||
uri = item.get('media' + format_id + 'URI')
|
||||
if not uri:
|
||||
pid = item.get('media' + format_id)
|
||||
if not pid:
|
||||
continue
|
||||
uri = remove_start(uri, '{manifest:none}')
|
||||
fmt = {
|
||||
'url': uri,
|
||||
'format_id': format_id,
|
||||
}
|
||||
if uri.startswith('rtmp'):
|
||||
play_path = re.sub(
|
||||
r'{slistFilePath}', '',
|
||||
uri.split('<break>')[-1].split('{break}')[-1])
|
||||
play_path = re.sub(
|
||||
r'{manifest:.+}.*$', '', play_path)
|
||||
fmt.update({
|
||||
'app': 'ondemand?auth=cbs',
|
||||
'play_path': 'mp4:' + play_path,
|
||||
'player_url': 'http://www.cbsnews.com/[[IMPORT]]/vidtech.cbsinteractive.com/player/3_3_0/CBSI_PLAYER_HD.swf',
|
||||
'page_url': 'http://www.cbsnews.com',
|
||||
'ext': 'flv',
|
||||
})
|
||||
elif uri.endswith('.m3u8'):
|
||||
fmt['ext'] = 'mp4'
|
||||
formats.append(fmt)
|
||||
release_url = 'http://link.theplatform.com/s/dJ5BDC/%s?format=SMIL&mbr=true' % pid
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % pid)
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@ -91,4 +84,43 @@ class CBSNewsIE(InfoExtractor):
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class CBSNewsLiveVideoIE(InfoExtractor):
|
||||
IE_DESC = 'CBS News Live Videos'
|
||||
_VALID_URL = r'http://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
|
||||
'info_dict': {
|
||||
'id': 'clinton-sanders-prepare-to-face-off-in-nh',
|
||||
'ext': 'flv',
|
||||
'title': 'Clinton, Sanders Prepare To Face Off In NH',
|
||||
'duration': 334,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_info = self._parse_json(self._html_search_regex(
|
||||
r'data-story-obj=\'({.+?})\'', webpage, 'video JSON info'), video_id)['story']
|
||||
|
||||
hdcore_sign = 'hdcore=3.3.1'
|
||||
f4m_formats = self._extract_f4m_formats(video_info['url'] + '&' + hdcore_sign, video_id)
|
||||
if f4m_formats:
|
||||
for entry in f4m_formats:
|
||||
# URLs without the extra param induce an 404 error
|
||||
entry.update({'extra_param_to_segment_url': hdcore_sign})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_info['headline'],
|
||||
'thumbnail': video_info.get('thumbnail_url_hd') or video_info.get('thumbnail_url_sd'),
|
||||
'duration': parse_duration(video_info.get('segmentDur')),
|
||||
'formats': f4m_formats,
|
||||
}
|
||||
|
@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
@ -14,14 +15,13 @@ class ComCarCoffIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
|
||||
'info_dict': {
|
||||
'id': 'miranda-sings-happy-thanksgiving-miranda',
|
||||
'id': '2494164',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20141127',
|
||||
'timestamp': 1417107600,
|
||||
'duration': 1232,
|
||||
'title': 'Happy Thanksgiving Miranda',
|
||||
'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
|
||||
'thumbnail': 'http://ccc.crackle.com/images/s5e4_thumb.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'requires ffmpeg',
|
||||
@ -39,15 +39,14 @@ class ComCarCoffIE(InfoExtractor):
|
||||
r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'),
|
||||
display_id)['videoData']
|
||||
|
||||
video_id = full_data['activeVideo']['video']
|
||||
video_data = full_data.get('videos', {}).get(video_id) or full_data['singleshots'][video_id]
|
||||
display_id = full_data['activeVideo']['video']
|
||||
video_data = full_data.get('videos', {}).get(display_id) or full_data['singleshots'][display_id]
|
||||
video_id = compat_str(video_data['mediaId'])
|
||||
thumbnails = [{
|
||||
'url': video_data['images']['thumb'],
|
||||
}, {
|
||||
'url': video_data['images']['poster'],
|
||||
}]
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_data['mediaUrl'], video_id, ext='mp4')
|
||||
|
||||
timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601(
|
||||
video_data.get('pubDate'))
|
||||
@ -55,6 +54,8 @@ class ComCarCoffIE(InfoExtractor):
|
||||
video_data.get('duration'))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'crackle:%s' % video_id,
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': video_data['title'],
|
||||
@ -62,6 +63,7 @@ class ComCarCoffIE(InfoExtractor):
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
'season_number': int_or_none(video_data.get('season')),
|
||||
'episode_number': int_or_none(video_data.get('episode')),
|
||||
'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),
|
||||
}
|
||||
|
@ -10,6 +10,7 @@ import re
|
||||
import socket
|
||||
import sys
|
||||
import time
|
||||
import math
|
||||
|
||||
from ..compat import (
|
||||
compat_cookiejar,
|
||||
@ -34,6 +35,7 @@ from ..utils import (
|
||||
fix_xml_ampersands,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
RegexNotFoundError,
|
||||
sanitize_filename,
|
||||
sanitized_Request,
|
||||
@ -43,6 +45,7 @@ from ..utils import (
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
determine_protocol,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
@ -762,6 +765,42 @@ class InfoExtractor(object):
|
||||
return self._html_search_meta('twitter:player', html,
|
||||
'twitter card player')
|
||||
|
||||
def _search_json_ld(self, html, video_id, **kwargs):
    """Find the first JSON-LD ``<script>`` payload in *html* and parse it.

    Any extra keyword arguments are forwarded unchanged to
    ``_search_regex``.  Returns whatever ``_json_ld`` produces for the
    matched payload, or an empty dict when the search yields nothing.
    """
    json_ld_pattern = (
        r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>'
        r'(?P<json_ld>.+?)</script>')
    payload = self._search_regex(
        json_ld_pattern, html, 'JSON-LD', group='json_ld', **kwargs)
    if payload:
        # Propagate the caller's ``fatal`` choice (defaulting to True) to
        # the JSON parsing step as well.
        return self._json_ld(
            payload, video_id, fatal=kwargs.get('fatal', True))
    return {}
|
||||
|
||||
def _json_ld(self, json_ld, video_id, fatal=True):
    """Turn a schema.org JSON-LD object into info-dict fields.

    *json_ld* is either a JSON string (parsed here with ``_parse_json``,
    honouring *fatal*) or an already decoded value.  Only ``TVEpisode`` and
    ``Article`` items whose ``@context`` is ``http://schema.org`` are
    understood.

    Returns a dict containing only the fields that resolved to a
    non-``None`` value; the dict is empty when nothing usable was found.
    """
    if isinstance(json_ld, compat_str):
        json_ld = self._parse_json(json_ld, video_id, fatal=fatal)
    # A JSON-LD document may be an array (or a bare scalar) at top level.
    # Only a single object is handled here; anything else has no ``.get``
    # and used to blow up with AttributeError.
    if not json_ld or not isinstance(json_ld, dict):
        return {}
    info = {}
    if json_ld.get('@context') == 'http://schema.org':
        item_type = json_ld.get('@type')
        if item_type == 'TVEpisode':
            info.update({
                'episode': unescapeHTML(json_ld.get('name')),
                'episode_number': int_or_none(json_ld.get('episodeNumber')),
                'description': unescapeHTML(json_ld.get('description')),
            })
            part_of_season = json_ld.get('partOfSeason')
            if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason':
                info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
            part_of_series = json_ld.get('partOfSeries')
            if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries':
                info['series'] = unescapeHTML(part_of_series.get('name'))
        elif item_type == 'Article':
            info.update({
                'timestamp': parse_iso8601(json_ld.get('datePublished')),
                'title': unescapeHTML(json_ld.get('headline')),
                'description': unescapeHTML(json_ld.get('articleBody')),
            })
    # Drop keys whose value could not be determined.
    return dict((k, v) for k, v in info.items() if v is not None)
|
||||
|
||||
@staticmethod
|
||||
def _hidden_inputs(html):
|
||||
html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
|
||||
@ -788,6 +827,12 @@ class InfoExtractor(object):
|
||||
if not formats:
|
||||
raise ExtractorError('No video formats found')
|
||||
|
||||
for f in formats:
|
||||
# Automatically determine tbr when missing based on abr and vbr (improves
|
||||
# formats sorting in some cases)
|
||||
if 'tbr' not in f and f.get('abr') is not None and f.get('vbr') is not None:
|
||||
f['tbr'] = f['abr'] + f['vbr']
|
||||
|
||||
def _formats_key(f):
|
||||
# TODO remove the following workaround
|
||||
from ..utils import determine_ext
|
||||
@ -977,6 +1022,18 @@ class InfoExtractor(object):
|
||||
return []
|
||||
m3u8_doc, urlh = res
|
||||
m3u8_url = urlh.geturl()
|
||||
# A Media Playlist Tag MUST NOT appear in a Master Playlist
|
||||
# https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3
|
||||
# The EXT-X-TARGETDURATION tag is REQUIRED for every M3U8 Media Playlists
|
||||
# https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.1
|
||||
if '#EXT-X-TARGETDURATION' in m3u8_doc:
|
||||
return [{
|
||||
'url': m3u8_url,
|
||||
'format_id': m3u8_id,
|
||||
'ext': ext,
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
}]
|
||||
last_info = None
|
||||
last_media = None
|
||||
kv_rex = re.compile(
|
||||
@ -1021,9 +1078,9 @@ class InfoExtractor(object):
|
||||
# TODO: looks like video codec is not always necessarily goes first
|
||||
va_codecs = codecs.split(',')
|
||||
if va_codecs[0]:
|
||||
f['vcodec'] = va_codecs[0].partition('.')[0]
|
||||
f['vcodec'] = va_codecs[0]
|
||||
if len(va_codecs) > 1 and va_codecs[1]:
|
||||
f['acodec'] = va_codecs[1].partition('.')[0]
|
||||
f['acodec'] = va_codecs[1]
|
||||
resolution = last_info.get('RESOLUTION')
|
||||
if resolution:
|
||||
width_str, height_str = resolution.split('x')
|
||||
@ -1127,12 +1184,15 @@ class InfoExtractor(object):
|
||||
formats = []
|
||||
rtmp_count = 0
|
||||
http_count = 0
|
||||
m3u8_count = 0
|
||||
|
||||
srcs = []
|
||||
videos = smil.findall(self._xpath_ns('.//video', namespace))
|
||||
for video in videos:
|
||||
src = video.get('src')
|
||||
if not src:
|
||||
if not src or src in srcs:
|
||||
continue
|
||||
srcs.append(src)
|
||||
|
||||
bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
||||
filesize = int_or_none(video.get('size') or video.get('fileSize'))
|
||||
@ -1164,10 +1224,20 @@ class InfoExtractor(object):
|
||||
continue
|
||||
|
||||
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
|
||||
src_url = src_url.strip()
|
||||
|
||||
if proto == 'm3u8' or src_ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False))
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False)
|
||||
if len(m3u8_formats) == 1:
|
||||
m3u8_count += 1
|
||||
m3u8_formats[0].update({
|
||||
'format_id': 'hls-%d' % (m3u8_count if bitrate is None else bitrate),
|
||||
'tbr': bitrate,
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
formats.extend(m3u8_formats)
|
||||
continue
|
||||
|
||||
if src_ext == 'f4m':
|
||||
@ -1200,11 +1270,13 @@ class InfoExtractor(object):
|
||||
return formats
|
||||
|
||||
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
|
||||
urls = []
|
||||
subtitles = {}
|
||||
for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
|
||||
src = textstream.get('src')
|
||||
if not src:
|
||||
if not src or src in urls:
|
||||
continue
|
||||
urls.append(src)
|
||||
ext = textstream.get('ext') or determine_ext(src)
|
||||
if not ext:
|
||||
type_ = textstream.get('type')
|
||||
@ -1265,6 +1337,163 @@ class InfoExtractor(object):
|
||||
})
|
||||
return entries
|
||||
|
||||
def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}):
    """Download a DASH MPD manifest and return the formats parsed from it.

    An empty list is returned when the download helper signals failure by
    returning ``False``.  *formats_dict* is not touched here; it is only
    handed on to ``_parse_mpd_formats``.
    """
    download_note = note or 'Downloading MPD manifest'
    download_errnote = errnote or 'Failed to download MPD manifest'
    handle = self._download_webpage_handle(
        mpd_url, video_id,
        note=download_note, errnote=download_errnote, fatal=fatal)
    if handle is False:
        return []
    manifest_text, url_handle = handle
    # Greedy match: everything up to and including the last slash of the
    # URL reported by the response handle.
    base_url = re.match(r'https?://.+/', url_handle.geturl()).group()
    mpd_doc = compat_etree_fromstring(manifest_text.encode('utf-8'))
    return self._parse_mpd_formats(
        mpd_doc, mpd_id, base_url, formats_dict=formats_dict)
|
||||
|
||||
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}):
    """Build a sorted format list from a parsed DASH MPD document.

    mpd_doc      -- root element of the manifest.
    mpd_id       -- optional prefix for every ``format_id``.
    mpd_base_url -- prefix applied to representation URLs that are not
                    already absolute http(s) URLs.
    formats_dict -- optional mapping of representation id to extra format
                    fields; entries are copied, never modified.

    Manifests with ``type="dynamic"`` yield an empty list.  Adaptation sets
    and representations carrying a ``ContentProtection`` element are
    skipped.  Text representations are ignored for now.
    """
    if mpd_doc.get('type') == 'dynamic':
        return []

    namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None)

    def _add_ns(path):
        return self._xpath_ns(path, namespace)

    def is_drm_protected(element):
        return element.find(_add_ns('ContentProtection')) is not None

    def extract_multisegment_info(element, ms_parent_info):
        # Start from a copy of the parent's values and override them with
        # whatever this element declares.
        ms_info = ms_parent_info.copy()
        segment_list = element.find(_add_ns('SegmentList'))
        if segment_list is not None:
            segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
            if segment_urls_e:
                ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e]
            initialization = segment_list.find(_add_ns('Initialization'))
            if initialization is not None:
                ms_info['initialization_url'] = initialization.attrib['sourceURL']
        else:
            segment_template = element.find(_add_ns('SegmentTemplate'))
            if segment_template is not None:
                start_number = segment_template.get('startNumber')
                if start_number:
                    ms_info['start_number'] = int(start_number)
                segment_timeline = segment_template.find(_add_ns('SegmentTimeline'))
                if segment_timeline is not None:
                    s_e = segment_timeline.findall(_add_ns('S'))
                    if s_e:
                        # Each S entry counts once plus its repeat count.
                        ms_info['total_number'] = 0
                        for s in s_e:
                            ms_info['total_number'] += 1 + int(s.get('r', '0'))
                else:
                    timescale = segment_template.get('timescale')
                    if timescale:
                        ms_info['timescale'] = int(timescale)
                    segment_duration = segment_template.get('duration')
                    if segment_duration:
                        ms_info['segment_duration'] = int(segment_duration)
                media_template = segment_template.get('media')
                if media_template:
                    ms_info['media_template'] = media_template
                initialization = segment_template.get('initialization')
                if initialization:
                    ms_info['initialization_url'] = initialization
                else:
                    initialization = segment_template.find(_add_ns('Initialization'))
                    if initialization is not None:
                        ms_info['initialization_url'] = initialization.attrib['sourceURL']
        return ms_info

    mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
    formats = []
    for period in mpd_doc.findall(_add_ns('Period')):
        period_duration = parse_duration(period.get('duration')) or mpd_duration
        period_ms_info = extract_multisegment_info(period, {
            'start_number': 1,
            'timescale': 1,
        })
        for adaptation_set in period.findall(_add_ns('AdaptationSet')):
            if is_drm_protected(adaptation_set):
                continue
            adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info)
            for representation in adaptation_set.findall(_add_ns('Representation')):
                if is_drm_protected(representation):
                    continue
                # Representation attributes override those of its set.
                representation_attrib = adaptation_set.attrib.copy()
                representation_attrib.update(representation.attrib)
                mime_type = representation_attrib.get('mimeType')
                content_type = mime_type.split('/')[0] if mime_type else representation_attrib.get('contentType')
                if content_type == 'text':
                    # TODO implement WebVTT downloading
                    pass
                elif content_type == 'video' or content_type == 'audio':
                    # Concatenate BaseURL values from the innermost element
                    # outwards until the result is absolute.
                    base_url = ''
                    for element in (representation, adaptation_set, period, mpd_doc):
                        base_url_e = element.find(_add_ns('BaseURL'))
                        if base_url_e is not None:
                            base_url = base_url_e.text + base_url
                            if re.match(r'^https?://', base_url):
                                break
                    if mpd_base_url and not re.match(r'^https?://', base_url):
                        # Work on a local copy: mutating the parameter made
                        # later representations depend on earlier ones.
                        url_prefix = mpd_base_url
                        if not url_prefix.endswith('/') and not base_url.startswith('/'):
                            url_prefix += '/'
                        base_url = url_prefix + base_url
                    representation_id = representation_attrib.get('id')
                    lang = representation_attrib.get('lang')
                    url_el = representation.find(_add_ns('BaseURL'))
                    filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
                    f = {
                        'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
                        'url': base_url,
                        'width': int_or_none(representation_attrib.get('width')),
                        'height': int_or_none(representation_attrib.get('height')),
                        'tbr': int_or_none(representation_attrib.get('bandwidth'), 1000),
                        'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
                        'fps': int_or_none(representation_attrib.get('frameRate')),
                        'vcodec': 'none' if content_type == 'audio' else representation_attrib.get('codecs'),
                        'acodec': 'none' if content_type == 'video' else representation_attrib.get('codecs'),
                        'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
                        'format_note': 'DASH %s' % content_type,
                        'filesize': filesize,
                    }
                    representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
                    if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
                        # Derive the segment count from the durations when
                        # no SegmentTimeline supplied it.  The key must
                        # really be present and the period length known.
                        if ('total_number' not in representation_ms_info and
                                'segment_duration' in representation_ms_info and
                                period_duration is not None):
                            segment_duration = float(representation_ms_info['segment_duration']) / float(representation_ms_info['timescale'])
                            representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
                        # Without a segment count the template cannot be
                        # expanded; the format then keeps only its base URL.
                        if 'total_number' in representation_ms_info:
                            media_template = representation_ms_info['media_template']
                            media_template = media_template.replace('$RepresentationID$', representation_id)
                            # $Number$ / $Bandwidth$ (optionally with a
                            # %0Nd width) become printf-style fields.
                            media_template = re.sub(r'\$(Number|Bandwidth)(?:%(0\d+)d)?\$', r'%(\1)\2d', media_template)
                            # str.replace returns a new string; keep it.
                            media_template = media_template.replace('$$', '$')
                            template_values = {
                                # %d needs a number, the attribute is text.
                                'Bandwidth': int_or_none(representation_attrib.get('bandwidth')),
                            }
                            first_number = representation_ms_info['start_number']
                            segment_urls = []
                            for segment_number in range(first_number, representation_ms_info['total_number'] + first_number):
                                template_values['Number'] = segment_number
                                segment_urls.append(media_template % template_values)
                            representation_ms_info['segment_urls'] = segment_urls
                    if 'segment_urls' in representation_ms_info:
                        f.update({
                            'segment_urls': representation_ms_info['segment_urls'],
                            'protocol': 'http_dash_segments',
                        })
                        if 'initialization_url' in representation_ms_info:
                            initialization_url = representation_ms_info['initialization_url'].replace('$RepresentationID$', representation_id)
                            f.update({
                                'initialization_url': initialization_url,
                            })
                            if not f.get('url'):
                                f['url'] = initialization_url
                    # Merge into an already collected format with the same
                    # id, otherwise add a new one seeded from formats_dict.
                    try:
                        existing_format = next(
                            fo for fo in formats
                            if fo['format_id'] == representation_id)
                    except StopIteration:
                        full_info = formats_dict.get(representation_id, {}).copy()
                        full_info.update(f)
                        formats.append(full_info)
                    else:
                        existing_format.update(f)
                else:
                    self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
    self._sort_formats(formats)
    return formats
|
||||
|
||||
def _live_title(self, name):
|
||||
""" Generate the title for a live video """
|
||||
now = datetime.datetime.now()
|
||||
|
95
youtube_dl/extractor/crackle.py
Normal file
95
youtube_dl/extractor/crackle.py
Normal file
@ -0,0 +1,95 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class CrackleIE(InfoExtractor):
    """Extractor for crackle.com video pages and ``crackle:<id>`` URLs."""

    _VALID_URL = r'(?:crackle:|https?://(?:www\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.crackle.com/the-art-of-more/2496419',
        'info_dict': {
            'id': '2496419',
            'ext': 'mp4',
            'title': 'Heavy Lies the Head',
            'description': 'md5:bb56aa0708fe7b9a4861535f15c3abca',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }

    # extracted from http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx
    _SUBTITLE_SERVER = 'http://web-us-az.crackle.com'
    _UPLYNK_OWNER_ID = 'e8773f7770a44dbd886eee4fca16a66b'
    _THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614'

    # extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx
    # Maps a media file suffix to the dimensions advertised for it.
    _MEDIA_FILE_SLOTS = {
        'c544.flv': {
            'width': 544,
            'height': 306,
        },
        '360p.mp4': {
            'width': 640,
            'height': 360,
        },
        '480p.mp4': {
            'width': 852,
            'height': 478,
        },
        '480p_1mbps.mp4': {
            'width': 852,
            'height': 478,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video identified by *url*.

        Metadata comes from the ``i`` element of the vidwallcache XML.
        Formats are the HLS variants plus, when the item has a ``p`` path
        attribute, one HTTP format per entry of ``_MEDIA_FILE_SLOTS``.
        """
        video_id = self._match_id(url)
        item = self._download_xml(
            'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id,
            video_id).find('i')
        title = item.attrib['t']

        thumbnail = None
        subtitles = {}
        # HLS is best effort: a failed manifest download must not abort the
        # extraction because the HTTP formats below may still work.
        formats = self._extract_m3u8_formats(
            'http://content.uplynk.com/ext/%s/%s.m3u8' % (self._UPLYNK_OWNER_ID, video_id),
            video_id, 'mp4', m3u8_id='hls', fatal=False)
        path = item.attrib.get('p')
        if path:
            thumbnail = self._THUMBNAIL_TEMPLATE % path
            http_base_url = 'http://ahttp.crackle.com/' + path
            for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items():
                formats.append({
                    'url': http_base_url + mfs_path,
                    'format_id': 'http-' + mfs_path.split('.')[0],
                    'width': mfs_info['width'],
                    'height': mfs_info['height'],
                })
            for cc in item.findall('cc'):
                locale = cc.attrib.get('l')
                v = cc.attrib.get('v')
                if locale and v:
                    # Append so that several tracks for one locale are all
                    # kept; plain assignment discarded every track but the
                    # last one.
                    subtitles.setdefault(locale, []).append({
                        'url': '%s/%s%s_%s.xml' % (self._SUBTITLE_SERVER, path, locale, v),
                        'ext': 'ttml',
                    })
        self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))

        return {
            'id': video_id,
            'title': title,
            'description': item.attrib.get('d'),
            # the ``r`` attribute is parsed as a base-16 integer
            'duration': int(item.attrib.get('r'), 16) if item.attrib.get('r') else None,
            'series': item.attrib.get('sn'),
            'season_number': int_or_none(item.attrib.get('se')),
            'episode_number': int_or_none(item.attrib.get('ep')),
            'thumbnail': thumbnail,
            'subtitles': subtitles,
            'formats': formats,
        }
|
@ -68,11 +68,16 @@ class CSpanIE(InfoExtractor):
|
||||
video_type, video_id = matches.groups()
|
||||
video_type = 'clip' if video_type == 'id' else 'program'
|
||||
else:
|
||||
senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
|
||||
if senate_isvp_url:
|
||||
title = self._og_search_title(webpage)
|
||||
surl = smuggle_url(senate_isvp_url, {'force_title': title})
|
||||
return self.url_result(surl, 'SenateISVP', video_id, title)
|
||||
m = re.search(r'data-(?P<type>clip|prog)id=["\'](?P<id>\d+)', webpage)
|
||||
if m:
|
||||
video_id = m.group('id')
|
||||
video_type = 'program' if m.group('type') == 'prog' else 'clip'
|
||||
else:
|
||||
senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
|
||||
if senate_isvp_url:
|
||||
title = self._og_search_title(webpage)
|
||||
surl = smuggle_url(senate_isvp_url, {'force_title': title})
|
||||
return self.url_result(surl, 'SenateISVP', video_id, title)
|
||||
if video_type is None or video_id is None:
|
||||
raise ExtractorError('unable to find video id and type')
|
||||
|
||||
@ -107,6 +112,13 @@ class CSpanIE(InfoExtractor):
|
||||
'height': int_or_none(get_text_attr(quality, 'height')),
|
||||
'tbr': int_or_none(get_text_attr(quality, 'bitrate')),
|
||||
})
|
||||
if not formats:
|
||||
path = unescapeHTML(get_text_attr(f, 'path'))
|
||||
if not path:
|
||||
continue
|
||||
formats = self._extract_m3u8_formats(
|
||||
path, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls') if determine_ext(path) == 'm3u8' else [{'url': path, }]
|
||||
self._sort_formats(formats)
|
||||
entries.append({
|
||||
'id': '%s_%d' % (video_id, partnum + 1),
|
||||
|
63
youtube_dl/extractor/cultureunplugged.py
Normal file
63
youtube_dl/extractor/cultureunplugged.py
Normal file
@ -0,0 +1,63 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class CultureUnpluggedIE(InfoExtractor):
    """Extractor for cultureunplugged.com documentary player pages."""

    _VALID_URL = r'https?://(?:www\.)?cultureunplugged\.com/documentary/watch-online/play/(?P<id>\d+)(?:/(?P<display_id>[^/]+))?'
    _TESTS = [{
        'url': 'http://www.cultureunplugged.com/documentary/watch-online/play/53662/The-Next--Best-West',
        'md5': 'ac6c093b089f7d05e79934dcb3d228fc',
        'info_dict': {
            'id': '53662',
            'display_id': 'The-Next--Best-West',
            'ext': 'mp4',
            'title': 'The Next, Best West',
            'description': 'md5:0423cd00833dea1519cf014e9d0903b1',
            'thumbnail': r're:^https?://.*\.jpg$',
            'creator': 'Coldstream Creative',
            'duration': 2203,
            'view_count': int,
        }
    }, {
        'url': 'http://www.cultureunplugged.com/documentary/watch-online/play/53662',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict built from the movie-data JSON of *url*.

        The URL slug, when present, is used as ``display_id``; otherwise
        the numeric id doubles as display id.
        """
        url_match = re.match(self._VALID_URL, url)
        video_id = url_match.group('id')
        display_id = url_match.group('display_id') or video_id

        data_url = 'http://www.cultureunplugged.com/movie-data/cu-%s.json' % video_id
        movie_data = self._download_json(data_url, display_id)

        # Mandatory fields: a missing key raises.
        video_url = movie_data['url']
        title = movie_data['title']

        # One thumbnail per size that is actually present; the position in
        # the (small, large) tuple is the preference.
        thumbnails = []
        for preference, size in enumerate(('small', 'large')):
            thumb_key = '%s_thumb' % size
            if not movie_data.get(thumb_key):
                continue
            thumbnails.append({
                'url': movie_data[thumb_key],
                'id': size,
                'preference': preference,
            })

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'description': movie_data.get('synopsis'),
            'creator': movie_data.get('producer'),
            'duration': int_or_none(movie_data.get('duration')),
            'view_count': int_or_none(movie_data.get('views')),
            'thumbnails': thumbnails,
        }
|
88
youtube_dl/extractor/cwtv.py
Normal file
88
youtube_dl/extractor/cwtv.py
Normal file
@ -0,0 +1,88 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class CWTVIE(InfoExtractor):
    """Extractor for cwtv.com / cwseed.com show pages with a ``?play=`` id."""

    _VALID_URL = r'https?://(?:www\.)?cw(?:tv|seed)\.com/shows/(?:[^/]+/){2}\?play=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
    _TESTS = [{
        'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63',
        'info_dict': {
            'id': '6b15e985-9345-4f60-baf8-56e96be57c63',
            'ext': 'mp4',
            'title': 'Legends of Yesterday',
            'description': 'Oliver and Barry Allen take Kendra Saunders and Carter Hall to a remote location to keep them hidden from Vandal Savage while they figure out how to defeat him.',
            'duration': 2665,
            'series': 'Arrow',
            'season_number': 4,
            'season': '4',
            'episode_number': 8,
            'upload_date': '20151203',
            'timestamp': 1449122100,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }, {
        'url': 'http://www.cwseed.com/shows/whose-line-is-it-anyway/jeff-davis-4/?play=24282b12-ead2-42f2-95ad-26770c2c6088',
        'info_dict': {
            'id': '24282b12-ead2-42f2-95ad-26770c2c6088',
            'ext': 'mp4',
            'title': 'Jeff Davis 4',
            'description': 'Jeff Davis is back to make you laugh.',
            'duration': 1263,
            'series': 'Whose Line Is It Anyway?',
            'season_number': 11,
            'season': '11',
            'episode_number': 20,
            'upload_date': '20151006',
            'timestamp': 1444107300,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }]

    def _real_extract(self, url):
        """Return the info dict assembled from the asset JSON for *url*.

        Formats come from the ``variantplaylist`` HLS URI.  Thumbnails and
        subtitles are ``None`` when the JSON carries no images or no
        ``UnicornCcUrl`` respectively.
        """
        video_id = self._match_id(url)
        asset_url = 'http://metaframe.digitalsmiths.tv/v2/CWtv/assets/%s/partner/132?format=json' % video_id
        video_data = self._download_json(asset_url, video_id)

        playlist_uri = video_data['videos']['variantplaylist']['uri']
        formats = self._extract_m3u8_formats(playlist_uri, video_id, 'mp4')

        thumbnails = None
        if video_data.get('images'):
            # Only the image records are needed, not their keys.
            thumbnails = [{
                'url': image['uri'],
                'width': image.get('width'),
                'height': image.get('height'),
            } for image in video_data['images'].values() if image.get('uri')]

        video_metadata = video_data['assetFields']

        subtitles = None
        if video_metadata.get('UnicornCcUrl'):
            subtitles = {
                'en': [{
                    'url': video_metadata['UnicornCcUrl'],
                }],
            }

        return {
            'id': video_id,
            'title': video_metadata['title'],
            'description': video_metadata.get('description'),
            'duration': int_or_none(video_metadata.get('duration')),
            'series': video_metadata.get('seriesName'),
            'season_number': int_or_none(video_metadata.get('seasonNumber')),
            'season': video_metadata.get('seasonName'),
            'episode_number': int_or_none(video_metadata.get('episodeNumber')),
            'timestamp': parse_iso8601(video_data.get('startTime')),
            'thumbnails': thumbnails,
            'formats': formats,
            'subtitles': subtitles,
        }
|
@ -37,7 +37,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
|
||||
class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
|
||||
_VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:embed|swf|#)/)?video/(?P<id>[^/?_]+)'
|
||||
IE_NAME = 'dailymotion'
|
||||
|
||||
_FORMATS = [
|
||||
@ -104,6 +104,10 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
{
|
||||
'url': 'http://www.dailymotion.com/video/x20su5f_the-power-of-nightmares-1-the-rise-of-the-politics-of-fear-bbc-2004_news',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://www.dailymotion.com/swf/video/x3n92nf',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
@ -336,7 +340,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
class DailymotionUserIE(DailymotionPlaylistIE):
|
||||
IE_NAME = 'dailymotion:user'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
|
||||
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dailymotion.com/user/nqtv',
|
||||
|
@ -2,17 +2,26 @@
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
xpath_text,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class DaumIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/v/(?P<id>[^?#&]+)'
|
||||
_VALID_URL = r'https?://(?:(?:m\.)?tvpot\.daum\.net/v/|videofarm\.daum\.net/controller/player/VodPlayer\.swf\?vid=)(?P<id>[^?#&]+)'
|
||||
IE_NAME = 'daum.net'
|
||||
|
||||
_TESTS = [{
|
||||
@ -23,25 +32,57 @@ class DaumIE(InfoExtractor):
|
||||
'title': '마크 헌트 vs 안토니오 실바',
|
||||
'description': 'Mark Hunt vs Antonio Silva',
|
||||
'upload_date': '20131217',
|
||||
'thumbnail': 're:^https?://.*\.(?:jpg|png)',
|
||||
'duration': 2117,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://m.tvpot.daum.net/v/65139429',
|
||||
'info_dict': {
|
||||
'id': '65139429',
|
||||
'ext': 'mp4',
|
||||
'title': '1297회, \'아빠 아들로 태어나길 잘 했어\' 민수, 감동의 눈물[아빠 어디가] 20150118',
|
||||
'description': 'md5:79794514261164ff27e36a21ad229fc5',
|
||||
'upload_date': '20150604',
|
||||
'thumbnail': 're:^https?://.*\.(?:jpg|png)',
|
||||
'duration': 154,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://videofarm.daum.net/controller/player/VodPlayer.swf?vid=vwIpVpCQsT8%24&ref=',
|
||||
'info_dict': {
|
||||
'id': 'vwIpVpCQsT8$',
|
||||
'ext': 'flv',
|
||||
'title': '01-Korean War ( Trouble on the horizon )',
|
||||
'description': '\nKorean War 01\nTrouble on the horizon\n전쟁의 먹구름',
|
||||
'upload_date': '20080223',
|
||||
'thumbnail': 're:^https?://.*\.(?:jpg|png)',
|
||||
'duration': 249,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_id = compat_urllib_parse_unquote(self._match_id(url))
|
||||
query = compat_urllib_parse.urlencode({'vid': video_id})
|
||||
info = self._download_xml(
|
||||
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
|
||||
'Downloading video info')
|
||||
movie_data = self._download_json(
|
||||
'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?' + query,
|
||||
video_id, 'Downloading video formats info')
|
||||
|
||||
# For urls like http://m.tvpot.daum.net/v/65139429, where the video_id is really a clipid
|
||||
if not movie_data.get('output_list', {}).get('output_list') and re.match(r'^\d+$', video_id):
|
||||
return self.url_result('http://tvpot.daum.net/clip/ClipView.do?clipid=%s' % video_id)
|
||||
|
||||
info = self._download_xml(
|
||||
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
|
||||
'Downloading video info')
|
||||
|
||||
formats = []
|
||||
for format_el in movie_data['output_list']['output_list']:
|
||||
profile = format_el['profile']
|
||||
@ -76,8 +117,9 @@ class DaumIE(InfoExtractor):
|
||||
|
||||
|
||||
class DaumClipIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.do|mypot/View.do)\?.*?clipid=(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.(?:do|tv)|mypot/View.do)\?.*?clipid=(?P<id>\d+)'
|
||||
IE_NAME = 'daum.net:clip'
|
||||
_URL_TEMPLATE = 'http://tvpot.daum.net/clip/ClipView.do?clipid=%s'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
|
||||
@ -87,11 +129,19 @@ class DaumClipIE(InfoExtractor):
|
||||
'title': 'DOTA 2GETHER 시즌2 6회 - 2부',
|
||||
'description': 'DOTA 2GETHER 시즌2 6회 - 2부',
|
||||
'upload_date': '20130831',
|
||||
'thumbnail': 're:^https?://.*\.(?:jpg|png)',
|
||||
'duration': 3868,
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://m.tvpot.daum.net/clip/ClipView.tv?clipid=54999425',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if DaumPlaylistIE.suitable(url) or DaumUserIE.suitable(url) else super(DaumClipIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
clip_info = self._download_json(
|
||||
@ -102,7 +152,7 @@ class DaumClipIE(InfoExtractor):
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': 'http://tvpot.daum.net/v/%s' % clip_info['vid'],
|
||||
'title': clip_info['title'],
|
||||
'title': unescapeHTML(clip_info['title']),
|
||||
'thumbnail': clip_info.get('thumb_url'),
|
||||
'description': clip_info.get('contents'),
|
||||
'duration': int_or_none(clip_info.get('duration')),
|
||||
@ -110,3 +160,139 @@ class DaumClipIE(InfoExtractor):
|
||||
'view_count': int_or_none(clip_info.get('play_count')),
|
||||
'ie_key': 'Daum',
|
||||
}
|
||||
|
||||
|
||||
class DaumListIE(InfoExtractor):
    """Shared helpers for the Daum tvpot list-style extractors."""

    def _get_entries(self, list_id, list_id_type):
        """Page through a clip list and collect its entries.

        list_id_type is the query parameter name under which *list_id* is
        sent.  Returns ``(name, entries)`` where *name* is the first
        non-empty list name found (or ``None``) and *entries* holds one
        URL result per clip.
        """
        list_name = None
        results = []
        page = 0
        while True:
            page += 1
            page_url = (
                'http://tvpot.daum.net/mypot/json/GetClipInfo.do?size=48&init=true&order=date&page=%d&%s=%s' % (
                    page, list_id_type, list_id))
            list_info = self._download_json(
                page_url, list_id, 'Downloading list info - %s' % page)

            for clip in list_info['clip_list']:
                results.append(self.url_result(
                    'http://tvpot.daum.net/v/%s' % clip['vid']))

            if not list_name:
                # Prefer the playlist name, fall back to the pot name.
                list_name = (
                    list_info.get('playlist_bean', {}).get('name') or
                    list_info.get('potInfo', {}).get('name'))

            if not list_info.get('has_more'):
                return list_name, results

    def _check_clip(self, url, list_id):
        """Honour ``noplaylist`` for list URLs that also carry a clip id.

        Returns a URL result for the single clip when *url* has a
        ``clipid`` query parameter and the ``noplaylist`` option is set;
        otherwise returns ``None``.
        """
        query = compat_parse_qs(compat_urlparse.urlparse(url).query)
        if 'clipid' not in query:
            return None
        clip_id = query['clipid'][0]
        if not self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % list_id)
            return None
        self.to_screen('Downloading just video %s because of --no-playlist' % clip_id)
        return self.url_result(DaumClipIE._URL_TEMPLATE % clip_id, 'DaumClip')
|
||||
|
||||
|
||||
class DaumPlaylistIE(DaumListIE):
|
||||
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/mypot/(?:View\.do|Top\.tv)\?.*?playlistid=(?P<id>[0-9]+)'
|
||||
IE_NAME = 'daum.net:playlist'
|
||||
_URL_TEMPLATE = 'http://tvpot.daum.net/mypot/View.do?playlistid=%s'
|
||||
|
||||
_TESTS = [{
|
||||
'note': 'Playlist url with clipid',
|
||||
'url': 'http://tvpot.daum.net/mypot/View.do?playlistid=6213966&clipid=73806844',
|
||||
'info_dict': {
|
||||
'id': '6213966',
|
||||
'title': 'Woorissica Official',
|
||||
},
|
||||
'playlist_mincount': 181
|
||||
}, {
|
||||
'note': 'Playlist url with clipid - noplaylist',
|
||||
'url': 'http://tvpot.daum.net/mypot/View.do?playlistid=6213966&clipid=73806844',
|
||||
'info_dict': {
|
||||
'id': '73806844',
|
||||
'ext': 'mp4',
|
||||
'title': '151017 Airport',
|
||||
'upload_date': '20160117',
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True,
|
||||
'skip_download': True,
|
||||
}
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if DaumUserIE.suitable(url) else super(DaumPlaylistIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
list_id = self._match_id(url)
|
||||
|
||||
clip_result = self._check_clip(url, list_id)
|
||||
if clip_result:
|
||||
return clip_result
|
||||
|
||||
name, entries = self._get_entries(list_id, 'playlistid')
|
||||
|
||||
return self.playlist_result(entries, list_id, name)
|
||||
|
||||
|
||||
class DaumUserIE(DaumListIE):
|
||||
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/mypot/(?:View|Top)\.(?:do|tv)\?.*?ownerid=(?P<id>[0-9a-zA-Z]+)'
|
||||
IE_NAME = 'daum.net:user'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0',
|
||||
'info_dict': {
|
||||
'id': 'o2scDLIVbHc0',
|
||||
'title': '마이 리틀 텔레비전',
|
||||
},
|
||||
'playlist_mincount': 213
|
||||
}, {
|
||||
'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0&clipid=73801156',
|
||||
'info_dict': {
|
||||
'id': '73801156',
|
||||
'ext': 'mp4',
|
||||
'title': '[미공개] 김구라, 오만석이 부릅니다 \'오케피\' - 마이 리틀 텔레비전 20160116',
|
||||
'upload_date': '20160117',
|
||||
'description': 'md5:5e91d2d6747f53575badd24bd62b9f36'
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True,
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'note': 'Playlist url has ownerid and playlistid, playlistid takes precedence',
|
||||
'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0&playlistid=6196631',
|
||||
'info_dict': {
|
||||
'id': '6196631',
|
||||
'title': '마이 리틀 텔레비전 - 20160109',
|
||||
},
|
||||
'playlist_count': 11
|
||||
}, {
|
||||
'url': 'http://tvpot.daum.net/mypot/Top.do?ownerid=o2scDLIVbHc0',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://m.tvpot.daum.net/mypot/Top.tv?ownerid=45x1okb1If50&playlistid=3569733',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
list_id = self._match_id(url)
|
||||
|
||||
clip_result = self._check_clip(url, list_id)
|
||||
if clip_result:
|
||||
return clip_result
|
||||
|
||||
query_dict = compat_parse_qs(compat_urlparse.urlparse(url).query)
|
||||
if 'playlistid' in query_dict:
|
||||
playlist_id = query_dict['playlistid'][0]
|
||||
return self.url_result(DaumPlaylistIE._URL_TEMPLATE % playlist_id, 'DaumPlaylist')
|
||||
|
||||
name, entries = self._get_entries(list_id, 'ownerid')
|
||||
|
||||
return self.playlist_result(entries, list_id, name)
|
||||
|
@ -7,9 +7,9 @@ from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class UltimediaIE(InfoExtractor):
|
||||
class DigitekaIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?ultimedia\.com/
|
||||
https?://(?:www\.)?(?:digiteka\.net|ultimedia\.com)/
|
||||
(?:
|
||||
deliver/
|
||||
(?P<embed_type>
|
||||
@ -56,6 +56,9 @@ class UltimediaIE(InfoExtractor):
|
||||
'timestamp': 1424760500,
|
||||
'uploader_id': '3rfzk',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.digiteka.net/deliver/generic/iframe/mdtk/01637594/src/lqm3kl/zone/1/showtitle/1/autoplay/yes',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
@ -91,7 +91,7 @@ class DRTVIE(InfoExtractor):
|
||||
subtitles_list = asset.get('SubtitlesList')
|
||||
if isinstance(subtitles_list, list):
|
||||
LANGS = {
|
||||
'Danish': 'dk',
|
||||
'Danish': 'da',
|
||||
}
|
||||
for subs in subtitles_list:
|
||||
lang = subs['Language']
|
||||
|
@ -53,8 +53,8 @@ class ESPNIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'class="video-play-button"[^>]+data-id="(\d+)',
|
||||
webpage, 'video id')
|
||||
r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)',
|
||||
webpage, 'video id', group='id')
|
||||
|
||||
cms = 'espn'
|
||||
if 'data-source="intl"' in webpage:
|
||||
|
@ -6,9 +6,11 @@ import socket
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_http_client,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
)
|
||||
from ..utils import (
|
||||
error_to_compat_str,
|
||||
@ -23,19 +25,30 @@ from ..utils import (
|
||||
|
||||
class FacebookIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:\w+\.)?facebook\.com/
|
||||
(?:[^#]*?\#!/)?
|
||||
(?:
|
||||
(?:video/video\.php|photo\.php|video\.php|video/embed)\?(?:.*?)
|
||||
(?:v|video_id)=|
|
||||
[^/]+/videos/(?:[^/]+/)?
|
||||
)
|
||||
(?P<id>[0-9]+)
|
||||
(?:.*)'''
|
||||
(?:
|
||||
https?://
|
||||
(?:\w+\.)?facebook\.com/
|
||||
(?:[^#]*?\#!/)?
|
||||
(?:
|
||||
(?:
|
||||
video/video\.php|
|
||||
photo\.php|
|
||||
video\.php|
|
||||
video/embed
|
||||
)\?(?:.*?)(?:v|video_id)=|
|
||||
[^/]+/videos/(?:[^/]+/)?
|
||||
)|
|
||||
facebook:
|
||||
)
|
||||
(?P<id>[0-9]+)
|
||||
'''
|
||||
_LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
|
||||
_CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
|
||||
_NETRC_MACHINE = 'facebook'
|
||||
IE_NAME = 'facebook'
|
||||
|
||||
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
|
||||
'md5': '6a40d33c0eccbb1af76cf0485a052659',
|
||||
@ -57,6 +70,16 @@ class FacebookIE(InfoExtractor):
|
||||
'expected_warnings': [
|
||||
'title'
|
||||
]
|
||||
}, {
|
||||
'note': 'Video with DASH manifest',
|
||||
'url': 'https://www.facebook.com/video.php?v=957955867617029',
|
||||
'md5': '54706e4db4f5ad58fbad82dde1f1213f',
|
||||
'info_dict': {
|
||||
'id': '957955867617029',
|
||||
'ext': 'mp4',
|
||||
'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...',
|
||||
'uploader': 'Demy de Zeeuw',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
||||
'only_matching': True,
|
||||
@ -66,6 +89,9 @@ class FacebookIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/ChristyClarkForBC/videos/vb.22819070941/10153870694020942/?type=2&theater',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'facebook:544765982287235',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
@ -136,13 +162,36 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
req = sanitized_Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
|
||||
req.add_header('User-Agent', self._CHROME_USER_AGENT)
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
video_data = None
|
||||
|
||||
BEFORE = '{swf.addParam(param[0], param[1]);});\n'
|
||||
AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
|
||||
m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
|
||||
if not m:
|
||||
if m:
|
||||
data = dict(json.loads(m.group(1)))
|
||||
params_raw = compat_urllib_parse_unquote(data['params'])
|
||||
video_data = json.loads(params_raw)['video_data']
|
||||
|
||||
def video_data_list2dict(video_data):
|
||||
ret = {}
|
||||
for item in video_data:
|
||||
format_id = item['stream_type']
|
||||
ret.setdefault(format_id, []).append(item)
|
||||
return ret
|
||||
|
||||
if not video_data:
|
||||
server_js_data = self._parse_json(self._search_regex(
|
||||
r'handleServerJS\(({.+})\);', webpage, 'server js data'), video_id)
|
||||
for item in server_js_data['instances']:
|
||||
if item[1][0] == 'VideoConfig':
|
||||
video_data = video_data_list2dict(item[2][0]['videoData'])
|
||||
break
|
||||
|
||||
if not video_data:
|
||||
m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
|
||||
if m_msg is not None:
|
||||
raise ExtractorError(
|
||||
@ -150,12 +199,9 @@ class FacebookIE(InfoExtractor):
|
||||
expected=True)
|
||||
else:
|
||||
raise ExtractorError('Cannot parse data')
|
||||
data = dict(json.loads(m.group(1)))
|
||||
params_raw = compat_urllib_parse_unquote(data['params'])
|
||||
params = json.loads(params_raw)
|
||||
|
||||
formats = []
|
||||
for format_id, f in params['video_data'].items():
|
||||
for format_id, f in video_data.items():
|
||||
if not f or not isinstance(f, list):
|
||||
continue
|
||||
for quality in ('sd', 'hd'):
|
||||
@ -167,9 +213,15 @@ class FacebookIE(InfoExtractor):
|
||||
'url': src,
|
||||
'preference': -10 if format_id == 'progressive' else 0,
|
||||
})
|
||||
dash_manifest = f[0].get('dash_manifest')
|
||||
if dash_manifest:
|
||||
formats.extend(self._parse_mpd_formats(
|
||||
compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest))))
|
||||
if not formats:
|
||||
raise ExtractorError('Cannot find video formats')
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage, 'title',
|
||||
default=None)
|
||||
@ -188,3 +240,33 @@ class FacebookIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'uploader': uploader,
|
||||
}
|
||||
|
||||
|
||||
class FacebookPostIE(InfoExtractor):
|
||||
IE_NAME = 'facebook:post'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?facebook\.com/[^/]+/posts/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
|
||||
'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6',
|
||||
'info_dict': {
|
||||
'id': '544765982287235',
|
||||
'ext': 'mp4',
|
||||
'title': '"What are you doing running in the snow?"',
|
||||
'uploader': 'FailArmy',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
post_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, post_id)
|
||||
|
||||
entries = [
|
||||
self.url_result('facebook:%s' % video_id, FacebookIE.ie_key())
|
||||
for video_id in self._parse_json(
|
||||
self._search_regex(
|
||||
r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])',
|
||||
webpage, 'video ids', group='ids'),
|
||||
post_id)]
|
||||
|
||||
return self.playlist_result(entries, post_id)
|
||||
|
@ -9,6 +9,7 @@ class FOXIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.fox.com/watch/255180355939/7684182528',
|
||||
'md5': 'ebd296fcc41dd4b19f8115d8461a3165',
|
||||
'info_dict': {
|
||||
'id': '255180355939',
|
||||
'ext': 'mp4',
|
||||
@ -17,10 +18,6 @@ class FOXIE(InfoExtractor):
|
||||
'duration': 129,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -29,7 +26,7 @@ class FOXIE(InfoExtractor):
|
||||
|
||||
release_url = self._parse_json(self._search_regex(
|
||||
r'"fox_pdk_player"\s*:\s*({[^}]+?})', webpage, 'fox_pdk_player'),
|
||||
video_id)['release_url'] + '&manifest=m3u'
|
||||
video_id)['release_url'] + '&switch=http'
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
|
@ -6,24 +6,29 @@ from ..utils import (
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
)
|
||||
from .youtube import YoutubeIE
|
||||
|
||||
|
||||
class GamekingsIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.gamekings\.tv/(?:videos|nieuws)/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'http://www\.gamekings\.nl/(?:videos|nieuws)/(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
|
||||
# MD5 is flaky, seems to change regularly
|
||||
# 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3',
|
||||
# YouTube embed video
|
||||
'url': 'http://www.gamekings.nl/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
|
||||
'md5': '5208d3a17adeaef829a7861887cb9029',
|
||||
'info_dict': {
|
||||
'id': 'phoenix-wright-ace-attorney-dual-destinies-review',
|
||||
'id': 'HkSQKetlGOU',
|
||||
'ext': 'mp4',
|
||||
'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review',
|
||||
'description': 'md5:36fd701e57e8c15ac8682a2374c99731',
|
||||
'title': 'Phoenix Wright: Ace Attorney - Dual Destinies Review',
|
||||
'description': 'md5:db88c0e7f47e9ea50df3271b9dc72e1d',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader_id': 'UCJugRGo4STYMeFr5RoOShtQ',
|
||||
'uploader': 'Gamekings Vault',
|
||||
'upload_date': '20151123',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
# vimeo video
|
||||
'url': 'http://www.gamekings.tv/videos/the-legend-of-zelda-majoras-mask/',
|
||||
'url': 'http://www.gamekings.nl/videos/the-legend-of-zelda-majoras-mask/',
|
||||
'md5': '12bf04dfd238e70058046937657ea68d',
|
||||
'info_dict': {
|
||||
'id': 'the-legend-of-zelda-majoras-mask',
|
||||
@ -33,7 +38,7 @@ class GamekingsIE(InfoExtractor):
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.gamekings.tv/nieuws/gamekings-extra-shelly-en-david-bereiden-zich-voor-op-de-livestream/',
|
||||
'url': 'http://www.gamekings.nl/nieuws/gamekings-extra-shelly-en-david-bereiden-zich-voor-op-de-livestream/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@ -43,7 +48,11 @@ class GamekingsIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
playlist_id = self._search_regex(
|
||||
r'gogoVideo\(\s*\d+\s*,\s*"([^"]+)', webpage, 'playlist id')
|
||||
r'gogoVideo\([^,]+,\s*"([^"]+)', webpage, 'playlist id')
|
||||
|
||||
# Check if a YouTube embed is used
|
||||
if YoutubeIE.suitable(playlist_id):
|
||||
return self.url_result(playlist_id, ie='Youtube')
|
||||
|
||||
playlist = self._download_xml(
|
||||
'http://www.gamekings.tv/wp-content/themes/gk2010/rss_playlist.php?id=%s' % playlist_id,
|
||||
|
@ -57,7 +57,7 @@ from .pladform import PladformIE
|
||||
from .videomore import VideomoreIE
|
||||
from .googledrive import GoogleDriveIE
|
||||
from .jwplatform import JWPlatformIE
|
||||
from .ultimedia import UltimediaIE
|
||||
from .digiteka import DigitekaIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@ -224,6 +224,20 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# MPD from http://dash-mse-test.appspot.com/media.html
|
||||
{
|
||||
'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
|
||||
'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
|
||||
'info_dict': {
|
||||
'id': 'car-20120827-manifest',
|
||||
'ext': 'mp4',
|
||||
'title': 'car-20120827-manifest',
|
||||
'formats': 'mincount:9',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
},
|
||||
# google redirect
|
||||
{
|
||||
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
|
||||
@ -1229,19 +1243,24 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Check for direct link to a video
|
||||
content_type = head_response.headers.get('Content-Type', '')
|
||||
m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
|
||||
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>.+)$', content_type)
|
||||
if m:
|
||||
upload_date = unified_strdate(
|
||||
head_response.headers.get('Last-Modified'))
|
||||
formats = []
|
||||
if m.group('format_id').endswith('mpegurl'):
|
||||
formats = self._extract_m3u8_formats(url, video_id, 'mp4')
|
||||
else:
|
||||
formats = [{
|
||||
'format_id': m.group('format_id'),
|
||||
'url': url,
|
||||
'vcodec': 'none' if m.group('type') == 'audio' else None
|
||||
}]
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
|
||||
'direct': True,
|
||||
'formats': [{
|
||||
'format_id': m.group('format_id'),
|
||||
'url': url,
|
||||
'vcodec': 'none' if m.group('type') == 'audio' else None
|
||||
}],
|
||||
'formats': formats,
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
|
||||
@ -1284,7 +1303,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
self.report_extraction(video_id)
|
||||
|
||||
# Is it an RSS feed, a SMIL file or a XSPF playlist?
|
||||
# Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
|
||||
try:
|
||||
doc = compat_etree_fromstring(webpage.encode('utf-8'))
|
||||
if doc.tag == 'rss':
|
||||
@ -1293,6 +1312,13 @@ class GenericIE(InfoExtractor):
|
||||
return self._parse_smil(doc, url, video_id)
|
||||
elif doc.tag == '{http://xspf.org/ns/0/}playlist':
|
||||
return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
|
||||
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
|
||||
'formats': self._parse_mpd_formats(
|
||||
doc, video_id, mpd_base_url=url.rpartition('/')[0]),
|
||||
}
|
||||
except compat_xml_parse_error:
|
||||
pass
|
||||
|
||||
@ -1402,7 +1428,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Look for embedded Dailymotion player
|
||||
matches = re.findall(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
|
||||
r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
|
||||
if matches:
|
||||
return _playlist_from_matches(
|
||||
matches, lambda m: unescapeHTML(m[1]))
|
||||
@ -1814,10 +1840,21 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
|
||||
|
||||
# Look for Ulltimedia embeds
|
||||
ultimedia_url = UltimediaIE._extract_url(webpage)
|
||||
if ultimedia_url:
|
||||
return self.url_result(self._proto_relative_url(ultimedia_url), 'Ultimedia')
|
||||
# Look for Digiteka embeds
|
||||
digiteka_url = DigitekaIE._extract_url(webpage)
|
||||
if digiteka_url:
|
||||
return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
|
||||
|
||||
# Look for Limelight embeds
|
||||
mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
|
||||
if mobj:
|
||||
lm = {
|
||||
'Media': 'media',
|
||||
'Channel': 'channel',
|
||||
'ChannelList': 'channel_list',
|
||||
}
|
||||
return self.url_result('limelight:%s:%s' % (
|
||||
lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2))
|
||||
|
||||
# Look for AdobeTVVideo embeds
|
||||
mobj = re.search(
|
||||
@ -1935,6 +1972,8 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
|
||||
elif ext == 'm3u8':
|
||||
entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
|
||||
elif ext == 'mpd':
|
||||
entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
|
||||
else:
|
||||
entry_info_dict['url'] = video_url
|
||||
|
||||
|
@ -1,31 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
|
||||
|
||||
class HistoryIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?history\.com/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
|
||||
'md5': '6fe632d033c92aa10b8d4a9be047a7c5',
|
||||
'info_dict': {
|
||||
'id': 'bLx5Dv5Aka1G',
|
||||
'ext': 'mp4',
|
||||
'title': "Bet You Didn't Know: Valentine's Day",
|
||||
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id,
|
||||
webpage, 'video url')
|
||||
|
||||
return self.url_result(smuggle_url(video_url, {'sig': {'key': 'crazyjava', 'secret': 's3cr3t'}}))
|
@ -159,6 +159,9 @@ class HitboxLiveIE(HitboxIE):
|
||||
cdns = player_config.get('cdns')
|
||||
servers = []
|
||||
for cdn in cdns:
|
||||
# Subscribe URLs are not playable
|
||||
if cdn.get('rtmpSubscribe') is True:
|
||||
continue
|
||||
base_url = cdn.get('netConnectionUrl')
|
||||
host = re.search('.+\.([^\.]+\.[^\./]+)/.+', base_url).group(1)
|
||||
if base_url not in servers:
|
||||
|
@ -10,8 +10,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class HotStarIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/.*?[/-](?P<id>\d{10})'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273',
|
||||
'info_dict': {
|
||||
'id': '1000076273',
|
||||
@ -26,7 +26,13 @@ class HotStarIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.hotstar.com/1000000515',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_GET_CONTENT_TEMPLATE = 'http://account.hotstar.com/AVS/besc?action=GetAggregatedContentDetails&channel=PCTV&contentId=%s'
|
||||
_GET_CDN_TEMPLATE = 'http://getcdn.hotstar.com/AVS/besc?action=GetCDN&asJson=Y&channel=%s&id=%s&type=%s'
|
||||
|
@ -21,6 +21,18 @@ class InstagramIE(InfoExtractor):
|
||||
'title': 'Video by naomipq',
|
||||
'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
|
||||
}
|
||||
}, {
|
||||
# missing description
|
||||
'url': 'https://www.instagram.com/p/BA-pQFBG8HZ/?taken-by=britneyspears',
|
||||
'info_dict': {
|
||||
'id': 'BA-pQFBG8HZ',
|
||||
'ext': 'mp4',
|
||||
'uploader_id': 'britneyspears',
|
||||
'title': 'Video by britneyspears',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://instagram.com/p/-Cmh1cukG2/',
|
||||
'only_matching': True,
|
||||
@ -32,8 +44,8 @@ class InstagramIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
|
||||
webpage, 'uploader id', fatal=False)
|
||||
desc = self._search_regex(r'"caption":"(.*?)"', webpage, 'description',
|
||||
fatal=False)
|
||||
desc = self._search_regex(
|
||||
r'"caption":"(.+?)"', webpage, 'description', default=None)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@ -2,46 +2,30 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
from random import random
|
||||
from math import floor
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
remove_end,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
class IPrimaIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://play\.iprima\.cz/(?:[^/]+/)*(?P<id>[^?#]+)'
|
||||
_VALID_URL = r'https?://play\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://play.iprima.cz/gondici-s-r-o-33',
|
||||
'info_dict': {
|
||||
'id': 'p136534',
|
||||
'ext': 'mp4',
|
||||
'title': 'Gondíci s. r. o. (34)',
|
||||
'description': 'md5:16577c629d006aa91f59ca8d8e7f99bd',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 download
|
||||
},
|
||||
}, {
|
||||
'url': 'http://play.iprima.cz/particka/particka-92',
|
||||
'info_dict': {
|
||||
'id': '39152',
|
||||
'ext': 'flv',
|
||||
'title': 'Partička (92)',
|
||||
'description': 'md5:74e9617e51bca67c3ecfb2c6f9766f45',
|
||||
'thumbnail': 'http://play.iprima.cz/sites/default/files/image_crops/image_620x349/3/491483_particka-92_image_620x349.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires rtmpdump
|
||||
},
|
||||
}, {
|
||||
'url': 'http://play.iprima.cz/particka/tchibo-particka-jarni-moda',
|
||||
'info_dict': {
|
||||
'id': '9718337',
|
||||
'ext': 'flv',
|
||||
'title': 'Tchibo Partička - Jarní móda',
|
||||
'thumbnail': 're:^http:.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires rtmpdump
|
||||
},
|
||||
}, {
|
||||
'url': 'http://play.iprima.cz/zpravy-ftv-prima-2752015',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@ -51,62 +35,24 @@ class IPrimaIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if re.search(r'Nemáte oprávnění přistupovat na tuto stránku\.\s*</div>', webpage):
|
||||
raise ExtractorError(
|
||||
'%s said: You do not have permission to access this page' % self.IE_NAME, expected=True)
|
||||
video_id = self._search_regex(r'data-product="([^"]+)">', webpage, 'real id')
|
||||
|
||||
player_url = (
|
||||
'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' %
|
||||
(floor(random() * 1073741824), floor(random() * 1073741824))
|
||||
)
|
||||
|
||||
req = sanitized_Request(player_url)
|
||||
req = sanitized_Request(
|
||||
'http://play.iprima.cz/prehravac/init?_infuse=1'
|
||||
'&_ts=%s&productId=%s' % (round(time.time()), video_id))
|
||||
req.add_header('Referer', url)
|
||||
playerpage = self._download_webpage(req, video_id)
|
||||
playerpage = self._download_webpage(req, video_id, note='Downloading player')
|
||||
|
||||
base_url = ''.join(re.findall(r"embed\['stream'\] = '(.+?)'.+'(\?auth=)'.+'(.+?)';", playerpage)[1])
|
||||
m3u8_url = self._search_regex(r"'src': '([^']+\.m3u8)'", playerpage, 'm3u8 url')
|
||||
|
||||
zoneGEO = self._html_search_regex(r'"zoneGEO":(.+?),', webpage, 'zoneGEO')
|
||||
if zoneGEO != '0':
|
||||
base_url = base_url.replace('token', 'token_' + zoneGEO)
|
||||
|
||||
formats = []
|
||||
for format_id in ['lq', 'hq', 'hd']:
|
||||
filename = self._html_search_regex(
|
||||
r'"%s_id":(.+?),' % format_id, webpage, 'filename')
|
||||
|
||||
if filename == 'null':
|
||||
continue
|
||||
|
||||
real_id = self._search_regex(
|
||||
r'Prima-(?:[0-9]{10}|WEB)-([0-9]+)[-_]',
|
||||
filename, 'real video id')
|
||||
|
||||
if format_id == 'lq':
|
||||
quality = 0
|
||||
elif format_id == 'hq':
|
||||
quality = 1
|
||||
elif format_id == 'hd':
|
||||
quality = 2
|
||||
filename = 'hq/' + filename
|
||||
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': base_url,
|
||||
'quality': quality,
|
||||
'play_path': 'mp4:' + filename.replace('"', '')[:-4],
|
||||
'rtmp_live': True,
|
||||
'ext': 'flv',
|
||||
})
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': real_id,
|
||||
'title': remove_end(self._og_search_title(webpage), ' | Prima PLAY'),
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'formats': formats,
|
||||
'description': self._search_regex(
|
||||
r'<p[^>]+itemprop="description"[^>]*>([^<]+)',
|
||||
webpage, 'description', default=None),
|
||||
'description': self._og_search_description(webpage),
|
||||
}
|
||||
|
@ -214,8 +214,8 @@ class IqiyiIE(InfoExtractor):
|
||||
|
||||
def get_enc_key(self, swf_url, video_id):
|
||||
# TODO: automatic key extraction
|
||||
# last update at 2015-12-18 for Zombie::bite
|
||||
enc_key = '8b6b683780897eb8d9a48a02ccc4817d'[::-1]
|
||||
# last update at 2016-01-22 for Zombie::bite
|
||||
enc_key = '6ab6d0280511493ba85594779759d4ed'
|
||||
return enc_key
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -49,7 +49,7 @@ class KanalPlayIE(InfoExtractor):
|
||||
subs = self._download_json(
|
||||
'http://www.kanal%splay.se/api/subtitles/%s' % (channel_id, video_id),
|
||||
video_id, 'Downloading subtitles JSON', fatal=False)
|
||||
return {'se': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]} if subs else {}
|
||||
return {'sv': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]} if subs else {}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@ -2,12 +2,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
|
||||
|
||||
class KickStarterIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>[^/]*)/.*'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location',
|
||||
'url': 'https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant/description',
|
||||
'md5': 'c81addca81327ffa66c642b5d8b08cab',
|
||||
'info_dict': {
|
||||
'id': '1404461844',
|
||||
@ -27,7 +28,8 @@ class KickStarterIE(InfoExtractor):
|
||||
'uploader_id': 'pebble',
|
||||
'uploader': 'Pebble Technology',
|
||||
'title': 'Pebble iOS Notifications',
|
||||
}
|
||||
},
|
||||
'add_ie': ['Vimeo'],
|
||||
}, {
|
||||
'url': 'https://www.kickstarter.com/projects/1420158244/power-drive-2000/widget/video.html',
|
||||
'info_dict': {
|
||||
@ -43,7 +45,7 @@ class KickStarterIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>\s*(.*?)(?:\s*— Kickstarter)?\s*</title>',
|
||||
r'<title>\s*(.*?)(?:\s*—\s*Kickstarter)?\s*</title>',
|
||||
webpage, 'title')
|
||||
video_url = self._search_regex(
|
||||
r'data-video-url="(.*?)"',
|
||||
@ -52,7 +54,7 @@ class KickStarterIE(InfoExtractor):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'Generic',
|
||||
'url': url,
|
||||
'url': smuggle_url(url, {'to_generic': True}),
|
||||
'title': title,
|
||||
}
|
||||
|
||||
|
107
youtube_dl/extractor/konserthusetplay.py
Normal file
107
youtube_dl/extractor/konserthusetplay.py
Normal file
@ -0,0 +1,107 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class KonserthusetPlayIE(InfoExtractor):
    """Extractor for konserthusetplay.se pages addressed by the ``m`` query parameter.

    The page embeds a csp.picsearch.com REST URL whose ``e`` parameter is used
    to fetch the player configuration; RTMP formats are built from the
    configured bitrates, with the HTTP fallback URL added when it matches.
    """
    _VALID_URL = r'https?://(?:www\.)?konserthusetplay\.se/\?.*\bm=(?P<id>[^&]+)'
    _TEST = {
        'url': 'http://www.konserthusetplay.se/?m=CKDDnlCY-dhWAAqiMERd-A',
        'info_dict': {
            'id': 'CKDDnlCY-dhWAAqiMERd-A',
            'ext': 'flv',
            'title': 'Orkesterns instrument: Valthornen',
            'description': 'md5:f10e1f0030202020396a4d712d2fa827',
            'thumbnail': 're:^https?://.*$',
            'duration': 398.8,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video referenced by *url*."""
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # Token identifying the media in the picsearch REST API.
        e = self._search_regex(
            r'https?://csp\.picsearch\.com/rest\?.*\be=(.+?)[&"\']', webpage, 'e')

        def strip_wrapper(s):
            # Keep only the outermost {...} span of the response.
            return s[s.index('{'):s.rindex('}') + 1]

        rest = self._download_json(
            'http://csp.picsearch.com/rest?e=%s&containerId=mediaplayer&i=object' % e,
            video_id, transform_source=strip_wrapper)

        media = rest['media']
        player_config = media['playerconfig']
        playlist = player_config['playlist']

        # First playlist entry that actually carries bitrates.
        source = next(entry for entry in playlist if entry.get('bitrates'))

        FORMAT_ID_REGEX = r'_([^_]+)_h264m\.mp4'

        formats = []

        fallback_url = source.get('fallbackUrl')
        fallback_format_id = (
            self._search_regex(
                FORMAT_ID_REGEX, fallback_url, 'format id', default=None)
            if fallback_url else None)

        # The RTMP connection URL lives either under 'rtmp' or under the
        # 'bwcheck' plugin settings.
        connection_url = player_config.get('rtmp', {}).get('netConnectionUrl')
        if not connection_url:
            connection_url = player_config.get(
                'plugins', {}).get('bwcheck', {}).get('netConnectionUrl')

        if connection_url:
            for bitrate in source['bitrates']:
                video_url = bitrate.get('url')
                if not video_url:
                    continue
                format_id = self._search_regex(
                    FORMAT_ID_REGEX, video_url, 'format id', default=None)
                # Fields shared by the RTMP format and its HTTP twin.
                shared = {
                    'vbr': int_or_none(bitrate.get('bitrate')),
                    'width': int_or_none(bitrate.get('width')),
                    'height': int_or_none(bitrate.get('height')),
                }

                rtmp_format = {
                    'url': connection_url,
                    'play_path': video_url,
                    'format_id': 'rtmp-%s' % format_id if format_id else 'rtmp',
                    'ext': 'flv',
                }
                rtmp_format.update(shared)
                formats.append(rtmp_format)

                # The fallback URL is the HTTP variant of exactly one bitrate.
                if format_id and format_id == fallback_format_id:
                    http_format = {
                        'url': fallback_url,
                        'format_id': 'http-%s' % format_id,
                    }
                    http_format.update(shared)
                    formats.append(http_format)

        # Nothing usable over RTMP: offer the bare fallback URL.
        if fallback_url and not formats:
            formats.append({'url': fallback_url})

        self._sort_formats(formats)

        title = player_config.get('title') or media['title']
        description = player_config.get('mediaInfo', {}).get('description')
        thumbnail = media.get('image')
        # The duration is divided by 1000 before being reported.
        duration = float_or_none(media.get('duration'), 1000)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }
|
@ -31,6 +31,10 @@ class KuwoBaseIE(InfoExtractor):
|
||||
(file_format['ext'], file_format.get('br', ''), song_id),
|
||||
song_id, note='Download %s url info' % file_format['format'],
|
||||
)
|
||||
|
||||
if song_url == 'IPDeny':
|
||||
raise ExtractorError('This song is blocked in this region', expected=True)
|
||||
|
||||
if song_url.startswith('http://') or song_url.startswith('https://'):
|
||||
formats.append({
|
||||
'url': song_url,
|
||||
|
34
youtube_dl/extractor/lemonde.py
Normal file
34
youtube_dl/extractor/lemonde.py
Normal file
@ -0,0 +1,34 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class LemondeIE(InfoExtractor):
    """Extractor for lemonde.fr article pages; delegates to the Digiteka extractor."""
    _VALID_URL = r'https?://(?:.+?\.)?lemonde\.fr/(?:[^/]+/)*(?P<id>[^/]+)\.html'
    _TESTS = [{
        'url': 'http://www.lemonde.fr/police-justice/video/2016/01/19/comprendre-l-affaire-bygmalion-en-cinq-minutes_4849702_1653578.html',
        'md5': '01fb3c92de4c12c573343d63e163d302',
        'info_dict': {
            'id': 'lqm3kl',
            'ext': 'mp4',
            'title': "Comprendre l'affaire Bygmalion en 5 minutes",
            'thumbnail': 're:^https?://.*\.jpg',
            'duration': 320,
            'upload_date': '20160119',
            'timestamp': 1453194778,
            'uploader_id': '3pmkp',
        },
    }, {
        'url': 'http://redaction.actu.lemonde.fr/societe/video/2016/01/18/calais-debut-des-travaux-de-defrichement-dans-la-jungle_4849233_3224.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Find the embedded digiteka/ultimedia deliver URL and hand it off."""
        page_id = self._match_id(url)
        page = self._download_webpage(url, page_id)

        # The player URL appears as the quoted value of a `url:` property.
        embed_url = self._search_regex(
            r'url\s*:\s*(["\'])(?P<url>(?:https?://)?//(?:www\.)?(?:digiteka\.net|ultimedia\.com)/deliver/.+?)\1',
            page, 'digiteka url', group='url')

        return self.url_result(self._proto_relative_url(embed_url), 'Digiteka')
|
@ -4,11 +4,14 @@ from __future__ import unicode_literals
|
||||
import datetime
|
||||
import re
|
||||
import time
|
||||
import base64
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_ord,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
@ -16,7 +19,9 @@ from ..utils import (
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
encode_data_uri,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
@ -239,3 +244,113 @@ class LetvPlaylistIE(LetvTvIE):
|
||||
},
|
||||
'playlist_mincount': 7
|
||||
}]
|
||||
|
||||
|
||||
class LetvCloudIE(InfoExtractor):
    """Extractor for Letv Cloud player pages (yuntv.letv.com/bcloud.html).

    A video is addressed by the ``uu`` and ``vu`` query parameters. Playback
    data is requested from the gpc.php API once per client flavour ('flash'
    and 'html5'); every request carries a salted MD5 signature.
    """
    IE_DESC = '乐视云'
    _VALID_URL = r'https?://yuntv\.letv\.com/bcloud\.html\?.+'

    _TESTS = [{
        'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=467623dedf',
        'md5': '26450599afd64c513bc77030ad15db44',
        'info_dict': {
            'id': 'p7jnfw5hw9_467623dedf',
            'ext': 'mp4',
            'title': 'Video p7jnfw5hw9_467623dedf',
        },
    }, {
        'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=ec93197892&pu=2c7cd40209&auto_play=1&gpcflag=1&width=640&height=360',
        'md5': 'e03d9cc8d9c13191e1caf277e42dbd31',
        'info_dict': {
            'id': 'p7jnfw5hw9_ec93197892',
            'ext': 'mp4',
            'title': 'Video p7jnfw5hw9_ec93197892',
        },
    }, {
        'url': 'http://yuntv.letv.com/bcloud.html?uu=p7jnfw5hw9&vu=187060b6fd',
        'md5': 'cb988699a776b22d4a41b9d43acfb3ac',
        'info_dict': {
            'id': 'p7jnfw5hw9_187060b6fd',
            'ext': 'mp4',
            'title': 'Video p7jnfw5hw9_187060b6fd',
        },
    }]

    @staticmethod
    def sign_data(obj):
        """Add a 'sign' entry to the request parameters *obj* (in place).

        The signature is the MD5 hex digest of the selected ``key + value``
        pairs concatenated in a fixed order, followed by a salt; both the key
        list and the salt depend on ``obj['cf']``.

        Raises ValueError when ``obj['cf']`` is neither 'flash' nor 'html5'.
        """
        if obj['cf'] == 'flash':
            salt = '2f9d6924b33a165a6d8b5d3d42f4f987'
            items = ['cf', 'format', 'ran', 'uu', 'ver', 'vu']
        elif obj['cf'] == 'html5':
            salt = 'fbeh5player12c43eccf2bec3300344'
            items = ['cf', 'ran', 'uu', 'bver', 'vu']
        else:
            # Previously this fell through to an UnboundLocalError below.
            raise ValueError('Unsupported cf: %s' % obj['cf'])
        input_data = ''.join([item + obj[item] for item in items]) + salt
        obj['sign'] = hashlib.md5(input_data.encode('utf-8')).hexdigest()

    def _get_formats(self, cf, uu, vu, media_id):
        """Return the list of format dicts offered for client flavour *cf*.

        *uu* and *vu* are the query parameters identifying the video and
        *media_id* is the id used for progress/error reporting. Raises
        ExtractorError when the API returns no 'data'.
        """
        def get_play_json(cf, timestamp):
            data = {
                'cf': cf,
                'ver': '2.2',
                'bver': 'firefox44.0',
                'format': 'json',
                'uu': uu,
                'vu': vu,
                'ran': compat_str(timestamp),
            }
            self.sign_data(data)
            return self._download_json(
                'http://api.letvcloud.com/gpc.php?' + compat_urllib_parse.urlencode(data),
                media_id, 'Downloading playJson data for type %s' % cf)

        play_json = get_play_json(cf, time.time())
        # The server time may be different from local time
        if play_json.get('code') == 10071:
            play_json = get_play_json(cf, play_json['timestamp'])

        if not play_json.get('data'):
            if play_json.get('message'):
                raise ExtractorError('Letv cloud said: %s' % play_json['message'], expected=True)
            elif play_json.get('code'):
                # %s rather than %d: the code is not guaranteed to be an int.
                raise ExtractorError('Letv cloud returned error %s' % play_json['code'], expected=True)
            else:
                raise ExtractorError('Letv cloud returned an unknown error')

        def b64decode(s):
            return base64.b64decode(s.encode('utf-8')).decode('utf-8')

        formats = []
        for media in play_json['data']['video_info']['media'].values():
            play_url = media['play_url']
            url = b64decode(play_url['main_url'])
            # The URL basename is itself base64; decoding it exposes the
            # file name used to determine the extension.
            decoded_url = b64decode(url_basename(url))
            formats.append({
                'url': url,
                'ext': determine_ext(decoded_url),
                # format_id is a textual identifier, so keep it a string.
                'format_id': str_or_none(play_url.get('vtype')),
                'format_note': str_or_none(play_url.get('definition')),
                'width': int_or_none(play_url.get('vwidth')),
                'height': int_or_none(play_url.get('vheight')),
            })

        return formats

    def _real_extract(self, url):
        """Build the info dict from the formats of both client flavours."""
        uu_mobj = re.search(r'uu=(\w+)', url)
        vu_mobj = re.search(r'vu=(\w+)', url)

        if not uu_mobj or not vu_mobj:
            raise ExtractorError('Invalid URL: %s' % url, expected=True)

        uu = uu_mobj.group(1)
        vu = vu_mobj.group(1)
        media_id = uu + '_' + vu

        formats = self._get_formats('flash', uu, vu, media_id) + self._get_formats('html5', uu, vu, media_id)
        self._sort_formats(formats)

        return {
            'id': media_id,
            # No title is extracted here, so one is synthesised from the id.
            'title': 'Video %s' % media_id,
            'formats': formats,
        }
|
||||
|
@ -40,7 +40,8 @@ class LimelightBaseIE(InfoExtractor):
|
||||
if not stream_url:
|
||||
continue
|
||||
if '.f4m' in stream_url:
|
||||
formats.extend(self._extract_f4m_formats(stream_url, video_id))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
stream_url, video_id, fatal=False))
|
||||
else:
|
||||
fmt = {
|
||||
'url': stream_url,
|
||||
@ -72,8 +73,8 @@ class LimelightBaseIE(InfoExtractor):
|
||||
format_id = mobile_url.get('targetMediaPlatform')
|
||||
if determine_ext(media_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
preference=-1, m3u8_id=format_id))
|
||||
media_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id=format_id, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': media_url,
|
||||
|
37
youtube_dl/extractor/lovehomeporn.py
Normal file
37
youtube_dl/extractor/lovehomeporn.py
Normal file
@ -0,0 +1,37 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .nuevo import NuevoBaseIE
|
||||
|
||||
|
||||
class LoveHomePornIE(NuevoBaseIE):
    """Extractor for lovehomeporn.com video pages, backed by the Nuevo player config."""
    _VALID_URL = r'https?://(?:www\.)?lovehomeporn\.com/video/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
    _TEST = {
        'url': 'http://lovehomeporn.com/video/48483/stunning-busty-brunette-girlfriend-sucking-and-riding-a-big-dick#menu',
        'info_dict': {
            'id': '48483',
            'display_id': 'stunning-busty-brunette-girlfriend-sucking-and-riding-a-big-dick',
            'ext': 'mp4',
            'title': 'Stunning busty brunette girlfriend sucking and riding a big dick',
            'age_limit': 18,
            'duration': 238.47,
        },
        'params': {
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        """Fetch the Nuevo config for the video and add site-specific fields."""
        url_match = re.match(self._VALID_URL, url)
        # display_id is None when the URL carries no slug after the numeric id.
        video_id, display_id = url_match.group('id', 'display_id')

        config_url = 'http://lovehomeporn.com/media/nuevo/config.php?key=%s' % video_id
        info = self._extract_nuevo(config_url, video_id)

        info['display_id'] = display_id
        info['age_limit'] = 18
        return info
|
@ -4,6 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class MailRuIE(InfoExtractor):
|
||||
@ -34,14 +38,30 @@ class MailRuIE(InfoExtractor):
|
||||
'id': '46843144_1263',
|
||||
'ext': 'mp4',
|
||||
'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion',
|
||||
'timestamp': 1397217632,
|
||||
'upload_date': '20140411',
|
||||
'uploader': 'hitech',
|
||||
'timestamp': 1397039888,
|
||||
'upload_date': '20140409',
|
||||
'uploader': 'hitech@corp.mail.ru',
|
||||
'uploader_id': 'hitech@corp.mail.ru',
|
||||
'duration': 245,
|
||||
},
|
||||
'skip': 'Not accessible from Travis CI server',
|
||||
},
|
||||
{
|
||||
# only available via metaUrl API
|
||||
'url': 'http://my.mail.ru/mail/720pizle/video/_myvideo/502.html',
|
||||
'md5': '3b26d2491c6949d031a32b96bd97c096',
|
||||
'info_dict': {
|
||||
'id': '56664382_502',
|
||||
'ext': 'mp4',
|
||||
'title': ':8336',
|
||||
'timestamp': 1449094163,
|
||||
'upload_date': '20151202',
|
||||
'uploader': '720pizle@mail.ru',
|
||||
'uploader_id': '720pizle@mail.ru',
|
||||
'duration': 6001,
|
||||
},
|
||||
'skip': 'Not accessible from Travis CI server',
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -51,32 +71,55 @@ class MailRuIE(InfoExtractor):
|
||||
if not video_id:
|
||||
video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix')
|
||||
|
||||
video_data = self._download_json(
|
||||
'http://api.video.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
author = video_data['author']
|
||||
uploader = author['name']
|
||||
uploader_id = author.get('id') or author.get('email')
|
||||
view_count = video_data.get('views_count')
|
||||
video_data = None
|
||||
|
||||
page_config = self._parse_json(self._search_regex(
|
||||
r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>',
|
||||
webpage, 'page config', default='{}'), video_id, fatal=False)
|
||||
if page_config:
|
||||
meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl')
|
||||
if meta_url:
|
||||
video_data = self._download_json(
|
||||
meta_url, video_id, 'Downloading video meta JSON', fatal=False)
|
||||
|
||||
# Fallback old approach
|
||||
if not video_data:
|
||||
video_data = self._download_json(
|
||||
'http://api.video.mail.ru/videos/%s.json?new=1' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
formats = []
|
||||
for f in video_data['videos']:
|
||||
video_url = f.get('url')
|
||||
if not video_url:
|
||||
continue
|
||||
format_id = f.get('key')
|
||||
height = int_or_none(self._search_regex(
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None)) if format_id else None
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'height': height,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
meta_data = video_data['meta']
|
||||
content_id = '%s_%s' % (
|
||||
meta_data.get('accId', ''), meta_data['itemId'])
|
||||
title = meta_data['title']
|
||||
if title.endswith('.mp4'):
|
||||
title = title[:-4]
|
||||
thumbnail = meta_data['poster']
|
||||
duration = meta_data['duration']
|
||||
timestamp = meta_data['timestamp']
|
||||
title = remove_end(meta_data['title'], '.mp4')
|
||||
|
||||
formats = [
|
||||
{
|
||||
'url': video['url'],
|
||||
'format_id': video['key'],
|
||||
'height': int(video['key'].rstrip('p'))
|
||||
} for video in video_data['videos']
|
||||
]
|
||||
self._sort_formats(formats)
|
||||
author = video_data.get('author')
|
||||
uploader = author.get('name')
|
||||
uploader_id = author.get('id') or author.get('email')
|
||||
view_count = int_or_none(video_data.get('viewsCount') or video_data.get('views_count'))
|
||||
|
||||
acc_id = meta_data.get('accId')
|
||||
item_id = meta_data.get('itemId')
|
||||
content_id = '%s_%s' % (acc_id, item_id) if acc_id and item_id else video_id
|
||||
|
||||
thumbnail = meta_data.get('poster')
|
||||
duration = int_or_none(meta_data.get('duration'))
|
||||
timestamp = int_or_none(meta_data.get('timestamp'))
|
||||
|
||||
return {
|
||||
'id': content_id,
|
||||
|
55
youtube_dl/extractor/matchtv.py
Normal file
55
youtube_dl/extractor/matchtv.py
Normal file
@ -0,0 +1,55 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import random
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import (
|
||||
sanitized_Request,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class MatchTVIE(InfoExtractor):
    """Extractor for the matchtv.ru live stream (``#live-player``).

    The stream location is resolved in three steps: a SMIL request returns a
    JSON document with a video URL, that URL returns XML whose ``./to``
    element holds the f4m manifest URL, and the manifest yields the formats.
    """
    _VALID_URL = r'https?://matchtv\.ru/?#live-player'
    _TEST = {
        'url': 'http://matchtv.ru/#live-player',
        'info_dict': {
            'id': 'matchtv-live',
            'ext': 'flv',
            'title': 're:^Матч ТВ - Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the live stream; *url* carries no id."""
        # There is a single live channel, so the id is a constant.
        video_id = 'matchtv-live'
        request = sanitized_Request(
            'http://player.matchtv.ntvplus.tv/player/smil?%s' % compat_urllib_parse.urlencode({
                'ts': '',
                'quality': 'SD',
                'contentId': '561d2c0df7159b37178b4567',
                'sign': '',
                'includeHighlights': '0',
                'userId': '',
                'sessionId': random.randint(1, 1000000000),
                'contentType': 'channel',
                'timeShift': '0',
                'platform': 'portal',
            }),
            headers={
                'Referer': 'http://player.matchtv.ntvplus.tv/embed-player/NTVEmbedPlayer.swf',
            })
        video_url = self._download_json(request, video_id)['data']['videoUrl']
        f4m_url = xpath_text(self._download_xml(video_url, video_id), './to')
        formats = self._extract_f4m_formats(f4m_url, video_id)
        # Order the formats like the other extractors do before returning them.
        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': self._live_title('Матч ТВ - Прямой эфир'),
            'is_live': True,
            'formats': formats,
        }
|
@ -18,13 +18,17 @@ class NBAIE(InfoExtractor):
|
||||
'md5': '9e7729d3010a9c71506fd1248f74e4f4',
|
||||
'info_dict': {
|
||||
'id': '0021200253-okc-bkn-recap',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Thunder vs. Nets',
|
||||
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
|
||||
'duration': 181,
|
||||
'timestamp': 1354638466,
|
||||
'upload_date': '20121204',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
|
||||
'only_matching': True,
|
||||
@ -68,7 +72,7 @@ class NBAIE(InfoExtractor):
|
||||
if video_url.startswith('/'):
|
||||
continue
|
||||
if video_url.endswith('.m3u8'):
|
||||
formats.extend(self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
|
||||
elif video_url.endswith('.f4m'):
|
||||
formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.1.1', video_id, f4m_id='hds', fatal=False))
|
||||
else:
|
||||
|
@ -19,38 +19,45 @@ class NBCIE(InfoExtractor):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.nbc.com/the-tonight-show/segments/112966',
|
||||
# md5 checksum is not stable
|
||||
'info_dict': {
|
||||
'id': 'c9xnCo0YPOPH',
|
||||
'ext': 'flv',
|
||||
'id': '112966',
|
||||
'ext': 'mp4',
|
||||
'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
|
||||
'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nbc.com/the-tonight-show/episodes/176',
|
||||
'info_dict': {
|
||||
'id': 'XwU9KZkp98TH',
|
||||
'id': '176',
|
||||
'ext': 'flv',
|
||||
'title': 'Ricky Gervais, Steven Van Zandt, ILoveMakonnen',
|
||||
'description': 'A brand new episode of The Tonight Show welcomes Ricky Gervais, Steven Van Zandt and ILoveMakonnen.',
|
||||
},
|
||||
'skip': 'Only works from US',
|
||||
'skip': '404 Not Found',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
|
||||
'info_dict': {
|
||||
'id': '8iUuyzWDdYUZ',
|
||||
'ext': 'flv',
|
||||
'id': '2832821',
|
||||
'ext': 'mp4',
|
||||
'title': 'Star Wars Teaser',
|
||||
'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Only works from US',
|
||||
},
|
||||
{
|
||||
# This video has expired but with an escaped embedURL
|
||||
'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515',
|
||||
'skip': 'Expired'
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
@ -66,7 +73,11 @@ class NBCIE(InfoExtractor):
|
||||
webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))
|
||||
if theplatform_url.startswith('//'):
|
||||
theplatform_url = 'http:' + theplatform_url
|
||||
return self.url_result(smuggle_url(theplatform_url, {'source_url': url}))
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url(theplatform_url, {'source_url': url}),
|
||||
'id': video_id,
|
||||
}
|
||||
|
||||
|
||||
class NBCSportsVPlayerIE(InfoExtractor):
|
||||
|
@ -193,7 +193,7 @@ class NDREmbedBaseIE(InfoExtractor):
|
||||
src + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, f4m_id='hds'))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src, video_id, m3u8_id='hls', entry_protocol='m3u8_native'))
|
||||
src, video_id, 'mp4', m3u8_id='hls', entry_protocol='m3u8_native'))
|
||||
else:
|
||||
quality = f.get('quality')
|
||||
ff = {
|
||||
|
@ -12,7 +12,10 @@ from ..compat import (
|
||||
compat_str,
|
||||
compat_itertools_count,
|
||||
)
|
||||
from ..utils import sanitized_Request
|
||||
from ..utils import (
|
||||
sanitized_Request,
|
||||
float_or_none,
|
||||
)
|
||||
|
||||
|
||||
class NetEaseMusicBaseIE(InfoExtractor):
|
||||
@ -32,23 +35,32 @@ class NetEaseMusicBaseIE(InfoExtractor):
|
||||
result = b64encode(m.digest()).decode('ascii')
|
||||
return result.replace('/', '_').replace('+', '-')
|
||||
|
||||
@classmethod
|
||||
def extract_formats(cls, info):
|
||||
def extract_formats(self, info):
|
||||
formats = []
|
||||
for song_format in cls._FORMATS:
|
||||
for song_format in self._FORMATS:
|
||||
details = info.get(song_format)
|
||||
if not details:
|
||||
continue
|
||||
formats.append({
|
||||
'url': 'http://m5.music.126.net/%s/%s.%s' %
|
||||
(cls._encrypt(details['dfsId']), details['dfsId'],
|
||||
details['extension']),
|
||||
'ext': details.get('extension'),
|
||||
'abr': details.get('bitrate', 0) / 1000,
|
||||
'format_id': song_format,
|
||||
'filesize': details.get('size'),
|
||||
'asr': details.get('sr')
|
||||
})
|
||||
song_file_path = '/%s/%s.%s' % (
|
||||
self._encrypt(details['dfsId']), details['dfsId'], details['extension'])
|
||||
|
||||
# 203.130.59.9, 124.40.233.182, 115.231.74.139, etc is a reverse proxy-like feature
|
||||
# from NetEase's CDN provider that can be used if m5.music.126.net does not
|
||||
# work, especially for users outside of Mainland China
|
||||
# via: https://github.com/JixunMoe/unblock-163/issues/3#issuecomment-163115880
|
||||
for host in ('http://m5.music.126.net', 'http://115.231.74.139/m1.music.126.net',
|
||||
'http://124.40.233.182/m1.music.126.net', 'http://203.130.59.9/m1.music.126.net'):
|
||||
song_url = host + song_file_path
|
||||
if self._is_valid_url(song_url, info['id'], 'song'):
|
||||
formats.append({
|
||||
'url': song_url,
|
||||
'ext': details.get('extension'),
|
||||
'abr': float_or_none(details.get('bitrate'), scale=1000),
|
||||
'format_id': song_format,
|
||||
'filesize': details.get('size'),
|
||||
'asr': details.get('sr')
|
||||
})
|
||||
break
|
||||
return formats
|
||||
|
||||
@classmethod
|
||||
|
@ -189,7 +189,7 @@ class NPOIE(NPOBaseIE):
|
||||
if not video_url:
|
||||
continue
|
||||
if format_id == 'adaptive':
|
||||
formats.extend(self._extract_m3u8_formats(video_url, video_id))
|
||||
formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4'))
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
@ -406,6 +406,38 @@ class NPORadioFragmentIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class SchoolTVIE(InfoExtractor):
    """Extractor for schooltv.nl video pages; delegates to the NPO extractor."""
    IE_NAME = 'schooltv'
    _VALID_URL = r'https?://(?:www\.)?schooltv\.nl/video/(?P<id>[^/?#&]+)'

    _TEST = {
        'url': 'http://www.schooltv.nl/video/ademhaling-de-hele-dag-haal-je-adem-maar-wat-gebeurt-er-dan-eigenlijk-in-je-lichaam/',
        'info_dict': {
            'id': 'WO_NTR_429477',
            'display_id': 'ademhaling-de-hele-dag-haal-je-adem-maar-wat-gebeurt-er-dan-eigenlijk-in-je-lichaam',
            'title': 'Ademhaling: De hele dag haal je adem. Maar wat gebeurt er dan eigenlijk in je lichaam?',
            'ext': 'mp4',
            'description': 'md5:abfa0ff690adb73fd0297fd033aaa631'
        },
        'params': {
            # Skip because of m3u8 download
            'skip_download': True
        }
    }

    def _real_extract(self, url):
        """Read the media id from the page and return a transparent NPO result."""
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        # The media id is the quoted value of the data-mid attribute.
        media_id = self._search_regex(
            r'data-mid=(["\'])(?P<id>.+?)\1', page, 'video_id', group='id')

        result = {
            '_type': 'url_transparent',
            'ie_key': 'NPO',
            'url': 'npo:%s' % media_id,
            'display_id': slug
        }
        return result
|
||||
|
||||
|
||||
class VPROIE(NPOIE):
|
||||
IE_NAME = 'vpro'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html'
|
||||
|
@ -133,26 +133,32 @@ class NRKTVIE(InfoExtractor):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
||||
'md5': 'adf2c5454fa2bf032f47a9f8fb351342',
|
||||
'info_dict': {
|
||||
'id': 'MUHH48000314',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': '20 spørsmål',
|
||||
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
||||
'upload_date': '20140523',
|
||||
'duration': 1741.52,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://tv.nrk.no/program/mdfp15000514',
|
||||
'md5': '383650ece2b25ecec996ad7b5bb2a384',
|
||||
'info_dict': {
|
||||
'id': 'mdfp15000514',
|
||||
'ext': 'flv',
|
||||
'title': 'Kunnskapskanalen: Grunnlovsjubiléet - Stor ståhei for ingenting',
|
||||
'ext': 'mp4',
|
||||
'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting',
|
||||
'description': 'md5:654c12511f035aed1e42bdf5db3b206a',
|
||||
'upload_date': '20140524',
|
||||
'duration': 4605.0,
|
||||
'duration': 4605.08,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
@ -34,7 +35,7 @@ class NTVDeIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
info = self._parse_json(self._search_regex(
|
||||
r'(?s)ntv.pageInfo.article =\s(\{.*?\});', webpage, 'info'),
|
||||
r'(?s)ntv\.pageInfo\.article\s*=\s*(\{.*?\});', webpage, 'info'),
|
||||
video_id, transform_source=js_to_json)
|
||||
timestamp = int_or_none(info.get('publishedDateAsUnixTimeStamp'))
|
||||
vdata = self._parse_json(self._search_regex(
|
||||
@ -42,18 +43,24 @@ class NTVDeIE(InfoExtractor):
|
||||
webpage, 'player data'),
|
||||
video_id, transform_source=js_to_json)
|
||||
duration = parse_duration(vdata.get('duration'))
|
||||
formats = [{
|
||||
'format_id': 'flash',
|
||||
'url': 'rtmp://fms.n-tv.de/' + vdata['video'],
|
||||
}, {
|
||||
'format_id': 'mobile',
|
||||
'url': 'http://video.n-tv.de' + vdata['videoMp4'],
|
||||
'tbr': 400, # estimation
|
||||
}]
|
||||
m3u8_url = 'http://video.n-tv.de' + vdata['videoM3u8']
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4',
|
||||
entry_protocol='m3u8_native', preference=0))
|
||||
|
||||
formats = []
|
||||
if vdata.get('video'):
|
||||
formats.append({
|
||||
'format_id': 'flash',
|
||||
'url': 'rtmp://fms.n-tv.de/%s' % vdata['video'],
|
||||
})
|
||||
if vdata.get('videoMp4'):
|
||||
formats.append({
|
||||
'format_id': 'mobile',
|
||||
'url': compat_urlparse.urljoin('http://video.n-tv.de', vdata['videoMp4']),
|
||||
'tbr': 400, # estimation
|
||||
})
|
||||
if vdata.get('videoM3u8'):
|
||||
m3u8_url = compat_urlparse.urljoin('http://video.n-tv.de', vdata['videoM3u8'])
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
preference=0, m3u8_id='hls', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
38
youtube_dl/extractor/nuevo.py
Normal file
38
youtube_dl/extractor/nuevo.py
Normal file
@ -0,0 +1,38 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
xpath_text
|
||||
)
|
||||
|
||||
|
||||
class NuevoBaseIE(InfoExtractor):
    """Base class for sites that expose their media through a Nuevo XML config."""

    def _extract_nuevo(self, config_url, video_id):
        """Download the XML config at *config_url* and return an info dict.

        The returned id is the config's ``mediaid`` element when present and
        *video_id* otherwise. A missing ``title`` element is fatal.
        """
        def strip_whitespace(s):
            return s.strip()

        config = self._download_xml(
            config_url, video_id, transform_source=strip_whitespace)

        title = xpath_text(config, './title', 'title', fatal=True).strip()
        video_id = xpath_text(config, './mediaid', default=video_id)
        thumbnail = xpath_text(config, ['./image', './thumb'])
        duration = float_or_none(xpath_text(config, './duration'))

        # 'file' holds the SD URL and 'filehd' the HD one; either may be absent.
        candidates = [
            (xpath_text(config, element_name), format_id)
            for element_name, format_id in (('file', 'sd'), ('filehd', 'hd'))]
        formats = [
            {
                'url': video_url,
                'format_id': format_id,
            }
            for video_url, format_id in candidates if video_url]
        self._check_formats(formats, video_id)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats
        }
|
@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class OdnoklassnikiIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:odnoklassniki|ok)\.ru/(?:video(?:embed)?|web-api/video/moviePlayer)/(?P<id>[\d-]+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|m|mobile)\.)?(?:odnoklassniki|ok)\.ru/(?:video(?:embed)?|web-api/video/moviePlayer)/(?P<id>[\d-]+)'
|
||||
_TESTS = [{
|
||||
# metadata in JSON
|
||||
'url': 'http://ok.ru/video/20079905452',
|
||||
@ -69,6 +69,12 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.ok.ru/videoembed/20648036891',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://m.ok.ru/video/20079905452',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://mobile.ok.ru/video/20079905452',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -21,7 +21,6 @@ class OraTVIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Vine & YouTube Stars Zach King & King Bach On Their Viral Videos!',
|
||||
'description': 'md5:ebbc5b1424dd5dba7be7538148287ac1',
|
||||
'duration': 1477,
|
||||
}
|
||||
}
|
||||
|
||||
@ -30,9 +29,9 @@ class OraTVIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_data = self._search_regex(
|
||||
r'"current"\s*:\s*({[^}]+?})', webpage, 'current video')
|
||||
r'"(?:video|current)"\s*:\s*({[^}]+?})', webpage, 'current video')
|
||||
m3u8_url = self._search_regex(
|
||||
r'"hls_stream"\s*:\s*"([^"]+)', video_data, 'm3u8 url', None)
|
||||
r'hls_stream"?\s*:\s*"([^"]+)', video_data, 'm3u8 url', None)
|
||||
if m3u8_url:
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, display_id, 'mp4', 'm3u8_native',
|
||||
@ -62,14 +61,12 @@ class OraTVIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': self._search_regex(
|
||||
r'"video_id"\s*:\s*(\d+)', video_data, 'video id'),
|
||||
r'"id"\s*:\s*(\d+)', video_data, 'video id', default=display_id),
|
||||
'display_id': display_id,
|
||||
'title': unescapeHTML(self._og_search_title(webpage)),
|
||||
'description': get_element_by_attribute(
|
||||
'class', 'video_txt_decription', webpage),
|
||||
'thumbnail': self._proto_relative_url(self._search_regex(
|
||||
r'"thumb"\s*:\s*"([^"]+)', video_data, 'thumbnail', None)),
|
||||
'duration': int(self._search_regex(
|
||||
r'"duration"\s*:\s*(\d+)', video_data, 'duration')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -170,7 +170,21 @@ class ORFOE1IE(InfoExtractor):
|
||||
class ORFFM4IE(InfoExtractor):
|
||||
IE_NAME = 'orf:fm4'
|
||||
IE_DESC = 'radio FM4'
|
||||
_VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P<date>[0-9]+)/(?P<show>\w+)'
|
||||
_VALID_URL = r'http://fm4\.orf\.at/(?:7tage/?#|player/)(?P<date>[0-9]+)/(?P<show>\w+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://fm4.orf.at/player/20160110/IS/',
|
||||
'md5': '01e736e8f1cef7e13246e880a59ad298',
|
||||
'info_dict': {
|
||||
'id': '2016-01-10_2100_tl_54_7DaysSun13_11244',
|
||||
'ext': 'mp3',
|
||||
'title': 'Im Sumpf',
|
||||
'description': 'md5:384c543f866c4e422a55f66a62d669cd',
|
||||
'duration': 7173,
|
||||
'timestamp': 1452456073,
|
||||
'upload_date': '20160110',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@ -4,10 +4,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
strip_jsonp,
|
||||
unified_strdate,
|
||||
US_RATINGS,
|
||||
@ -199,7 +201,7 @@ class PBSIE(InfoExtractor):
|
||||
'id': '2365006249',
|
||||
'ext': 'mp4',
|
||||
'title': 'Constitution USA with Peter Sagal - A More Perfect Union',
|
||||
'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
|
||||
'description': 'md5:36f341ae62e251b8f5bd2b754b95a071',
|
||||
'duration': 3190,
|
||||
},
|
||||
'params': {
|
||||
@ -213,7 +215,7 @@ class PBSIE(InfoExtractor):
|
||||
'id': '2365297690',
|
||||
'ext': 'mp4',
|
||||
'title': 'FRONTLINE - Losing Iraq',
|
||||
'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
|
||||
'description': 'md5:4d3eaa01f94e61b3e73704735f1196d9',
|
||||
'duration': 5050,
|
||||
},
|
||||
'params': {
|
||||
@ -227,7 +229,7 @@ class PBSIE(InfoExtractor):
|
||||
'id': '2201174722',
|
||||
'ext': 'mp4',
|
||||
'title': 'PBS NewsHour - Cyber Schools Gain Popularity, but Quality Questions Persist',
|
||||
'description': 'md5:5871c15cba347c1b3d28ac47a73c7c28',
|
||||
'description': 'md5:95a19f568689d09a166dff9edada3301',
|
||||
'duration': 801,
|
||||
},
|
||||
},
|
||||
@ -237,8 +239,8 @@ class PBSIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '2365297708',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:68d87ef760660eb564455eb30ca464fe',
|
||||
'title': 'Great Performances - Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
|
||||
'description': 'md5:657897370e09e2bc6bf0f8d2cd313c6b',
|
||||
'duration': 6559,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
@ -278,7 +280,7 @@ class PBSIE(InfoExtractor):
|
||||
'display_id': 'player',
|
||||
'ext': 'mp4',
|
||||
'title': 'American Experience - Death and the Civil War, Chapter 1',
|
||||
'description': 'American Experience, TV’s most-watched history series, brings to life the compelling stories from our past that inform our understanding of the world today.',
|
||||
'description': 'md5:1b80a74e0380ed2a4fb335026de1600d',
|
||||
'duration': 682,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
@ -287,20 +289,19 @@ class PBSIE(InfoExtractor):
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://video.pbs.org/video/2365367186/',
|
||||
'url': 'http://www.pbs.org/video/2365245528/',
|
||||
'info_dict': {
|
||||
'id': '2365367186',
|
||||
'display_id': '2365367186',
|
||||
'id': '2365245528',
|
||||
'display_id': '2365245528',
|
||||
'ext': 'mp4',
|
||||
'title': 'To Catch A Comet - Full Episode',
|
||||
'description': 'On November 12, 2014, billions of kilometers from Earth, spacecraft orbiter Rosetta and lander Philae did what no other had dared to attempt \u2014 land on the volatile surface of a comet as it zooms around the sun at 67,000 km/hr. The European Space Agency hopes this mission can help peer into our past and unlock secrets of our origins.',
|
||||
'duration': 3342,
|
||||
'title': 'FRONTLINE - United States of Secrets (Part One)',
|
||||
'description': 'md5:55756bd5c551519cc4b7703e373e217e',
|
||||
'duration': 6851,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
},
|
||||
'skip': 'Expired',
|
||||
},
|
||||
{
|
||||
# Video embedded in iframe containing angle brackets as attribute's value (e.g.
|
||||
@ -312,7 +313,7 @@ class PBSIE(InfoExtractor):
|
||||
'display_id': 'a-chefs-life-season-3-episode-5-prickly-business',
|
||||
'ext': 'mp4',
|
||||
'title': "A Chef's Life - Season 3, Ep. 5: Prickly Business",
|
||||
'description': 'md5:61db2ddf27c9912f09c241014b118ed1',
|
||||
'description': 'md5:54033c6baa1f9623607c6e2ed245888b',
|
||||
'duration': 1480,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
@ -328,7 +329,7 @@ class PBSIE(InfoExtractor):
|
||||
'display_id': 'the-atomic-artists',
|
||||
'ext': 'mp4',
|
||||
'title': 'FRONTLINE - The Atomic Artists',
|
||||
'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
|
||||
'description': 'md5:1a2481e86b32b2e12ec1905dd473e2c1',
|
||||
'duration': 723,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
@ -365,10 +366,14 @@ class PBSIE(InfoExtractor):
|
||||
webpage, 'upload date', default=None))
|
||||
|
||||
# tabbed frontline videos
|
||||
tabbed_videos = re.findall(
|
||||
r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"', webpage)
|
||||
if tabbed_videos:
|
||||
return tabbed_videos, presumptive_id, upload_date
|
||||
MULTI_PART_REGEXES = (
|
||||
r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"',
|
||||
r'<a[^>]+href=["\']#video-\d+["\'][^>]+data-coveid=["\'](\d+)',
|
||||
)
|
||||
for p in MULTI_PART_REGEXES:
|
||||
tabbed_videos = re.findall(p, webpage)
|
||||
if tabbed_videos:
|
||||
return tabbed_videos, presumptive_id, upload_date
|
||||
|
||||
MEDIA_ID_REGEXES = [
|
||||
r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", # frontline video embed
|
||||
@ -432,9 +437,21 @@ class PBSIE(InfoExtractor):
|
||||
for vid_id in video_id]
|
||||
return self.playlist_result(entries, display_id)
|
||||
|
||||
info = self._download_json(
|
||||
'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
|
||||
display_id)
|
||||
try:
|
||||
info = self._download_json(
|
||||
'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id,
|
||||
display_id, 'Downloading video info JSON')
|
||||
except ExtractorError as e:
|
||||
if not isinstance(e.cause, compat_HTTPError) or e.cause.code != 404:
|
||||
raise
|
||||
# videoInfo API may not work for some videos, fallback to portalplayer API
|
||||
player = self._download_webpage(
|
||||
'http://player.pbs.org/portalplayer/%s' % video_id, display_id)
|
||||
info = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)PBS\.videoData\s*=\s*({.+?});\n',
|
||||
player, 'video data', default='{}'),
|
||||
display_id, transform_source=js_to_json, fatal=False)
|
||||
|
||||
formats = []
|
||||
for encoding_name in ('recommended_encoding', 'alternate_encoding'):
|
||||
@ -493,7 +510,7 @@ class PBSIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': info['title'],
|
||||
'description': info['program'].get('description'),
|
||||
'description': info.get('description') or info.get('program', {}).get('description'),
|
||||
'thumbnail': info.get('image_url'),
|
||||
'duration': int_or_none(info.get('duration')),
|
||||
'age_limit': age_limit,
|
||||
|
51
youtube_dl/extractor/plays.py
Normal file
51
youtube_dl/extractor/plays.py
Normal file
@ -0,0 +1,51 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class PlaysTVIE(InfoExtractor):
    # Extractor for single plays.tv video pages; the 18-hex-digit path
    # component is used as the video id.
    _VALID_URL = r'https?://(?:www\.)?plays\.tv/video/(?P<id>[0-9a-f]{18})'
    _TEST = {
        'url': 'http://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall',
        'md5': 'dfeac1198506652b5257a62762cec7bc',
        'info_dict': {
            'id': '56af17f56c95335490',
            'ext': 'mp4',
            'title': 'When you outplay the Azir wall',
            'description': 'Posted by Bjergsen',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the plays.tv video page at ``url``.

        Formats come from two places inside the HTML fragment stored under
        the 'content' key of the page's ``R.bindContent(...)`` JSON: the
        DASH manifest referenced by the ``data-mpd`` attribute of the
        ``<video>`` element, and the ``<source res=... src=...>`` children
        of that element (emitted as 'http-<res>' formats).
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._og_search_title(webpage)
        content = self._parse_json(
            self._search_regex(
                r'R\.bindContent\(({.+?})\);', webpage,
                'content'), video_id)['content']

        # Use _search_regex instead of calling .groups() on a bare
        # re.search() result: when the expected <video> element is absent
        # re.search() returns None and .groups() would crash with an
        # AttributeError; the helper applies its own not-found handling.
        video_re = r'(?s)<video[^>]+data-mpd="(?P<mpd>[^"]+)"[^>]*>(?P<sources>.+?)</video>'
        mpd_url = self._search_regex(
            video_re, content, 'mpd url', group='mpd')
        sources = self._search_regex(
            video_re, content, 'video sources', group='sources')

        formats = self._extract_mpd_formats(
            self._proto_relative_url(mpd_url), video_id, mpd_id='DASH')
        # The outer group keeps the raw res value (digits plus an optional
        # trailing "h") for the format id; the inner group is the numeric
        # height only.
        for format_id, height, format_url in re.findall(
                r'<source\s+res="((\d+)h?)"\s+src="([^"]+)"', sources):
            formats.append({
                'url': self._proto_relative_url(format_url),
                'format_id': 'http-' + format_id,
                'height': int_or_none(height),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'formats': formats,
        }
|
@ -20,7 +20,7 @@ from ..utils import (
|
||||
class ProSiebenSat1IE(InfoExtractor):
|
||||
IE_NAME = 'prosiebensat1'
|
||||
IE_DESC = 'ProSiebenSat.1 Digital'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P<id>.+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany|7tv)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P<id>.+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@ -32,7 +32,7 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge',
|
||||
'info_dict': {
|
||||
'id': '2104602',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'Episode 18 - Staffel 2',
|
||||
'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
|
||||
'upload_date': '20131231',
|
||||
@ -138,14 +138,13 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip',
|
||||
'info_dict': {
|
||||
'id': '2572814',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'Andreas Kümmert: Rocket Man',
|
||||
'description': 'md5:6ddb02b0781c6adf778afea606652e38',
|
||||
'upload_date': '20131017',
|
||||
'duration': 469.88,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
@ -153,13 +152,12 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
'url': 'http://www.fem.com/wellness/videos/wellness-video-clip-kurztripps-zum-valentinstag.html',
|
||||
'info_dict': {
|
||||
'id': '2156342',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'Kurztrips zum Valentinstag',
|
||||
'description': 'Romantischer Kurztrip zum Valentinstag? Wir verraten, was sich hier wirklich lohnt.',
|
||||
'description': 'Romantischer Kurztrip zum Valentinstag? Nina Heinemann verrät, was sich hier wirklich lohnt.',
|
||||
'duration': 307.24,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
@ -172,12 +170,26 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 2,
|
||||
},
|
||||
{
|
||||
'url': 'http://www.7tv.de/circus-halligalli/615-best-of-circus-halligalli-ganze-folge',
|
||||
'info_dict': {
|
||||
'id': '4187506',
|
||||
'ext': 'flv',
|
||||
'title': 'Best of Circus HalliGalli',
|
||||
'description': 'md5:8849752efd90b9772c9db6fdf87fb9e9',
|
||||
'upload_date': '20151229',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
_CLIPID_REGEXES = [
|
||||
r'"clip_id"\s*:\s+"(\d+)"',
|
||||
r'clipid: "(\d+)"',
|
||||
r'clip[iI]d=(\d+)',
|
||||
r'clip[iI]d\s*=\s*["\'](\d+)',
|
||||
r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
|
||||
]
|
||||
_TITLE_REGEXES = [
|
||||
@ -186,12 +198,16 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
r'<!-- start video -->\s*<h1>(.+?)</h1>',
|
||||
r'<h1 class="att-name">\s*(.+?)</h1>',
|
||||
r'<header class="module_header">\s*<h2>([^<]+)</h2>\s*</header>',
|
||||
r'<h2 class="video-title" itemprop="name">\s*(.+?)</h2>',
|
||||
r'<div[^>]+id="veeseoTitle"[^>]*>(.+?)</div>',
|
||||
]
|
||||
_DESCRIPTION_REGEXES = [
|
||||
r'<p itemprop="description">\s*(.+?)</p>',
|
||||
r'<div class="videoDecription">\s*<p><strong>Beschreibung</strong>: (.+?)</p>',
|
||||
r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>',
|
||||
r'<p class="att-description">\s*(.+?)\s*</p>',
|
||||
r'<p class="video-description" itemprop="description">\s*(.+?)</p>',
|
||||
r'<div[^>]+id="veeseoDescription"[^>]*>(.+?)</div>',
|
||||
]
|
||||
_UPLOAD_DATE_REGEXES = [
|
||||
r'<meta property="og:published_time" content="(.+?)">',
|
||||
|
@ -1,6 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
@ -61,12 +63,15 @@ class RteIE(InfoExtractor):
|
||||
class RteRadioIE(InfoExtractor):
|
||||
IE_NAME = 'rte:radio'
|
||||
IE_DESC = 'Raidió Teilifís Éireann radio'
|
||||
# Radioplayer URLs have the specifier #!rii=<channel_id>:<id>:<playable_item_id>:<date>:
|
||||
# Radioplayer URLs have two distinct specifier formats,
|
||||
# the old format #!rii=<channel_id>:<id>:<playable_item_id>:<date>:
|
||||
# the new format #!rii=b<channel_id>_<id>_<playable_item_id>_<date>_
|
||||
# where the IDs are int/empty, the date is DD-MM-YYYY, and the specifier may be truncated.
|
||||
# An <id> uniquely defines an individual recording, and is the only part we require.
|
||||
_VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:[0-9]*)(?:%3A|:)(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:b?[0-9]*)(?:%3A|:|%5F|_)(?P<id>[0-9]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# Old-style player URL; HLS and RTMPE formats
|
||||
'url': 'http://www.rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=16:10507902:2414:27-12-2015:',
|
||||
'info_dict': {
|
||||
'id': '10507902',
|
||||
@ -81,7 +86,23 @@ class RteRadioIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': 'f4m fails with --test atm'
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# New-style player URL; RTMPE formats only
|
||||
'url': 'http://rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=b16_3250678_8861_06-04-2012_',
|
||||
'info_dict': {
|
||||
'id': '3250678',
|
||||
'ext': 'flv',
|
||||
'title': 'The Lyric Concert with Paul Herriott',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': '',
|
||||
'timestamp': 1333742400,
|
||||
'upload_date': '20120406',
|
||||
'duration': 7199.016,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'f4m fails with --test atm'
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
item_id = self._match_id(url)
|
||||
@ -102,8 +123,18 @@ class RteRadioIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
|
||||
if mg.get('url') and not mg['url'].startswith('rtmpe:'):
|
||||
formats.append({'url': mg['url']})
|
||||
if mg.get('url'):
|
||||
m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url'])
|
||||
if m:
|
||||
m = m.groupdict()
|
||||
formats.append({
|
||||
'url': m['url'] + '/' + m['app'],
|
||||
'app': m['app'],
|
||||
'play_path': m['playpath'],
|
||||
'player_url': url,
|
||||
'ext': 'flv',
|
||||
'format_id': 'rtmp',
|
||||
})
|
||||
|
||||
if mg.get('hls_server') and mg.get('hls_url'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
|
44
youtube_dl/extractor/ruleporn.py
Normal file
44
youtube_dl/extractor/ruleporn.py
Normal file
@ -0,0 +1,44 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .nuevo import NuevoBaseIE
|
||||
|
||||
|
||||
class RulePornIE(NuevoBaseIE):
    # Extractor for ruleporn.com pages. The last path component of the URL
    # is only a display id; the numeric video id is read from the
    # lovehomeporn.com embed referenced in the page.
    _VALID_URL = r'https?://(?:www\.)?ruleporn\.com/(?:[^/?#&]+/)*(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'http://ruleporn.com/brunette-nympho-chick-takes-her-boyfriend-in-every-angle/',
        'md5': '86861ebc624a1097c7c10eaf06d7d505',
        'info_dict': {
            'id': '48212',
            'display_id': 'brunette-nympho-chick-takes-her-boyfriend-in-every-angle',
            'ext': 'mp4',
            'title': 'Brunette Nympho Chick Takes Her Boyfriend In Every Angle',
            'description': 'md5:6d28be231b981fff1981deaaa03a04d5',
            'age_limit': 18,
            'duration': 635.1,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the ruleporn.com page at ``url``.

        Page-level metadata (title, description) is scraped from the
        ruleporn.com page itself; everything else comes from the nuevo
        config fetched for the embedded video id, with the page-level
        values written on top of it.
        """
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        embed_id = self._search_regex(
            r'lovehomeporn\.com/embed/(\d+)', page, 'video id')

        # The named group is called "url" in the pattern, but what it
        # captures is the value of the <h2> title attribute.
        page_title = self._search_regex(
            r'<h2[^>]+title=(["\'])(?P<url>.+?)\1',
            page, 'title', group='url')
        meta_description = self._html_search_meta('description', page)

        config_url = (
            'http://lovehomeporn.com/media/nuevo/econfig.php?key=%s&rp=true'
            % embed_id)
        result = self._extract_nuevo(config_url, embed_id)

        # Overlay the page-derived fields on whatever the config provided.
        result['display_id'] = slug
        result['title'] = page_title
        result['description'] = meta_description
        result['age_limit'] = 18
        return result
|
@ -71,7 +71,7 @@ class ScreenwaveMediaIE(InfoExtractor):
|
||||
formats = []
|
||||
for source in sources:
|
||||
if source['type'] == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(source['file'], video_id))
|
||||
formats.extend(self._extract_m3u8_formats(source['file'], video_id, ext='mp4'))
|
||||
else:
|
||||
file_ = source.get('file')
|
||||
if not file_:
|
||||
@ -107,7 +107,11 @@ class TeamFourIE(InfoExtractor):
|
||||
'upload_date': '20130401',
|
||||
'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar',
|
||||
'title': 'A Moment With TFS Episode 4',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -53,17 +53,25 @@ class SenateISVPIE(InfoExtractor):
|
||||
'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
|
||||
'info_dict': {
|
||||
'id': 'judiciary031715',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Integrated Senate Video Player',
|
||||
'thumbnail': 're:^https?://.*\.(?:jpg|png)$',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false',
|
||||
'info_dict': {
|
||||
'id': 'commerce011514',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Integrated Senate Video Player'
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi',
|
||||
# checksum differs each time
|
||||
|
@ -7,7 +7,7 @@ from .common import InfoExtractor
|
||||
|
||||
class SpankBangIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
|
||||
'md5': '1cc433e1d6aa14bc376535b8679302f7',
|
||||
'info_dict': {
|
||||
@ -19,7 +19,11 @@ class SpankBangIE(InfoExtractor):
|
||||
'uploader': 'silly2587',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# 480p only
|
||||
'url': 'http://spankbang.com/1vt0/video/solvane+gangbang',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@ -34,11 +38,12 @@ class SpankBangIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'format_id': '%sp' % height,
|
||||
'height': int(height),
|
||||
} for height in re.findall(r'<span[^>]+q_(\d+)p', webpage)]
|
||||
} for height in re.findall(r'<(?:span|li|p)[^>]+[qb]_(\d+)p', webpage)]
|
||||
self._check_formats(formats, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h1>(.+?)</h1>', webpage, 'title')
|
||||
r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title')
|
||||
description = self._search_regex(
|
||||
r'class="desc"[^>]*>([^<]+)',
|
||||
webpage, 'description', default=None)
|
||||
|
@ -70,14 +70,11 @@ class SRGSSRIE(InfoExtractor):
|
||||
asset_url, media_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id=format_id, fatal=False))
|
||||
else:
|
||||
ext = None
|
||||
if protocol == 'RTMP':
|
||||
ext = self._search_regex(r'([a-z0-9]+):[^/]+', asset_url, 'ext')
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': asset_url,
|
||||
'preference': preference(quality),
|
||||
'ext': ext,
|
||||
'ext': 'flv' if protocol == 'RTMP' else None,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@ -37,6 +37,14 @@ class SVTBaseIE(InfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
subtitle_references = video_info.get('subtitleReferences')
|
||||
if isinstance(subtitle_references, list):
|
||||
for sr in subtitle_references:
|
||||
subtitle_url = sr.get('url')
|
||||
if subtitle_url:
|
||||
subtitles.setdefault('sv', []).append({'url': subtitle_url})
|
||||
|
||||
duration = video_info.get('materialLength')
|
||||
age_limit = 18 if video_info.get('inappropriateForChildren') else 0
|
||||
|
||||
@ -44,6 +52,7 @@ class SVTBaseIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'age_limit': age_limit,
|
||||
@ -83,30 +92,23 @@ class SVTIE(SVTBaseIE):
|
||||
class SVTPlayIE(SVTBaseIE):
|
||||
IE_DESC = 'SVT Play and Öppet arkiv'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<host>svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.svtplay.se/video/2609989/sm-veckan/sm-veckan-rally-final-sasong-1-sm-veckan-rally-final',
|
||||
'md5': 'ade3def0643fa1c40587a422f98edfd9',
|
||||
_TEST = {
|
||||
'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2',
|
||||
'md5': '2b6704fe4a28801e1a098bbf3c5ac611',
|
||||
'info_dict': {
|
||||
'id': '2609989',
|
||||
'ext': 'flv',
|
||||
'title': 'SM veckan vinter, Örebro - Rally, final',
|
||||
'duration': 4500,
|
||||
'id': '5996901',
|
||||
'ext': 'mp4',
|
||||
'title': 'Flygplan till Haile Selassie',
|
||||
'duration': 3527,
|
||||
'thumbnail': 're:^https?://.*[\.-]jpg$',
|
||||
'age_limit': 0,
|
||||
'subtitles': {
|
||||
'sv': [{
|
||||
'ext': 'wsrt',
|
||||
}]
|
||||
},
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.oppetarkiv.se/video/1058509/rederiet-sasong-1-avsnitt-1-av-318',
|
||||
'md5': 'c3101a17ce9634f4c1f9800f0746c187',
|
||||
'info_dict': {
|
||||
'id': '1058509',
|
||||
'ext': 'flv',
|
||||
'title': 'Farlig kryssning',
|
||||
'duration': 2566,
|
||||
'thumbnail': 're:^https?://.*[\.-]jpg$',
|
||||
'age_limit': 0,
|
||||
},
|
||||
'skip': 'Only works from Sweden',
|
||||
}]
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@ -20,7 +20,6 @@ from ..utils import (
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
unsmuggle_url,
|
||||
url_basename,
|
||||
xpath_with_ns,
|
||||
)
|
||||
|
||||
@ -85,7 +84,7 @@ class ThePlatformBaseIE(InfoExtractor):
|
||||
class ThePlatformIE(ThePlatformBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
|
||||
(?:(?P<media>(?:[^/]+/)+select/media/)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
|
||||
(?:(?P<media>(?:(?:[^/]+/)+select/)?media/)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
|
||||
|theplatform:)(?P<id>[^/\?&]+)'''
|
||||
|
||||
_TESTS = [{
|
||||
@ -283,8 +282,8 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
|
||||
first_video_id = None
|
||||
duration = None
|
||||
for item in entry['media$content']:
|
||||
smil_url = item['plfile$url'] + '&format=SMIL&Tracking=true&Embedded=true&formats=MPEG4,F4M'
|
||||
cur_video_id = url_basename(smil_url)
|
||||
smil_url = item['plfile$url'] + '&format=SMIL&mbr=true'
|
||||
cur_video_id = ThePlatformIE._match_id(smil_url)
|
||||
if first_video_id is None:
|
||||
first_video_id = cur_video_id
|
||||
duration = float_or_none(item.get('plfile$duration'))
|
||||
|
36
youtube_dl/extractor/trollvids.py
Normal file
36
youtube_dl/extractor/trollvids.py
Normal file
@ -0,0 +1,36 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .nuevo import NuevoBaseIE
|
||||
|
||||
|
||||
class TrollvidsIE(NuevoBaseIE):
    # Extractor for trollvids.com video pages.
    # Both http and https page URLs are accepted; the player config is
    # always requested from the fixed http endpoint below, so the scheme of
    # the page URL does not influence extraction.
    _VALID_URL = r'https?://(?:www\.)?trollvids\.com/video/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
    IE_NAME = 'trollvids'
    _TEST = {
        'url': 'http://trollvids.com/video/2349002/%E3%80%90MMD-R-18%E3%80%91%E3%82%AC%E3%83%BC%E3%83%AB%E3%83%95%E3%83%AC%E3%83%B3%E3%83%89-carrymeoff',
        'md5': '1d53866b2c514b23ed69e4352fdc9839',
        'info_dict': {
            'id': '2349002',
            'ext': 'mp4',
            'title': '【MMD R-18】ガールフレンド carry_me_off',
            'age_limit': 18,
            'duration': 216.78,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the trollvids.com video at ``url``.

        The numeric id and the display id are both taken from the URL; the
        remaining fields come from the nuevo player config for that id,
        with 'display_id' and 'age_limit' set on top.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        info = self._extract_nuevo(
            'http://trollvids.com/nuevo/player/config.php?v=%s' % video_id,
            video_id)
        info.update({
            'display_id': display_id,
            'age_limit': 18
        })
        return info
|
@ -1,11 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import xpath_text
|
||||
from .nuevo import NuevoBaseIE
|
||||
|
||||
|
||||
class TruTubeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>[0-9]+)'
|
||||
class TruTubeIE(NuevoBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-',
|
||||
'md5': 'c5b6e301b0a2040b074746cbeaa26ca1',
|
||||
@ -22,19 +21,6 @@ class TruTubeIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
config = self._download_xml(
|
||||
return self._extract_nuevo(
|
||||
'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id,
|
||||
video_id, transform_source=lambda s: s.strip())
|
||||
|
||||
# filehd is always 404
|
||||
video_url = xpath_text(config, './file', 'video URL', fatal=True)
|
||||
title = xpath_text(config, './title', 'title').strip()
|
||||
thumbnail = xpath_text(config, './image', ' thumbnail')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
video_id)
|
||||
|
@ -1,10 +1,9 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlparse
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
@ -15,25 +14,23 @@ from ..aes import aes_decrypt_text
|
||||
|
||||
class Tube8IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.tube8.com/teen/kasia-music-video/229795/',
|
||||
'md5': '44bf12b98313827dd52d35b8706a4ea0',
|
||||
'info_dict': {
|
||||
'id': '229795',
|
||||
'display_id': 'kasia-music-video',
|
||||
'ext': 'mp4',
|
||||
'description': 'hot teen Kasia grinding',
|
||||
'uploader': 'unknown',
|
||||
'title': 'Kasia music video',
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'http://www.tube8.com/teen/kasia-music-video/229795/',
|
||||
'md5': '65e20c48e6abff62ed0c3965fff13a39',
|
||||
'info_dict': {
|
||||
'id': '229795',
|
||||
'display_id': 'kasia-music-video',
|
||||
'ext': 'mp4',
|
||||
'description': 'hot teen Kasia grinding',
|
||||
'uploader': 'unknown',
|
||||
'title': 'Kasia music video',
|
||||
'age_limit': 18,
|
||||
'duration': 230,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@ -44,14 +41,28 @@ class Tube8IE(InfoExtractor):
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, display_id)
|
||||
|
||||
flashvars = json.loads(self._html_search_regex(
|
||||
r'flashvars\s*=\s*({.+?});\r?\n', webpage, 'flashvars'))
|
||||
flashvars = self._parse_json(
|
||||
self._search_regex(
|
||||
r'flashvars\s*=\s*({.+?});\r?\n', webpage, 'flashvars'),
|
||||
video_id)
|
||||
|
||||
video_url = flashvars['video_url']
|
||||
if flashvars.get('encrypted') is True:
|
||||
video_url = aes_decrypt_text(video_url, flashvars['video_title'], 32).decode('utf-8')
|
||||
path = compat_urllib_parse_urlparse(video_url).path
|
||||
format_id = '-'.join(path.split('/')[4].split('_')[:2])
|
||||
formats = []
|
||||
for key, video_url in flashvars.items():
|
||||
if not isinstance(video_url, compat_str) or not video_url.startswith('http'):
|
||||
continue
|
||||
height = self._search_regex(
|
||||
r'quality_(\d+)[pP]', key, 'height', default=None)
|
||||
if not height:
|
||||
continue
|
||||
if flashvars.get('encrypted') is True:
|
||||
video_url = aes_decrypt_text(
|
||||
video_url, flashvars['video_title'], 32).decode('utf-8')
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': '%sp' % height,
|
||||
'height': int(height),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = flashvars.get('image_url')
|
||||
|
||||
@ -62,32 +73,31 @@ class Tube8IE(InfoExtractor):
|
||||
uploader = self._html_search_regex(
|
||||
r'<span class="username">\s*(.+?)\s*<',
|
||||
webpage, 'uploader', fatal=False)
|
||||
duration = int_or_none(flashvars.get('video_duration'))
|
||||
|
||||
like_count = int_or_none(self._html_search_regex(
|
||||
like_count = int_or_none(self._search_regex(
|
||||
r'rupVar\s*=\s*"(\d+)"', webpage, 'like count', fatal=False))
|
||||
dislike_count = int_or_none(self._html_search_regex(
|
||||
dislike_count = int_or_none(self._search_regex(
|
||||
r'rdownVar\s*=\s*"(\d+)"', webpage, 'dislike count', fatal=False))
|
||||
view_count = self._html_search_regex(
|
||||
r'<strong>Views: </strong>([\d,\.]+)\s*</li>', webpage, 'view count', fatal=False)
|
||||
if view_count:
|
||||
view_count = str_to_int(view_count)
|
||||
comment_count = self._html_search_regex(
|
||||
r'<span id="allCommentsCount">(\d+)</span>', webpage, 'comment count', fatal=False)
|
||||
if comment_count:
|
||||
comment_count = str_to_int(comment_count)
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'<strong>Views: </strong>([\d,\.]+)\s*</li>',
|
||||
webpage, 'view count', fatal=False))
|
||||
comment_count = str_to_int(self._search_regex(
|
||||
r'<span id="allCommentsCount">(\d+)</span>',
|
||||
webpage, 'comment count', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'format_id': format_id,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'dislike_count': dislike_count,
|
||||
'comment_count': comment_count,
|
||||
'age_limit': 18,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -17,18 +17,21 @@ class TV2IE(InfoExtractor):
|
||||
_VALID_URL = 'http://(?:www\.)?tv2\.no/v/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.tv2.no/v/916509/',
|
||||
'md5': '9cb9e3410b18b515d71892f27856e9b1',
|
||||
'info_dict': {
|
||||
'id': '916509',
|
||||
'ext': 'flv',
|
||||
'title': 'Se Gryttens hyllest av Steven Gerrard',
|
||||
'ext': 'mp4',
|
||||
'title': 'Se Frode Gryttens hyllest av Steven Gerrard',
|
||||
'description': 'TV 2 Sportens huspoet tar avskjed med Liverpools kaptein Steven Gerrard.',
|
||||
'timestamp': 1431715610,
|
||||
'upload_date': '20150515',
|
||||
'duration': 156.967,
|
||||
'view_count': int,
|
||||
'categories': list,
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -38,7 +38,7 @@ class UnistraIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
files = set(re.findall(r'file\s*:\s*"([^"]+)"', webpage))
|
||||
files = set(re.findall(r'file\s*:\s*"(/[^"]+)"', webpage))
|
||||
|
||||
quality = qualities(['SD', 'HD'])
|
||||
formats = []
|
||||
|
@ -47,7 +47,7 @@ class UstreamIE(InfoExtractor):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
video_id = m.group('id')
|
||||
|
||||
# some sites use this embed format (see: http://github.com/rg3/youtube-dl/issues/2990)
|
||||
# some sites use this embed format (see: https://github.com/rg3/youtube-dl/issues/2990)
|
||||
if m.group('type') == 'embed/recorded':
|
||||
video_id = m.group('id')
|
||||
desktop_url = 'http://www.ustream.tv/recorded/' + video_id
|
||||
|
@ -3,22 +3,20 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..compat import compat_etree_fromstring
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class VevoIE(InfoExtractor):
|
||||
"""
|
||||
'''
|
||||
Accepts urls from vevo.com or in the format 'vevo:{id}'
|
||||
(currently used by MTVIE and MySpaceIE)
|
||||
"""
|
||||
'''
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?|
|
||||
https?://cache\.vevo\.com/m/html/embed\.html\?video=|
|
||||
@ -28,19 +26,15 @@ class VevoIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
||||
"md5": "95ee28ee45e70130e3ab02b0f579ae23",
|
||||
'md5': '95ee28ee45e70130e3ab02b0f579ae23',
|
||||
'info_dict': {
|
||||
'id': 'GB1101300280',
|
||||
'ext': 'mp4',
|
||||
"upload_date": "20130624",
|
||||
"uploader": "Hurts",
|
||||
"title": "Somebody to Die For",
|
||||
"duration": 230.12,
|
||||
"width": 1920,
|
||||
"height": 1080,
|
||||
# timestamp and upload_date are often incorrect; seem to change randomly
|
||||
'timestamp': int,
|
||||
}
|
||||
'title': 'Somebody to Die For',
|
||||
'upload_date': '20130624',
|
||||
'uploader': 'Hurts',
|
||||
'timestamp': 1372057200,
|
||||
},
|
||||
}, {
|
||||
'note': 'v3 SMIL format',
|
||||
'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
|
||||
@ -48,28 +42,23 @@ class VevoIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'USUV71302923',
|
||||
'ext': 'mp4',
|
||||
'title': 'I Wish I Could Break Your Heart',
|
||||
'upload_date': '20140219',
|
||||
'uploader': 'Cassadee Pope',
|
||||
'title': 'I Wish I Could Break Your Heart',
|
||||
'duration': 226.101,
|
||||
'age_limit': 0,
|
||||
'timestamp': int,
|
||||
}
|
||||
'timestamp': 1392796919,
|
||||
},
|
||||
}, {
|
||||
'note': 'Age-limited video',
|
||||
'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
|
||||
'info_dict': {
|
||||
'id': 'USRV81300282',
|
||||
'ext': 'mp4',
|
||||
'age_limit': 18,
|
||||
'title': 'Tunnel Vision (Explicit)',
|
||||
'upload_date': '20130703',
|
||||
'age_limit': 18,
|
||||
'uploader': 'Justin Timberlake',
|
||||
'upload_date': 're:2013070[34]',
|
||||
'timestamp': int,
|
||||
'timestamp': 1372888800,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'true',
|
||||
}
|
||||
}, {
|
||||
'note': 'No video_info',
|
||||
'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000',
|
||||
@ -77,69 +66,46 @@ class VevoIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'USUV71503000',
|
||||
'ext': 'mp4',
|
||||
'title': 'Till I Die - K Camp ft. T.I.',
|
||||
'duration': 193,
|
||||
'title': 'Till I Die',
|
||||
'upload_date': '20151207',
|
||||
'age_limit': 18,
|
||||
'uploader': 'K Camp',
|
||||
'timestamp': 1449468000,
|
||||
},
|
||||
'expected_warnings': ['Unable to download SMIL file'],
|
||||
}]
|
||||
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
|
||||
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com'
|
||||
_SOURCE_TYPES = {
|
||||
0: 'youtube',
|
||||
1: 'brightcove',
|
||||
2: 'http',
|
||||
3: 'hls_ios',
|
||||
4: 'hls',
|
||||
5: 'smil', # http
|
||||
7: 'f4m_cc',
|
||||
8: 'f4m_ak',
|
||||
9: 'f4m_l3',
|
||||
10: 'ism',
|
||||
13: 'smil', # rtmp
|
||||
18: 'dash',
|
||||
}
|
||||
_VERSIONS = {
|
||||
0: 'youtube', # only in AuthenticateVideo videoVersions
|
||||
1: 'level3',
|
||||
2: 'akamai',
|
||||
3: 'level3',
|
||||
4: 'amazon',
|
||||
}
|
||||
|
||||
def _real_initialize(self):
|
||||
req = sanitized_Request(
|
||||
'http://www.vevo.com/auth', data=b'')
|
||||
webpage = self._download_webpage(
|
||||
req, None,
|
||||
note='Retrieving oauth token',
|
||||
errnote='Unable to retrieve oauth token',
|
||||
fatal=False)
|
||||
if webpage is False:
|
||||
self._oauth_token = None
|
||||
else:
|
||||
if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage:
|
||||
raise ExtractorError('%s said: This page is currently unavailable in your region.' % self.IE_NAME, expected=True)
|
||||
|
||||
self._oauth_token = self._search_regex(
|
||||
r'access_token":\s*"([^"]+)"',
|
||||
webpage, 'access token', fatal=False)
|
||||
|
||||
def _formats_from_json(self, video_info):
|
||||
if not video_info:
|
||||
return []
|
||||
|
||||
last_version = {'version': -1}
|
||||
for version in video_info['videoVersions']:
|
||||
# These are the HTTP downloads, other types are for different manifests
|
||||
if version['sourceType'] == 2:
|
||||
if version['version'] > last_version['version']:
|
||||
last_version = version
|
||||
if last_version['version'] == -1:
|
||||
raise ExtractorError('Unable to extract last version of the video')
|
||||
|
||||
renditions = compat_etree_fromstring(last_version['data'])
|
||||
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
|
||||
formats = []
|
||||
# Already sorted from worst to best quality
|
||||
for rend in renditions.findall('rendition'):
|
||||
attr = rend.attrib
|
||||
format_note = '%(videoCodec)s@%(videoBitrate)4sk, %(audioCodec)s@%(audioBitrate)3sk' % attr
|
||||
formats.append({
|
||||
'url': attr['url'],
|
||||
'format_id': attr['name'],
|
||||
'format_note': format_note,
|
||||
'height': int(attr['frameheight']),
|
||||
'width': int(attr['frameWidth']),
|
||||
})
|
||||
return formats
|
||||
|
||||
def _formats_from_smil(self, smil_doc):
|
||||
formats = []
|
||||
els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video')
|
||||
els = smil.findall('.//{http://www.w3.org/2001/SMIL20/Language}video')
|
||||
for el in els:
|
||||
src = el.attrib['src']
|
||||
m = re.match(r'''(?xi)
|
||||
(?P<ext>[a-z0-9]+):
|
||||
(?P<path>
|
||||
[/a-z0-9]+ # The directory and main part of the URL
|
||||
_(?P<cbr>[0-9]+)k
|
||||
_(?P<tbr>[0-9]+)k
|
||||
_(?P<width>[0-9]+)x(?P<height>[0-9]+)
|
||||
_(?P<vcodec>[a-z0-9]+)
|
||||
_(?P<vbr>[0-9]+)
|
||||
@ -153,9 +119,10 @@ class VevoIE(InfoExtractor):
|
||||
format_url = self._SMIL_BASE_URL + m.group('path')
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': 'SMIL_' + m.group('cbr'),
|
||||
'format_id': 'smil_' + m.group('tbr'),
|
||||
'vcodec': m.group('vcodec'),
|
||||
'acodec': m.group('acodec'),
|
||||
'tbr': int(m.group('tbr')),
|
||||
'vbr': int(m.group('vbr')),
|
||||
'abr': int(m.group('abr')),
|
||||
'ext': m.group('ext'),
|
||||
@ -164,48 +131,154 @@ class VevoIE(InfoExtractor):
|
||||
})
|
||||
return formats
|
||||
|
||||
def _download_api_formats(self, video_id, video_url):
|
||||
if not self._oauth_token:
|
||||
self._downloader.report_warning(
|
||||
'No oauth token available, skipping API HLS download')
|
||||
return []
|
||||
def _initialize_api(self, video_id):
|
||||
req = sanitized_Request(
|
||||
'http://www.vevo.com/auth', data=b'')
|
||||
webpage = self._download_webpage(
|
||||
req, None,
|
||||
note='Retrieving oauth token',
|
||||
errnote='Unable to retrieve oauth token')
|
||||
|
||||
api_url = compat_urlparse.urljoin(video_url, '//apiv2.vevo.com/video/%s/streams/hls?token=%s' % (
|
||||
video_id, self._oauth_token))
|
||||
api_data = self._download_json(
|
||||
api_url, video_id,
|
||||
note='Downloading HLS formats',
|
||||
errnote='Failed to download HLS format list', fatal=False)
|
||||
if api_data is None:
|
||||
return []
|
||||
if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage:
|
||||
raise ExtractorError(
|
||||
'%s said: This page is currently unavailable in your region.' % self.IE_NAME, expected=True)
|
||||
|
||||
m3u8_url = api_data[0]['url']
|
||||
return self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, entry_protocol='m3u8_native', ext='mp4',
|
||||
preference=0)
|
||||
auth_info = self._parse_json(webpage, video_id)
|
||||
self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['access_token']
|
||||
|
||||
def _call_api(self, path, video_id, note, errnote, fatal=True):
|
||||
return self._download_json(self._api_url_template % path, video_id, note, errnote)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = None
|
||||
|
||||
json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
|
||||
response = self._download_json(json_url, video_id)
|
||||
video_info = response['video'] or {}
|
||||
|
||||
if not video_info and response.get('statusCode') != 909:
|
||||
if 'statusMessage' in response:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusMessage']), expected=True)
|
||||
raise ExtractorError('Unable to extract videos')
|
||||
response = self._download_json(
|
||||
json_url, video_id, 'Downloading video info', 'Unable to download info')
|
||||
video_info = response.get('video') or {}
|
||||
video_versions = video_info.get('videoVersions')
|
||||
uploader = None
|
||||
timestamp = None
|
||||
view_count = None
|
||||
formats = []
|
||||
|
||||
if not video_info:
|
||||
if url.startswith('vevo:'):
|
||||
raise ExtractorError('Please specify full Vevo URL for downloading', expected=True)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
if response.get('statusCode') != 909:
|
||||
ytid = response.get('errorInfo', {}).get('ytid')
|
||||
if ytid:
|
||||
self.report_warning(
|
||||
'Video is geoblocked, trying with the YouTube video %s' % ytid)
|
||||
return self.url_result(ytid, 'Youtube', ytid)
|
||||
|
||||
title = video_info.get('title') or self._og_search_title(webpage)
|
||||
if 'statusMessage' in response:
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, response['statusMessage']), expected=True)
|
||||
raise ExtractorError('Unable to extract videos')
|
||||
|
||||
formats = self._formats_from_json(video_info)
|
||||
self._initialize_api(video_id)
|
||||
video_info = self._call_api(
|
||||
'video/%s' % video_id, video_id, 'Downloading api video info',
|
||||
'Failed to download video info')
|
||||
|
||||
video_versions = self._call_api(
|
||||
'video/%s/streams' % video_id, video_id,
|
||||
'Downloading video versions info',
|
||||
'Failed to download video versions info')
|
||||
|
||||
timestamp = parse_iso8601(video_info.get('releaseDate'))
|
||||
artists = video_info.get('artists')
|
||||
if artists:
|
||||
uploader = artists[0]['name']
|
||||
view_count = int_or_none(video_info.get('views', {}).get('total'))
|
||||
|
||||
for video_version in video_versions:
|
||||
version = self._VERSIONS.get(video_version['version'])
|
||||
version_url = video_version.get('url')
|
||||
if not version_url:
|
||||
continue
|
||||
|
||||
if '.ism' in version_url:
|
||||
continue
|
||||
elif '.mpd' in version_url:
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
version_url, video_id, mpd_id='dash-%s' % version,
|
||||
note='Downloading %s MPD information' % version,
|
||||
errnote='Failed to download %s MPD information' % version,
|
||||
fatal=False))
|
||||
elif '.m3u8' in version_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
version_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls-%s' % version,
|
||||
note='Downloading %s m3u8 information' % version,
|
||||
errnote='Failed to download %s m3u8 information' % version,
|
||||
fatal=False))
|
||||
else:
|
||||
m = re.search(r'''(?xi)
|
||||
_(?P<width>[0-9]+)x(?P<height>[0-9]+)
|
||||
_(?P<vcodec>[a-z0-9]+)
|
||||
_(?P<vbr>[0-9]+)
|
||||
_(?P<acodec>[a-z0-9]+)
|
||||
_(?P<abr>[0-9]+)
|
||||
\.(?P<ext>[a-z0-9]+)''', version_url)
|
||||
if not m:
|
||||
continue
|
||||
|
||||
formats.append({
|
||||
'url': version_url,
|
||||
'format_id': 'http-%s-%s' % (version, video_version['quality']),
|
||||
'vcodec': m.group('vcodec'),
|
||||
'acodec': m.group('acodec'),
|
||||
'vbr': int(m.group('vbr')),
|
||||
'abr': int(m.group('abr')),
|
||||
'ext': m.group('ext'),
|
||||
'width': int(m.group('width')),
|
||||
'height': int(m.group('height')),
|
||||
})
|
||||
else:
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'/Date\((\d+)\)/',
|
||||
video_info['releaseDate'], 'release date', fatal=False),
|
||||
scale=1000)
|
||||
artists = video_info.get('mainArtists')
|
||||
if artists:
|
||||
uploader = artists[0]['artistName']
|
||||
|
||||
smil_parsed = False
|
||||
for video_version in video_info['videoVersions']:
|
||||
version = self._VERSIONS.get(video_version['version'])
|
||||
if version == 'youtube':
|
||||
continue
|
||||
else:
|
||||
source_type = self._SOURCE_TYPES.get(video_version['sourceType'])
|
||||
renditions = compat_etree_fromstring(video_version['data'])
|
||||
if source_type == 'http':
|
||||
for rend in renditions.findall('rendition'):
|
||||
attr = rend.attrib
|
||||
formats.append({
|
||||
'url': attr['url'],
|
||||
'format_id': 'http-%s-%s' % (version, attr['name']),
|
||||
'height': int_or_none(attr.get('frameheight')),
|
||||
'width': int_or_none(attr.get('frameWidth')),
|
||||
'tbr': int_or_none(attr.get('totalBitrate')),
|
||||
'vbr': int_or_none(attr.get('videoBitrate')),
|
||||
'abr': int_or_none(attr.get('audioBitrate')),
|
||||
'vcodec': attr.get('videoCodec'),
|
||||
'acodec': attr.get('audioCodec'),
|
||||
})
|
||||
elif source_type == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
renditions.find('rendition').attrib['url'], video_id,
|
||||
'mp4', 'm3u8_native', m3u8_id='hls-%s' % version,
|
||||
note='Downloading %s m3u8 information' % version,
|
||||
errnote='Failed to download %s m3u8 information' % version,
|
||||
fatal=False))
|
||||
elif source_type == 'smil' and version == 'level3' and not smil_parsed:
|
||||
formats.extend(self._extract_smil_formats(
|
||||
renditions.find('rendition').attrib['url'], video_id, False))
|
||||
smil_parsed = True
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = video_info['title']
|
||||
|
||||
is_explicit = video_info.get('isExplicit')
|
||||
if is_explicit is True:
|
||||
@ -215,43 +288,16 @@ class VevoIE(InfoExtractor):
|
||||
else:
|
||||
age_limit = None
|
||||
|
||||
# Download via HLS API
|
||||
formats.extend(self._download_api_formats(video_id, url))
|
||||
|
||||
# Download SMIL
|
||||
smil_blocks = sorted((
|
||||
f for f in video_info.get('videoVersions', [])
|
||||
if f['sourceType'] == 13),
|
||||
key=lambda f: f['version'])
|
||||
smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
|
||||
self._SMIL_BASE_URL, video_id, video_id.lower())
|
||||
if smil_blocks:
|
||||
smil_url_m = self._search_regex(
|
||||
r'url="([^"]+)"', smil_blocks[-1]['data'], 'SMIL URL',
|
||||
default=None)
|
||||
if smil_url_m is not None:
|
||||
smil_url = smil_url_m
|
||||
if smil_url:
|
||||
smil_doc = self._download_smil(smil_url, video_id, fatal=False)
|
||||
if smil_doc:
|
||||
formats.extend(self._formats_from_smil(smil_doc))
|
||||
|
||||
self._sort_formats(formats)
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'/Date\((\d+)\)/',
|
||||
video_info['launchDate'], 'launch date', fatal=False),
|
||||
scale=1000) if video_info else None
|
||||
|
||||
duration = video_info.get('duration') or int_or_none(
|
||||
self._html_search_meta('video:duration', webpage))
|
||||
duration = video_info.get('duration')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': video_info.get('imageUrl'),
|
||||
'thumbnail': video_info.get('imageUrl') or video_info.get('thumbnailUrl'),
|
||||
'timestamp': timestamp,
|
||||
'uploader': video_info['mainArtists'][0]['artistName'] if video_info else None,
|
||||
'uploader': uploader,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
|
@ -86,10 +86,9 @@ class VGTVIE(XstreamIE):
|
||||
{
|
||||
# streamType: wasLive
|
||||
'url': 'http://www.vgtv.no/#!/live/113063/direkte-v75-fra-solvalla',
|
||||
'md5': '458f4841239dab414343b50e5af8869c',
|
||||
'info_dict': {
|
||||
'id': '113063',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'V75 fra Solvalla 30.05.15',
|
||||
'description': 'md5:b3743425765355855f88e096acc93231',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
@ -98,6 +97,10 @@ class VGTVIE(XstreamIE):
|
||||
'upload_date': '20150530',
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
|
||||
|
@ -1,6 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
@ -12,10 +16,10 @@ class ViddlerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.viddler.com/v/43903784',
|
||||
'md5': 'ae43ad7cb59431ce043f0ff7fa13cbf4',
|
||||
'md5': '9eee21161d2c7f5b39690c3e325fab2f',
|
||||
'info_dict': {
|
||||
'id': '43903784',
|
||||
'ext': 'mp4',
|
||||
'ext': 'mov',
|
||||
'title': 'Video Made Easy',
|
||||
'description': 'md5:6a697ebd844ff3093bd2e82c37b409cd',
|
||||
'uploader': 'viddler',
|
||||
@ -29,10 +33,10 @@ class ViddlerIE(InfoExtractor):
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.viddler.com/v/4d03aad9/',
|
||||
'md5': 'faa71fbf70c0bee7ab93076fd007f4b0',
|
||||
'md5': 'f12c5a7fa839c47a79363bfdf69404fb',
|
||||
'info_dict': {
|
||||
'id': '4d03aad9',
|
||||
'ext': 'mp4',
|
||||
'ext': 'ts',
|
||||
'title': 'WALL-TO-GORTAT',
|
||||
'upload_date': '20150126',
|
||||
'uploader': 'deadspin',
|
||||
@ -42,10 +46,10 @@ class ViddlerIE(InfoExtractor):
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.viddler.com/player/221ebbbd/0/',
|
||||
'md5': '0defa2bd0ea613d14a6e9bd1db6be326',
|
||||
'md5': '740511f61d3d1bb71dc14a0fe01a1c10',
|
||||
'info_dict': {
|
||||
'id': '221ebbbd',
|
||||
'ext': 'mp4',
|
||||
'ext': 'mov',
|
||||
'title': 'LETeens-Grammar-snack-third-conditional',
|
||||
'description': ' ',
|
||||
'upload_date': '20140929',
|
||||
@ -54,16 +58,42 @@ class ViddlerIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
}, {
|
||||
# secret protected
|
||||
'url': 'http://www.viddler.com/v/890c0985?secret=34051570',
|
||||
'info_dict': {
|
||||
'id': '890c0985',
|
||||
'ext': 'mp4',
|
||||
'title': 'Complete Property Training - Traineeships',
|
||||
'description': ' ',
|
||||
'upload_date': '20130606',
|
||||
'uploader': 'TiffanyBowtell',
|
||||
'timestamp': 1370496993,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
json_url = (
|
||||
'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?video_id=%s&key=v0vhrt7bg2xq1vyxhkct' %
|
||||
video_id)
|
||||
query = {
|
||||
'video_id': video_id,
|
||||
'key': 'v0vhrt7bg2xq1vyxhkct',
|
||||
}
|
||||
|
||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
secret = qs.get('secret', [None])[0]
|
||||
if secret:
|
||||
query['secret'] = secret
|
||||
|
||||
headers = {'Referer': 'http://static.cdn-ec.viddler.com/js/arpeggio/v2/embed.html'}
|
||||
request = sanitized_Request(json_url, None, headers)
|
||||
request = sanitized_Request(
|
||||
'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?%s'
|
||||
% compat_urllib_parse.urlencode(query), None, headers)
|
||||
data = self._download_json(request, video_id)['video']
|
||||
|
||||
formats = []
|
||||
|
@ -114,7 +114,7 @@ class VideomoreIE(InfoExtractor):
|
||||
|
||||
data = self._download_json(
|
||||
'http://videomore.ru/video/tracks/%s.json' % video_id,
|
||||
video_id, 'Downloadinng video JSON')
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
title = data.get('title') or data['project_title']
|
||||
description = data.get('description') or data.get('description_raw')
|
||||
|
@ -1,5 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
@ -11,7 +13,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class VidmeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]+)'
|
||||
IE_NAME = 'vidme'
|
||||
_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{,5})(?:[^\da-zA-Z]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://vid.me/QNB',
|
||||
'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
|
||||
@ -202,3 +205,69 @@ class VidmeIE(InfoExtractor):
|
||||
'comment_count': comment_count,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class VidmeListBaseIE(InfoExtractor):
|
||||
# Max possible limit according to https://docs.vid.me/#api-Videos-List
|
||||
_LIMIT = 100
|
||||
|
||||
def _entries(self, user_id, user_name):
|
||||
for page_num in itertools.count(1):
|
||||
page = self._download_json(
|
||||
'https://api.vid.me/videos/%s?user=%s&limit=%d&offset=%d'
|
||||
% (self._API_ITEM, user_id, self._LIMIT, (page_num - 1) * self._LIMIT),
|
||||
user_name, 'Downloading user %s page %d' % (self._API_ITEM, page_num))
|
||||
|
||||
videos = page.get('videos', [])
|
||||
if not videos:
|
||||
break
|
||||
|
||||
for video in videos:
|
||||
video_url = video.get('full_url') or video.get('embed_url')
|
||||
if video_url:
|
||||
yield self.url_result(video_url, VidmeIE.ie_key())
|
||||
|
||||
total = int_or_none(page.get('page', {}).get('total'))
|
||||
if total and self._LIMIT * page_num >= total:
|
||||
break
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_name = self._match_id(url)
|
||||
|
||||
user_id = self._download_json(
|
||||
'https://api.vid.me/userByUsername?username=%s' % user_name,
|
||||
user_name)['user']['user_id']
|
||||
|
||||
return self.playlist_result(
|
||||
self._entries(user_id, user_name), user_id,
|
||||
'%s - %s' % (user_name, self._TITLE))
|
||||
|
||||
|
||||
class VidmeUserIE(VidmeListBaseIE):
|
||||
IE_NAME = 'vidme:user'
|
||||
_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{6,})(?!/likes)(?:[^\da-zA-Z]|$)'
|
||||
_API_ITEM = 'list'
|
||||
_TITLE = 'Videos'
|
||||
_TEST = {
|
||||
'url': 'https://vid.me/EFARCHIVE',
|
||||
'info_dict': {
|
||||
'id': '3834632',
|
||||
'title': 'EFARCHIVE - %s' % _TITLE,
|
||||
},
|
||||
'playlist_mincount': 238,
|
||||
}
|
||||
|
||||
|
||||
class VidmeUserLikesIE(VidmeListBaseIE):
|
||||
IE_NAME = 'vidme:user:likes'
|
||||
_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{6,})/likes'
|
||||
_API_ITEM = 'likes'
|
||||
_TITLE = 'Likes'
|
||||
_TEST = {
|
||||
'url': 'https://vid.me/ErinAlexis/likes',
|
||||
'info_dict': {
|
||||
'id': '6483530',
|
||||
'title': 'ErinAlexis - %s' % _TITLE,
|
||||
},
|
||||
'playlist_mincount': 415,
|
||||
}
|
||||
|
@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
|
||||
|
||||
class VidziIE(InfoExtractor):
|
||||
@ -13,6 +14,11 @@ class VidziIE(InfoExtractor):
|
||||
'id': 'cghql9yq6emu',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl test video 1\\\\2\'3/4<5\\\\6ä7↭',
|
||||
'uploader': 'vidzi.tv',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
@ -20,19 +26,14 @@ class VidziIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_host = self._html_search_regex(
|
||||
r'id=\'vplayer\'><img src="http://(.*?)/i', webpage,
|
||||
'video host')
|
||||
video_hash = self._html_search_regex(
|
||||
r'\|([a-z0-9]+)\|hls\|type', webpage, 'video_hash')
|
||||
ext = self._html_search_regex(
|
||||
r'\|tracks\|([a-z0-9]+)\|', webpage, 'video ext')
|
||||
video_url = 'http://' + video_host + '/' + video_hash + '/v.' + ext
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
|
||||
|
||||
# Vidzi now uses jwplayer, which can be handled by GenericIE
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'url': smuggle_url(url, {'to_generic': True}),
|
||||
'ie_key': 'Generic',
|
||||
}
|
||||
|
@ -155,10 +155,10 @@ class ViewsterIE(InfoExtractor):
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
synopsis = info.get('Synopsis', {})
|
||||
synopsis = info.get('Synopsis') or {}
|
||||
# Prefer title outside synopsis since it's less messy
|
||||
title = (info.get('Title') or synopsis['Title']).strip()
|
||||
description = synopsis.get('Detailed') or info.get('Synopsis', {}).get('Short')
|
||||
description = synopsis.get('Detailed') or (info.get('Synopsis') or {}).get('Short')
|
||||
duration = int_or_none(info.get('Duration'))
|
||||
timestamp = parse_iso8601(info.get('ReleaseDate'))
|
||||
|
||||
|
@ -45,6 +45,10 @@ class ViideaIE(InfoExtractor):
|
||||
'upload_date': '20130627',
|
||||
'duration': 565,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# video with invalid direct format links (HTTP 403)
|
||||
'url': 'http://videolectures.net/russir2010_filippova_nlp/',
|
||||
|
@ -57,7 +57,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
def _extract_xsrft_and_vuid(self, webpage):
|
||||
xsrft = self._search_regex(
|
||||
r'xsrft\s*[=:]\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
|
||||
r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
|
||||
webpage, 'login token', group='xsrft')
|
||||
vuid = self._search_regex(
|
||||
r'["\']vuid["\']\s*:\s*(["\'])(?P<vuid>.+?)\1',
|
||||
@ -430,7 +430,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
|
||||
source_name = source_file.get('public_name', 'Original')
|
||||
if self._is_valid_url(download_url, video_id, '%s video' % source_name):
|
||||
ext = source_file.get('extension', determine_ext(download_url)).lower(),
|
||||
ext = source_file.get('extension', determine_ext(download_url)).lower()
|
||||
formats.append({
|
||||
'url': download_url,
|
||||
'ext': ext,
|
||||
|
@ -265,7 +265,7 @@ class VKIE(InfoExtractor):
|
||||
return self.url_result(pladform_url)
|
||||
|
||||
m_rutube = re.search(
|
||||
r'\ssrc="((?:https?:)?//rutube\.ru\\?/video\\?/embed(?:.*?))\\?"', info_page)
|
||||
r'\ssrc="((?:https?:)?//rutube\.ru\\?/(?:video|play)\\?/embed(?:.*?))\\?"', info_page)
|
||||
if m_rutube is not None:
|
||||
rutube_url = self._proto_relative_url(
|
||||
m_rutube.group(1).replace('\\', ''))
|
||||
@ -321,7 +321,7 @@ class VKIE(InfoExtractor):
|
||||
class VKUserVideosIE(InfoExtractor):
|
||||
IE_NAME = 'vk:uservideos'
|
||||
IE_DESC = "VK - User's Videos"
|
||||
_VALID_URL = r'https?://vk\.com/videos(?P<id>-?[0-9]+)$'
|
||||
_VALID_URL = r'https?://vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)'
|
||||
_TEMPLATE_URL = 'https://vk.com/videos'
|
||||
_TESTS = [{
|
||||
'url': 'http://vk.com/videos205387401',
|
||||
@ -333,6 +333,9 @@ class VKUserVideosIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://vk.com/videos-77521',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://vk.com/videos-97664626?section=all',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user