Compare commits
266 Commits
2016.09.11
...
2016.10.19
Author | SHA1 | Date | |
---|---|---|---|
|
c8802041dd | ||
|
c7911009a0 | ||
|
2b96b06bf0 | ||
|
06b3fe2926 | ||
|
2c6743bf0f | ||
|
efb6242916 | ||
|
0384932e3d | ||
|
edd6074cea | ||
|
791d29dbf8 | ||
|
481cc7335c | ||
|
853a71b628 | ||
|
e2628fb6a0 | ||
|
df4939b1cd | ||
|
0b94dbb115 | ||
|
8d76bdf12b | ||
|
8204bacf1d | ||
|
47da782337 | ||
|
74324a7ac2 | ||
|
b0dfcab60a | ||
|
bbd7706898 | ||
|
112740e79f | ||
|
c0b1e88895 | ||
|
7cdfbbf9b8 | ||
|
ac943d48d3 | ||
|
73498a8921 | ||
|
9187ee4f19 | ||
|
2273e2c530 | ||
|
4b492e3579 | ||
|
9c4258bcec | ||
|
ea8aefd1d7 | ||
|
6edfc40a0e | ||
|
68d9561ca1 | ||
|
cfc0e7c82b | ||
|
4102e64051 | ||
|
f605242bfc | ||
|
d32fa0f12c | ||
|
a347a0d088 | ||
|
77c5b98dcd | ||
|
88ebefc054 | ||
|
2e638d7bca | ||
|
a26b174c61 | ||
|
73c801d660 | ||
|
dff5107b68 | ||
|
8c3e448e80 | ||
|
2ecbd2ad6f | ||
|
62a0b86e4f | ||
|
146969e05b | ||
|
e2004ccaf7 | ||
|
a5f8473145 | ||
|
b7f59a3bf6 | ||
|
580d411931 | ||
|
5c4bfd4da5 | ||
|
7104ae799c | ||
|
bcd6276520 | ||
|
591e384552 | ||
|
9feb1c9731 | ||
|
a093cfc78b | ||
|
6f20b65e72 | ||
|
cea364f70c | ||
|
55642487f0 | ||
|
3d643f4cec | ||
|
c452e69d3d | ||
|
555787d717 | ||
|
f165ca70eb | ||
|
27b8d2ee95 | ||
|
71cdcb2331 | ||
|
176006a120 | ||
|
65f4c1de3d | ||
|
b0082629a9 | ||
|
8204c73352 | ||
|
2b51dac1f9 | ||
|
f68901e50a | ||
|
3adb9d119e | ||
|
1dd58e14d8 | ||
|
dd4291f729 | ||
|
888f8d6ba4 | ||
|
f475e88121 | ||
|
3c6b3bf221 | ||
|
38588ab977 | ||
|
85bcdd081c | ||
|
9dcd6fd3aa | ||
|
98763ee354 | ||
|
3d83a1ae92 | ||
|
c0a7b9b348 | ||
|
831a34caa2 | ||
|
09b9c45e24 | ||
|
33898fb19c | ||
|
017eb82934 | ||
|
b1d798887e | ||
|
0a33bb2cb2 | ||
|
185744f92f | ||
|
7232e54813 | ||
|
6eb5503b12 | ||
|
539c881bfc | ||
|
c1b2a0858c | ||
|
215ff6e0f3 | ||
|
dcdb292fdd | ||
|
c1084ddb0c | ||
|
ee5de4e38e | ||
|
25291b979a | ||
|
567a5996ca | ||
|
6c152ce20f | ||
|
26406d33c7 | ||
|
703b3afa93 | ||
|
99ed78c79e | ||
|
fd15264172 | ||
|
bd26441205 | ||
|
b19e275d99 | ||
|
f6ba581f89 | ||
|
6d2549fb4f | ||
|
4da4516973 | ||
|
e1e97c2446 | ||
|
53a7e3d287 | ||
|
d54739a2e6 | ||
|
63e0fd5bcc | ||
|
9c51a24642 | ||
|
9bd7bd0b80 | ||
|
4a76b73c6c | ||
|
e295618f9e | ||
|
d7753d1948 | ||
|
eaf9b22f94 | ||
|
a1001f47fc | ||
|
1609782258 | ||
|
de6babf922 | ||
|
b0582fc806 | ||
|
af33dd8ee7 | ||
|
70d7b323b6 | ||
|
a7ee8a00f4 | ||
|
c6eed6b8c0 | ||
|
3aa3953d28 | ||
|
efa97bdcf1 | ||
|
475f8a4580 | ||
|
93aa0b6318 | ||
|
0ce26ef228 | ||
|
0d72ff9c51 | ||
|
a56e74e271 | ||
|
f533490bb7 | ||
|
8bfda726c2 | ||
|
8f0cf20ab9 | ||
|
c8f45f763c | ||
|
dd2cffeeec | ||
|
cdfcc4ce95 | ||
|
e384552590 | ||
|
1a2fbe322e | ||
|
f9dd86a112 | ||
|
2342733f85 | ||
|
93933c9819 | ||
|
d75d9e343e | ||
|
72c3d02d29 | ||
|
d3dbb46330 | ||
|
fffb9cff94 | ||
|
d3c97bad61 | ||
|
2d5b4af007 | ||
|
f1ee462c82 | ||
|
5742c18bc1 | ||
|
ddb19772d5 | ||
|
a3d8b38168 | ||
|
e590b7ff9e | ||
|
f3625cc4ca | ||
|
2d3d29976b | ||
|
493353c7fd | ||
|
0a078550b9 | ||
|
f92bb612c6 | ||
|
ddde91952f | ||
|
63c583eb2c | ||
|
7fd57de6fb | ||
|
e71a450956 | ||
|
27e99078d3 | ||
|
6f126d903f | ||
|
7518a61d41 | ||
|
8e45e1cc4d | ||
|
f0bc5a8609 | ||
|
a54ffb8aa7 | ||
|
8add4bfecb | ||
|
0711995bca | ||
|
5968d7d2fe | ||
|
e6332059ac | ||
|
8eec691e8a | ||
|
24628cf7db | ||
|
71ad00c09f | ||
|
45cae3b021 | ||
|
4ddcb5999d | ||
|
628406db96 | ||
|
e3d6bdc8fc | ||
|
0a439c5c4c | ||
|
1978540a51 | ||
|
12f211d0cb | ||
|
3a5a18705f | ||
|
1ae0ae5db0 | ||
|
f62a77b99a | ||
|
4bfd294e2f | ||
|
e33a7253b2 | ||
|
c38f06818d | ||
|
cb57386873 | ||
|
59fd8f931d | ||
|
70b4cf9b1b | ||
|
cc764a6da8 | ||
|
d8dbf8707d | ||
|
a1da888d0c | ||
|
3acff9423d | ||
|
9ca93b99d1 | ||
|
14ae11efab | ||
|
190d2027d0 | ||
|
26394d021d | ||
|
30d0b549be | ||
|
86f4d14f81 | ||
|
21d21b0c72 | ||
|
b4c1d6e800 | ||
|
a0d5077c8d | ||
|
584d6f3457 | ||
|
e14c82bd6b | ||
|
c51a7f0b2f | ||
|
d05ef09d9d | ||
|
30d9e20938 | ||
|
fc86d4eed0 | ||
|
7d273a387a | ||
|
6ad0219556 | ||
|
98b7506e96 | ||
|
52dc8a9b3f | ||
|
9d8985a165 | ||
|
f5e008d134 | ||
|
e6bf3621e7 | ||
|
490b755769 | ||
|
1dec2c8a0e | ||
|
dcce092e0a | ||
|
32443dd346 | ||
|
2133565cec | ||
|
1da50aa34e | ||
|
d2522b86ac | ||
|
537f753399 | ||
|
c849836854 | ||
|
eb5b1fc021 | ||
|
95be29e1c6 | ||
|
c035dba19e | ||
|
87148bb711 | ||
|
797c636bcb | ||
|
0002962f3f | ||
|
3e4185c396 | ||
|
f6717dec8a | ||
|
a942d6cb48 | ||
|
961516bfd1 | ||
|
6db354a9f4 | ||
|
353f340e11 | ||
|
014b7e6b25 | ||
|
925194022c | ||
|
b690ea15eb | ||
|
5712c0f426 | ||
|
86d68f906e | ||
|
4875ff6847 | ||
|
1b6712ab23 | ||
|
8414c2da31 | ||
|
45396dd2ed | ||
|
7a7309219c | ||
|
fcba157e80 | ||
|
a6ccc3e518 | ||
|
1d16035bb4 | ||
|
e8bcd982cc | ||
|
a5ff05df1a | ||
|
d002e91986 | ||
|
546edb2efa | ||
|
be45730226 | ||
|
ee7e672eb0 | ||
|
0307d6fba6 | ||
|
fc150cba1d | ||
|
d667ab7fad | ||
|
eb87d4545a |
8
.github/ISSUE_TEMPLATE.md
vendored
8
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.11*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.11**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.19*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.19**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.09.11
|
||||
[debug] youtube-dl version 2016.10.19
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
@@ -55,4 +55,4 @@ $ youtube-dl -v <your command line>
|
||||
### Description of your *issue*, suggested solution and other information
|
||||
|
||||
Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible.
|
||||
If work on your *issue* required an account credentials please provide them or explain how one can obtain them.
|
||||
If work on your *issue* requires account credentials please provide them or explain how one can obtain them.
|
||||
|
2
.github/ISSUE_TEMPLATE_tmpl.md
vendored
2
.github/ISSUE_TEMPLATE_tmpl.md
vendored
@@ -55,4 +55,4 @@ $ youtube-dl -v <your command line>
|
||||
### Description of your *issue*, suggested solution and other information
|
||||
|
||||
Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible.
|
||||
If work on your *issue* required an account credentials please provide them or explain how one can obtain them.
|
||||
If work on your *issue* requires account credentials please provide them or explain how one can obtain them.
|
||||
|
5
.github/PULL_REQUEST_TEMPLATE.md
vendored
5
.github/PULL_REQUEST_TEMPLATE.md
vendored
@@ -10,8 +10,13 @@
|
||||
- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/rg3/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/rg3/youtube-dl#youtube-dl-coding-conventions) sections
|
||||
- [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests
|
||||
|
||||
### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options:
|
||||
- [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/)
|
||||
- [ ] I am not the original author of this code but it is in public domain or released under [Unlicense](http://unlicense.org/) (provide reliable evidence)
|
||||
|
||||
### What is the purpose of your *pull request*?
|
||||
- [ ] Bug fix
|
||||
- [ ] Improvement
|
||||
- [ ] New extractor
|
||||
- [ ] New feature
|
||||
|
||||
|
1
.gitignore
vendored
1
.gitignore
vendored
@@ -29,6 +29,7 @@ updates_key.pem
|
||||
*.m4a
|
||||
*.m4v
|
||||
*.mp3
|
||||
*.3gp
|
||||
*.part
|
||||
*.swp
|
||||
test/testdata
|
||||
|
4
AUTHORS
4
AUTHORS
@@ -26,7 +26,7 @@ Albert Kim
|
||||
Pierre Rudloff
|
||||
Huarong Huo
|
||||
Ismael Mejía
|
||||
Steffan 'Ruirize' James
|
||||
Steffan Donal
|
||||
Andras Elso
|
||||
Jelle van der Waa
|
||||
Marcin Cieślak
|
||||
@@ -185,3 +185,5 @@ Aleksander Nitecki
|
||||
Sebastian Blunt
|
||||
Matěj Cepl
|
||||
Xie Yanbo
|
||||
Philip Xu
|
||||
John Hawkinson
|
||||
|
@@ -12,7 +12,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] Proxy map: {}
|
||||
...
|
||||
```
|
||||
**Do not post screenshots of verbose log only plain text is acceptable.**
|
||||
**Do not post screenshots of verbose logs; only plain text is acceptable.**
|
||||
|
||||
The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
|
||||
|
||||
@@ -66,7 +66,7 @@ Only post features that you (or an incapacitated friend you can personally talk
|
||||
|
||||
### Is your question about youtube-dl?
|
||||
|
||||
It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different or even the reporter's own application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.
|
||||
It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different, or even the reporter's own, application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.
|
||||
|
||||
# DEVELOPER INSTRUCTIONS
|
||||
|
||||
@@ -85,7 +85,7 @@ To run the test, simply invoke your favorite test runner, or execute a test file
|
||||
If you want to create a build of youtube-dl yourself, you'll need
|
||||
|
||||
* python
|
||||
* make (both GNU make and BSD make are supported)
|
||||
* make (only GNU make is supported)
|
||||
* pandoc
|
||||
* zip
|
||||
* nosetests
|
||||
@@ -167,19 +167,19 @@ In any case, thank you very much for your contributions!
|
||||
|
||||
This section introduces a guide lines for writing idiomatic, robust and future-proof extractor code.
|
||||
|
||||
Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hoster out of your control and this layout tend to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize code dependency on source's layout changes and even to make the code foresee potential future changes and be ready for that. This is important because it will allow extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with fix incorporated all the previous version become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say some may never receive an update at all that is possible for non rolling release distros.
|
||||
Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that. This is important because it will allow the extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with a fix incorporated, all the previous versions become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say, some non rolling release distros may never receive an update at all.
|
||||
|
||||
### Mandatory and optional metafields
|
||||
|
||||
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in *info dict* are considered mandatory for successful extraction process by youtube-dl:
|
||||
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
|
||||
|
||||
- `id` (media identifier)
|
||||
- `title` (media title)
|
||||
- `url` (media download URL) or `formats`
|
||||
|
||||
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` to be mandatory. Thus aforementioned metafields are the critical data the extraction does not make any sense without and if any of them fail to be extracted then extractor is considered completely broken.
|
||||
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
|
||||
|
||||
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||
|
||||
#### Example
|
||||
|
||||
@@ -199,7 +199,7 @@ Assume at this point `meta`'s layout is:
|
||||
}
|
||||
```
|
||||
|
||||
Assume you want to extract `summary` and put into resulting info dict as `description`. Since `description` is optional metafield you should be ready that this key may be missing from the `meta` dict, so that you should extract it like:
|
||||
Assume you want to extract `summary` and put it into the resulting info dict as `description`. Since `description` is an optional metafield you should be ready that this key may be missing from the `meta` dict, so that you should extract it like:
|
||||
|
||||
```python
|
||||
description = meta.get('summary') # correct
|
||||
@@ -211,7 +211,7 @@ and not like:
|
||||
description = meta['summary'] # incorrect
|
||||
```
|
||||
|
||||
The latter will break extraction process with `KeyError` if `summary` disappears from `meta` at some time later but with former approach extraction will just go ahead with `description` set to `None` that is perfectly fine (remember `None` is equivalent for absence of data).
|
||||
The latter will break extraction process with `KeyError` if `summary` disappears from `meta` at some later time but with the former approach extraction will just go ahead with `description` set to `None` which is perfectly fine (remember `None` is equivalent to the absence of data).
|
||||
|
||||
Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance:
|
||||
|
||||
@@ -231,21 +231,21 @@ description = self._search_regex(
|
||||
webpage, 'description', default=None)
|
||||
```
|
||||
|
||||
On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that are known to may or may not be present.
|
||||
On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present.
|
||||
|
||||
### Provide fallbacks
|
||||
|
||||
When extracting metadata try to provide several scenarios for that. For example if `title` is present in several places/sources try extracting from at least some of them. This would make it more future-proof in case some of the sources became unavailable.
|
||||
When extracting metadata try to do so from multiple sources. For example if `title` is present in several places, try extracting from at least some of them. This makes it more future-proof in case some of the sources become unavailable.
|
||||
|
||||
#### Example
|
||||
|
||||
Say `meta` from previous example has a `title` and you are about to extract it. Since `title` is mandatory meta field you should end up with something like:
|
||||
Say `meta` from the previous example has a `title` and you are about to extract it. Since `title` is a mandatory meta field you should end up with something like:
|
||||
|
||||
```python
|
||||
title = meta['title']
|
||||
```
|
||||
|
||||
If `title` disappeares from `meta` in future due to some changes on hoster's side the extraction would fail since `title` is mandatory. That's expected.
|
||||
If `title` disappeares from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected.
|
||||
|
||||
Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario:
|
||||
|
||||
@@ -282,7 +282,7 @@ title = self._search_regex(
|
||||
webpage, 'title', group='title')
|
||||
```
|
||||
|
||||
Note how you tolerate potential changes in `style` attribute's value or switch from using double quotes to single for `class` attribute:
|
||||
Note how you tolerate potential changes in the `style` attribute's value or switch from using double quotes to single for `class` attribute:
|
||||
|
||||
The code definitely should not look like:
|
||||
|
||||
|
223
ChangeLog
223
ChangeLog
@@ -1,10 +1,229 @@
|
||||
version 2016.09.11
|
||||
version 2016.10.19
|
||||
|
||||
Core
|
||||
+ [utils] Expose PACKED_CODES_RE
|
||||
+ [extractor/common] Extract non smil wowza mpd manifests
|
||||
+ [extractor/common] Detect f4m audio-only formats
|
||||
|
||||
Extractors
|
||||
* [vidzi] Fix extraction (#10908, #10952)
|
||||
* [urplay] Fix subtitle extraction
|
||||
+ [urplay] Add supprt for urskola.se (#10915)
|
||||
+ [orf] Add subtitles support (#10939)
|
||||
* [youtube] Fix --no-playlist behavior for youtu.be/id URLs (#10896)
|
||||
* [nrk] Relax URL regular expression (#10928)
|
||||
+ [nytimes] Add support for podcasts (#10926)
|
||||
* [pluralsight] Relax URL regular expression (#10941)
|
||||
|
||||
|
||||
version 2016.10.16
|
||||
|
||||
Core
|
||||
* [postprocessor/ffmpeg] Return correct filepath and ext in updated information
|
||||
in FFmpegExtractAudioPP (#10879)
|
||||
|
||||
Extractors
|
||||
+ [ruutu] Add support for supla.fi (#10849)
|
||||
+ [theoperaplatform] Add support for theoperaplatform.eu (#10914)
|
||||
* [lynda] Fix height for prioritized streams
|
||||
+ [lynda] Add fallback extraction scenario
|
||||
* [lynda] Switch to https (#10916)
|
||||
+ [huajiao] New extractor (#10917)
|
||||
* [cmt] Fix mgid extraction (#10813)
|
||||
+ [safari:course] Add support for techbus.safaribooksonline.com
|
||||
* [orf:tvthek] Fix extraction and modernize (#10898)
|
||||
* [chirbit] Fix extraction of user profile pages
|
||||
* [carambatv] Fix extraction
|
||||
* [canalplus] Fix extraction for some videos
|
||||
* [cbsinteractive] Fix extraction for cnet.com
|
||||
* [parliamentliveuk] Lower case URLs are now recognized (#10912)
|
||||
|
||||
|
||||
version 2016.10.12
|
||||
|
||||
Core
|
||||
+ Support HTML media elements without child nodes
|
||||
* [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387)
|
||||
|
||||
Extractors
|
||||
* [dailymotion] Fix extraction (#10901)
|
||||
* [vimeo:review] Fix extraction (#10900)
|
||||
* [nhl] Correctly handle invalid formats (#10713)
|
||||
* [footyroom] Fix extraction (#10810)
|
||||
* [abc.net.au:iview] Fix for standalone (non series) videos (#10895)
|
||||
+ [hbo] Add support for episode pages (#10892)
|
||||
* [allocine] Fix extraction (#10860)
|
||||
+ [nextmedia] Recognize action news on AppleDaily
|
||||
* [lego] Improve info extraction and bypass geo restriction (#10872)
|
||||
|
||||
|
||||
version 2016.10.07
|
||||
|
||||
Extractors
|
||||
+ [iprima] Detect geo restriction
|
||||
* [facebook] Fix video extraction (#10846)
|
||||
+ [commonprotocols] Support direct MMS links (#10838)
|
||||
+ [generic] Add support for multiple vimeo embeds (#10862)
|
||||
+ [nzz] Add support for nzz.ch (#4407)
|
||||
+ [npo] Detect geo restriction
|
||||
+ [npo] Add support for 2doc.nl (#10842)
|
||||
+ [lego] Add support for lego.com (#10369)
|
||||
+ [tonline] Add support for t-online.de (#10376)
|
||||
* [techtalks] Relax URL regular expression (#10840)
|
||||
* [youtube:live] Extend URL regular expression (#10839)
|
||||
+ [theweatherchannel] Add support for weather.com (#7188)
|
||||
+ [thisoldhouse] Add support for thisoldhouse.com (#10837)
|
||||
+ [nhl] Add support for wch2016.com (#10833)
|
||||
* [pornoxo] Use JWPlatform to improve metadata extraction
|
||||
|
||||
|
||||
version 2016.10.02
|
||||
|
||||
Core
|
||||
* Fix possibly lost extended attributes during post-processing
|
||||
+ Support pyxattr as well as python-xattr for --xattrs and
|
||||
--xattr-set-filesize (#9054)
|
||||
|
||||
Extractors
|
||||
+ [jwplatform] Support DASH streams in JWPlayer
|
||||
+ [jwplatform] Support old-style JWPlayer playlists
|
||||
+ [byutv:event] Add extractor
|
||||
* [periscope:user] Fix extraction (#10820)
|
||||
* [dctp] Fix extraction (#10734)
|
||||
+ [instagram] Extract video dimensions (#10790)
|
||||
+ [tvland] Extend URL regular expression (#10812)
|
||||
+ [vgtv] Add support for tv.aftonbladet.se (#10800)
|
||||
- [aftonbladet] Remove extractor
|
||||
* [vk] Fix timestamp and view count extraction (#10760)
|
||||
+ [vk] Add support for running and finished live streams (#10799)
|
||||
+ [leeco] Recognize more Le Sports URLs (#10794)
|
||||
+ [instagram] Extract comments (#10788)
|
||||
+ [ketnet] Extract mzsource formats (#10770)
|
||||
* [limelight:media] Improve HTTP formats extraction
|
||||
|
||||
|
||||
version 2016.09.27
|
||||
|
||||
Core
|
||||
+ Add hdcore query parameter to akamai f4m formats
|
||||
+ Delegate HLS live streams downloading to ffmpeg
|
||||
+ Improved support for HTML5 subtitles
|
||||
|
||||
Extractors
|
||||
+ [vk] Add support for dailymotion embeds (#10661)
|
||||
* [promptfile] Fix extraction (#10634)
|
||||
* [kaltura] Speed up embed regular expressions (#10764)
|
||||
+ [npo] Add support for anderetijden.nl (#10754)
|
||||
+ [prosiebensat1] Add support for advopedia sites
|
||||
* [mwave] Relax URL regular expression (#10735, #10748)
|
||||
* [prosiebensat1] Fix playlist support (#10745)
|
||||
+ [prosiebensat1] Add support for sat1gold sites (#10745)
|
||||
+ [cbsnews:livevideo] Fix extraction and extract m3u8 formats
|
||||
+ [brightcove:new] Add support for live streams
|
||||
* [soundcloud] Generalize playlist entries extraction (#10733)
|
||||
+ [mtv] Add support for new URL schema (#8169, #9808)
|
||||
* [einthusan] Fix extraction (#10714)
|
||||
+ [twitter] Support Periscope embeds (#10737)
|
||||
+ [openload] Support subtitles (#10625)
|
||||
|
||||
|
||||
version 2016.09.24
|
||||
|
||||
Core
|
||||
+ Add support for watchTVeverywhere.com authentication provider based MSOs for
|
||||
Adobe Pass authentication (#10709)
|
||||
|
||||
Extractors
|
||||
+ [soundcloud:playlist] Provide video id for early playlist entries (#10733)
|
||||
+ [prosiebensat1] Add support for kabeleinsdoku (#10732)
|
||||
* [cbs] Extract info from thunder videoPlayerService (#10728)
|
||||
* [openload] Fix extraction (#10408)
|
||||
+ [ustream] Support the new HLS streams (#10698)
|
||||
+ [ooyala] Extract all HLS formats
|
||||
+ [cartoonnetwork] Add support for Adobe Pass authentication
|
||||
+ [soundcloud] Extract license metadata
|
||||
+ [fox] Add support for Adobe Pass authentication (#8584)
|
||||
+ [tbs] Add support for Adobe Pass authentication (#10642, #10222)
|
||||
+ [trutv] Add support for Adobe Pass authentication (#10519)
|
||||
+ [turner] Add support for Adobe Pass authentication
|
||||
|
||||
|
||||
version 2016.09.19
|
||||
|
||||
Extractors
|
||||
+ [crunchyroll] Check if already authenticated (#10700)
|
||||
- [twitch:stream] Remove fallback to profile extraction when stream is offline
|
||||
* [thisav] Improve title extraction (#10682)
|
||||
* [vyborymos] Improve station info extraction
|
||||
|
||||
|
||||
version 2016.09.18
|
||||
|
||||
Core
|
||||
+ Introduce manifest_url and fragments fields in formats dictionary for
|
||||
fragmented media
|
||||
+ Provide manifest_url field for DASH segments, HLS and HDS
|
||||
+ Provide fragments field for DASH segments
|
||||
* Rework DASH segments downloader to use fragments field
|
||||
+ Add helper method for Wowza Streaming Engine formats extraction
|
||||
|
||||
Extractors
|
||||
+ [vyborymos] Add extractor for vybory.mos.ru (#10692)
|
||||
+ [xfileshare] Add title regular expression for streamin.to (#10646)
|
||||
+ [globo:article] Add support for multiple videos (#10653)
|
||||
+ [thisav] Recognize HTML5 videos (#10447)
|
||||
* [jwplatform] Improve JWPlayer detection
|
||||
+ [mangomolo] Add support for Mangomolo embeds
|
||||
+ [toutv] Add support for authentication (#10669)
|
||||
* [franceinter] Fix upload date extraction
|
||||
* [tv4] Fix HLS and HDS formats extraction (#10659)
|
||||
|
||||
|
||||
version 2016.09.15
|
||||
|
||||
Core
|
||||
* Improve _hidden_inputs
|
||||
+ Introduce improved explicit Adobe Pass support
|
||||
+ Add --ap-mso to provide multiple-system operator identifier
|
||||
+ Add --ap-username to provide MSO account username
|
||||
+ Add --ap-password to provide MSO account password
|
||||
+ Add --ap-list-mso to list all supported MSOs
|
||||
+ Add support for Rogers Cable multiple-system operator (#10606)
|
||||
|
||||
Extractors
|
||||
* [crunchyroll] Fix authentication (#10655)
|
||||
* [twitch] Fix API calls (#10654, #10660)
|
||||
+ [bellmedia] Add support for more Bell Media Television sites
|
||||
* [franceinter] Fix extraction (#10538, #2105)
|
||||
* [kuwo] Improve error detection (#10650)
|
||||
+ [go] Add support for free full episodes (#10439)
|
||||
* [bilibili] Fix extraction for specific videos (#10647)
|
||||
* [nhk] Fix extraction (#10633)
|
||||
* [kaltura] Improve audio detection
|
||||
* [kaltura] Skip chun format
|
||||
+ [vimeo:ondemand] Pass Referer along with embed URL (#10624)
|
||||
+ [nbc] Add support for NBC Olympics (#10361)
|
||||
|
||||
|
||||
version 2016.09.11.1
|
||||
|
||||
Extractors
|
||||
+ [tube8] Extract categories and tags (#10579)
|
||||
+ [pornhub] Extract categories and tags (#10499)
|
||||
+ [foxnews] Support Fox News articles (#10598)
|
||||
* [openload] Temporary fix (#10408)
|
||||
+ [foxnews] Add support Fox News articles (#10598)
|
||||
* [viafree] Improve video id extraction (#10615)
|
||||
* [iwara] Fix extraction after relaunch (#10462, #3215)
|
||||
+ [tfo] Add extractor for tfo.org
|
||||
* [lrt] Fix audio extraction (#10566)
|
||||
* [9now] Fix extraction (#10561)
|
||||
+ [canalplus] Add support for c8.fr (#10577)
|
||||
* [newgrounds] Fix uploader extraction (#10584)
|
||||
+ [polskieradio:category] Add support for category lists (#10576)
|
||||
+ [ketnet] Add extractor for ketnet.be (#10343)
|
||||
+ [canvas] Add support for een.be (#10605)
|
||||
+ [telequebec] Add extractor for telequebec.tv (#1999)
|
||||
* [parliamentliveuk] Fix extraction (#9137)
|
||||
|
||||
|
||||
version 2016.09.08
|
||||
|
6
Makefile
6
Makefile
@@ -1,7 +1,7 @@
|
||||
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
||||
|
||||
clean:
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||
find . -name "*.pyc" -delete
|
||||
find . -name "*.class" -delete
|
||||
|
||||
@@ -12,7 +12,7 @@ SHAREDIR ?= $(PREFIX)/share
|
||||
PYTHON ?= /usr/bin/env python
|
||||
|
||||
# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
|
||||
SYSCONFDIR != if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi
|
||||
SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi)
|
||||
|
||||
install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
|
||||
install -d $(DESTDIR)$(BINDIR)
|
||||
@@ -90,7 +90,7 @@ fish-completion: youtube-dl.fish
|
||||
|
||||
lazy-extractors: youtube_dl/extractor/lazy_extractors.py
|
||||
|
||||
_EXTRACTOR_FILES != find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py'
|
||||
_EXTRACTOR_FILES = $(shell find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py')
|
||||
youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
|
||||
$(PYTHON) devscripts/make_lazy_extractors.py $@
|
||||
|
||||
|
87
README.md
87
README.md
@@ -358,6 +358,17 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
-n, --netrc Use .netrc authentication data
|
||||
--video-password PASSWORD Video password (vimeo, smotri, youku)
|
||||
|
||||
## Adobe Pass Options:
|
||||
--ap-mso MSO Adobe Pass multiple-system operator (TV
|
||||
provider) identifier, use --ap-list-mso for
|
||||
a list of available MSOs
|
||||
--ap-username USERNAME Multiple-system operator account login
|
||||
--ap-password PASSWORD Multiple-system operator account password.
|
||||
If this option is left out, youtube-dl will
|
||||
ask interactively.
|
||||
--ap-list-mso List all supported multiple-system
|
||||
operators
|
||||
|
||||
## Post-processing Options:
|
||||
-x, --extract-audio Convert video files to audio-only files
|
||||
(requires ffmpeg or avconv and ffprobe or
|
||||
@@ -438,12 +449,12 @@ You can use `--ignore-config` if you want to disable the configuration file for
|
||||
|
||||
### Authentication with `.netrc` file
|
||||
|
||||
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by you only:
|
||||
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you:
|
||||
```
|
||||
touch $HOME/.netrc
|
||||
chmod a-rwx,u+rw $HOME/.netrc
|
||||
```
|
||||
After that you can add credentials for extractor in the following format, where *extractor* is the name of extractor in lowercase:
|
||||
After that you can add credentials for an extractor in the following format, where *extractor* is the name of the extractor in lowercase:
|
||||
```
|
||||
machine <extractor> login <login> password <password>
|
||||
```
|
||||
@@ -539,13 +550,13 @@ Available for the media that is a track or a part of a music album:
|
||||
- `disc_number`: Number of the disc or other physical medium the track belongs to
|
||||
- `release_year`: Year (YYYY) when the album was released
|
||||
|
||||
Each aforementioned sequence when referenced in output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by particular extractor, such sequences will be replaced with `NA`.
|
||||
Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with `NA`.
|
||||
|
||||
For example for `-o %(title)s-%(id)s.%(ext)s` and mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj` this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
|
||||
For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj`, this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
|
||||
|
||||
Output template can also contain arbitrary hierarchical path, e.g. `-o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s'` that will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you.
|
||||
Output templates can also contain arbitrary hierarchical path, e.g. `-o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s'` which will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you.
|
||||
|
||||
To specify percent literal in output template use `%%`. To output to stdout use `-o -`.
|
||||
To use percent literals in an output template use `%%`. To output to stdout use `-o -`.
|
||||
|
||||
The current default template is `%(title)s-%(id)s.%(ext)s`.
|
||||
|
||||
@@ -553,7 +564,7 @@ In some cases, you don't want special characters such as 中, spaces, or &, such
|
||||
|
||||
#### Output template and Windows batch files
|
||||
|
||||
If you are using output template inside a Windows batch file then you must escape plain percent characters (`%`) by doubling, so that `-o "%(title)s-%(id)s.%(ext)s"` should become `-o "%%(title)s-%%(id)s.%%(ext)s"`. However you should not touch `%`'s that are not plain characters, e.g. environment variables for expansion should stay intact: `-o "C:\%HOMEPATH%\Desktop\%%(title)s.%%(ext)s"`.
|
||||
If you are using an output template inside a Windows batch file then you must escape plain percent characters (`%`) by doubling, so that `-o "%(title)s-%(id)s.%(ext)s"` should become `-o "%%(title)s-%%(id)s.%%(ext)s"`. However you should not touch `%`'s that are not plain characters, e.g. environment variables for expansion should stay intact: `-o "C:\%HOMEPATH%\Desktop\%%(title)s.%%(ext)s"`.
|
||||
|
||||
#### Output template examples
|
||||
|
||||
@@ -586,7 +597,7 @@ $ youtube-dl -o - BaW_jenozKc
|
||||
|
||||
By default youtube-dl tries to download the best available quality, i.e. if you want the best quality you **don't need** to pass any special options, youtube-dl will guess it for you by **default**.
|
||||
|
||||
But sometimes you may want to download in a different format, for example when you are on a slow or intermittent connection. The key mechanism for achieving this is so called *format selection* based on which you can explicitly specify desired format, select formats based on some criterion or criteria, setup precedence and much more.
|
||||
But sometimes you may want to download in a different format, for example when you are on a slow or intermittent connection. The key mechanism for achieving this is so-called *format selection* based on which you can explicitly specify desired format, select formats based on some criterion or criteria, setup precedence and much more.
|
||||
|
||||
The general syntax for format selection is `--format FORMAT` or shorter `-f FORMAT` where `FORMAT` is a *selector expression*, i.e. an expression that describes format or formats you would like to download.
|
||||
|
||||
@@ -594,21 +605,21 @@ The general syntax for format selection is `--format FORMAT` or shorter `-f FORM
|
||||
|
||||
The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific.
|
||||
|
||||
You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download best quality format of particular file extension served as a single file, e.g. `-f webm` will download best quality format with `webm` extension served as a single file.
|
||||
You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file.
|
||||
|
||||
You can also use special names to select particular edge case format:
|
||||
- `best`: Select best quality format represented by single file with video and audio
|
||||
- `worst`: Select worst quality format represented by single file with video and audio
|
||||
- `bestvideo`: Select best quality video only format (e.g. DASH video), may not be available
|
||||
- `worstvideo`: Select worst quality video only format, may not be available
|
||||
- `bestaudio`: Select best quality audio only format, may not be available
|
||||
- `worstaudio`: Select worst quality audio only format, may not be available
|
||||
You can also use special names to select particular edge case formats:
|
||||
- `best`: Select the best quality format represented by a single file with video and audio.
|
||||
- `worst`: Select the worst quality format represented by a single file with video and audio.
|
||||
- `bestvideo`: Select the best quality video-only format (e.g. DASH video). May not be available.
|
||||
- `worstvideo`: Select the worst quality video-only format. May not be available.
|
||||
- `bestaudio`: Select the best quality audio only-format. May not be available.
|
||||
- `worstaudio`: Select the worst quality audio only-format. May not be available.
|
||||
|
||||
For example, to download worst quality video only format you can use `-f worstvideo`.
|
||||
For example, to download the worst quality video-only format you can use `-f worstvideo`.
|
||||
|
||||
If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes. Note that slash is left-associative, i.e. formats on the left hand side are preferred, for example `-f 22/17/18` will download format 22 if it's available, otherwise it will download format 17 if it's available, otherwise it will download format 18 if it's available, otherwise it will complain that no suitable formats are available for download.
|
||||
|
||||
If you want to download several formats of the same video use comma as a separator, e.g. `-f 22,17,18` will download all these three formats, of course if they are available. Or more sophisticated example combined with precedence feature `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`.
|
||||
If you want to download several formats of the same video use a comma as a separator, e.g. `-f 22,17,18` will download all these three formats, of course if they are available. Or a more sophisticated example combined with the precedence feature: `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`.
|
||||
|
||||
You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`).
|
||||
|
||||
@@ -630,15 +641,15 @@ Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begin
|
||||
- `protocol`: The protocol that will be used for the actual download, lower-case. `http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `m3u8`, or `m3u8_native`
|
||||
- `format_id`: A short description of the format
|
||||
|
||||
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by video hoster.
|
||||
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster.
|
||||
|
||||
Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s.
|
||||
|
||||
You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv installed), for example `-f bestvideo+bestaudio` will download best video only format, best audio only format and mux them together with ffmpeg/avconv.
|
||||
You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv installed), for example `-f bestvideo+bestaudio` will download the best video-only format, the best audio-only format and mux them together with ffmpeg/avconv.
|
||||
|
||||
Format selectors can also be grouped using parentheses, for example if you want to download the best mp4 and webm formats with a height lower than 480 you can use `-f '(mp4,webm)[height<480]'`.
|
||||
|
||||
Since the end of April 2015 and version 2015.04.26 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see [#5447](https://github.com/rg3/youtube-dl/issues/5447), [#5456](https://github.com/rg3/youtube-dl/issues/5456)). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading the best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some DASH formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
|
||||
Since the end of April 2015 and version 2015.04.26, youtube-dl uses `-f bestvideo+bestaudio/best` as the default format selection (see [#5447](https://github.com/rg3/youtube-dl/issues/5447), [#5456](https://github.com/rg3/youtube-dl/issues/5456)). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading the best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some DASH formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
|
||||
|
||||
If you want to preserve the old format selection behavior (prior to youtube-dl 2015.04.26), i.e. you want to download the best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](#configuration) in order not to type it every time you run youtube-dl.
|
||||
|
||||
@@ -717,7 +728,7 @@ Add a file exclusion for `youtube-dl.exe` in Windows Defender settings.
|
||||
|
||||
YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos.
|
||||
|
||||
If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to [report bugs](https://bugs.launchpad.net/ubuntu/+source/youtube-dl/+filebug) to the [Ubuntu packaging guys](mailto:ubuntu-motu@lists.ubuntu.com?subject=outdated%20version%20of%20youtube-dl) - all they have to do is update the package to a somewhat recent version. See above for a way to update.
|
||||
If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to [report bugs](https://bugs.launchpad.net/ubuntu/+source/youtube-dl/+filebug) to the [Ubuntu packaging people](mailto:ubuntu-motu@lists.ubuntu.com?subject=outdated%20version%20of%20youtube-dl) - all they have to do is update the package to a somewhat recent version. See above for a way to update.
|
||||
|
||||
### I'm getting an error when trying to use output template: `error: using output template conflicts with using title, video ID or auto number`
|
||||
|
||||
@@ -891,7 +902,7 @@ If you want to find out whether a given URL is supported, simply call youtube-dl
|
||||
|
||||
# Why do I need to go through that much red tape when filing bugs?
|
||||
|
||||
Before we had the issue template, despite our extensive [bug reporting instructions](#bugs), about 80% of the issue reports we got were useless, for instance because people used ancient versions hundreds of releases old, because of simple syntactic errors (not in youtube-dl but in general shell usage), because the problem was alrady reported multiple times before, because people did not actually read an error message, even if it said "please install ffmpeg", because people did not mention the URL they were trying to download and many more simple, easy-to-avoid problems, many of whom were totally unrelated to youtube-dl.
|
||||
Before we had the issue template, despite our extensive [bug reporting instructions](#bugs), about 80% of the issue reports we got were useless, for instance because people used ancient versions hundreds of releases old, because of simple syntactic errors (not in youtube-dl but in general shell usage), because the problem was already reported multiple times before, because people did not actually read an error message, even if it said "please install ffmpeg", because people did not mention the URL they were trying to download and many more simple, easy-to-avoid problems, many of whom were totally unrelated to youtube-dl.
|
||||
|
||||
youtube-dl is an open-source project manned by too few volunteers, so we'd rather spend time fixing bugs where we are certain none of those simple problems apply, and where we can be reasonably confident to be able to reproduce the issue without asking the reporter repeatedly. As such, the output of `youtube-dl -v YOUR_URL_HERE` is really all that's required to file an issue. The issue template also guides you through some basic steps you can do, such as checking that your version of youtube-dl is current.
|
||||
|
||||
@@ -912,7 +923,7 @@ To run the test, simply invoke your favorite test runner, or execute a test file
|
||||
If you want to create a build of youtube-dl yourself, you'll need
|
||||
|
||||
* python
|
||||
* make (both GNU make and BSD make are supported)
|
||||
* make (only GNU make is supported)
|
||||
* pandoc
|
||||
* zip
|
||||
* nosetests
|
||||
@@ -994,19 +1005,19 @@ In any case, thank you very much for your contributions!
|
||||
|
||||
This section introduces a guide lines for writing idiomatic, robust and future-proof extractor code.
|
||||
|
||||
Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hoster out of your control and this layout tend to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize code dependency on source's layout changes and even to make the code foresee potential future changes and be ready for that. This is important because it will allow extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with fix incorporated all the previous version become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say some may never receive an update at all that is possible for non rolling release distros.
|
||||
Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that. This is important because it will allow the extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with a fix incorporated, all the previous versions become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say, some non rolling release distros may never receive an update at all.
|
||||
|
||||
### Mandatory and optional metafields
|
||||
|
||||
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in *info dict* are considered mandatory for successful extraction process by youtube-dl:
|
||||
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
|
||||
|
||||
- `id` (media identifier)
|
||||
- `title` (media title)
|
||||
- `url` (media download URL) or `formats`
|
||||
|
||||
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` to be mandatory. Thus aforementioned metafields are the critical data the extraction does not make any sense without and if any of them fail to be extracted then extractor is considered completely broken.
|
||||
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
|
||||
|
||||
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||
|
||||
#### Example
|
||||
|
||||
@@ -1026,7 +1037,7 @@ Assume at this point `meta`'s layout is:
|
||||
}
|
||||
```
|
||||
|
||||
Assume you want to extract `summary` and put into resulting info dict as `description`. Since `description` is optional metafield you should be ready that this key may be missing from the `meta` dict, so that you should extract it like:
|
||||
Assume you want to extract `summary` and put it into the resulting info dict as `description`. Since `description` is an optional metafield you should be ready that this key may be missing from the `meta` dict, so that you should extract it like:
|
||||
|
||||
```python
|
||||
description = meta.get('summary') # correct
|
||||
@@ -1038,7 +1049,7 @@ and not like:
|
||||
description = meta['summary'] # incorrect
|
||||
```
|
||||
|
||||
The latter will break extraction process with `KeyError` if `summary` disappears from `meta` at some time later but with former approach extraction will just go ahead with `description` set to `None` that is perfectly fine (remember `None` is equivalent for absence of data).
|
||||
The latter will break extraction process with `KeyError` if `summary` disappears from `meta` at some later time but with the former approach extraction will just go ahead with `description` set to `None` which is perfectly fine (remember `None` is equivalent to the absence of data).
|
||||
|
||||
Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance:
|
||||
|
||||
@@ -1058,21 +1069,21 @@ description = self._search_regex(
|
||||
webpage, 'description', default=None)
|
||||
```
|
||||
|
||||
On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that are known to may or may not be present.
|
||||
On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present.
|
||||
|
||||
### Provide fallbacks
|
||||
|
||||
When extracting metadata try to provide several scenarios for that. For example if `title` is present in several places/sources try extracting from at least some of them. This would make it more future-proof in case some of the sources became unavailable.
|
||||
When extracting metadata try to do so from multiple sources. For example if `title` is present in several places, try extracting from at least some of them. This makes it more future-proof in case some of the sources become unavailable.
|
||||
|
||||
#### Example
|
||||
|
||||
Say `meta` from previous example has a `title` and you are about to extract it. Since `title` is mandatory meta field you should end up with something like:
|
||||
Say `meta` from the previous example has a `title` and you are about to extract it. Since `title` is a mandatory meta field you should end up with something like:
|
||||
|
||||
```python
|
||||
title = meta['title']
|
||||
```
|
||||
|
||||
If `title` disappeares from `meta` in future due to some changes on hoster's side the extraction would fail since `title` is mandatory. That's expected.
|
||||
If `title` disappeares from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected.
|
||||
|
||||
Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario:
|
||||
|
||||
@@ -1109,7 +1120,7 @@ title = self._search_regex(
|
||||
webpage, 'title', group='title')
|
||||
```
|
||||
|
||||
Note how you tolerate potential changes in `style` attribute's value or switch from using double quotes to single for `class` attribute:
|
||||
Note how you tolerate potential changes in the `style` attribute's value or switch from using double quotes to single for `class` attribute:
|
||||
|
||||
The code definitely should not look like:
|
||||
|
||||
@@ -1179,7 +1190,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
||||
|
||||
# BUGS
|
||||
|
||||
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](http://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
|
||||
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](http://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
|
||||
|
||||
**Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
|
||||
```
|
||||
@@ -1195,7 +1206,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] Proxy map: {}
|
||||
...
|
||||
```
|
||||
**Do not post screenshots of verbose log only plain text is acceptable.**
|
||||
**Do not post screenshots of verbose logs; only plain text is acceptable.**
|
||||
|
||||
The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
|
||||
|
||||
@@ -1249,7 +1260,7 @@ Only post features that you (or an incapacitated friend you can personally talk
|
||||
|
||||
### Is your question about youtube-dl?
|
||||
|
||||
It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different or even the reporter's own application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.
|
||||
It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different, or even the reporter's own, application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.
|
||||
|
||||
# COPYRIGHT
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
@@ -60,6 +60,9 @@ if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; e
|
||||
if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi
|
||||
if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi
|
||||
|
||||
read -p "Is ChangeLog up to date? (y/n) " -n 1
|
||||
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
|
||||
|
||||
/bin/echo -e "\n### First of all, testing..."
|
||||
make clean
|
||||
if $skip_tests ; then
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
#
|
||||
# youtube-dl documentation build configuration file, created by
|
||||
# sphinx-quickstart on Fri Mar 14 21:05:43 2014.
|
||||
|
@@ -34,12 +34,12 @@
|
||||
- **AdultSwim**
|
||||
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
|
||||
- **AfreecaTV**: afreecatv.com
|
||||
- **Aftonbladet**
|
||||
- **AirMozilla**
|
||||
- **AlJazeera**
|
||||
- **Allocine**
|
||||
- **AlphaPorno**
|
||||
- **AMCNetworks**
|
||||
- **anderetijden**: npo.nl and ntr.nl
|
||||
- **AnimeOnDemand**
|
||||
- **anitube.se**
|
||||
- **AnySex**
|
||||
@@ -86,9 +86,10 @@
|
||||
- **bbc.co.uk:article**: BBC articles
|
||||
- **bbc.co.uk:iplayer:playlist**
|
||||
- **bbc.co.uk:playlist**
|
||||
- **BeatportPro**
|
||||
- **Beatport**
|
||||
- **Beeg**
|
||||
- **BehindKink**
|
||||
- **BellMedia**
|
||||
- **Bet**
|
||||
- **Bigflix**
|
||||
- **Bild**: Bild.de
|
||||
@@ -110,6 +111,7 @@
|
||||
- **bt:vestlendingen**: Bergens Tidende - Vestlendingen
|
||||
- **BuzzFeed**
|
||||
- **BYUtv**
|
||||
- **BYUtvEvent**
|
||||
- **Camdemy**
|
||||
- **CamdemyFolder**
|
||||
- **CamWithHer**
|
||||
@@ -126,8 +128,8 @@
|
||||
- **CBS**
|
||||
- **CBSInteractive**
|
||||
- **CBSLocal**
|
||||
- **CBSNews**: CBS News
|
||||
- **CBSNewsLiveVideo**: CBS News Live Videos
|
||||
- **cbsnews**: CBS News
|
||||
- **cbsnews:livevideo**: CBS News Live Videos
|
||||
- **CBSSports**
|
||||
- **CCTV**
|
||||
- **CDA**
|
||||
@@ -169,7 +171,6 @@
|
||||
- **CSNNE**
|
||||
- **CSpan**: C-SPAN
|
||||
- **CtsNews**: 華視新聞
|
||||
- **CTV**
|
||||
- **CTVNews**
|
||||
- **culturebox.francetvinfo.fr**
|
||||
- **CultureUnplugged**
|
||||
@@ -288,6 +289,7 @@
|
||||
- **Groupon**
|
||||
- **Hark**
|
||||
- **HBO**
|
||||
- **HBOEpisode**
|
||||
- **HearThisAt**
|
||||
- **Heise**
|
||||
- **HellPorno**
|
||||
@@ -306,6 +308,7 @@
|
||||
- **HowStuffWorks**
|
||||
- **HRTi**
|
||||
- **HRTiPlaylist**
|
||||
- **Huajiao**: 花椒直播
|
||||
- **HuffPost**: Huffington Post
|
||||
- **Hypem**
|
||||
- **Iconosquare**
|
||||
@@ -363,6 +366,7 @@
|
||||
- **Le**: 乐视网
|
||||
- **Learnr**
|
||||
- **Lecture2Go**
|
||||
- **LEGO**
|
||||
- **Lemonde**
|
||||
- **LePlaylist**
|
||||
- **LetvCloud**: 乐视云
|
||||
@@ -388,6 +392,8 @@
|
||||
- **mailru**: Видео@Mail.Ru
|
||||
- **MakersChannel**
|
||||
- **MakerTV**
|
||||
- **mangomolo:live**
|
||||
- **mangomolo:video**
|
||||
- **MatchTV**
|
||||
- **MDR**: MDR.DE and KiKA
|
||||
- **media.ccc.de**
|
||||
@@ -422,8 +428,9 @@
|
||||
- **MPORA**
|
||||
- **MSN**
|
||||
- **mtg**: MTG services
|
||||
- **MTV**
|
||||
- **mtv**
|
||||
- **mtv.de**
|
||||
- **mtv:video**
|
||||
- **mtvservices:embedded**
|
||||
- **MuenchenTV**: münchen.tv
|
||||
- **MusicPlayOn**
|
||||
@@ -445,6 +452,7 @@
|
||||
- **NBA**
|
||||
- **NBC**
|
||||
- **NBCNews**
|
||||
- **NBCOlympics**
|
||||
- **NBCSports**
|
||||
- **NBCSportsVPlayer**
|
||||
- **ndr**: NDR.de - Norddeutscher Rundfunk
|
||||
@@ -502,6 +510,7 @@
|
||||
- **Nuvid**
|
||||
- **NYTimes**
|
||||
- **NYTimesArticle**
|
||||
- **NZZ**
|
||||
- **ocw.mit.edu**
|
||||
- **OdaTV**
|
||||
- **Odnoklassniki**
|
||||
@@ -687,6 +696,7 @@
|
||||
- **SWRMediathek**
|
||||
- **Syfy**
|
||||
- **SztvHu**
|
||||
- **t-online.de**
|
||||
- **Tagesschau**
|
||||
- **tagesschau:player**
|
||||
- **Tass**
|
||||
@@ -711,13 +721,16 @@
|
||||
- **TF1**
|
||||
- **TFO**
|
||||
- **TheIntercept**
|
||||
- **theoperaplatform**
|
||||
- **ThePlatform**
|
||||
- **ThePlatformFeed**
|
||||
- **TheScene**
|
||||
- **TheSixtyOne**
|
||||
- **TheStar**
|
||||
- **TheWeatherChannel**
|
||||
- **ThisAmericanLife**
|
||||
- **ThisAV**
|
||||
- **ThisOldHouse**
|
||||
- **tinypic**: tinypic.com videos
|
||||
- **tlc.de**
|
||||
- **TMZ**
|
||||
@@ -848,6 +861,7 @@
|
||||
- **VRT**
|
||||
- **vube**: Vube.com
|
||||
- **VuClip**
|
||||
- **VyboryMos**
|
||||
- **Walla**
|
||||
- **washingtonpost**
|
||||
- **washingtonpost:article**
|
||||
@@ -861,7 +875,7 @@
|
||||
- **wholecloud**: WholeCloud
|
||||
- **Wimp**
|
||||
- **Wistia**
|
||||
- **WNL**
|
||||
- **wnl**: npo.nl and ntr.nl
|
||||
- **WorldStarHipHop**
|
||||
- **wrzuta.pl**
|
||||
- **wrzuta.pl:playlist**
|
||||
|
2
setup.py
2
setup.py
@@ -1,5 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
|
@@ -87,7 +87,7 @@ class TestHTTP(unittest.TestCase):
|
||||
|
||||
ydl = YoutubeDL({'logger': FakeLogger()})
|
||||
r = ydl.extract_info('http://localhost:%d/302' % self.port)
|
||||
self.assertEqual(r['url'], 'http://localhost:%d/vid.mp4' % self.port)
|
||||
self.assertEqual(r['entries'][0]['url'], 'http://localhost:%d/vid.mp4' % self.port)
|
||||
|
||||
|
||||
class TestHTTPS(unittest.TestCase):
|
||||
@@ -111,7 +111,7 @@ class TestHTTPS(unittest.TestCase):
|
||||
|
||||
ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True})
|
||||
r = ydl.extract_info('https://localhost:%d/video.html' % self.port)
|
||||
self.assertEqual(r['url'], 'https://localhost:%d/vid.mp4' % self.port)
|
||||
self.assertEqual(r['entries'][0]['url'], 'https://localhost:%d/vid.mp4' % self.port)
|
||||
|
||||
|
||||
def _build_proxy_handler(name):
|
||||
|
@@ -40,6 +40,7 @@ from youtube_dl.utils import (
|
||||
js_to_json,
|
||||
limit_length,
|
||||
mimetype2ext,
|
||||
month_by_name,
|
||||
ohdave_rsa_encrypt,
|
||||
OnDemandPagedList,
|
||||
orderedSet,
|
||||
@@ -291,6 +292,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(unified_strdate('25-09-2014'), '20140925')
|
||||
self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227')
|
||||
self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
|
||||
self.assertEqual(unified_strdate('Feb 7, 2016 at 6:35 pm'), '20160207')
|
||||
|
||||
def test_unified_timestamps(self):
|
||||
self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
|
||||
@@ -311,6 +313,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200)
|
||||
self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None)
|
||||
self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500)
|
||||
self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
|
||||
|
||||
def test_determine_ext(self):
|
||||
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
|
||||
@@ -634,6 +637,14 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt')
|
||||
self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html')
|
||||
|
||||
def test_month_by_name(self):
|
||||
self.assertEqual(month_by_name(None), None)
|
||||
self.assertEqual(month_by_name('December', 'en'), 12)
|
||||
self.assertEqual(month_by_name('décembre', 'fr'), 12)
|
||||
self.assertEqual(month_by_name('December'), 12)
|
||||
self.assertEqual(month_by_name('décembre'), None)
|
||||
self.assertEqual(month_by_name('Unknown', 'unknown'), None)
|
||||
|
||||
def test_parse_codecs(self):
|
||||
self.assertEqual(parse_codecs(''), {})
|
||||
self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), {
|
||||
|
@@ -1,5 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import absolute_import, unicode_literals
|
||||
|
||||
@@ -131,6 +131,9 @@ class YoutubeDL(object):
|
||||
username: Username for authentication purposes.
|
||||
password: Password for authentication purposes.
|
||||
videopassword: Password for accessing a video.
|
||||
ap_mso: Adobe Pass multiple-system operator identifier.
|
||||
ap_username: Multiple-system operator account username.
|
||||
ap_password: Multiple-system operator account password.
|
||||
usenetrc: Use netrc for authentication instead.
|
||||
verbose: Print additional info to stdout.
|
||||
quiet: Do not print messages to stdout.
|
||||
|
@@ -1,5 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
@@ -34,12 +34,14 @@ from .utils import (
|
||||
setproctitle,
|
||||
std_headers,
|
||||
write_string,
|
||||
render_table,
|
||||
)
|
||||
from .update import update_self
|
||||
from .downloader import (
|
||||
FileDownloader,
|
||||
)
|
||||
from .extractor import gen_extractors, list_extractors
|
||||
from .extractor.adobepass import MSO_INFO
|
||||
from .YoutubeDL import YoutubeDL
|
||||
|
||||
|
||||
@@ -118,18 +120,26 @@ def _real_main(argv=None):
|
||||
desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
|
||||
write_string(desc + '\n', out=sys.stdout)
|
||||
sys.exit(0)
|
||||
if opts.ap_list_mso:
|
||||
table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]
|
||||
write_string('Supported TV Providers:\n' + render_table(['mso', 'mso name'], table) + '\n', out=sys.stdout)
|
||||
sys.exit(0)
|
||||
|
||||
# Conflicting, missing and erroneous options
|
||||
if opts.usenetrc and (opts.username is not None or opts.password is not None):
|
||||
parser.error('using .netrc conflicts with giving username/password')
|
||||
if opts.password is not None and opts.username is None:
|
||||
parser.error('account username missing\n')
|
||||
if opts.ap_password is not None and opts.ap_username is None:
|
||||
parser.error('TV Provider account username missing\n')
|
||||
if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
|
||||
parser.error('using output template conflicts with using title, video ID or auto number')
|
||||
if opts.usetitle and opts.useid:
|
||||
parser.error('using title conflicts with using video ID')
|
||||
if opts.username is not None and opts.password is None:
|
||||
opts.password = compat_getpass('Type account password and press [Return]: ')
|
||||
if opts.ap_username is not None and opts.ap_password is None:
|
||||
opts.ap_password = compat_getpass('Type TV provider account password and press [Return]: ')
|
||||
if opts.ratelimit is not None:
|
||||
numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
|
||||
if numeric_limit is None:
|
||||
@@ -155,6 +165,8 @@ def _real_main(argv=None):
|
||||
parser.error('max sleep interval must be greater than or equal to min sleep interval')
|
||||
else:
|
||||
opts.max_sleep_interval = opts.sleep_interval
|
||||
if opts.ap_mso and opts.ap_mso not in MSO_INFO:
|
||||
parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers')
|
||||
|
||||
def parse_retries(retries):
|
||||
if retries in ('inf', 'infinite'):
|
||||
@@ -254,8 +266,6 @@ def _real_main(argv=None):
|
||||
postprocessors.append({
|
||||
'key': 'FFmpegEmbedSubtitle',
|
||||
})
|
||||
if opts.xattrs:
|
||||
postprocessors.append({'key': 'XAttrMetadata'})
|
||||
if opts.embedthumbnail:
|
||||
already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
|
||||
postprocessors.append({
|
||||
@@ -264,6 +274,10 @@ def _real_main(argv=None):
|
||||
})
|
||||
if not already_have_thumbnail:
|
||||
opts.writethumbnail = True
|
||||
# XAttrMetadataPP should be run after post-processors that may change file
|
||||
# contents
|
||||
if opts.xattrs:
|
||||
postprocessors.append({'key': 'XAttrMetadata'})
|
||||
# Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
|
||||
# So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
|
||||
if opts.exec_cmd:
|
||||
@@ -271,12 +285,6 @@ def _real_main(argv=None):
|
||||
'key': 'ExecAfterDownload',
|
||||
'exec_cmd': opts.exec_cmd,
|
||||
})
|
||||
if opts.xattr_set_filesize:
|
||||
try:
|
||||
import xattr
|
||||
xattr # Confuse flake8
|
||||
except ImportError:
|
||||
parser.error('setting filesize xattr requested but python-xattr is not available')
|
||||
external_downloader_args = None
|
||||
if opts.external_downloader_args:
|
||||
external_downloader_args = compat_shlex_split(opts.external_downloader_args)
|
||||
@@ -293,6 +301,9 @@ def _real_main(argv=None):
|
||||
'password': opts.password,
|
||||
'twofactor': opts.twofactor,
|
||||
'videopassword': opts.videopassword,
|
||||
'ap_mso': opts.ap_mso,
|
||||
'ap_username': opts.ap_username,
|
||||
'ap_password': opts.ap_password,
|
||||
'quiet': (opts.quiet or any_getting or any_printing),
|
||||
'no_warnings': opts.no_warnings,
|
||||
'forceurl': opts.geturl,
|
||||
|
@@ -346,7 +346,6 @@ class FileDownloader(object):
|
||||
min_sleep_interval = self.params.get('sleep_interval')
|
||||
if min_sleep_interval:
|
||||
max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
|
||||
print(min_sleep_interval, max_sleep_interval)
|
||||
sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
|
||||
self.to_screen('[download] Sleeping %s seconds...' % sleep_interval)
|
||||
time.sleep(sleep_interval)
|
||||
|
@@ -1,7 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import re
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import compat_urllib_error
|
||||
@@ -19,34 +18,32 @@ class DashSegmentsFD(FragmentFD):
|
||||
FD_NAME = 'dashsegments'
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
base_url = info_dict['url']
|
||||
segment_urls = [info_dict['segment_urls'][0]] if self.params.get('test', False) else info_dict['segment_urls']
|
||||
initialization_url = info_dict.get('initialization_url')
|
||||
segments = info_dict['fragments'][:1] if self.params.get(
|
||||
'test', False) else info_dict['fragments']
|
||||
|
||||
ctx = {
|
||||
'filename': filename,
|
||||
'total_frags': len(segment_urls) + (1 if initialization_url else 0),
|
||||
'total_frags': len(segments),
|
||||
}
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
|
||||
def combine_url(base_url, target_url):
|
||||
if re.match(r'^https?://', target_url):
|
||||
return target_url
|
||||
return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
|
||||
|
||||
segments_filenames = []
|
||||
|
||||
fragment_retries = self.params.get('fragment_retries', 0)
|
||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||
|
||||
def process_segment(segment, tmp_filename, fatal):
|
||||
target_url, segment_name = segment
|
||||
def process_segment(segment, tmp_filename, num):
|
||||
segment_url = segment['url']
|
||||
segment_name = 'Frag%d' % num
|
||||
target_filename = '%s-%s' % (tmp_filename, segment_name)
|
||||
# In DASH, the first segment contains necessary headers to
|
||||
# generate a valid MP4 file, so always abort for the first segment
|
||||
fatal = num == 0 or not skip_unavailable_fragments
|
||||
count = 0
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success = ctx['dl'].download(target_filename, {'url': combine_url(base_url, target_url)})
|
||||
success = ctx['dl'].download(target_filename, {'url': segment_url})
|
||||
if not success:
|
||||
return False
|
||||
down, target_sanitized = sanitize_open(target_filename, 'rb')
|
||||
@@ -72,16 +69,8 @@ class DashSegmentsFD(FragmentFD):
|
||||
return False
|
||||
return True
|
||||
|
||||
segments_to_download = [(initialization_url, 'Init')] if initialization_url else []
|
||||
segments_to_download.extend([
|
||||
(segment_url, 'Seg%d' % i)
|
||||
for i, segment_url in enumerate(segment_urls)])
|
||||
|
||||
for i, segment in enumerate(segments_to_download):
|
||||
# In DASH, the first segment contains necessary headers to
|
||||
# generate a valid MP4 file, so always abort for the first segment
|
||||
fatal = i == 0 or not skip_unavailable_fragments
|
||||
if not process_segment(segment, ctx['tmpfilename'], fatal):
|
||||
for i, segment in enumerate(segments):
|
||||
if not process_segment(segment, ctx['tmpfilename'], i):
|
||||
return False
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
@@ -31,7 +31,7 @@ class HlsFD(FragmentFD):
|
||||
FD_NAME = 'hlsnative'
|
||||
|
||||
@staticmethod
|
||||
def can_download(manifest):
|
||||
def can_download(manifest, info_dict):
|
||||
UNSUPPORTED_FEATURES = (
|
||||
r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
|
||||
r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
|
||||
@@ -53,6 +53,7 @@ class HlsFD(FragmentFD):
|
||||
)
|
||||
check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
|
||||
check_results.append(can_decrypt_frag or '#EXT-X-KEY:METHOD=AES-128' not in manifest)
|
||||
check_results.append(not info_dict.get('is_live'))
|
||||
return all(check_results)
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
@@ -62,7 +63,7 @@ class HlsFD(FragmentFD):
|
||||
|
||||
s = manifest.decode('utf-8', 'ignore')
|
||||
|
||||
if not self.can_download(s):
|
||||
if not self.can_download(s, info_dict):
|
||||
self.report_warning(
|
||||
'hlsnative has detected features it does not support, '
|
||||
'extraction will be delegated to ffmpeg')
|
||||
|
@@ -13,6 +13,9 @@ from ..utils import (
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
sanitized_Request,
|
||||
write_xattr,
|
||||
XAttrMetadataError,
|
||||
XAttrUnavailableError,
|
||||
)
|
||||
|
||||
|
||||
@@ -179,9 +182,8 @@ class HttpFD(FileDownloader):
|
||||
|
||||
if self.params.get('xattr_set_filesize', False) and data_len is not None:
|
||||
try:
|
||||
import xattr
|
||||
xattr.setxattr(tmpfilename, 'user.ytdl.filesize', str(data_len))
|
||||
except(OSError, IOError, ImportError) as err:
|
||||
write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
|
||||
except (XAttrUnavailableError, XAttrMetadataError) as err:
|
||||
self.report_error('unable to set filesize xattr: %s' % str(err))
|
||||
|
||||
try:
|
||||
|
@@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
class ABCIE(InfoExtractor):
|
||||
IE_NAME = 'abc.net.au'
|
||||
_VALID_URL = r'https?://www\.abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
|
||||
@@ -102,16 +102,16 @@ class ABCIViewIE(InfoExtractor):
|
||||
|
||||
# ABC iview programs are normally available for 14 days only.
|
||||
_TESTS = [{
|
||||
'url': 'http://iview.abc.net.au/programs/gardening-australia/FA1505V024S00',
|
||||
'md5': '979d10b2939101f0d27a06b79edad536',
|
||||
'url': 'http://iview.abc.net.au/programs/diaries-of-a-broken-mind/ZX9735A001S00',
|
||||
'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
|
||||
'info_dict': {
|
||||
'id': 'FA1505V024S00',
|
||||
'id': 'ZX9735A001S00',
|
||||
'ext': 'mp4',
|
||||
'title': 'Series 27 Ep 24',
|
||||
'description': 'md5:b28baeae7504d1148e1d2f0e3ed3c15d',
|
||||
'upload_date': '20160820',
|
||||
'uploader_id': 'abc1',
|
||||
'timestamp': 1471719600,
|
||||
'title': 'Diaries Of A Broken Mind',
|
||||
'description': 'md5:7de3903874b7a1be279fe6b68718fc9e',
|
||||
'upload_date': '20161010',
|
||||
'uploader_id': 'abc2',
|
||||
'timestamp': 1476064920,
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
}]
|
||||
@@ -121,7 +121,7 @@ class ABCIViewIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_params = self._parse_json(self._search_regex(
|
||||
r'videoParams\s*=\s*({.+?});', webpage, 'video params'), video_id)
|
||||
title = video_params['title']
|
||||
title = video_params.get('title') or video_params['seriesTitle']
|
||||
stream = next(s for s in video_params['playlist'] if s.get('type') == 'program')
|
||||
|
||||
formats = self._extract_akamai_formats(stream['hds-unmetered'], video_id)
|
||||
@@ -144,8 +144,8 @@ class ABCIViewIE(InfoExtractor):
|
||||
'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
|
||||
'series': video_params.get('seriesTitle'),
|
||||
'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
|
||||
'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage)),
|
||||
'episode': self._html_search_meta('episode_title', webpage),
|
||||
'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage, default=None)),
|
||||
'episode': self._html_search_meta('episode_title', webpage, default=None),
|
||||
'uploader_id': video_params.get('channel'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -1,64 +0,0 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class AftonbladetIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tv\.aftonbladet\.se/abtv/articles/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://tv.aftonbladet.se/abtv/articles/36015',
|
||||
'info_dict': {
|
||||
'id': '36015',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna',
|
||||
'description': 'Jupiters måne mest aktiv av alla himlakroppar',
|
||||
'timestamp': 1394142732,
|
||||
'upload_date': '20140306',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# find internal video meta data
|
||||
meta_url = 'http://aftonbladet-play-metadata.cdn.drvideo.aptoma.no/video/%s.json'
|
||||
player_config = self._parse_json(self._html_search_regex(
|
||||
r'data-player-config="([^"]+)"', webpage, 'player config'), video_id)
|
||||
internal_meta_id = player_config['aptomaVideoId']
|
||||
internal_meta_url = meta_url % internal_meta_id
|
||||
internal_meta_json = self._download_json(
|
||||
internal_meta_url, video_id, 'Downloading video meta data')
|
||||
|
||||
# find internal video formats
|
||||
format_url = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s'
|
||||
internal_video_id = internal_meta_json['videoId']
|
||||
internal_formats_url = format_url % internal_video_id
|
||||
internal_formats_json = self._download_json(
|
||||
internal_formats_url, video_id, 'Downloading video formats')
|
||||
|
||||
formats = []
|
||||
for fmt in internal_formats_json['formats']['http']['pseudostreaming']['mp4']:
|
||||
p = fmt['paths'][0]
|
||||
formats.append({
|
||||
'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']),
|
||||
'ext': 'mp4',
|
||||
'width': int_or_none(fmt.get('width')),
|
||||
'height': int_or_none(fmt.get('height')),
|
||||
'tbr': int_or_none(fmt.get('bitrate')),
|
||||
'protocol': 'http',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': internal_meta_json['title'],
|
||||
'formats': formats,
|
||||
'thumbnail': internal_meta_json.get('imageUrl'),
|
||||
'description': internal_meta_json.get('shortPreamble'),
|
||||
'timestamp': int_or_none(internal_meta_json.get('timePublished')),
|
||||
'duration': int_or_none(internal_meta_json.get('duration')),
|
||||
'view_count': int_or_none(internal_meta_json.get('views')),
|
||||
}
|
@@ -4,7 +4,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class AlJazeeraIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
|
||||
|
@@ -1,29 +1,26 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
remove_end,
|
||||
qualities,
|
||||
unescapeHTML,
|
||||
xpath_element,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
class AllocineIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?P<typ>article|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P<id>[0-9]+)(?:\.html)?'
|
||||
_VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?:article|video|film)/(?:fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P<id>[0-9]+)(?:\.html)?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html',
|
||||
'md5': '0c9fcf59a841f65635fa300ac43d8269',
|
||||
'info_dict': {
|
||||
'id': '19546517',
|
||||
'display_id': '18635087',
|
||||
'ext': 'mp4',
|
||||
'title': 'Astérix - Le Domaine des Dieux Teaser VF',
|
||||
'description': 'md5:abcd09ce503c6560512c14ebfdb720d2',
|
||||
'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
@@ -31,64 +28,82 @@ class AllocineIE(InfoExtractor):
|
||||
'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0',
|
||||
'info_dict': {
|
||||
'id': '19540403',
|
||||
'display_id': '19540403',
|
||||
'ext': 'mp4',
|
||||
'title': 'Planes 2 Bande-annonce VF',
|
||||
'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.allocine.fr/film/fichefilm_gen_cfilm=181290.html',
|
||||
'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html',
|
||||
'md5': '101250fb127ef9ca3d73186ff22a47ce',
|
||||
'info_dict': {
|
||||
'id': '19544709',
|
||||
'display_id': '19544709',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dragons 2 - Bande annonce finale VF',
|
||||
'description': 'md5:601d15393ac40f249648ef000720e7e3',
|
||||
'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.allocine.fr/video/video-19550147/',
|
||||
'only_matching': True,
|
||||
'md5': '3566c0668c0235e2d224fd8edb389f67',
|
||||
'info_dict': {
|
||||
'id': '19550147',
|
||||
'ext': 'mp4',
|
||||
'title': 'Faux Raccord N°123 - Les gaffes de Cliffhanger',
|
||||
'description': 'md5:bc734b83ffa2d8a12188d9eb48bb6354',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
typ = mobj.group('typ')
|
||||
display_id = mobj.group('id')
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
if typ == 'film':
|
||||
video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id')
|
||||
else:
|
||||
player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player', default=None)
|
||||
if player:
|
||||
player_data = json.loads(player)
|
||||
video_id = compat_str(player_data['refMedia'])
|
||||
else:
|
||||
model = self._search_regex(r'data-model="([^"]+)">', webpage, 'data model')
|
||||
model_data = self._parse_json(unescapeHTML(model), display_id)
|
||||
video_id = compat_str(model_data['id'])
|
||||
|
||||
xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id)
|
||||
|
||||
video = xpath_element(xml, './/AcVisionVideo').attrib
|
||||
formats = []
|
||||
quality = qualities(['ld', 'md', 'hd'])
|
||||
|
||||
formats = []
|
||||
for k, v in video.items():
|
||||
if re.match(r'.+_path', k):
|
||||
format_id = k.split('_')[0]
|
||||
model = self._html_search_regex(
|
||||
r'data-model="([^"]+)"', webpage, 'data model', default=None)
|
||||
if model:
|
||||
model_data = self._parse_json(model, display_id)
|
||||
|
||||
for video_url in model_data['sources'].values():
|
||||
video_id, format_id = url_basename(video_url).split('_')[:2]
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'quality': quality(format_id),
|
||||
'url': v,
|
||||
'url': video_url,
|
||||
})
|
||||
|
||||
title = model_data['title']
|
||||
else:
|
||||
video_id = display_id
|
||||
media_data = self._download_json(
|
||||
'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
|
||||
for key, value in media_data['video'].items():
|
||||
if not key.endswith('Path'):
|
||||
continue
|
||||
|
||||
format_id = key[:-len('Path')]
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'quality': quality(format_id),
|
||||
'url': value,
|
||||
})
|
||||
|
||||
title = remove_end(self._html_search_regex(
|
||||
r'(?s)<title>(.+?)</title>', webpage, 'title'
|
||||
).strip(), ' - AlloCiné')
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video['videoTitle'],
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'formats': formats,
|
||||
'description': self._og_search_description(webpage),
|
||||
|
@@ -28,6 +28,7 @@ class AMCNetworksIE(ThePlatformIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Requires TV provider accounts',
|
||||
}, {
|
||||
'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
|
||||
'only_matching': True,
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@@ -410,6 +410,22 @@ class ArteTVEmbedIE(ArteTVPlus7IE):
|
||||
return self._extract_from_json_url(json_url, video_id, lang)
|
||||
|
||||
|
||||
class TheOperaPlatformIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'theoperaplatform'
|
||||
_VALID_URL = r'https?://(?:www\.)?theoperaplatform\.eu/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.theoperaplatform.eu/de/opera/verdi-otello',
|
||||
'md5': '970655901fa2e82e04c00b955e9afe7b',
|
||||
'info_dict': {
|
||||
'id': '060338-009-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'Verdi - OTELLO',
|
||||
'upload_date': '20160927',
|
||||
},
|
||||
}]
|
||||
|
||||
|
||||
class ArteTVPlaylistIE(ArteTVBaseIE):
|
||||
IE_NAME = 'arte.tv:playlist'
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/[^#]*#collection/(?P<id>PL-\d+)'
|
||||
|
@@ -50,25 +50,6 @@ class AWAANBaseIE(InfoExtractor):
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
def _extract_video_formats(self, webpage, video_id, m3u8_entry_protocol):
|
||||
formats = []
|
||||
format_url_base = 'http' + self._html_search_regex(
|
||||
[
|
||||
r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8',
|
||||
r'<a[^>]+href="rtsp(://[^"]+)"'
|
||||
], webpage, 'format url')
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url_base + '/manifest.mpd',
|
||||
video_id, mpd_id='dash', fatal=False))
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url_base + '/playlist.m3u8', video_id, 'mp4',
|
||||
m3u8_entry_protocol, m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url_base + '/manifest.f4m',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
|
||||
class AWAANVideoIE(AWAANBaseIE):
|
||||
IE_NAME = 'awaan:video'
|
||||
@@ -85,6 +66,7 @@ class AWAANVideoIE(AWAANBaseIE):
|
||||
'duration': 2041,
|
||||
'timestamp': 1227504126,
|
||||
'upload_date': '20081124',
|
||||
'uploader_id': '71',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1',
|
||||
@@ -99,16 +81,18 @@ class AWAANVideoIE(AWAANBaseIE):
|
||||
video_id, headers={'Origin': 'http://awaan.ae'})
|
||||
info = self._parse_video_data(video_data, video_id, False)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' +
|
||||
compat_urllib_parse_urlencode({
|
||||
'id': video_data['id'],
|
||||
'user_id': video_data['user_id'],
|
||||
'signature': video_data['signature'],
|
||||
'countries': 'Q0M=',
|
||||
'filter': 'DENY',
|
||||
}), video_id)
|
||||
info['formats'] = self._extract_video_formats(webpage, video_id, 'm3u8_native')
|
||||
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + compat_urllib_parse_urlencode({
|
||||
'id': video_data['id'],
|
||||
'user_id': video_data['user_id'],
|
||||
'signature': video_data['signature'],
|
||||
'countries': 'Q0M=',
|
||||
'filter': 'DENY',
|
||||
})
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': embed_url,
|
||||
'ie_key': 'MangomoloVideo',
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
@@ -138,16 +122,18 @@ class AWAANLiveIE(AWAANBaseIE):
|
||||
channel_id, headers={'Origin': 'http://awaan.ae'})
|
||||
info = self._parse_video_data(channel_data, channel_id, True)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' +
|
||||
compat_urllib_parse_urlencode({
|
||||
'id': base64.b64encode(channel_data['user_id'].encode()).decode(),
|
||||
'channelid': base64.b64encode(channel_data['id'].encode()).decode(),
|
||||
'signature': channel_data['signature'],
|
||||
'countries': 'Q0M=',
|
||||
'filter': 'DENY',
|
||||
}), channel_id)
|
||||
info['formats'] = self._extract_video_formats(webpage, channel_id, 'm3u8')
|
||||
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + compat_urllib_parse_urlencode({
|
||||
'id': base64.b64encode(channel_data['user_id'].encode()).decode(),
|
||||
'channelid': base64.b64encode(channel_data['id'].encode()).decode(),
|
||||
'signature': channel_data['signature'],
|
||||
'countries': 'Q0M=',
|
||||
'filter': 'DENY',
|
||||
})
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': embed_url,
|
||||
'ie_key': 'MangomoloLive',
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
|
@@ -103,7 +103,7 @@ class AzubuIE(InfoExtractor):
|
||||
|
||||
|
||||
class AzubuLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www.azubu.tv/(?P<id>[^/]+)$'
|
||||
_VALID_URL = r'https?://(?:www\.)?azubu\.tv/(?P<id>[^/]+)$'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.azubu.tv/MarsTVMDLen',
|
||||
|
@@ -1028,7 +1028,7 @@ class BBCIE(BBCCoUkIE):
|
||||
|
||||
|
||||
class BBCCoUkArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www.bbc.co.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
|
||||
IE_NAME = 'bbc.co.uk:article'
|
||||
IE_DESC = 'BBC articles'
|
||||
|
||||
|
@@ -8,10 +8,10 @@ from ..compat import compat_str
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class BeatportProIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://pro\.beatport\.com/track/(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
|
||||
class BeatportIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|pro\.)?beatport\.com/track/(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://pro.beatport.com/track/synesthesia-original-mix/5379371',
|
||||
'url': 'https://beatport.com/track/synesthesia-original-mix/5379371',
|
||||
'md5': 'b3c34d8639a2f6a7f734382358478887',
|
||||
'info_dict': {
|
||||
'id': '5379371',
|
||||
@@ -20,7 +20,7 @@ class BeatportProIE(InfoExtractor):
|
||||
'title': 'Froxic - Synesthesia (Original Mix)',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://pro.beatport.com/track/love-and-war-original-mix/3756896',
|
||||
'url': 'https://beatport.com/track/love-and-war-original-mix/3756896',
|
||||
'md5': 'e44c3025dfa38c6577fbaeb43da43514',
|
||||
'info_dict': {
|
||||
'id': '3756896',
|
||||
@@ -29,7 +29,7 @@ class BeatportProIE(InfoExtractor):
|
||||
'title': 'Wolfgang Gartner - Love & War (Original Mix)',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://pro.beatport.com/track/birds-original-mix/4991738',
|
||||
'url': 'https://beatport.com/track/birds-original-mix/4991738',
|
||||
'md5': 'a1fd8e8046de3950fd039304c186c05f',
|
||||
'info_dict': {
|
||||
'id': '4991738',
|
@@ -6,8 +6,25 @@ import re
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class CTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>ctv|tsn|bnn|thecomedynetwork)\.ca/.*?(?:\bvid=|-vid|~|%7E)(?P<id>[0-9.]+)'
|
||||
class BellMediaIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?
|
||||
(?P<domain>
|
||||
(?:
|
||||
ctv|
|
||||
tsn|
|
||||
bnn|
|
||||
thecomedynetwork|
|
||||
discovery|
|
||||
discoveryvelocity|
|
||||
sciencechannel|
|
||||
investigationdiscovery|
|
||||
animalplanet|
|
||||
bravo|
|
||||
mtv|
|
||||
space
|
||||
)\.ca|
|
||||
much\.com
|
||||
)/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6})'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ctv.ca/video/player?vid=706966',
|
||||
'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
|
||||
@@ -32,15 +49,27 @@ class CTVIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.ctv.ca/YourMorning/Video/S1E6-Monday-August-29-2016-vid938009',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.much.com/shows/atmidnight/episode948007/tuesday-september-13-2016',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_DOMAINS = {
|
||||
'thecomedynetwork': 'comedy',
|
||||
'discoveryvelocity': 'discvel',
|
||||
'sciencechannel': 'discsci',
|
||||
'investigationdiscovery': 'invdisc',
|
||||
'animalplanet': 'aniplan',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, video_id = re.match(self._VALID_URL, url).groups()
|
||||
if domain == 'thecomedynetwork':
|
||||
domain = 'comedy'
|
||||
domain = domain.split('.')[0]
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': '9c9media:%s_web:%s' % (domain, video_id),
|
||||
'url': '9c9media:%s_web:%s' % (self._DOMAINS.get(domain, domain), video_id),
|
||||
'ie_key': 'NineCNineMedia',
|
||||
}
|
@@ -17,7 +17,7 @@ from ..utils import (
|
||||
class BiliBiliIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/v/)(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
_TEST = {
|
||||
'url': 'http://www.bilibili.tv/video/av1074402/',
|
||||
'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e',
|
||||
'info_dict': {
|
||||
@@ -32,64 +32,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
'uploader': '菊子桑',
|
||||
'uploader_id': '156160',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.bilibili.com/video/av1041170/',
|
||||
'info_dict': {
|
||||
'id': '1041170',
|
||||
'ext': 'mp4',
|
||||
'title': '【BD1080P】刀语【诸神&异域】',
|
||||
'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~',
|
||||
'duration': 3382.259,
|
||||
'timestamp': 1396530060,
|
||||
'upload_date': '20140403',
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
'uploader': '枫叶逝去',
|
||||
'uploader_id': '520116',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.bilibili.com/video/av4808130/',
|
||||
'info_dict': {
|
||||
'id': '4808130',
|
||||
'ext': 'mp4',
|
||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||
'duration': 1493.995,
|
||||
'timestamp': 1464564180,
|
||||
'upload_date': '20160529',
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
'uploader': '喜欢拉面',
|
||||
'uploader_id': '151066',
|
||||
},
|
||||
}, {
|
||||
# Missing upload time
|
||||
'url': 'http://www.bilibili.com/video/av1867637/',
|
||||
'info_dict': {
|
||||
'id': '1867637',
|
||||
'ext': 'mp4',
|
||||
'title': '【HDTV】【喜剧】岳父岳母真难当 (2014)【法国票房冠军】',
|
||||
'description': '一个信奉天主教的法国旧式传统资产阶级家庭中有四个女儿。三个女儿却分别找了阿拉伯、犹太、中国丈夫,老夫老妻唯独期盼剩下未嫁的小女儿能找一个信奉天主教的法国白人,结果没想到小女儿找了一位非裔黑人……【这次应该不会跳帧了】',
|
||||
'duration': 5760.0,
|
||||
'uploader': '黑夜为猫',
|
||||
'uploader_id': '610729',
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
},
|
||||
'params': {
|
||||
# Just to test metadata extraction
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['upload time'],
|
||||
}, {
|
||||
'url': 'http://bangumi.bilibili.com/anime/v/40068',
|
||||
'md5': '08d539a0884f3deb7b698fb13ba69696',
|
||||
'info_dict': {
|
||||
'id': '40068',
|
||||
'ext': 'mp4',
|
||||
'duration': 1402.357,
|
||||
'title': '混沌武士 : 第7集 四面楚歌 A Risky Racket',
|
||||
'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
|
||||
'thumbnail': 're:^http?://.+\.jpg',
|
||||
},
|
||||
}]
|
||||
}
|
||||
|
||||
_APP_KEY = '6f90a59ac58a4123'
|
||||
_BILIBILI_KEY = '0bfd84cc3940035173f35e6777508326'
|
||||
@@ -124,7 +67,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
'url': durl['url'],
|
||||
'filesize': int_or_none(durl['size']),
|
||||
}]
|
||||
for backup_url in durl['backup_url']:
|
||||
for backup_url in durl.get('backup_url', []):
|
||||
formats.append({
|
||||
'url': backup_url,
|
||||
# backup URLs have lower priorities
|
||||
|
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
class BpbIE(InfoExtractor):
|
||||
IE_DESC = 'Bundeszentrale für politische Bildung'
|
||||
_VALID_URL = r'https?://www\.bpb\.de/mediathek/(?P<id>[0-9]+)/'
|
||||
_VALID_URL = r'https?://(?:www\.)?bpb\.de/mediathek/(?P<id>[0-9]+)/'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@@ -621,15 +621,21 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
'url': text_track['src'],
|
||||
})
|
||||
|
||||
is_live = False
|
||||
duration = float_or_none(json_data.get('duration'), 1000)
|
||||
if duration and duration < 0:
|
||||
is_live = True
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'description': clean_html(json_data.get('description')),
|
||||
'thumbnail': json_data.get('thumbnail') or json_data.get('poster'),
|
||||
'duration': float_or_none(json_data.get('duration'), 1000),
|
||||
'duration': duration,
|
||||
'timestamp': parse_iso8601(json_data.get('published_at')),
|
||||
'uploader_id': account_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'tags': json_data.get('tags', []),
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
@@ -1,6 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -8,15 +7,15 @@ from ..utils import ExtractorError
|
||||
|
||||
|
||||
class BYUtvIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
|
||||
'md5': '05850eb8c749e2ee05ad5a1c34668493',
|
||||
'info_dict': {
|
||||
'id': 'studio-c-season-5-episode-5',
|
||||
'id': '6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
|
||||
'display_id': 'studio-c-season-5-episode-5',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:e07269172baff037f8e8bf9956bc9747',
|
||||
'title': 'Season 5 Episode 5',
|
||||
'description': 'md5:e07269172baff037f8e8bf9956bc9747',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 1486.486,
|
||||
},
|
||||
@@ -24,28 +23,71 @@ class BYUtvIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
episode_code = self._search_regex(
|
||||
r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information')
|
||||
|
||||
ep = self._parse_json(
|
||||
episode_code, display_id, transform_source=lambda s:
|
||||
re.sub(r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', s))
|
||||
|
||||
if ep['providerType'] != 'Ooyala':
|
||||
raise ExtractorError('Unsupported provider %s' % ep['provider'])
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'Ooyala',
|
||||
'url': 'ooyala:%s' % ep['providerId'],
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': ep['title'],
|
||||
'description': ep.get('description'),
|
||||
'thumbnail': ep.get('imageThumbnail'),
|
||||
}
|
||||
|
||||
|
||||
class BYUtvEventIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/event/(?P<id>[0-9a-f-]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.byutv.org/watch/event/29941b9b-8bf6-48d2-aebf-7a87add9e34b',
|
||||
'info_dict': {
|
||||
'id': '29941b9b-8bf6-48d2-aebf-7a87add9e34b',
|
||||
'ext': 'mp4',
|
||||
'title': 'Toledo vs. BYU (9/30/16)',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('video_id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
episode_code = self._search_regex(
|
||||
r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information')
|
||||
episode_json = re.sub(
|
||||
r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', episode_code)
|
||||
ep = json.loads(episode_json)
|
||||
|
||||
if ep['providerType'] == 'Ooyala':
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'Ooyala',
|
||||
'url': 'ooyala:%s' % ep['providerId'],
|
||||
'id': video_id,
|
||||
'title': ep['title'],
|
||||
'description': ep.get('description'),
|
||||
'thumbnail': ep.get('imageThumbnail'),
|
||||
}
|
||||
else:
|
||||
raise ExtractorError('Unsupported provider %s' % ep['provider'])
|
||||
ooyala_id = self._search_regex(
|
||||
r'providerId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||
webpage, 'ooyala id', group='id')
|
||||
|
||||
title = self._search_regex(
|
||||
r'class=["\']description["\'][^>]*>\s*<h1>([^<]+)</h1>', webpage,
|
||||
'title').strip()
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'Ooyala',
|
||||
'url': 'ooyala:%s' % ooyala_id,
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
}
|
||||
|
@@ -112,7 +112,7 @@ class CamdemyIE(InfoExtractor):
|
||||
|
||||
|
||||
class CamdemyFolderIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www.camdemy.com/folder/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?camdemy\.com/folder/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# links with trailing slash
|
||||
'url': 'http://www.camdemy.com/folder/450',
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@@ -6,11 +6,13 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlparse
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
unified_strdate,
|
||||
qualities,
|
||||
int_or_none,
|
||||
qualities,
|
||||
remove_end,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
@@ -43,47 +45,46 @@ class CanalplusIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814',
|
||||
'md5': '41f438a4904f7664b91b4ed0dec969dc',
|
||||
'info_dict': {
|
||||
'id': '1192814',
|
||||
'id': '1405510',
|
||||
'display_id': 'pid1830-c-zapping',
|
||||
'ext': 'mp4',
|
||||
'title': "L'Année du Zapping 2014 - L'Année du Zapping 2014",
|
||||
'description': "Toute l'année 2014 dans un Zapping exceptionnel !",
|
||||
'upload_date': '20150105',
|
||||
'title': 'Zapping - 02/07/2016',
|
||||
'description': 'Le meilleur de toutes les chaînes, tous les jours',
|
||||
'upload_date': '20160702',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
|
||||
'info_dict': {
|
||||
'id': '1108190',
|
||||
'ext': 'flv',
|
||||
'title': 'Le labyrinthe - Boing super ranger',
|
||||
'display_id': 'pid1405-le-labyrinthe-boing-super-ranger',
|
||||
'ext': 'mp4',
|
||||
'title': 'BOING SUPER RANGER - Ep : Le labyrinthe',
|
||||
'description': 'md5:4cea7a37153be42c1ba2c1d3064376ff',
|
||||
'upload_date': '20140724',
|
||||
},
|
||||
'skip': 'Only works from France',
|
||||
}, {
|
||||
'url': 'http://www.d8.tv/d8-docs-mags/pid5198-d8-en-quete-d-actualite.html?vid=1390231',
|
||||
'url': 'http://www.c8.fr/c8-divertissement/ms-touche-pas-a-mon-poste/pid6318-videos-integrales.html',
|
||||
'md5': '4b47b12b4ee43002626b97fad8fb1de5',
|
||||
'info_dict': {
|
||||
'id': '1390231',
|
||||
'id': '1420213',
|
||||
'display_id': 'pid6318-videos-integrales',
|
||||
'ext': 'mp4',
|
||||
'title': "Vacances pas chères : prix discount ou grosses dépenses ? - En quête d'actualité",
|
||||
'description': 'md5:edb6cf1cb4a1e807b5dd089e1ac8bfc6',
|
||||
'upload_date': '20160512',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'title': 'TPMP ! Même le matin - Les 35H de Baba - 14/10/2016',
|
||||
'description': 'md5:f96736c1b0ffaa96fd5b9e60ad871799',
|
||||
'upload_date': '20161014',
|
||||
},
|
||||
'skip': 'Only works from France',
|
||||
}, {
|
||||
'url': 'http://www.itele.fr/chroniques/invite-bruce-toussaint/thierry-solere-nicolas-sarkozy-officialisera-sa-candidature-a-la-primaire-quand-il-le-voudra-167224',
|
||||
'url': 'http://www.itele.fr/chroniques/invite-michael-darmon/rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
|
||||
'info_dict': {
|
||||
'id': '1398334',
|
||||
'id': '1420176',
|
||||
'display_id': 'rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
|
||||
'ext': 'mp4',
|
||||
'title': "L'invité de Bruce Toussaint du 07/06/2016 - ",
|
||||
'description': 'md5:40ac7c9ad0feaeb6f605bad986f61324',
|
||||
'upload_date': '20160607',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'title': 'L\'invité de Michaël Darmon du 14/10/2016 - ',
|
||||
'description': 'Chaque matin du lundi au vendredi, Michaël Darmon reçoit un invité politique à 8h25.',
|
||||
'upload_date': '20161014',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://m.canalplus.fr/?vid=1398231',
|
||||
@@ -95,18 +96,17 @@ class CanalplusIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.groupdict().get('id') or mobj.groupdict().get('vid')
|
||||
|
||||
site_id = self._SITE_ID_MAP[compat_urllib_parse_urlparse(url).netloc.rsplit('.', 2)[-2]]
|
||||
|
||||
# Beware, some subclasses do not define an id group
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
display_id = remove_end(dict_get(mobj.groupdict(), ('display_id', 'id', 'vid')), '.html')
|
||||
|
||||
if video_id is None:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
[r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)', r'id=["\']canal_video_player(?P<id>\d+)'],
|
||||
webpage, 'video id', group='id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
[r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)',
|
||||
r'id=["\']canal_video_player(?P<id>\d+)'],
|
||||
webpage, 'video id', group='id')
|
||||
|
||||
info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id)
|
||||
video_data = self._download_json(info_url, video_id, 'Downloading video JSON')
|
||||
|
@@ -71,7 +71,7 @@ class CanvasIE(InfoExtractor):
|
||||
webpage)).strip()
|
||||
|
||||
video_id = self._html_search_regex(
|
||||
r'data-video=(["\'])(?P<id>.+?)\1', webpage, 'video id', group='id')
|
||||
r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', group='id')
|
||||
|
||||
data = self._download_json(
|
||||
'https://mediazone.vrt.be/api/v1/%s/assets/%s'
|
||||
|
@@ -9,6 +9,8 @@ from ..utils import (
|
||||
try_get,
|
||||
)
|
||||
|
||||
from .videomore import VideomoreIE
|
||||
|
||||
|
||||
class CarambaTVIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:carambatv:|https?://video1\.carambatv\.ru/v/)(?P<id>\d+)'
|
||||
@@ -62,14 +64,16 @@ class CarambaTVPageIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://carambatv\.ru/(?:[^/]+/)+(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'http://carambatv.ru/movie/bad-comedian/razborka-v-manile/',
|
||||
'md5': '',
|
||||
'md5': 'a49fb0ec2ad66503eeb46aac237d3c86',
|
||||
'info_dict': {
|
||||
'id': '191910501',
|
||||
'ext': 'mp4',
|
||||
'id': '475222',
|
||||
'ext': 'flv',
|
||||
'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 2678.31,
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
# duration reported by videomore is incorrect
|
||||
'duration': int,
|
||||
},
|
||||
'add_ie': [VideomoreIE.ie_key()],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -77,6 +81,16 @@ class CarambaTVPageIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
videomore_url = VideomoreIE._extract_url(webpage)
|
||||
if videomore_url:
|
||||
title = self._og_search_title(webpage)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': videomore_url,
|
||||
'ie_key': VideomoreIE.ie_key(),
|
||||
'title': title,
|
||||
}
|
||||
|
||||
video_url = self._og_search_property('video:iframe', webpage, default=None)
|
||||
|
||||
if not video_url:
|
||||
|
@@ -33,4 +33,10 @@ class CartoonNetworkIE(TurnerBaseIE):
|
||||
'media_src': 'http://androidhls-secure.cdn.turner.com/toon/big',
|
||||
'tokenizer_src': 'http://www.cartoonnetwork.com/cntv/mvpd/processors/services/token_ipadAdobe.do',
|
||||
},
|
||||
}, {
|
||||
'url': url,
|
||||
'site_name': 'CartoonNetwork',
|
||||
'auth_required': self._search_regex(
|
||||
r'_cnglobal\.cvpFullOrPreviewAuth\s*=\s*(true|false);',
|
||||
webpage, 'auth required', default='false') == 'true',
|
||||
})
|
||||
|
@@ -4,7 +4,9 @@ from .theplatform import ThePlatformFeedIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
find_xpath_attr,
|
||||
ExtractorError,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
@@ -47,27 +49,49 @@ class CBSIE(CBSBaseIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_video_info(self, guid):
|
||||
path = 'dJ5BDC/media/guid/2198311517/' + guid
|
||||
smil_url = 'http://link.theplatform.com/s/%s?mbr=true' % path
|
||||
formats, subtitles = self._extract_theplatform_smil(smil_url + '&manifest=m3u', guid)
|
||||
for r in ('OnceURL&formats=M3U', 'HLS&formats=M3U', 'RTMP', 'WIFI', '3G'):
|
||||
try:
|
||||
tp_formats, _ = self._extract_theplatform_smil(smil_url + '&assetTypes=' + r, guid, 'Downloading %s SMIL data' % r.split('&')[0])
|
||||
formats.extend(tp_formats)
|
||||
except ExtractorError:
|
||||
def _extract_video_info(self, content_id):
|
||||
items_data = self._download_xml(
|
||||
'http://can.cbs.com/thunder/player/videoPlayerService.php',
|
||||
content_id, query={'partner': 'cbs', 'contentId': content_id})
|
||||
video_data = xpath_element(items_data, './/item')
|
||||
title = xpath_text(video_data, 'videoTitle', 'title', True)
|
||||
tp_path = 'dJ5BDC/media/guid/2198311517/%s' % content_id
|
||||
tp_release_url = 'http://link.theplatform.com/s/' + tp_path
|
||||
|
||||
asset_types = []
|
||||
subtitles = {}
|
||||
formats = []
|
||||
for item in items_data.findall('.//item'):
|
||||
asset_type = xpath_text(item, 'assetType')
|
||||
if not asset_type or asset_type in asset_types:
|
||||
continue
|
||||
asset_types.append(asset_type)
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'assetTypes': asset_type,
|
||||
}
|
||||
if asset_type.startswith('HLS') or asset_type in ('OnceURL', 'StreamPack'):
|
||||
query['formats'] = 'MPEG4,M3U'
|
||||
elif asset_type in ('RTMP', 'WIFI', '3G'):
|
||||
query['formats'] = 'MPEG4,FLV'
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
update_url_query(tp_release_url, query), content_id,
|
||||
'Downloading %s SMIL data' % asset_type)
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
self._sort_formats(formats)
|
||||
metadata = self._download_theplatform_metadata(path, guid)
|
||||
info = self._parse_theplatform_metadata(metadata)
|
||||
|
||||
info = self._extract_theplatform_metadata(tp_path, content_id)
|
||||
info.update({
|
||||
'id': guid,
|
||||
'id': content_id,
|
||||
'title': title,
|
||||
'series': xpath_text(video_data, 'seriesTitle'),
|
||||
'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
|
||||
'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
|
||||
'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
|
||||
'thumbnail': xpath_text(video_data, 'previewImageURL'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'series': metadata.get('cbs$SeriesTitle'),
|
||||
'season_number': int_or_none(metadata.get('cbs$SeasonNumber')),
|
||||
'episode': metadata.get('cbs$EpisodeTitle'),
|
||||
'episode_number': int_or_none(metadata.get('cbs$EpisodeNumber')),
|
||||
})
|
||||
return info
|
||||
|
||||
|
@@ -63,7 +63,7 @@ class CBSInteractiveIE(ThePlatformIE):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
data_json = self._html_search_regex(
|
||||
r"data-(?:cnet|zdnet)-video(?:-uvp)?-options='([^']+)'",
|
||||
r"data-(?:cnet|zdnet)-video(?:-uvp(?:js)?)?-options='([^']+)'",
|
||||
webpage, 'data json')
|
||||
data = self._parse_json(data_json, display_id)
|
||||
vdata = data.get('video') or data['videos'][0]
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -9,6 +9,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class CBSNewsIE(CBSIE):
|
||||
IE_NAME = 'cbsnews'
|
||||
IE_DESC = 'CBS News'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
|
||||
|
||||
@@ -68,15 +69,16 @@ class CBSNewsIE(CBSIE):
|
||||
|
||||
|
||||
class CBSNewsLiveVideoIE(InfoExtractor):
|
||||
IE_NAME = 'cbsnews:livevideo'
|
||||
IE_DESC = 'CBS News Live Videos'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[^/?#]+)'
|
||||
|
||||
# Live videos get deleted soon. See http://www.cbsnews.com/live/ for the latest examples
|
||||
_TEST = {
|
||||
'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
|
||||
'info_dict': {
|
||||
'id': 'clinton-sanders-prepare-to-face-off-in-nh',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Clinton, Sanders Prepare To Face Off In NH',
|
||||
'duration': 334,
|
||||
},
|
||||
@@ -84,25 +86,22 @@ class CBSNewsLiveVideoIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_info = self._download_json(
|
||||
'http://feeds.cbsn.cbsnews.com/rundown/story', display_id, query={
|
||||
'device': 'desktop',
|
||||
'dvr_slug': display_id,
|
||||
})
|
||||
|
||||
video_info = self._parse_json(self._html_search_regex(
|
||||
r'data-story-obj=\'({.+?})\'', webpage, 'video JSON info'), video_id)['story']
|
||||
|
||||
hdcore_sign = 'hdcore=3.3.1'
|
||||
f4m_formats = self._extract_f4m_formats(video_info['url'] + '&' + hdcore_sign, video_id)
|
||||
if f4m_formats:
|
||||
for entry in f4m_formats:
|
||||
# URLs without the extra param induce an 404 error
|
||||
entry.update({'extra_param_to_segment_url': hdcore_sign})
|
||||
self._sort_formats(f4m_formats)
|
||||
formats = self._extract_akamai_formats(video_info['url'], display_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'id': display_id,
|
||||
'display_id': display_id,
|
||||
'title': video_info['headline'],
|
||||
'thumbnail': video_info.get('thumbnail_url_hd') or video_info.get('thumbnail_url_sd'),
|
||||
'duration': parse_duration(video_info.get('segmentDur')),
|
||||
'formats': f4m_formats,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -4,7 +4,7 @@ from .cbs import CBSBaseIE
|
||||
|
||||
|
||||
class CBSSportsIE(CBSBaseIE):
|
||||
_VALID_URL = r'https?://www\.cbssports\.com/video/player/[^/]+/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbssports\.com/video/player/[^/]+/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cbssports.com/video/player/videos/708337219968/0/ben-simmons-the-next-lebron?-not-so-fast',
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@@ -17,7 +17,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class CeskaTelevizeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P<id>[^/#?]+)/*(?:[#?].*)?$'
|
||||
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P<id>[^/#?]+)/*(?:[#?].*)?$'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
|
||||
'info_dict': {
|
||||
|
@@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_duration
|
||||
@@ -65,12 +66,11 @@ class ChirbitIE(InfoExtractor):
|
||||
|
||||
class ChirbitProfileIE(InfoExtractor):
|
||||
IE_NAME = 'chirbit:profile'
|
||||
_VALID_URL = r'https?://(?:www\.)?chirbit.com/(?:rss/)?(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?chirbit\.com/(?:rss/)?(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
'url': 'http://chirbit.com/ScarletBeauty',
|
||||
'info_dict': {
|
||||
'id': 'ScarletBeauty',
|
||||
'title': 'Chirbits by ScarletBeauty',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}
|
||||
@@ -78,13 +78,10 @@ class ChirbitProfileIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
profile_id = self._match_id(url)
|
||||
|
||||
rss = self._download_xml(
|
||||
'http://chirbit.com/rss/%s' % profile_id, profile_id)
|
||||
webpage = self._download_webpage(url, profile_id)
|
||||
|
||||
entries = [
|
||||
self.url_result(audio_url.text, 'Chirbit')
|
||||
for audio_url in rss.findall('./channel/item/link')]
|
||||
self.url_result(self._proto_relative_url('//chirb.it/' + video_id))
|
||||
for _, video_id in re.findall(r'<input[^>]+id=([\'"])copy-btn-(?P<id>[0-9a-zA-Z]+)\1', webpage)]
|
||||
|
||||
title = rss.find('./channel/title').text
|
||||
|
||||
return self.playlist_result(entries, profile_id, title)
|
||||
return self.playlist_result(entries, profile_id)
|
||||
|
@@ -1,3 +1,4 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -10,15 +11,15 @@ from ..utils import (
|
||||
class ClipfishIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?clipfish\.de/(?:[^/]+/)+video/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
|
||||
'md5': '79bc922f3e8a9097b3d68a93780fd475',
|
||||
'url': 'http://www.clipfish.de/special/ugly-americans/video/4343170/s01-e01-ugly-americans-date-in-der-hoelle/',
|
||||
'md5': '720563e467b86374c194bdead08d207d',
|
||||
'info_dict': {
|
||||
'id': '3966754',
|
||||
'id': '4343170',
|
||||
'ext': 'mp4',
|
||||
'title': 'FIFA 14 - E3 2013 Trailer',
|
||||
'description': 'Video zu FIFA 14: E3 2013 Trailer',
|
||||
'upload_date': '20130611',
|
||||
'duration': 82,
|
||||
'title': 'S01 E01 - Ugly Americans - Date in der Hölle',
|
||||
'description': 'Mark Lilly arbeitet im Sozialdienst der Stadt New York und soll Immigranten bei ihrer Einbürgerung in die USA zur Seite stehen.',
|
||||
'upload_date': '20161005',
|
||||
'duration': 1291,
|
||||
'view_count': int,
|
||||
}
|
||||
}
|
||||
@@ -50,10 +51,14 @@ class ClipfishIE(InfoExtractor):
|
||||
'tbr': int_or_none(video_info.get('bitrate')),
|
||||
})
|
||||
|
||||
descr = video_info.get('descr')
|
||||
if descr:
|
||||
descr = descr.strip()
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_info['title'],
|
||||
'description': video_info.get('descr'),
|
||||
'description': descr,
|
||||
'formats': formats,
|
||||
'thumbnail': video_info.get('media_content_thumbnail_large') or video_info.get('media_thumbnail'),
|
||||
'duration': int_or_none(video_info.get('media_length')),
|
||||
|
@@ -1,9 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
@@ -30,16 +27,14 @@ class ClubicIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id
|
||||
player_page = self._download_webpage(player_url, video_id)
|
||||
|
||||
config_json = self._search_regex(
|
||||
config = self._parse_json(self._search_regex(
|
||||
r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page,
|
||||
'configuration')
|
||||
config = json.loads(config_json)
|
||||
'configuration'), video_id)
|
||||
|
||||
video_info = config['videoInfo']
|
||||
sources = config['sources']
|
||||
|
@@ -6,7 +6,7 @@ from ..utils import ExtractorError
|
||||
|
||||
class CMTIE(MTVIE):
|
||||
IE_NAME = 'cmt.com'
|
||||
_VALID_URL = r'https?://www\.cmt\.com/(?:videos|shows)/(?:[^/]+/)*(?P<videoid>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows)/(?:[^/]+/)*(?P<videoid>\d+)'
|
||||
_FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/'
|
||||
|
||||
_TESTS = [{
|
||||
@@ -26,7 +26,7 @@ class CMTIE(MTVIE):
|
||||
'id': '1504699',
|
||||
'ext': 'mp4',
|
||||
'title': 'Still The King Ep. 109 in 3 Minutes',
|
||||
'description': 'Relive or catch up with Still The King by watching this recap of season 1, episode 9. New episodes Sundays 9/8c.',
|
||||
'description': 'Relive or catch up with Still The King by watching this recap of season 1, episode 9.',
|
||||
'timestamp': 1469421000.0,
|
||||
'upload_date': '20160725',
|
||||
},
|
||||
@@ -42,3 +42,8 @@ class CMTIE(MTVIE):
|
||||
'%s said: video is not available' % cls.IE_NAME, expected=True)
|
||||
|
||||
return super(CMTIE, cls)._transform_rtmp_url(rtmp_video_url)
|
||||
|
||||
def _extract_mgid(self, webpage):
|
||||
return self._search_regex(
|
||||
r'MTVN\.VIDEO\.contentUri\s*=\s*([\'"])(?P<mgid>.+?)\1',
|
||||
webpage, 'mgid', group='mgid')
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@@ -21,6 +21,7 @@ from ..compat import (
|
||||
compat_os_name,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
@@ -87,6 +88,9 @@ class InfoExtractor(object):
|
||||
|
||||
Potential fields:
|
||||
* url Mandatory. The URL of the video file
|
||||
* manifest_url
|
||||
The URL of the manifest file in case of
|
||||
fragmented media (DASH, hls, hds)
|
||||
* ext Will be calculated from URL if missing
|
||||
* format A human-readable description of the format
|
||||
("mp4 container with h264/opus").
|
||||
@@ -115,6 +119,11 @@ class InfoExtractor(object):
|
||||
download, lower-case.
|
||||
"http", "https", "rtsp", "rtmp", "rtmpe",
|
||||
"m3u8", "m3u8_native" or "http_dash_segments".
|
||||
* fragments A list of fragments of the fragmented media,
|
||||
with the following entries:
|
||||
* "url" (mandatory) - fragment's URL
|
||||
* "duration" (optional, int or float)
|
||||
* "filesize" (optional, int)
|
||||
* preference Order number of this format. If this field is
|
||||
present and not None, the formats get sorted
|
||||
by this field, regardless of all other values.
|
||||
@@ -226,7 +235,7 @@ class InfoExtractor(object):
|
||||
chapter_id: Id of the chapter the video belongs to, as a unicode string.
|
||||
|
||||
The following fields should only be used when the video is an episode of some
|
||||
series or programme:
|
||||
series, programme or podcast:
|
||||
|
||||
series: Title of the series or programme the video episode belongs to.
|
||||
season: Title of the season the video episode belongs to.
|
||||
@@ -674,33 +683,36 @@ class InfoExtractor(object):
|
||||
username = info[0]
|
||||
password = info[2]
|
||||
else:
|
||||
raise netrc.NetrcParseError('No authenticators for %s' % netrc_machine)
|
||||
raise netrc.NetrcParseError(
|
||||
'No authenticators for %s' % netrc_machine)
|
||||
except (IOError, netrc.NetrcParseError) as err:
|
||||
self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err))
|
||||
self._downloader.report_warning(
|
||||
'parsing .netrc: %s' % error_to_compat_str(err))
|
||||
|
||||
return (username, password)
|
||||
return username, password
|
||||
|
||||
def _get_login_info(self):
|
||||
def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
|
||||
"""
|
||||
Get the login info as (username, password)
|
||||
It will look in the netrc file using the _NETRC_MACHINE value
|
||||
First look for the manually specified credentials using username_option
|
||||
and password_option as keys in params dictionary. If no such credentials
|
||||
available look in the netrc file using the netrc_machine or _NETRC_MACHINE
|
||||
value.
|
||||
If there's no info available, return (None, None)
|
||||
"""
|
||||
if self._downloader is None:
|
||||
return (None, None)
|
||||
|
||||
username = None
|
||||
password = None
|
||||
downloader_params = self._downloader.params
|
||||
|
||||
# Attempt to use provided username and password or .netrc data
|
||||
if downloader_params.get('username') is not None:
|
||||
username = downloader_params['username']
|
||||
password = downloader_params['password']
|
||||
if downloader_params.get(username_option) is not None:
|
||||
username = downloader_params[username_option]
|
||||
password = downloader_params[password_option]
|
||||
else:
|
||||
username, password = self._get_netrc_login_info()
|
||||
username, password = self._get_netrc_login_info(netrc_machine)
|
||||
|
||||
return (username, password)
|
||||
return username, password
|
||||
|
||||
def _get_tfa_info(self, note='two-factor verification code'):
|
||||
"""
|
||||
@@ -888,16 +900,16 @@ class InfoExtractor(object):
|
||||
def _hidden_inputs(html):
|
||||
html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
|
||||
hidden_inputs = {}
|
||||
for input in re.findall(r'(?i)<input([^>]+)>', html):
|
||||
if not re.search(r'type=(["\'])(?:hidden|submit)\1', input):
|
||||
for input in re.findall(r'(?i)(<input[^>]+>)', html):
|
||||
attrs = extract_attributes(input)
|
||||
if not input:
|
||||
continue
|
||||
name = re.search(r'(?:name|id)=(["\'])(?P<value>.+?)\1', input)
|
||||
if not name:
|
||||
if attrs.get('type') not in ('hidden', 'submit'):
|
||||
continue
|
||||
value = re.search(r'value=(["\'])(?P<value>.*?)\1', input)
|
||||
if not value:
|
||||
continue
|
||||
hidden_inputs[name.group('value')] = value.group('value')
|
||||
name = attrs.get('name') or attrs.get('id')
|
||||
value = attrs.get('value')
|
||||
if name and value is not None:
|
||||
hidden_inputs[name] = value
|
||||
return hidden_inputs
|
||||
|
||||
def _form_hidden_inputs(self, form_id, html):
|
||||
@@ -1088,6 +1100,13 @@ class InfoExtractor(object):
|
||||
manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
|
||||
'bootstrap info', default=None)
|
||||
|
||||
vcodec = None
|
||||
mime_type = xpath_text(
|
||||
manifest, ['{http://ns.adobe.com/f4m/1.0}mimeType', '{http://ns.adobe.com/f4m/2.0}mimeType'],
|
||||
'base URL', default=None)
|
||||
if mime_type and mime_type.startswith('audio/'):
|
||||
vcodec = 'none'
|
||||
|
||||
for i, media_el in enumerate(media_nodes):
|
||||
tbr = int_or_none(media_el.attrib.get('bitrate'))
|
||||
width = int_or_none(media_el.attrib.get('width'))
|
||||
@@ -1128,6 +1147,7 @@ class InfoExtractor(object):
|
||||
'width': f.get('width') or width,
|
||||
'height': f.get('height') or height,
|
||||
'format_id': f.get('format_id') if not tbr else format_id,
|
||||
'vcodec': vcodec,
|
||||
})
|
||||
formats.extend(f4m_formats)
|
||||
continue
|
||||
@@ -1139,10 +1159,12 @@ class InfoExtractor(object):
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': manifest_url,
|
||||
'manifest_url': manifest_url,
|
||||
'ext': 'flv' if bootstrap_info is not None else None,
|
||||
'tbr': tbr,
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vcodec': vcodec,
|
||||
'preference': preference,
|
||||
})
|
||||
return formats
|
||||
@@ -1244,9 +1266,11 @@ class InfoExtractor(object):
|
||||
# format_id intact.
|
||||
if not live:
|
||||
format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats)))
|
||||
manifest_url = format_url(line.strip())
|
||||
f = {
|
||||
'format_id': '-'.join(format_id),
|
||||
'url': format_url(line.strip()),
|
||||
'url': manifest_url,
|
||||
'manifest_url': manifest_url,
|
||||
'tbr': tbr,
|
||||
'ext': ext,
|
||||
'fps': float_or_none(last_info.get('FRAME-RATE')),
|
||||
@@ -1518,9 +1542,10 @@ class InfoExtractor(object):
|
||||
mpd_base_url = re.match(r'https?://.+/', urlh.geturl()).group()
|
||||
|
||||
return self._parse_mpd_formats(
|
||||
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, formats_dict=formats_dict)
|
||||
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url,
|
||||
formats_dict=formats_dict, mpd_url=mpd_url)
|
||||
|
||||
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}):
|
||||
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):
|
||||
"""
|
||||
Parse formats from MPD manifest.
|
||||
References:
|
||||
@@ -1541,42 +1566,52 @@ class InfoExtractor(object):
|
||||
|
||||
def extract_multisegment_info(element, ms_parent_info):
|
||||
ms_info = ms_parent_info.copy()
|
||||
|
||||
# As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some
|
||||
# common attributes and elements. We will only extract relevant
|
||||
# for us.
|
||||
def extract_common(source):
|
||||
segment_timeline = source.find(_add_ns('SegmentTimeline'))
|
||||
if segment_timeline is not None:
|
||||
s_e = segment_timeline.findall(_add_ns('S'))
|
||||
if s_e:
|
||||
ms_info['total_number'] = 0
|
||||
ms_info['s'] = []
|
||||
for s in s_e:
|
||||
r = int(s.get('r', 0))
|
||||
ms_info['total_number'] += 1 + r
|
||||
ms_info['s'].append({
|
||||
't': int(s.get('t', 0)),
|
||||
# @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60])
|
||||
'd': int(s.attrib['d']),
|
||||
'r': r,
|
||||
})
|
||||
start_number = source.get('startNumber')
|
||||
if start_number:
|
||||
ms_info['start_number'] = int(start_number)
|
||||
timescale = source.get('timescale')
|
||||
if timescale:
|
||||
ms_info['timescale'] = int(timescale)
|
||||
segment_duration = source.get('duration')
|
||||
if segment_duration:
|
||||
ms_info['segment_duration'] = int(segment_duration)
|
||||
|
||||
def extract_Initialization(source):
|
||||
initialization = source.find(_add_ns('Initialization'))
|
||||
if initialization is not None:
|
||||
ms_info['initialization_url'] = initialization.attrib['sourceURL']
|
||||
|
||||
segment_list = element.find(_add_ns('SegmentList'))
|
||||
if segment_list is not None:
|
||||
extract_common(segment_list)
|
||||
extract_Initialization(segment_list)
|
||||
segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
|
||||
if segment_urls_e:
|
||||
ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e]
|
||||
initialization = segment_list.find(_add_ns('Initialization'))
|
||||
if initialization is not None:
|
||||
ms_info['initialization_url'] = initialization.attrib['sourceURL']
|
||||
else:
|
||||
segment_template = element.find(_add_ns('SegmentTemplate'))
|
||||
if segment_template is not None:
|
||||
start_number = segment_template.get('startNumber')
|
||||
if start_number:
|
||||
ms_info['start_number'] = int(start_number)
|
||||
segment_timeline = segment_template.find(_add_ns('SegmentTimeline'))
|
||||
if segment_timeline is not None:
|
||||
s_e = segment_timeline.findall(_add_ns('S'))
|
||||
if s_e:
|
||||
ms_info['total_number'] = 0
|
||||
ms_info['s'] = []
|
||||
for s in s_e:
|
||||
r = int(s.get('r', 0))
|
||||
ms_info['total_number'] += 1 + r
|
||||
ms_info['s'].append({
|
||||
't': int(s.get('t', 0)),
|
||||
# @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60])
|
||||
'd': int(s.attrib['d']),
|
||||
'r': r,
|
||||
})
|
||||
else:
|
||||
timescale = segment_template.get('timescale')
|
||||
if timescale:
|
||||
ms_info['timescale'] = int(timescale)
|
||||
segment_duration = segment_template.get('duration')
|
||||
if segment_duration:
|
||||
ms_info['segment_duration'] = int(segment_duration)
|
||||
extract_common(segment_template)
|
||||
media_template = segment_template.get('media')
|
||||
if media_template:
|
||||
ms_info['media_template'] = media_template
|
||||
@@ -1584,11 +1619,14 @@ class InfoExtractor(object):
|
||||
if initialization:
|
||||
ms_info['initialization_url'] = initialization
|
||||
else:
|
||||
initialization = segment_template.find(_add_ns('Initialization'))
|
||||
if initialization is not None:
|
||||
ms_info['initialization_url'] = initialization.attrib['sourceURL']
|
||||
extract_Initialization(segment_template)
|
||||
return ms_info
|
||||
|
||||
def combine_url(base_url, target_url):
|
||||
if re.match(r'^https?://', target_url):
|
||||
return target_url
|
||||
return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
|
||||
|
||||
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
|
||||
formats = []
|
||||
for period in mpd_doc.findall(_add_ns('Period')):
|
||||
@@ -1631,6 +1669,7 @@ class InfoExtractor(object):
|
||||
f = {
|
||||
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
|
||||
'url': base_url,
|
||||
'manifest_url': mpd_url,
|
||||
'ext': mimetype2ext(mime_type),
|
||||
'width': int_or_none(representation_attrib.get('width')),
|
||||
'height': int_or_none(representation_attrib.get('height')),
|
||||
@@ -1645,9 +1684,7 @@ class InfoExtractor(object):
|
||||
}
|
||||
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
||||
if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
|
||||
if 'total_number' not in representation_ms_info and 'segment_duration':
|
||||
segment_duration = float(representation_ms_info['segment_duration']) / float(representation_ms_info['timescale'])
|
||||
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
||||
|
||||
media_template = representation_ms_info['media_template']
|
||||
media_template = media_template.replace('$RepresentationID$', representation_id)
|
||||
media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template)
|
||||
@@ -1656,46 +1693,79 @@ class InfoExtractor(object):
|
||||
|
||||
# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
|
||||
# can't be used at the same time
|
||||
if '%(Number' in media_template:
|
||||
representation_ms_info['segment_urls'] = [
|
||||
media_template % {
|
||||
if '%(Number' in media_template and 's' not in representation_ms_info:
|
||||
segment_duration = None
|
||||
if 'total_number' not in representation_ms_info and 'segment_duration':
|
||||
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
|
||||
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
||||
representation_ms_info['fragments'] = [{
|
||||
'url': media_template % {
|
||||
'Number': segment_number,
|
||||
'Bandwidth': representation_attrib.get('bandwidth'),
|
||||
}
|
||||
for segment_number in range(
|
||||
representation_ms_info['start_number'],
|
||||
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
||||
},
|
||||
'duration': segment_duration,
|
||||
} for segment_number in range(
|
||||
representation_ms_info['start_number'],
|
||||
representation_ms_info['total_number'] + representation_ms_info['start_number'])]
|
||||
else:
|
||||
representation_ms_info['segment_urls'] = []
|
||||
# $Number*$ or $Time$ in media template with S list available
|
||||
# Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
|
||||
# Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
|
||||
representation_ms_info['fragments'] = []
|
||||
segment_time = 0
|
||||
segment_d = None
|
||||
segment_number = representation_ms_info['start_number']
|
||||
|
||||
def add_segment_url():
|
||||
representation_ms_info['segment_urls'].append(
|
||||
media_template % {
|
||||
'Time': segment_time,
|
||||
'Bandwidth': representation_attrib.get('bandwidth'),
|
||||
}
|
||||
)
|
||||
segment_url = media_template % {
|
||||
'Time': segment_time,
|
||||
'Bandwidth': representation_attrib.get('bandwidth'),
|
||||
'Number': segment_number,
|
||||
}
|
||||
representation_ms_info['fragments'].append({
|
||||
'url': segment_url,
|
||||
'duration': float_or_none(segment_d, representation_ms_info['timescale']),
|
||||
})
|
||||
|
||||
for num, s in enumerate(representation_ms_info['s']):
|
||||
segment_time = s.get('t') or segment_time
|
||||
segment_d = s['d']
|
||||
add_segment_url()
|
||||
segment_number += 1
|
||||
for r in range(s.get('r', 0)):
|
||||
segment_time += s['d']
|
||||
segment_time += segment_d
|
||||
add_segment_url()
|
||||
segment_time += s['d']
|
||||
if 'segment_urls' in representation_ms_info:
|
||||
segment_number += 1
|
||||
segment_time += segment_d
|
||||
elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
|
||||
# No media template
|
||||
# Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
|
||||
# or any YouTube dashsegments video
|
||||
fragments = []
|
||||
s_num = 0
|
||||
for segment_url in representation_ms_info['segment_urls']:
|
||||
s = representation_ms_info['s'][s_num]
|
||||
for r in range(s.get('r', 0) + 1):
|
||||
fragments.append({
|
||||
'url': segment_url,
|
||||
'duration': float_or_none(s['d'], representation_ms_info['timescale']),
|
||||
})
|
||||
representation_ms_info['fragments'] = fragments
|
||||
# NB: MPD manifest may contain direct URLs to unfragmented media.
|
||||
# No fragments key is present in this case.
|
||||
if 'fragments' in representation_ms_info:
|
||||
f.update({
|
||||
'segment_urls': representation_ms_info['segment_urls'],
|
||||
'fragments': [],
|
||||
'protocol': 'http_dash_segments',
|
||||
})
|
||||
if 'initialization_url' in representation_ms_info:
|
||||
initialization_url = representation_ms_info['initialization_url'].replace('$RepresentationID$', representation_id)
|
||||
f.update({
|
||||
'initialization_url': initialization_url,
|
||||
})
|
||||
if not f.get('url'):
|
||||
f['url'] = initialization_url
|
||||
f['fragments'].append({'url': initialization_url})
|
||||
f['fragments'].extend(representation_ms_info['fragments'])
|
||||
for fragment in f['fragments']:
|
||||
fragment['url'] = combine_url(base_url, fragment['url'])
|
||||
try:
|
||||
existing_format = next(
|
||||
fo for fo in formats
|
||||
@@ -1741,7 +1811,11 @@ class InfoExtractor(object):
|
||||
return is_plain_url, formats
|
||||
|
||||
entries = []
|
||||
for media_tag, media_type, media_content in re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage):
|
||||
media_tags = [(media_tag, media_type, '')
|
||||
for media_tag, media_type
|
||||
in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
|
||||
media_tags.extend(re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage))
|
||||
for media_tag, media_type, media_content in media_tags:
|
||||
media_info = {
|
||||
'formats': [],
|
||||
'subtitles': {},
|
||||
@@ -1768,7 +1842,7 @@ class InfoExtractor(object):
|
||||
for track_tag in re.findall(r'<track[^>]+>', media_content):
|
||||
track_attributes = extract_attributes(track_tag)
|
||||
kind = track_attributes.get('kind')
|
||||
if not kind or kind == 'subtitles':
|
||||
if not kind or kind in ('subtitles', 'captions'):
|
||||
src = track_attributes.get('src')
|
||||
if not src:
|
||||
continue
|
||||
@@ -1776,22 +1850,70 @@ class InfoExtractor(object):
|
||||
media_info['subtitles'].setdefault(lang, []).append({
|
||||
'url': absolute_url(src),
|
||||
})
|
||||
if media_info['formats']:
|
||||
if media_info['formats'] or media_info['subtitles']:
|
||||
entries.append(media_info)
|
||||
return entries
|
||||
|
||||
def _extract_akamai_formats(self, manifest_url, video_id):
|
||||
formats = []
|
||||
hdcore_sign = 'hdcore=3.7.0'
|
||||
f4m_url = re.sub(r'(https?://.+?)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
update_url_query(f4m_url, {'hdcore': '3.7.0'}),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
if 'hdcore=' not in f4m_url:
|
||||
f4m_url += ('&' if '?' in f4m_url else '?') + hdcore_sign
|
||||
f4m_formats = self._extract_f4m_formats(
|
||||
f4m_url, video_id, f4m_id='hds', fatal=False)
|
||||
for entry in f4m_formats:
|
||||
entry.update({'extra_param_to_segment_url': hdcore_sign})
|
||||
formats.extend(f4m_formats)
|
||||
m3u8_url = re.sub(r'(https?://.+?)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
return formats
|
||||
|
||||
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
|
||||
url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
|
||||
url_base = self._search_regex(r'(?:https?|rtmp|rtsp)(://[^?]+)', url, 'format url')
|
||||
http_base_url = 'http' + url_base
|
||||
formats = []
|
||||
if 'm3u8' not in skip_protocols:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
http_base_url + '/playlist.m3u8', video_id, 'mp4',
|
||||
m3u8_entry_protocol, m3u8_id='hls', fatal=False))
|
||||
if 'f4m' not in skip_protocols:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
http_base_url + '/manifest.f4m',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
if 'dash' not in skip_protocols:
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
http_base_url + '/manifest.mpd',
|
||||
video_id, mpd_id='dash', fatal=False))
|
||||
if re.search(r'(?:/smil:|\.smil)', url_base):
|
||||
if 'smil' not in skip_protocols:
|
||||
rtmp_formats = self._extract_smil_formats(
|
||||
http_base_url + '/jwplayer.smil',
|
||||
video_id, fatal=False)
|
||||
for rtmp_format in rtmp_formats:
|
||||
rtsp_format = rtmp_format.copy()
|
||||
rtsp_format['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
|
||||
del rtsp_format['play_path']
|
||||
del rtsp_format['ext']
|
||||
rtsp_format.update({
|
||||
'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'),
|
||||
'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
|
||||
'protocol': 'rtsp',
|
||||
})
|
||||
formats.extend([rtmp_format, rtsp_format])
|
||||
else:
|
||||
for protocol in ('rtmp', 'rtsp'):
|
||||
if protocol not in skip_protocols:
|
||||
formats.append({
|
||||
'url': protocol + url_base,
|
||||
'format_id': protocol,
|
||||
'protocol': protocol,
|
||||
})
|
||||
return formats
|
||||
|
||||
def _live_title(self, name):
|
||||
""" Generate the title for a live video """
|
||||
now = datetime.datetime.now()
|
||||
@@ -1912,6 +2034,12 @@ class InfoExtractor(object):
|
||||
headers['Ytdl-request-proxy'] = geo_verification_proxy
|
||||
return headers
|
||||
|
||||
def _generic_id(self, url):
|
||||
return compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
|
||||
|
||||
def _generic_title(self, url):
|
||||
return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
|
@@ -1,13 +1,9 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import url_basename
|
||||
|
||||
|
||||
class RtmpIE(InfoExtractor):
|
||||
@@ -23,8 +19,8 @@ class RtmpIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
|
||||
title = compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
|
||||
video_id = self._generic_id(url)
|
||||
title = self._generic_title(url)
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
@@ -34,3 +30,31 @@ class RtmpIE(InfoExtractor):
|
||||
'format_id': compat_urlparse.urlparse(url).scheme,
|
||||
}],
|
||||
}
|
||||
|
||||
|
||||
class MmsIE(InfoExtractor):
|
||||
IE_DESC = False # Do not list
|
||||
_VALID_URL = r'(?i)mms://.+'
|
||||
|
||||
_TEST = {
|
||||
# Direct MMS link
|
||||
'url': 'mms://kentro.kaist.ac.kr/200907/MilesReid(0709).wmv',
|
||||
'info_dict': {
|
||||
'id': 'MilesReid(0709)',
|
||||
'ext': 'wmv',
|
||||
'title': 'MilesReid(0709)',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # rtsp downloads, requiring mplayer or mpv
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._generic_id(url)
|
||||
title = self._generic_title(url)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': url,
|
||||
}
|
||||
|
@@ -1,13 +1,11 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class CriterionIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.criterion\.com/films/(?P<id>[0-9]+)-.+'
|
||||
_VALID_URL = r'https?://(?:www\.)?criterion\.com/films/(?P<id>[0-9]+)-.+'
|
||||
_TEST = {
|
||||
'url': 'http://www.criterion.com/films/184-le-samourai',
|
||||
'md5': 'bc51beba55685509883a9a7830919ec3',
|
||||
@@ -16,20 +14,20 @@ class CriterionIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Le Samouraï',
|
||||
'description': 'md5:a2b4b116326558149bef81f76dcbb93f',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
final_url = self._search_regex(
|
||||
r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
|
||||
r'so\.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._html_search_meta('description', webpage)
|
||||
thumbnail = self._search_regex(
|
||||
r'so.addVariable\("thumbnailURL", "(.+?)"\)\;',
|
||||
r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;',
|
||||
webpage, 'thumbnail url')
|
||||
|
||||
return {
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@@ -34,22 +34,58 @@ from ..aes import (
|
||||
|
||||
|
||||
class CrunchyrollBaseIE(InfoExtractor):
|
||||
_LOGIN_URL = 'https://www.crunchyroll.com/login'
|
||||
_LOGIN_FORM = 'login_form'
|
||||
_NETRC_MACHINE = 'crunchyroll'
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
self.report_login()
|
||||
login_url = 'https://www.crunchyroll.com/?a=formhandler'
|
||||
data = urlencode_postdata({
|
||||
'formname': 'RpcApiUser_Login',
|
||||
'name': username,
|
||||
'password': password,
|
||||
|
||||
login_page = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Downloading login page')
|
||||
|
||||
def is_logged(webpage):
|
||||
return '<title>Redirecting' in webpage
|
||||
|
||||
# Already logged in
|
||||
if is_logged(login_page):
|
||||
return
|
||||
|
||||
login_form_str = self._search_regex(
|
||||
r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LOGIN_FORM,
|
||||
login_page, 'login form', group='form')
|
||||
|
||||
post_url = extract_attributes(login_form_str).get('action')
|
||||
if not post_url:
|
||||
post_url = self._LOGIN_URL
|
||||
elif not post_url.startswith('http'):
|
||||
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
|
||||
|
||||
login_form = self._form_hidden_inputs(self._LOGIN_FORM, login_page)
|
||||
|
||||
login_form.update({
|
||||
'login_form[name]': username,
|
||||
'login_form[password]': password,
|
||||
})
|
||||
login_request = sanitized_Request(login_url, data)
|
||||
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
self._download_webpage(login_request, None, False, 'Wrong login info')
|
||||
|
||||
response = self._download_webpage(
|
||||
post_url, None, 'Logging in', 'Wrong login info',
|
||||
data=urlencode_postdata(login_form),
|
||||
headers={'Content-Type': 'application/x-www-form-urlencoded'})
|
||||
|
||||
# Successful login
|
||||
if is_logged(response):
|
||||
return
|
||||
|
||||
error = self._html_search_regex(
|
||||
'(?s)<ul[^>]+class=["\']messages["\'][^>]*>(.+?)</ul>',
|
||||
response, 'error message', default=None)
|
||||
if error:
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
@@ -114,6 +150,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
# rtmp
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
}, {
|
||||
'url': 'http://www.crunchyroll.com/rezero-starting-life-in-another-world-/episode-5-the-morning-of-our-promise-is-still-distant-702409',
|
||||
'info_dict': {
|
||||
|
@@ -94,7 +94,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
|
||||
'uploader': 'HotWaves1012',
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
'skip': 'video gone',
|
||||
},
|
||||
# geo-restricted, player v5
|
||||
{
|
||||
@@ -144,7 +145,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
player_v5 = self._search_regex(
|
||||
[r'buildPlayer\(({.+?})\);\n', # See https://github.com/rg3/youtube-dl/issues/7826
|
||||
r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);',
|
||||
r'buildPlayer\(({.+?})\);'],
|
||||
r'buildPlayer\(({.+?})\);',
|
||||
r'var\s+config\s*=\s*({.+?});'],
|
||||
webpage, 'player v5', default=None)
|
||||
if player_v5:
|
||||
player = self._parse_json(player_v5, video_id)
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
@@ -1,61 +1,54 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
||||
class DctpTvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www.dctp.tv/(#/)?filme/(?P<id>.+?)/$'
|
||||
_VALID_URL = r'https?://(?:www\.)?dctp\.tv/(#/)?filme/(?P<id>.+?)/$'
|
||||
_TEST = {
|
||||
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
|
||||
'md5': '174dd4a8a6225cf5655952f969cfbe24',
|
||||
'info_dict': {
|
||||
'id': '1324',
|
||||
'id': '95eaa4f33dad413aa17b4ee613cccc6c',
|
||||
'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
|
||||
'ext': 'flv',
|
||||
'title': 'Videoinstallation für eine Kaufhausfassade'
|
||||
'ext': 'mp4',
|
||||
'title': 'Videoinstallation für eine Kaufhausfassade',
|
||||
'description': 'Kurzfilm',
|
||||
'upload_date': '20110407',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
base_url = 'http://dctp-ivms2-restapi.s3.amazonaws.com/'
|
||||
version_json = self._download_json(
|
||||
base_url + 'version.json',
|
||||
video_id, note='Determining file version')
|
||||
version = version_json['version_name']
|
||||
info_json = self._download_json(
|
||||
'{0}{1}/restapi/slugs/{2}.json'.format(base_url, version, video_id),
|
||||
video_id, note='Fetching object ID')
|
||||
object_id = compat_str(info_json['object_id'])
|
||||
meta_json = self._download_json(
|
||||
'{0}{1}/restapi/media/{2}.json'.format(base_url, version, object_id),
|
||||
video_id, note='Downloading metadata')
|
||||
uuid = meta_json['uuid']
|
||||
title = meta_json['title']
|
||||
wide = meta_json['is_wide']
|
||||
if wide:
|
||||
ratio = '16x9'
|
||||
else:
|
||||
ratio = '4x3'
|
||||
play_path = 'mp4:{0}_dctp_0500_{1}.m4v'.format(uuid, ratio)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
object_id = self._html_search_meta('DC.identifier', webpage)
|
||||
|
||||
servers_json = self._download_json(
|
||||
'http://www.dctp.tv/streaming_servers/',
|
||||
'http://www.dctp.tv/elastic_streaming_client/get_streaming_server/',
|
||||
video_id, note='Downloading server list')
|
||||
url = servers_json[0]['endpoint']
|
||||
server = servers_json[0]['server']
|
||||
m3u8_path = self._search_regex(
|
||||
r'\'([^\'"]+/playlist\.m3u8)"', webpage, 'm3u8 path')
|
||||
formats = self._extract_m3u8_formats(
|
||||
'http://%s%s' % (server, m3u8_path), video_id, ext='mp4',
|
||||
entry_protocol='m3u8_native')
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._html_search_meta('DC.description', webpage)
|
||||
upload_date = unified_strdate(
|
||||
self._html_search_meta('DC.date.created', webpage))
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
return {
|
||||
'id': object_id,
|
||||
'title': title,
|
||||
'format': 'rtmp',
|
||||
'url': url,
|
||||
'play_path': play_path,
|
||||
'rtmp_real_time': True,
|
||||
'ext': 'flv',
|
||||
'display_id': video_id
|
||||
'formats': formats,
|
||||
'display_id': video_id,
|
||||
'description': description,
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
@@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class DemocracynowIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?democracynow.org/(?P<id>[^\?]*)'
|
||||
_VALID_URL = r'https?://(?:www\.)?democracynow\.org/(?P<id>[^\?]*)'
|
||||
IE_NAME = 'democracynow'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.democracynow.org/shows/2015/7/3',
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
|
@@ -14,7 +14,7 @@ class EinthusanIE(InfoExtractor):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.einthusan.com/movies/watch.php?id=2447',
|
||||
'md5': 'af244f4458cd667205e513d75da5b8b1',
|
||||
'md5': 'd71379996ff5b7f217eca034c34e3461',
|
||||
'info_dict': {
|
||||
'id': '2447',
|
||||
'ext': 'mp4',
|
||||
@@ -25,13 +25,13 @@ class EinthusanIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'http://www.einthusan.com/movies/watch.php?id=1671',
|
||||
'md5': 'ef63c7a803e22315880ed182c10d1c5c',
|
||||
'md5': 'b16a6fd3c67c06eb7c79c8a8615f4213',
|
||||
'info_dict': {
|
||||
'id': '1671',
|
||||
'ext': 'mp4',
|
||||
'title': 'Soodhu Kavvuum',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 'md5:05d8a0c0281a4240d86d76e14f2f4d51',
|
||||
'description': 'md5:b40f2bf7320b4f9414f3780817b2af8c',
|
||||
}
|
||||
},
|
||||
]
|
||||
@@ -50,9 +50,11 @@ class EinthusanIE(InfoExtractor):
|
||||
video_id = self._search_regex(
|
||||
r'data-movieid=["\'](\d+)', webpage, 'video id', default=video_id)
|
||||
|
||||
video_url = self._download_webpage(
|
||||
m3u8_url = self._download_webpage(
|
||||
'http://cdn.einthusan.com/geturl/%s/hd/London,Washington,Toronto,Dallas,San,Sydney/'
|
||||
% video_id, video_id)
|
||||
% video_id, video_id, headers={'Referer': url})
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native')
|
||||
|
||||
description = self._html_search_meta('description', webpage)
|
||||
thumbnail = self._html_search_regex(
|
||||
@@ -64,7 +66,7 @@ class EinthusanIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
}
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@@ -4,7 +4,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class EngadgetIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www.engadget.com/video/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?engadget\.com/video/(?P<id>[^/?#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# video with 5min ID
|
||||
|
@@ -8,7 +8,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class ExpoTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.expotv\.com/videos/[^?#]*/(?P<id>[0-9]+)($|[?#])'
|
||||
_VALID_URL = r'https?://(?:www\.)?expotv\.com/videos/[^?#]*/(?P<id>[0-9]+)($|[?#])'
|
||||
_TEST = {
|
||||
'url': 'http://www.expotv.com/videos/reviews/3/40/NYX-Butter-lipstick/667916',
|
||||
'md5': 'fe1d728c3a813ff78f595bc8b7a707a8',
|
||||
|
@@ -31,7 +31,6 @@ from .aenetworks import (
|
||||
HistoryTopicIE,
|
||||
)
|
||||
from .afreecatv import AfreecaTVIE
|
||||
from .aftonbladet import AftonbladetIE
|
||||
from .airmozilla import AirMozillaIE
|
||||
from .aljazeera import AlJazeeraIE
|
||||
from .alphaporno import AlphaPornoIE
|
||||
@@ -67,6 +66,7 @@ from .arte import (
|
||||
ArteTVDDCIE,
|
||||
ArteTVMagazineIE,
|
||||
ArteTVEmbedIE,
|
||||
TheOperaPlatformIE,
|
||||
ArteTVPlaylistIE,
|
||||
)
|
||||
from .atresplayer import AtresPlayerIE
|
||||
@@ -93,7 +93,8 @@ from .bbc import (
|
||||
)
|
||||
from .beeg import BeegIE
|
||||
from .behindkink import BehindKinkIE
|
||||
from .beatportpro import BeatportProIE
|
||||
from .bellmedia import BellMediaIE
|
||||
from .beatport import BeatportIE
|
||||
from .bet import BetIE
|
||||
from .bigflix import BigflixIE
|
||||
from .bild import BildIE
|
||||
@@ -116,7 +117,10 @@ from .brightcove import (
|
||||
BrightcoveNewIE,
|
||||
)
|
||||
from .buzzfeed import BuzzFeedIE
|
||||
from .byutv import BYUtvIE
|
||||
from .byutv import (
|
||||
BYUtvIE,
|
||||
BYUtvEventIE,
|
||||
)
|
||||
from .c56 import C56IE
|
||||
from .camdemy import (
|
||||
CamdemyIE,
|
||||
@@ -183,7 +187,10 @@ from .comedycentral import (
|
||||
)
|
||||
from .comcarcoff import ComCarCoffIE
|
||||
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||
from .commonprotocols import RtmpIE
|
||||
from .commonprotocols import (
|
||||
MmsIE,
|
||||
RtmpIE,
|
||||
)
|
||||
from .condenast import CondeNastIE
|
||||
from .cracked import CrackedIE
|
||||
from .crackle import CrackleIE
|
||||
@@ -195,7 +202,6 @@ from .crunchyroll import (
|
||||
)
|
||||
from .cspan import CSpanIE
|
||||
from .ctsnews import CtsNewsIE
|
||||
from .ctv import CTVIE
|
||||
from .ctvnews import CTVNewsIE
|
||||
from .cultureunplugged import CultureUnpluggedIE
|
||||
from .curiositystream import (
|
||||
@@ -343,7 +349,10 @@ from .goshgay import GoshgayIE
|
||||
from .gputechconf import GPUTechConfIE
|
||||
from .groupon import GrouponIE
|
||||
from .hark import HarkIE
|
||||
from .hbo import HBOIE
|
||||
from .hbo import (
|
||||
HBOIE,
|
||||
HBOEpisodeIE,
|
||||
)
|
||||
from .hearthisat import HearThisAtIE
|
||||
from .heise import HeiseIE
|
||||
from .hellporno import HellPornoIE
|
||||
@@ -364,6 +373,7 @@ from .hrti import (
|
||||
HRTiIE,
|
||||
HRTiPlaylistIE,
|
||||
)
|
||||
from .huajiao import HuajiaoIE
|
||||
from .huffpost import HuffPostIE
|
||||
from .hypem import HypemIE
|
||||
from .iconosquare import IconosquareIE
|
||||
@@ -435,6 +445,7 @@ from .lcp import (
|
||||
)
|
||||
from .learnr import LearnrIE
|
||||
from .lecture2go import Lecture2GoIE
|
||||
from .lego import LEGOIE
|
||||
from .lemonde import LemondeIE
|
||||
from .leeco import (
|
||||
LeIE,
|
||||
@@ -472,6 +483,10 @@ from .macgamestore import MacGameStoreIE
|
||||
from .mailru import MailRuIE
|
||||
from .makerschannel import MakersChannelIE
|
||||
from .makertv import MakerTVIE
|
||||
from .mangomolo import (
|
||||
MangomoloVideoIE,
|
||||
MangomoloLiveIE,
|
||||
)
|
||||
from .matchtv import MatchTVIE
|
||||
from .mdr import MDRIE
|
||||
from .meta import METAIE
|
||||
@@ -512,6 +527,7 @@ from .movingimage import MovingImageIE
|
||||
from .msn import MSNIE
|
||||
from .mtv import (
|
||||
MTVIE,
|
||||
MTVVideoIE,
|
||||
MTVServicesEmbeddedIE,
|
||||
MTVDEIE,
|
||||
)
|
||||
@@ -534,6 +550,7 @@ from .nbc import (
|
||||
CSNNEIE,
|
||||
NBCIE,
|
||||
NBCNewsIE,
|
||||
NBCOlympicsIE,
|
||||
NBCSportsIE,
|
||||
NBCSportsVPlayerIE,
|
||||
)
|
||||
@@ -606,13 +623,14 @@ from .nowtv import (
|
||||
)
|
||||
from .noz import NozIE
|
||||
from .npo import (
|
||||
AndereTijdenIE,
|
||||
NPOIE,
|
||||
NPOLiveIE,
|
||||
NPORadioIE,
|
||||
NPORadioFragmentIE,
|
||||
SchoolTVIE,
|
||||
VPROIE,
|
||||
WNLIE
|
||||
WNLIE,
|
||||
)
|
||||
from .npr import NprIE
|
||||
from .nrk import (
|
||||
@@ -628,6 +646,7 @@ from .nytimes import (
|
||||
NYTimesArticleIE,
|
||||
)
|
||||
from .nuvid import NuvidIE
|
||||
from .nzz import NZZIE
|
||||
from .odatv import OdaTVIE
|
||||
from .odnoklassniki import OdnoklassnikiIE
|
||||
from .oktoberfesttv import OktoberfestTVIE
|
||||
@@ -881,8 +900,10 @@ from .theplatform import (
|
||||
from .thescene import TheSceneIE
|
||||
from .thesixtyone import TheSixtyOneIE
|
||||
from .thestar import TheStarIE
|
||||
from .theweatherchannel import TheWeatherChannelIE
|
||||
from .thisamericanlife import ThisAmericanLifeIE
|
||||
from .thisav import ThisAVIE
|
||||
from .thisoldhouse import ThisOldHouseIE
|
||||
from .threeqsdn import ThreeQSDNIE
|
||||
from .tinypic import TinyPicIE
|
||||
from .tlc import TlcDeIE
|
||||
@@ -897,6 +918,7 @@ from .tnaflix import (
|
||||
MovieFapIE,
|
||||
)
|
||||
from .toggle import ToggleIE
|
||||
from .tonline import TOnlineIE
|
||||
from .toutv import TouTvIE
|
||||
from .toypics import ToypicsUserIE, ToypicsIE
|
||||
from .traileraddict import TrailerAddictIE
|
||||
@@ -1064,6 +1086,7 @@ from .vporn import VpornIE
|
||||
from .vrt import VRTIE
|
||||
from .vube import VubeIE
|
||||
from .vuclip import VuClipIE
|
||||
from .vyborymos import VyboryMosIE
|
||||
from .walla import WallaIE
|
||||
from .washingtonpost import (
|
||||
WashingtonPostIE,
|
||||
|
@@ -258,7 +258,7 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
if not video_data:
|
||||
server_js_data = self._parse_json(self._search_regex(
|
||||
r'handleServerJS\(({.+})\);', webpage, 'server js data', default='{}'), video_id)
|
||||
r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id)
|
||||
for item in server_js_data.get('instances', []):
|
||||
if item[1][0] == 'VideoConfig':
|
||||
video_data = video_data_list2dict(item[2][0]['videoData'])
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@@ -2,25 +2,27 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .streamable import StreamableIE
|
||||
|
||||
|
||||
class FootyRoomIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://footyroom\.com/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://footyroom\.com/matches/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/',
|
||||
'url': 'http://footyroom.com/matches/79922154/hull-city-vs-chelsea/review',
|
||||
'info_dict': {
|
||||
'id': 'schalke-04-0-2-real-madrid-2015-02',
|
||||
'title': 'Schalke 04 0 – 2 Real Madrid',
|
||||
'id': '79922154',
|
||||
'title': 'VIDEO Hull City 0 - 2 Chelsea',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
'skip': 'Video for this match is not available',
|
||||
'playlist_count': 2,
|
||||
'add_ie': [StreamableIE.ie_key()],
|
||||
}, {
|
||||
'url': 'http://footyroom.com/georgia-0-2-germany-2015-03/',
|
||||
'url': 'http://footyroom.com/matches/75817984/georgia-vs-germany/review',
|
||||
'info_dict': {
|
||||
'id': 'georgia-0-2-germany-2015-03',
|
||||
'title': 'Georgia 0 – 2 Germany',
|
||||
'id': '75817984',
|
||||
'title': 'VIDEO Georgia 0 - 2 Germany',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
'add_ie': ['Playwire']
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -28,9 +30,8 @@ class FootyRoomIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
playlist = self._parse_json(
|
||||
self._search_regex(
|
||||
r'VideoSelector\.load\((\[.+?\])\);', webpage, 'video selector'),
|
||||
playlist = self._parse_json(self._search_regex(
|
||||
r'DataStore\.media\s*=\s*([^;]+)', webpage, 'media data'),
|
||||
playlist_id)
|
||||
|
||||
playlist_title = self._og_search_title(webpage)
|
||||
@@ -40,11 +41,16 @@ class FootyRoomIE(InfoExtractor):
|
||||
payload = video.get('payload')
|
||||
if not payload:
|
||||
continue
|
||||
playwire_url = self._search_regex(
|
||||
playwire_url = self._html_search_regex(
|
||||
r'data-config="([^"]+)"', payload,
|
||||
'playwire url', default=None)
|
||||
if playwire_url:
|
||||
entries.append(self.url_result(self._proto_relative_url(
|
||||
playwire_url, 'http:'), 'Playwire'))
|
||||
|
||||
streamable_url = StreamableIE._extract_url(payload)
|
||||
if streamable_url:
|
||||
entries.append(self.url_result(
|
||||
streamable_url, StreamableIE.ie_key()))
|
||||
|
||||
return self.playlist_result(entries, playlist_id, playlist_title)
|
||||
|
@@ -11,9 +11,13 @@ class Formula1IE(InfoExtractor):
|
||||
'md5': '8c79e54be72078b26b89e0e111c0502b',
|
||||
'info_dict': {
|
||||
'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Race highlights - Spain 2016',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
'url': 'http://www.formula1.com/en/video/2016/5/Race_highlights_-_Spain_2016.html',
|
||||
|
@@ -1,14 +1,14 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class FOXIE(InfoExtractor):
|
||||
class FOXIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.fox.com/watch/255180355939/7684182528',
|
||||
@@ -30,14 +30,26 @@ class FOXIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
release_url = self._parse_json(self._search_regex(
|
||||
r'"fox_pdk_player"\s*:\s*({[^}]+?})', webpage, 'fox_pdk_player'),
|
||||
video_id)['release_url']
|
||||
settings = self._parse_json(self._search_regex(
|
||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
||||
webpage, 'drupal settings'), video_id)
|
||||
fox_pdk_player = settings['fox_pdk_player']
|
||||
release_url = fox_pdk_player['release_url']
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'switch': 'http'
|
||||
}
|
||||
if fox_pdk_player.get('access') == 'locked':
|
||||
ap_p = settings['foxAdobePassProvider']
|
||||
rating = ap_p.get('videoRating')
|
||||
if rating == 'n/a':
|
||||
rating = None
|
||||
resource = self._get_mvpd_resource('fbc-fox', None, ap_p['videoGUID'], rating)
|
||||
query['auth'] = self._extract_mvpd_auth(url, video_id, 'fbc-fox', resource)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': smuggle_url(update_url_query(
|
||||
release_url, {'switch': 'http'}), {'force_smil_url': True}),
|
||||
'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
|
||||
'id': video_id,
|
||||
}
|
||||
|
@@ -2,21 +2,21 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import month_by_name
|
||||
|
||||
|
||||
class FranceInterIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.franceinter.fr/player/reecouter?play=793962',
|
||||
'md5': '4764932e466e6f6c79c317d2e74f6884',
|
||||
'url': 'https://www.franceinter.fr/emissions/affaires-sensibles/affaires-sensibles-07-septembre-2016',
|
||||
'md5': '9e54d7bdb6fdc02a841007f8a975c094',
|
||||
'info_dict': {
|
||||
'id': '793962',
|
||||
'id': 'affaires-sensibles/affaires-sensibles-07-septembre-2016',
|
||||
'ext': 'mp3',
|
||||
'title': 'L’Histoire dans les jeux vidéo',
|
||||
'description': 'md5:7e93ddb4451e7530022792240a3049c7',
|
||||
'timestamp': 1387369800,
|
||||
'upload_date': '20131218',
|
||||
'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
|
||||
'description': 'md5:401969c5d318c061f86bda1fa359292b',
|
||||
'upload_date': '20160907',
|
||||
},
|
||||
}
|
||||
|
||||
@@ -25,23 +25,30 @@ class FranceInterIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
path = self._search_regex(
|
||||
r'<a id="player".+?href="([^"]+)"', webpage, 'video url')
|
||||
video_url = 'http://www.franceinter.fr/' + path
|
||||
video_url = self._search_regex(
|
||||
r'(?s)<div[^>]+class=["\']page-diffusion["\'][^>]*>.*?<button[^>]+data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'video url', group='url')
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<span class="title-diffusion">(.+?)</span>', webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'<span class="description">(.*?)</span>',
|
||||
webpage, 'description', fatal=False)
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'data-date="(\d+)"', webpage, 'upload date', fatal=False))
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
upload_date_str = self._search_regex(
|
||||
r'class=["\']cover-emission-period["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
|
||||
webpage, 'upload date', fatal=False)
|
||||
if upload_date_str:
|
||||
upload_date_list = upload_date_str.split()
|
||||
upload_date_list.reverse()
|
||||
upload_date_list[1] = '%02d' % (month_by_name(upload_date_list[1], lang='fr') or 0)
|
||||
upload_date_list[2] = '%02d' % int(upload_date_list[2])
|
||||
upload_date = ''.join(upload_date_list)
|
||||
else:
|
||||
upload_date = None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'upload_date': upload_date,
|
||||
'formats': [{
|
||||
'url': video_url,
|
||||
'vcodec': 'none',
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
@@ -8,7 +8,7 @@ from .common import InfoExtractor
|
||||
|
||||
class FreespeechIE(InfoExtractor):
|
||||
IE_NAME = 'freespeech.org'
|
||||
_VALID_URL = r'https://www\.freespeech\.org/video/(?P<title>.+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?freespeech\.org/video/(?P<title>.+)'
|
||||
_TEST = {
|
||||
'add_ie': ['Youtube'],
|
||||
'url': 'https://www.freespeech.org/video/obama-romney-campaign-colorado-ahead-debate-0',
|
||||
|
@@ -9,7 +9,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class GameStarIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html',
|
||||
'md5': '96974ecbb7fd8d0d20fca5a00810cea7',
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
@@ -27,7 +27,6 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
unsmuggle_url,
|
||||
UnsupportedError,
|
||||
url_basename,
|
||||
xpath_text,
|
||||
)
|
||||
from .brightcove import (
|
||||
@@ -1369,6 +1368,11 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': ['Vimeo'],
|
||||
},
|
||||
{
|
||||
# generic vimeo embed that requires original URL passed as Referer
|
||||
'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
|
||||
'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
|
||||
@@ -1544,7 +1548,7 @@ class GenericIE(InfoExtractor):
|
||||
force_videoid = smuggled_data['force_videoid']
|
||||
video_id = force_videoid
|
||||
else:
|
||||
video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
|
||||
video_id = self._generic_id(url)
|
||||
|
||||
self.to_screen('%s: Requesting header' % video_id)
|
||||
|
||||
@@ -1573,7 +1577,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
|
||||
'title': self._generic_title(url),
|
||||
'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
|
||||
}
|
||||
|
||||
@@ -1652,7 +1656,9 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
|
||||
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
|
||||
info_dict['formats'] = self._parse_mpd_formats(
|
||||
doc, video_id, mpd_base_url=url.rpartition('/')[0])
|
||||
doc, video_id,
|
||||
mpd_base_url=full_response.geturl().rpartition('/')[0],
|
||||
mpd_url=url)
|
||||
self._sort_formats(info_dict['formats'])
|
||||
return info_dict
|
||||
elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
|
||||
@@ -1747,9 +1753,9 @@ class GenericIE(InfoExtractor):
|
||||
if matches:
|
||||
return _playlist_from_matches(matches, ie='RtlNl')
|
||||
|
||||
vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
|
||||
if vimeo_url is not None:
|
||||
return self.url_result(vimeo_url)
|
||||
vimeo_urls = VimeoIE._extract_urls(url, webpage)
|
||||
if vimeo_urls:
|
||||
return _playlist_from_matches(vimeo_urls, ie=VimeoIE.ie_key())
|
||||
|
||||
vid_me_embed_url = self._search_regex(
|
||||
r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
|
||||
@@ -2249,6 +2255,35 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(
|
||||
self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
|
||||
|
||||
# Look for Mangomolo embeds
|
||||
mobj = re.search(
|
||||
r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
|
||||
(?:
|
||||
video\?.*?\bid=(?P<video_id>\d+)|
|
||||
index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
|
||||
).+?)\1''', webpage)
|
||||
if mobj is not None:
|
||||
info = {
|
||||
'_type': 'url_transparent',
|
||||
'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'thumbnail': video_thumbnail,
|
||||
'uploader': video_uploader,
|
||||
}
|
||||
video_id = mobj.group('video_id')
|
||||
if video_id:
|
||||
info.update({
|
||||
'ie_key': 'MangomoloVideo',
|
||||
'id': video_id,
|
||||
})
|
||||
else:
|
||||
info.update({
|
||||
'ie_key': 'MangomoloLive',
|
||||
'id': mobj.group('channel_id'),
|
||||
})
|
||||
return info
|
||||
|
||||
# Look for Instagram embeds
|
||||
instagram_embed_url = InstagramIE._extract_embed_url(webpage)
|
||||
if instagram_embed_url is not None:
|
||||
@@ -2296,12 +2331,23 @@ class GenericIE(InfoExtractor):
|
||||
info_dict.update(json_ld)
|
||||
return info_dict
|
||||
|
||||
# Look for HTML5 media
|
||||
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
||||
if entries:
|
||||
for entry in entries:
|
||||
entry.update({
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
})
|
||||
self._sort_formats(entry['formats'])
|
||||
return self.playlist_result(entries)
|
||||
|
||||
def check_video(vurl):
|
||||
if YoutubeIE.suitable(vurl):
|
||||
return True
|
||||
vpath = compat_urlparse.urlparse(vurl).path
|
||||
vext = determine_ext(vpath)
|
||||
return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
|
||||
return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')
|
||||
|
||||
def filter_video(urls):
|
||||
return list(filter(check_video, urls))
|
||||
@@ -2351,9 +2397,6 @@ class GenericIE(InfoExtractor):
|
||||
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
|
||||
if m_video_type is not None:
|
||||
found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
|
||||
if not found:
|
||||
# HTML5 video
|
||||
found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
|
||||
if not found:
|
||||
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
|
||||
found = re.search(
|
||||
|
@@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import random
|
||||
import re
|
||||
import math
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -14,6 +15,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
str_or_none,
|
||||
)
|
||||
|
||||
@@ -63,6 +65,9 @@ class GloboIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://canaloff.globo.com/programas/desejar-profundo/videos/4518560.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'globo:3607726',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
class MD5(object):
|
||||
@@ -396,7 +401,7 @@ class GloboIE(InfoExtractor):
|
||||
|
||||
|
||||
class GloboArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/]+)(?:\.html)?'
|
||||
_VALID_URL = r'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/.]+)(?:\.html)?'
|
||||
|
||||
_VIDEOID_REGEXES = [
|
||||
r'\bdata-video-id=["\'](\d{7,})',
|
||||
@@ -408,15 +413,20 @@ class GloboArticleIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://g1.globo.com/jornal-nacional/noticia/2014/09/novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes.html',
|
||||
'md5': '307fdeae4390ccfe6ba1aa198cf6e72b',
|
||||
'info_dict': {
|
||||
'id': '3652183',
|
||||
'ext': 'mp4',
|
||||
'title': 'Receita Federal explica como vai fiscalizar bagagens de quem retorna ao Brasil de avião',
|
||||
'duration': 110.711,
|
||||
'uploader': 'Rede Globo',
|
||||
'uploader_id': '196',
|
||||
}
|
||||
'id': 'novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes',
|
||||
'title': 'Novidade na fiscalização de bagagem pela Receita provoca discussões',
|
||||
'description': 'md5:c3c4b4d4c30c32fce460040b1ac46b12',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
}, {
|
||||
'url': 'http://g1.globo.com/pr/parana/noticia/2016/09/mpf-denuncia-lula-marisa-e-mais-seis-na-operacao-lava-jato.html',
|
||||
'info_dict': {
|
||||
'id': 'mpf-denuncia-lula-marisa-e-mais-seis-na-operacao-lava-jato',
|
||||
'title': "Lula era o 'comandante máximo' do esquema da Lava Jato, diz MPF",
|
||||
'description': 'md5:8aa7cc8beda4dc71cc8553e00b77c54c',
|
||||
},
|
||||
'playlist_count': 6,
|
||||
}, {
|
||||
'url': 'http://gq.globo.com/Prazeres/Poder/noticia/2015/10/all-o-desafio-assista-ao-segundo-capitulo-da-serie.html',
|
||||
'only_matching': True,
|
||||
@@ -435,5 +445,12 @@ class GloboArticleIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(self._VIDEOID_REGEXES, webpage, 'video id')
|
||||
return self.url_result('globo:%s' % video_id, 'Globo')
|
||||
video_ids = []
|
||||
for video_regex in self._VIDEOID_REGEXES:
|
||||
video_ids.extend(re.findall(video_regex, webpage))
|
||||
entries = [
|
||||
self.url_result('globo:%s' % video_id, GloboIE.ie_key())
|
||||
for video_id in orderedSet(video_ids)]
|
||||
title = self._og_search_title(webpage, fatal=False)
|
||||
description = self._html_search_meta('description', webpage)
|
||||
return self.playlist_result(entries, display_id, title, description)
|
||||
|
@@ -8,6 +8,8 @@ from ..utils import (
|
||||
int_or_none,
|
||||
determine_ext,
|
||||
parse_age_limit,
|
||||
urlencode_postdata,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
@@ -19,7 +21,7 @@ class GoIE(InfoExtractor):
|
||||
'watchdisneyjunior': '008',
|
||||
'watchdisneyxd': '009',
|
||||
}
|
||||
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/.*?vdka(?P<id>\w+)' % '|'.join(_BRANDS.keys())
|
||||
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_BRANDS.keys())
|
||||
_TESTS = [{
|
||||
'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
|
||||
'info_dict': {
|
||||
@@ -38,9 +40,13 @@ class GoIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
sub_domain, video_id = re.match(self._VALID_URL, url).groups()
|
||||
sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
if not video_id:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(r'data-video-id=["\']VDKA(\w+)', webpage, 'video id')
|
||||
brand = self._BRANDS[sub_domain]
|
||||
video_data = self._download_json(
|
||||
'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (self._BRANDS[sub_domain], video_id),
|
||||
'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (brand, video_id),
|
||||
video_id)['video'][0]
|
||||
title = video_data['title']
|
||||
|
||||
@@ -52,6 +58,21 @@ class GoIE(InfoExtractor):
|
||||
format_id = asset.get('format')
|
||||
ext = determine_ext(asset_url)
|
||||
if ext == 'm3u8':
|
||||
video_type = video_data.get('type')
|
||||
if video_type == 'lf':
|
||||
entitlement = self._download_json(
|
||||
'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
|
||||
video_id, data=urlencode_postdata({
|
||||
'video_id': video_data['id'],
|
||||
'video_type': video_type,
|
||||
'brand': brand,
|
||||
'device': '001',
|
||||
}))
|
||||
errors = entitlement.get('errors', {}).get('errors', [])
|
||||
if errors:
|
||||
error_message = ', '.join([error['message'] for error in errors])
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
|
||||
asset_url += '?' + entitlement['uplynkData']['sessionKey']
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False))
|
||||
else:
|
||||
|
@@ -10,7 +10,7 @@ from ..utils import unified_strdate
|
||||
|
||||
class GooglePlusIE(InfoExtractor):
|
||||
IE_DESC = 'Google Plus'
|
||||
_VALID_URL = r'https://plus\.google\.com/(?:[^/]+/)*?posts/(?P<id>\w+)'
|
||||
_VALID_URL = r'https?://plus\.google\.com/(?:[^/]+/)*?posts/(?P<id>\w+)'
|
||||
IE_NAME = 'plus.google'
|
||||
_TEST = {
|
||||
'url': 'https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH',
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class GoshgayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.goshgay\.com/video(?P<id>\d+?)($|/)'
|
||||
_VALID_URL = r'https?://(?:www\.)?goshgay\.com/video(?P<id>\d+?)($|/)'
|
||||
_TEST = {
|
||||
'url': 'http://www.goshgay.com/video299069/diesel_sfw_xxx_video',
|
||||
'md5': '4b6db9a0a333142eb9f15913142b0ed1',
|
||||
|
@@ -1,11 +1,11 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class HarkIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.hark\.com/clips/(?P<id>.+?)-.+'
|
||||
_VALID_URL = r'https?://(?:www\.)?hark\.com/clips/(?P<id>.+?)-.+'
|
||||
_TEST = {
|
||||
'url': 'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
|
||||
'md5': '6783a58491b47b92c7c1af5a77d4cbee',
|
||||
|
@@ -12,17 +12,7 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class HBOIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
|
||||
'md5': '1c33253f0c7782142c993c0ba62a8753',
|
||||
'info_dict': {
|
||||
'id': '1437839',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ep. 64 Clip: Encryption',
|
||||
}
|
||||
}
|
||||
class HBOBaseIE(InfoExtractor):
|
||||
_FORMATS_INFO = {
|
||||
'1920': {
|
||||
'width': 1280,
|
||||
@@ -50,8 +40,7 @@ class HBOIE(InfoExtractor):
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
def _extract_from_id(self, video_id):
|
||||
video_data = self._download_xml(
|
||||
'http://render.lv3.hbo.com/data/content/global/videos/data/%s.xml' % video_id, video_id)
|
||||
title = xpath_text(video_data, 'title', 'title', True)
|
||||
@@ -116,7 +105,60 @@ class HBOIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'duration': parse_duration(xpath_element(video_data, 'duration/tv14')),
|
||||
'duration': parse_duration(xpath_text(video_data, 'duration/tv14')),
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
|
||||
|
||||
class HBOIE(HBOBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
|
||||
'md5': '1c33253f0c7782142c993c0ba62a8753',
|
||||
'info_dict': {
|
||||
'id': '1437839',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ep. 64 Clip: Encryption',
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'duration': 1072,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self._extract_from_id(video_id)
|
||||
|
||||
|
||||
class HBOEpisodeIE(HBOBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?hbo\.com/(?!video)([^/]+/)+video/(?P<id>[0-9a-z-]+)\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.hbo.com/girls/episodes/5/52-i-love-you-baby/video/ep-52-inside-the-episode.html?autoplay=true',
|
||||
'md5': '689132b253cc0ab7434237fc3a293210',
|
||||
'info_dict': {
|
||||
'id': '1439518',
|
||||
'display_id': 'ep-52-inside-the-episode',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ep. 52: Inside the Episode',
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'duration': 240,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.hbo.com/game-of-thrones/about/video/season-5-invitation-to-the-set.html?autoplay=true',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'(?P<q1>[\'"])videoId(?P=q1)\s*:\s*(?P<q2>[\'"])(?P<video_id>\d+)(?P=q2)',
|
||||
webpage, 'video ID', group='video_id')
|
||||
|
||||
info_dict = self._extract_from_id(video_id)
|
||||
info_dict['display_id'] = display_id
|
||||
|
||||
return info_dict
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class HotNewHipHopIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html',
|
||||
'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96',
|
||||
|
56
youtube_dl/extractor/huajiao.py
Normal file
56
youtube_dl/extractor/huajiao.py
Normal file
@@ -0,0 +1,56 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class HuajiaoIE(InfoExtractor):
|
||||
IE_DESC = '花椒直播'
|
||||
_VALID_URL = r'https?://(?:www\.)?huajiao\.com/l/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.huajiao.com/l/38941232',
|
||||
'md5': 'd08bf9ac98787d24d1e4c0283f2d372d',
|
||||
'info_dict': {
|
||||
'id': '38941232',
|
||||
'ext': 'mp4',
|
||||
'title': '#新人求关注#',
|
||||
'description': 're:.*',
|
||||
'duration': 2424.0,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'timestamp': 1475866459,
|
||||
'upload_date': '20161007',
|
||||
'uploader': 'Penny_余姿昀',
|
||||
'uploader_id': '75206005',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
feed_json = self._search_regex(
|
||||
r'var\s+feed\s*=\s*({.+})', webpage, 'feed json')
|
||||
feed = self._parse_json(feed_json, video_id)
|
||||
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, 'description', fatal=False)
|
||||
|
||||
def get(section, field):
|
||||
return feed.get(section, {}).get(field)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': feed['feed']['formated_title'],
|
||||
'description': description,
|
||||
'duration': parse_duration(get('feed', 'duration')),
|
||||
'thumbnail': get('feed', 'image'),
|
||||
'timestamp': parse_iso8601(feed.get('creatime'), ' '),
|
||||
'uploader': get('author', 'nickname'),
|
||||
'uploader_id': get('author', 'uid'),
|
||||
'formats': self._extract_m3u8_formats(
|
||||
feed['feed']['m3u8'], video_id, 'mp4', 'm3u8_native'),
|
||||
}
|
@@ -94,7 +94,7 @@ class ImdbIE(InfoExtractor):
|
||||
class ImdbListIE(InfoExtractor):
|
||||
IE_NAME = 'imdb:list'
|
||||
IE_DESC = 'Internet Movie Database lists'
|
||||
_VALID_URL = r'https?://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
|
||||
_VALID_URL = r'https?://(?:www\.)?imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
|
||||
_TEST = {
|
||||
'url': 'http://www.imdb.com/list/JFs9NWw6XI0',
|
||||
'info_dict': {
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
@@ -29,6 +29,7 @@ class InstagramIE(InfoExtractor):
|
||||
'uploader': 'Naomi Leonor Phan-Quang',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'comments': list,
|
||||
},
|
||||
}, {
|
||||
# missing description
|
||||
@@ -44,6 +45,7 @@ class InstagramIE(InfoExtractor):
|
||||
'uploader': 'Britney Spears',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'comments': list,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -82,7 +84,7 @@ class InstagramIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
(video_url, description, thumbnail, timestamp, uploader,
|
||||
uploader_id, like_count, comment_count) = [None] * 8
|
||||
uploader_id, like_count, comment_count, height, width) = [None] * 10
|
||||
|
||||
shared_data = self._parse_json(
|
||||
self._search_regex(
|
||||
@@ -94,6 +96,8 @@ class InstagramIE(InfoExtractor):
|
||||
shared_data, lambda x: x['entry_data']['PostPage'][0]['media'], dict)
|
||||
if media:
|
||||
video_url = media.get('video_url')
|
||||
height = int_or_none(media.get('dimensions', {}).get('height'))
|
||||
width = int_or_none(media.get('dimensions', {}).get('width'))
|
||||
description = media.get('caption')
|
||||
thumbnail = media.get('display_src')
|
||||
timestamp = int_or_none(media.get('date'))
|
||||
@@ -101,10 +105,24 @@ class InstagramIE(InfoExtractor):
|
||||
uploader_id = media.get('owner', {}).get('username')
|
||||
like_count = int_or_none(media.get('likes', {}).get('count'))
|
||||
comment_count = int_or_none(media.get('comments', {}).get('count'))
|
||||
comments = [{
|
||||
'author': comment.get('user', {}).get('username'),
|
||||
'author_id': comment.get('user', {}).get('id'),
|
||||
'id': comment.get('id'),
|
||||
'text': comment.get('text'),
|
||||
'timestamp': int_or_none(comment.get('created_at')),
|
||||
} for comment in media.get(
|
||||
'comments', {}).get('nodes', []) if comment.get('text')]
|
||||
|
||||
if not video_url:
|
||||
video_url = self._og_search_video_url(webpage, secure=False)
|
||||
|
||||
formats = [{
|
||||
'url': video_url,
|
||||
'width': width,
|
||||
'height': height,
|
||||
}]
|
||||
|
||||
if not uploader_id:
|
||||
uploader_id = self._search_regex(
|
||||
r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"',
|
||||
@@ -121,7 +139,7 @@ class InstagramIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'formats': formats,
|
||||
'ext': 'mp4',
|
||||
'title': 'Video by %s' % uploader_id,
|
||||
'description': description,
|
||||
@@ -131,6 +149,7 @@ class InstagramIE(InfoExtractor):
|
||||
'uploader': uploader,
|
||||
'like_count': like_count,
|
||||
'comment_count': comment_count,
|
||||
'comments': comments,
|
||||
}
|
||||
|
||||
|
||||
|
@@ -81,6 +81,9 @@ class IPrimaIE(InfoExtractor):
|
||||
for _, src in re.findall(r'src["\']\s*:\s*(["\'])(.+?)\1', playerpage):
|
||||
extract_formats(src)
|
||||
|
||||
if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
|
||||
self.raise_geo_restricted()
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# coding=utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@@ -9,6 +9,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
)
|
||||
|
||||
@@ -19,24 +20,32 @@ class JWPlatformBaseIE(InfoExtractor):
|
||||
# TODO: Merge this with JWPlayer-related codes in generic.py
|
||||
|
||||
mobj = re.search(
|
||||
'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\((?P<options>[^)]+)\)',
|
||||
r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
|
||||
webpage)
|
||||
if mobj:
|
||||
return mobj.group('options')
|
||||
|
||||
def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
|
||||
jwplayer_data = self._parse_json(
|
||||
self._find_jwplayer_data(webpage), video_id)
|
||||
self._find_jwplayer_data(webpage), video_id,
|
||||
transform_source=js_to_json)
|
||||
return self._parse_jwplayer_data(
|
||||
jwplayer_data, video_id, *args, **kwargs)
|
||||
|
||||
def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, m3u8_id=None, rtmp_params=None, base_url=None):
|
||||
def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
|
||||
m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
|
||||
# JWPlayer backward compatibility: flattened playlists
|
||||
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
|
||||
if 'playlist' not in jwplayer_data:
|
||||
jwplayer_data = {'playlist': [jwplayer_data]}
|
||||
|
||||
entries = []
|
||||
|
||||
# JWPlayer backward compatibility: single playlist item
|
||||
# https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
|
||||
if not isinstance(jwplayer_data['playlist'], list):
|
||||
jwplayer_data['playlist'] = [jwplayer_data['playlist']]
|
||||
|
||||
for video_data in jwplayer_data['playlist']:
|
||||
# JWPlayer backward compatibility: flattened sources
|
||||
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
|
||||
@@ -55,6 +64,9 @@ class JWPlatformBaseIE(InfoExtractor):
|
||||
if source_type == 'hls' or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
source_url, this_video_id, mpd_id=mpd_id, fatal=False))
|
||||
# https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
|
||||
elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
|
||||
formats.append({
|
||||
|
@@ -105,20 +105,20 @@ class KalturaIE(InfoExtractor):
|
||||
kWidget\.(?:thumb)?[Ee]mbed\(
|
||||
\{.*?
|
||||
(?P<q1>['\"])wid(?P=q1)\s*:\s*
|
||||
(?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*?
|
||||
(?P<q2>['\"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
|
||||
(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*
|
||||
(?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4),
|
||||
(?P<q4>['\"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4),
|
||||
""", webpage) or
|
||||
re.search(
|
||||
r'''(?xs)
|
||||
(?P<q1>["\'])
|
||||
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?
|
||||
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/(?:(?!(?P=q1)).)*(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
|
||||
(?P=q1).*?
|
||||
(?:
|
||||
entry_?[Ii]d|
|
||||
(?P<q2>["\'])entry_?[Ii]d(?P=q2)
|
||||
)\s*:\s*
|
||||
(?P<q3>["\'])(?P<id>.+?)(?P=q3)
|
||||
(?P<q3>["\'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
|
||||
''', webpage))
|
||||
if mobj:
|
||||
embed_info = mobj.groupdict()
|
||||
@@ -262,8 +262,16 @@ class KalturaIE(InfoExtractor):
|
||||
# Continue if asset is not ready
|
||||
if f.get('status') != 2:
|
||||
continue
|
||||
# Original format that's not available (e.g. kaltura:1926081:0_c03e1b5g)
|
||||
# skip for now.
|
||||
if f.get('fileExt') == 'chun':
|
||||
continue
|
||||
video_url = sign_url(
|
||||
'%s/flavorId/%s' % (data_url, f['id']))
|
||||
# audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g
|
||||
# -f mp4-56)
|
||||
vcodec = 'none' if 'videoCodecId' not in f and f.get(
|
||||
'frameRate') == 0 else f.get('videoCodecId')
|
||||
formats.append({
|
||||
'format_id': '%(fileExt)s-%(bitrate)s' % f,
|
||||
'ext': f.get('fileExt'),
|
||||
@@ -271,7 +279,7 @@ class KalturaIE(InfoExtractor):
|
||||
'fps': int_or_none(f.get('frameRate')),
|
||||
'filesize_approx': int_or_none(f.get('size'), invscale=1024),
|
||||
'container': f.get('containerFormat'),
|
||||
'vcodec': f.get('videoCodecId'),
|
||||
'vcodec': vcodec,
|
||||
'height': int_or_none(f.get('height')),
|
||||
'width': int_or_none(f.get('width')),
|
||||
'url': video_url,
|
||||
|
@@ -5,7 +5,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class KaraoketvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.karaoketv\.co\.il/[^/]+/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?karaoketv\.co\.il/[^/]+/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.karaoketv.co.il/%D7%A9%D7%99%D7%A8%D7%99_%D7%A7%D7%A8%D7%99%D7%95%D7%A7%D7%99/58356/%D7%90%D7%99%D7%96%D7%95%D7%9F',
|
||||
'info_dict': {
|
||||
|
@@ -21,6 +21,10 @@ class KetnetIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.ketnet.be/achter-de-schermen/sien-repeteert-voor-stars-for-life',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# mzsource, geo restricted to Belgium
|
||||
'url': 'https://www.ketnet.be/kijken/nachtwacht/de-bermadoe',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -36,9 +40,25 @@ class KetnetIE(InfoExtractor):
|
||||
|
||||
title = config['title']
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
config['source']['hls'], video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
formats = []
|
||||
for source_key in ('', 'mz'):
|
||||
source = config.get('%ssource' % source_key)
|
||||
if not isinstance(source, dict):
|
||||
continue
|
||||
for format_id, format_url in source.items():
|
||||
if format_id == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id=format_id,
|
||||
fatal=False))
|
||||
elif format_id == 'hds':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url, video_id, f4m_id=format_id, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -6,7 +6,7 @@ from ..utils import smuggle_url
|
||||
|
||||
|
||||
class KickStarterIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>[^/]*)/.*'
|
||||
_VALID_URL = r'https?://(?:www\.)?kickstarter\.com/projects/(?P<id>[^/]*)/.*'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant/description',
|
||||
'md5': 'c81addca81327ffa66c642b5d8b08cab',
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user