Compare commits
408 Commits
2016.10.02
...
2017.01.02
Author | SHA1 | Date | |
---|---|---|---|
2021b650dd | |||
b890caaf21 | |||
3783a5ccba | |||
327caf661a | |||
ce7ccb1caa | |||
295eac6165 | |||
d546d4c8e0 | |||
eec45445a8 | |||
7fc06b6a15 | |||
966815e139 | |||
e5e19379be | |||
1f766b6e7b | |||
dc48a35404 | |||
1ea0b727c4 | |||
b6ee45e9fa | |||
e66dca5e4a | |||
3f1ce16876 | |||
9a0f999585 | |||
3540fe262f | |||
e186a9ec03 | |||
69677f3ee2 | |||
e746021577 | |||
490da94edf | |||
424ed37ec4 | |||
9cdb0a338d | |||
6cf261d882 | |||
df086e74e2 | |||
963bd5ecfc | |||
51378d359e | |||
b63005f5af | |||
4606c34e19 | |||
53a664edf4 | |||
264e77c406 | |||
d1cd7e0ed9 | |||
846fd69bac | |||
12da830993 | |||
e7ac722d62 | |||
19f37ce4b1 | |||
5e77c0b58e | |||
ab3091feda | |||
a07588369f | |||
f5a723a78a | |||
f120646f04 | |||
9c5b5f2115 | |||
ae806db628 | |||
bfa1073e11 | |||
e029c43bd4 | |||
90352a8041 | |||
1f6a79b0af | |||
3d6761ba92 | |||
f59d1146c0 | |||
b1c357975d | |||
d8c507c9e2 | |||
7fe1592073 | |||
8ab7e6c4cc | |||
c80db5d398 | |||
5aaf012a4e | |||
954529c10f | |||
ed7b333fbf | |||
723103151e | |||
e7b6caef24 | |||
ec79b1de1c | |||
f73d7d5074 | |||
52a1d48d9f | |||
d5e623aaa1 | |||
199a47abba | |||
b42a0bf360 | |||
6e416b210c | |||
04bf59ff64 | |||
87a449c1ed | |||
93753aad20 | |||
2786818c33 | |||
9b785768ac | |||
47c914f995 | |||
732d116aa7 | |||
a495840d3b | |||
b0c65c677f | |||
594601f545 | |||
0ae9560eea | |||
dc1f3a9f20 | |||
7b1e80792b | |||
38be3bc568 | |||
d7ef47bffd | |||
5c32a5be95 | |||
30918999f5 | |||
069f918302 | |||
89c63cc5f8 | |||
577748075b | |||
67dcbc0add | |||
3a40f859b5 | |||
e34c33614d | |||
abf3494ac7 | |||
3c1e9dc4ec | |||
62faf9b55e | |||
3530e0d3d9 | |||
fb37eb25d9 | |||
d2d2495e16 | |||
19b4900b7b | |||
6ca478d44a | |||
655cb545ab | |||
f0b69fa91a | |||
8821a718cf | |||
0d7d9f9404 | |||
f41db40596 | |||
68601ef3ac | |||
18ece70c4d | |||
9ed3495eae | |||
6c20a0bb99 | |||
f43795e56b | |||
7441915b1e | |||
283d1c6a8b | |||
875ddd7409 | |||
4afa4ff223 | |||
3ed81714d8 | |||
4bd7d9d4ae | |||
9b5288c92a | |||
8344296619 | |||
a94e7f4a0c | |||
d17bfe4095 | |||
98b08f94b1 | |||
73ec479c7d | |||
f150530f4d | |||
4c4765dba2 | |||
f882554815 | |||
db75f14d8a | |||
8b0d3ee64e | |||
3779d524df | |||
6303fc8204 | |||
cc61fc3934 | |||
c2530d3319 | |||
8953319916 | |||
51b1378eed | |||
2b380fc299 | |||
294d4926d7 | |||
83f1481baa | |||
f25e1c8d8c | |||
6901673868 | |||
560c8c6ec0 | |||
9338a0eae3 | |||
74394b5e10 | |||
1db058466d | |||
e94eeb1dd3 | |||
8b27d83e4e | |||
8eb7b5c3f1 | |||
b68599ed47 | |||
44444f0d3b | |||
c867adc68c | |||
3b5daf0736 | |||
c8f56741dd | |||
868630fbe5 | |||
1d6ae5628f | |||
6334794f2a | |||
4eece8ba57 | |||
2574721a81 | |||
dbcc4a6b32 | |||
0bb58a208b | |||
dc6a9e4195 | |||
8f8f182d0b | |||
2176e466e0 | |||
303b38fa84 | |||
fb27d0ce5e | |||
0aacd2deb1 | |||
08ec95a6db | |||
df46b19cb8 | |||
748a462fbe | |||
c131fc3372 | |||
b25459b88a | |||
5f75c4a4ad | |||
689f31fde5 | |||
582be35847 | |||
073d5bf583 | |||
315cb86a95 | |||
b2fc1c4fb9 | |||
d76767c90e | |||
eceba9f805 | |||
d755396804 | |||
58355a3bf1 | |||
49b69ad91c | |||
6b4dfa2819 | |||
9f60134a9d | |||
b3d4bd05f9 | |||
dbffd00ba9 | |||
50913b8241 | |||
7e08e2cab0 | |||
690355551c | |||
754e6c8322 | |||
e58609b22c | |||
4ea4c0bb22 | |||
577281b0c6 | |||
3d2729514f | |||
f076d7972c | |||
8b1aeadc33 | |||
95ad9ce573 | |||
189935f159 | |||
bc40b3a5ba | |||
3eaaa8abac | |||
db3367f43e | |||
6590925c27 | |||
4719af097c | |||
9946aa5ccf | |||
c58e07a7aa | |||
f700afa24c | |||
5d47b38cf5 | |||
ebc7ab1e23 | |||
97726317ac | |||
cb882540e8 | |||
98708e6cbd | |||
b52c9ef165 | |||
e28ed498e6 | |||
5021ca6c13 | |||
37e7a71c6c | |||
f5c4b06f17 | |||
519d897049 | |||
b61cd51869 | |||
f420902a3b | |||
de328af362 | |||
b30e4c2754 | |||
09ffe34b00 | |||
640aff1d0c | |||
c897af8aac | |||
f3c705f8ec | |||
f93ac1d175 | |||
c4c9b8440c | |||
32f2627aed | |||
9d64e1dcdc | |||
10380e55de | |||
22979993e7 | |||
b47ecd0b74 | |||
3a86b2c51e | |||
b811b4c93b | |||
f4dfa9a5ed | |||
3b4b66b50c | |||
4119a96ce5 | |||
26aae56690 | |||
4f9cd4d36f | |||
cc99a77ac1 | |||
8956d6608a | |||
3365ea8929 | |||
a18aeee803 | |||
1616f9b452 | |||
02dc0a36b7 | |||
639e3b5c99 | |||
b2758123c5 | |||
f449c061d0 | |||
9c82bba05d | |||
e3577722b0 | |||
b82c33dd67 | |||
e5a088dc4b | |||
2c6da7df4a | |||
7e7a028aa4 | |||
e70a5e6566 | |||
3bf55be466 | |||
a901fc5fc2 | |||
cae6bc0118 | |||
d9ee2e5cf6 | |||
e1a0b3b81c | |||
2a048f9878 | |||
ea331f40e6 | |||
f02700a1fa | |||
f3517569f6 | |||
c725333d41 | |||
a5a8877f9c | |||
43c53a1700 | |||
ec8705117a | |||
3d8d44c7b1 | |||
88839f4380 | |||
83e9374464 | |||
773017c648 | |||
777d90dc28 | |||
3791d84acc | |||
9305a0dc60 | |||
94e08950e3 | |||
ee824a8d06 | |||
d3b6b3b95b | |||
b17422753f | |||
b0b28b8241 | |||
81cb7a5978 | |||
d2e96a8ed4 | |||
2e7c8cab55 | |||
d7d4481c6a | |||
5ace137bf4 | |||
9dde0e04e6 | |||
f16f8505b1 | |||
9dc13a6780 | |||
9aa929d337 | |||
425f3fdfcb | |||
e034cbc581 | |||
5378f8ce0d | |||
b64d04c119 | |||
00ca755231 | |||
69c2d42bd7 | |||
062e2769a3 | |||
859447a28d | |||
f8ae2c7f30 | |||
9ce0077485 | |||
0ebb86bd18 | |||
9df6b03caf | |||
8e2915d70b | |||
19e447150d | |||
ad9fd84004 | |||
60633ae9a0 | |||
a81dc82151 | |||
9218a6b4f5 | |||
02af6ec707 | |||
05b7996cab | |||
46f6052950 | |||
c8802041dd | |||
c7911009a0 | |||
2b96b06bf0 | |||
06b3fe2926 | |||
2c6743bf0f | |||
efb6242916 | |||
0384932e3d | |||
edd6074cea | |||
791d29dbf8 | |||
481cc7335c | |||
853a71b628 | |||
e2628fb6a0 | |||
df4939b1cd | |||
0b94dbb115 | |||
8d76bdf12b | |||
8204bacf1d | |||
47da782337 | |||
74324a7ac2 | |||
b0dfcab60a | |||
bbd7706898 | |||
112740e79f | |||
c0b1e88895 | |||
7cdfbbf9b8 | |||
ac943d48d3 | |||
73498a8921 | |||
9187ee4f19 | |||
2273e2c530 | |||
4b492e3579 | |||
9c4258bcec | |||
ea8aefd1d7 | |||
6edfc40a0e | |||
68d9561ca1 | |||
cfc0e7c82b | |||
4102e64051 | |||
f605242bfc | |||
d32fa0f12c | |||
a347a0d088 | |||
77c5b98dcd | |||
88ebefc054 | |||
2e638d7bca | |||
a26b174c61 | |||
73c801d660 | |||
dff5107b68 | |||
8c3e448e80 | |||
2ecbd2ad6f | |||
62a0b86e4f | |||
146969e05b | |||
e2004ccaf7 | |||
a5f8473145 | |||
b7f59a3bf6 | |||
580d411931 | |||
5c4bfd4da5 | |||
7104ae799c | |||
bcd6276520 | |||
591e384552 | |||
9feb1c9731 | |||
a093cfc78b | |||
6f20b65e72 | |||
cea364f70c | |||
55642487f0 | |||
3d643f4cec | |||
c452e69d3d | |||
555787d717 | |||
f165ca70eb | |||
27b8d2ee95 | |||
71cdcb2331 | |||
176006a120 | |||
65f4c1de3d | |||
b0082629a9 | |||
8204c73352 | |||
2b51dac1f9 | |||
f68901e50a | |||
3adb9d119e | |||
1dd58e14d8 | |||
dd4291f729 | |||
888f8d6ba4 | |||
f475e88121 | |||
3c6b3bf221 | |||
38588ab977 | |||
85bcdd081c | |||
9dcd6fd3aa | |||
98763ee354 | |||
3d83a1ae92 | |||
c0a7b9b348 | |||
831a34caa2 | |||
09b9c45e24 | |||
33898fb19c | |||
017eb82934 | |||
b1d798887e | |||
0a33bb2cb2 | |||
185744f92f | |||
7232e54813 | |||
6eb5503b12 | |||
539c881bfc | |||
c1b2a0858c | |||
215ff6e0f3 | |||
dcdb292fdd | |||
c1084ddb0c | |||
ee5de4e38e | |||
25291b979a | |||
567a5996ca | |||
6cbb20bb09 |
8
.github/ISSUE_TEMPLATE.md
vendored
8
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.02**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.01.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.01.02**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.10.02
|
||||
[debug] youtube-dl version 2017.01.02
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
@ -50,6 +50,8 @@ $ youtube-dl -v <your command line>
|
||||
- Single video: https://youtu.be/BaW_jenozKc
|
||||
- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc
|
||||
|
||||
Note that **youtube-dl does not support sites dedicated to [copyright infringement](https://github.com/rg3/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
|
||||
|
||||
---
|
||||
|
||||
### Description of your *issue*, suggested solution and other information
|
||||
|
2
.github/ISSUE_TEMPLATE_tmpl.md
vendored
2
.github/ISSUE_TEMPLATE_tmpl.md
vendored
@ -50,6 +50,8 @@ $ youtube-dl -v <your command line>
|
||||
- Single video: https://youtu.be/BaW_jenozKc
|
||||
- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc
|
||||
|
||||
Note that **youtube-dl does not support sites dedicated to [copyright infringement](https://github.com/rg3/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
|
||||
|
||||
---
|
||||
|
||||
### Description of your *issue*, suggested solution and other information
|
||||
|
4
.gitignore
vendored
4
.gitignore
vendored
@ -30,6 +30,10 @@ updates_key.pem
|
||||
*.m4v
|
||||
*.mp3
|
||||
*.3gp
|
||||
*.wav
|
||||
*.ape
|
||||
*.mkv
|
||||
*.swf
|
||||
*.part
|
||||
*.swp
|
||||
test/testdata
|
||||
|
8
AUTHORS
8
AUTHORS
@ -26,7 +26,7 @@ Albert Kim
|
||||
Pierre Rudloff
|
||||
Huarong Huo
|
||||
Ismael Mejía
|
||||
Steffan 'Ruirize' James
|
||||
Steffan Donal
|
||||
Andras Elso
|
||||
Jelle van der Waa
|
||||
Marcin Cieślak
|
||||
@ -185,3 +185,9 @@ Aleksander Nitecki
|
||||
Sebastian Blunt
|
||||
Matěj Cepl
|
||||
Xie Yanbo
|
||||
Philip Xu
|
||||
John Hawkinson
|
||||
Rich Leeper
|
||||
Zhong Jianxin
|
||||
Thor77
|
||||
Mattias Wadman
|
||||
|
@ -12,7 +12,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] Proxy map: {}
|
||||
...
|
||||
```
|
||||
**Do not post screenshots of verbose log only plain text is acceptable.**
|
||||
**Do not post screenshots of verbose logs; only plain text is acceptable.**
|
||||
|
||||
The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
|
||||
|
||||
@ -58,7 +58,7 @@ We are then presented with a very complicated request when the original problem
|
||||
|
||||
Some of our users seem to think there is a limit of issues they can or should open. There is no limit of issues they can or should open. While it may seem appealing to be able to dump all your issues into one ticket, that means that someone who solves one of your issues cannot mark the issue as closed. Typically, reporting a bunch of issues leads to the ticket lingering since nobody wants to attack that behemoth, until someone mercifully splits the issue into multiple ones.
|
||||
|
||||
In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, Whitehouse podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of youtube-dl that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service.
|
||||
In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, White house podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of youtube-dl that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service.
|
||||
|
||||
### Is anyone going to need the feature?
|
||||
|
||||
@ -66,7 +66,7 @@ Only post features that you (or an incapacitated friend you can personally talk
|
||||
|
||||
### Is your question about youtube-dl?
|
||||
|
||||
It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different or even the reporter's own application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.
|
||||
It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different, or even the reporter's own, application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.
|
||||
|
||||
# DEVELOPER INSTRUCTIONS
|
||||
|
||||
@ -85,16 +85,16 @@ To run the test, simply invoke your favorite test runner, or execute a test file
|
||||
If you want to create a build of youtube-dl yourself, you'll need
|
||||
|
||||
* python
|
||||
* make (both GNU make and BSD make are supported)
|
||||
* make (only GNU make is supported)
|
||||
* pandoc
|
||||
* zip
|
||||
* nosetests
|
||||
|
||||
### Adding support for a new site
|
||||
|
||||
If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**.
|
||||
If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](README.md#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**.
|
||||
|
||||
After you have ensured this site is distributing it's content legally, you can follow this quick list (assuming your service is called `yourextractor`):
|
||||
After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
|
||||
|
||||
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
|
||||
2. Check out the source code with:
|
||||
@ -167,19 +167,19 @@ In any case, thank you very much for your contributions!
|
||||
|
||||
This section introduces a guide lines for writing idiomatic, robust and future-proof extractor code.
|
||||
|
||||
Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hoster out of your control and this layout tend to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize code dependency on source's layout changes and even to make the code foresee potential future changes and be ready for that. This is important because it will allow extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with fix incorporated all the previous version become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say some may never receive an update at all that is possible for non rolling release distros.
|
||||
Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that. This is important because it will allow the extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with a fix incorporated, all the previous versions become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say, some non rolling release distros may never receive an update at all.
|
||||
|
||||
### Mandatory and optional metafields
|
||||
|
||||
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in *info dict* are considered mandatory for successful extraction process by youtube-dl:
|
||||
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
|
||||
|
||||
- `id` (media identifier)
|
||||
- `title` (media title)
|
||||
- `url` (media download URL) or `formats`
|
||||
|
||||
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` to be mandatory. Thus aforementioned metafields are the critical data the extraction does not make any sense without and if any of them fail to be extracted then extractor is considered completely broken.
|
||||
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
|
||||
|
||||
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||
|
||||
#### Example
|
||||
|
||||
@ -199,7 +199,7 @@ Assume at this point `meta`'s layout is:
|
||||
}
|
||||
```
|
||||
|
||||
Assume you want to extract `summary` and put into resulting info dict as `description`. Since `description` is optional metafield you should be ready that this key may be missing from the `meta` dict, so that you should extract it like:
|
||||
Assume you want to extract `summary` and put it into the resulting info dict as `description`. Since `description` is an optional meta field you should be ready that this key may be missing from the `meta` dict, so that you should extract it like:
|
||||
|
||||
```python
|
||||
description = meta.get('summary') # correct
|
||||
@ -211,7 +211,7 @@ and not like:
|
||||
description = meta['summary'] # incorrect
|
||||
```
|
||||
|
||||
The latter will break extraction process with `KeyError` if `summary` disappears from `meta` at some time later but with former approach extraction will just go ahead with `description` set to `None` that is perfectly fine (remember `None` is equivalent for absence of data).
|
||||
The latter will break extraction process with `KeyError` if `summary` disappears from `meta` at some later time but with the former approach extraction will just go ahead with `description` set to `None` which is perfectly fine (remember `None` is equivalent to the absence of data).
|
||||
|
||||
Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance:
|
||||
|
||||
@ -231,21 +231,21 @@ description = self._search_regex(
|
||||
webpage, 'description', default=None)
|
||||
```
|
||||
|
||||
On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that are known to may or may not be present.
|
||||
On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present.
|
||||
|
||||
### Provide fallbacks
|
||||
|
||||
When extracting metadata try to provide several scenarios for that. For example if `title` is present in several places/sources try extracting from at least some of them. This would make it more future-proof in case some of the sources became unavailable.
|
||||
When extracting metadata try to do so from multiple sources. For example if `title` is present in several places, try extracting from at least some of them. This makes it more future-proof in case some of the sources become unavailable.
|
||||
|
||||
#### Example
|
||||
|
||||
Say `meta` from previous example has a `title` and you are about to extract it. Since `title` is mandatory meta field you should end up with something like:
|
||||
Say `meta` from the previous example has a `title` and you are about to extract it. Since `title` is a mandatory meta field you should end up with something like:
|
||||
|
||||
```python
|
||||
title = meta['title']
|
||||
```
|
||||
|
||||
If `title` disappeares from `meta` in future due to some changes on hoster's side the extraction would fail since `title` is mandatory. That's expected.
|
||||
If `title` disappears from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected.
|
||||
|
||||
Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario:
|
||||
|
||||
@ -282,7 +282,7 @@ title = self._search_regex(
|
||||
webpage, 'title', group='title')
|
||||
```
|
||||
|
||||
Note how you tolerate potential changes in `style` attribute's value or switch from using double quotes to single for `class` attribute:
|
||||
Note how you tolerate potential changes in the `style` attribute's value or switch from using double quotes to single for `class` attribute:
|
||||
|
||||
The code definitely should not look like:
|
||||
|
||||
|
389
ChangeLog
389
ChangeLog
@ -1,3 +1,392 @@
|
||||
version 2017.01.02
|
||||
|
||||
Extractors
|
||||
* [cctv] Improve extraction (#879, #6753, #8541)
|
||||
+ [nrktv:episodes] Add support for episodes (#11571)
|
||||
+ [arkena] Add support for video.arkena.com (#11568)
|
||||
|
||||
|
||||
version 2016.12.31
|
||||
|
||||
Core
|
||||
+ Introduce --config-location option for custom configuration files (#6745,
|
||||
#10648)
|
||||
|
||||
Extractors
|
||||
+ [twitch] Add support for player.twitch.tv (#11535, #11537)
|
||||
+ [videa] Add support for videa.hu (#8181, #11133)
|
||||
* [vk] Fix postlive videos extraction
|
||||
* [vk] Extract from playerParams (#11555)
|
||||
- [freevideo] Remove extractor (#11515)
|
||||
+ [showroomlive] Add support for showroom-live.com (#11458)
|
||||
* [xhamster] Fix duration extraction (#11549)
|
||||
* [rtve:live] Fix extraction (#11529)
|
||||
* [brightcove:legacy] Improve embeds detection (#11523)
|
||||
+ [twitch] Add support for rechat messages (#11524)
|
||||
* [acast] Fix audio and timestamp extraction (#11521)
|
||||
|
||||
|
||||
version 2016.12.22
|
||||
|
||||
Core
|
||||
* [extractor/common] Improve detection of video-only formats in m3u8
|
||||
manifests (#11507)
|
||||
|
||||
Extractors
|
||||
+ [theplatform] Pass geo verification headers to SMIL request (#10146)
|
||||
+ [viu] Pass geo verification headers to auth request
|
||||
* [rtl2] Extract more formats and metadata
|
||||
* [vbox7] Skip malformed JSON-LD (#11501)
|
||||
* [uplynk] Force downloading using native HLS downloader (#11496)
|
||||
+ [laola1] Add support for another extraction scenario (#11460)
|
||||
|
||||
|
||||
version 2016.12.20
|
||||
|
||||
Core
|
||||
* [extractor/common] Improve fragment URL construction for DASH media
|
||||
* [extractor/common] Fix codec information extraction for mixed audio/video
|
||||
DASH media (#11490)
|
||||
|
||||
Extractors
|
||||
* [vbox7] Fix extraction (#11494)
|
||||
+ [uktvplay] Add support for uktvplay.uktv.co.uk (#11027)
|
||||
+ [piksel] Add support for player.piksel.com (#11246)
|
||||
+ [vimeo] Add support for DASH formats
|
||||
* [vimeo] Fix extraction for HLS formats (#11490)
|
||||
* [kaltura] Fix wrong widget ID in some cases (#11480)
|
||||
+ [nrktv:direkte] Add support for live streams (#11488)
|
||||
* [pbs] Fix extraction for geo restricted videos (#7095)
|
||||
* [brightcove:new] Skip widevine classic videos
|
||||
+ [viu] Add support for viu.com (#10607, #11329)
|
||||
|
||||
|
||||
version 2016.12.18
|
||||
|
||||
Core
|
||||
+ [extractor/common] Recognize DASH formats in html5 media entries
|
||||
|
||||
Extractors
|
||||
+ [ccma] Add support for ccma.cat (#11359)
|
||||
* [laola1tv] Improve extraction
|
||||
+ [laola1tv] Add support embed URLs (#11460)
|
||||
* [nbc] Fix extraction for MSNBC videos (#11466)
|
||||
* [twitch] Adapt to new videos pages URL schema (#11469)
|
||||
+ [meipai] Add support for meipai.com (#10718)
|
||||
* [jwplatform] Improve subtitles and duration extraction
|
||||
+ [ondemandkorea] Add support for ondemandkorea.com (#10772)
|
||||
+ [vvvvid] Add support for vvvvid.it (#5915)
|
||||
|
||||
|
||||
version 2016.12.15
|
||||
|
||||
Core
|
||||
+ [utils] Add convenience urljoin
|
||||
|
||||
Extractors
|
||||
+ [openload] Recognize oload.tv URLs (#10408)
|
||||
+ [facebook] Recognize .onion URLs (#11443)
|
||||
* [vlive] Fix extraction (#11375, #11383)
|
||||
+ [canvas] Extract DASH formats
|
||||
+ [melonvod] Add support for vod.melon.com (#11419)
|
||||
|
||||
|
||||
version 2016.12.12
|
||||
|
||||
Core
|
||||
+ [utils] Add common user agents map
|
||||
+ [common] Recognize HLS manifests that contain video only formats (#11394)
|
||||
|
||||
Extractors
|
||||
+ [dplay] Use Safari user agent for HLS (#11418)
|
||||
+ [facebook] Detect login required error message
|
||||
* [facebook] Improve video selection (#11390)
|
||||
+ [canalplus] Add another video id pattern (#11399)
|
||||
* [mixcloud] Relax URL regular expression (#11406)
|
||||
* [ctvnews] Relax URL regular expression (#11394)
|
||||
+ [rte] Capture and output error message (#7746, #10498)
|
||||
+ [prosiebensat1] Add support for DASH formats
|
||||
* [srgssr] Improve extraction for geo restricted videos (#11089)
|
||||
* [rts] Improve extraction for geo restricted videos (#4989)
|
||||
|
||||
|
||||
version 2016.12.09
|
||||
|
||||
Core
|
||||
* [socks] Fix error reporting (#11355)
|
||||
|
||||
Extractors
|
||||
* [openload] Fix extraction (#10408)
|
||||
* [pandoratv] Fix extraction (#11023)
|
||||
+ [telebruxelles] Add support for emission URLs
|
||||
* [telebruxelles] Extract all formats
|
||||
+ [bloomberg] Add another video id regular expression (#11371)
|
||||
* [fusion] Update ooyala id regular expression (#11364)
|
||||
+ [1tv] Add support for playlists (#11335)
|
||||
* [1tv] Improve extraction (#11335)
|
||||
+ [aenetworks] Extract more formats (#11321)
|
||||
+ [thisoldhouse] Recognize /tv-episode/ URLs (#11271)
|
||||
|
||||
|
||||
version 2016.12.01
|
||||
|
||||
Extractors
|
||||
* [soundcloud] Update client id (#11327)
|
||||
* [ruutu] Detect DRM protected videos
|
||||
+ [liveleak] Add support for youtube embeds (#10688)
|
||||
* [spike] Fix full episodes support (#11312)
|
||||
* [comedycentral] Fix full episodes support
|
||||
* [normalboots] Rewrite in terms of JWPlatform (#11184)
|
||||
* [teamfourstar] Rewrite in terms of JWPlatform (#11184)
|
||||
- [screenwavemedia] Remove extractor (#11184)
|
||||
|
||||
|
||||
version 2016.11.27
|
||||
|
||||
Extractors
|
||||
+ [webcaster] Add support for webcaster.pro
|
||||
+ [azubu] Add support for azubu.uol.com.br (#11305)
|
||||
* [viki] Prefer hls formats
|
||||
* [viki] Fix rtmp formats extraction (#11255)
|
||||
* [puls4] Relax URL regular expression (#11267)
|
||||
* [vevo] Improve artist extraction (#10911)
|
||||
* [mitele] Relax URL regular expression and extract more metadata (#11244)
|
||||
+ [cbslocal] Recognize New York site (#11285)
|
||||
+ [youtube:playlist] Pass disable_polymer in URL query (#11193)
|
||||
|
||||
|
||||
version 2016.11.22
|
||||
|
||||
Extractors
|
||||
* [hellporno] Fix video extension extraction (#11247)
|
||||
+ [hellporno] Add support for hellporno.net (#11247)
|
||||
+ [amcnetworks] Recognize more BBC America URLs (#11263)
|
||||
* [funnyordie] Improve extraction (#11208)
|
||||
* [extractor/generic] Improve limelight embeds support
|
||||
- [crunchyroll] Remove ScaledBorderAndShadow from ASS subtitles (#8207, #9028)
|
||||
* [bandcamp] Fix free downloads extraction and extract all formats (#11067)
|
||||
* [twitter:card] Relax URL regular expression (#11225)
|
||||
+ [tvanouvelles] Add support for tvanouvelles.ca (#10616)
|
||||
|
||||
|
||||
version 2016.11.18
|
||||
|
||||
Extractors
|
||||
* [youtube:live] Relax URL regular expression (#11164)
|
||||
* [openload] Fix extraction (#10408, #11122)
|
||||
* [vlive] Prefer locale over language for subtitles id (#11203)
|
||||
|
||||
|
||||
version 2016.11.14.1
|
||||
|
||||
Core
|
||||
+ [downoader/fragment,f4m,hls] Respect HTTP headers from info dict
|
||||
* [extractor/common] Fix media templates with Bandwidth substitution pattern in
|
||||
MPD manifests (#11175)
|
||||
* [extractor/common] Improve thumbnail extraction from JSON-LD
|
||||
|
||||
Extractors
|
||||
+ [nrk] Workaround geo restriction
|
||||
+ [nrk] Improve error detection and messages
|
||||
+ [afreecatv] Add support for vod.afreecatv.com (#11174)
|
||||
* [cda] Fix and improve extraction (#10929, #10936)
|
||||
* [plays] Fix extraction (#11165)
|
||||
* [eagleplatform] Fix extraction (#11160)
|
||||
+ [audioboom] Recognize /posts/ URLs (#11149)
|
||||
|
||||
|
||||
version 2016.11.08.1
|
||||
|
||||
Extractors
|
||||
* [espn:article] Fix support for espn.com articles
|
||||
* [franceculture] Fix extraction (#11140)
|
||||
|
||||
|
||||
version 2016.11.08
|
||||
|
||||
Extractors
|
||||
* [tmz:article] Fix extraction (#11052)
|
||||
* [espn] Fix extraction (#11041)
|
||||
* [mitele] Fix extraction after website redesign (#10824)
|
||||
- [ard] Remove age restriction check (#11129)
|
||||
* [generic] Improve support for pornhub.com embeds (#11100)
|
||||
+ [generic] Add support for redtube.com embeds (#11099)
|
||||
+ [generic] Add support for drtuber.com embeds (#11098)
|
||||
+ [redtube] Add support for embed URLs
|
||||
+ [drtuber] Add support for embed URLs
|
||||
+ [yahoo] Improve content id extraction (#11088)
|
||||
* [toutv] Relax URL regular expression (#11121)
|
||||
|
||||
|
||||
version 2016.11.04
|
||||
|
||||
Core
|
||||
* [extractor/common] Tolerate malformed RESOLUTION attribute in m3u8
|
||||
manifests (#11113)
|
||||
* [downloader/ism] Fix AVC Decoder Configuration Record
|
||||
|
||||
Extractors
|
||||
+ [fox9] Add support for fox9.com (#11110)
|
||||
+ [anvato] Extract more metadata and improve formats extraction
|
||||
* [vodlocker] Improve removed videos detection (#11106)
|
||||
+ [vzaar] Add support for vzaar.com (#11093)
|
||||
+ [vice] Add support for uplynk preplay videos (#11101)
|
||||
* [tubitv] Fix extraction (#11061)
|
||||
+ [shahid] Add support for authentication (#11091)
|
||||
+ [radiocanada] Add subtitles support (#11096)
|
||||
+ [generic] Add support for ISM manifests
|
||||
|
||||
|
||||
version 2016.11.02
|
||||
|
||||
Core
|
||||
+ Add basic support for Smooth Streaming protocol (#8118, #10969)
|
||||
* Improve MPD manifest base URL extraction (#10909, #11079)
|
||||
* Fix --match-filter for int-like strings (#11082)
|
||||
|
||||
Extractors
|
||||
+ [mva] Add support for ISM formats
|
||||
+ [msn] Add support for ISM formats
|
||||
+ [onet] Add support for ISM formats
|
||||
+ [tvp] Add support for ISM formats
|
||||
+ [nicknight] Add support for nicknight sites (#10769)
|
||||
|
||||
|
||||
version 2016.10.30
|
||||
|
||||
Extractors
|
||||
* [facebook] Improve 1080P video detection (#11073)
|
||||
* [imgur] Recognize /r/ URLs (#11071)
|
||||
* [beeg] Fix extraction (#11069)
|
||||
* [openload] Fix extraction (#10408)
|
||||
* [gvsearch] Modernize and fix search request (#11051)
|
||||
* [adultswim] Fix extraction (#10979)
|
||||
+ [nobelprize] Add support for nobelprize.org (#9999)
|
||||
* [hornbunny] Fix extraction (#10981)
|
||||
* [tvp] Improve video id extraction (#10585)
|
||||
|
||||
|
||||
version 2016.10.26
|
||||
|
||||
Extractors
|
||||
+ [rentv] Add support for ren.tv (#10620)
|
||||
+ [ard] Detect unavailable videos (#11018)
|
||||
* [vk] Fix extraction (#11022)
|
||||
|
||||
|
||||
version 2016.10.25
|
||||
|
||||
Core
|
||||
* Running youtube-dl in the background is fixed (#10996, #10706, #955)
|
||||
|
||||
Extractors
|
||||
+ [jamendo] Add support for jamendo.com (#10132, #10736)
|
||||
+ [pandatv] Add support for panda.tv (#10736)
|
||||
+ [dotsub] Support Vimeo embed (#10964)
|
||||
* [litv] Fix extraction
|
||||
+ [vimeo] Delegate ondemand redirects to ondemand extractor (#10994)
|
||||
* [vivo] Fix extraction (#11003)
|
||||
+ [twitch:stream] Add support for rebroadcasts (#10995)
|
||||
* [pluralsight] Fix subtitles conversion (#10990)
|
||||
|
||||
|
||||
version 2016.10.21.1
|
||||
|
||||
Extractors
|
||||
+ [pluralsight] Process all clip URLs (#10984)
|
||||
|
||||
|
||||
version 2016.10.21
|
||||
|
||||
Core
|
||||
- Disable thumbnails embedding in mkv
|
||||
+ Add support for Comcast multiple-system operator (#10819)
|
||||
|
||||
Extractors
|
||||
* [pluralsight] Adapt to new API (#10972)
|
||||
* [openload] Fix extraction (#10408, #10971)
|
||||
+ [natgeo] Extract m3u8 formats (#10959)
|
||||
|
||||
|
||||
version 2016.10.19
|
||||
|
||||
Core
|
||||
+ [utils] Expose PACKED_CODES_RE
|
||||
+ [extractor/common] Extract non smil wowza mpd manifests
|
||||
+ [extractor/common] Detect f4m audio-only formats
|
||||
|
||||
Extractors
|
||||
* [vidzi] Fix extraction (#10908, #10952)
|
||||
* [urplay] Fix subtitles extraction
|
||||
+ [urplay] Add support for urskola.se (#10915)
|
||||
+ [orf] Add subtitles support (#10939)
|
||||
* [youtube] Fix --no-playlist behavior for youtu.be/id URLs (#10896)
|
||||
* [nrk] Relax URL regular expression (#10928)
|
||||
+ [nytimes] Add support for podcasts (#10926)
|
||||
* [pluralsight] Relax URL regular expression (#10941)
|
||||
|
||||
|
||||
version 2016.10.16
|
||||
|
||||
Core
|
||||
* [postprocessor/ffmpeg] Return correct filepath and ext in updated information
|
||||
in FFmpegExtractAudioPP (#10879)
|
||||
|
||||
Extractors
|
||||
+ [ruutu] Add support for supla.fi (#10849)
|
||||
+ [theoperaplatform] Add support for theoperaplatform.eu (#10914)
|
||||
* [lynda] Fix height for prioritized streams
|
||||
+ [lynda] Add fallback extraction scenario
|
||||
* [lynda] Switch to https (#10916)
|
||||
+ [huajiao] New extractor (#10917)
|
||||
* [cmt] Fix mgid extraction (#10813)
|
||||
+ [safari:course] Add support for techbus.safaribooksonline.com
|
||||
* [orf:tvthek] Fix extraction and modernize (#10898)
|
||||
* [chirbit] Fix extraction of user profile pages
|
||||
* [carambatv] Fix extraction
|
||||
* [canalplus] Fix extraction for some videos
|
||||
* [cbsinteractive] Fix extraction for cnet.com
|
||||
* [parliamentliveuk] Lower case URLs are now recognized (#10912)
|
||||
|
||||
|
||||
version 2016.10.12
|
||||
|
||||
Core
|
||||
+ Support HTML media elements without child nodes
|
||||
* [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387)
|
||||
|
||||
Extractors
|
||||
* [dailymotion] Fix extraction (#10901)
|
||||
* [vimeo:review] Fix extraction (#10900)
|
||||
* [nhl] Correctly handle invalid formats (#10713)
|
||||
* [footyroom] Fix extraction (#10810)
|
||||
* [abc.net.au:iview] Fix for standalone (non series) videos (#10895)
|
||||
+ [hbo] Add support for episode pages (#10892)
|
||||
* [allocine] Fix extraction (#10860)
|
||||
+ [nextmedia] Recognize action news on AppleDaily
|
||||
* [lego] Improve info extraction and bypass geo restriction (#10872)
|
||||
|
||||
|
||||
version 2016.10.07
|
||||
|
||||
Extractors
|
||||
+ [iprima] Detect geo restriction
|
||||
* [facebook] Fix video extraction (#10846)
|
||||
+ [commonprotocols] Support direct MMS links (#10838)
|
||||
+ [generic] Add support for multiple vimeo embeds (#10862)
|
||||
+ [nzz] Add support for nzz.ch (#4407)
|
||||
+ [npo] Detect geo restriction
|
||||
+ [npo] Add support for 2doc.nl (#10842)
|
||||
+ [lego] Add support for lego.com (#10369)
|
||||
+ [tonline] Add support for t-online.de (#10376)
|
||||
* [techtalks] Relax URL regular expression (#10840)
|
||||
* [youtube:live] Extend URL regular expression (#10839)
|
||||
+ [theweatherchannel] Add support for weather.com (#7188)
|
||||
+ [thisoldhouse] Add support for thisoldhouse.com (#10837)
|
||||
+ [nhl] Add support for wch2016.com (#10833)
|
||||
* [pornoxo] Use JWPlatform to improve metadata extraction
|
||||
|
||||
|
||||
version 2016.10.02
|
||||
|
||||
Core
|
||||
|
6
Makefile
6
Makefile
@ -1,7 +1,7 @@
|
||||
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
||||
|
||||
clean:
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||
find . -name "*.pyc" -delete
|
||||
find . -name "*.class" -delete
|
||||
|
||||
@ -12,7 +12,7 @@ SHAREDIR ?= $(PREFIX)/share
|
||||
PYTHON ?= /usr/bin/env python
|
||||
|
||||
# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
|
||||
SYSCONFDIR != if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi
|
||||
SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi)
|
||||
|
||||
install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
|
||||
install -d $(DESTDIR)$(BINDIR)
|
||||
@ -90,7 +90,7 @@ fish-completion: youtube-dl.fish
|
||||
|
||||
lazy-extractors: youtube_dl/extractor/lazy_extractors.py
|
||||
|
||||
_EXTRACTOR_FILES != find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py'
|
||||
_EXTRACTOR_FILES = $(shell find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py')
|
||||
youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
|
||||
$(PYTHON) devscripts/make_lazy_extractors.py $@
|
||||
|
||||
|
109
README.md
109
README.md
@ -29,7 +29,7 @@ Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.ex
|
||||
|
||||
You can also use pip:
|
||||
|
||||
sudo pip install --upgrade youtube-dl
|
||||
sudo -H pip install --upgrade youtube-dl
|
||||
|
||||
This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information.
|
||||
|
||||
@ -44,11 +44,7 @@ Or with [MacPorts](https://www.macports.org/):
|
||||
Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://rg3.github.io/youtube-dl/download.html).
|
||||
|
||||
# DESCRIPTION
|
||||
**youtube-dl** is a command-line program to download videos from
|
||||
YouTube.com and a few more sites. It requires the Python interpreter, version
|
||||
2.6, 2.7, or 3.2+, and it is not platform specific. It should work on
|
||||
your Unix box, on Windows or on Mac OS X. It is released to the public domain,
|
||||
which means you can modify it, redistribute it or use it however you like.
|
||||
**youtube-dl** is a command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on your Unix box, on Windows or on Mac OS X. It is released to the public domain, which means you can modify it, redistribute it or use it however you like.
|
||||
|
||||
youtube-dl [OPTIONS] URL [URL...]
|
||||
|
||||
@ -84,6 +80,9 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
configuration in ~/.config/youtube-
|
||||
dl/config (%APPDATA%/youtube-dl/config.txt
|
||||
on Windows)
|
||||
--config-location PATH Location of the configuration file; either
|
||||
the path to the config or its containing
|
||||
directory.
|
||||
--flat-playlist Do not extract the videos of a playlist,
|
||||
only list them.
|
||||
--mark-watched Mark videos watched (YouTube only)
|
||||
@ -187,7 +186,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
of SIZE.
|
||||
--playlist-reverse Download playlist videos in reverse order
|
||||
--xattr-set-filesize Set file xattribute ytdl.filesize with
|
||||
expected filesize (experimental)
|
||||
expected file size (experimental)
|
||||
--hls-prefer-native Use the native HLS downloader instead of
|
||||
ffmpeg
|
||||
--hls-prefer-ffmpeg Use ffmpeg instead of the native HLS
|
||||
@ -354,7 +353,7 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
-u, --username USERNAME Login with this account ID
|
||||
-p, --password PASSWORD Account password. If this option is left
|
||||
out, youtube-dl will ask interactively.
|
||||
-2, --twofactor TWOFACTOR Two-factor auth code
|
||||
-2, --twofactor TWOFACTOR Two-factor authentication code
|
||||
-n, --netrc Use .netrc authentication data
|
||||
--video-password PASSWORD Video password (vimeo, smotri, youku)
|
||||
|
||||
@ -449,12 +448,12 @@ You can use `--ignore-config` if you want to disable the configuration file for
|
||||
|
||||
### Authentication with `.netrc` file
|
||||
|
||||
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by you only:
|
||||
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you:
|
||||
```
|
||||
touch $HOME/.netrc
|
||||
chmod a-rwx,u+rw $HOME/.netrc
|
||||
```
|
||||
After that you can add credentials for extractor in the following format, where *extractor* is the name of extractor in lowercase:
|
||||
After that you can add credentials for an extractor in the following format, where *extractor* is the name of the extractor in lowercase:
|
||||
```
|
||||
machine <extractor> login <login> password <password>
|
||||
```
|
||||
@ -550,13 +549,13 @@ Available for the media that is a track or a part of a music album:
|
||||
- `disc_number`: Number of the disc or other physical medium the track belongs to
|
||||
- `release_year`: Year (YYYY) when the album was released
|
||||
|
||||
Each aforementioned sequence when referenced in output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by particular extractor, such sequences will be replaced with `NA`.
|
||||
Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with `NA`.
|
||||
|
||||
For example for `-o %(title)s-%(id)s.%(ext)s` and mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj` this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
|
||||
For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj`, this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
|
||||
|
||||
Output template can also contain arbitrary hierarchical path, e.g. `-o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s'` that will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you.
|
||||
Output templates can also contain arbitrary hierarchical path, e.g. `-o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s'` which will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you.
|
||||
|
||||
To specify percent literal in output template use `%%`. To output to stdout use `-o -`.
|
||||
To use percent literals in an output template use `%%`. To output to stdout use `-o -`.
|
||||
|
||||
The current default template is `%(title)s-%(id)s.%(ext)s`.
|
||||
|
||||
@ -564,7 +563,7 @@ In some cases, you don't want special characters such as 中, spaces, or &, such
|
||||
|
||||
#### Output template and Windows batch files
|
||||
|
||||
If you are using output template inside a Windows batch file then you must escape plain percent characters (`%`) by doubling, so that `-o "%(title)s-%(id)s.%(ext)s"` should become `-o "%%(title)s-%%(id)s.%%(ext)s"`. However you should not touch `%`'s that are not plain characters, e.g. environment variables for expansion should stay intact: `-o "C:\%HOMEPATH%\Desktop\%%(title)s.%%(ext)s"`.
|
||||
If you are using an output template inside a Windows batch file then you must escape plain percent characters (`%`) by doubling, so that `-o "%(title)s-%(id)s.%(ext)s"` should become `-o "%%(title)s-%%(id)s.%%(ext)s"`. However you should not touch `%`'s that are not plain characters, e.g. environment variables for expansion should stay intact: `-o "C:\%HOMEPATH%\Desktop\%%(title)s.%%(ext)s"`.
|
||||
|
||||
#### Output template examples
|
||||
|
||||
@ -597,7 +596,7 @@ $ youtube-dl -o - BaW_jenozKc
|
||||
|
||||
By default youtube-dl tries to download the best available quality, i.e. if you want the best quality you **don't need** to pass any special options, youtube-dl will guess it for you by **default**.
|
||||
|
||||
But sometimes you may want to download in a different format, for example when you are on a slow or intermittent connection. The key mechanism for achieving this is so called *format selection* based on which you can explicitly specify desired format, select formats based on some criterion or criteria, setup precedence and much more.
|
||||
But sometimes you may want to download in a different format, for example when you are on a slow or intermittent connection. The key mechanism for achieving this is so-called *format selection* based on which you can explicitly specify desired format, select formats based on some criterion or criteria, setup precedence and much more.
|
||||
|
||||
The general syntax for format selection is `--format FORMAT` or shorter `-f FORMAT` where `FORMAT` is a *selector expression*, i.e. an expression that describes format or formats you would like to download.
|
||||
|
||||
@ -605,21 +604,21 @@ The general syntax for format selection is `--format FORMAT` or shorter `-f FORM
|
||||
|
||||
The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific.
|
||||
|
||||
You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download best quality format of particular file extension served as a single file, e.g. `-f webm` will download best quality format with `webm` extension served as a single file.
|
||||
You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file.
|
||||
|
||||
You can also use special names to select particular edge case format:
|
||||
- `best`: Select best quality format represented by single file with video and audio
|
||||
- `worst`: Select worst quality format represented by single file with video and audio
|
||||
- `bestvideo`: Select best quality video only format (e.g. DASH video), may not be available
|
||||
- `worstvideo`: Select worst quality video only format, may not be available
|
||||
- `bestaudio`: Select best quality audio only format, may not be available
|
||||
- `worstaudio`: Select worst quality audio only format, may not be available
|
||||
You can also use special names to select particular edge case formats:
|
||||
- `best`: Select the best quality format represented by a single file with video and audio.
|
||||
- `worst`: Select the worst quality format represented by a single file with video and audio.
|
||||
- `bestvideo`: Select the best quality video-only format (e.g. DASH video). May not be available.
|
||||
- `worstvideo`: Select the worst quality video-only format. May not be available.
|
||||
- `bestaudio`: Select the best quality audio only-format. May not be available.
|
||||
- `worstaudio`: Select the worst quality audio only-format. May not be available.
|
||||
|
||||
For example, to download worst quality video only format you can use `-f worstvideo`.
|
||||
For example, to download the worst quality video-only format you can use `-f worstvideo`.
|
||||
|
||||
If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes. Note that slash is left-associative, i.e. formats on the left hand side are preferred, for example `-f 22/17/18` will download format 22 if it's available, otherwise it will download format 17 if it's available, otherwise it will download format 18 if it's available, otherwise it will complain that no suitable formats are available for download.
|
||||
|
||||
If you want to download several formats of the same video use comma as a separator, e.g. `-f 22,17,18` will download all these three formats, of course if they are available. Or more sophisticated example combined with precedence feature `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`.
|
||||
If you want to download several formats of the same video use a comma as a separator, e.g. `-f 22,17,18` will download all these three formats, of course if they are available. Or a more sophisticated example combined with the precedence feature: `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`.
|
||||
|
||||
You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`).
|
||||
|
||||
@ -638,18 +637,18 @@ Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begin
|
||||
- `acodec`: Name of the audio codec in use
|
||||
- `vcodec`: Name of the video codec in use
|
||||
- `container`: Name of the container format
|
||||
- `protocol`: The protocol that will be used for the actual download, lower-case. `http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `m3u8`, or `m3u8_native`
|
||||
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `m3u8`, or `m3u8_native`)
|
||||
- `format_id`: A short description of the format
|
||||
|
||||
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by video hoster.
|
||||
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster.
|
||||
|
||||
Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s.
|
||||
|
||||
You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv installed), for example `-f bestvideo+bestaudio` will download best video only format, best audio only format and mux them together with ffmpeg/avconv.
|
||||
You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv installed), for example `-f bestvideo+bestaudio` will download the best video-only format, the best audio-only format and mux them together with ffmpeg/avconv.
|
||||
|
||||
Format selectors can also be grouped using parentheses, for example if you want to download the best mp4 and webm formats with a height lower than 480 you can use `-f '(mp4,webm)[height<480]'`.
|
||||
|
||||
Since the end of April 2015 and version 2015.04.26 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see [#5447](https://github.com/rg3/youtube-dl/issues/5447), [#5456](https://github.com/rg3/youtube-dl/issues/5456)). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading the best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some DASH formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
|
||||
Since the end of April 2015 and version 2015.04.26, youtube-dl uses `-f bestvideo+bestaudio/best` as the default format selection (see [#5447](https://github.com/rg3/youtube-dl/issues/5447), [#5456](https://github.com/rg3/youtube-dl/issues/5456)). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading the best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some DASH formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
|
||||
|
||||
If you want to preserve the old format selection behavior (prior to youtube-dl 2015.04.26), i.e. you want to download the best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](#configuration) in order not to type it every time you run youtube-dl.
|
||||
|
||||
@ -664,7 +663,7 @@ $ youtube-dl -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best'
|
||||
# Download best format available but not better that 480p
|
||||
$ youtube-dl -f 'bestvideo[height<=480]+bestaudio/best[height<=480]'
|
||||
|
||||
# Download best video only format but no bigger that 50 MB
|
||||
# Download best video only format but no bigger than 50 MB
|
||||
$ youtube-dl -f 'best[filesize<50M]'
|
||||
|
||||
# Download best format available via direct link over HTTP/HTTPS protocol
|
||||
@ -728,7 +727,7 @@ Add a file exclusion for `youtube-dl.exe` in Windows Defender settings.
|
||||
|
||||
YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos.
|
||||
|
||||
If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to [report bugs](https://bugs.launchpad.net/ubuntu/+source/youtube-dl/+filebug) to the [Ubuntu packaging guys](mailto:ubuntu-motu@lists.ubuntu.com?subject=outdated%20version%20of%20youtube-dl) - all they have to do is update the package to a somewhat recent version. See above for a way to update.
|
||||
If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to [report bugs](https://bugs.launchpad.net/ubuntu/+source/youtube-dl/+filebug) to the [Ubuntu packaging people](mailto:ubuntu-motu@lists.ubuntu.com?subject=outdated%20version%20of%20youtube-dl) - all they have to do is update the package to a somewhat recent version. See above for a way to update.
|
||||
|
||||
### I'm getting an error when trying to use output template: `error: using output template conflicts with using title, video ID or auto number`
|
||||
|
||||
@ -744,7 +743,7 @@ Most people asking this question are not aware that youtube-dl now defaults to d
|
||||
|
||||
### I get HTTP error 402 when trying to download a video. What's this?
|
||||
|
||||
Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl.
|
||||
Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a web browser to the youtube URL, solving the CAPTCHA, and restart youtube-dl.
|
||||
|
||||
### Do I need any other programs?
|
||||
|
||||
@ -756,9 +755,9 @@ Videos or video formats streamed via RTMP protocol can only be downloaded when [
|
||||
|
||||
Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org/) or [mplayer](http://www.mplayerhq.hu/).
|
||||
|
||||
### I extracted a video URL with `-g`, but it does not play on another machine / in my webbrowser.
|
||||
### I extracted a video URL with `-g`, but it does not play on another machine / in my web browser.
|
||||
|
||||
It depends a lot on the service. In many cases, requests for the video (to download/play it) must come from the same IP address and with the same cookies. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used, use `--dump-user-agent` to see the one in use by youtube-dl.
|
||||
It depends a lot on the service. In many cases, requests for the video (to download/play it) must come from the same IP address and with the same cookies and/or HTTP headers. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used, use `--dump-user-agent` to see the one in use by youtube-dl. You can also get necessary cookies and HTTP headers from JSON output obtained with `--dump-json`.
|
||||
|
||||
It may be beneficial to use IPv6; in some cases, the restrictions are only applied to IPv4. Some services (sometimes only for a subset of videos) do not restrict the video URL by IP address, cookie, or user-agent, but these are the exception rather than the rule.
|
||||
|
||||
@ -902,7 +901,7 @@ If you want to find out whether a given URL is supported, simply call youtube-dl
|
||||
|
||||
# Why do I need to go through that much red tape when filing bugs?
|
||||
|
||||
Before we had the issue template, despite our extensive [bug reporting instructions](#bugs), about 80% of the issue reports we got were useless, for instance because people used ancient versions hundreds of releases old, because of simple syntactic errors (not in youtube-dl but in general shell usage), because the problem was alrady reported multiple times before, because people did not actually read an error message, even if it said "please install ffmpeg", because people did not mention the URL they were trying to download and many more simple, easy-to-avoid problems, many of whom were totally unrelated to youtube-dl.
|
||||
Before we had the issue template, despite our extensive [bug reporting instructions](#bugs), about 80% of the issue reports we got were useless, for instance because people used ancient versions hundreds of releases old, because of simple syntactic errors (not in youtube-dl but in general shell usage), because the problem was already reported multiple times before, because people did not actually read an error message, even if it said "please install ffmpeg", because people did not mention the URL they were trying to download and many more simple, easy-to-avoid problems, many of whom were totally unrelated to youtube-dl.
|
||||
|
||||
youtube-dl is an open-source project manned by too few volunteers, so we'd rather spend time fixing bugs where we are certain none of those simple problems apply, and where we can be reasonably confident to be able to reproduce the issue without asking the reporter repeatedly. As such, the output of `youtube-dl -v YOUR_URL_HERE` is really all that's required to file an issue. The issue template also guides you through some basic steps you can do, such as checking that your version of youtube-dl is current.
|
||||
|
||||
@ -923,16 +922,16 @@ To run the test, simply invoke your favorite test runner, or execute a test file
|
||||
If you want to create a build of youtube-dl yourself, you'll need
|
||||
|
||||
* python
|
||||
* make (both GNU make and BSD make are supported)
|
||||
* make (only GNU make is supported)
|
||||
* pandoc
|
||||
* zip
|
||||
* nosetests
|
||||
|
||||
### Adding support for a new site
|
||||
|
||||
If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**.
|
||||
If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](README.md#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**.
|
||||
|
||||
After you have ensured this site is distributing it's content legally, you can follow this quick list (assuming your service is called `yourextractor`):
|
||||
After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
|
||||
|
||||
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
|
||||
2. Check out the source code with:
|
||||
@ -1005,19 +1004,19 @@ In any case, thank you very much for your contributions!
|
||||
|
||||
This section introduces a guide lines for writing idiomatic, robust and future-proof extractor code.
|
||||
|
||||
Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hoster out of your control and this layout tend to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize code dependency on source's layout changes and even to make the code foresee potential future changes and be ready for that. This is important because it will allow extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with fix incorporated all the previous version become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say some may never receive an update at all that is possible for non rolling release distros.
|
||||
Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that. This is important because it will allow the extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with a fix incorporated, all the previous versions become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say, some non rolling release distros may never receive an update at all.
|
||||
|
||||
### Mandatory and optional metafields
|
||||
|
||||
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in *info dict* are considered mandatory for successful extraction process by youtube-dl:
|
||||
For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
|
||||
|
||||
- `id` (media identifier)
|
||||
- `title` (media title)
|
||||
- `url` (media download URL) or `formats`
|
||||
|
||||
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` to be mandatory. Thus aforementioned metafields are the critical data the extraction does not make any sense without and if any of them fail to be extracted then extractor is considered completely broken.
|
||||
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
|
||||
|
||||
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||
[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||
|
||||
#### Example
|
||||
|
||||
@ -1037,7 +1036,7 @@ Assume at this point `meta`'s layout is:
|
||||
}
|
||||
```
|
||||
|
||||
Assume you want to extract `summary` and put into resulting info dict as `description`. Since `description` is optional metafield you should be ready that this key may be missing from the `meta` dict, so that you should extract it like:
|
||||
Assume you want to extract `summary` and put it into the resulting info dict as `description`. Since `description` is an optional meta field you should be ready that this key may be missing from the `meta` dict, so that you should extract it like:
|
||||
|
||||
```python
|
||||
description = meta.get('summary') # correct
|
||||
@ -1049,7 +1048,7 @@ and not like:
|
||||
description = meta['summary'] # incorrect
|
||||
```
|
||||
|
||||
The latter will break extraction process with `KeyError` if `summary` disappears from `meta` at some time later but with former approach extraction will just go ahead with `description` set to `None` that is perfectly fine (remember `None` is equivalent for absence of data).
|
||||
The latter will break extraction process with `KeyError` if `summary` disappears from `meta` at some later time but with the former approach extraction will just go ahead with `description` set to `None` which is perfectly fine (remember `None` is equivalent to the absence of data).
|
||||
|
||||
Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance:
|
||||
|
||||
@ -1069,21 +1068,21 @@ description = self._search_regex(
|
||||
webpage, 'description', default=None)
|
||||
```
|
||||
|
||||
On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that are known to may or may not be present.
|
||||
On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present.
|
||||
|
||||
### Provide fallbacks
|
||||
|
||||
When extracting metadata try to provide several scenarios for that. For example if `title` is present in several places/sources try extracting from at least some of them. This would make it more future-proof in case some of the sources became unavailable.
|
||||
When extracting metadata try to do so from multiple sources. For example if `title` is present in several places, try extracting from at least some of them. This makes it more future-proof in case some of the sources become unavailable.
|
||||
|
||||
#### Example
|
||||
|
||||
Say `meta` from previous example has a `title` and you are about to extract it. Since `title` is mandatory meta field you should end up with something like:
|
||||
Say `meta` from the previous example has a `title` and you are about to extract it. Since `title` is a mandatory meta field you should end up with something like:
|
||||
|
||||
```python
|
||||
title = meta['title']
|
||||
```
|
||||
|
||||
If `title` disappeares from `meta` in future due to some changes on hoster's side the extraction would fail since `title` is mandatory. That's expected.
|
||||
If `title` disappears from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected.
|
||||
|
||||
Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario:
|
||||
|
||||
@ -1120,7 +1119,7 @@ title = self._search_regex(
|
||||
webpage, 'title', group='title')
|
||||
```
|
||||
|
||||
Note how you tolerate potential changes in `style` attribute's value or switch from using double quotes to single for `class` attribute:
|
||||
Note how you tolerate potential changes in the `style` attribute's value or switch from using double quotes to single for `class` attribute:
|
||||
|
||||
The code definitely should not look like:
|
||||
|
||||
@ -1149,7 +1148,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
||||
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
|
||||
```
|
||||
|
||||
Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L128-L278). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
|
||||
Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L129-L279). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
|
||||
|
||||
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
|
||||
|
||||
@ -1190,7 +1189,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
||||
|
||||
# BUGS
|
||||
|
||||
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](http://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
|
||||
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](http://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
|
||||
|
||||
**Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
|
||||
```
|
||||
@ -1206,7 +1205,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] Proxy map: {}
|
||||
...
|
||||
```
|
||||
**Do not post screenshots of verbose log only plain text is acceptable.**
|
||||
**Do not post screenshots of verbose logs; only plain text is acceptable.**
|
||||
|
||||
The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
|
||||
|
||||
@ -1252,7 +1251,7 @@ We are then presented with a very complicated request when the original problem
|
||||
|
||||
Some of our users seem to think there is a limit of issues they can or should open. There is no limit of issues they can or should open. While it may seem appealing to be able to dump all your issues into one ticket, that means that someone who solves one of your issues cannot mark the issue as closed. Typically, reporting a bunch of issues leads to the ticket lingering since nobody wants to attack that behemoth, until someone mercifully splits the issue into multiple ones.
|
||||
|
||||
In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, Whitehouse podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of youtube-dl that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service.
|
||||
In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, White house podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of youtube-dl that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service.
|
||||
|
||||
### Is anyone going to need the feature?
|
||||
|
||||
@ -1260,7 +1259,7 @@ Only post features that you (or an incapacitated friend you can personally talk
|
||||
|
||||
### Is your question about youtube-dl?
|
||||
|
||||
It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different or even the reporter's own application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.
|
||||
It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different, or even the reporter's own, application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.
|
||||
|
||||
# COPYRIGHT
|
||||
|
||||
|
@ -25,5 +25,6 @@ def build_completion(opt_parser):
|
||||
filled_template = template.replace("{{flags}}", " ".join(opts_flag))
|
||||
f.write(filled_template)
|
||||
|
||||
|
||||
parser = youtube_dl.parseOpts()[0]
|
||||
build_completion(parser)
|
||||
|
@ -424,8 +424,6 @@ class BuildHTTPRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||
self.send_header('Content-Length', len(msg))
|
||||
self.end_headers()
|
||||
self.wfile.write(msg)
|
||||
except HTTPError as e:
|
||||
self.send_response(e.code, str(e))
|
||||
else:
|
||||
self.send_response(500, 'Unknown build method "%s"' % action)
|
||||
else:
|
||||
|
@ -2,11 +2,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import io
|
||||
import json
|
||||
import mimetypes
|
||||
import netrc
|
||||
import optparse
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
@ -90,16 +92,23 @@ class GitHubReleaser(object):
|
||||
|
||||
|
||||
def main():
|
||||
parser = optparse.OptionParser(usage='%prog VERSION BUILDPATH')
|
||||
parser = optparse.OptionParser(usage='%prog CHANGELOG VERSION BUILDPATH')
|
||||
options, args = parser.parse_args()
|
||||
if len(args) != 2:
|
||||
if len(args) != 3:
|
||||
parser.error('Expected a version and a build directory')
|
||||
|
||||
version, build_path = args
|
||||
changelog_file, version, build_path = args
|
||||
|
||||
with io.open(changelog_file, encoding='utf-8') as inf:
|
||||
changelog = inf.read()
|
||||
|
||||
mobj = re.search(r'(?s)version %s\n{2}(.+?)\n{3}' % version, changelog)
|
||||
body = mobj.group(1) if mobj else ''
|
||||
|
||||
releaser = GitHubReleaser()
|
||||
|
||||
new_release = releaser.create_release(version, name='youtube-dl %s' % version)
|
||||
new_release = releaser.create_release(
|
||||
version, name='youtube-dl %s' % version, body=body)
|
||||
release_id = new_release['id']
|
||||
|
||||
for asset in os.listdir(build_path):
|
||||
|
@ -44,5 +44,6 @@ def build_completion(opt_parser):
|
||||
with open(FISH_COMPLETION_FILE, 'w') as f:
|
||||
f.write(filled_template)
|
||||
|
||||
|
||||
parser = youtube_dl.parseOpts()[0]
|
||||
build_completion(parser)
|
||||
|
@ -23,6 +23,7 @@ def openssl_encode(algo, key, iv):
|
||||
out, _ = prog.communicate(secret_msg)
|
||||
return out
|
||||
|
||||
|
||||
iv = key = [0x20, 0x15] + 14 * [0]
|
||||
|
||||
r = openssl_encode('aes-128-cbc', key, iv)
|
||||
|
@ -32,5 +32,6 @@ def main():
|
||||
with open('supportedsites.html', 'w', encoding='utf-8') as sitesf:
|
||||
sitesf.write(template)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
@ -28,5 +28,6 @@ def main():
|
||||
with io.open(outfile, 'w', encoding='utf-8') as outf:
|
||||
outf.write(out)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
@ -59,6 +59,7 @@ def build_lazy_ie(ie, name):
|
||||
s += make_valid_template.format(valid_url=ie._make_valid_url())
|
||||
return s
|
||||
|
||||
|
||||
# find the correct sorting and add the required base classes so that sublcasses
|
||||
# can be correctly created
|
||||
classes = _ALL_CLASSES[:-1]
|
||||
|
@ -41,5 +41,6 @@ def main():
|
||||
with io.open(outfile, 'w', encoding='utf-8') as outf:
|
||||
outf.write(out)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
@ -74,5 +74,6 @@ def filter_options(readme):
|
||||
|
||||
return ret
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
@ -110,7 +110,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
|
||||
for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
|
||||
|
||||
ROOT=$(pwd)
|
||||
python devscripts/create-github-release.py $version "$ROOT/build/$version"
|
||||
python devscripts/create-github-release.py ChangeLog $version "$ROOT/build/$version"
|
||||
|
||||
ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"
|
||||
|
||||
|
@ -44,5 +44,6 @@ def build_completion(opt_parser):
|
||||
with open(ZSH_COMPLETION_FILE, "w") as f:
|
||||
f.write(template)
|
||||
|
||||
|
||||
parser = youtube_dl.parseOpts()[0]
|
||||
build_completion(parser)
|
||||
|
@ -1,4 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
#
|
||||
# youtube-dl documentation build configuration file, created by
|
||||
# sphinx-quickstart on Fri Mar 14 21:05:43 2014.
|
||||
|
@ -86,7 +86,7 @@
|
||||
- **bbc.co.uk:article**: BBC articles
|
||||
- **bbc.co.uk:iplayer:playlist**
|
||||
- **bbc.co.uk:playlist**
|
||||
- **BeatportPro**
|
||||
- **Beatport**
|
||||
- **Beeg**
|
||||
- **BehindKink**
|
||||
- **BellMedia**
|
||||
@ -131,7 +131,8 @@
|
||||
- **cbsnews**: CBS News
|
||||
- **cbsnews:livevideo**: CBS News Live Videos
|
||||
- **CBSSports**
|
||||
- **CCTV**
|
||||
- **CCMA**
|
||||
- **CCTV**: 央视网
|
||||
- **CDA**
|
||||
- **CeskaTelevize**
|
||||
- **channel9**: Channel 9
|
||||
@ -158,6 +159,7 @@
|
||||
- **CollegeRama**
|
||||
- **ComCarCoff**
|
||||
- **ComedyCentral**
|
||||
- **ComedyCentralFullEpisodes**
|
||||
- **ComedyCentralShortname**
|
||||
- **ComedyCentralTV**
|
||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||
@ -225,6 +227,7 @@
|
||||
- **EroProfile**
|
||||
- **Escapist**
|
||||
- **ESPN**
|
||||
- **ESPNArticle**
|
||||
- **EsriVideo**
|
||||
- **Europa**
|
||||
- **EveryonesMixtape**
|
||||
@ -247,6 +250,7 @@
|
||||
- **FootyRoom**
|
||||
- **Formula1**
|
||||
- **FOX**
|
||||
- **FOX9**
|
||||
- **Foxgay**
|
||||
- **foxnews**: Fox News and Fox Business Video
|
||||
- **foxnews:article**
|
||||
@ -259,7 +263,6 @@
|
||||
- **francetvinfo.fr**
|
||||
- **Freesound**
|
||||
- **freespeech.org**
|
||||
- **FreeVideo**
|
||||
- **Funimation**
|
||||
- **FunnyOrDie**
|
||||
- **Fusion**
|
||||
@ -289,6 +292,7 @@
|
||||
- **Groupon**
|
||||
- **Hark**
|
||||
- **HBO**
|
||||
- **HBOEpisode**
|
||||
- **HearThisAt**
|
||||
- **Heise**
|
||||
- **HellPorno**
|
||||
@ -307,6 +311,7 @@
|
||||
- **HowStuffWorks**
|
||||
- **HRTi**
|
||||
- **HRTiPlaylist**
|
||||
- **Huajiao**: 花椒直播
|
||||
- **HuffPost**: Huffington Post
|
||||
- **Hypem**
|
||||
- **Iconosquare**
|
||||
@ -330,6 +335,8 @@
|
||||
- **ivideon**: Ivideon TV
|
||||
- **Iwara**
|
||||
- **Izlesene**
|
||||
- **Jamendo**
|
||||
- **JamendoAlbum**
|
||||
- **JeuxVideo**
|
||||
- **Jove**
|
||||
- **jpopsuki.tv**
|
||||
@ -357,13 +364,15 @@
|
||||
- **kuwo:singer**: 酷我音乐 - 歌手
|
||||
- **kuwo:song**: 酷我音乐
|
||||
- **la7.it**
|
||||
- **Laola1Tv**
|
||||
- **laola1tv**
|
||||
- **laola1tv:embed**
|
||||
- **LCI**
|
||||
- **Lcp**
|
||||
- **LcpPlay**
|
||||
- **Le**: 乐视网
|
||||
- **Learnr**
|
||||
- **Lecture2Go**
|
||||
- **LEGO**
|
||||
- **Lemonde**
|
||||
- **LePlaylist**
|
||||
- **LetvCloud**: 乐视云
|
||||
@ -394,6 +403,8 @@
|
||||
- **MatchTV**
|
||||
- **MDR**: MDR.DE and KiKA
|
||||
- **media.ccc.de**
|
||||
- **Meipai**: 美拍
|
||||
- **MelonVOD**
|
||||
- **META**
|
||||
- **metacafe**
|
||||
- **Metacritic**
|
||||
@ -478,11 +489,13 @@
|
||||
- **nhl.com:videocenter:category**: NHL videocenter category
|
||||
- **nick.com**
|
||||
- **nick.de**
|
||||
- **nicknight**
|
||||
- **niconico**: ニコニコ動画
|
||||
- **NiconicoPlaylist**
|
||||
- **Nintendo**
|
||||
- **njoy**: N-JOY
|
||||
- **njoy:embed**
|
||||
- **NobelPrize**
|
||||
- **Noco**
|
||||
- **Normalboots**
|
||||
- **NosVideo**
|
||||
@ -503,15 +516,19 @@
|
||||
- **NRKPlaylist**
|
||||
- **NRKSkole**: NRK Skole
|
||||
- **NRKTV**: NRK TV and NRK Radio
|
||||
- **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte
|
||||
- **NRKTVEpisodes**
|
||||
- **ntv.ru**
|
||||
- **Nuvid**
|
||||
- **NYTimes**
|
||||
- **NYTimesArticle**
|
||||
- **NZZ**
|
||||
- **ocw.mit.edu**
|
||||
- **OdaTV**
|
||||
- **Odnoklassniki**
|
||||
- **OktoberfestTV**
|
||||
- **on.aol.com**
|
||||
- **OnDemandKorea**
|
||||
- **onet.tv**
|
||||
- **onet.tv:channel**
|
||||
- **OnionStudios**
|
||||
@ -523,6 +540,7 @@
|
||||
- **orf:iptv**: iptv.ORF.at
|
||||
- **orf:oe1**: Radio Österreich 1
|
||||
- **orf:tvthek**: ORF TVthek
|
||||
- **PandaTV**: 熊猫TV
|
||||
- **pandora.tv**: 판도라TV
|
||||
- **parliamentlive.tv**: UK parliament videos
|
||||
- **Patreon**
|
||||
@ -534,6 +552,7 @@
|
||||
- **PhilharmonieDeParis**: Philharmonie de Paris
|
||||
- **phoenix.de**
|
||||
- **Photobucket**
|
||||
- **Piksel**
|
||||
- **Pinkbike**
|
||||
- **Pladform**
|
||||
- **play.fm**
|
||||
@ -582,6 +601,8 @@
|
||||
- **RDS**: RDS.ca
|
||||
- **RedTube**
|
||||
- **RegioTV**
|
||||
- **RENTV**
|
||||
- **RENTVArticle**
|
||||
- **Restudy**
|
||||
- **Reuters**
|
||||
- **ReverbNation**
|
||||
@ -630,15 +651,15 @@
|
||||
- **Screencast**
|
||||
- **ScreencastOMatic**
|
||||
- **ScreenJunkies**
|
||||
- **ScreenwaveMedia**
|
||||
- **Seeker**
|
||||
- **SenateISVP**
|
||||
- **SendtoNews**
|
||||
- **ServingSys**
|
||||
- **Sexu**
|
||||
- **Shahid**
|
||||
- **Shared**: shared.sx and vivo.sx
|
||||
- **Shared**: shared.sx
|
||||
- **ShareSix**
|
||||
- **ShowRoomLive**
|
||||
- **Sina**
|
||||
- **SixPlay**
|
||||
- **skynewsarabia:article**
|
||||
@ -692,6 +713,7 @@
|
||||
- **SWRMediathek**
|
||||
- **Syfy**
|
||||
- **SztvHu**
|
||||
- **t-online.de**
|
||||
- **Tagesschau**
|
||||
- **tagesschau:player**
|
||||
- **Tass**
|
||||
@ -701,7 +723,7 @@
|
||||
- **teachertube:user:collection**: teachertube.com user and collection videos
|
||||
- **TeachingChannel**
|
||||
- **Teamcoco**
|
||||
- **TeamFour**
|
||||
- **TeamFourStar**
|
||||
- **TechTalks**
|
||||
- **techtv.mit.edu**
|
||||
- **ted**
|
||||
@ -716,13 +738,16 @@
|
||||
- **TF1**
|
||||
- **TFO**
|
||||
- **TheIntercept**
|
||||
- **theoperaplatform**
|
||||
- **ThePlatform**
|
||||
- **ThePlatformFeed**
|
||||
- **TheScene**
|
||||
- **TheSixtyOne**
|
||||
- **TheStar**
|
||||
- **TheWeatherChannel**
|
||||
- **ThisAmericanLife**
|
||||
- **ThisAV**
|
||||
- **ThisOldHouse**
|
||||
- **tinypic**: tinypic.com videos
|
||||
- **tlc.de**
|
||||
- **TMZ**
|
||||
@ -754,6 +779,8 @@
|
||||
- **TV2Article**
|
||||
- **TV3**
|
||||
- **TV4**: tv4.se and tv4play.se
|
||||
- **TVANouvelles**
|
||||
- **TVANouvellesArticle**
|
||||
- **TVC**
|
||||
- **TVCArticle**
|
||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||
@ -765,10 +792,13 @@
|
||||
- **Tweakers**
|
||||
- **twitch:chapter**
|
||||
- **twitch:clips**
|
||||
- **twitch:past_broadcasts**
|
||||
- **twitch:profile**
|
||||
- **twitch:stream**
|
||||
- **twitch:video**
|
||||
- **twitch:videos:all**
|
||||
- **twitch:videos:highlights**
|
||||
- **twitch:videos:past-broadcasts**
|
||||
- **twitch:videos:uploads**
|
||||
- **twitch:vod**
|
||||
- **twitter**
|
||||
- **twitter:amplify**
|
||||
@ -776,6 +806,7 @@
|
||||
- **udemy**
|
||||
- **udemy:course**
|
||||
- **UDNEmbed**: 聯合影音
|
||||
- **UKTVPlay**
|
||||
- **Unistra**
|
||||
- **uol.com.br**
|
||||
- **uplynk**
|
||||
@ -804,6 +835,7 @@
|
||||
- **ViceShow**
|
||||
- **Vidbit**
|
||||
- **Viddler**
|
||||
- **Videa**
|
||||
- **video.google:search**: Google Video search
|
||||
- **video.mit.edu**
|
||||
- **VideoDetective**
|
||||
@ -840,6 +872,10 @@
|
||||
- **Vimple**: Vimple - one-click video hosting
|
||||
- **Vine**
|
||||
- **vine:user**
|
||||
- **Viu**
|
||||
- **viu:ott**
|
||||
- **viu:playlist**
|
||||
- **Vivo**: vivo.sx
|
||||
- **vk**: VK
|
||||
- **vk:uservideos**: VK - User's Videos
|
||||
- **vk:wallpost**
|
||||
@ -853,7 +889,9 @@
|
||||
- **VRT**
|
||||
- **vube**: Vube.com
|
||||
- **VuClip**
|
||||
- **VVVVID**
|
||||
- **VyboryMos**
|
||||
- **Vzaar**
|
||||
- **Walla**
|
||||
- **washingtonpost**
|
||||
- **washingtonpost:article**
|
||||
@ -861,6 +899,8 @@
|
||||
- **WatchIndianPorn**: Watch Indian Porn
|
||||
- **WDR**
|
||||
- **wdr:mobile**
|
||||
- **Webcaster**
|
||||
- **WebcasterFeed**
|
||||
- **WebOfStories**
|
||||
- **WebOfStoriesPlaylist**
|
||||
- **WeiqiTV**: WQTV
|
||||
|
2
setup.py
2
setup.py
@ -1,5 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
|
@ -84,5 +84,6 @@ class TestInfoExtractor(unittest.TestCase):
|
||||
self.assertRaises(ExtractorError, self.ie._download_json, uri, None)
|
||||
self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -605,6 +605,7 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
'extractor': 'TEST',
|
||||
'duration': 30,
|
||||
'filesize': 10 * 1024,
|
||||
'playlist_id': '42',
|
||||
}
|
||||
second = {
|
||||
'id': '2',
|
||||
@ -614,6 +615,7 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
'duration': 10,
|
||||
'description': 'foo',
|
||||
'filesize': 5 * 1024,
|
||||
'playlist_id': '43',
|
||||
}
|
||||
videos = [first, second]
|
||||
|
||||
@ -650,6 +652,10 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, ['1'])
|
||||
|
||||
f = match_filter_func('playlist_id = 42')
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, ['1'])
|
||||
|
||||
def test_playlist_items_selection(self):
|
||||
entries = [{
|
||||
'id': compat_str(i),
|
||||
|
@ -51,5 +51,6 @@ class TestAES(unittest.TestCase):
|
||||
decrypted = (aes_decrypt_text(encrypted, password, 32))
|
||||
self.assertEqual(decrypted, self.secret_msg)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -60,6 +60,7 @@ def _file_md5(fn):
|
||||
with open(fn, 'rb') as f:
|
||||
return hashlib.md5(f.read()).hexdigest()
|
||||
|
||||
|
||||
defs = gettestcases()
|
||||
|
||||
|
||||
@ -217,6 +218,7 @@ def generator(test_case):
|
||||
|
||||
return test_template
|
||||
|
||||
|
||||
# And add them to TestDownload
|
||||
for n, test_case in enumerate(defs):
|
||||
test_method = generator(test_case)
|
||||
|
@ -39,5 +39,6 @@ class TestExecution(unittest.TestCase):
|
||||
_, stderr = p.communicate()
|
||||
self.assertFalse(stderr)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -87,7 +87,7 @@ class TestHTTP(unittest.TestCase):
|
||||
|
||||
ydl = YoutubeDL({'logger': FakeLogger()})
|
||||
r = ydl.extract_info('http://localhost:%d/302' % self.port)
|
||||
self.assertEqual(r['url'], 'http://localhost:%d/vid.mp4' % self.port)
|
||||
self.assertEqual(r['entries'][0]['url'], 'http://localhost:%d/vid.mp4' % self.port)
|
||||
|
||||
|
||||
class TestHTTPS(unittest.TestCase):
|
||||
@ -111,7 +111,7 @@ class TestHTTPS(unittest.TestCase):
|
||||
|
||||
ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True})
|
||||
r = ydl.extract_info('https://localhost:%d/video.html' % self.port)
|
||||
self.assertEqual(r['url'], 'https://localhost:%d/vid.mp4' % self.port)
|
||||
self.assertEqual(r['entries'][0]['url'], 'https://localhost:%d/vid.mp4' % self.port)
|
||||
|
||||
|
||||
def _build_proxy_handler(name):
|
||||
@ -169,5 +169,6 @@ class TestProxy(unittest.TestCase):
|
||||
# b'xn--fiq228c' is '中文'.encode('idna')
|
||||
self.assertEqual(response, 'normal: http://xn--fiq228c.tw/')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -43,5 +43,6 @@ class TestIqiyiSDKInterpreter(unittest.TestCase):
|
||||
ie._login()
|
||||
self.assertTrue('unable to log in:' in logger.messages[0])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -104,6 +104,14 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
}''')
|
||||
self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50])
|
||||
|
||||
def test_call(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return 2; }
|
||||
function y(a) { return x() + a; }
|
||||
function z() { return y(3); }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('z'), 5)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -69,6 +69,8 @@ from youtube_dl.utils import (
|
||||
uppercase_escape,
|
||||
lowercase_escape,
|
||||
url_basename,
|
||||
base_url,
|
||||
urljoin,
|
||||
urlencode_postdata,
|
||||
urshift,
|
||||
update_url_query,
|
||||
@ -437,6 +439,30 @@ class TestUtil(unittest.TestCase):
|
||||
url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'),
|
||||
'trailer.mp4')
|
||||
|
||||
def test_base_url(self):
|
||||
self.assertEqual(base_url('http://foo.de/'), 'http://foo.de/')
|
||||
self.assertEqual(base_url('http://foo.de/bar'), 'http://foo.de/')
|
||||
self.assertEqual(base_url('http://foo.de/bar/'), 'http://foo.de/bar/')
|
||||
self.assertEqual(base_url('http://foo.de/bar/baz'), 'http://foo.de/bar/')
|
||||
self.assertEqual(base_url('http://foo.de/bar/baz?x=z/x/c'), 'http://foo.de/bar/')
|
||||
|
||||
def test_urljoin(self):
|
||||
self.assertEqual(urljoin('http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
|
||||
self.assertEqual(urljoin('//foo.de/', '/a/b/c.txt'), '//foo.de/a/b/c.txt')
|
||||
self.assertEqual(urljoin('http://foo.de/', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt')
|
||||
self.assertEqual(urljoin('http://foo.de', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
|
||||
self.assertEqual(urljoin('http://foo.de', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt')
|
||||
self.assertEqual(urljoin('http://foo.de/', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
|
||||
self.assertEqual(urljoin('http://foo.de/', '//foo.de/a/b/c.txt'), '//foo.de/a/b/c.txt')
|
||||
self.assertEqual(urljoin(None, 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
|
||||
self.assertEqual(urljoin(None, '//foo.de/a/b/c.txt'), '//foo.de/a/b/c.txt')
|
||||
self.assertEqual(urljoin('', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
|
||||
self.assertEqual(urljoin(['foobar'], 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
|
||||
self.assertEqual(urljoin('http://foo.de/', None), None)
|
||||
self.assertEqual(urljoin('http://foo.de/', ''), None)
|
||||
self.assertEqual(urljoin('http://foo.de/', ['foobar']), None)
|
||||
self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt')
|
||||
|
||||
def test_parse_age_limit(self):
|
||||
self.assertEqual(parse_age_limit(None), None)
|
||||
self.assertEqual(parse_age_limit(False), None)
|
||||
@ -1067,5 +1093,6 @@ The first line
|
||||
self.assertEqual(get_element_by_class('foo', html), 'nice')
|
||||
self.assertEqual(get_element_by_class('no-such-class', html), None)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -66,5 +66,6 @@ class TestVerboseOutput(unittest.TestCase):
|
||||
self.assertTrue(b'-p' in serr)
|
||||
self.assertTrue(b'secret' not in serr)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -24,6 +24,7 @@ class YoutubeDL(youtube_dl.YoutubeDL):
|
||||
super(YoutubeDL, self).__init__(*args, **kwargs)
|
||||
self.to_stderr = self.to_screen
|
||||
|
||||
|
||||
params = get_params({
|
||||
'writeannotations': True,
|
||||
'skip_download': True,
|
||||
@ -74,5 +75,6 @@ class TestAnnotations(unittest.TestCase):
|
||||
def tearDown(self):
|
||||
try_rm(ANNOTATIONS_FILE)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -66,5 +66,6 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
for entry in result['entries']:
|
||||
self.assertTrue(entry.get('title'))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -114,6 +114,7 @@ def make_tfunc(url, stype, sig_input, expected_sig):
|
||||
test_func.__name__ = str('test_signature_' + stype + '_' + test_id)
|
||||
setattr(TestSignature, test_func.__name__, test_func)
|
||||
|
||||
|
||||
for test_spec in _TESTS:
|
||||
make_tfunc(*test_spec)
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import absolute_import, unicode_literals
|
||||
|
||||
@ -1658,7 +1658,7 @@ class YoutubeDL(object):
|
||||
video_ext, audio_ext = audio.get('ext'), video.get('ext')
|
||||
if video_ext and audio_ext:
|
||||
COMPATIBLE_EXTS = (
|
||||
('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
|
||||
('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
|
||||
('webm')
|
||||
)
|
||||
for exts in COMPATIBLE_EXTS:
|
||||
|
@ -1,5 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
@ -95,8 +95,7 @@ def _real_main(argv=None):
|
||||
write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
|
||||
except IOError:
|
||||
sys.exit('ERROR: batch file could not be read')
|
||||
all_urls = batch_urls + args
|
||||
all_urls = [url.strip() for url in all_urls]
|
||||
all_urls = batch_urls + [url.strip() for url in args] # batch_urls are already striped in read_batch_urls
|
||||
_enc = preferredencoding()
|
||||
all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
|
||||
|
||||
@ -406,7 +405,7 @@ def _real_main(argv=None):
|
||||
'postprocessor_args': postprocessor_args,
|
||||
'cn_verification_proxy': opts.cn_verification_proxy,
|
||||
'geo_verification_proxy': opts.geo_verification_proxy,
|
||||
|
||||
'config_location': opts.config_location,
|
||||
}
|
||||
|
||||
with YoutubeDL(ydl_opts) as ydl:
|
||||
@ -450,4 +449,5 @@ def main(argv=None):
|
||||
except KeyboardInterrupt:
|
||||
sys.exit('\nERROR: Interrupted by user')
|
||||
|
||||
|
||||
__all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors']
|
||||
|
@ -174,6 +174,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
|
||||
|
||||
return plaintext
|
||||
|
||||
|
||||
RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
|
||||
SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
|
||||
0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
|
||||
@ -328,4 +329,5 @@ def inc(data):
|
||||
break
|
||||
return data
|
||||
|
||||
|
||||
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
|
||||
|
@ -2491,6 +2491,7 @@ class _TreeBuilder(etree.TreeBuilder):
|
||||
def doctype(self, name, pubid, system):
|
||||
pass
|
||||
|
||||
|
||||
if sys.version_info[0] >= 3:
|
||||
def compat_etree_fromstring(text):
|
||||
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
|
||||
@ -2787,6 +2788,7 @@ def workaround_optparse_bug9161():
|
||||
return real_add_option(self, *bargs, **bkwargs)
|
||||
optparse.OptionGroup.add_option = _compat_add_option
|
||||
|
||||
|
||||
if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3
|
||||
compat_get_terminal_size = shutil.get_terminal_size
|
||||
else:
|
||||
|
@ -7,6 +7,7 @@ from .http import HttpFD
|
||||
from .rtmp import RtmpFD
|
||||
from .dash import DashSegmentsFD
|
||||
from .rtsp import RtspFD
|
||||
from .ism import IsmFD
|
||||
from .external import (
|
||||
get_external_downloader,
|
||||
FFmpegFD,
|
||||
@ -24,6 +25,7 @@ PROTOCOL_MAP = {
|
||||
'rtsp': RtspFD,
|
||||
'f4m': F4mFD,
|
||||
'http_dash_segments': DashSegmentsFD,
|
||||
'ism': IsmFD,
|
||||
}
|
||||
|
||||
|
||||
|
@ -346,7 +346,6 @@ class FileDownloader(object):
|
||||
min_sleep_interval = self.params.get('sleep_interval')
|
||||
if min_sleep_interval:
|
||||
max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
|
||||
print(min_sleep_interval, max_sleep_interval)
|
||||
sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
|
||||
self.to_screen('[download] Sleeping %s seconds...' % sleep_interval)
|
||||
time.sleep(sleep_interval)
|
||||
|
@ -293,6 +293,7 @@ class FFmpegFD(ExternalFD):
|
||||
class AVconvFD(FFmpegFD):
|
||||
pass
|
||||
|
||||
|
||||
_BY_NAME = dict(
|
||||
(klass.get_basename(), klass)
|
||||
for name, klass in globals().items()
|
||||
|
@ -314,7 +314,8 @@ class F4mFD(FragmentFD):
|
||||
man_url = info_dict['url']
|
||||
requested_bitrate = info_dict.get('tbr')
|
||||
self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
|
||||
urlh = self.ydl.urlopen(man_url)
|
||||
|
||||
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
|
||||
man_url = urlh.geturl()
|
||||
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
|
||||
# (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244
|
||||
@ -387,7 +388,10 @@ class F4mFD(FragmentFD):
|
||||
url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query))
|
||||
frag_filename = '%s-%s' % (ctx['tmpfilename'], name)
|
||||
try:
|
||||
success = ctx['dl'].download(frag_filename, {'url': url_parsed.geturl()})
|
||||
success = ctx['dl'].download(frag_filename, {
|
||||
'url': url_parsed.geturl(),
|
||||
'http_headers': info_dict.get('http_headers'),
|
||||
})
|
||||
if not success:
|
||||
return False
|
||||
(down, frag_sanitized) = sanitize_open(frag_filename, 'rb')
|
||||
|
@ -9,6 +9,7 @@ from ..utils import (
|
||||
error_to_compat_str,
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
@ -37,6 +38,10 @@ class FragmentFD(FileDownloader):
|
||||
def report_skip_fragment(self, fragment_name):
|
||||
self.to_screen('[download] Skipping fragment %s...' % fragment_name)
|
||||
|
||||
def _prepare_url(self, info_dict, url):
|
||||
headers = info_dict.get('http_headers')
|
||||
return sanitized_Request(url, None, headers) if headers else url
|
||||
|
||||
def _prepare_and_start_frag_download(self, ctx):
|
||||
self._prepare_frag_download(ctx)
|
||||
self._start_frag_download(ctx)
|
||||
|
@ -59,11 +59,15 @@ class HlsFD(FragmentFD):
|
||||
def real_download(self, filename, info_dict):
|
||||
man_url = info_dict['url']
|
||||
self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
|
||||
manifest = self.ydl.urlopen(man_url).read()
|
||||
|
||||
manifest = self.ydl.urlopen(self._prepare_url(info_dict, man_url)).read()
|
||||
|
||||
s = manifest.decode('utf-8', 'ignore')
|
||||
|
||||
if not self.can_download(s, info_dict):
|
||||
if info_dict.get('extra_param_to_segment_url'):
|
||||
self.report_error('pycrypto not found. Please install it.')
|
||||
return False
|
||||
self.report_warning(
|
||||
'hlsnative has detected features it does not support, '
|
||||
'extraction will be delegated to ffmpeg')
|
||||
@ -112,7 +116,10 @@ class HlsFD(FragmentFD):
|
||||
count = 0
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success = ctx['dl'].download(frag_filename, {'url': frag_url})
|
||||
success = ctx['dl'].download(frag_filename, {
|
||||
'url': frag_url,
|
||||
'http_headers': info_dict.get('http_headers'),
|
||||
})
|
||||
if not success:
|
||||
return False
|
||||
down, frag_sanitized = sanitize_open(frag_filename, 'rb')
|
||||
|
271
youtube_dl/downloader/ism.py
Normal file
271
youtube_dl/downloader/ism.py
Normal file
@ -0,0 +1,271 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import time
|
||||
import struct
|
||||
import binascii
|
||||
import io
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import compat_urllib_error
|
||||
from ..utils import (
|
||||
sanitize_open,
|
||||
encodeFilename,
|
||||
)
|
||||
|
||||
|
||||
u8 = struct.Struct(b'>B')
|
||||
u88 = struct.Struct(b'>Bx')
|
||||
u16 = struct.Struct(b'>H')
|
||||
u1616 = struct.Struct(b'>Hxx')
|
||||
u32 = struct.Struct(b'>I')
|
||||
u64 = struct.Struct(b'>Q')
|
||||
|
||||
s88 = struct.Struct(b'>bx')
|
||||
s16 = struct.Struct(b'>h')
|
||||
s1616 = struct.Struct(b'>hxx')
|
||||
s32 = struct.Struct(b'>i')
|
||||
|
||||
unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000)
|
||||
|
||||
TRACK_ENABLED = 0x1
|
||||
TRACK_IN_MOVIE = 0x2
|
||||
TRACK_IN_PREVIEW = 0x4
|
||||
|
||||
SELF_CONTAINED = 0x1
|
||||
|
||||
|
||||
def box(box_type, payload):
|
||||
return u32.pack(8 + len(payload)) + box_type + payload
|
||||
|
||||
|
||||
def full_box(box_type, version, flags, payload):
|
||||
return box(box_type, u8.pack(version) + u32.pack(flags)[1:] + payload)
|
||||
|
||||
|
||||
def write_piff_header(stream, params):
|
||||
track_id = params['track_id']
|
||||
fourcc = params['fourcc']
|
||||
duration = params['duration']
|
||||
timescale = params.get('timescale', 10000000)
|
||||
language = params.get('language', 'und')
|
||||
height = params.get('height', 0)
|
||||
width = params.get('width', 0)
|
||||
is_audio = width == 0 and height == 0
|
||||
creation_time = modification_time = int(time.time())
|
||||
|
||||
ftyp_payload = b'isml' # major brand
|
||||
ftyp_payload += u32.pack(1) # minor version
|
||||
ftyp_payload += b'piff' + b'iso2' # compatible brands
|
||||
stream.write(box(b'ftyp', ftyp_payload)) # File Type Box
|
||||
|
||||
mvhd_payload = u64.pack(creation_time)
|
||||
mvhd_payload += u64.pack(modification_time)
|
||||
mvhd_payload += u32.pack(timescale)
|
||||
mvhd_payload += u64.pack(duration)
|
||||
mvhd_payload += s1616.pack(1) # rate
|
||||
mvhd_payload += s88.pack(1) # volume
|
||||
mvhd_payload += u16.pack(0) # reserved
|
||||
mvhd_payload += u32.pack(0) * 2 # reserved
|
||||
mvhd_payload += unity_matrix
|
||||
mvhd_payload += u32.pack(0) * 6 # pre defined
|
||||
mvhd_payload += u32.pack(0xffffffff) # next track id
|
||||
moov_payload = full_box(b'mvhd', 1, 0, mvhd_payload) # Movie Header Box
|
||||
|
||||
tkhd_payload = u64.pack(creation_time)
|
||||
tkhd_payload += u64.pack(modification_time)
|
||||
tkhd_payload += u32.pack(track_id) # track id
|
||||
tkhd_payload += u32.pack(0) # reserved
|
||||
tkhd_payload += u64.pack(duration)
|
||||
tkhd_payload += u32.pack(0) * 2 # reserved
|
||||
tkhd_payload += s16.pack(0) # layer
|
||||
tkhd_payload += s16.pack(0) # alternate group
|
||||
tkhd_payload += s88.pack(1 if is_audio else 0) # volume
|
||||
tkhd_payload += u16.pack(0) # reserved
|
||||
tkhd_payload += unity_matrix
|
||||
tkhd_payload += u1616.pack(width)
|
||||
tkhd_payload += u1616.pack(height)
|
||||
trak_payload = full_box(b'tkhd', 1, TRACK_ENABLED | TRACK_IN_MOVIE | TRACK_IN_PREVIEW, tkhd_payload) # Track Header Box
|
||||
|
||||
mdhd_payload = u64.pack(creation_time)
|
||||
mdhd_payload += u64.pack(modification_time)
|
||||
mdhd_payload += u32.pack(timescale)
|
||||
mdhd_payload += u64.pack(duration)
|
||||
mdhd_payload += u16.pack(((ord(language[0]) - 0x60) << 10) | ((ord(language[1]) - 0x60) << 5) | (ord(language[2]) - 0x60))
|
||||
mdhd_payload += u16.pack(0) # pre defined
|
||||
mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload) # Media Header Box
|
||||
|
||||
hdlr_payload = u32.pack(0) # pre defined
|
||||
hdlr_payload += b'soun' if is_audio else b'vide' # handler type
|
||||
hdlr_payload += u32.pack(0) * 3 # reserved
|
||||
hdlr_payload += (b'Sound' if is_audio else b'Video') + b'Handler\0' # name
|
||||
mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload) # Handler Reference Box
|
||||
|
||||
if is_audio:
|
||||
smhd_payload = s88.pack(0) # balance
|
||||
smhd_payload = u16.pack(0) # reserved
|
||||
media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header
|
||||
else:
|
||||
vmhd_payload = u16.pack(0) # graphics mode
|
||||
vmhd_payload += u16.pack(0) * 3 # opcolor
|
||||
media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload) # Video Media Header
|
||||
minf_payload = media_header_box
|
||||
|
||||
dref_payload = u32.pack(1) # entry count
|
||||
dref_payload += full_box(b'url ', 0, SELF_CONTAINED, b'') # Data Entry URL Box
|
||||
dinf_payload = full_box(b'dref', 0, 0, dref_payload) # Data Reference Box
|
||||
minf_payload += box(b'dinf', dinf_payload) # Data Information Box
|
||||
|
||||
stsd_payload = u32.pack(1) # entry count
|
||||
|
||||
sample_entry_payload = u8.pack(0) * 6 # reserved
|
||||
sample_entry_payload += u16.pack(1) # data reference index
|
||||
if is_audio:
|
||||
sample_entry_payload += u32.pack(0) * 2 # reserved
|
||||
sample_entry_payload += u16.pack(params.get('channels', 2))
|
||||
sample_entry_payload += u16.pack(params.get('bits_per_sample', 16))
|
||||
sample_entry_payload += u16.pack(0) # pre defined
|
||||
sample_entry_payload += u16.pack(0) # reserved
|
||||
sample_entry_payload += u1616.pack(params['sampling_rate'])
|
||||
|
||||
if fourcc == 'AACL':
|
||||
sample_entry_box = box(b'mp4a', sample_entry_payload)
|
||||
else:
|
||||
sample_entry_payload = sample_entry_payload
|
||||
sample_entry_payload += u16.pack(0) # pre defined
|
||||
sample_entry_payload += u16.pack(0) # reserved
|
||||
sample_entry_payload += u32.pack(0) * 3 # pre defined
|
||||
sample_entry_payload += u16.pack(width)
|
||||
sample_entry_payload += u16.pack(height)
|
||||
sample_entry_payload += u1616.pack(0x48) # horiz resolution 72 dpi
|
||||
sample_entry_payload += u1616.pack(0x48) # vert resolution 72 dpi
|
||||
sample_entry_payload += u32.pack(0) # reserved
|
||||
sample_entry_payload += u16.pack(1) # frame count
|
||||
sample_entry_payload += u8.pack(0) * 32 # compressor name
|
||||
sample_entry_payload += u16.pack(0x18) # depth
|
||||
sample_entry_payload += s16.pack(-1) # pre defined
|
||||
|
||||
codec_private_data = binascii.unhexlify(params['codec_private_data'])
|
||||
if fourcc in ('H264', 'AVC1'):
|
||||
sps, pps = codec_private_data.split(u32.pack(1))[1:]
|
||||
avcc_payload = u8.pack(1) # configuration version
|
||||
avcc_payload += sps[1:4] # avc profile indication + profile compatibility + avc level indication
|
||||
avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete represenation (1) + reserved (11111) + length size minus one
|
||||
avcc_payload += u8.pack(1) # reserved (0) + number of sps (0000001)
|
||||
avcc_payload += u16.pack(len(sps))
|
||||
avcc_payload += sps
|
||||
avcc_payload += u8.pack(1) # number of pps
|
||||
avcc_payload += u16.pack(len(pps))
|
||||
avcc_payload += pps
|
||||
sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record
|
||||
sample_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry
|
||||
stsd_payload += sample_entry_box
|
||||
|
||||
stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box
|
||||
|
||||
stts_payload = u32.pack(0) # entry count
|
||||
stbl_payload += full_box(b'stts', 0, 0, stts_payload) # Decoding Time to Sample Box
|
||||
|
||||
stsc_payload = u32.pack(0) # entry count
|
||||
stbl_payload += full_box(b'stsc', 0, 0, stsc_payload) # Sample To Chunk Box
|
||||
|
||||
stco_payload = u32.pack(0) # entry count
|
||||
stbl_payload += full_box(b'stco', 0, 0, stco_payload) # Chunk Offset Box
|
||||
|
||||
minf_payload += box(b'stbl', stbl_payload) # Sample Table Box
|
||||
|
||||
mdia_payload += box(b'minf', minf_payload) # Media Information Box
|
||||
|
||||
trak_payload += box(b'mdia', mdia_payload) # Media Box
|
||||
|
||||
moov_payload += box(b'trak', trak_payload) # Track Box
|
||||
|
||||
mehd_payload = u64.pack(duration)
|
||||
mvex_payload = full_box(b'mehd', 1, 0, mehd_payload) # Movie Extends Header Box
|
||||
|
||||
trex_payload = u32.pack(track_id) # track id
|
||||
trex_payload += u32.pack(1) # default sample description index
|
||||
trex_payload += u32.pack(0) # default sample duration
|
||||
trex_payload += u32.pack(0) # default sample size
|
||||
trex_payload += u32.pack(0) # default sample flags
|
||||
mvex_payload += full_box(b'trex', 0, 0, trex_payload) # Track Extends Box
|
||||
|
||||
moov_payload += box(b'mvex', mvex_payload) # Movie Extends Box
|
||||
stream.write(box(b'moov', moov_payload)) # Movie Box
|
||||
|
||||
|
||||
def extract_box_data(data, box_sequence):
|
||||
data_reader = io.BytesIO(data)
|
||||
while True:
|
||||
box_size = u32.unpack(data_reader.read(4))[0]
|
||||
box_type = data_reader.read(4)
|
||||
if box_type == box_sequence[0]:
|
||||
box_data = data_reader.read(box_size - 8)
|
||||
if len(box_sequence) == 1:
|
||||
return box_data
|
||||
return extract_box_data(box_data, box_sequence[1:])
|
||||
data_reader.seek(box_size - 8, 1)
|
||||
|
||||
|
||||
class IsmFD(FragmentFD):
|
||||
"""
|
||||
Download segments in a ISM manifest
|
||||
"""
|
||||
|
||||
FD_NAME = 'ism'
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
segments = info_dict['fragments'][:1] if self.params.get(
|
||||
'test', False) else info_dict['fragments']
|
||||
|
||||
ctx = {
|
||||
'filename': filename,
|
||||
'total_frags': len(segments),
|
||||
}
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
|
||||
segments_filenames = []
|
||||
|
||||
fragment_retries = self.params.get('fragment_retries', 0)
|
||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||
|
||||
track_written = False
|
||||
for i, segment in enumerate(segments):
|
||||
segment_url = segment['url']
|
||||
segment_name = 'Frag%d' % i
|
||||
target_filename = '%s-%s' % (ctx['tmpfilename'], segment_name)
|
||||
count = 0
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success = ctx['dl'].download(target_filename, {'url': segment_url})
|
||||
if not success:
|
||||
return False
|
||||
down, target_sanitized = sanitize_open(target_filename, 'rb')
|
||||
down_data = down.read()
|
||||
if not track_written:
|
||||
tfhd_data = extract_box_data(down_data, [b'moof', b'traf', b'tfhd'])
|
||||
info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
|
||||
write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
|
||||
track_written = True
|
||||
ctx['dest_stream'].write(down_data)
|
||||
down.close()
|
||||
segments_filenames.append(target_sanitized)
|
||||
break
|
||||
except compat_urllib_error.HTTPError as err:
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(err, segment_name, count, fragment_retries)
|
||||
if count > fragment_retries:
|
||||
if skip_unavailable_fragments:
|
||||
self.report_skip_fragment(segment_name)
|
||||
continue
|
||||
self.report_error('giving up after %s fragment retries' % fragment_retries)
|
||||
return False
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
for segment_file in segments_filenames:
|
||||
os.remove(encodeFilename(segment_file))
|
||||
|
||||
return True
|
@ -102,16 +102,16 @@ class ABCIViewIE(InfoExtractor):
|
||||
|
||||
# ABC iview programs are normally available for 14 days only.
|
||||
_TESTS = [{
|
||||
'url': 'http://iview.abc.net.au/programs/gardening-australia/FA1505V024S00',
|
||||
'md5': '979d10b2939101f0d27a06b79edad536',
|
||||
'url': 'http://iview.abc.net.au/programs/diaries-of-a-broken-mind/ZX9735A001S00',
|
||||
'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
|
||||
'info_dict': {
|
||||
'id': 'FA1505V024S00',
|
||||
'id': 'ZX9735A001S00',
|
||||
'ext': 'mp4',
|
||||
'title': 'Series 27 Ep 24',
|
||||
'description': 'md5:b28baeae7504d1148e1d2f0e3ed3c15d',
|
||||
'upload_date': '20160820',
|
||||
'uploader_id': 'abc1',
|
||||
'timestamp': 1471719600,
|
||||
'title': 'Diaries Of A Broken Mind',
|
||||
'description': 'md5:7de3903874b7a1be279fe6b68718fc9e',
|
||||
'upload_date': '20161010',
|
||||
'uploader_id': 'abc2',
|
||||
'timestamp': 1476064920,
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
}]
|
||||
@ -121,7 +121,7 @@ class ABCIViewIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_params = self._parse_json(self._search_regex(
|
||||
r'videoParams\s*=\s*({.+?});', webpage, 'video params'), video_id)
|
||||
title = video_params['title']
|
||||
title = video_params.get('title') or video_params['seriesTitle']
|
||||
stream = next(s for s in video_params['playlist'] if s.get('type') == 'program')
|
||||
|
||||
formats = self._extract_akamai_formats(stream['hds-unmetered'], video_id)
|
||||
@ -144,8 +144,8 @@ class ABCIViewIE(InfoExtractor):
|
||||
'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
|
||||
'series': video_params.get('seriesTitle'),
|
||||
'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
|
||||
'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage)),
|
||||
'episode': self._html_search_meta('episode_title', webpage),
|
||||
'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage, default=None)),
|
||||
'episode': self._html_search_meta('episode_title', webpage, default=None),
|
||||
'uploader_id': video_params.get('channel'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
|
@ -8,6 +8,7 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
OnDemandPagedList,
|
||||
)
|
||||
|
||||
@ -15,18 +16,33 @@ from ..utils import (
|
||||
class ACastIE(InfoExtractor):
|
||||
IE_NAME = 'acast'
|
||||
_VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<channel>[^/]+)/(?P<id>[^/#?]+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# test with one bling
|
||||
'url': 'https://www.acast.com/condenasttraveler/-where-are-you-taipei-101-taiwan',
|
||||
'md5': 'ada3de5a1e3a2a381327d749854788bb',
|
||||
'info_dict': {
|
||||
'id': '57de3baa-4bb0-487e-9418-2692c1277a34',
|
||||
'ext': 'mp3',
|
||||
'title': '"Where Are You?": Taipei 101, Taiwan',
|
||||
'timestamp': 1196172000000,
|
||||
'timestamp': 1196172000,
|
||||
'upload_date': '20071127',
|
||||
'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e',
|
||||
'duration': 211,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# test with multiple blings
|
||||
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
||||
'md5': '55c0097badd7095f494c99a172f86501',
|
||||
'info_dict': {
|
||||
'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||
'ext': 'mp3',
|
||||
'title': '2. Raggarmordet - Röster ur det förflutna',
|
||||
'timestamp': 1477346700,
|
||||
'upload_date': '20161024',
|
||||
'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4',
|
||||
'duration': 2797,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel, display_id = re.match(self._VALID_URL, url).groups()
|
||||
@ -35,11 +51,11 @@ class ACastIE(InfoExtractor):
|
||||
return {
|
||||
'id': compat_str(cast_data['id']),
|
||||
'display_id': display_id,
|
||||
'url': cast_data['blings'][0]['audio'],
|
||||
'url': [b['audio'] for b in cast_data['blings'] if b['type'] == 'BlingAudio'][0],
|
||||
'title': cast_data['name'],
|
||||
'description': cast_data.get('description'),
|
||||
'thumbnail': cast_data.get('image'),
|
||||
'timestamp': int_or_none(cast_data.get('publishingDate')),
|
||||
'timestamp': parse_iso8601(cast_data.get('publishingDate')),
|
||||
'duration': int_or_none(cast_data.get('duration')),
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@ -26,6 +26,11 @@ MSO_INFO = {
|
||||
'username_field': 'UserName',
|
||||
'password_field': 'UserPassword',
|
||||
},
|
||||
'Comcast_SSO': {
|
||||
'name': 'Comcast XFINITY',
|
||||
'username_field': 'user',
|
||||
'password_field': 'passwd',
|
||||
},
|
||||
'thr030': {
|
||||
'name': '3 Rivers Communications'
|
||||
},
|
||||
@ -1364,14 +1369,53 @@ class AdobePassIE(InfoExtractor):
|
||||
'domain_name': 'adobe.com',
|
||||
'redirect_url': url,
|
||||
})
|
||||
provider_login_page_res = post_form(
|
||||
provider_redirect_page_res, 'Downloading Provider Login Page')
|
||||
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
|
||||
mso_info.get('username_field', 'username'): username,
|
||||
mso_info.get('password_field', 'password'): password,
|
||||
})
|
||||
if mso_id != 'Rogers':
|
||||
post_form(mvpd_confirm_page_res, 'Confirming Login')
|
||||
|
||||
if mso_id == 'Comcast_SSO':
|
||||
# Comcast page flow varies by video site and whether you
|
||||
# are on Comcast's network.
|
||||
provider_redirect_page, urlh = provider_redirect_page_res
|
||||
# Check for Comcast auto login
|
||||
if 'automatically signing you in' in provider_redirect_page:
|
||||
oauth_redirect_url = self._html_search_regex(
|
||||
r'window\.location\s*=\s*[\'"]([^\'"]+)',
|
||||
provider_redirect_page, 'oauth redirect')
|
||||
# Just need to process the request. No useful data comes back
|
||||
self._download_webpage(
|
||||
oauth_redirect_url, video_id, 'Confirming auto login')
|
||||
else:
|
||||
if '<form name="signin"' in provider_redirect_page:
|
||||
# already have the form, just fill it
|
||||
provider_login_page_res = provider_redirect_page_res
|
||||
elif 'http-equiv="refresh"' in provider_redirect_page:
|
||||
# redirects to the login page
|
||||
oauth_redirect_url = self._html_search_regex(
|
||||
r'content="0;\s*url=([^\'"]+)',
|
||||
provider_redirect_page, 'meta refresh redirect')
|
||||
provider_login_page_res = self._download_webpage_handle(
|
||||
oauth_redirect_url,
|
||||
video_id, 'Downloading Provider Login Page')
|
||||
else:
|
||||
provider_login_page_res = post_form(
|
||||
provider_redirect_page_res, 'Downloading Provider Login Page')
|
||||
|
||||
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
|
||||
mso_info.get('username_field', 'username'): username,
|
||||
mso_info.get('password_field', 'password'): password,
|
||||
})
|
||||
mvpd_confirm_page, urlh = mvpd_confirm_page_res
|
||||
if '<button class="submit" value="Resume">Resume</button>' in mvpd_confirm_page:
|
||||
post_form(mvpd_confirm_page_res, 'Confirming Login')
|
||||
|
||||
else:
|
||||
# Normal, non-Comcast flow
|
||||
provider_login_page_res = post_form(
|
||||
provider_redirect_page_res, 'Downloading Provider Login Page')
|
||||
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', {
|
||||
mso_info.get('username_field', 'username'): username,
|
||||
mso_info.get('password_field', 'password'): password,
|
||||
})
|
||||
if mso_id != 'Rogers':
|
||||
post_form(mvpd_confirm_page_res, 'Confirming Login')
|
||||
|
||||
session = self._download_webpage(
|
||||
self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id,
|
||||
|
@ -96,6 +96,27 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'http://www.adultswim.com/videos/toonami/friday-october-14th-2016/',
|
||||
'info_dict': {
|
||||
'id': 'eYiLsKVgQ6qTC6agD67Sig',
|
||||
'title': 'Toonami - Friday, October 14th, 2016',
|
||||
'description': 'md5:99892c96ffc85e159a428de85c30acde',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '',
|
||||
'info_dict': {
|
||||
'id': 'eYiLsKVgQ6qTC6agD67Sig',
|
||||
'ext': 'mp4',
|
||||
'title': 'Toonami - Friday, October 14th, 2016',
|
||||
'description': 'md5:99892c96ffc85e159a428de85c30acde',
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@ -163,6 +184,8 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
|
||||
elif video_info.get('videoPlaybackID'):
|
||||
segment_ids = [video_info['videoPlaybackID']]
|
||||
elif video_info.get('id'):
|
||||
segment_ids = [video_info['id']]
|
||||
else:
|
||||
if video_info.get('auth') is True:
|
||||
raise ExtractorError(
|
||||
|
@ -26,7 +26,7 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)/full-movie)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
|
||||
'md5': '8ff93eb073449f151d6b90c0ae1ef0c7',
|
||||
'md5': 'a97a65f7e823ae10e9244bc5433d5fe6',
|
||||
'info_dict': {
|
||||
'id': '22253814',
|
||||
'ext': 'mp4',
|
||||
@ -99,7 +99,7 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'assetTypes': 'medium_video_s3'
|
||||
'assetTypes': 'high_video_s3'
|
||||
}
|
||||
video_id = self._html_search_meta('aetn:VideoID', webpage)
|
||||
media_url = self._search_regex(
|
||||
@ -155,7 +155,7 @@ class HistoryTopicIE(AENetworksBaseIE):
|
||||
'id': 'world-war-i-history',
|
||||
'title': 'World War I History',
|
||||
},
|
||||
'playlist_mincount': 24,
|
||||
'playlist_mincount': 23,
|
||||
}, {
|
||||
'url': 'http://www.history.com/topics/world-war-i-history/videos',
|
||||
'only_matching': True,
|
||||
@ -193,7 +193,8 @@ class HistoryTopicIE(AENetworksBaseIE):
|
||||
return self.theplatform_url_result(
|
||||
release_url, video_id, {
|
||||
'mbr': 'true',
|
||||
'switch': 'hls'
|
||||
'switch': 'hls',
|
||||
'assetTypes': 'high_video_ak',
|
||||
})
|
||||
else:
|
||||
webpage = self._download_webpage(url, topic_id)
|
||||
@ -203,6 +204,7 @@ class HistoryTopicIE(AENetworksBaseIE):
|
||||
entries.append(self.theplatform_url_result(
|
||||
video_attributes['data-release-url'], video_attributes['data-id'], {
|
||||
'mbr': 'true',
|
||||
'switch': 'hls'
|
||||
'switch': 'hls',
|
||||
'assetTypes': 'high_video_ak',
|
||||
}))
|
||||
return self.playlist_result(entries, topic_id, get_element_by_attribute('class', 'show-title', webpage))
|
||||
|
@ -11,6 +11,7 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
update_url_query,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
)
|
||||
@ -18,12 +19,18 @@ from ..utils import (
|
||||
|
||||
class AfreecaTVIE(InfoExtractor):
|
||||
IE_DESC = 'afreecatv.com'
|
||||
_VALID_URL = r'''(?x)^
|
||||
https?://(?:(live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
|
||||
(?:
|
||||
/app/(?:index|read_ucc_bbs)\.cgi|
|
||||
/player/[Pp]layer\.(?:swf|html))
|
||||
\?.*?\bnTitleNo=(?P<id>\d+)'''
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
|
||||
(?:
|
||||
/app/(?:index|read_ucc_bbs)\.cgi|
|
||||
/player/[Pp]layer\.(?:swf|html)
|
||||
)\?.*?\bnTitleNo=|
|
||||
vod\.afreecatv\.com/PLAYER/STATION/
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
|
||||
'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
|
||||
@ -66,6 +73,9 @@ class AfreecaTVIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://vod.afreecatv.com/PLAYER/STATION/15055030',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@ -83,7 +93,9 @@ class AfreecaTVIE(InfoExtractor):
|
||||
info_url = compat_urlparse.urlunparse(parsed_url._replace(
|
||||
netloc='afbbs.afreecatv.com:8080',
|
||||
path='/api/video/get_video_info.php'))
|
||||
video_xml = self._download_xml(info_url, video_id)
|
||||
|
||||
video_xml = self._download_xml(
|
||||
update_url_query(info_url, {'nTitleNo': video_id}), video_id)
|
||||
|
||||
if xpath_element(video_xml, './track/video/file') is None:
|
||||
raise ExtractorError('Specified AfreecaTV video does not exist',
|
||||
|
@ -1,29 +1,26 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
remove_end,
|
||||
qualities,
|
||||
unescapeHTML,
|
||||
xpath_element,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
class AllocineIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?P<typ>article|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P<id>[0-9]+)(?:\.html)?'
|
||||
_VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?:article|video|film)/(?:fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P<id>[0-9]+)(?:\.html)?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html',
|
||||
'md5': '0c9fcf59a841f65635fa300ac43d8269',
|
||||
'info_dict': {
|
||||
'id': '19546517',
|
||||
'display_id': '18635087',
|
||||
'ext': 'mp4',
|
||||
'title': 'Astérix - Le Domaine des Dieux Teaser VF',
|
||||
'description': 'md5:abcd09ce503c6560512c14ebfdb720d2',
|
||||
'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
@ -31,64 +28,82 @@ class AllocineIE(InfoExtractor):
|
||||
'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0',
|
||||
'info_dict': {
|
||||
'id': '19540403',
|
||||
'display_id': '19540403',
|
||||
'ext': 'mp4',
|
||||
'title': 'Planes 2 Bande-annonce VF',
|
||||
'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.allocine.fr/film/fichefilm_gen_cfilm=181290.html',
|
||||
'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html',
|
||||
'md5': '101250fb127ef9ca3d73186ff22a47ce',
|
||||
'info_dict': {
|
||||
'id': '19544709',
|
||||
'display_id': '19544709',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dragons 2 - Bande annonce finale VF',
|
||||
'description': 'md5:601d15393ac40f249648ef000720e7e3',
|
||||
'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.allocine.fr/video/video-19550147/',
|
||||
'only_matching': True,
|
||||
'md5': '3566c0668c0235e2d224fd8edb389f67',
|
||||
'info_dict': {
|
||||
'id': '19550147',
|
||||
'ext': 'mp4',
|
||||
'title': 'Faux Raccord N°123 - Les gaffes de Cliffhanger',
|
||||
'description': 'md5:bc734b83ffa2d8a12188d9eb48bb6354',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
typ = mobj.group('typ')
|
||||
display_id = mobj.group('id')
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
if typ == 'film':
|
||||
video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id')
|
||||
else:
|
||||
player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player', default=None)
|
||||
if player:
|
||||
player_data = json.loads(player)
|
||||
video_id = compat_str(player_data['refMedia'])
|
||||
else:
|
||||
model = self._search_regex(r'data-model="([^"]+)">', webpage, 'data model')
|
||||
model_data = self._parse_json(unescapeHTML(model), display_id)
|
||||
video_id = compat_str(model_data['id'])
|
||||
|
||||
xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id)
|
||||
|
||||
video = xpath_element(xml, './/AcVisionVideo').attrib
|
||||
formats = []
|
||||
quality = qualities(['ld', 'md', 'hd'])
|
||||
|
||||
formats = []
|
||||
for k, v in video.items():
|
||||
if re.match(r'.+_path', k):
|
||||
format_id = k.split('_')[0]
|
||||
model = self._html_search_regex(
|
||||
r'data-model="([^"]+)"', webpage, 'data model', default=None)
|
||||
if model:
|
||||
model_data = self._parse_json(model, display_id)
|
||||
|
||||
for video_url in model_data['sources'].values():
|
||||
video_id, format_id = url_basename(video_url).split('_')[:2]
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'quality': quality(format_id),
|
||||
'url': v,
|
||||
'url': video_url,
|
||||
})
|
||||
|
||||
title = model_data['title']
|
||||
else:
|
||||
video_id = display_id
|
||||
media_data = self._download_json(
|
||||
'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
|
||||
for key, value in media_data['video'].items():
|
||||
if not key.endswith('Path'):
|
||||
continue
|
||||
|
||||
format_id = key[:-len('Path')]
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'quality': quality(format_id),
|
||||
'url': value,
|
||||
})
|
||||
|
||||
title = remove_end(self._html_search_regex(
|
||||
r'(?s)<title>(.+?)</title>', webpage, 'title'
|
||||
).strip(), ' - AlloCiné')
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video['videoTitle'],
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'formats': formats,
|
||||
'description': self._og_search_description(webpage),
|
||||
|
@ -10,7 +10,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class AMCNetworksIE(ThePlatformIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?season-\d+/episode-\d+(?:-(?:[^/]+/)?|/))(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?[^/]+/episode-\d+(?:-(?:[^/]+/)?|/))(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
|
||||
'md5': '',
|
||||
@ -41,6 +41,9 @@ class AMCNetworksIE(ThePlatformIE):
|
||||
}, {
|
||||
'url': 'http://www.ifc.com/movies/chaos',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bbcamerica.com/shows/doctor-who/full-episodes/the-power-of-the-daleks/episode-01-episode-1-color-version',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -157,22 +157,16 @@ class AnvatoIE(InfoExtractor):
|
||||
video_data_url, video_id, transform_source=strip_jsonp,
|
||||
data=json.dumps(payload).encode('utf-8'))
|
||||
|
||||
def _extract_anvato_videos(self, webpage, video_id):
|
||||
anvplayer_data = self._parse_json(self._html_search_regex(
|
||||
r'<script[^>]+data-anvp=\'([^\']+)\'', webpage,
|
||||
'Anvato player data'), video_id)
|
||||
|
||||
video_id = anvplayer_data['video']
|
||||
access_key = anvplayer_data['accessKey']
|
||||
|
||||
def _get_anvato_videos(self, access_key, video_id):
|
||||
video_data = self._get_video_json(access_key, video_id)
|
||||
|
||||
formats = []
|
||||
for published_url in video_data['published_urls']:
|
||||
video_url = published_url['embed_url']
|
||||
media_format = published_url.get('format')
|
||||
ext = determine_ext(video_url)
|
||||
|
||||
if ext == 'smil':
|
||||
if ext == 'smil' or media_format == 'smil':
|
||||
formats.extend(self._extract_smil_formats(video_url, video_id))
|
||||
continue
|
||||
|
||||
@ -183,7 +177,7 @@ class AnvatoIE(InfoExtractor):
|
||||
'tbr': tbr if tbr != 0 else None,
|
||||
}
|
||||
|
||||
if ext == 'm3u8':
|
||||
if ext == 'm3u8' or media_format in ('m3u8', 'm3u8-variant'):
|
||||
# Not using _extract_m3u8_formats here as individual media
|
||||
# playlists are also included in published_urls.
|
||||
if tbr is None:
|
||||
@ -194,7 +188,7 @@ class AnvatoIE(InfoExtractor):
|
||||
'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
|
||||
'ext': 'mp4',
|
||||
})
|
||||
elif ext == 'mp3':
|
||||
elif ext == 'mp3' or media_format == 'mp3':
|
||||
a_format['vcodec'] = 'none'
|
||||
else:
|
||||
a_format.update({
|
||||
@ -218,7 +212,19 @@ class AnvatoIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'title': video_data.get('def_title'),
|
||||
'description': video_data.get('def_description'),
|
||||
'tags': video_data.get('def_tags', '').split(','),
|
||||
'categories': video_data.get('categories'),
|
||||
'thumbnail': video_data.get('thumbnail'),
|
||||
'timestamp': int_or_none(video_data.get(
|
||||
'ts_published') or video_data.get('ts_added')),
|
||||
'uploader': video_data.get('mcp_id'),
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _extract_anvato_videos(self, webpage, video_id):
|
||||
anvplayer_data = self._parse_json(self._html_search_regex(
|
||||
r'<script[^>]+data-anvp=\'([^\']+)\'', webpage,
|
||||
'Anvato player data'), video_id)
|
||||
return self._get_anvato_videos(
|
||||
anvplayer_data['accessKey'], anvplayer_data['video'])
|
||||
|
@ -174,11 +174,15 @@ class ARDMediathekIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
|
||||
raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
|
||||
ERRORS = (
|
||||
('>Leider liegt eine Störung vor.', 'Video %s is unavailable'),
|
||||
('>Der gewünschte Beitrag ist nicht mehr verfügbar.<',
|
||||
'Video %s is no longer available'),
|
||||
)
|
||||
|
||||
if 'Diese Sendung ist für Jugendliche unter 12 Jahren nicht geeignet. Der Clip ist deshalb nur von 20 bis 6 Uhr verfügbar.' in webpage:
|
||||
raise ExtractorError('This program is only suitable for those aged 12 and older. Video %s is therefore only available between 20 pm and 6 am.' % video_id, expected=True)
|
||||
for pattern, message in ERRORS:
|
||||
if pattern in webpage:
|
||||
raise ExtractorError(message % video_id, expected=True)
|
||||
|
||||
if re.search(r'[\?&]rss($|[=&])', url):
|
||||
doc = compat_etree_fromstring(webpage.encode('utf-8'))
|
||||
|
@ -4,8 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
@ -15,7 +17,13 @@ from ..utils import (
|
||||
|
||||
|
||||
class ArkenaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P<id>[^/]+)/[^/]+/(?P<account_id>\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
video\.arkena\.com/play2/embed/player\?|
|
||||
play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P<id>[^/]+)/[^/]+/(?P<account_id>\d+)
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411',
|
||||
'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
|
||||
@ -37,6 +45,9 @@ class ArkenaIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://play.arkena.com/embed/avp/v1/player/media/327336/darkmatter/131064/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://video.arkena.com/play2/embed/player?accountId=472718&mediaId=35763b3b-00090078-bf604299&pageStyling=styled',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@ -53,6 +64,14 @@ class ArkenaIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
account_id = mobj.group('account_id')
|
||||
|
||||
# Handle http://video.arkena.com/play2/embed/player URL
|
||||
if not video_id:
|
||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
video_id = qs.get('mediaId', [None])[0]
|
||||
account_id = qs.get('accountId', [None])[0]
|
||||
if not video_id or not account_id:
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
|
||||
playlist = self._download_json(
|
||||
'https://play.arkena.com/config/avp/v2/player/media/%s/0/%s/?callbackMethod=_'
|
||||
% (video_id, account_id),
|
||||
|
@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@ -410,6 +410,22 @@ class ArteTVEmbedIE(ArteTVPlus7IE):
|
||||
return self._extract_from_json_url(json_url, video_id, lang)
|
||||
|
||||
|
||||
class TheOperaPlatformIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'theoperaplatform'
|
||||
_VALID_URL = r'https?://(?:www\.)?theoperaplatform\.eu/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.theoperaplatform.eu/de/opera/verdi-otello',
|
||||
'md5': '970655901fa2e82e04c00b955e9afe7b',
|
||||
'info_dict': {
|
||||
'id': '060338-009-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'Verdi - OTELLO',
|
||||
'upload_date': '20160927',
|
||||
},
|
||||
}]
|
||||
|
||||
|
||||
class ArteTVPlaylistIE(ArteTVBaseIE):
|
||||
IE_NAME = 'arte.tv:playlist'
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/[^#]*#collection/(?P<id>PL-\d+)'
|
||||
|
@ -6,8 +6,8 @@ from ..utils import float_or_none
|
||||
|
||||
|
||||
class AudioBoomIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?audioboom\.com/boos/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://audioboom.com/boos/4279833-3-09-2016-czaban-hour-3?t=0',
|
||||
'md5': '63a8d73a055c6ed0f1e51921a10a5a76',
|
||||
'info_dict': {
|
||||
@ -19,7 +19,10 @@ class AudioBoomIE(InfoExtractor):
|
||||
'uploader': 'Steve Czaban',
|
||||
'uploader_url': 're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class AzubuIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?azubu\.tv/[^/]+#!/play/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?azubu\.(?:tv|uol.com.br)/[^/]+#!/play/(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.azubu.tv/GSL#!/play/15575/2014-hot6-cup-last-big-match-ro8-day-1',
|
||||
@ -103,12 +103,15 @@ class AzubuIE(InfoExtractor):
|
||||
|
||||
|
||||
class AzubuLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?azubu\.tv/(?P<id>[^/]+)$'
|
||||
_VALID_URL = r'https?://(?:www\.)?azubu\.(?:tv|uol.com.br)/(?P<id>[^/]+)$'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.azubu.tv/MarsTVMDLen',
|
||||
'only_matching': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://azubu.uol.com.br/adolfz',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
user = self._match_id(url)
|
||||
|
@ -1,7 +1,9 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
@ -12,6 +14,9 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_filesize,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
@ -81,35 +86,68 @@ class BandcampIE(InfoExtractor):
|
||||
r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
|
||||
webpage, 'video id')
|
||||
|
||||
download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page')
|
||||
# We get the dictionary of the track from some javascript code
|
||||
all_info = self._parse_json(self._search_regex(
|
||||
r'(?sm)items: (.*?),$', download_webpage, 'items'), video_id)
|
||||
info = all_info[0]
|
||||
# We pick mp3-320 for now, until format selection can be easily implemented.
|
||||
mp3_info = info['downloads']['mp3-320']
|
||||
# If we try to use this url it says the link has expired
|
||||
initial_url = mp3_info['url']
|
||||
m_url = re.match(
|
||||
r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$',
|
||||
initial_url)
|
||||
# We build the url we will use to get the final track url
|
||||
# This url is build in Bandcamp in the script download_bunde_*.js
|
||||
request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts'))
|
||||
final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url')
|
||||
# If we could correctly generate the .rand field the url would be
|
||||
# in the "download_url" key
|
||||
final_url = self._proto_relative_url(self._search_regex(
|
||||
r'"retry_url":"(.+?)"', final_url_webpage, 'final video URL'), 'http:')
|
||||
download_webpage = self._download_webpage(
|
||||
download_link, video_id, 'Downloading free downloads page')
|
||||
|
||||
blob = self._parse_json(
|
||||
self._search_regex(
|
||||
r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
|
||||
'blob', group='blob'),
|
||||
video_id, transform_source=unescapeHTML)
|
||||
|
||||
info = blob['digital_items'][0]
|
||||
|
||||
downloads = info['downloads']
|
||||
track = info['title']
|
||||
|
||||
artist = info.get('artist')
|
||||
title = '%s - %s' % (artist, track) if artist else track
|
||||
|
||||
download_formats = {}
|
||||
for f in blob['download_formats']:
|
||||
name, ext = f.get('name'), f.get('file_extension')
|
||||
if all(isinstance(x, compat_str) for x in (name, ext)):
|
||||
download_formats[name] = ext.strip('.')
|
||||
|
||||
formats = []
|
||||
for format_id, f in downloads.items():
|
||||
format_url = f.get('url')
|
||||
if not format_url:
|
||||
continue
|
||||
# Stat URL generation algorithm is reverse engineered from
|
||||
# download_*_bundle_*.js
|
||||
stat_url = update_url_query(
|
||||
format_url.replace('/download/', '/statdownload/'), {
|
||||
'.rand': int(time.time() * 1000 * random.random()),
|
||||
})
|
||||
format_id = f.get('encoding_name') or format_id
|
||||
stat = self._download_json(
|
||||
stat_url, video_id, 'Downloading %s JSON' % format_id,
|
||||
transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1],
|
||||
fatal=False)
|
||||
if not stat:
|
||||
continue
|
||||
retry_url = stat.get('retry_url')
|
||||
if not isinstance(retry_url, compat_str):
|
||||
continue
|
||||
formats.append({
|
||||
'url': self._proto_relative_url(retry_url, 'http:'),
|
||||
'ext': download_formats.get(format_id),
|
||||
'format_id': format_id,
|
||||
'format_note': f.get('description'),
|
||||
'filesize': parse_filesize(f.get('size_mb')),
|
||||
'vcodec': 'none',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info['title'],
|
||||
'ext': 'mp3',
|
||||
'vcodec': 'none',
|
||||
'url': final_url,
|
||||
'title': title,
|
||||
'thumbnail': info.get('thumb_url'),
|
||||
'uploader': info.get('artist'),
|
||||
'artist': artist,
|
||||
'track': track,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
|
@ -8,10 +8,10 @@ from ..compat import compat_str
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class BeatportProIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://pro\.beatport\.com/track/(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
|
||||
class BeatportIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|pro\.)?beatport\.com/track/(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://pro.beatport.com/track/synesthesia-original-mix/5379371',
|
||||
'url': 'https://beatport.com/track/synesthesia-original-mix/5379371',
|
||||
'md5': 'b3c34d8639a2f6a7f734382358478887',
|
||||
'info_dict': {
|
||||
'id': '5379371',
|
||||
@ -20,7 +20,7 @@ class BeatportProIE(InfoExtractor):
|
||||
'title': 'Froxic - Synesthesia (Original Mix)',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://pro.beatport.com/track/love-and-war-original-mix/3756896',
|
||||
'url': 'https://beatport.com/track/love-and-war-original-mix/3756896',
|
||||
'md5': 'e44c3025dfa38c6577fbaeb43da43514',
|
||||
'info_dict': {
|
||||
'id': '3756896',
|
||||
@ -29,7 +29,7 @@ class BeatportProIE(InfoExtractor):
|
||||
'title': 'Wolfgang Gartner - Love & War (Original Mix)',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://pro.beatport.com/track/birds-original-mix/4991738',
|
||||
'url': 'https://beatport.com/track/birds-original-mix/4991738',
|
||||
'md5': 'a1fd8e8046de3950fd039304c186c05f',
|
||||
'info_dict': {
|
||||
'id': '4991738',
|
@ -46,19 +46,19 @@ class BeegIE(InfoExtractor):
|
||||
self._proto_relative_url(cpl_url), video_id,
|
||||
'Downloading cpl JS', fatal=False)
|
||||
if cpl:
|
||||
beeg_version = self._search_regex(
|
||||
r'beeg_version\s*=\s*(\d+)', cpl,
|
||||
'beeg version', default=None) or self._search_regex(
|
||||
beeg_version = int_or_none(self._search_regex(
|
||||
r'beeg_version\s*=\s*([^\b]+)', cpl,
|
||||
'beeg version', default=None)) or self._search_regex(
|
||||
r'/(\d+)\.js', cpl_url, 'beeg version', default=None)
|
||||
beeg_salt = self._search_regex(
|
||||
r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg beeg_salt',
|
||||
r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg salt',
|
||||
default=None, group='beeg_salt')
|
||||
|
||||
beeg_version = beeg_version or '1750'
|
||||
beeg_salt = beeg_salt or 'MIDtGaw96f0N1kMMAM1DE46EC9pmFr'
|
||||
beeg_version = beeg_version or '2000'
|
||||
beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H'
|
||||
|
||||
video = self._download_json(
|
||||
'http://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id),
|
||||
'https://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id),
|
||||
video_id)
|
||||
|
||||
def split(o, e):
|
||||
|
@ -45,7 +45,8 @@ class BloombergIE(InfoExtractor):
|
||||
name = self._match_id(url)
|
||||
webpage = self._download_webpage(url, name)
|
||||
video_id = self._search_regex(
|
||||
r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>.+?)\1',
|
||||
(r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
r'videoId\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
|
||||
webpage, 'id', group='url', default=None)
|
||||
if not video_id:
|
||||
bplayer_data = self._parse_json(self._search_regex(
|
||||
|
@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@ -232,13 +232,16 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
"""Return a list of all Brightcove URLs from the webpage """
|
||||
|
||||
url_m = re.search(
|
||||
r'<meta\s+property=[\'"]og:video[\'"]\s+content=[\'"](https?://(?:secure|c)\.brightcove.com/[^\'"]+)[\'"]',
|
||||
webpage)
|
||||
r'''(?x)
|
||||
<meta\s+
|
||||
(?:property|itemprop)=([\'"])(?:og:video|embedURL)\1[^>]+
|
||||
content=([\'"])(?P<url>https?://(?:secure|c)\.brightcove.com/(?:(?!\2).)+)\2
|
||||
''', webpage)
|
||||
if url_m:
|
||||
url = unescapeHTML(url_m.group(1))
|
||||
url = unescapeHTML(url_m.group('url'))
|
||||
# Some sites don't add it, we can't download with this url, for example:
|
||||
# http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
|
||||
if 'playerKey' in url or 'videoId' in url:
|
||||
if 'playerKey' in url or 'videoId' in url or 'idVideo' in url:
|
||||
return [url]
|
||||
|
||||
matches = re.findall(
|
||||
@ -259,7 +262,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
|
||||
# Change the 'videoId' and others field to '@videoPlayer'
|
||||
url = re.sub(r'(?<=[?&])(videoI(d|D)|bctid)', '%40videoPlayer', url)
|
||||
url = re.sub(r'(?<=[?&])(videoI(d|D)|idVideo|bctid)', '%40videoPlayer', url)
|
||||
# Change bckey (used by bcove.me urls) to playerKey
|
||||
url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@ -548,7 +551,7 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
container = source.get('container')
|
||||
ext = mimetype2ext(source.get('type'))
|
||||
src = source.get('src')
|
||||
if ext == 'ism':
|
||||
if ext == 'ism' or container == 'WVM':
|
||||
continue
|
||||
elif ext == 'm3u8' or container == 'M2TS':
|
||||
if not src:
|
||||
|
@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@ -6,11 +6,13 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlparse
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
unified_strdate,
|
||||
qualities,
|
||||
int_or_none,
|
||||
qualities,
|
||||
remove_end,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
@ -43,47 +45,46 @@ class CanalplusIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814',
|
||||
'md5': '41f438a4904f7664b91b4ed0dec969dc',
|
||||
'info_dict': {
|
||||
'id': '1192814',
|
||||
'id': '1405510',
|
||||
'display_id': 'pid1830-c-zapping',
|
||||
'ext': 'mp4',
|
||||
'title': "L'Année du Zapping 2014 - L'Année du Zapping 2014",
|
||||
'description': "Toute l'année 2014 dans un Zapping exceptionnel !",
|
||||
'upload_date': '20150105',
|
||||
'title': 'Zapping - 02/07/2016',
|
||||
'description': 'Le meilleur de toutes les chaînes, tous les jours',
|
||||
'upload_date': '20160702',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
|
||||
'info_dict': {
|
||||
'id': '1108190',
|
||||
'ext': 'flv',
|
||||
'title': 'Le labyrinthe - Boing super ranger',
|
||||
'display_id': 'pid1405-le-labyrinthe-boing-super-ranger',
|
||||
'ext': 'mp4',
|
||||
'title': 'BOING SUPER RANGER - Ep : Le labyrinthe',
|
||||
'description': 'md5:4cea7a37153be42c1ba2c1d3064376ff',
|
||||
'upload_date': '20140724',
|
||||
},
|
||||
'skip': 'Only works from France',
|
||||
}, {
|
||||
'url': 'http://www.d8.tv/d8-docs-mags/pid5198-d8-en-quete-d-actualite.html?vid=1390231',
|
||||
'url': 'http://www.c8.fr/c8-divertissement/ms-touche-pas-a-mon-poste/pid6318-videos-integrales.html',
|
||||
'md5': '4b47b12b4ee43002626b97fad8fb1de5',
|
||||
'info_dict': {
|
||||
'id': '1390231',
|
||||
'id': '1420213',
|
||||
'display_id': 'pid6318-videos-integrales',
|
||||
'ext': 'mp4',
|
||||
'title': "Vacances pas chères : prix discount ou grosses dépenses ? - En quête d'actualité",
|
||||
'description': 'md5:edb6cf1cb4a1e807b5dd089e1ac8bfc6',
|
||||
'upload_date': '20160512',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'title': 'TPMP ! Même le matin - Les 35H de Baba - 14/10/2016',
|
||||
'description': 'md5:f96736c1b0ffaa96fd5b9e60ad871799',
|
||||
'upload_date': '20161014',
|
||||
},
|
||||
'skip': 'Only works from France',
|
||||
}, {
|
||||
'url': 'http://www.itele.fr/chroniques/invite-bruce-toussaint/thierry-solere-nicolas-sarkozy-officialisera-sa-candidature-a-la-primaire-quand-il-le-voudra-167224',
|
||||
'url': 'http://www.itele.fr/chroniques/invite-michael-darmon/rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
|
||||
'info_dict': {
|
||||
'id': '1398334',
|
||||
'id': '1420176',
|
||||
'display_id': 'rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
|
||||
'ext': 'mp4',
|
||||
'title': "L'invité de Bruce Toussaint du 07/06/2016 - ",
|
||||
'description': 'md5:40ac7c9ad0feaeb6f605bad986f61324',
|
||||
'upload_date': '20160607',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'title': 'L\'invité de Michaël Darmon du 14/10/2016 - ',
|
||||
'description': 'Chaque matin du lundi au vendredi, Michaël Darmon reçoit un invité politique à 8h25.',
|
||||
'upload_date': '20161014',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://m.canalplus.fr/?vid=1398231',
|
||||
@ -95,18 +96,18 @@ class CanalplusIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.groupdict().get('id') or mobj.groupdict().get('vid')
|
||||
|
||||
site_id = self._SITE_ID_MAP[compat_urllib_parse_urlparse(url).netloc.rsplit('.', 2)[-2]]
|
||||
|
||||
# Beware, some subclasses do not define an id group
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
display_id = remove_end(dict_get(mobj.groupdict(), ('display_id', 'id', 'vid')), '.html')
|
||||
|
||||
if video_id is None:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
[r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)', r'id=["\']canal_video_player(?P<id>\d+)'],
|
||||
webpage, 'video id', group='id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
[r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)',
|
||||
r'id=["\']canal_video_player(?P<id>\d+)',
|
||||
r'data-video=["\'](?P<id>\d+)'],
|
||||
webpage, 'video id', group='id')
|
||||
|
||||
info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id)
|
||||
video_data = self._download_json(info_url, video_id, 'Downloading video JSON')
|
||||
|
@ -89,6 +89,9 @@ class CanvasIE(InfoExtractor):
|
||||
elif format_type == 'HDS':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url, display_id, f4m_id=format_type, fatal=False))
|
||||
elif format_type == 'MPEG_DASH':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, display_id, mpd_id=format_type, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': format_type,
|
||||
|
@ -9,6 +9,8 @@ from ..utils import (
|
||||
try_get,
|
||||
)
|
||||
|
||||
from .videomore import VideomoreIE
|
||||
|
||||
|
||||
class CarambaTVIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:carambatv:|https?://video1\.carambatv\.ru/v/)(?P<id>\d+)'
|
||||
@ -62,14 +64,16 @@ class CarambaTVPageIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://carambatv\.ru/(?:[^/]+/)+(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'http://carambatv.ru/movie/bad-comedian/razborka-v-manile/',
|
||||
'md5': '',
|
||||
'md5': 'a49fb0ec2ad66503eeb46aac237d3c86',
|
||||
'info_dict': {
|
||||
'id': '191910501',
|
||||
'ext': 'mp4',
|
||||
'id': '475222',
|
||||
'ext': 'flv',
|
||||
'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 2678.31,
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
# duration reported by videomore is incorrect
|
||||
'duration': int,
|
||||
},
|
||||
'add_ie': [VideomoreIE.ie_key()],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -77,6 +81,16 @@ class CarambaTVPageIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
videomore_url = VideomoreIE._extract_url(webpage)
|
||||
if videomore_url:
|
||||
title = self._og_search_title(webpage)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': videomore_url,
|
||||
'ie_key': VideomoreIE.ie_key(),
|
||||
'title': title,
|
||||
}
|
||||
|
||||
video_url = self._og_search_property('video:iframe', webpage, default=None)
|
||||
|
||||
if not video_url:
|
||||
|
@ -283,11 +283,6 @@ class CBCWatchVideoIE(CBCWatchBaseIE):
|
||||
formats = self._extract_m3u8_formats(re.sub(r'/([^/]+)/[^/?]+\.m3u8', r'/\1/\1.m3u8', m3u8_url), video_id, 'mp4', fatal=False)
|
||||
if len(formats) < 2:
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
|
||||
# Despite metadata in m3u8 all video+audio formats are
|
||||
# actually video-only (no audio)
|
||||
for f in formats:
|
||||
if f.get('acodec') != 'none' and f.get('vcodec') != 'none':
|
||||
f['acodec'] = 'none'
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = {
|
||||
|
@ -63,7 +63,7 @@ class CBSInteractiveIE(ThePlatformIE):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
data_json = self._html_search_regex(
|
||||
r"data-(?:cnet|zdnet)-video(?:-uvp)?-options='([^']+)'",
|
||||
r"data-(?:cnet|zdnet)-video(?:-uvp(?:js)?)?-options='([^']+)'",
|
||||
webpage, 'data json')
|
||||
data = self._parse_json(data_json, display_id)
|
||||
vdata = data.get('video') or data['videos'][0]
|
||||
|
@ -4,11 +4,14 @@ from __future__ import unicode_literals
|
||||
from .anvato import AnvatoIE
|
||||
from .sendtonews import SendtoNewsIE
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import unified_timestamp
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class CBSLocalIE(AnvatoIE):
|
||||
_VALID_URL = r'https?://[a-z]+\.cbslocal\.com/\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
|
||||
_VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P<id>[0-9a-z-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# Anvato backend
|
||||
@ -22,6 +25,7 @@ class CBSLocalIE(AnvatoIE):
|
||||
'thumbnail': 're:^https?://.*',
|
||||
'timestamp': 1463440500,
|
||||
'upload_date': '20160516',
|
||||
'uploader': 'CBS',
|
||||
'subtitles': {
|
||||
'en': 'mincount:5',
|
||||
},
|
||||
@ -35,6 +39,7 @@ class CBSLocalIE(AnvatoIE):
|
||||
'Syndication\\Curb.tv',
|
||||
'Content\\News'
|
||||
],
|
||||
'tags': ['CBS 2 News Evening'],
|
||||
},
|
||||
}, {
|
||||
# SendtoNews embed
|
||||
@ -47,6 +52,31 @@ class CBSLocalIE(AnvatoIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
|
||||
'info_dict': {
|
||||
'id': '3580809',
|
||||
'ext': 'mp4',
|
||||
'title': 'A Very Blue Anniversary',
|
||||
'description': 'CBS2’s Cindy Hsu has more.',
|
||||
'thumbnail': 're:^https?://.*',
|
||||
'timestamp': 1479962220,
|
||||
'upload_date': '20161124',
|
||||
'uploader': 'CBS',
|
||||
'subtitles': {
|
||||
'en': 'mincount:5',
|
||||
},
|
||||
'categories': [
|
||||
'Stations\\Spoken Word\\WCBSTV',
|
||||
'Syndication\\AOL',
|
||||
'Syndication\\MSN',
|
||||
'Syndication\\NDN',
|
||||
'Syndication\\Yahoo',
|
||||
'Content\\News',
|
||||
'Content\\News\\Local News',
|
||||
],
|
||||
'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -62,8 +92,11 @@ class CBSLocalIE(AnvatoIE):
|
||||
info_dict = self._extract_anvato_videos(webpage, display_id)
|
||||
|
||||
time_str = self._html_search_regex(
|
||||
r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False)
|
||||
timestamp = unified_timestamp(time_str)
|
||||
r'class="entry-date">([^<]+)<', webpage, 'released date', default=None)
|
||||
if time_str:
|
||||
timestamp = unified_timestamp(time_str)
|
||||
else:
|
||||
timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage))
|
||||
|
||||
info_dict.update({
|
||||
'display_id': display_id,
|
||||
|
@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
99
youtube_dl/extractor/ccma.py
Normal file
99
youtube_dl/extractor/ccma.py
Normal file
@ -0,0 +1,99 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
clean_html,
|
||||
)
|
||||
|
||||
|
||||
class CCMAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/',
|
||||
'md5': '7296ca43977c8ea4469e719c609b0871',
|
||||
'info_dict': {
|
||||
'id': '5630208',
|
||||
'ext': 'mp4',
|
||||
'title': 'L\'espot de La Marató de TV3',
|
||||
'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
|
||||
'timestamp': 1470918540,
|
||||
'upload_date': '20160811',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
|
||||
'md5': 'fa3e38f269329a278271276330261425',
|
||||
'info_dict': {
|
||||
'id': '943685',
|
||||
'ext': 'mp3',
|
||||
'title': 'El Consell de Savis analitza el derbi',
|
||||
'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
|
||||
'upload_date': '20171205',
|
||||
'timestamp': 1512507300,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
media_type, media_id = re.match(self._VALID_URL, url).groups()
|
||||
media_data = {}
|
||||
formats = []
|
||||
profiles = ['pc'] if media_type == 'audio' else ['mobil', 'pc']
|
||||
for i, profile in enumerate(profiles):
|
||||
md = self._download_json('http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
|
||||
'media': media_type,
|
||||
'idint': media_id,
|
||||
'profile': profile,
|
||||
}, fatal=False)
|
||||
if md:
|
||||
media_data = md
|
||||
media_url = media_data.get('media', {}).get('url')
|
||||
if media_url:
|
||||
formats.append({
|
||||
'format_id': profile,
|
||||
'url': media_url,
|
||||
'quality': i,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
informacio = media_data['informacio']
|
||||
title = informacio['titol']
|
||||
durada = informacio.get('durada', {})
|
||||
duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
|
||||
timestamp = parse_iso8601(informacio.get('data_emissio', {}).get('utc'))
|
||||
|
||||
subtitles = {}
|
||||
subtitols = media_data.get('subtitols', {})
|
||||
if subtitols:
|
||||
sub_url = subtitols.get('url')
|
||||
if sub_url:
|
||||
subtitles.setdefault(
|
||||
subtitols.get('iso') or subtitols.get('text') or 'ca', []).append({
|
||||
'url': sub_url,
|
||||
})
|
||||
|
||||
thumbnails = []
|
||||
imatges = media_data.get('imatges', {})
|
||||
if imatges:
|
||||
thumbnail_url = imatges.get('url')
|
||||
if thumbnail_url:
|
||||
thumbnails = [{
|
||||
'url': thumbnail_url,
|
||||
'width': int_or_none(imatges.get('amplada')),
|
||||
'height': int_or_none(imatges.get('alcada')),
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': media_id,
|
||||
'title': title,
|
||||
'description': clean_html(informacio.get('descripcio')),
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'thumnails': thumbnails,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
@ -4,27 +4,84 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class CCTVIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://(?:.+?\.)?
|
||||
(?:
|
||||
cctv\.(?:com|cn)|
|
||||
cntv\.cn
|
||||
)/
|
||||
(?:
|
||||
video/[^/]+/(?P<id>[0-9a-f]{32})|
|
||||
\d{4}/\d{2}/\d{2}/(?P<display_id>VID[0-9A-Za-z]+)
|
||||
)'''
|
||||
IE_DESC = '央视网'
|
||||
_VALID_URL = r'https?://(?:[^/]+)\.(?:cntv|cctv)\.(?:com|cn)/(?:[^/]+/)*?(?P<id>[^/?#&]+?)(?:/index)?(?:\.s?html|[?#&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://english.cntv.cn/2016/09/03/VIDEhnkB5y9AgHyIEVphCEz1160903.shtml',
|
||||
'md5': '819c7b49fc3927d529fb4cd555621823',
|
||||
'url': 'http://sports.cntv.cn/2016/02/12/ARTIaBRxv4rTT1yWf1frW2wi160212.shtml',
|
||||
'md5': 'd61ec00a493e09da810bf406a078f691',
|
||||
'info_dict': {
|
||||
'id': '454368eb19ad44a1925bf1eb96140a61',
|
||||
'id': '5ecdbeab623f4973b40ff25f18b174e8',
|
||||
'ext': 'mp4',
|
||||
'title': 'Portrait of Real Current Life 09/03/2016 Modern Inventors Part 1',
|
||||
}
|
||||
'title': '[NBA]二少联手砍下46分 雷霆主场击败鹈鹕(快讯)',
|
||||
'description': 'md5:7e14a5328dc5eb3d1cd6afbbe0574e95',
|
||||
'duration': 98,
|
||||
'uploader': 'songjunjie',
|
||||
'timestamp': 1455279956,
|
||||
'upload_date': '20160212',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://tv.cctv.com/2016/02/05/VIDEUS7apq3lKrHG9Dncm03B160205.shtml',
|
||||
'info_dict': {
|
||||
'id': 'efc5d49e5b3b4ab2b34f3a502b73d3ae',
|
||||
'ext': 'mp4',
|
||||
'title': '[赛车]“车王”舒马赫恢复情况成谜(快讯)',
|
||||
'description': '2月4日,蒙特泽莫罗透露了关于“车王”舒马赫恢复情况,但情况是否属实遭到了质疑。',
|
||||
'duration': 37,
|
||||
'uploader': 'shujun',
|
||||
'timestamp': 1454677291,
|
||||
'upload_date': '20160205',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://english.cntv.cn/special/four_comprehensives/index.shtml',
|
||||
'info_dict': {
|
||||
'id': '4bb9bb4db7a6471ba85fdeda5af0381e',
|
||||
'ext': 'mp4',
|
||||
'title': 'NHnews008 ANNUAL POLITICAL SEASON',
|
||||
'description': 'Four Comprehensives',
|
||||
'duration': 60,
|
||||
'uploader': 'zhangyunlei',
|
||||
'timestamp': 1425385521,
|
||||
'upload_date': '20150303',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://cctv.cntv.cn/lm/tvseries_russian/yilugesanghua/index.shtml',
|
||||
'info_dict': {
|
||||
'id': 'b15f009ff45c43968b9af583fc2e04b2',
|
||||
'ext': 'mp4',
|
||||
'title': 'Путь,усыпанный космеями Серия 1',
|
||||
'description': 'Путь, усыпанный космеями',
|
||||
'duration': 2645,
|
||||
'uploader': 'renxue',
|
||||
'timestamp': 1477479241,
|
||||
'upload_date': '20161026',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://ent.cntv.cn/2016/01/18/ARTIjprSSJH8DryTVr5Bx8Wb160118.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://tv.cntv.cn/video/C39296/e0210d949f113ddfb38d31f00a4e5c44',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://english.cntv.cn/2016/09/03/VIDEhnkB5y9AgHyIEVphCEz1160903.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://tv.cctv.com/2016/09/07/VIDE5C1FnlX5bUywlrjhxXOV160907.shtml',
|
||||
'only_matching': True,
|
||||
@ -34,20 +91,63 @@ class CCTVIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
if not video_id:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
r'(?:fo\.addVariable\("videoCenterId",\s*|guid\s*=\s*)"([0-9a-f]{32})',
|
||||
webpage, 'video_id')
|
||||
api_data = self._download_json(
|
||||
'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=' + video_id, video_id)
|
||||
m3u8_url = re.sub(r'maxbr=\d+&?', '', api_data['hls_url'])
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
[r'var\s+guid\s*=\s*["\']([\da-fA-F]+)',
|
||||
r'videoCenterId["\']\s*,\s*["\']([\da-fA-F]+)',
|
||||
r'"changePlayer\s*\(\s*["\']([\da-fA-F]+)',
|
||||
r'"load[Vv]ideo\s*\(\s*["\']([\da-fA-F]+)'],
|
||||
webpage, 'video id')
|
||||
|
||||
data = self._download_json(
|
||||
'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do', video_id,
|
||||
query={
|
||||
'pid': video_id,
|
||||
'url': url,
|
||||
'idl': 32,
|
||||
'idlr': 32,
|
||||
'modifyed': 'false',
|
||||
})
|
||||
|
||||
title = data['title']
|
||||
|
||||
formats = []
|
||||
|
||||
video = data.get('video')
|
||||
if isinstance(video, dict):
|
||||
for quality, chapters_key in enumerate(('lowChapters', 'chapters')):
|
||||
video_url = try_get(
|
||||
video, lambda x: x[chapters_key][0]['url'], compat_str)
|
||||
if video_url:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': 'http',
|
||||
'quality': quality,
|
||||
'preference': -1,
|
||||
})
|
||||
|
||||
hls_url = try_get(data, lambda x: x['hls_url'], compat_str)
|
||||
if hls_url:
|
||||
hls_url = re.sub(r'maxbr=\d+&?', '', hls_url)
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
hls_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
uploader = data.get('editer_name')
|
||||
description = self._html_search_meta('description', webpage)
|
||||
timestamp = unified_timestamp(data.get('f_pgmtime'))
|
||||
duration = float_or_none(try_get(video, lambda x: x['totalLength']))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': api_data['title'],
|
||||
'formats': self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False),
|
||||
'duration': float_or_none(api_data.get('video', {}).get('totalLength')),
|
||||
'title': title,
|
||||
'description': description,
|
||||
'uploader': uploader,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -5,14 +5,16 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
decode_packed_codes,
|
||||
ExtractorError,
|
||||
parse_duration
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class CDAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
|
||||
_BASE_URL = 'http://www.cda.pl/'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cda.pl/video/5749950c',
|
||||
'md5': '6f844bf51b15f31fae165365707ae970',
|
||||
@ -21,6 +23,9 @@ class CDAIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'height': 720,
|
||||
'title': 'Oto dlaczego przed zakrętem należy zwolnić.',
|
||||
'description': 'md5:269ccd135d550da90d1662651fcb9772',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'average_rating': float,
|
||||
'duration': 39
|
||||
}
|
||||
}, {
|
||||
@ -30,6 +35,11 @@ class CDAIE(InfoExtractor):
|
||||
'id': '57413289',
|
||||
'ext': 'mp4',
|
||||
'title': 'Lądowanie na lotnisku na Maderze',
|
||||
'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'crash404',
|
||||
'view_count': int,
|
||||
'average_rating': float,
|
||||
'duration': 137
|
||||
}
|
||||
}, {
|
||||
@ -39,31 +49,55 @@ class CDAIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage('http://ebd.cda.pl/0x0/' + video_id, video_id)
|
||||
self._set_cookie('cda.pl', 'cda.player', 'html5')
|
||||
webpage = self._download_webpage(
|
||||
self._BASE_URL + '/video/' + video_id, video_id)
|
||||
|
||||
if 'Ten film jest dostępny dla użytkowników premium' in webpage:
|
||||
raise ExtractorError('This video is only available for premium users.', expected=True)
|
||||
|
||||
title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
|
||||
|
||||
formats = []
|
||||
|
||||
uploader = self._search_regex(r'''(?x)
|
||||
<(span|meta)[^>]+itemprop=(["\'])author\2[^>]*>
|
||||
(?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*?
|
||||
<(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
|
||||
''', webpage, 'uploader', default=None, group='uploader')
|
||||
view_count = self._search_regex(
|
||||
r'Odsłony:(?:\s| )*([0-9]+)', webpage,
|
||||
'view_count', default=None)
|
||||
average_rating = self._search_regex(
|
||||
r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
|
||||
webpage, 'rating', fatal=False, group='rating_value')
|
||||
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'uploader': uploader,
|
||||
'view_count': int_or_none(view_count),
|
||||
'average_rating': float_or_none(average_rating),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'formats': formats,
|
||||
'duration': None,
|
||||
}
|
||||
|
||||
def extract_format(page, version):
|
||||
unpacked = decode_packed_codes(page)
|
||||
format_url = self._search_regex(
|
||||
r"(?:file|url)\s*:\s*(\\?[\"'])(?P<url>http.+?)\1", unpacked,
|
||||
'%s url' % version, fatal=False, group='url')
|
||||
if not format_url:
|
||||
json_str = self._search_regex(
|
||||
r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
|
||||
'%s player_json' % version, fatal=False, group='player_data')
|
||||
if not json_str:
|
||||
return
|
||||
player_data = self._parse_json(
|
||||
json_str, '%s player_data' % version, fatal=False)
|
||||
if not player_data:
|
||||
return
|
||||
video = player_data.get('video')
|
||||
if not video or 'file' not in video:
|
||||
self.report_warning('Unable to extract %s version information' % version)
|
||||
return
|
||||
f = {
|
||||
'url': format_url,
|
||||
'url': video['file'],
|
||||
}
|
||||
m = re.search(
|
||||
r'<a[^>]+data-quality="(?P<format_id>[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P<height>[0-9]+)p',
|
||||
@ -75,9 +109,7 @@ class CDAIE(InfoExtractor):
|
||||
})
|
||||
info_dict['formats'].append(f)
|
||||
if not info_dict['duration']:
|
||||
info_dict['duration'] = parse_duration(self._search_regex(
|
||||
r"duration\s*:\s*(\\?[\"'])(?P<duration>.+?)\1",
|
||||
unpacked, 'duration', fatal=False, group='duration'))
|
||||
info_dict['duration'] = parse_duration(video.get('duration'))
|
||||
|
||||
extract_format(webpage, 'default')
|
||||
|
||||
@ -85,7 +117,8 @@ class CDAIE(InfoExtractor):
|
||||
r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)',
|
||||
webpage):
|
||||
webpage = self._download_webpage(
|
||||
href, video_id, 'Downloading %s version information' % resolution, fatal=False)
|
||||
self._BASE_URL + href, video_id,
|
||||
'Downloading %s version information' % resolution, fatal=False)
|
||||
if not webpage:
|
||||
# Manually report warning because empty page is returned when
|
||||
# invalid version is requested.
|
||||
|
@ -1,4 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_duration
|
||||
@ -70,7 +71,6 @@ class ChirbitProfileIE(InfoExtractor):
|
||||
'url': 'http://chirbit.com/ScarletBeauty',
|
||||
'info_dict': {
|
||||
'id': 'ScarletBeauty',
|
||||
'title': 'Chirbits by ScarletBeauty',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}
|
||||
@ -78,13 +78,10 @@ class ChirbitProfileIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
profile_id = self._match_id(url)
|
||||
|
||||
rss = self._download_xml(
|
||||
'http://chirbit.com/rss/%s' % profile_id, profile_id)
|
||||
webpage = self._download_webpage(url, profile_id)
|
||||
|
||||
entries = [
|
||||
self.url_result(audio_url.text, 'Chirbit')
|
||||
for audio_url in rss.findall('./channel/item/link')]
|
||||
self.url_result(self._proto_relative_url('//chirb.it/' + video_id))
|
||||
for _, video_id in re.findall(r'<input[^>]+id=([\'"])copy-btn-(?P<id>[0-9a-zA-Z]+)\1', webpage)]
|
||||
|
||||
title = rss.find('./channel/title').text
|
||||
|
||||
return self.playlist_result(entries, profile_id, title)
|
||||
return self.playlist_result(entries, profile_id)
|
||||
|
@ -1,3 +1,4 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
@ -10,15 +11,15 @@ from ..utils import (
|
||||
class ClipfishIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?clipfish\.de/(?:[^/]+/)+video/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
|
||||
'md5': '79bc922f3e8a9097b3d68a93780fd475',
|
||||
'url': 'http://www.clipfish.de/special/ugly-americans/video/4343170/s01-e01-ugly-americans-date-in-der-hoelle/',
|
||||
'md5': '720563e467b86374c194bdead08d207d',
|
||||
'info_dict': {
|
||||
'id': '3966754',
|
||||
'id': '4343170',
|
||||
'ext': 'mp4',
|
||||
'title': 'FIFA 14 - E3 2013 Trailer',
|
||||
'description': 'Video zu FIFA 14: E3 2013 Trailer',
|
||||
'upload_date': '20130611',
|
||||
'duration': 82,
|
||||
'title': 'S01 E01 - Ugly Americans - Date in der Hölle',
|
||||
'description': 'Mark Lilly arbeitet im Sozialdienst der Stadt New York und soll Immigranten bei ihrer Einbürgerung in die USA zur Seite stehen.',
|
||||
'upload_date': '20161005',
|
||||
'duration': 1291,
|
||||
'view_count': int,
|
||||
}
|
||||
}
|
||||
@ -50,10 +51,14 @@ class ClipfishIE(InfoExtractor):
|
||||
'tbr': int_or_none(video_info.get('bitrate')),
|
||||
})
|
||||
|
||||
descr = video_info.get('descr')
|
||||
if descr:
|
||||
descr = descr.strip()
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_info['title'],
|
||||
'description': video_info.get('descr'),
|
||||
'description': descr,
|
||||
'formats': formats,
|
||||
'thumbnail': video_info.get('media_content_thumbnail_large') or video_info.get('media_thumbnail'),
|
||||
'duration': int_or_none(video_info.get('media_length')),
|
||||
|
@ -26,7 +26,7 @@ class CMTIE(MTVIE):
|
||||
'id': '1504699',
|
||||
'ext': 'mp4',
|
||||
'title': 'Still The King Ep. 109 in 3 Minutes',
|
||||
'description': 'Relive or catch up with Still The King by watching this recap of season 1, episode 9. New episodes Sundays 9/8c.',
|
||||
'description': 'Relive or catch up with Still The King by watching this recap of season 1, episode 9.',
|
||||
'timestamp': 1469421000.0,
|
||||
'upload_date': '20160725',
|
||||
},
|
||||
@ -42,3 +42,8 @@ class CMTIE(MTVIE):
|
||||
'%s said: video is not available' % cls.IE_NAME, expected=True)
|
||||
|
||||
return super(CMTIE, cls)._transform_rtmp_url(rtmp_video_url)
|
||||
|
||||
def _extract_mgid(self, webpage):
|
||||
return self._search_regex(
|
||||
r'MTVN\.VIDEO\.contentUri\s*=\s*([\'"])(?P<mgid>.+?)\1',
|
||||
webpage, 'mgid', group='mgid')
|
||||
|
@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@ -6,7 +6,7 @@ from .common import InfoExtractor
|
||||
|
||||
class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
|
||||
(video-clips|episodes|cc-studios|video-collections|full-episodes|shows)
|
||||
(video-clips|episodes|cc-studios|video-collections|shows(?=/[^/]+/(?!full-episodes)))
|
||||
/(?P<title>.*)'''
|
||||
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
|
||||
|
||||
@ -27,6 +27,40 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||
}]
|
||||
|
||||
|
||||
class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
|
||||
(?:full-episodes|shows(?=/[^/]+/full-episodes))
|
||||
/(?P<id>[^?]+)'''
|
||||
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cc.com/full-episodes/pv391a/the-daily-show-with-trevor-noah-november-28--2016---ryan-speedo-green-season-22-ep-22028',
|
||||
'info_dict': {
|
||||
'description': 'Donald Trump is accused of exploiting his president-elect status for personal gain, Cuban leader Fidel Castro dies, and Ryan Speedo Green discusses "Sing for Your Life."',
|
||||
'title': 'November 28, 2016 - Ryan Speedo Green',
|
||||
},
|
||||
'playlist_count': 4,
|
||||
}, {
|
||||
'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
feed_json = self._search_regex(r'var triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage, 'triforce feeed')
|
||||
feed = self._parse_json(feed_json, playlist_id)
|
||||
zones = feed['manifest']['zones']
|
||||
|
||||
video_zone = zones['t2_lc_promo1']
|
||||
feed = self._download_json(video_zone['feed'], playlist_id)
|
||||
mgid = feed['result']['data']['id']
|
||||
|
||||
videos_info = self._get_videos_info(mgid)
|
||||
return videos_info
|
||||
|
||||
|
||||
class ToshIE(MTVServicesInfoExtractor):
|
||||
IE_DESC = 'Tosh.0'
|
||||
_VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)'
|
||||
|
@ -21,6 +21,7 @@ from ..compat import (
|
||||
compat_os_name,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
@ -29,6 +30,7 @@ from ..downloader.f4m import remove_encrypted_media
|
||||
from ..utils import (
|
||||
NO_DEFAULT,
|
||||
age_restricted,
|
||||
base_url,
|
||||
bug_reports_message,
|
||||
clean_html,
|
||||
compiled_regex_type,
|
||||
@ -57,6 +59,7 @@ from ..utils import (
|
||||
parse_m3u8_attributes,
|
||||
extract_attributes,
|
||||
parse_codecs,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@ -186,9 +189,10 @@ class InfoExtractor(object):
|
||||
uploader_url: Full URL to a personal webpage of the video uploader.
|
||||
location: Physical location where the video was filmed.
|
||||
subtitles: The available subtitles as a dictionary in the format
|
||||
{language: subformats}. "subformats" is a list sorted from
|
||||
lower to higher preference, each element is a dictionary
|
||||
with the "ext" entry and one of:
|
||||
{tag: subformats}. "tag" is usually a language code, and
|
||||
"subformats" is a list sorted from lower to higher
|
||||
preference, each element is a dictionary with the "ext"
|
||||
entry and one of:
|
||||
* "data": The subtitles file contents
|
||||
* "url": A URL pointing to the subtitles file
|
||||
"ext" will be calculated from URL if missing
|
||||
@ -234,7 +238,7 @@ class InfoExtractor(object):
|
||||
chapter_id: Id of the chapter the video belongs to, as a unicode string.
|
||||
|
||||
The following fields should only be used when the video is an episode of some
|
||||
series or programme:
|
||||
series, programme or podcast:
|
||||
|
||||
series: Title of the series or programme the video episode belongs to.
|
||||
season: Title of the season the video episode belongs to.
|
||||
@ -884,7 +888,7 @@ class InfoExtractor(object):
|
||||
'url': e.get('contentUrl'),
|
||||
'title': unescapeHTML(e.get('name')),
|
||||
'description': unescapeHTML(e.get('description')),
|
||||
'thumbnail': e.get('thumbnailUrl'),
|
||||
'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
|
||||
'duration': parse_duration(e.get('duration')),
|
||||
'timestamp': unified_timestamp(e.get('uploadDate')),
|
||||
'filesize': float_or_none(e.get('contentSize')),
|
||||
@ -1099,6 +1103,13 @@ class InfoExtractor(object):
|
||||
manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
|
||||
'bootstrap info', default=None)
|
||||
|
||||
vcodec = None
|
||||
mime_type = xpath_text(
|
||||
manifest, ['{http://ns.adobe.com/f4m/1.0}mimeType', '{http://ns.adobe.com/f4m/2.0}mimeType'],
|
||||
'base URL', default=None)
|
||||
if mime_type and mime_type.startswith('audio/'):
|
||||
vcodec = 'none'
|
||||
|
||||
for i, media_el in enumerate(media_nodes):
|
||||
tbr = int_or_none(media_el.attrib.get('bitrate'))
|
||||
width = int_or_none(media_el.attrib.get('width'))
|
||||
@ -1139,6 +1150,7 @@ class InfoExtractor(object):
|
||||
'width': f.get('width') or width,
|
||||
'height': f.get('height') or height,
|
||||
'format_id': f.get('format_id') if not tbr else format_id,
|
||||
'vcodec': vcodec,
|
||||
})
|
||||
formats.extend(f4m_formats)
|
||||
continue
|
||||
@ -1155,6 +1167,7 @@ class InfoExtractor(object):
|
||||
'tbr': tbr,
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vcodec': vcodec,
|
||||
'preference': preference,
|
||||
})
|
||||
return formats
|
||||
@ -1213,6 +1226,7 @@ class InfoExtractor(object):
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
}]
|
||||
audio_in_video_stream = {}
|
||||
last_info = {}
|
||||
last_media = {}
|
||||
for line in m3u8_doc.splitlines():
|
||||
@ -1222,25 +1236,32 @@ class InfoExtractor(object):
|
||||
media = parse_m3u8_attributes(line)
|
||||
media_type = media.get('TYPE')
|
||||
if media_type in ('VIDEO', 'AUDIO'):
|
||||
group_id = media.get('GROUP-ID')
|
||||
media_url = media.get('URI')
|
||||
if media_url:
|
||||
format_id = []
|
||||
for v in (media.get('GROUP-ID'), media.get('NAME')):
|
||||
for v in (group_id, media.get('NAME')):
|
||||
if v:
|
||||
format_id.append(v)
|
||||
formats.append({
|
||||
f = {
|
||||
'format_id': '-'.join(format_id),
|
||||
'url': format_url(media_url),
|
||||
'language': media.get('LANGUAGE'),
|
||||
'vcodec': 'none' if media_type == 'AUDIO' else None,
|
||||
'ext': ext,
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
})
|
||||
}
|
||||
if media_type == 'AUDIO':
|
||||
f['vcodec'] = 'none'
|
||||
if group_id and not audio_in_video_stream.get(group_id):
|
||||
audio_in_video_stream[group_id] = False
|
||||
formats.append(f)
|
||||
else:
|
||||
# When there is no URI in EXT-X-MEDIA let this tag's
|
||||
# data be used by regular URI lines below
|
||||
last_media = media
|
||||
if media_type == 'AUDIO' and group_id:
|
||||
audio_in_video_stream[group_id] = True
|
||||
elif line.startswith('#') or not line.strip():
|
||||
continue
|
||||
else:
|
||||
@ -1269,9 +1290,10 @@ class InfoExtractor(object):
|
||||
}
|
||||
resolution = last_info.get('RESOLUTION')
|
||||
if resolution:
|
||||
width_str, height_str = resolution.split('x')
|
||||
f['width'] = int(width_str)
|
||||
f['height'] = int(height_str)
|
||||
mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
|
||||
if mobj:
|
||||
f['width'] = int(mobj.group('width'))
|
||||
f['height'] = int(mobj.group('height'))
|
||||
# Unified Streaming Platform
|
||||
mobj = re.search(
|
||||
r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url'])
|
||||
@ -1283,6 +1305,9 @@ class InfoExtractor(object):
|
||||
'abr': abr,
|
||||
})
|
||||
f.update(parse_codecs(last_info.get('CODECS')))
|
||||
if audio_in_video_stream.get(last_info.get('AUDIO')) is False:
|
||||
# TODO: update acodec for for audio only formats with the same GROUP-ID
|
||||
f['acodec'] = 'none'
|
||||
formats.append(f)
|
||||
last_info = {}
|
||||
last_media = {}
|
||||
@ -1529,7 +1554,7 @@ class InfoExtractor(object):
|
||||
if res is False:
|
||||
return []
|
||||
mpd, urlh = res
|
||||
mpd_base_url = re.match(r'https?://.+/', urlh.geturl()).group()
|
||||
mpd_base_url = base_url(urlh.geturl())
|
||||
|
||||
return self._parse_mpd_formats(
|
||||
compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url,
|
||||
@ -1612,11 +1637,6 @@ class InfoExtractor(object):
|
||||
extract_Initialization(segment_template)
|
||||
return ms_info
|
||||
|
||||
def combine_url(base_url, target_url):
|
||||
if re.match(r'^https?://', target_url):
|
||||
return target_url
|
||||
return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
|
||||
|
||||
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
|
||||
formats = []
|
||||
for period in mpd_doc.findall(_add_ns('Period')):
|
||||
@ -1666,12 +1686,11 @@ class InfoExtractor(object):
|
||||
'tbr': int_or_none(representation_attrib.get('bandwidth'), 1000),
|
||||
'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
|
||||
'fps': int_or_none(representation_attrib.get('frameRate')),
|
||||
'vcodec': 'none' if content_type == 'audio' else representation_attrib.get('codecs'),
|
||||
'acodec': 'none' if content_type == 'video' else representation_attrib.get('codecs'),
|
||||
'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
|
||||
'format_note': 'DASH %s' % content_type,
|
||||
'filesize': filesize,
|
||||
}
|
||||
f.update(parse_codecs(representation_attrib.get('codecs')))
|
||||
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
||||
if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
|
||||
|
||||
@ -1691,7 +1710,7 @@ class InfoExtractor(object):
|
||||
representation_ms_info['fragments'] = [{
|
||||
'url': media_template % {
|
||||
'Number': segment_number,
|
||||
'Bandwidth': representation_attrib.get('bandwidth'),
|
||||
'Bandwidth': int_or_none(representation_attrib.get('bandwidth')),
|
||||
},
|
||||
'duration': segment_duration,
|
||||
} for segment_number in range(
|
||||
@ -1709,7 +1728,7 @@ class InfoExtractor(object):
|
||||
def add_segment_url():
|
||||
segment_url = media_template % {
|
||||
'Time': segment_time,
|
||||
'Bandwidth': representation_attrib.get('bandwidth'),
|
||||
'Bandwidth': int_or_none(representation_attrib.get('bandwidth')),
|
||||
'Number': segment_number,
|
||||
}
|
||||
representation_ms_info['fragments'].append({
|
||||
@ -1755,7 +1774,7 @@ class InfoExtractor(object):
|
||||
f['fragments'].append({'url': initialization_url})
|
||||
f['fragments'].extend(representation_ms_info['fragments'])
|
||||
for fragment in f['fragments']:
|
||||
fragment['url'] = combine_url(base_url, fragment['url'])
|
||||
fragment['url'] = urljoin(base_url, fragment['url'])
|
||||
try:
|
||||
existing_format = next(
|
||||
fo for fo in formats
|
||||
@ -1770,7 +1789,106 @@ class InfoExtractor(object):
|
||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||
return formats
|
||||
|
||||
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8'):
|
||||
def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True):
|
||||
res = self._download_webpage_handle(
|
||||
ism_url, video_id,
|
||||
note=note or 'Downloading ISM manifest',
|
||||
errnote=errnote or 'Failed to download ISM manifest',
|
||||
fatal=fatal)
|
||||
if res is False:
|
||||
return []
|
||||
ism, urlh = res
|
||||
|
||||
return self._parse_ism_formats(
|
||||
compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id)
|
||||
|
||||
def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
|
||||
if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None:
|
||||
return []
|
||||
|
||||
duration = int(ism_doc.attrib['Duration'])
|
||||
timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000
|
||||
|
||||
formats = []
|
||||
for stream in ism_doc.findall('StreamIndex'):
|
||||
stream_type = stream.get('Type')
|
||||
if stream_type not in ('video', 'audio'):
|
||||
continue
|
||||
url_pattern = stream.attrib['Url']
|
||||
stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
|
||||
stream_name = stream.get('Name')
|
||||
for track in stream.findall('QualityLevel'):
|
||||
fourcc = track.get('FourCC')
|
||||
# TODO: add support for WVC1 and WMAP
|
||||
if fourcc not in ('H264', 'AVC1', 'AACL'):
|
||||
self.report_warning('%s is not a supported codec' % fourcc)
|
||||
continue
|
||||
tbr = int(track.attrib['Bitrate']) // 1000
|
||||
width = int_or_none(track.get('MaxWidth'))
|
||||
height = int_or_none(track.get('MaxHeight'))
|
||||
sampling_rate = int_or_none(track.get('SamplingRate'))
|
||||
|
||||
track_url_pattern = re.sub(r'{[Bb]itrate}', track.attrib['Bitrate'], url_pattern)
|
||||
track_url_pattern = compat_urlparse.urljoin(ism_url, track_url_pattern)
|
||||
|
||||
fragments = []
|
||||
fragment_ctx = {
|
||||
'time': 0,
|
||||
}
|
||||
stream_fragments = stream.findall('c')
|
||||
for stream_fragment_index, stream_fragment in enumerate(stream_fragments):
|
||||
fragment_ctx['time'] = int_or_none(stream_fragment.get('t')) or fragment_ctx['time']
|
||||
fragment_repeat = int_or_none(stream_fragment.get('r')) or 1
|
||||
fragment_ctx['duration'] = int_or_none(stream_fragment.get('d'))
|
||||
if not fragment_ctx['duration']:
|
||||
try:
|
||||
next_fragment_time = int(stream_fragment[stream_fragment_index + 1].attrib['t'])
|
||||
except IndexError:
|
||||
next_fragment_time = duration
|
||||
fragment_ctx['duration'] = (next_fragment_time - fragment_ctx['time']) / fragment_repeat
|
||||
for _ in range(fragment_repeat):
|
||||
fragments.append({
|
||||
'url': re.sub(r'{start[ _]time}', compat_str(fragment_ctx['time']), track_url_pattern),
|
||||
'duration': fragment_ctx['duration'] / stream_timescale,
|
||||
})
|
||||
fragment_ctx['time'] += fragment_ctx['duration']
|
||||
|
||||
format_id = []
|
||||
if ism_id:
|
||||
format_id.append(ism_id)
|
||||
if stream_name:
|
||||
format_id.append(stream_name)
|
||||
format_id.append(compat_str(tbr))
|
||||
|
||||
formats.append({
|
||||
'format_id': '-'.join(format_id),
|
||||
'url': ism_url,
|
||||
'manifest_url': ism_url,
|
||||
'ext': 'ismv' if stream_type == 'video' else 'isma',
|
||||
'width': width,
|
||||
'height': height,
|
||||
'tbr': tbr,
|
||||
'asr': sampling_rate,
|
||||
'vcodec': 'none' if stream_type == 'audio' else fourcc,
|
||||
'acodec': 'none' if stream_type == 'video' else fourcc,
|
||||
'protocol': 'ism',
|
||||
'fragments': fragments,
|
||||
'_download_params': {
|
||||
'duration': duration,
|
||||
'timescale': stream_timescale,
|
||||
'width': width or 0,
|
||||
'height': height or 0,
|
||||
'fourcc': fourcc,
|
||||
'codec_private_data': track.get('CodecPrivateData'),
|
||||
'sampling_rate': sampling_rate,
|
||||
'channels': int_or_none(track.get('Channels', 2)),
|
||||
'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
|
||||
'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
|
||||
},
|
||||
})
|
||||
return formats
|
||||
|
||||
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None):
|
||||
def absolute_url(video_url):
|
||||
return compat_urlparse.urljoin(base_url, video_url)
|
||||
|
||||
@ -1787,11 +1905,16 @@ class InfoExtractor(object):
|
||||
|
||||
def _media_formats(src, cur_media_type):
|
||||
full_url = absolute_url(src)
|
||||
if determine_ext(full_url) == 'm3u8':
|
||||
ext = determine_ext(full_url)
|
||||
if ext == 'm3u8':
|
||||
is_plain_url = False
|
||||
formats = self._extract_m3u8_formats(
|
||||
full_url, video_id, ext='mp4',
|
||||
entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id)
|
||||
elif ext == 'mpd':
|
||||
is_plain_url = False
|
||||
formats = self._extract_mpd_formats(
|
||||
full_url, video_id, mpd_id=mpd_id)
|
||||
else:
|
||||
is_plain_url = True
|
||||
formats = [{
|
||||
@ -1801,7 +1924,11 @@ class InfoExtractor(object):
|
||||
return is_plain_url, formats
|
||||
|
||||
entries = []
|
||||
for media_tag, media_type, media_content in re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage):
|
||||
media_tags = [(media_tag, media_type, '')
|
||||
for media_tag, media_type
|
||||
in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
|
||||
media_tags.extend(re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage))
|
||||
for media_tag, media_type, media_content in media_tags:
|
||||
media_info = {
|
||||
'formats': [],
|
||||
'subtitles': {},
|
||||
@ -1870,11 +1997,11 @@ class InfoExtractor(object):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
http_base_url + '/manifest.f4m',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
if 'dash' not in skip_protocols:
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
http_base_url + '/manifest.mpd',
|
||||
video_id, mpd_id='dash', fatal=False))
|
||||
if re.search(r'(?:/smil:|\.smil)', url_base):
|
||||
if 'dash' not in skip_protocols:
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
http_base_url + '/manifest.mpd',
|
||||
video_id, mpd_id='dash', fatal=False))
|
||||
if 'smil' not in skip_protocols:
|
||||
rtmp_formats = self._extract_smil_formats(
|
||||
http_base_url + '/jwplayer.smil',
|
||||
@ -2020,6 +2147,12 @@ class InfoExtractor(object):
|
||||
headers['Ytdl-request-proxy'] = geo_verification_proxy
|
||||
return headers
|
||||
|
||||
def _generic_id(self, url):
|
||||
return compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
|
||||
|
||||
def _generic_title(self, url):
|
||||
return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
|
@ -1,13 +1,9 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import url_basename
|
||||
|
||||
|
||||
class RtmpIE(InfoExtractor):
|
||||
@ -23,8 +19,8 @@ class RtmpIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
|
||||
title = compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
|
||||
video_id = self._generic_id(url)
|
||||
title = self._generic_title(url)
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
@ -34,3 +30,31 @@ class RtmpIE(InfoExtractor):
|
||||
'format_id': compat_urlparse.urlparse(url).scheme,
|
||||
}],
|
||||
}
|
||||
|
||||
|
||||
class MmsIE(InfoExtractor):
|
||||
IE_DESC = False # Do not list
|
||||
_VALID_URL = r'(?i)mms://.+'
|
||||
|
||||
_TEST = {
|
||||
# Direct MMS link
|
||||
'url': 'mms://kentro.kaist.ac.kr/200907/MilesReid(0709).wmv',
|
||||
'info_dict': {
|
||||
'id': 'MilesReid(0709)',
|
||||
'ext': 'wmv',
|
||||
'title': 'MilesReid(0709)',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # rtsp downloads, requiring mplayer or mpv
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._generic_id(url)
|
||||
title = self._generic_title(url)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': url,
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@ -150,6 +150,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
# rtmp
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
}, {
|
||||
'url': 'http://www.crunchyroll.com/rezero-starting-life-in-another-world-/episode-5-the-morning-of-our-promise-is-still-distant-702409',
|
||||
'info_dict': {
|
||||
@ -235,7 +236,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style']
|
||||
output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x']
|
||||
output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y']
|
||||
output += """ScaledBorderAndShadow: yes
|
||||
output += """ScaledBorderAndShadow: no
|
||||
|
||||
[V4+ Styles]
|
||||
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
|
||||
|
@ -8,7 +8,7 @@ from ..utils import orderedSet
|
||||
|
||||
|
||||
class CTVNewsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
|
||||
_VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ctvnews.ca/video?clipId=901995',
|
||||
'md5': '10deb320dc0ccb8d01d34d12fc2ea672',
|
||||
@ -40,6 +40,9 @@ class CTVNewsIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.ctvnews.ca/canadiens-send-p-k-subban-to-nashville-in-blockbuster-trade-1.2967231',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://vancouverisland.ctvnews.ca/video?clipId=761241',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -94,7 +94,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
|
||||
'uploader': 'HotWaves1012',
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
'skip': 'video gone',
|
||||
},
|
||||
# geo-restricted, player v5
|
||||
{
|
||||
@ -144,7 +145,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
player_v5 = self._search_regex(
|
||||
[r'buildPlayer\(({.+?})\);\n', # See https://github.com/rg3/youtube-dl/issues/7826
|
||||
r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);',
|
||||
r'buildPlayer\(({.+?})\);'],
|
||||
r'buildPlayer\(({.+?})\);',
|
||||
r'var\s+config\s*=\s*({.+?});'],
|
||||
webpage, 'player v5', default=None)
|
||||
if player_v5:
|
||||
player = self._parse_json(player_v5, video_id)
|
||||
|
@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
@ -9,7 +9,7 @@ from ..utils import (
|
||||
|
||||
class DotsubIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://dotsub.com/view/9c63db2a-fa95-4838-8e6e-13deafe47f09',
|
||||
'md5': '21c7ff600f545358134fea762a6d42b6',
|
||||
'info_dict': {
|
||||
@ -24,7 +24,24 @@ class DotsubIE(InfoExtractor):
|
||||
'upload_date': '20131130',
|
||||
'view_count': int,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'https://dotsub.com/view/747bcf58-bd59-45b7-8c8c-ac312d084ee6',
|
||||
'md5': '2bb4a83896434d5c26be868c609429a3',
|
||||
'info_dict': {
|
||||
'id': '168006778',
|
||||
'ext': 'mp4',
|
||||
'title': 'Apartments and flats in Raipur the white symphony',
|
||||
'description': 'md5:784d0639e6b7d1bc29530878508e38fe',
|
||||
'thumbnail': 're:^https?://dotsub.com/media/747bcf58-bd59-45b7-8c8c-ac312d084ee6/p',
|
||||
'duration': 290,
|
||||
'timestamp': 1476767794.2809999,
|
||||
'upload_date': '20160525',
|
||||
'uploader': 'parthivi001',
|
||||
'uploader_id': 'user52596202',
|
||||
'view_count': int,
|
||||
},
|
||||
'add_ie': ['Vimeo'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@ -37,12 +54,23 @@ class DotsubIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_url = self._search_regex(
|
||||
[r'<source[^>]+src="([^"]+)"', r'"file"\s*:\s*\'([^\']+)'],
|
||||
webpage, 'video url')
|
||||
webpage, 'video url', default=None)
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'flv',
|
||||
}
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'flv',
|
||||
if not video_url:
|
||||
setup_data = self._parse_json(self._html_search_regex(
|
||||
r'(?s)data-setup=([\'"])(?P<content>(?!\1).+?)\1',
|
||||
webpage, 'setup data', group='content'), video_id)
|
||||
info_dict = {
|
||||
'_type': 'url_transparent',
|
||||
'url': setup_data['src'],
|
||||
}
|
||||
|
||||
info_dict.update({
|
||||
'title': info['title'],
|
||||
'description': info.get('description'),
|
||||
'thumbnail': info.get('screenshotURI'),
|
||||
@ -50,4 +78,6 @@ class DotsubIE(InfoExtractor):
|
||||
'uploader': info.get('user'),
|
||||
'timestamp': float_or_none(info.get('dateCreated'), 1000),
|
||||
'view_count': int_or_none(info.get('numberOfViews')),
|
||||
}
|
||||
})
|
||||
|
||||
return info_dict
|
||||
|
@ -8,6 +8,7 @@ import time
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
USER_AGENTS,
|
||||
int_or_none,
|
||||
update_url_query,
|
||||
)
|
||||
@ -102,10 +103,16 @@ class DPlayIE(InfoExtractor):
|
||||
manifest_url, video_id, ext='mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id=protocol, fatal=False)
|
||||
# Sometimes final URLs inside m3u8 are unsigned, let's fix this
|
||||
# ourselves
|
||||
# ourselves. Also fragments' URLs are only served signed for
|
||||
# Safari user agent.
|
||||
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(manifest_url).query)
|
||||
for m3u8_format in m3u8_formats:
|
||||
m3u8_format['url'] = update_url_query(m3u8_format['url'], query)
|
||||
m3u8_format.update({
|
||||
'url': update_url_query(m3u8_format['url'], query),
|
||||
'http_headers': {
|
||||
'User-Agent': USER_AGENTS['Safari'],
|
||||
},
|
||||
})
|
||||
formats.extend(m3u8_formats)
|
||||
elif protocol == 'hds':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
|
@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
|
@ -10,8 +10,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class DrTuberIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?drtuber\.com/video/(?P<id>\d+)/(?P<display_id>[\w-]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?drtuber\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[\w-]+))?'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
|
||||
'md5': '93e680cf2536ad0dfb7e74d94a89facd',
|
||||
'info_dict': {
|
||||
@ -25,20 +25,30 @@ class DrTuberIE(InfoExtractor):
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.drtuber.com/embed/489939',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?drtuber\.com/embed/\d+)',
|
||||
webpage)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
webpage = self._download_webpage(
|
||||
'http://www.drtuber.com/video/%s' % video_id, display_id)
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'<source src="([^"]+)"', webpage, 'video URL')
|
||||
|
||||
title = self._html_search_regex(
|
||||
(r'class="title_watch"[^>]*><p>([^<]+)<',
|
||||
(r'class="title_watch"[^>]*><(?:p|h\d+)[^>]*>([^<]+)<',
|
||||
r'<p[^>]+class="title_substrate">([^<]+)</p>',
|
||||
r'<title>([^<]+) - \d+'),
|
||||
webpage, 'title')
|
||||
|
@ -4,11 +4,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
@ -77,7 +79,7 @@ class EaglePlatformIE(InfoExtractor):
|
||||
if status != 200:
|
||||
raise ExtractorError(' '.join(response['errors']), expected=True)
|
||||
|
||||
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'):
|
||||
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', *args, **kwargs):
|
||||
try:
|
||||
response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note)
|
||||
except ExtractorError as ee:
|
||||
@ -116,29 +118,38 @@ class EaglePlatformIE(InfoExtractor):
|
||||
|
||||
m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id,
|
||||
'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
formats.extend(m3u8_formats)
|
||||
|
||||
mp4_url = self._get_video_url(
|
||||
m3u8_formats_dict = {}
|
||||
for f in m3u8_formats:
|
||||
if f.get('height') is not None:
|
||||
m3u8_formats_dict[f['height']] = f
|
||||
|
||||
mp4_data = self._download_json(
|
||||
# Secure mp4 URL is constructed according to Player.prototype.mp4 from
|
||||
# http://lentaru.media.eagleplatform.com/player/player.js
|
||||
re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4', secure_m3u8),
|
||||
video_id, 'Downloading mp4 JSON')
|
||||
mp4_url_basename = url_basename(mp4_url)
|
||||
for m3u8_format in m3u8_formats:
|
||||
mobj = re.search('/([^/]+)/index\.m3u8', m3u8_format['url'])
|
||||
if mobj:
|
||||
http_format = m3u8_format.copy()
|
||||
video_url = mp4_url.replace(mp4_url_basename, mobj.group(1))
|
||||
if not self._is_valid_url(video_url, video_id):
|
||||
re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4s', secure_m3u8),
|
||||
video_id, 'Downloading mp4 JSON', fatal=False)
|
||||
if mp4_data:
|
||||
for format_id, format_url in mp4_data.get('data', {}).items():
|
||||
if not isinstance(format_url, compat_str):
|
||||
continue
|
||||
http_format.update({
|
||||
'url': video_url,
|
||||
'format_id': m3u8_format['format_id'].replace('hls', 'http'),
|
||||
'protocol': 'http',
|
||||
})
|
||||
formats.append(http_format)
|
||||
height = int_or_none(format_id)
|
||||
if height is not None and m3u8_formats_dict.get(height):
|
||||
f = m3u8_formats_dict[height].copy()
|
||||
f.update({
|
||||
'format_id': f['format_id'].replace('hls', 'http'),
|
||||
'protocol': 'http',
|
||||
})
|
||||
else:
|
||||
f = {
|
||||
'format_id': 'http-%s' % format_id,
|
||||
'height': int_or_none(format_id),
|
||||
}
|
||||
f['url'] = format_url
|
||||
formats.append(f)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@ -1,38 +1,117 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import remove_end
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class ESPNIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/video/clip(?:\?.*?\bid=|/_/id/)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://espn.go.com/video/clip?id=10365079',
|
||||
'md5': '60e5d097a523e767d06479335d1bdc58',
|
||||
'info_dict': {
|
||||
'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
|
||||
'id': '10365079',
|
||||
'ext': 'mp4',
|
||||
'title': '30 for 30 Shorts: Judging Jewell',
|
||||
'description': None,
|
||||
'description': 'md5:39370c2e016cb4ecf498ffe75bef7f0f',
|
||||
'timestamp': 1390936111,
|
||||
'upload_date': '20140128',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['OoyalaExternal'],
|
||||
}, {
|
||||
# intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
|
||||
'url': 'http://espn.go.com/video/clip?id=2743663',
|
||||
'md5': 'f4ac89b59afc7e2d7dbb049523df6768',
|
||||
'info_dict': {
|
||||
'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg',
|
||||
'id': '2743663',
|
||||
'ext': 'mp4',
|
||||
'title': 'Must-See Moments: Best of the MLS season',
|
||||
'description': 'md5:4c2d7232beaea572632bec41004f0aeb',
|
||||
'timestamp': 1449446454,
|
||||
'upload_date': '20151207',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['OoyalaExternal'],
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'http://www.espn.com/video/clip?id=10365079',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.espn.com/video/clip/_/id/17989860',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
clip = self._download_json(
|
||||
'http://api-app.espn.com/v1/video/clips/%s' % video_id,
|
||||
video_id)['videos'][0]
|
||||
|
||||
title = clip['headline']
|
||||
|
||||
format_urls = set()
|
||||
formats = []
|
||||
|
||||
def traverse_source(source, base_source_id=None):
|
||||
for source_id, source in source.items():
|
||||
if isinstance(source, compat_str):
|
||||
extract_source(source, base_source_id)
|
||||
elif isinstance(source, dict):
|
||||
traverse_source(
|
||||
source,
|
||||
'%s-%s' % (base_source_id, source_id)
|
||||
if base_source_id else source_id)
|
||||
|
||||
def extract_source(source_url, source_id=None):
|
||||
if source_url in format_urls:
|
||||
return
|
||||
format_urls.add(source_url)
|
||||
ext = determine_ext(source_url)
|
||||
if ext == 'smil':
|
||||
formats.extend(self._extract_smil_formats(
|
||||
source_url, video_id, fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
source_url, video_id, f4m_id=source_id, fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=source_id, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'format_id': source_id,
|
||||
})
|
||||
|
||||
traverse_source(clip['links']['source'])
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = clip.get('caption') or clip.get('description')
|
||||
thumbnail = clip.get('thumbnail')
|
||||
duration = int_or_none(clip.get('duration'))
|
||||
timestamp = unified_timestamp(clip.get('originalPublishDate'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class ESPNArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
@ -47,11 +126,12 @@ class ESPNIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.espn.com/video/clip?id=10365079',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if ESPNIE.suitable(url) else super(ESPNArticleIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
@ -61,23 +141,5 @@ class ESPNIE(InfoExtractor):
|
||||
r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)',
|
||||
webpage, 'video id', group='id')
|
||||
|
||||
cms = 'espn'
|
||||
if 'data-source="intl"' in webpage:
|
||||
cms = 'intl'
|
||||
player_url = 'https://espn.go.com/video/iframe/twitter/?id=%s&cms=%s' % (video_id, cms)
|
||||
player = self._download_webpage(
|
||||
player_url, video_id)
|
||||
|
||||
pcode = self._search_regex(
|
||||
r'["\']pcode=([^"\']+)["\']', player, 'pcode')
|
||||
|
||||
title = remove_end(
|
||||
self._og_search_title(webpage),
|
||||
'- ESPN Video').strip()
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'ooyalaexternal:%s:%s:%s' % (cms, video_id, pcode),
|
||||
'ie_key': 'OoyalaExternal',
|
||||
'title': title,
|
||||
}
|
||||
return self.url_result(
|
||||
'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key())
|
||||
|
@ -66,6 +66,7 @@ from .arte import (
|
||||
ArteTVDDCIE,
|
||||
ArteTVMagazineIE,
|
||||
ArteTVEmbedIE,
|
||||
TheOperaPlatformIE,
|
||||
ArteTVPlaylistIE,
|
||||
)
|
||||
from .atresplayer import AtresPlayerIE
|
||||
@ -93,7 +94,7 @@ from .bbc import (
|
||||
from .beeg import BeegIE
|
||||
from .behindkink import BehindKinkIE
|
||||
from .bellmedia import BellMediaIE
|
||||
from .beatportpro import BeatportProIE
|
||||
from .beatport import BeatportIE
|
||||
from .bet import BetIE
|
||||
from .bigflix import BigflixIE
|
||||
from .bild import BildIE
|
||||
@ -149,6 +150,7 @@ from .cbsnews import (
|
||||
)
|
||||
from .cbssports import CBSSportsIE
|
||||
from .ccc import CCCIE
|
||||
from .ccma import CCMAIE
|
||||
from .cctv import CCTVIE
|
||||
from .cda import CDAIE
|
||||
from .ceskatelevize import CeskaTelevizeIE
|
||||
@ -179,6 +181,7 @@ from .cnn import (
|
||||
from .coub import CoubIE
|
||||
from .collegerama import CollegeRamaIE
|
||||
from .comedycentral import (
|
||||
ComedyCentralFullEpisodesIE,
|
||||
ComedyCentralIE,
|
||||
ComedyCentralShortnameIE,
|
||||
ComedyCentralTVIE,
|
||||
@ -186,7 +189,10 @@ from .comedycentral import (
|
||||
)
|
||||
from .comcarcoff import ComCarCoffIE
|
||||
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||
from .commonprotocols import RtmpIE
|
||||
from .commonprotocols import (
|
||||
MmsIE,
|
||||
RtmpIE,
|
||||
)
|
||||
from .condenast import CondeNastIE
|
||||
from .cracked import CrackedIE
|
||||
from .crackle import CrackleIE
|
||||
@ -263,7 +269,10 @@ from .engadget import EngadgetIE
|
||||
from .eporner import EpornerIE
|
||||
from .eroprofile import EroProfileIE
|
||||
from .escapist import EscapistIE
|
||||
from .espn import ESPNIE
|
||||
from .espn import (
|
||||
ESPNIE,
|
||||
ESPNArticleIE,
|
||||
)
|
||||
from .esri import EsriVideoIE
|
||||
from .europa import EuropaIE
|
||||
from .everyonesmixtape import EveryonesMixtapeIE
|
||||
@ -292,6 +301,7 @@ from .footyroom import FootyRoomIE
|
||||
from .formula1 import Formula1IE
|
||||
from .fourtube import FourTubeIE
|
||||
from .fox import FOXIE
|
||||
from .fox9 import FOX9IE
|
||||
from .foxgay import FoxgayIE
|
||||
from .foxnews import (
|
||||
FoxNewsIE,
|
||||
@ -310,7 +320,6 @@ from .francetv import (
|
||||
)
|
||||
from .freesound import FreesoundIE
|
||||
from .freespeech import FreespeechIE
|
||||
from .freevideo import FreeVideoIE
|
||||
from .funimation import FunimationIE
|
||||
from .funnyordie import FunnyOrDieIE
|
||||
from .fusion import FusionIE
|
||||
@ -345,7 +354,10 @@ from .goshgay import GoshgayIE
|
||||
from .gputechconf import GPUTechConfIE
|
||||
from .groupon import GrouponIE
|
||||
from .hark import HarkIE
|
||||
from .hbo import HBOIE
|
||||
from .hbo import (
|
||||
HBOIE,
|
||||
HBOEpisodeIE,
|
||||
)
|
||||
from .hearthisat import HearThisAtIE
|
||||
from .heise import HeiseIE
|
||||
from .hellporno import HellPornoIE
|
||||
@ -366,6 +378,7 @@ from .hrti import (
|
||||
HRTiIE,
|
||||
HRTiPlaylistIE,
|
||||
)
|
||||
from .huajiao import HuajiaoIE
|
||||
from .huffpost import HuffPostIE
|
||||
from .hypem import HypemIE
|
||||
from .iconosquare import IconosquareIE
|
||||
@ -400,6 +413,10 @@ from .ivi import (
|
||||
from .ivideon import IvideonIE
|
||||
from .iwara import IwaraIE
|
||||
from .izlesene import IzleseneIE
|
||||
from .jamendo import (
|
||||
JamendoIE,
|
||||
JamendoAlbumIE,
|
||||
)
|
||||
from .jeuxvideo import JeuxVideoIE
|
||||
from .jove import JoveIE
|
||||
from .jwplatform import JWPlatformIE
|
||||
@ -429,7 +446,10 @@ from .kuwo import (
|
||||
KuwoMvIE,
|
||||
)
|
||||
from .la7 import LA7IE
|
||||
from .laola1tv import Laola1TvIE
|
||||
from .laola1tv import (
|
||||
Laola1TvEmbedIE,
|
||||
Laola1TvIE,
|
||||
)
|
||||
from .lci import LCIIE
|
||||
from .lcp import (
|
||||
LcpPlayIE,
|
||||
@ -437,6 +457,7 @@ from .lcp import (
|
||||
)
|
||||
from .learnr import LearnrIE
|
||||
from .lecture2go import Lecture2GoIE
|
||||
from .lego import LEGOIE
|
||||
from .lemonde import LemondeIE
|
||||
from .leeco import (
|
||||
LeIE,
|
||||
@ -480,6 +501,8 @@ from .mangomolo import (
|
||||
)
|
||||
from .matchtv import MatchTVIE
|
||||
from .mdr import MDRIE
|
||||
from .meipai import MeipaiIE
|
||||
from .melonvod import MelonVODIE
|
||||
from .meta import METAIE
|
||||
from .metacafe import MetacafeIE
|
||||
from .metacritic import MetacriticIE
|
||||
@ -583,6 +606,7 @@ from .nhl import (
|
||||
from .nick import (
|
||||
NickIE,
|
||||
NickDeIE,
|
||||
NickNightIE,
|
||||
)
|
||||
from .niconico import NiconicoIE, NiconicoPlaylistIE
|
||||
from .ninecninemedia import (
|
||||
@ -592,6 +616,7 @@ from .ninecninemedia import (
|
||||
from .ninegag import NineGagIE
|
||||
from .ninenow import NineNowIE
|
||||
from .nintendo import NintendoIE
|
||||
from .nobelprize import NobelPrizeIE
|
||||
from .noco import NocoIE
|
||||
from .normalboots import NormalbootsIE
|
||||
from .nosvideo import NosVideoIE
|
||||
@ -629,6 +654,8 @@ from .nrk import (
|
||||
NRKPlaylistIE,
|
||||
NRKSkoleIE,
|
||||
NRKTVIE,
|
||||
NRKTVDirekteIE,
|
||||
NRKTVEpisodesIE,
|
||||
)
|
||||
from .ntvde import NTVDeIE
|
||||
from .ntvru import NTVRuIE
|
||||
@ -637,9 +664,11 @@ from .nytimes import (
|
||||
NYTimesArticleIE,
|
||||
)
|
||||
from .nuvid import NuvidIE
|
||||
from .nzz import NZZIE
|
||||
from .odatv import OdaTVIE
|
||||
from .odnoklassniki import OdnoklassnikiIE
|
||||
from .oktoberfesttv import OktoberfestTVIE
|
||||
from .ondemandkorea import OnDemandKoreaIE
|
||||
from .onet import (
|
||||
OnetIE,
|
||||
OnetChannelIE,
|
||||
@ -657,6 +686,7 @@ from .orf import (
|
||||
ORFFM4IE,
|
||||
ORFIPTVIE,
|
||||
)
|
||||
from .pandatv import PandaTVIE
|
||||
from .pandoratv import PandoraTVIE
|
||||
from .parliamentliveuk import ParliamentLiveUKIE
|
||||
from .patreon import PatreonIE
|
||||
@ -669,6 +699,7 @@ from .periscope import (
|
||||
from .philharmoniedeparis import PhilharmonieDeParisIE
|
||||
from .phoenix import PhoenixIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .piksel import PikselIE
|
||||
from .pinkbike import PinkbikeIE
|
||||
from .pladform import PladformIE
|
||||
from .playfm import PlayFMIE
|
||||
@ -730,6 +761,10 @@ from .rbmaradio import RBMARadioIE
|
||||
from .rds import RDSIE
|
||||
from .redtube import RedTubeIE
|
||||
from .regiotv import RegioTVIE
|
||||
from .rentv import (
|
||||
RENTVIE,
|
||||
RENTVArticleIE,
|
||||
)
|
||||
from .restudy import RestudyIE
|
||||
from .reuters import ReutersIE
|
||||
from .reverbnation import ReverbNationIE
|
||||
@ -779,15 +814,18 @@ from .scivee import SciVeeIE
|
||||
from .screencast import ScreencastIE
|
||||
from .screencastomatic import ScreencastOMaticIE
|
||||
from .screenjunkies import ScreenJunkiesIE
|
||||
from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE
|
||||
from .seeker import SeekerIE
|
||||
from .senateisvp import SenateISVPIE
|
||||
from .sendtonews import SendtoNewsIE
|
||||
from .servingsys import ServingSysIE
|
||||
from .sexu import SexuIE
|
||||
from .shahid import ShahidIE
|
||||
from .shared import SharedIE
|
||||
from .shared import (
|
||||
SharedIE,
|
||||
VivoIE,
|
||||
)
|
||||
from .sharesix import ShareSixIE
|
||||
from .showroomlive import ShowRoomLiveIE
|
||||
from .sina import SinaIE
|
||||
from .sixplay import SixPlayIE
|
||||
from .skynewsarabia import (
|
||||
@ -869,6 +907,7 @@ from .teachertube import (
|
||||
)
|
||||
from .teachingchannel import TeachingChannelIE
|
||||
from .teamcoco import TeamcocoIE
|
||||
from .teamfourstar import TeamFourStarIE
|
||||
from .techtalks import TechTalksIE
|
||||
from .ted import TEDIE
|
||||
from .tele13 import Tele13IE
|
||||
@ -890,8 +929,10 @@ from .theplatform import (
|
||||
from .thescene import TheSceneIE
|
||||
from .thesixtyone import TheSixtyOneIE
|
||||
from .thestar import TheStarIE
|
||||
from .theweatherchannel import TheWeatherChannelIE
|
||||
from .thisamericanlife import ThisAmericanLifeIE
|
||||
from .thisav import ThisAVIE
|
||||
from .thisoldhouse import ThisOldHouseIE
|
||||
from .threeqsdn import ThreeQSDNIE
|
||||
from .tinypic import TinyPicIE
|
||||
from .tlc import TlcDeIE
|
||||
@ -906,6 +947,7 @@ from .tnaflix import (
|
||||
MovieFapIE,
|
||||
)
|
||||
from .toggle import ToggleIE
|
||||
from .tonline import TOnlineIE
|
||||
from .toutv import TouTvIE
|
||||
from .toypics import ToypicsUserIE, ToypicsIE
|
||||
from .traileraddict import TrailerAddictIE
|
||||
@ -934,6 +976,10 @@ from .tv2 import (
|
||||
)
|
||||
from .tv3 import TV3IE
|
||||
from .tv4 import TV4IE
|
||||
from .tvanouvelles import (
|
||||
TVANouvellesIE,
|
||||
TVANouvellesArticleIE,
|
||||
)
|
||||
from .tvc import (
|
||||
TVCIE,
|
||||
TVCArticleIE,
|
||||
@ -962,7 +1008,10 @@ from .twitch import (
|
||||
TwitchChapterIE,
|
||||
TwitchVodIE,
|
||||
TwitchProfileIE,
|
||||
TwitchAllVideosIE,
|
||||
TwitchUploadsIE,
|
||||
TwitchPastBroadcastsIE,
|
||||
TwitchHighlightsIE,
|
||||
TwitchStreamIE,
|
||||
TwitchClipsIE,
|
||||
)
|
||||
@ -976,6 +1025,7 @@ from .udemy import (
|
||||
UdemyCourseIE
|
||||
)
|
||||
from .udn import UDNEmbedIE
|
||||
from .uktvplay import UKTVPlayIE
|
||||
from .digiteka import DigitekaIE
|
||||
from .unistra import UnistraIE
|
||||
from .uol import UOLIE
|
||||
@ -1015,6 +1065,7 @@ from .vice import (
|
||||
from .viceland import VicelandIE
|
||||
from .vidbit import VidbitIE
|
||||
from .viddler import ViddlerIE
|
||||
from .videa import VideaIE
|
||||
from .videodetective import VideoDetectiveIE
|
||||
from .videofyme import VideofyMeIE
|
||||
from .videomega import VideoMegaIE
|
||||
@ -1059,6 +1110,11 @@ from .viki import (
|
||||
VikiIE,
|
||||
VikiChannelIE,
|
||||
)
|
||||
from .viu import (
|
||||
ViuIE,
|
||||
ViuPlaylistIE,
|
||||
ViuOTTIE,
|
||||
)
|
||||
from .vk import (
|
||||
VKIE,
|
||||
VKUserVideosIE,
|
||||
@ -1073,7 +1129,9 @@ from .vporn import VpornIE
|
||||
from .vrt import VRTIE
|
||||
from .vube import VubeIE
|
||||
from .vuclip import VuClipIE
|
||||
from .vvvvid import VVVVIDIE
|
||||
from .vyborymos import VyboryMosIE
|
||||
from .vzaar import VzaarIE
|
||||
from .walla import WallaIE
|
||||
from .washingtonpost import (
|
||||
WashingtonPostIE,
|
||||
@ -1085,6 +1143,10 @@ from .wdr import (
|
||||
WDRIE,
|
||||
WDRMobileIE,
|
||||
)
|
||||
from .webcaster import (
|
||||
WebcasterIE,
|
||||
WebcasterFeedIE,
|
||||
)
|
||||
from .webofstories import (
|
||||
WebOfStoriesIE,
|
||||
WebOfStoriesPlaylistIE,
|
||||
|
@ -1,6 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
import socket
|
||||
|
||||
@ -28,7 +27,7 @@ class FacebookIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
https?://
|
||||
(?:[\w-]+\.)?facebook\.com/
|
||||
(?:[\w-]+\.)?(?:facebook\.com|facebookcorewwwi\.onion)/
|
||||
(?:[^#]*?\#!/)?
|
||||
(?:
|
||||
(?:
|
||||
@ -100,7 +99,8 @@ class FacebookIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': '"What are you doing running in the snow?"',
|
||||
'uploader': 'FailArmy',
|
||||
}
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
}, {
|
||||
'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903',
|
||||
'md5': '1deb90b6ac27f7efcf6d747c8a27f5e3',
|
||||
@ -110,6 +110,7 @@ class FacebookIE(InfoExtractor):
|
||||
'title': 'What the Flock Is Going On In New Zealand Credit: ViralHog',
|
||||
'uploader': 'S. Saint',
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
}, {
|
||||
'note': 'swf params escaped',
|
||||
'url': 'https://www.facebook.com/barackobama/posts/10153664894881749',
|
||||
@ -119,6 +120,18 @@ class FacebookIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Facebook video #10153664894881749',
|
||||
},
|
||||
}, {
|
||||
# have 1080P, but only up to 720p in swf params
|
||||
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
|
||||
'md5': '0d9813160b146b3bc8744e006027fcc6',
|
||||
'info_dict': {
|
||||
'id': '10155529876156509',
|
||||
'ext': 'mp4',
|
||||
'title': 'Holocaust survivor becomes US citizen',
|
||||
'timestamp': 1477818095,
|
||||
'upload_date': '20161030',
|
||||
'uploader': 'CNN',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
||||
'only_matching': True,
|
||||
@ -137,6 +150,9 @@ class FacebookIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@ -227,41 +243,13 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
video_data = None
|
||||
|
||||
BEFORE = '{swf.addParam(param[0], param[1]);});'
|
||||
AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
|
||||
PATTERN = re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER)
|
||||
|
||||
for m in re.findall(PATTERN, webpage):
|
||||
swf_params = m.replace('\\\\', '\\').replace('\\"', '"')
|
||||
data = dict(json.loads(swf_params))
|
||||
params_raw = compat_urllib_parse_unquote(data['params'])
|
||||
video_data_candidate = json.loads(params_raw)['video_data']
|
||||
for _, f in video_data_candidate.items():
|
||||
if not f:
|
||||
continue
|
||||
if isinstance(f, dict):
|
||||
f = [f]
|
||||
if not isinstance(f, list):
|
||||
continue
|
||||
if f[0].get('video_id') == video_id:
|
||||
video_data = video_data_candidate
|
||||
break
|
||||
if video_data:
|
||||
break
|
||||
|
||||
def video_data_list2dict(video_data):
|
||||
ret = {}
|
||||
for item in video_data:
|
||||
format_id = item['stream_type']
|
||||
ret.setdefault(format_id, []).append(item)
|
||||
return ret
|
||||
|
||||
if not video_data:
|
||||
server_js_data = self._parse_json(self._search_regex(
|
||||
r'handleServerJS\(({.+})\);', webpage, 'server js data', default='{}'), video_id)
|
||||
for item in server_js_data.get('instances', []):
|
||||
if item[1][0] == 'VideoConfig':
|
||||
video_data = video_data_list2dict(item[2][0]['videoData'])
|
||||
server_js_data = self._parse_json(self._search_regex(
|
||||
r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id)
|
||||
for item in server_js_data.get('instances', []):
|
||||
if item[1][0] == 'VideoConfig':
|
||||
video_item = item[2][0]
|
||||
if video_item.get('video_id') == video_id:
|
||||
video_data = video_item['videoData']
|
||||
break
|
||||
|
||||
if not video_data:
|
||||
@ -272,11 +260,14 @@ class FacebookIE(InfoExtractor):
|
||||
raise ExtractorError(
|
||||
'The video is not available, Facebook said: "%s"' % m_msg.group(1),
|
||||
expected=True)
|
||||
elif '>You must log in to continue' in webpage:
|
||||
self.raise_login_required()
|
||||
else:
|
||||
raise ExtractorError('Cannot parse data')
|
||||
|
||||
formats = []
|
||||
for format_id, f in video_data.items():
|
||||
for f in video_data:
|
||||
format_id = f['stream_type']
|
||||
if f and isinstance(f, dict):
|
||||
f = [f]
|
||||
if not f or not isinstance(f, list):
|
||||
|
@ -1,4 +1,4 @@
|
||||
# encoding: utf-8
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user