Compare commits
75 Commits
2017.08.18
...
2017.09.11
Author | SHA1 | Date | |
---|---|---|---|
|
7dacceae75 | ||
|
43df248f10 | ||
|
f12a6e88b2 | ||
|
806498cf2f | ||
|
b98339b54b | ||
|
bf6ec2fea9 | ||
|
c3dd44e085 | ||
|
c7e327c4d4 | ||
|
48b813748d | ||
|
debed8d759 | ||
|
51aee72d16 | ||
|
931edb2ada | ||
|
5113b69124 | ||
|
66c9fa36c1 | ||
|
c5c9bf0c12 | ||
|
880fa66f4f | ||
|
6348671c4a | ||
|
efc57145c1 | ||
|
e9b865267a | ||
|
bc35f07537 | ||
|
0b4a8eb3ac | ||
|
c1c1585b31 | ||
|
0cbb841ba9 | ||
|
d7c7100e3d | ||
|
73602bcd0c | ||
|
23b2df82c7 | ||
|
503115540d | ||
|
64f0e30b93 | ||
|
a3431e1224 | ||
|
a2022b0c40 | ||
|
8681ed7fc8 | ||
|
8d81f3e36d | ||
|
7998520933 | ||
|
5b4bfbfc3b | ||
|
53647dfd0a | ||
|
22f65a9efc | ||
|
c75c384fb6 | ||
|
1b41da488d | ||
|
fea82c1780 | ||
|
3902cdd0e3 | ||
|
2cfa7cbdd0 | ||
|
cc0412ef91 | ||
|
1c9c8de29e | ||
|
f031b76065 | ||
|
62c06c593d | ||
|
ff17be3ac9 | ||
|
1ed4549942 | ||
|
dd121cc1ca | ||
|
a3c3a1e128 | ||
|
085d9dd9be | ||
|
151978f38a | ||
|
c7121fa7b8 | ||
|
745968bc72 | ||
|
df235dbba8 | ||
|
c4bdc68113 | ||
|
5bae33485c | ||
|
0830f3e048 | ||
|
8d7a24aff6 | ||
|
37d9af306a | ||
|
e01c3d2ef7 | ||
|
05915e379a | ||
|
7b67b60773 | ||
|
8d9c2a681a | ||
|
903d4d1625 | ||
|
8239c6791a | ||
|
b359e977b9 | ||
|
305d99f0bd | ||
|
d3d45e0a45 | ||
|
381ad4f309 | ||
|
e2481b9b6e | ||
|
09747ba766 | ||
|
f8f18f332f | ||
|
95f3f7c20a | ||
|
f5469da9e6 | ||
|
d14d9d8903 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.09.11*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.18**
|
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.09.11**
|
||||||
|
|
||||||
### Before submitting an *issue* make sure you have:
|
### Before submitting an *issue* make sure you have:
|
||||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||||
@@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2017.08.18
|
[debug] youtube-dl version 2017.09.11
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
@@ -3,7 +3,7 @@
|
|||||||
$ youtube-dl -v <your command line>
|
$ youtube-dl -v <your command line>
|
||||||
[debug] System config: []
|
[debug] System config: []
|
||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2015.12.06
|
[debug] youtube-dl version 2015.12.06
|
||||||
[debug] Git HEAD: 135392e
|
[debug] Git HEAD: 135392e
|
||||||
@@ -34,7 +34,7 @@ For bug reports, this means that your report should contain the *complete* outpu
|
|||||||
|
|
||||||
If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
|
If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
|
||||||
|
|
||||||
**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `http://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `http://www.youtube.com/`) is *not* an example URL.
|
**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.
|
||||||
|
|
||||||
### Are you using the latest version?
|
### Are you using the latest version?
|
||||||
|
|
||||||
@@ -70,7 +70,7 @@ It may sound strange, but some bug reports we receive are completely unrelated t
|
|||||||
|
|
||||||
# DEVELOPER INSTRUCTIONS
|
# DEVELOPER INSTRUCTIONS
|
||||||
|
|
||||||
Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
|
Most users do not need to build youtube-dl and can [download the builds](https://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
|
||||||
|
|
||||||
To run youtube-dl as a developer, you don't need to build anything either. Simply execute
|
To run youtube-dl as a developer, you don't need to build anything either. Simply execute
|
||||||
|
|
||||||
@@ -82,6 +82,8 @@ To run the test, simply invoke your favorite test runner, or execute a test file
|
|||||||
python test/test_download.py
|
python test/test_download.py
|
||||||
nosetests
|
nosetests
|
||||||
|
|
||||||
|
See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
|
||||||
|
|
||||||
If you want to create a build of youtube-dl yourself, you'll need
|
If you want to create a build of youtube-dl yourself, you'll need
|
||||||
|
|
||||||
* python
|
* python
|
||||||
@@ -118,7 +120,7 @@ After you have ensured this site is distributing its content legally, you can fo
|
|||||||
class YourExtractorIE(InfoExtractor):
|
class YourExtractorIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://yourextractor.com/watch/42',
|
'url': 'https://yourextractor.com/watch/42',
|
||||||
'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
|
'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '42',
|
'id': '42',
|
||||||
@@ -149,10 +151,10 @@ After you have ensured this site is distributing its content legally, you can fo
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with an `only_matching` key in the test's dict are not counted.
|
||||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
|
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
|
||||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
||||||
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
|
||||||
|
|
||||||
$ git add youtube_dl/extractor/extractors.py
|
$ git add youtube_dl/extractor/extractors.py
|
||||||
$ git add youtube_dl/extractor/yourextractor.py
|
$ git add youtube_dl/extractor/yourextractor.py
|
||||||
|
99
ChangeLog
99
ChangeLog
@@ -1,3 +1,100 @@
|
|||||||
|
version 2017.09.11
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [rutube:playlist] Fix suitable (#14166)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.09.10
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [utils] Introduce bool_or_none
|
||||||
|
* [YoutubeDL] Ensure dir existence for each requested format (#14116)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [fox] Fix extraction (#14147)
|
||||||
|
* [rutube] Use bool_or_none
|
||||||
|
* [rutube] Rework and generalize playlist extractors (#13565)
|
||||||
|
+ [rutube:playlist] Add support for playlists (#13534, #13565)
|
||||||
|
+ [radiocanada] Add fallback for title extraction (#14145)
|
||||||
|
* [vk] Use dedicated YouTube embeds extraction routine
|
||||||
|
* [vice] Use dedicated YouTube embeds extraction routine
|
||||||
|
* [cracked] Use dedicated YouTube embeds extraction routine
|
||||||
|
* [chilloutzone] Use dedicated YouTube embeds extraction routine
|
||||||
|
* [abcnews] Use dedicated YouTube embeds extraction routine
|
||||||
|
* [youtube] Separate methods for embeds extraction
|
||||||
|
* [redtube] Fix formats extraction (#14122)
|
||||||
|
* [arte] Relax unavailability check (#14112)
|
||||||
|
+ [manyvids] Add support for preview videos from manyvids.com (#14053, #14059)
|
||||||
|
* [vidme:user] Relax URL regular expression (#14054)
|
||||||
|
* [bpb] Fix extraction (#14043, #14086)
|
||||||
|
* [soundcloud] Fix download URL with private tracks (#14093)
|
||||||
|
* [aliexpress:live] Add support for live.aliexpress.com (#13698, #13707)
|
||||||
|
* [viidea] Capture and output lecture error message (#14099)
|
||||||
|
* [radiocanada] Skip unsupported platforms (#14100)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.09.02
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube] Force old layout for each webpage (#14068, #14072, #14074, #14076,
|
||||||
|
#14077, #14079, #14082, #14083, #14094, #14095, #14096)
|
||||||
|
* [youtube] Fix upload date extraction (#14065)
|
||||||
|
+ [charlierose] Add support for episodes (#14062)
|
||||||
|
+ [bbccouk] Add support for w-prefixed ids (#14056)
|
||||||
|
* [googledrive] Extend URL regular expression (#9785)
|
||||||
|
+ [googledrive] Add support for source format (#14046)
|
||||||
|
* [pornhd] Fix extraction (#14005)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.08.27.1
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
|
||||||
|
* [youtube] Fix extraction with --youtube-skip-dash-manifest enabled (#14037)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.08.27
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [extractor/common] Extract height and format id for HTML5 videos (#14034)
|
||||||
|
* [downloader/http] Rework HTTP downloader (#506, #809, #2849, #4240, #6023,
|
||||||
|
#8625, #9483)
|
||||||
|
* Simplify code and split into separate routines to facilitate maintaining
|
||||||
|
* Make retry mechanism work on errors during actual download not only
|
||||||
|
during connection establishment phase
|
||||||
|
* Retry on ECONNRESET and ETIMEDOUT during reading data from network
|
||||||
|
* Retry on content too short
|
||||||
|
* Show error description on retry
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [generic] Lower preference for extraction from LD-JSON
|
||||||
|
* [rai] Fix audio formats extraction (#14024)
|
||||||
|
* [youtube] Fix controversy videos extraction (#14027, #14029)
|
||||||
|
* [mixcloud] Fix extraction (#14015, #14020)
|
||||||
|
|
||||||
|
|
||||||
|
version 2017.08.23
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [extractor/common] Introduce _parse_xml
|
||||||
|
* [extractor/common] Make HLS and DASH extraction in _parse_html5_media_entries
|
||||||
|
non fatal (#13970)
|
||||||
|
* [utils] Fix unescapeHTML for malformed string like "&a&quot;" (#13935)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [cbc:watch] Bypass geo restriction (#13993)
|
||||||
|
* [toutv] Relax DRM check (#13994)
|
||||||
|
+ [googledrive] Add support for subtitles (#13619, #13638)
|
||||||
|
* [pornhub] Relax uploader regular expression (#13906, #13975)
|
||||||
|
* [bandcamp:album] Extract track titles (#13962)
|
||||||
|
+ [bbccouk] Add support for events URLs (#13893)
|
||||||
|
+ [liveleak] Support multi-video pages (#6542)
|
||||||
|
+ [liveleak] Support another liveleak embedding pattern (#13336)
|
||||||
|
* [cda] Fix extraction (#13935)
|
||||||
|
+ [laola1tv] Add support for tv.ittf.com (#13965)
|
||||||
|
* [mixcloud] Fix extraction (#13958, #13974, #13980, #14003)
|
||||||
|
|
||||||
|
|
||||||
version 2017.08.18
|
version 2017.08.18
|
||||||
|
|
||||||
Core
|
Core
|
||||||
@@ -118,7 +215,7 @@ Extractors
|
|||||||
* [youku:show] Fix playlist extraction (#13248)
|
* [youku:show] Fix playlist extraction (#13248)
|
||||||
+ [dispeak] Recognize sevt subdomain (#13276)
|
+ [dispeak] Recognize sevt subdomain (#13276)
|
||||||
* [adn] Improve error reporting (#13663)
|
* [adn] Improve error reporting (#13663)
|
||||||
* [crunchyroll] Relax series and season regex (#13659)
|
* [crunchyroll] Relax series and season regular expression (#13659)
|
||||||
+ [spiegel:article] Add support for nexx iframe embeds (#13029)
|
+ [spiegel:article] Add support for nexx iframe embeds (#13029)
|
||||||
+ [nexx:embed] Add support for iframe embeds
|
+ [nexx:embed] Add support for iframe embeds
|
||||||
* [nexx] Improve JS embed extraction
|
* [nexx] Improve JS embed extraction
|
||||||
|
11
Makefile
11
Makefile
@@ -46,8 +46,15 @@ tar: youtube-dl.tar.gz
|
|||||||
pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 youtube-dl.fish
|
pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 youtube-dl.fish
|
||||||
|
|
||||||
youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
|
youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
|
||||||
zip --quiet youtube-dl youtube_dl/*.py youtube_dl/*/*.py
|
mkdir -p zip
|
||||||
zip --quiet --junk-paths youtube-dl youtube_dl/__main__.py
|
for d in youtube_dl youtube_dl/downloader youtube_dl/extractor youtube_dl/postprocessor ; do \
|
||||||
|
mkdir -p zip/$$d ;\
|
||||||
|
cp -pPR $$d/*.py zip/$$d/ ;\
|
||||||
|
done
|
||||||
|
touch -t 200001010101 zip/youtube_dl/*.py zip/youtube_dl/*/*.py
|
||||||
|
mv zip/youtube_dl/__main__.py zip/
|
||||||
|
cd zip ; zip -q ../youtube-dl youtube_dl/*.py youtube_dl/*/*.py __main__.py
|
||||||
|
rm -rf zip
|
||||||
echo '#!$(PYTHON)' > youtube-dl
|
echo '#!$(PYTHON)' > youtube-dl
|
||||||
cat youtube-dl.zip >> youtube-dl
|
cat youtube-dl.zip >> youtube-dl
|
||||||
rm youtube-dl.zip
|
rm youtube-dl.zip
|
||||||
|
54
README.md
54
README.md
@@ -25,7 +25,7 @@ If you do not have curl, you can alternatively use a recent wget:
|
|||||||
sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
|
sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
|
||||||
sudo chmod a+rx /usr/local/bin/youtube-dl
|
sudo chmod a+rx /usr/local/bin/youtube-dl
|
||||||
|
|
||||||
Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`).
|
Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](https://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`).
|
||||||
|
|
||||||
You can also use pip:
|
You can also use pip:
|
||||||
|
|
||||||
@@ -33,7 +33,7 @@ You can also use pip:
|
|||||||
|
|
||||||
This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information.
|
This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information.
|
||||||
|
|
||||||
OS X users can install youtube-dl with [Homebrew](http://brew.sh/):
|
OS X users can install youtube-dl with [Homebrew](https://brew.sh/):
|
||||||
|
|
||||||
brew install youtube-dl
|
brew install youtube-dl
|
||||||
|
|
||||||
@@ -458,7 +458,7 @@ You can also use `--config-location` if you want to use custom configuration fil
|
|||||||
|
|
||||||
### Authentication with `.netrc` file
|
### Authentication with `.netrc` file
|
||||||
|
|
||||||
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you:
|
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you:
|
||||||
```
|
```
|
||||||
touch $HOME/.netrc
|
touch $HOME/.netrc
|
||||||
chmod a-rwx,u+rw $HOME/.netrc
|
chmod a-rwx,u+rw $HOME/.netrc
|
||||||
@@ -485,7 +485,7 @@ The `-o` option allows users to indicate a template for the output file names.
|
|||||||
|
|
||||||
**tl;dr:** [navigate me to examples](#output-template-examples).
|
**tl;dr:** [navigate me to examples](#output-template-examples).
|
||||||
|
|
||||||
The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operations. Allowed names along with sequence type are:
|
The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operation. Allowed names along with sequence type are:
|
||||||
|
|
||||||
- `id` (string): Video identifier
|
- `id` (string): Video identifier
|
||||||
- `title` (string): Video title
|
- `title` (string): Video title
|
||||||
@@ -603,7 +603,7 @@ $ youtube-dl -o '%(uploader)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)
|
|||||||
$ youtube-dl -u user -p password -o '~/MyVideos/%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s' https://www.udemy.com/java-tutorial/
|
$ youtube-dl -u user -p password -o '~/MyVideos/%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s' https://www.udemy.com/java-tutorial/
|
||||||
|
|
||||||
# Download entire series season keeping each series and each season in separate directory under C:/MyVideos
|
# Download entire series season keeping each series and each season in separate directory under C:/MyVideos
|
||||||
$ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" http://videomore.ru/kino_v_detalayah/5_sezon/367617
|
$ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" https://videomore.ru/kino_v_detalayah/5_sezon/367617
|
||||||
|
|
||||||
# Stream the video being downloaded to stdout
|
# Stream the video being downloaded to stdout
|
||||||
$ youtube-dl -o - BaW_jenozKc
|
$ youtube-dl -o - BaW_jenozKc
|
||||||
@@ -716,17 +716,17 @@ $ youtube-dl --dateafter 20000101 --datebefore 20091231
|
|||||||
|
|
||||||
### How do I update youtube-dl?
|
### How do I update youtube-dl?
|
||||||
|
|
||||||
If you've followed [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`).
|
If you've followed [our manual installation instructions](https://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`).
|
||||||
|
|
||||||
If you have used pip, a simple `sudo pip install -U youtube-dl` is sufficient to update.
|
If you have used pip, a simple `sudo pip install -U youtube-dl` is sufficient to update.
|
||||||
|
|
||||||
If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to http://yt-dl.org/ to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. You can (and should) complain to your distribution in their bugtracker or support forum.
|
If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to https://yt-dl.org to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. You can (and should) complain to your distribution in their bugtracker or support forum.
|
||||||
|
|
||||||
As a last resort, you can also uninstall the version installed by your package manager and follow our manual installation instructions. For that, remove the distribution's package, with a line like
|
As a last resort, you can also uninstall the version installed by your package manager and follow our manual installation instructions. For that, remove the distribution's package, with a line like
|
||||||
|
|
||||||
sudo apt-get remove -y youtube-dl
|
sudo apt-get remove -y youtube-dl
|
||||||
|
|
||||||
Afterwards, simply follow [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html):
|
Afterwards, simply follow [our manual installation instructions](https://rg3.github.io/youtube-dl/download.html):
|
||||||
|
|
||||||
```
|
```
|
||||||
sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl
|
sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl
|
||||||
@@ -766,11 +766,11 @@ Apparently YouTube requires you to pass a CAPTCHA test if you download too much.
|
|||||||
|
|
||||||
youtube-dl works fine on its own on most sites. However, if you want to convert video/audio, you'll need [avconv](https://libav.org/) or [ffmpeg](https://www.ffmpeg.org/). On some sites - most notably YouTube - videos can be retrieved in a higher quality format without sound. youtube-dl will detect whether avconv/ffmpeg is present and automatically pick the best option.
|
youtube-dl works fine on its own on most sites. However, if you want to convert video/audio, you'll need [avconv](https://libav.org/) or [ffmpeg](https://www.ffmpeg.org/). On some sites - most notably YouTube - videos can be retrieved in a higher quality format without sound. youtube-dl will detect whether avconv/ffmpeg is present and automatically pick the best option.
|
||||||
|
|
||||||
Videos or video formats streamed via RTMP protocol can only be downloaded when [rtmpdump](https://rtmpdump.mplayerhq.hu/) is installed. Downloading MMS and RTSP videos requires either [mplayer](http://mplayerhq.hu/) or [mpv](https://mpv.io/) to be installed.
|
Videos or video formats streamed via RTMP protocol can only be downloaded when [rtmpdump](https://rtmpdump.mplayerhq.hu/) is installed. Downloading MMS and RTSP videos requires either [mplayer](https://mplayerhq.hu/) or [mpv](https://mpv.io/) to be installed.
|
||||||
|
|
||||||
### I have downloaded a video but how can I play it?
|
### I have downloaded a video but how can I play it?
|
||||||
|
|
||||||
Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org/) or [mplayer](http://www.mplayerhq.hu/).
|
Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](https://www.videolan.org/) or [mplayer](https://www.mplayerhq.hu/).
|
||||||
|
|
||||||
### I extracted a video URL with `-g`, but it does not play on another machine / in my web browser.
|
### I extracted a video URL with `-g`, but it does not play on another machine / in my web browser.
|
||||||
|
|
||||||
@@ -845,10 +845,10 @@ Use the `-o` to specify an [output template](#output-template), for example `-o
|
|||||||
|
|
||||||
### How do I download a video starting with a `-`?
|
### How do I download a video starting with a `-`?
|
||||||
|
|
||||||
Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the options with `--`:
|
Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the options with `--`:
|
||||||
|
|
||||||
youtube-dl -- -wNyEUrxzFU
|
youtube-dl -- -wNyEUrxzFU
|
||||||
youtube-dl "http://www.youtube.com/watch?v=-wNyEUrxzFU"
|
youtube-dl "https://www.youtube.com/watch?v=-wNyEUrxzFU"
|
||||||
|
|
||||||
### How do I pass cookies to youtube-dl?
|
### How do I pass cookies to youtube-dl?
|
||||||
|
|
||||||
@@ -862,9 +862,9 @@ Passing cookies to youtube-dl is a good way to workaround login when a particula
|
|||||||
|
|
||||||
### How do I stream directly to media player?
|
### How do I stream directly to media player?
|
||||||
|
|
||||||
You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe former to latter. For example, streaming to [vlc](http://www.videolan.org/) can be achieved with:
|
You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe former to latter. For example, streaming to [vlc](https://www.videolan.org/) can be achieved with:
|
||||||
|
|
||||||
youtube-dl -o - "http://www.youtube.com/watch?v=BaW_jenozKcj" | vlc -
|
youtube-dl -o - "https://www.youtube.com/watch?v=BaW_jenozKcj" | vlc -
|
||||||
|
|
||||||
### How do I download only new videos from a playlist?
|
### How do I download only new videos from a playlist?
|
||||||
|
|
||||||
@@ -884,7 +884,7 @@ When youtube-dl detects an HLS video, it can download it either with the built-i
|
|||||||
|
|
||||||
When youtube-dl knows that one particular downloader works better for a given website, that downloader will be picked. Otherwise, youtube-dl will pick the best downloader for general compatibility, which at the moment happens to be ffmpeg. This choice may change in future versions of youtube-dl, with improvements of the built-in downloader and/or ffmpeg.
|
When youtube-dl knows that one particular downloader works better for a given website, that downloader will be picked. Otherwise, youtube-dl will pick the best downloader for general compatibility, which at the moment happens to be ffmpeg. This choice may change in future versions of youtube-dl, with improvements of the built-in downloader and/or ffmpeg.
|
||||||
|
|
||||||
In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](http://rg3.github.io/youtube-dl/supportedsites.html) cannot mandate one specific downloader.
|
In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](https://rg3.github.io/youtube-dl/supportedsites.html)) cannot mandate one specific downloader.
|
||||||
|
|
||||||
If you put either `--hls-prefer-native` or `--hls-prefer-ffmpeg` into your configuration, a different subset of videos will fail to download correctly. Instead, it is much better to [file an issue](https://yt-dl.org/bug) or a pull request which details why the native or the ffmpeg HLS downloader is a better choice for your use case.
|
If you put either `--hls-prefer-native` or `--hls-prefer-ffmpeg` into your configuration, a different subset of videos will fail to download correctly. Instead, it is much better to [file an issue](https://yt-dl.org/bug) or a pull request which details why the native or the ffmpeg HLS downloader is a better choice for your use case.
|
||||||
|
|
||||||
@@ -910,7 +910,7 @@ Feel free to bump the issue from time to time by writing a small comment ("Issue
|
|||||||
|
|
||||||
### How can I detect whether a given URL is supported by youtube-dl?
|
### How can I detect whether a given URL is supported by youtube-dl?
|
||||||
|
|
||||||
For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
|
For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from https://example.com/video/1234567 to https://example.com/v/1234567 ) and youtube-dl reports a URL of a service in that list as unsupported. In that case, simply report a bug.
|
||||||
|
|
||||||
It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
|
It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
|
||||||
|
|
||||||
@@ -924,7 +924,7 @@ youtube-dl is an open-source project manned by too few volunteers, so we'd rathe
|
|||||||
|
|
||||||
# DEVELOPER INSTRUCTIONS
|
# DEVELOPER INSTRUCTIONS
|
||||||
|
|
||||||
Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
|
Most users do not need to build youtube-dl and can [download the builds](https://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
|
||||||
|
|
||||||
To run youtube-dl as a developer, you don't need to build anything either. Simply execute
|
To run youtube-dl as a developer, you don't need to build anything either. Simply execute
|
||||||
|
|
||||||
@@ -936,6 +936,8 @@ To run the test, simply invoke your favorite test runner, or execute a test file
|
|||||||
python test/test_download.py
|
python test/test_download.py
|
||||||
nosetests
|
nosetests
|
||||||
|
|
||||||
|
See item 6 of the [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor-specific test cases.
|
||||||
|
|
||||||
If you want to create a build of youtube-dl yourself, you'll need
|
If you want to create a build of youtube-dl yourself, you'll need
|
||||||
|
|
||||||
* python
|
* python
|
||||||
@@ -972,7 +974,7 @@ After you have ensured this site is distributing its content legally, you can fo
|
|||||||
class YourExtractorIE(InfoExtractor):
|
class YourExtractorIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://yourextractor.com/watch/42',
|
'url': 'https://yourextractor.com/watch/42',
|
||||||
'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
|
'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '42',
|
'id': '42',
|
||||||
@@ -1003,10 +1005,10 @@ After you have ensured this site is distributing its content legally, you can fo
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with an `only_matching` key in their dict are not counted.
|
||||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
|
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
|
||||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed to be supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
|
||||||
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
|
||||||
|
|
||||||
$ git add youtube_dl/extractor/extractors.py
|
$ git add youtube_dl/extractor/extractors.py
|
||||||
$ git add youtube_dl/extractor/yourextractor.py
|
$ git add youtube_dl/extractor/yourextractor.py
|
||||||
@@ -1162,7 +1164,7 @@ import youtube_dl
|
|||||||
|
|
||||||
ydl_opts = {}
|
ydl_opts = {}
|
||||||
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
||||||
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
|
ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
|
||||||
```
|
```
|
||||||
|
|
||||||
Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L129-L279). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
|
Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L129-L279). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
|
||||||
@@ -1201,19 +1203,19 @@ ydl_opts = {
|
|||||||
'progress_hooks': [my_hook],
|
'progress_hooks': [my_hook],
|
||||||
}
|
}
|
||||||
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
||||||
ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
|
ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
|
||||||
```
|
```
|
||||||
|
|
||||||
# BUGS
|
# BUGS
|
||||||
|
|
||||||
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](http://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
|
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
|
||||||
|
|
||||||
**Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
|
**Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
|
||||||
```
|
```
|
||||||
$ youtube-dl -v <your command line>
|
$ youtube-dl -v <your command line>
|
||||||
[debug] System config: []
|
[debug] System config: []
|
||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2015.12.06
|
[debug] youtube-dl version 2015.12.06
|
||||||
[debug] Git HEAD: 135392e
|
[debug] Git HEAD: 135392e
|
||||||
@@ -1244,7 +1246,7 @@ For bug reports, this means that your report should contain the *complete* outpu
|
|||||||
|
|
||||||
If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
|
If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
|
||||||
|
|
||||||
**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `http://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `http://www.youtube.com/`) is *not* an example URL.
|
**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.
|
||||||
|
|
||||||
### Are you using the latest version?
|
### Are you using the latest version?
|
||||||
|
|
||||||
|
@@ -38,6 +38,7 @@
|
|||||||
- **afreecatv**: afreecatv.com
|
- **afreecatv**: afreecatv.com
|
||||||
- **afreecatv:global**: afreecatv.com
|
- **afreecatv:global**: afreecatv.com
|
||||||
- **AirMozilla**
|
- **AirMozilla**
|
||||||
|
- **AliExpressLive**
|
||||||
- **AlJazeera**
|
- **AlJazeera**
|
||||||
- **Allocine**
|
- **Allocine**
|
||||||
- **AlphaPorno**
|
- **AlphaPorno**
|
||||||
@@ -363,6 +364,7 @@
|
|||||||
- **IPrima**
|
- **IPrima**
|
||||||
- **iqiyi**: 爱奇艺
|
- **iqiyi**: 爱奇艺
|
||||||
- **Ir90Tv**
|
- **Ir90Tv**
|
||||||
|
- **ITTF**
|
||||||
- **ITV**
|
- **ITV**
|
||||||
- **ivi**: ivi.ru
|
- **ivi**: ivi.ru
|
||||||
- **ivi:compilation**: ivi.ru compilations
|
- **ivi:compilation**: ivi.ru compilations
|
||||||
@@ -419,6 +421,7 @@
|
|||||||
- **limelight:channel_list**
|
- **limelight:channel_list**
|
||||||
- **LiTV**
|
- **LiTV**
|
||||||
- **LiveLeak**
|
- **LiveLeak**
|
||||||
|
- **LiveLeakEmbed**
|
||||||
- **livestream**
|
- **livestream**
|
||||||
- **livestream:original**
|
- **livestream:original**
|
||||||
- **LnkGo**
|
- **LnkGo**
|
||||||
@@ -435,6 +438,7 @@
|
|||||||
- **MakerTV**
|
- **MakerTV**
|
||||||
- **mangomolo:live**
|
- **mangomolo:live**
|
||||||
- **mangomolo:video**
|
- **mangomolo:video**
|
||||||
|
- **ManyVids**
|
||||||
- **MatchTV**
|
- **MatchTV**
|
||||||
- **MDR**: MDR.DE and KiKA
|
- **MDR**: MDR.DE and KiKA
|
||||||
- **media.ccc.de**
|
- **media.ccc.de**
|
||||||
@@ -699,6 +703,7 @@
|
|||||||
- **rutube:embed**: Rutube embedded videos
|
- **rutube:embed**: Rutube embedded videos
|
||||||
- **rutube:movie**: Rutube movies
|
- **rutube:movie**: Rutube movies
|
||||||
- **rutube:person**: Rutube person videos
|
- **rutube:person**: Rutube person videos
|
||||||
|
- **rutube:playlist**: Rutube playlists
|
||||||
- **RUTV**: RUTV.RU
|
- **RUTV**: RUTV.RU
|
||||||
- **Ruutu**
|
- **Ruutu**
|
||||||
- **Ruv**
|
- **Ruv**
|
||||||
|
@@ -279,6 +279,7 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(unescapeHTML('/'), '/')
|
self.assertEqual(unescapeHTML('/'), '/')
|
||||||
self.assertEqual(unescapeHTML('é'), 'é')
|
self.assertEqual(unescapeHTML('é'), 'é')
|
||||||
self.assertEqual(unescapeHTML('�'), '�')
|
self.assertEqual(unescapeHTML('�'), '�')
|
||||||
|
self.assertEqual(unescapeHTML('&a"'), '&a"')
|
||||||
# HTML5 entities
|
# HTML5 entities
|
||||||
self.assertEqual(unescapeHTML('.''), '.\'')
|
self.assertEqual(unescapeHTML('.''), '.\'')
|
||||||
|
|
||||||
|
@@ -1710,12 +1710,17 @@ class YoutubeDL(object):
|
|||||||
if filename is None:
|
if filename is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
def ensure_dir_exists(path):
|
||||||
dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
|
try:
|
||||||
if dn and not os.path.exists(dn):
|
dn = os.path.dirname(path)
|
||||||
os.makedirs(dn)
|
if dn and not os.path.exists(dn):
|
||||||
except (OSError, IOError) as err:
|
os.makedirs(dn)
|
||||||
self.report_error('unable to create directory ' + error_to_compat_str(err))
|
return True
|
||||||
|
except (OSError, IOError) as err:
|
||||||
|
self.report_error('unable to create directory ' + error_to_compat_str(err))
|
||||||
|
return False
|
||||||
|
|
||||||
|
if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
|
||||||
return
|
return
|
||||||
|
|
||||||
if self.params.get('writedescription', False):
|
if self.params.get('writedescription', False):
|
||||||
@@ -1853,8 +1858,11 @@ class YoutubeDL(object):
|
|||||||
for f in requested_formats:
|
for f in requested_formats:
|
||||||
new_info = dict(info_dict)
|
new_info = dict(info_dict)
|
||||||
new_info.update(f)
|
new_info.update(f)
|
||||||
fname = self.prepare_filename(new_info)
|
fname = prepend_extension(
|
||||||
fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
|
self.prepare_filename(new_info),
|
||||||
|
'f%s' % f['format_id'], new_info['ext'])
|
||||||
|
if not ensure_dir_exists(fname):
|
||||||
|
return
|
||||||
downloaded.append(fname)
|
downloaded.append(fname)
|
||||||
partial_success = dl(fname, new_info)
|
partial_success = dl(fname, new_info)
|
||||||
success = success and partial_success
|
success = success and partial_success
|
||||||
|
@@ -304,11 +304,11 @@ class FileDownloader(object):
|
|||||||
"""Report attempt to resume at given byte."""
|
"""Report attempt to resume at given byte."""
|
||||||
self.to_screen('[download] Resuming download at byte %s' % resume_len)
|
self.to_screen('[download] Resuming download at byte %s' % resume_len)
|
||||||
|
|
||||||
def report_retry(self, count, retries):
|
def report_retry(self, err, count, retries):
|
||||||
"""Report retry in case of HTTP error 5xx"""
|
"""Report retry in case of HTTP error 5xx"""
|
||||||
self.to_screen(
|
self.to_screen(
|
||||||
'[download] Got server HTTP error. Retrying (attempt %d of %s)...'
|
'[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...'
|
||||||
% (count, self.format_retries(retries)))
|
% (error_to_compat_str(err), count, self.format_retries(retries)))
|
||||||
|
|
||||||
def report_file_already_downloaded(self, file_name):
|
def report_file_already_downloaded(self, file_name):
|
||||||
"""Report file has already been fully downloaded."""
|
"""Report file has already been fully downloaded."""
|
||||||
|
@@ -22,8 +22,16 @@ from ..utils import (
|
|||||||
class HttpFD(FileDownloader):
|
class HttpFD(FileDownloader):
|
||||||
def real_download(self, filename, info_dict):
|
def real_download(self, filename, info_dict):
|
||||||
url = info_dict['url']
|
url = info_dict['url']
|
||||||
tmpfilename = self.temp_name(filename)
|
|
||||||
stream = None
|
class DownloadContext(dict):
|
||||||
|
__getattr__ = dict.get
|
||||||
|
__setattr__ = dict.__setitem__
|
||||||
|
__delattr__ = dict.__delitem__
|
||||||
|
|
||||||
|
ctx = DownloadContext()
|
||||||
|
ctx.filename = filename
|
||||||
|
ctx.tmpfilename = self.temp_name(filename)
|
||||||
|
ctx.stream = None
|
||||||
|
|
||||||
# Do not include the Accept-Encoding header
|
# Do not include the Accept-Encoding header
|
||||||
headers = {'Youtubedl-no-compression': 'True'}
|
headers = {'Youtubedl-no-compression': 'True'}
|
||||||
@@ -38,46 +46,51 @@ class HttpFD(FileDownloader):
|
|||||||
if is_test:
|
if is_test:
|
||||||
request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1))
|
request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1))
|
||||||
|
|
||||||
# Establish possible resume length
|
ctx.open_mode = 'wb'
|
||||||
if os.path.isfile(encodeFilename(tmpfilename)):
|
ctx.resume_len = 0
|
||||||
resume_len = os.path.getsize(encodeFilename(tmpfilename))
|
|
||||||
else:
|
|
||||||
resume_len = 0
|
|
||||||
|
|
||||||
open_mode = 'wb'
|
if self.params.get('continuedl', True):
|
||||||
if resume_len != 0:
|
# Establish possible resume length
|
||||||
if self.params.get('continuedl', True):
|
if os.path.isfile(encodeFilename(ctx.tmpfilename)):
|
||||||
self.report_resuming_byte(resume_len)
|
ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
|
||||||
request.add_header('Range', 'bytes=%d-' % resume_len)
|
|
||||||
open_mode = 'ab'
|
|
||||||
else:
|
|
||||||
resume_len = 0
|
|
||||||
|
|
||||||
count = 0
|
count = 0
|
||||||
retries = self.params.get('retries', 0)
|
retries = self.params.get('retries', 0)
|
||||||
while count <= retries:
|
|
||||||
|
class SucceedDownload(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
class RetryDownload(Exception):
|
||||||
|
def __init__(self, source_error):
|
||||||
|
self.source_error = source_error
|
||||||
|
|
||||||
|
def establish_connection():
|
||||||
|
if ctx.resume_len != 0:
|
||||||
|
self.report_resuming_byte(ctx.resume_len)
|
||||||
|
request.add_header('Range', 'bytes=%d-' % ctx.resume_len)
|
||||||
|
ctx.open_mode = 'ab'
|
||||||
# Establish connection
|
# Establish connection
|
||||||
try:
|
try:
|
||||||
data = self.ydl.urlopen(request)
|
ctx.data = self.ydl.urlopen(request)
|
||||||
# When trying to resume, Content-Range HTTP header of response has to be checked
|
# When trying to resume, Content-Range HTTP header of response has to be checked
|
||||||
# to match the value of requested Range HTTP header. This is due to a webservers
|
# to match the value of requested Range HTTP header. This is due to a webservers
|
||||||
# that don't support resuming and serve a whole file with no Content-Range
|
# that don't support resuming and serve a whole file with no Content-Range
|
||||||
# set in response despite of requested Range (see
|
# set in response despite of requested Range (see
|
||||||
# https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
|
# https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
|
||||||
if resume_len > 0:
|
if ctx.resume_len > 0:
|
||||||
content_range = data.headers.get('Content-Range')
|
content_range = ctx.data.headers.get('Content-Range')
|
||||||
if content_range:
|
if content_range:
|
||||||
content_range_m = re.search(r'bytes (\d+)-', content_range)
|
content_range_m = re.search(r'bytes (\d+)-', content_range)
|
||||||
# Content-Range is present and matches requested Range, resume is possible
|
# Content-Range is present and matches requested Range, resume is possible
|
||||||
if content_range_m and resume_len == int(content_range_m.group(1)):
|
if content_range_m and ctx.resume_len == int(content_range_m.group(1)):
|
||||||
break
|
return
|
||||||
# Content-Range is either not present or invalid. Assuming remote webserver is
|
# Content-Range is either not present or invalid. Assuming remote webserver is
|
||||||
# trying to send the whole file, resume is not possible, so wiping the local file
|
# trying to send the whole file, resume is not possible, so wiping the local file
|
||||||
# and performing entire redownload
|
# and performing entire redownload
|
||||||
self.report_unable_to_resume()
|
self.report_unable_to_resume()
|
||||||
resume_len = 0
|
ctx.resume_len = 0
|
||||||
open_mode = 'wb'
|
ctx.open_mode = 'wb'
|
||||||
break
|
return
|
||||||
except (compat_urllib_error.HTTPError, ) as err:
|
except (compat_urllib_error.HTTPError, ) as err:
|
||||||
if (err.code < 500 or err.code >= 600) and err.code != 416:
|
if (err.code < 500 or err.code >= 600) and err.code != 416:
|
||||||
# Unexpected HTTP error
|
# Unexpected HTTP error
|
||||||
@@ -86,15 +99,15 @@ class HttpFD(FileDownloader):
|
|||||||
# Unable to resume (requested range not satisfiable)
|
# Unable to resume (requested range not satisfiable)
|
||||||
try:
|
try:
|
||||||
# Open the connection again without the range header
|
# Open the connection again without the range header
|
||||||
data = self.ydl.urlopen(basic_request)
|
ctx.data = self.ydl.urlopen(basic_request)
|
||||||
content_length = data.info()['Content-Length']
|
content_length = ctx.data.info()['Content-Length']
|
||||||
except (compat_urllib_error.HTTPError, ) as err:
|
except (compat_urllib_error.HTTPError, ) as err:
|
||||||
if err.code < 500 or err.code >= 600:
|
if err.code < 500 or err.code >= 600:
|
||||||
raise
|
raise
|
||||||
else:
|
else:
|
||||||
# Examine the reported length
|
# Examine the reported length
|
||||||
if (content_length is not None and
|
if (content_length is not None and
|
||||||
(resume_len - 100 < int(content_length) < resume_len + 100)):
|
(ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)):
|
||||||
# The file had already been fully downloaded.
|
# The file had already been fully downloaded.
|
||||||
# Explanation to the above condition: in issue #175 it was revealed that
|
# Explanation to the above condition: in issue #175 it was revealed that
|
||||||
# YouTube sometimes adds or removes a few bytes from the end of the file,
|
# YouTube sometimes adds or removes a few bytes from the end of the file,
|
||||||
@@ -102,152 +115,184 @@ class HttpFD(FileDownloader):
|
|||||||
# I decided to implement a suggested change and consider the file
|
# I decided to implement a suggested change and consider the file
|
||||||
# completely downloaded if the file size differs less than 100 bytes from
|
# completely downloaded if the file size differs less than 100 bytes from
|
||||||
# the one in the hard drive.
|
# the one in the hard drive.
|
||||||
self.report_file_already_downloaded(filename)
|
self.report_file_already_downloaded(ctx.filename)
|
||||||
self.try_rename(tmpfilename, filename)
|
self.try_rename(ctx.tmpfilename, ctx.filename)
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
'filename': filename,
|
'filename': ctx.filename,
|
||||||
'status': 'finished',
|
'status': 'finished',
|
||||||
'downloaded_bytes': resume_len,
|
'downloaded_bytes': ctx.resume_len,
|
||||||
'total_bytes': resume_len,
|
'total_bytes': ctx.resume_len,
|
||||||
})
|
})
|
||||||
return True
|
raise SucceedDownload()
|
||||||
else:
|
else:
|
||||||
# The length does not match, we start the download over
|
# The length does not match, we start the download over
|
||||||
self.report_unable_to_resume()
|
self.report_unable_to_resume()
|
||||||
resume_len = 0
|
ctx.resume_len = 0
|
||||||
open_mode = 'wb'
|
ctx.open_mode = 'wb'
|
||||||
break
|
return
|
||||||
except socket.error as e:
|
raise RetryDownload(err)
|
||||||
if e.errno != errno.ECONNRESET:
|
except socket.error as err:
|
||||||
|
if err.errno != errno.ECONNRESET:
|
||||||
# Connection reset is no problem, just retry
|
# Connection reset is no problem, just retry
|
||||||
raise
|
raise
|
||||||
|
raise RetryDownload(err)
|
||||||
|
|
||||||
# Retry
|
def download():
|
||||||
count += 1
|
data_len = ctx.data.info().get('Content-length', None)
|
||||||
if count <= retries:
|
|
||||||
self.report_retry(count, retries)
|
|
||||||
|
|
||||||
if count > retries:
|
# Range HTTP header may be ignored/unsupported by a webserver
|
||||||
self.report_error('giving up after %s retries' % retries)
|
# (e.g. extractor/scivee.py, extractor/bambuser.py).
|
||||||
return False
|
# However, for a test we still would like to download just a piece of a file.
|
||||||
|
# To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
|
||||||
|
# block size when downloading a file.
|
||||||
|
if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
|
||||||
|
data_len = self._TEST_FILE_SIZE
|
||||||
|
|
||||||
data_len = data.info().get('Content-length', None)
|
if data_len is not None:
|
||||||
|
data_len = int(data_len) + ctx.resume_len
|
||||||
# Range HTTP header may be ignored/unsupported by a webserver
|
min_data_len = self.params.get('min_filesize')
|
||||||
# (e.g. extractor/scivee.py, extractor/bambuser.py).
|
max_data_len = self.params.get('max_filesize')
|
||||||
# However, for a test we still would like to download just a piece of a file.
|
if min_data_len is not None and data_len < min_data_len:
|
||||||
# To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
|
self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
|
||||||
# block size when downloading a file.
|
return False
|
||||||
if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
|
if max_data_len is not None and data_len > max_data_len:
|
||||||
data_len = self._TEST_FILE_SIZE
|
self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
||||||
|
|
||||||
if data_len is not None:
|
|
||||||
data_len = int(data_len) + resume_len
|
|
||||||
min_data_len = self.params.get('min_filesize')
|
|
||||||
max_data_len = self.params.get('max_filesize')
|
|
||||||
if min_data_len is not None and data_len < min_data_len:
|
|
||||||
self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
|
|
||||||
return False
|
|
||||||
if max_data_len is not None and data_len > max_data_len:
|
|
||||||
self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
|
||||||
return False
|
|
||||||
|
|
||||||
byte_counter = 0 + resume_len
|
|
||||||
block_size = self.params.get('buffersize', 1024)
|
|
||||||
start = time.time()
|
|
||||||
|
|
||||||
# measure time over whole while-loop, so slow_down() and best_block_size() work together properly
|
|
||||||
now = None # needed for slow_down() in the first loop run
|
|
||||||
before = start # start measuring
|
|
||||||
while True:
|
|
||||||
|
|
||||||
# Download and write
|
|
||||||
data_block = data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
|
|
||||||
byte_counter += len(data_block)
|
|
||||||
|
|
||||||
# exit loop when download is finished
|
|
||||||
if len(data_block) == 0:
|
|
||||||
break
|
|
||||||
|
|
||||||
# Open destination file just in time
|
|
||||||
if stream is None:
|
|
||||||
try:
|
|
||||||
(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
|
|
||||||
assert stream is not None
|
|
||||||
filename = self.undo_temp_name(tmpfilename)
|
|
||||||
self.report_destination(filename)
|
|
||||||
except (OSError, IOError) as err:
|
|
||||||
self.report_error('unable to open for writing: %s' % str(err))
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if self.params.get('xattr_set_filesize', False) and data_len is not None:
|
byte_counter = 0 + ctx.resume_len
|
||||||
|
block_size = self.params.get('buffersize', 1024)
|
||||||
|
start = time.time()
|
||||||
|
|
||||||
|
# measure time over whole while-loop, so slow_down() and best_block_size() work together properly
|
||||||
|
now = None # needed for slow_down() in the first loop run
|
||||||
|
before = start # start measuring
|
||||||
|
|
||||||
|
def retry(e):
|
||||||
|
if ctx.tmpfilename != '-':
|
||||||
|
ctx.stream.close()
|
||||||
|
ctx.stream = None
|
||||||
|
ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
|
||||||
|
raise RetryDownload(e)
|
||||||
|
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
# Download and write
|
||||||
|
data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
|
||||||
|
# socket.timeout is a subclass of socket.error but may not have
|
||||||
|
# errno set
|
||||||
|
except socket.timeout as e:
|
||||||
|
retry(e)
|
||||||
|
except socket.error as e:
|
||||||
|
if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT):
|
||||||
|
raise
|
||||||
|
retry(e)
|
||||||
|
|
||||||
|
byte_counter += len(data_block)
|
||||||
|
|
||||||
|
# exit loop when download is finished
|
||||||
|
if len(data_block) == 0:
|
||||||
|
break
|
||||||
|
|
||||||
|
# Open destination file just in time
|
||||||
|
if ctx.stream is None:
|
||||||
try:
|
try:
|
||||||
write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
|
ctx.stream, ctx.tmpfilename = sanitize_open(
|
||||||
except (XAttrUnavailableError, XAttrMetadataError) as err:
|
ctx.tmpfilename, ctx.open_mode)
|
||||||
self.report_error('unable to set filesize xattr: %s' % str(err))
|
assert ctx.stream is not None
|
||||||
|
ctx.filename = self.undo_temp_name(ctx.tmpfilename)
|
||||||
|
self.report_destination(ctx.filename)
|
||||||
|
except (OSError, IOError) as err:
|
||||||
|
self.report_error('unable to open for writing: %s' % str(err))
|
||||||
|
return False
|
||||||
|
|
||||||
try:
|
if self.params.get('xattr_set_filesize', False) and data_len is not None:
|
||||||
stream.write(data_block)
|
try:
|
||||||
except (IOError, OSError) as err:
|
write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
|
||||||
|
except (XAttrUnavailableError, XAttrMetadataError) as err:
|
||||||
|
self.report_error('unable to set filesize xattr: %s' % str(err))
|
||||||
|
|
||||||
|
try:
|
||||||
|
ctx.stream.write(data_block)
|
||||||
|
except (IOError, OSError) as err:
|
||||||
|
self.to_stderr('\n')
|
||||||
|
self.report_error('unable to write data: %s' % str(err))
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Apply rate limit
|
||||||
|
self.slow_down(start, now, byte_counter - ctx.resume_len)
|
||||||
|
|
||||||
|
# end measuring of one loop run
|
||||||
|
now = time.time()
|
||||||
|
after = now
|
||||||
|
|
||||||
|
# Adjust block size
|
||||||
|
if not self.params.get('noresizebuffer', False):
|
||||||
|
block_size = self.best_block_size(after - before, len(data_block))
|
||||||
|
|
||||||
|
before = after
|
||||||
|
|
||||||
|
# Progress message
|
||||||
|
speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
|
||||||
|
if data_len is None:
|
||||||
|
eta = None
|
||||||
|
else:
|
||||||
|
eta = self.calc_eta(start, time.time(), data_len - ctx.resume_len, byte_counter - ctx.resume_len)
|
||||||
|
|
||||||
|
self._hook_progress({
|
||||||
|
'status': 'downloading',
|
||||||
|
'downloaded_bytes': byte_counter,
|
||||||
|
'total_bytes': data_len,
|
||||||
|
'tmpfilename': ctx.tmpfilename,
|
||||||
|
'filename': ctx.filename,
|
||||||
|
'eta': eta,
|
||||||
|
'speed': speed,
|
||||||
|
'elapsed': now - start,
|
||||||
|
})
|
||||||
|
|
||||||
|
if is_test and byte_counter == data_len:
|
||||||
|
break
|
||||||
|
|
||||||
|
if ctx.stream is None:
|
||||||
self.to_stderr('\n')
|
self.to_stderr('\n')
|
||||||
self.report_error('unable to write data: %s' % str(err))
|
self.report_error('Did not get any data blocks')
|
||||||
return False
|
return False
|
||||||
|
if ctx.tmpfilename != '-':
|
||||||
|
ctx.stream.close()
|
||||||
|
|
||||||
# Apply rate limit
|
if data_len is not None and byte_counter != data_len:
|
||||||
self.slow_down(start, now, byte_counter - resume_len)
|
err = ContentTooShortError(byte_counter, int(data_len))
|
||||||
|
if count <= retries:
|
||||||
|
retry(err)
|
||||||
|
raise err
|
||||||
|
|
||||||
# end measuring of one loop run
|
self.try_rename(ctx.tmpfilename, ctx.filename)
|
||||||
now = time.time()
|
|
||||||
after = now
|
|
||||||
|
|
||||||
# Adjust block size
|
# Update file modification time
|
||||||
if not self.params.get('noresizebuffer', False):
|
if self.params.get('updatetime', True):
|
||||||
block_size = self.best_block_size(after - before, len(data_block))
|
info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None))
|
||||||
|
|
||||||
before = after
|
|
||||||
|
|
||||||
# Progress message
|
|
||||||
speed = self.calc_speed(start, now, byte_counter - resume_len)
|
|
||||||
if data_len is None:
|
|
||||||
eta = None
|
|
||||||
else:
|
|
||||||
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
|
|
||||||
|
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
'status': 'downloading',
|
|
||||||
'downloaded_bytes': byte_counter,
|
'downloaded_bytes': byte_counter,
|
||||||
'total_bytes': data_len,
|
'total_bytes': byte_counter,
|
||||||
'tmpfilename': tmpfilename,
|
'filename': ctx.filename,
|
||||||
'filename': filename,
|
'status': 'finished',
|
||||||
'eta': eta,
|
'elapsed': time.time() - start,
|
||||||
'speed': speed,
|
|
||||||
'elapsed': now - start,
|
|
||||||
})
|
})
|
||||||
|
|
||||||
if is_test and byte_counter == data_len:
|
return True
|
||||||
break
|
|
||||||
|
|
||||||
if stream is None:
|
while count <= retries:
|
||||||
self.to_stderr('\n')
|
try:
|
||||||
self.report_error('Did not get any data blocks')
|
establish_connection()
|
||||||
return False
|
download()
|
||||||
if tmpfilename != '-':
|
return True
|
||||||
stream.close()
|
except RetryDownload as e:
|
||||||
|
count += 1
|
||||||
|
if count <= retries:
|
||||||
|
self.report_retry(e.source_error, count, retries)
|
||||||
|
continue
|
||||||
|
except SucceedDownload:
|
||||||
|
return True
|
||||||
|
|
||||||
if data_len is not None and byte_counter != data_len:
|
self.report_error('giving up after %s retries' % retries)
|
||||||
raise ContentTooShortError(byte_counter, int(data_len))
|
return False
|
||||||
self.try_rename(tmpfilename, filename)
|
|
||||||
|
|
||||||
# Update file modification time
|
|
||||||
if self.params.get('updatetime', True):
|
|
||||||
info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
|
|
||||||
|
|
||||||
self._hook_progress({
|
|
||||||
'downloaded_bytes': byte_counter,
|
|
||||||
'total_bytes': byte_counter,
|
|
||||||
'filename': filename,
|
|
||||||
'status': 'finished',
|
|
||||||
'elapsed': time.time() - start,
|
|
||||||
})
|
|
||||||
|
|
||||||
return True
|
|
||||||
|
@@ -7,6 +7,7 @@ import time
|
|||||||
|
|
||||||
from .amp import AMPIE
|
from .amp import AMPIE
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from .youtube import YoutubeIE
|
||||||
from ..compat import compat_urlparse
|
from ..compat import compat_urlparse
|
||||||
|
|
||||||
|
|
||||||
@@ -108,9 +109,7 @@ class AbcNewsIE(InfoExtractor):
|
|||||||
r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
|
r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
|
||||||
full_video_url = compat_urlparse.urljoin(url, video_url)
|
full_video_url = compat_urlparse.urljoin(url, video_url)
|
||||||
|
|
||||||
youtube_url = self._html_search_regex(
|
youtube_url = YoutubeIE._extract_url(webpage)
|
||||||
r'<iframe[^>]+src="(https://www\.youtube\.com/embed/[^"]+)"',
|
|
||||||
webpage, 'YouTube URL', default=None)
|
|
||||||
|
|
||||||
timestamp = None
|
timestamp = None
|
||||||
date_str = self._html_search_regex(
|
date_str = self._html_search_regex(
|
||||||
@@ -140,7 +139,7 @@ class AbcNewsIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
if youtube_url:
|
if youtube_url:
|
||||||
entries = [entry, self.url_result(youtube_url, 'Youtube')]
|
entries = [entry, self.url_result(youtube_url, ie=YoutubeIE.ie_key())]
|
||||||
return self.playlist_result(entries)
|
return self.playlist_result(entries)
|
||||||
|
|
||||||
return entry
|
return entry
|
||||||
|
53
youtube_dl/extractor/aliexpress.py
Normal file
53
youtube_dl/extractor/aliexpress.py
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
float_or_none,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AliExpressLiveIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://live\.aliexpress\.com/live/(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://live.aliexpress.com/live/2800002704436634',
|
||||||
|
'md5': 'e729e25d47c5e557f2630eaf99b740a5',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2800002704436634',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'CASIMA7.22',
|
||||||
|
'thumbnail': r're:http://.*\.jpg',
|
||||||
|
'uploader': 'CASIMA Official Store',
|
||||||
|
'timestamp': 1500717600,
|
||||||
|
'upload_date': '20170722',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
data = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'(?s)runParams\s*=\s*({.+?})\s*;?\s*var',
|
||||||
|
webpage, 'runParams'),
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
title = data['title']
|
||||||
|
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
data['replyStreamUrl'], video_id, 'mp4',
|
||||||
|
entry_protocol='m3u8_native', m3u8_id='hls')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'thumbnail': data.get('coverUrl'),
|
||||||
|
'uploader': try_get(
|
||||||
|
data, lambda x: x['followBar']['name'], compat_str),
|
||||||
|
'timestamp': float_or_none(data.get('startTimeLong'), scale=1000),
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@@ -82,7 +82,7 @@ class ArteTVBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
vsr = player_info['VSR']
|
vsr = player_info['VSR']
|
||||||
|
|
||||||
if not vsr and not player_info.get('VRU'):
|
if not vsr:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Video %s is not available' % player_info.get('VID') or video_id,
|
'Video %s is not available' % player_info.get('VID') or video_id,
|
||||||
expected=True)
|
expected=True)
|
||||||
|
@@ -242,7 +242,12 @@ class BandcampAlbumIE(InfoExtractor):
|
|||||||
raise ExtractorError('The page doesn\'t contain any tracks')
|
raise ExtractorError('The page doesn\'t contain any tracks')
|
||||||
# Only tracks with duration info have songs
|
# Only tracks with duration info have songs
|
||||||
entries = [
|
entries = [
|
||||||
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
|
self.url_result(
|
||||||
|
compat_urlparse.urljoin(url, t_path),
|
||||||
|
ie=BandcampIE.ie_key(),
|
||||||
|
video_title=self._search_regex(
|
||||||
|
r'<span\b[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)',
|
||||||
|
elem_content, 'track title', fatal=False))
|
||||||
for elem_content, t_path in track_elements
|
for elem_content, t_path in track_elements
|
||||||
if self._html_search_meta('duration', elem_content, default=None)]
|
if self._html_search_meta('duration', elem_content, default=None)]
|
||||||
|
|
||||||
|
@@ -29,7 +29,7 @@ from ..compat import (
|
|||||||
class BBCCoUkIE(InfoExtractor):
|
class BBCCoUkIE(InfoExtractor):
|
||||||
IE_NAME = 'bbc.co.uk'
|
IE_NAME = 'bbc.co.uk'
|
||||||
IE_DESC = 'BBC iPlayer'
|
IE_DESC = 'BBC iPlayer'
|
||||||
_ID_REGEX = r'[pb][\da-z]{7}'
|
_ID_REGEX = r'[pbw][\da-z]{7}'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:www\.)?bbc\.co\.uk/
|
(?:www\.)?bbc\.co\.uk/
|
||||||
@@ -37,7 +37,8 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
programmes/(?!articles/)|
|
programmes/(?!articles/)|
|
||||||
iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
|
iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
|
||||||
music/(?:clips|audiovideo/popular)[/#]|
|
music/(?:clips|audiovideo/popular)[/#]|
|
||||||
radio/player/
|
radio/player/|
|
||||||
|
events/[^/]+/play/[^/]+/
|
||||||
)
|
)
|
||||||
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
|
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
|
||||||
''' % _ID_REGEX
|
''' % _ID_REGEX
|
||||||
@@ -232,6 +233,9 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
|
'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
|
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
|
||||||
|
@@ -33,13 +33,18 @@ class BpbIE(InfoExtractor):
|
|||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<h2 class="white">(.*?)</h2>', webpage, 'title')
|
r'<h2 class="white">(.*?)</h2>', webpage, 'title')
|
||||||
video_info_dicts = re.findall(
|
video_info_dicts = re.findall(
|
||||||
r"({\s*src:\s*'http://film\.bpb\.de/[^}]+})", webpage)
|
r"({\s*src\s*:\s*'https?://film\.bpb\.de/[^}]+})", webpage)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for video_info in video_info_dicts:
|
for video_info in video_info_dicts:
|
||||||
video_info = self._parse_json(video_info, video_id, transform_source=js_to_json)
|
video_info = self._parse_json(
|
||||||
quality = video_info['quality']
|
video_info, video_id, transform_source=js_to_json, fatal=False)
|
||||||
video_url = video_info['src']
|
if not video_info:
|
||||||
|
continue
|
||||||
|
video_url = video_info.get('src')
|
||||||
|
if not video_url:
|
||||||
|
continue
|
||||||
|
quality = 'high' if '_high' in video_url else 'low'
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'preference': 10 if quality == 'high' else 0,
|
'preference': 10 if quality == 'high' else 0,
|
||||||
|
@@ -200,6 +200,7 @@ class CBCWatchBaseIE(InfoExtractor):
|
|||||||
'media': 'http://search.yahoo.com/mrss/',
|
'media': 'http://search.yahoo.com/mrss/',
|
||||||
'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/',
|
'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/',
|
||||||
}
|
}
|
||||||
|
_GEO_COUNTRIES = ['CA']
|
||||||
|
|
||||||
def _call_api(self, path, video_id):
|
def _call_api(self, path, video_id):
|
||||||
url = path if path.startswith('http') else self._API_BASE_URL + path
|
url = path if path.startswith('http') else self._API_BASE_URL + path
|
||||||
@@ -287,6 +288,11 @@ class CBCWatchBaseIE(InfoExtractor):
|
|||||||
class CBCWatchVideoIE(CBCWatchBaseIE):
|
class CBCWatchVideoIE(CBCWatchBaseIE):
|
||||||
IE_NAME = 'cbc.ca:watch:video'
|
IE_NAME = 'cbc.ca:watch:video'
|
||||||
_VALID_URL = r'https?://api-cbc\.cloud\.clearleap\.com/cloffice/client/web/play/?\?.*?\bcontentId=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
_VALID_URL = r'https?://api-cbc\.cloud\.clearleap\.com/cloffice/client/web/play/?\?.*?\bcontentId=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||||
|
_TEST = {
|
||||||
|
# geo-restricted to Canada, bypassable
|
||||||
|
'url': 'https://api-cbc.cloud.clearleap.com/cloffice/client/web/play/?contentId=3c84472a-1eea-4dee-9267-2655d5055dcf&categoryId=ebc258f5-ee40-4cca-b66b-ba6bd55b7235',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
@@ -323,9 +329,10 @@ class CBCWatchIE(CBCWatchBaseIE):
|
|||||||
IE_NAME = 'cbc.ca:watch'
|
IE_NAME = 'cbc.ca:watch'
|
||||||
_VALID_URL = r'https?://watch\.cbc\.ca/(?:[^/]+/)+(?P<id>[0-9a-f-]+)'
|
_VALID_URL = r'https?://watch\.cbc\.ca/(?:[^/]+/)+(?P<id>[0-9a-f-]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
# geo-restricted to Canada, bypassable
|
||||||
'url': 'http://watch.cbc.ca/doc-zone/season-6/customer-disservice/38e815a-009e3ab12e4',
|
'url': 'http://watch.cbc.ca/doc-zone/season-6/customer-disservice/38e815a-009e3ab12e4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '38e815a-009e3ab12e4',
|
'id': '9673749a-5e77-484c-8b62-a1092a6b5168',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Customer (Dis)Service',
|
'title': 'Customer (Dis)Service',
|
||||||
'description': 'md5:8bdd6913a0fe03d4b2a17ebe169c7c87',
|
'description': 'md5:8bdd6913a0fe03d4b2a17ebe169c7c87',
|
||||||
@@ -337,8 +344,8 @@ class CBCWatchIE(CBCWatchBaseIE):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
'format': 'bestvideo',
|
'format': 'bestvideo',
|
||||||
},
|
},
|
||||||
'skip': 'Geo-restricted to Canada',
|
|
||||||
}, {
|
}, {
|
||||||
|
# geo-restricted to Canada, bypassable
|
||||||
'url': 'http://watch.cbc.ca/arthur/all/1ed4b385-cd84-49cf-95f0-80f004680057',
|
'url': 'http://watch.cbc.ca/arthur/all/1ed4b385-cd84-49cf-95f0-80f004680057',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1ed4b385-cd84-49cf-95f0-80f004680057',
|
'id': '1ed4b385-cd84-49cf-95f0-80f004680057',
|
||||||
@@ -346,7 +353,6 @@ class CBCWatchIE(CBCWatchBaseIE):
|
|||||||
'description': 'Arthur, the sweetest 8-year-old aardvark, and his pals solve all kinds of problems with humour, kindness and teamwork.',
|
'description': 'Arthur, the sweetest 8-year-old aardvark, and his pals solve all kinds of problems with humour, kindness and teamwork.',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 30,
|
'playlist_mincount': 30,
|
||||||
'skip': 'Geo-restricted to Canada',
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@@ -124,7 +124,7 @@ class CDAIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def extract_format(page, version):
|
def extract_format(page, version):
|
||||||
json_str = self._search_regex(
|
json_str = self._html_search_regex(
|
||||||
r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
|
r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
|
||||||
'%s player_json' % version, fatal=False, group='player_data')
|
'%s player_json' % version, fatal=False, group='player_data')
|
||||||
if not json_str:
|
if not json_str:
|
||||||
|
@@ -5,7 +5,7 @@ from ..utils import remove_end
|
|||||||
|
|
||||||
|
|
||||||
class CharlieRoseIE(InfoExtractor):
|
class CharlieRoseIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?charlierose\.com/video(?:s|/player)/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?charlierose\.com/(?:video|episode)(?:s|/player)/(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://charlierose.com/videos/27996',
|
'url': 'https://charlierose.com/videos/27996',
|
||||||
'md5': 'fda41d49e67d4ce7c2411fd2c4702e09',
|
'md5': 'fda41d49e67d4ce7c2411fd2c4702e09',
|
||||||
@@ -24,6 +24,9 @@ class CharlieRoseIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://charlierose.com/videos/27996',
|
'url': 'https://charlierose.com/videos/27996',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://charlierose.com/episodes/30887?autoplay=true',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_PLAYER_BASE = 'https://charlierose.com/video/player/%s'
|
_PLAYER_BASE = 'https://charlierose.com/video/player/%s'
|
||||||
|
@@ -5,6 +5,7 @@ import base64
|
|||||||
import json
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from .youtube import YoutubeIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
ExtractorError
|
ExtractorError
|
||||||
@@ -70,11 +71,9 @@ class ChilloutzoneIE(InfoExtractor):
|
|||||||
|
|
||||||
# If nativePlatform is None a fallback mechanism is used (i.e. youtube embed)
|
# If nativePlatform is None a fallback mechanism is used (i.e. youtube embed)
|
||||||
if native_platform is None:
|
if native_platform is None:
|
||||||
youtube_url = self._html_search_regex(
|
youtube_url = YoutubeIE._extract_url(webpage)
|
||||||
r'<iframe.* src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
|
if youtube_url:
|
||||||
webpage, 'fallback video URL', default=None)
|
return self.url_result(youtube_url, ie=YoutubeIE.ie_key())
|
||||||
if youtube_url is not None:
|
|
||||||
return self.url_result(youtube_url, ie='Youtube')
|
|
||||||
|
|
||||||
# Non Fallback: Decide to use native source (e.g. youtube or vimeo) or
|
# Non Fallback: Decide to use native source (e.g. youtube or vimeo) or
|
||||||
# the own CDN
|
# the own CDN
|
||||||
|
@@ -27,6 +27,7 @@ from ..compat import (
|
|||||||
compat_urllib_parse_urlencode,
|
compat_urllib_parse_urlencode,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
|
compat_xml_parse_error,
|
||||||
)
|
)
|
||||||
from ..downloader.f4m import remove_encrypted_media
|
from ..downloader.f4m import remove_encrypted_media
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -646,15 +647,29 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
def _download_xml(self, url_or_request, video_id,
|
def _download_xml(self, url_or_request, video_id,
|
||||||
note='Downloading XML', errnote='Unable to download XML',
|
note='Downloading XML', errnote='Unable to download XML',
|
||||||
transform_source=None, fatal=True, encoding=None, data=None, headers={}, query={}):
|
transform_source=None, fatal=True, encoding=None,
|
||||||
|
data=None, headers={}, query={}):
|
||||||
"""Return the xml as an xml.etree.ElementTree.Element"""
|
"""Return the xml as an xml.etree.ElementTree.Element"""
|
||||||
xml_string = self._download_webpage(
|
xml_string = self._download_webpage(
|
||||||
url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding, data=data, headers=headers, query=query)
|
url_or_request, video_id, note, errnote, fatal=fatal,
|
||||||
|
encoding=encoding, data=data, headers=headers, query=query)
|
||||||
if xml_string is False:
|
if xml_string is False:
|
||||||
return xml_string
|
return xml_string
|
||||||
|
return self._parse_xml(
|
||||||
|
xml_string, video_id, transform_source=transform_source,
|
||||||
|
fatal=fatal)
|
||||||
|
|
||||||
|
def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
|
||||||
if transform_source:
|
if transform_source:
|
||||||
xml_string = transform_source(xml_string)
|
xml_string = transform_source(xml_string)
|
||||||
return compat_etree_fromstring(xml_string.encode('utf-8'))
|
try:
|
||||||
|
return compat_etree_fromstring(xml_string.encode('utf-8'))
|
||||||
|
except compat_xml_parse_error as ve:
|
||||||
|
errmsg = '%s: Failed to parse XML ' % video_id
|
||||||
|
if fatal:
|
||||||
|
raise ExtractorError(errmsg, cause=ve)
|
||||||
|
else:
|
||||||
|
self.report_warning(errmsg + str(ve))
|
||||||
|
|
||||||
def _download_json(self, url_or_request, video_id,
|
def _download_json(self, url_or_request, video_id,
|
||||||
note='Downloading JSON metadata',
|
note='Downloading JSON metadata',
|
||||||
@@ -2123,11 +2138,11 @@ class InfoExtractor(object):
|
|||||||
formats = self._extract_m3u8_formats(
|
formats = self._extract_m3u8_formats(
|
||||||
full_url, video_id, ext='mp4',
|
full_url, video_id, ext='mp4',
|
||||||
entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id,
|
entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id,
|
||||||
preference=preference)
|
preference=preference, fatal=False)
|
||||||
elif ext == 'mpd':
|
elif ext == 'mpd':
|
||||||
is_plain_url = False
|
is_plain_url = False
|
||||||
formats = self._extract_mpd_formats(
|
formats = self._extract_mpd_formats(
|
||||||
full_url, video_id, mpd_id=mpd_id)
|
full_url, video_id, mpd_id=mpd_id, fatal=False)
|
||||||
else:
|
else:
|
||||||
is_plain_url = True
|
is_plain_url = True
|
||||||
formats = [{
|
formats = [{
|
||||||
@@ -2169,6 +2184,12 @@ class InfoExtractor(object):
|
|||||||
f = parse_content_type(source_attributes.get('type'))
|
f = parse_content_type(source_attributes.get('type'))
|
||||||
is_plain_url, formats = _media_formats(src, media_type, f)
|
is_plain_url, formats = _media_formats(src, media_type, f)
|
||||||
if is_plain_url:
|
if is_plain_url:
|
||||||
|
# res attribute is not standard but seen several times
|
||||||
|
# in the wild
|
||||||
|
f.update({
|
||||||
|
'height': int_or_none(source_attributes.get('res')),
|
||||||
|
'format_id': source_attributes.get('label'),
|
||||||
|
})
|
||||||
f.update(formats[0])
|
f.update(formats[0])
|
||||||
media_info['formats'].append(f)
|
media_info['formats'].append(f)
|
||||||
else:
|
else:
|
||||||
|
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from .youtube import YoutubeIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
@@ -41,11 +42,9 @@ class CrackedIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
youtube_url = self._search_regex(
|
youtube_url = YoutubeIE._extract_url(webpage)
|
||||||
r'<iframe[^>]+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"',
|
|
||||||
webpage, 'youtube url', default=None)
|
|
||||||
if youtube_url:
|
if youtube_url:
|
||||||
return self.url_result(youtube_url, 'Youtube')
|
return self.url_result(youtube_url, ie=YoutubeIE.ie_key())
|
||||||
|
|
||||||
video_url = self._html_search_regex(
|
video_url = self._html_search_regex(
|
||||||
[r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'],
|
[r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'],
|
||||||
|
@@ -45,6 +45,7 @@ from .anvato import AnvatoIE
|
|||||||
from .anysex import AnySexIE
|
from .anysex import AnySexIE
|
||||||
from .aol import AolIE
|
from .aol import AolIE
|
||||||
from .allocine import AllocineIE
|
from .allocine import AllocineIE
|
||||||
|
from .aliexpress import AliExpressLiveIE
|
||||||
from .aparat import AparatIE
|
from .aparat import AparatIE
|
||||||
from .appleconnect import AppleConnectIE
|
from .appleconnect import AppleConnectIE
|
||||||
from .appletrailers import (
|
from .appletrailers import (
|
||||||
@@ -509,6 +510,7 @@ from .la7 import LA7IE
|
|||||||
from .laola1tv import (
|
from .laola1tv import (
|
||||||
Laola1TvEmbedIE,
|
Laola1TvEmbedIE,
|
||||||
Laola1TvIE,
|
Laola1TvIE,
|
||||||
|
ITTFIE,
|
||||||
)
|
)
|
||||||
from .lci import LCIIE
|
from .lci import LCIIE
|
||||||
from .lcp import (
|
from .lcp import (
|
||||||
@@ -536,7 +538,10 @@ from .limelight import (
|
|||||||
LimelightChannelListIE,
|
LimelightChannelListIE,
|
||||||
)
|
)
|
||||||
from .litv import LiTVIE
|
from .litv import LiTVIE
|
||||||
from .liveleak import LiveLeakIE
|
from .liveleak import (
|
||||||
|
LiveLeakIE,
|
||||||
|
LiveLeakEmbedIE,
|
||||||
|
)
|
||||||
from .livestream import (
|
from .livestream import (
|
||||||
LivestreamIE,
|
LivestreamIE,
|
||||||
LivestreamOriginalIE,
|
LivestreamOriginalIE,
|
||||||
@@ -559,6 +564,7 @@ from .mangomolo import (
|
|||||||
MangomoloVideoIE,
|
MangomoloVideoIE,
|
||||||
MangomoloLiveIE,
|
MangomoloLiveIE,
|
||||||
)
|
)
|
||||||
|
from .manyvids import ManyVidsIE
|
||||||
from .matchtv import MatchTVIE
|
from .matchtv import MatchTVIE
|
||||||
from .mdr import MDRIE
|
from .mdr import MDRIE
|
||||||
from .mediaset import MediasetIE
|
from .mediaset import MediasetIE
|
||||||
@@ -893,6 +899,7 @@ from .rutube import (
|
|||||||
RutubeEmbedIE,
|
RutubeEmbedIE,
|
||||||
RutubeMovieIE,
|
RutubeMovieIE,
|
||||||
RutubePersonIE,
|
RutubePersonIE,
|
||||||
|
RutubePlaylistIE,
|
||||||
)
|
)
|
||||||
from .rutv import RUTVIE
|
from .rutv import RUTVIE
|
||||||
from .ruutu import RuutuIE
|
from .ruutu import RuutuIE
|
||||||
|
@@ -3,56 +3,99 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
from .adobepass import AdobePassIE
|
from .adobepass import AdobePassIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
smuggle_url,
|
int_or_none,
|
||||||
update_url_query,
|
parse_age_limit,
|
||||||
|
parse_duration,
|
||||||
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class FOXIE(AdobePassIE):
|
class FOXIE(AdobePassIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[\da-fA-F]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.fox.com/watch/255180355939/7684182528',
|
# clip
|
||||||
|
'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/',
|
||||||
'md5': 'ebd296fcc41dd4b19f8115d8461a3165',
|
'md5': 'ebd296fcc41dd4b19f8115d8461a3165',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '255180355939',
|
'id': '4b765a60490325103ea69888fb2bd4e8',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Official Trailer: Gotham',
|
'title': 'Aftermath: Bruce Wayne Develops Into The Dark Knight',
|
||||||
'description': 'Tracing the rise of the great DC Comics Super-Villains and vigilantes, Gotham reveals an entirely new chapter that has never been told.',
|
'description': 'md5:549cd9c70d413adb32ce2a779b53b486',
|
||||||
'duration': 129,
|
'duration': 102,
|
||||||
'timestamp': 1400020798,
|
'timestamp': 1504291893,
|
||||||
'upload_date': '20140513',
|
'upload_date': '20170901',
|
||||||
'uploader': 'NEWA-FNG-FOXCOM',
|
'creator': 'FOX',
|
||||||
|
'series': 'Gotham',
|
||||||
},
|
},
|
||||||
'add_ie': ['ThePlatform'],
|
'params': {
|
||||||
}
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# episode, geo-restricted
|
||||||
|
'url': 'https://www.fox.com/watch/087036ca7f33c8eb79b08152b4dd75c1/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# episode, geo-restricted, tv provided required
|
||||||
|
'url': 'https://www.fox.com/watch/30056b295fb57f7452aeeb4920bc3024/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
settings = self._parse_json(self._search_regex(
|
video = self._download_json(
|
||||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
'https://api.fox.com/fbc-content/v1_4/video/%s' % video_id,
|
||||||
webpage, 'drupal settings'), video_id)
|
video_id, headers={
|
||||||
fox_pdk_player = settings['fox_pdk_player']
|
'apikey': 'abdcbed02c124d393b39e818a4312055',
|
||||||
release_url = fox_pdk_player['release_url']
|
'Content-Type': 'application/json',
|
||||||
query = {
|
'Referer': url,
|
||||||
'mbr': 'true',
|
})
|
||||||
'switch': 'http'
|
|
||||||
}
|
|
||||||
if fox_pdk_player.get('access') == 'locked':
|
|
||||||
ap_p = settings['foxAdobePassProvider']
|
|
||||||
rating = ap_p.get('videoRating')
|
|
||||||
if rating == 'n/a':
|
|
||||||
rating = None
|
|
||||||
resource = self._get_mvpd_resource('fbc-fox', None, ap_p['videoGUID'], rating)
|
|
||||||
query['auth'] = self._extract_mvpd_auth(url, video_id, 'fbc-fox', resource)
|
|
||||||
|
|
||||||
info = self._search_json_ld(webpage, video_id, fatal=False)
|
title = video['name']
|
||||||
info.update({
|
|
||||||
'_type': 'url_transparent',
|
m3u8_url = self._download_json(
|
||||||
'ie_key': 'ThePlatform',
|
video['videoRelease']['url'], video_id)['playURL']
|
||||||
'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
|
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
m3u8_url, video_id, 'mp4',
|
||||||
|
entry_protocol='m3u8_native', m3u8_id='hls')
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
description = video.get('description')
|
||||||
|
duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
|
||||||
|
video.get('duration')) or parse_duration(video.get('duration'))
|
||||||
|
timestamp = unified_timestamp(video.get('datePublished'))
|
||||||
|
age_limit = parse_age_limit(video.get('contentRating'))
|
||||||
|
|
||||||
|
data = try_get(
|
||||||
|
video, lambda x: x['trackingData']['properties'], dict) or {}
|
||||||
|
|
||||||
|
creator = data.get('brand') or data.get('network') or video.get('network')
|
||||||
|
|
||||||
|
series = video.get('seriesName') or data.get(
|
||||||
|
'seriesName') or data.get('show')
|
||||||
|
season_number = int_or_none(video.get('seasonNumber'))
|
||||||
|
episode = video.get('name')
|
||||||
|
episode_number = int_or_none(video.get('episodeNumber'))
|
||||||
|
release_year = int_or_none(video.get('releaseYear'))
|
||||||
|
|
||||||
|
if data.get('authRequired'):
|
||||||
|
# TODO: AP
|
||||||
|
pass
|
||||||
|
|
||||||
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
})
|
'title': title,
|
||||||
|
'description': description,
|
||||||
return info
|
'duration': duration,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'age_limit': age_limit,
|
||||||
|
'creator': creator,
|
||||||
|
'series': series,
|
||||||
|
'season_number': season_number,
|
||||||
|
'episode': episode,
|
||||||
|
'episode_number': episode_number,
|
||||||
|
'release_year': release_year,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
@@ -1519,14 +1519,27 @@ class GenericIE(InfoExtractor):
|
|||||||
# LiveLeak embed
|
# LiveLeak embed
|
||||||
{
|
{
|
||||||
'url': 'http://www.wykop.pl/link/3088787/',
|
'url': 'http://www.wykop.pl/link/3088787/',
|
||||||
'md5': 'ace83b9ed19b21f68e1b50e844fdf95d',
|
'md5': '7619da8c820e835bef21a1efa2a0fc71',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '874_1459135191',
|
'id': '874_1459135191',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Man shows poor quality of new apartment building',
|
'title': 'Man shows poor quality of new apartment building',
|
||||||
'description': 'The wall is like a sand pile.',
|
'description': 'The wall is like a sand pile.',
|
||||||
'uploader': 'Lake8737',
|
'uploader': 'Lake8737',
|
||||||
}
|
},
|
||||||
|
'add_ie': [LiveLeakIE.ie_key()],
|
||||||
|
},
|
||||||
|
# Another LiveLeak embed pattern (#13336)
|
||||||
|
{
|
||||||
|
'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2eb_1496309988',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Thief robs place where everyone was armed',
|
||||||
|
'description': 'md5:694d73ee79e535953cf2488562288eee',
|
||||||
|
'uploader': 'brazilwtf',
|
||||||
|
},
|
||||||
|
'add_ie': [LiveLeakIE.ie_key()],
|
||||||
},
|
},
|
||||||
# Duplicated embedded video URLs
|
# Duplicated embedded video URLs
|
||||||
{
|
{
|
||||||
@@ -2230,36 +2243,11 @@ class GenericIE(InfoExtractor):
|
|||||||
if vid_me_embed_url is not None:
|
if vid_me_embed_url is not None:
|
||||||
return self.url_result(vid_me_embed_url, 'Vidme')
|
return self.url_result(vid_me_embed_url, 'Vidme')
|
||||||
|
|
||||||
# Look for embedded YouTube player
|
# Look for YouTube embeds
|
||||||
matches = re.findall(r'''(?x)
|
youtube_urls = YoutubeIE._extract_urls(webpage)
|
||||||
(?:
|
if youtube_urls:
|
||||||
<iframe[^>]+?src=|
|
|
||||||
data-video-url=|
|
|
||||||
<embed[^>]+?src=|
|
|
||||||
embedSWF\(?:\s*|
|
|
||||||
<object[^>]+data=|
|
|
||||||
new\s+SWFObject\(
|
|
||||||
)
|
|
||||||
(["\'])
|
|
||||||
(?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
|
|
||||||
(?:embed|v|p)/.+?)
|
|
||||||
\1''', webpage)
|
|
||||||
if matches:
|
|
||||||
return self.playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
matches, video_id, video_title, lambda m: unescapeHTML(m[1]))
|
youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key())
|
||||||
|
|
||||||
# Look for lazyYT YouTube embed
|
|
||||||
matches = re.findall(
|
|
||||||
r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
|
|
||||||
if matches:
|
|
||||||
return self.playlist_from_matches(matches, video_id, video_title, lambda m: unescapeHTML(m))
|
|
||||||
|
|
||||||
# Look for Wordpress "YouTube Video Importer" plugin
|
|
||||||
matches = re.findall(r'''(?x)<div[^>]+
|
|
||||||
class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
|
|
||||||
data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
|
|
||||||
if matches:
|
|
||||||
return self.playlist_from_matches(matches, video_id, video_title, lambda m: m[-1])
|
|
||||||
|
|
||||||
matches = DailymotionIE._extract_urls(webpage)
|
matches = DailymotionIE._extract_urls(webpage)
|
||||||
if matches:
|
if matches:
|
||||||
@@ -2757,9 +2745,9 @@ class GenericIE(InfoExtractor):
|
|||||||
self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
|
self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
|
||||||
|
|
||||||
# Look for LiveLeak embeds
|
# Look for LiveLeak embeds
|
||||||
liveleak_url = LiveLeakIE._extract_url(webpage)
|
liveleak_urls = LiveLeakIE._extract_urls(webpage)
|
||||||
if liveleak_url:
|
if liveleak_urls:
|
||||||
return self.url_result(liveleak_url, 'LiveLeak')
|
return self.playlist_from_matches(liveleak_urls, video_id, video_title)
|
||||||
|
|
||||||
# Look for 3Q SDN embeds
|
# Look for 3Q SDN embeds
|
||||||
threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
|
threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
|
||||||
@@ -2858,12 +2846,6 @@ class GenericIE(InfoExtractor):
|
|||||||
merged[k] = v
|
merged[k] = v
|
||||||
return merged
|
return merged
|
||||||
|
|
||||||
# Looking for http://schema.org/VideoObject
|
|
||||||
json_ld = self._search_json_ld(
|
|
||||||
webpage, video_id, default={}, expected_type='VideoObject')
|
|
||||||
if json_ld.get('url'):
|
|
||||||
return merge_dicts(json_ld, info_dict)
|
|
||||||
|
|
||||||
# Look for HTML5 media
|
# Look for HTML5 media
|
||||||
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
||||||
if entries:
|
if entries:
|
||||||
@@ -2882,6 +2864,12 @@ class GenericIE(InfoExtractor):
|
|||||||
jwplayer_data, video_id, require_title=False, base_url=url)
|
jwplayer_data, video_id, require_title=False, base_url=url)
|
||||||
return merge_dicts(info, info_dict)
|
return merge_dicts(info, info_dict)
|
||||||
|
|
||||||
|
# Looking for http://schema.org/VideoObject
|
||||||
|
json_ld = self._search_json_ld(
|
||||||
|
webpage, video_id, default={}, expected_type='VideoObject')
|
||||||
|
if json_ld.get('url'):
|
||||||
|
return merge_dicts(json_ld, info_dict)
|
||||||
|
|
||||||
def check_video(vurl):
|
def check_video(vurl):
|
||||||
if YoutubeIE.suitable(vurl):
|
if YoutubeIE.suitable(vurl):
|
||||||
return True
|
return True
|
||||||
|
@@ -4,26 +4,61 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
lowercase_escape,
|
lowercase_escape,
|
||||||
|
update_url_query,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class GoogleDriveIE(InfoExtractor):
|
class GoogleDriveIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
(?:
|
||||||
|
(?:docs|drive)\.google\.com/
|
||||||
|
(?:
|
||||||
|
(?:uc|open)\?.*?id=|
|
||||||
|
file/d/
|
||||||
|
)|
|
||||||
|
video\.google\.com/get_player\?.*?docid=
|
||||||
|
)
|
||||||
|
(?P<id>[a-zA-Z0-9_-]{28,})
|
||||||
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
|
'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
|
||||||
'md5': 'd109872761f7e7ecf353fa108c0dbe1e',
|
'md5': '5c602afbbf2c1db91831f5d82f678554',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
|
'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Big Buck Bunny.mp4',
|
'title': 'Big Buck Bunny.mp4',
|
||||||
'duration': 45,
|
'duration': 45,
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# video can't be watched anonymously due to view count limit reached,
|
||||||
|
# but can be downloaded (see https://github.com/rg3/youtube-dl/issues/14046)
|
||||||
|
'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
|
||||||
|
'md5': 'bfbd670d03a470bb1e6d4a257adec12e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '0B-vUyvmDLdWDcEt4WjBqcmI2XzQ',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Annabelle Creation (2017)- Z.V1 [TH].MP4',
|
||||||
|
}
|
||||||
}, {
|
}, {
|
||||||
# video id is longer than 28 characters
|
# video id is longer than 28 characters
|
||||||
'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
|
'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Andreea Banica feat Smiley - Hooky Song (Official Video).mp4',
|
||||||
|
'duration': 189,
|
||||||
|
},
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://drive.google.com/uc?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
_FORMATS_EXT = {
|
_FORMATS_EXT = {
|
||||||
@@ -44,6 +79,13 @@ class GoogleDriveIE(InfoExtractor):
|
|||||||
'46': 'webm',
|
'46': 'webm',
|
||||||
'59': 'mp4',
|
'59': 'mp4',
|
||||||
}
|
}
|
||||||
|
_BASE_URL_CAPTIONS = 'https://drive.google.com/timedtext'
|
||||||
|
_CAPTIONS_ENTRY_TAG = {
|
||||||
|
'subtitles': 'track',
|
||||||
|
'automatic_captions': 'target',
|
||||||
|
}
|
||||||
|
_caption_formats_ext = []
|
||||||
|
_captions_xml = None
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_url(webpage):
|
def _extract_url(webpage):
|
||||||
@@ -53,54 +95,183 @@ class GoogleDriveIE(InfoExtractor):
|
|||||||
if mobj:
|
if mobj:
|
||||||
return 'https://drive.google.com/file/d/%s' % mobj.group('id')
|
return 'https://drive.google.com/file/d/%s' % mobj.group('id')
|
||||||
|
|
||||||
|
def _download_subtitles_xml(self, video_id, subtitles_id, hl):
|
||||||
|
if self._captions_xml:
|
||||||
|
return
|
||||||
|
self._captions_xml = self._download_xml(
|
||||||
|
self._BASE_URL_CAPTIONS, video_id, query={
|
||||||
|
'id': video_id,
|
||||||
|
'vid': subtitles_id,
|
||||||
|
'hl': hl,
|
||||||
|
'v': video_id,
|
||||||
|
'type': 'list',
|
||||||
|
'tlangs': '1',
|
||||||
|
'fmts': '1',
|
||||||
|
'vssids': '1',
|
||||||
|
}, note='Downloading subtitles XML',
|
||||||
|
errnote='Unable to download subtitles XML', fatal=False)
|
||||||
|
if self._captions_xml:
|
||||||
|
for f in self._captions_xml.findall('format'):
|
||||||
|
if f.attrib.get('fmt_code') and not f.attrib.get('default'):
|
||||||
|
self._caption_formats_ext.append(f.attrib['fmt_code'])
|
||||||
|
|
||||||
|
def _get_captions_by_type(self, video_id, subtitles_id, caption_type,
|
||||||
|
origin_lang_code=None):
|
||||||
|
if not subtitles_id or not caption_type:
|
||||||
|
return
|
||||||
|
captions = {}
|
||||||
|
for caption_entry in self._captions_xml.findall(
|
||||||
|
self._CAPTIONS_ENTRY_TAG[caption_type]):
|
||||||
|
caption_lang_code = caption_entry.attrib.get('lang_code')
|
||||||
|
if not caption_lang_code:
|
||||||
|
continue
|
||||||
|
caption_format_data = []
|
||||||
|
for caption_format in self._caption_formats_ext:
|
||||||
|
query = {
|
||||||
|
'vid': subtitles_id,
|
||||||
|
'v': video_id,
|
||||||
|
'fmt': caption_format,
|
||||||
|
'lang': (caption_lang_code if origin_lang_code is None
|
||||||
|
else origin_lang_code),
|
||||||
|
'type': 'track',
|
||||||
|
'name': '',
|
||||||
|
'kind': '',
|
||||||
|
}
|
||||||
|
if origin_lang_code is not None:
|
||||||
|
query.update({'tlang': caption_lang_code})
|
||||||
|
caption_format_data.append({
|
||||||
|
'url': update_url_query(self._BASE_URL_CAPTIONS, query),
|
||||||
|
'ext': caption_format,
|
||||||
|
})
|
||||||
|
captions[caption_lang_code] = caption_format_data
|
||||||
|
return captions
|
||||||
|
|
||||||
|
def _get_subtitles(self, video_id, subtitles_id, hl):
|
||||||
|
if not subtitles_id or not hl:
|
||||||
|
return
|
||||||
|
self._download_subtitles_xml(video_id, subtitles_id, hl)
|
||||||
|
if not self._captions_xml:
|
||||||
|
return
|
||||||
|
return self._get_captions_by_type(video_id, subtitles_id, 'subtitles')
|
||||||
|
|
||||||
|
def _get_automatic_captions(self, video_id, subtitles_id, hl):
|
||||||
|
if not subtitles_id or not hl:
|
||||||
|
return
|
||||||
|
self._download_subtitles_xml(video_id, subtitles_id, hl)
|
||||||
|
if not self._captions_xml:
|
||||||
|
return
|
||||||
|
track = self._captions_xml.find('track')
|
||||||
|
if track is None:
|
||||||
|
return
|
||||||
|
origin_lang_code = track.attrib.get('lang_code')
|
||||||
|
if not origin_lang_code:
|
||||||
|
return
|
||||||
|
return self._get_captions_by_type(
|
||||||
|
video_id, subtitles_id, 'automatic_captions', origin_lang_code)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'http://docs.google.com/file/d/%s' % video_id, video_id)
|
'http://docs.google.com/file/d/%s' % video_id, video_id)
|
||||||
|
|
||||||
reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
|
title = self._search_regex(
|
||||||
if reason:
|
r'"title"\s*,\s*"([^"]+)', webpage, 'title',
|
||||||
raise ExtractorError(reason)
|
default=None) or self._og_search_title(webpage)
|
||||||
|
|
||||||
title = self._search_regex(r'"title"\s*,\s*"([^"]+)', webpage, 'title')
|
|
||||||
duration = int_or_none(self._search_regex(
|
duration = int_or_none(self._search_regex(
|
||||||
r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', default=None))
|
r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds',
|
||||||
fmt_stream_map = self._search_regex(
|
default=None))
|
||||||
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',')
|
|
||||||
fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')
|
|
||||||
|
|
||||||
resolutions = {}
|
|
||||||
for fmt in fmt_list:
|
|
||||||
mobj = re.search(
|
|
||||||
r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt)
|
|
||||||
if mobj:
|
|
||||||
resolutions[mobj.group('format_id')] = (
|
|
||||||
int(mobj.group('width')), int(mobj.group('height')))
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for fmt_stream in fmt_stream_map:
|
fmt_stream_map = self._search_regex(
|
||||||
fmt_stream_split = fmt_stream.split('|')
|
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage,
|
||||||
if len(fmt_stream_split) < 2:
|
'fmt stream map', default='').split(',')
|
||||||
continue
|
fmt_list = self._search_regex(
|
||||||
format_id, format_url = fmt_stream_split[:2]
|
r'"fmt_list"\s*,\s*"([^"]+)', webpage,
|
||||||
f = {
|
'fmt_list', default='').split(',')
|
||||||
'url': lowercase_escape(format_url),
|
if fmt_stream_map and fmt_list:
|
||||||
'format_id': format_id,
|
resolutions = {}
|
||||||
'ext': self._FORMATS_EXT[format_id],
|
for fmt in fmt_list:
|
||||||
}
|
mobj = re.search(
|
||||||
resolution = resolutions.get(format_id)
|
r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt)
|
||||||
if resolution:
|
if mobj:
|
||||||
f.update({
|
resolutions[mobj.group('format_id')] = (
|
||||||
'width': resolution[0],
|
int(mobj.group('width')), int(mobj.group('height')))
|
||||||
'height': resolution[1],
|
|
||||||
|
for fmt_stream in fmt_stream_map:
|
||||||
|
fmt_stream_split = fmt_stream.split('|')
|
||||||
|
if len(fmt_stream_split) < 2:
|
||||||
|
continue
|
||||||
|
format_id, format_url = fmt_stream_split[:2]
|
||||||
|
f = {
|
||||||
|
'url': lowercase_escape(format_url),
|
||||||
|
'format_id': format_id,
|
||||||
|
'ext': self._FORMATS_EXT[format_id],
|
||||||
|
}
|
||||||
|
resolution = resolutions.get(format_id)
|
||||||
|
if resolution:
|
||||||
|
f.update({
|
||||||
|
'width': resolution[0],
|
||||||
|
'height': resolution[1],
|
||||||
|
})
|
||||||
|
formats.append(f)
|
||||||
|
|
||||||
|
source_url = update_url_query(
|
||||||
|
'https://drive.google.com/uc', {
|
||||||
|
'id': video_id,
|
||||||
|
'export': 'download',
|
||||||
|
})
|
||||||
|
urlh = self._request_webpage(
|
||||||
|
source_url, video_id, note='Requesting source file',
|
||||||
|
errnote='Unable to request source file', fatal=False)
|
||||||
|
if urlh:
|
||||||
|
def add_source_format(src_url):
|
||||||
|
formats.append({
|
||||||
|
'url': src_url,
|
||||||
|
'ext': determine_ext(title, 'mp4').lower(),
|
||||||
|
'format_id': 'source',
|
||||||
|
'quality': 1,
|
||||||
})
|
})
|
||||||
formats.append(f)
|
if urlh.headers.get('Content-Disposition'):
|
||||||
|
add_source_format(source_url)
|
||||||
|
else:
|
||||||
|
confirmation_webpage = self._webpage_read_content(
|
||||||
|
urlh, url, video_id, note='Downloading confirmation page',
|
||||||
|
errnote='Unable to confirm download', fatal=False)
|
||||||
|
if confirmation_webpage:
|
||||||
|
confirm = self._search_regex(
|
||||||
|
r'confirm=([^&"\']+)', confirmation_webpage,
|
||||||
|
'confirmation code', fatal=False)
|
||||||
|
if confirm:
|
||||||
|
add_source_format(update_url_query(source_url, {
|
||||||
|
'confirm': confirm,
|
||||||
|
}))
|
||||||
|
|
||||||
|
if not formats:
|
||||||
|
reason = self._search_regex(
|
||||||
|
r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
|
||||||
|
if reason:
|
||||||
|
raise ExtractorError(reason, expected=True)
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
hl = self._search_regex(
|
||||||
|
r'"hl"\s*,\s*"([^"]+)', webpage, 'hl', default=None)
|
||||||
|
subtitles_id = None
|
||||||
|
ttsurl = self._search_regex(
|
||||||
|
r'"ttsurl"\s*,\s*"([^"]+)', webpage, 'ttsurl', default=None)
|
||||||
|
if ttsurl:
|
||||||
|
# the video Id for subtitles will be the last value in the ttsurl
|
||||||
|
# query string
|
||||||
|
subtitles_id = ttsurl.encode('utf-8').decode(
|
||||||
|
'unicode_escape').split('=')[-1]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'subtitles': self.extract_subtitles(video_id, subtitles_id, hl),
|
||||||
|
'automatic_captions': self.extract_automatic_captions(
|
||||||
|
video_id, subtitles_id, hl),
|
||||||
}
|
}
|
||||||
|
@@ -215,3 +215,21 @@ class Laola1TvIE(Laola1TvEmbedIE):
|
|||||||
'formats': formats,
|
'formats': formats,
|
||||||
'is_live': is_live,
|
'is_live': is_live,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class ITTFIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://tv\.ittf\.com/video/[^/]+/(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://tv.ittf.com/video/peng-wang-wei-matsudaira-kenta/951802',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
return self.url_result(
|
||||||
|
update_url_query('https://www.laola1.tv/titanplayer.php', {
|
||||||
|
'videoid': self._match_id(url),
|
||||||
|
'type': 'V',
|
||||||
|
'lang': 'en',
|
||||||
|
'portal': 'int',
|
||||||
|
'customer': 1024,
|
||||||
|
}), Laola1TvEmbedIE.ie_key())
|
||||||
|
@@ -72,15 +72,20 @@ class LiveLeakIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.liveleak.com/view?i=677_1439397581',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '677_1439397581',
|
||||||
|
'title': 'Fuel Depot in China Explosion caught on video',
|
||||||
|
},
|
||||||
|
'playlist_count': 3,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_url(webpage):
|
def _extract_urls(webpage):
|
||||||
mobj = re.search(
|
return re.findall(
|
||||||
r'<iframe[^>]+src="https?://(?:\w+\.)?liveleak\.com/ll_embed\?(?:.*?)i=(?P<id>[\w_]+)(?:.*)',
|
r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[if]=[\w_]+[^"]+)"',
|
||||||
webpage)
|
webpage)
|
||||||
if mobj:
|
|
||||||
return 'http://www.liveleak.com/view?i=%s' % mobj.group('id')
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
@@ -111,23 +116,54 @@ class LiveLeakIE(InfoExtractor):
|
|||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
}
|
}
|
||||||
|
|
||||||
info_dict = entries[0]
|
for idx, info_dict in enumerate(entries):
|
||||||
|
for a_format in info_dict['formats']:
|
||||||
|
if not a_format.get('height'):
|
||||||
|
a_format['height'] = int_or_none(self._search_regex(
|
||||||
|
r'([0-9]+)p\.mp4', a_format['url'], 'height label',
|
||||||
|
default=None))
|
||||||
|
|
||||||
for a_format in info_dict['formats']:
|
self._sort_formats(info_dict['formats'])
|
||||||
if not a_format.get('height'):
|
|
||||||
a_format['height'] = int_or_none(self._search_regex(
|
|
||||||
r'([0-9]+)p\.mp4', a_format['url'], 'height label',
|
|
||||||
default=None))
|
|
||||||
|
|
||||||
self._sort_formats(info_dict['formats'])
|
# Don't append entry ID for one-video pages to keep backward compatibility
|
||||||
|
if len(entries) > 1:
|
||||||
|
info_dict['id'] = '%s_%s' % (video_id, idx + 1)
|
||||||
|
else:
|
||||||
|
info_dict['id'] = video_id
|
||||||
|
|
||||||
info_dict.update({
|
info_dict.update({
|
||||||
'id': video_id,
|
'title': video_title,
|
||||||
'title': video_title,
|
'description': video_description,
|
||||||
'description': video_description,
|
'uploader': video_uploader,
|
||||||
'uploader': video_uploader,
|
'age_limit': age_limit,
|
||||||
'age_limit': age_limit,
|
'thumbnail': video_thumbnail,
|
||||||
'thumbnail': video_thumbnail,
|
})
|
||||||
})
|
|
||||||
|
|
||||||
return info_dict
|
return self.playlist_result(entries, video_id, video_title)
|
||||||
|
|
||||||
|
|
||||||
|
class LiveLeakEmbedIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[if])=(?P<id>[\w_]+)'
|
||||||
|
|
||||||
|
# See generic.py for actual test cases
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.liveleak.com/ll_embed?i=874_1459135191',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.liveleak.com/ll_embed?f=ab065df993c1',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
kind, video_id = mobj.group('kind', 'id')
|
||||||
|
|
||||||
|
if kind == 'f':
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
liveleak_url = self._search_regex(
|
||||||
|
r'logourl\s*:\s*(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
|
||||||
|
webpage, 'LiveLeak URL', group='url')
|
||||||
|
elif kind == 'i':
|
||||||
|
liveleak_url = 'http://www.liveleak.com/view?i=%s' % video_id
|
||||||
|
|
||||||
|
return self.url_result(liveleak_url, ie=LiveLeakIE.ie_key())
|
||||||
|
48
youtube_dl/extractor/manyvids.py
Normal file
48
youtube_dl/extractor/manyvids.py
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
|
class ManyVidsIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/',
|
||||||
|
'md5': '03f11bb21c52dd12a05be21a5c7dcc97',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '133957',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'everthing about me (Preview)',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
video_url = self._search_regex(
|
||||||
|
r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||||
|
webpage, 'video URL', group='url')
|
||||||
|
|
||||||
|
title = '%s (Preview)' % self._html_search_regex(
|
||||||
|
r'<h2[^>]+class="m-a-0"[^>]*>([^<]+)', webpage, 'title')
|
||||||
|
|
||||||
|
like_count = int_or_none(self._search_regex(
|
||||||
|
r'data-likes=["\'](\d+)', webpage, 'like count', default=None))
|
||||||
|
view_count = int_or_none(self._html_search_regex(
|
||||||
|
r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage,
|
||||||
|
'view count', default=None))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'view_count': view_count,
|
||||||
|
'like_count': like_count,
|
||||||
|
'formats': [{
|
||||||
|
'url': video_url,
|
||||||
|
}],
|
||||||
|
}
|
@@ -91,12 +91,14 @@ class MixcloudIE(InfoExtractor):
|
|||||||
if js_url:
|
if js_url:
|
||||||
js = self._download_webpage(js_url, track_id, fatal=False)
|
js = self._download_webpage(js_url, track_id, fatal=False)
|
||||||
if js:
|
if js:
|
||||||
key = self._search_regex(
|
KEY_RE_TEMPLATE = r'player\s*:\s*{.*?\b%s\s*:\s*(["\'])(?P<key>(?:(?!\1).)+)\1'
|
||||||
r'player\s*:\s*{.*?\bvalue\s*:\s*(["\'])(?P<key>(?:(?!\1).)+)\1',
|
for key_name in ('value', 'key_value', 'key_value.*?', '.*?value.*?'):
|
||||||
js, 'key', default=None, group='key')
|
key = self._search_regex(
|
||||||
if key and isinstance(key, compat_str):
|
KEY_RE_TEMPLATE % key_name, js, 'key',
|
||||||
self._keys.insert(0, key)
|
default=None, group='key')
|
||||||
self._current_key = key
|
if key and isinstance(key, compat_str):
|
||||||
|
self._keys.insert(0, key)
|
||||||
|
self._current_key = key
|
||||||
|
|
||||||
message = self._html_search_regex(
|
message = self._html_search_regex(
|
||||||
r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)',
|
r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)',
|
||||||
|
@@ -54,7 +54,7 @@ class PornHdIE(InfoExtractor):
|
|||||||
r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')
|
r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')
|
||||||
|
|
||||||
sources = self._parse_json(js_to_json(self._search_regex(
|
sources = self._parse_json(js_to_json(self._search_regex(
|
||||||
r"(?s)sources'?\s*:\s*(\{.+?\})\s*\}[;,)]",
|
r"(?s)sources'?\s*[:=]\s*(\{.+?\})",
|
||||||
webpage, 'sources', default='{}')), video_id)
|
webpage, 'sources', default='{}')), video_id)
|
||||||
|
|
||||||
if not sources:
|
if not sources:
|
||||||
@@ -82,7 +82,8 @@ class PornHdIE(InfoExtractor):
|
|||||||
view_count = int_or_none(self._html_search_regex(
|
view_count = int_or_none(self._html_search_regex(
|
||||||
r'(\d+) views\s*<', webpage, 'view count', fatal=False))
|
r'(\d+) views\s*<', webpage, 'view count', fatal=False))
|
||||||
thumbnail = self._search_regex(
|
thumbnail = self._search_regex(
|
||||||
r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
|
r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage,
|
||||||
|
'thumbnail', fatal=False, group='url')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@@ -186,7 +186,7 @@ class PornHubIE(InfoExtractor):
|
|||||||
title, thumbnail, duration = [None] * 3
|
title, thumbnail, duration = [None] * 3
|
||||||
|
|
||||||
video_uploader = self._html_search_regex(
|
video_uploader = self._html_search_regex(
|
||||||
r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<',
|
r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:user|channel)s/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
|
||||||
webpage, 'uploader', fatal=False)
|
webpage, 'uploader', fatal=False)
|
||||||
|
|
||||||
view_count = self._extract_count(
|
view_count = self._extract_count(
|
||||||
|
@@ -20,20 +20,37 @@ from ..utils import (
|
|||||||
class RadioCanadaIE(InfoExtractor):
|
class RadioCanadaIE(InfoExtractor):
|
||||||
IE_NAME = 'radiocanada'
|
IE_NAME = 'radiocanada'
|
||||||
_VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)'
|
_VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TESTS = [
|
||||||
'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
|
{
|
||||||
'info_dict': {
|
'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
|
||||||
'id': '7184272',
|
'info_dict': {
|
||||||
'ext': 'mp4',
|
'id': '7184272',
|
||||||
'title': 'Le parcours du tireur capté sur vidéo',
|
'ext': 'mp4',
|
||||||
'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
|
'title': 'Le parcours du tireur capté sur vidéo',
|
||||||
'upload_date': '20141023',
|
'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
|
||||||
|
'upload_date': '20141023',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
},
|
},
|
||||||
'params': {
|
{
|
||||||
# m3u8 download
|
# empty Title
|
||||||
'skip_download': True,
|
'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7754998/',
|
||||||
},
|
'info_dict': {
|
||||||
}
|
'id': '7754998',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'letelejournal22h',
|
||||||
|
'description': 'INTEGRALE WEB 22H-TJ',
|
||||||
|
'upload_date': '20170720',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
url, smuggled_data = unsmuggle_url(url, {})
|
url, smuggled_data = unsmuggle_url(url, {})
|
||||||
@@ -59,6 +76,7 @@ class RadioCanadaIE(InfoExtractor):
|
|||||||
device_types.append('android')
|
device_types.append('android')
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
error = None
|
||||||
# TODO: extract f4m formats
|
# TODO: extract f4m formats
|
||||||
# f4m formats can be extracted using flashhd device_type but they produce unplayable file
|
# f4m formats can be extracted using flashhd device_type but they produce unplayable file
|
||||||
for device_type in device_types:
|
for device_type in device_types:
|
||||||
@@ -84,8 +102,8 @@ class RadioCanadaIE(InfoExtractor):
|
|||||||
if not v_url:
|
if not v_url:
|
||||||
continue
|
continue
|
||||||
if v_url == 'null':
|
if v_url == 'null':
|
||||||
raise ExtractorError('%s said: %s' % (
|
error = xpath_text(v_data, 'message')
|
||||||
self.IE_NAME, xpath_text(v_data, 'message')), expected=True)
|
continue
|
||||||
ext = determine_ext(v_url)
|
ext = determine_ext(v_url)
|
||||||
if ext == 'm3u8':
|
if ext == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
@@ -129,6 +147,9 @@ class RadioCanadaIE(InfoExtractor):
|
|||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_f4m_formats(
|
||||||
base_url + '/manifest.f4m', video_id,
|
base_url + '/manifest.f4m', video_id,
|
||||||
f4m_id='hds', fatal=False))
|
f4m_id='hds', fatal=False))
|
||||||
|
if not formats and error:
|
||||||
|
raise ExtractorError(
|
||||||
|
'%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
@@ -141,7 +162,7 @@ class RadioCanadaIE(InfoExtractor):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': get_meta('Title'),
|
'title': get_meta('Title') or get_meta('AV-nomEmission'),
|
||||||
'description': get_meta('Description') or get_meta('ShortDescription'),
|
'description': get_meta('Description') or get_meta('ShortDescription'),
|
||||||
'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'),
|
'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'),
|
||||||
'duration': int_or_none(get_meta('length')),
|
'duration': int_or_none(get_meta('length')),
|
||||||
|
@@ -345,11 +345,11 @@ class RaiIE(RaiBaseIE):
|
|||||||
media_type = media['type']
|
media_type = media['type']
|
||||||
if 'Audio' in media_type:
|
if 'Audio' in media_type:
|
||||||
relinker_info = {
|
relinker_info = {
|
||||||
'formats': {
|
'formats': [{
|
||||||
'format_id': media.get('formatoAudio'),
|
'format_id': media.get('formatoAudio'),
|
||||||
'url': media['audioUrl'],
|
'url': media['audioUrl'],
|
||||||
'ext': media.get('formatoAudio'),
|
'ext': media.get('formatoAudio'),
|
||||||
}
|
}]
|
||||||
}
|
}
|
||||||
elif 'Video' in media_type:
|
elif 'Video' in media_type:
|
||||||
relinker_info = self._extract_relinker_info(media['mediaUri'], content_id)
|
relinker_info = self._extract_relinker_info(media['mediaUri'], content_id)
|
||||||
|
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
@@ -62,7 +63,23 @@ class RedTubeIE(InfoExtractor):
|
|||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'height': int_or_none(format_id),
|
'height': int_or_none(format_id),
|
||||||
})
|
})
|
||||||
else:
|
medias = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'mediaDefinition\s*:\s*(\[.+?\])', webpage,
|
||||||
|
'media definitions', default='{}'),
|
||||||
|
video_id, fatal=False)
|
||||||
|
if medias and isinstance(medias, list):
|
||||||
|
for media in medias:
|
||||||
|
format_url = media.get('videoUrl')
|
||||||
|
if not format_url or not isinstance(format_url, compat_str):
|
||||||
|
continue
|
||||||
|
format_id = media.get('quality')
|
||||||
|
formats.append({
|
||||||
|
'url': format_url,
|
||||||
|
'format_id': format_id,
|
||||||
|
'height': int_or_none(format_id),
|
||||||
|
})
|
||||||
|
if not formats:
|
||||||
video_url = self._html_search_regex(
|
video_url = self._html_search_regex(
|
||||||
r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
|
r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
|
||||||
formats.append({'url': video_url})
|
formats.append({'url': video_url})
|
||||||
@@ -73,7 +90,7 @@ class RedTubeIE(InfoExtractor):
|
|||||||
r'<span[^>]+class="added-time"[^>]*>ADDED ([^<]+)<',
|
r'<span[^>]+class="added-time"[^>]*>ADDED ([^<]+)<',
|
||||||
webpage, 'upload date', fatal=False))
|
webpage, 'upload date', fatal=False))
|
||||||
duration = int_or_none(self._search_regex(
|
duration = int_or_none(self._search_regex(
|
||||||
r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
|
r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None))
|
||||||
view_count = str_to_int(self._search_regex(
|
view_count = str_to_int(self._search_regex(
|
||||||
r'<span[^>]*>VIEWS</span></td>\s*<td>([\d,.]+)',
|
r'<span[^>]*>VIEWS</span></td>\s*<td>([\d,.]+)',
|
||||||
webpage, 'view count', fatal=False))
|
webpage, 'view count', fatal=False))
|
||||||
|
@@ -7,43 +7,84 @@ import itertools
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_str,
|
compat_str,
|
||||||
|
compat_parse_qs,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
unified_strdate,
|
bool_or_none,
|
||||||
|
int_or_none,
|
||||||
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class RutubeIE(InfoExtractor):
|
class RutubeBaseIE(InfoExtractor):
|
||||||
|
def _extract_video(self, video, video_id=None, require_title=True):
|
||||||
|
title = video['title'] if require_title else video.get('title')
|
||||||
|
|
||||||
|
age_limit = video.get('is_adult')
|
||||||
|
if age_limit is not None:
|
||||||
|
age_limit = 18 if age_limit is True else 0
|
||||||
|
|
||||||
|
uploader_id = try_get(video, lambda x: x['author']['id'])
|
||||||
|
category = try_get(video, lambda x: x['category']['name'])
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video.get('id') or video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': video.get('description'),
|
||||||
|
'thumbnail': video.get('thumbnail_url'),
|
||||||
|
'duration': int_or_none(video.get('duration')),
|
||||||
|
'uploader': try_get(video, lambda x: x['author']['name']),
|
||||||
|
'uploader_id': compat_str(uploader_id) if uploader_id else None,
|
||||||
|
'timestamp': unified_timestamp(video.get('created_ts')),
|
||||||
|
'category': [category] if category else None,
|
||||||
|
'age_limit': age_limit,
|
||||||
|
'view_count': int_or_none(video.get('hits')),
|
||||||
|
'comment_count': int_or_none(video.get('comments_count')),
|
||||||
|
'is_live': bool_or_none(video.get('is_livestream')),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class RutubeIE(RutubeBaseIE):
|
||||||
IE_NAME = 'rutube'
|
IE_NAME = 'rutube'
|
||||||
IE_DESC = 'Rutube videos'
|
IE_DESC = 'Rutube videos'
|
||||||
_VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P<id>[\da-z]{32})'
|
_VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P<id>[\da-z]{32})'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
|
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
|
||||||
|
'md5': '79938ade01294ef7e27574890d0d3769',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3eac3b4561676c17df9132a9a1e62e3e',
|
'id': '3eac3b4561676c17df9132a9a1e62e3e',
|
||||||
'ext': 'mp4',
|
'ext': 'flv',
|
||||||
'title': 'Раненный кенгуру забежал в аптеку',
|
'title': 'Раненный кенгуру забежал в аптеку',
|
||||||
'description': 'http://www.ntdtv.ru ',
|
'description': 'http://www.ntdtv.ru ',
|
||||||
'duration': 80,
|
'duration': 80,
|
||||||
'uploader': 'NTDRussian',
|
'uploader': 'NTDRussian',
|
||||||
'uploader_id': '29790',
|
'uploader_id': '29790',
|
||||||
|
'timestamp': 1381943602,
|
||||||
'upload_date': '20131016',
|
'upload_date': '20131016',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
# It requires ffmpeg (m3u8 download)
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661',
|
'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661',
|
'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
return False if RutubePlaylistIE.suitable(url) else super(RutubeIE, cls).suitable(url)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(webpage):
|
||||||
return [mobj.group('url') for mobj in re.finditer(
|
return [mobj.group('url') for mobj in re.finditer(
|
||||||
@@ -52,12 +93,12 @@ class RutubeIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
video = self._download_json(
|
video = self._download_json(
|
||||||
'http://rutube.ru/api/video/%s/?format=json' % video_id,
|
'http://rutube.ru/api/video/%s/?format=json' % video_id,
|
||||||
video_id, 'Downloading video JSON')
|
video_id, 'Downloading video JSON')
|
||||||
|
|
||||||
# Some videos don't have the author field
|
info = self._extract_video(video, video_id)
|
||||||
author = video.get('author') or {}
|
|
||||||
|
|
||||||
options = self._download_json(
|
options = self._download_json(
|
||||||
'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
|
'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
|
||||||
@@ -79,19 +120,8 @@ class RutubeIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
info['formats'] = formats
|
||||||
'id': video['id'],
|
return info
|
||||||
'title': video['title'],
|
|
||||||
'description': video['description'],
|
|
||||||
'duration': video['duration'],
|
|
||||||
'view_count': video['hits'],
|
|
||||||
'formats': formats,
|
|
||||||
'thumbnail': video['thumbnail_url'],
|
|
||||||
'uploader': author.get('name'),
|
|
||||||
'uploader_id': compat_str(author['id']) if author else None,
|
|
||||||
'upload_date': unified_strdate(video['created_ts']),
|
|
||||||
'age_limit': 18 if video['is_adult'] else 0,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class RutubeEmbedIE(InfoExtractor):
|
class RutubeEmbedIE(InfoExtractor):
|
||||||
@@ -103,7 +133,8 @@ class RutubeEmbedIE(InfoExtractor):
|
|||||||
'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
|
'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'a10e53b86e8f349080f718582ce4c661',
|
'id': 'a10e53b86e8f349080f718582ce4c661',
|
||||||
'ext': 'mp4',
|
'ext': 'flv',
|
||||||
|
'timestamp': 1387830582,
|
||||||
'upload_date': '20131223',
|
'upload_date': '20131223',
|
||||||
'uploader_id': '297833',
|
'uploader_id': '297833',
|
||||||
'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89',
|
'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89',
|
||||||
@@ -111,7 +142,7 @@ class RutubeEmbedIE(InfoExtractor):
|
|||||||
'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89',
|
'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': 'Requires ffmpeg',
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://rutube.ru/play/embed/8083783',
|
'url': 'http://rutube.ru/play/embed/8083783',
|
||||||
@@ -125,10 +156,51 @@ class RutubeEmbedIE(InfoExtractor):
|
|||||||
canonical_url = self._html_search_regex(
|
canonical_url = self._html_search_regex(
|
||||||
r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage,
|
r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage,
|
||||||
'Canonical URL')
|
'Canonical URL')
|
||||||
return self.url_result(canonical_url, 'Rutube')
|
return self.url_result(canonical_url, RutubeIE.ie_key())
|
||||||
|
|
||||||
|
|
||||||
class RutubeChannelIE(InfoExtractor):
|
class RutubePlaylistBaseIE(RutubeBaseIE):
|
||||||
|
def _next_page_url(self, page_num, playlist_id, *args, **kwargs):
|
||||||
|
return self._PAGE_TEMPLATE % (playlist_id, page_num)
|
||||||
|
|
||||||
|
def _entries(self, playlist_id, *args, **kwargs):
|
||||||
|
next_page_url = None
|
||||||
|
for pagenum in itertools.count(1):
|
||||||
|
page = self._download_json(
|
||||||
|
next_page_url or self._next_page_url(
|
||||||
|
pagenum, playlist_id, *args, **kwargs),
|
||||||
|
playlist_id, 'Downloading page %s' % pagenum)
|
||||||
|
|
||||||
|
results = page.get('results')
|
||||||
|
if not results or not isinstance(results, list):
|
||||||
|
break
|
||||||
|
|
||||||
|
for result in results:
|
||||||
|
video_url = result.get('video_url')
|
||||||
|
if not video_url or not isinstance(video_url, compat_str):
|
||||||
|
continue
|
||||||
|
entry = self._extract_video(result, require_title=False)
|
||||||
|
entry.update({
|
||||||
|
'_type': 'url',
|
||||||
|
'url': video_url,
|
||||||
|
'ie_key': RutubeIE.ie_key(),
|
||||||
|
})
|
||||||
|
yield entry
|
||||||
|
|
||||||
|
next_page_url = page.get('next')
|
||||||
|
if not next_page_url or not page.get('has_next'):
|
||||||
|
break
|
||||||
|
|
||||||
|
def _extract_playlist(self, playlist_id, *args, **kwargs):
|
||||||
|
return self.playlist_result(
|
||||||
|
self._entries(playlist_id, *args, **kwargs),
|
||||||
|
playlist_id, kwargs.get('playlist_name'))
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
return self._extract_playlist(self._match_id(url))
|
||||||
|
|
||||||
|
|
||||||
|
class RutubeChannelIE(RutubePlaylistBaseIE):
|
||||||
IE_NAME = 'rutube:channel'
|
IE_NAME = 'rutube:channel'
|
||||||
IE_DESC = 'Rutube channels'
|
IE_DESC = 'Rutube channels'
|
||||||
_VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)'
|
_VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)'
|
||||||
@@ -142,27 +214,8 @@ class RutubeChannelIE(InfoExtractor):
|
|||||||
|
|
||||||
_PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'
|
_PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'
|
||||||
|
|
||||||
def _extract_videos(self, channel_id, channel_title=None):
|
|
||||||
entries = []
|
|
||||||
for pagenum in itertools.count(1):
|
|
||||||
page = self._download_json(
|
|
||||||
self._PAGE_TEMPLATE % (channel_id, pagenum),
|
|
||||||
channel_id, 'Downloading page %s' % pagenum)
|
|
||||||
results = page['results']
|
|
||||||
if not results:
|
|
||||||
break
|
|
||||||
entries.extend(self.url_result(result['video_url'], 'Rutube') for result in results)
|
|
||||||
if not page['has_next']:
|
|
||||||
break
|
|
||||||
return self.playlist_result(entries, channel_id, channel_title)
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
class RutubeMovieIE(RutubePlaylistBaseIE):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
channel_id = mobj.group('id')
|
|
||||||
return self._extract_videos(channel_id)
|
|
||||||
|
|
||||||
|
|
||||||
class RutubeMovieIE(RutubeChannelIE):
|
|
||||||
IE_NAME = 'rutube:movie'
|
IE_NAME = 'rutube:movie'
|
||||||
IE_DESC = 'Rutube movies'
|
IE_DESC = 'Rutube movies'
|
||||||
_VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)'
|
_VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)'
|
||||||
@@ -176,11 +229,11 @@ class RutubeMovieIE(RutubeChannelIE):
|
|||||||
movie = self._download_json(
|
movie = self._download_json(
|
||||||
self._MOVIE_TEMPLATE % movie_id, movie_id,
|
self._MOVIE_TEMPLATE % movie_id, movie_id,
|
||||||
'Downloading movie JSON')
|
'Downloading movie JSON')
|
||||||
movie_name = movie['name']
|
return self._extract_playlist(
|
||||||
return self._extract_videos(movie_id, movie_name)
|
movie_id, playlist_name=movie.get('name'))
|
||||||
|
|
||||||
|
|
||||||
class RutubePersonIE(RutubeChannelIE):
|
class RutubePersonIE(RutubePlaylistBaseIE):
|
||||||
IE_NAME = 'rutube:person'
|
IE_NAME = 'rutube:person'
|
||||||
IE_DESC = 'Rutube person videos'
|
IE_DESC = 'Rutube person videos'
|
||||||
_VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)'
|
_VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)'
|
||||||
@@ -193,3 +246,37 @@ class RutubePersonIE(RutubeChannelIE):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
_PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
|
_PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
|
||||||
|
|
||||||
|
|
||||||
|
class RutubePlaylistIE(RutubePlaylistBaseIE):
|
||||||
|
IE_NAME = 'rutube:playlist'
|
||||||
|
IE_DESC = 'Rutube playlists'
|
||||||
|
_VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?.*?\bpl_id=(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://rutube.ru/video/cecd58ed7d531fc0f3d795d51cee9026/?pl_id=3097&pl_type=tag',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3097',
|
||||||
|
},
|
||||||
|
'playlist_count': 27,
|
||||||
|
}, {
|
||||||
|
'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
_PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/%s/%s/?page=%s&format=json'
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
if not super(RutubePlaylistIE, cls).suitable(url):
|
||||||
|
return False
|
||||||
|
params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||||
|
return params.get('pl_type', [None])[0] and int_or_none(params.get('pl_id', [None])[0])
|
||||||
|
|
||||||
|
def _next_page_url(self, page_num, playlist_id, item_kind):
|
||||||
|
return self._PAGE_TEMPLATE % (item_kind, playlist_id, page_num)
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||||
|
playlist_kind = qs['pl_type'][0]
|
||||||
|
playlist_id = qs['pl_id'][0]
|
||||||
|
return self._extract_playlist(playlist_id, item_kind=playlist_kind)
|
||||||
|
@@ -1,8 +1,8 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
import itertools
|
import itertools
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import (
|
from .common import (
|
||||||
InfoExtractor,
|
InfoExtractor,
|
||||||
@@ -17,6 +17,7 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
update_url_query,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -120,6 +121,21 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
'license': 'cc-by-sa',
|
'license': 'cc-by-sa',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# private link, downloadable format
|
||||||
|
{
|
||||||
|
'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd',
|
||||||
|
'md5': '64a60b16e617d41d0bef032b7f55441e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '340344461',
|
||||||
|
'ext': 'wav',
|
||||||
|
'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
|
||||||
|
'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
|
||||||
|
'uploader': 'Ori Uplift Music',
|
||||||
|
'upload_date': '20170831',
|
||||||
|
'duration': 7449,
|
||||||
|
'license': 'all-rights-reserved',
|
||||||
|
},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
_CLIENT_ID = 'JlZIsxg2hY5WnBgtn3jfS0UYCl0K8DOg'
|
_CLIENT_ID = 'JlZIsxg2hY5WnBgtn3jfS0UYCl0K8DOg'
|
||||||
@@ -160,11 +176,13 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
'license': info.get('license'),
|
'license': info.get('license'),
|
||||||
}
|
}
|
||||||
formats = []
|
formats = []
|
||||||
|
query = {'client_id': self._CLIENT_ID}
|
||||||
|
if secret_token is not None:
|
||||||
|
query['secret_token'] = secret_token
|
||||||
if info.get('downloadable', False):
|
if info.get('downloadable', False):
|
||||||
# We can build a direct link to the song
|
# We can build a direct link to the song
|
||||||
format_url = (
|
format_url = update_url_query(
|
||||||
'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(
|
'https://api.soundcloud.com/tracks/%s/download' % track_id, query)
|
||||||
track_id, self._CLIENT_ID))
|
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': 'download',
|
'format_id': 'download',
|
||||||
'ext': info.get('original_format', 'mp3'),
|
'ext': info.get('original_format', 'mp3'),
|
||||||
@@ -176,10 +194,7 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
# We have to retrieve the url
|
# We have to retrieve the url
|
||||||
format_dict = self._download_json(
|
format_dict = self._download_json(
|
||||||
'https://api.soundcloud.com/i1/tracks/%s/streams' % track_id,
|
'https://api.soundcloud.com/i1/tracks/%s/streams' % track_id,
|
||||||
track_id, 'Downloading track url', query={
|
track_id, 'Downloading track url', query=query)
|
||||||
'client_id': self._CLIENT_ID,
|
|
||||||
'secret_token': secret_token,
|
|
||||||
})
|
|
||||||
|
|
||||||
for key, stream_url in format_dict.items():
|
for key, stream_url in format_dict.items():
|
||||||
abr = int_or_none(self._search_regex(
|
abr = int_or_none(self._search_regex(
|
||||||
@@ -216,7 +231,7 @@ class SoundcloudIE(InfoExtractor):
|
|||||||
# cannot be always used, sometimes it can give an HTTP 404 error
|
# cannot be always used, sometimes it can give an HTTP 404 error
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': 'fallback',
|
'format_id': 'fallback',
|
||||||
'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
|
'url': update_url_query(info['stream_url'], query),
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
@@ -5,7 +5,6 @@ from .common import InfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
ExtractorError,
|
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
@@ -78,8 +77,10 @@ class TouTvIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
path = self._match_id(url)
|
path = self._match_id(url)
|
||||||
metadata = self._download_json('http://ici.tou.tv/presentation/%s' % path, path)
|
metadata = self._download_json('http://ici.tou.tv/presentation/%s' % path, path)
|
||||||
|
# IsDrm does not necessarily mean the video is DRM protected (see
|
||||||
|
# https://github.com/rg3/youtube-dl/issues/13994).
|
||||||
if metadata.get('IsDrm'):
|
if metadata.get('IsDrm'):
|
||||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
self.report_warning('This video is probably DRM protected.', path)
|
||||||
video_id = metadata['IdMedia']
|
video_id = metadata['IdMedia']
|
||||||
details = metadata['Details']
|
details = metadata['Details']
|
||||||
title = details['OriginalTitle']
|
title = details['OriginalTitle']
|
||||||
|
@@ -7,6 +7,7 @@ import hashlib
|
|||||||
import json
|
import json
|
||||||
|
|
||||||
from .adobepass import AdobePassIE
|
from .adobepass import AdobePassIE
|
||||||
|
from .youtube import YoutubeIE
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_HTTPError
|
from ..compat import compat_HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@@ -261,11 +262,9 @@ class ViceArticleIE(InfoExtractor):
|
|||||||
if embed_code:
|
if embed_code:
|
||||||
return _url_res('ooyala:%s' % embed_code, 'Ooyala')
|
return _url_res('ooyala:%s' % embed_code, 'Ooyala')
|
||||||
|
|
||||||
youtube_url = self._html_search_regex(
|
youtube_url = YoutubeIE._extract_url(body)
|
||||||
r'<iframe[^>]+src="(.*youtube\.com/.*)"',
|
|
||||||
body, 'YouTube URL', default=None)
|
|
||||||
if youtube_url:
|
if youtube_url:
|
||||||
return _url_res(youtube_url, 'Youtube')
|
return _url_res(youtube_url, YoutubeIE.ie_key())
|
||||||
|
|
||||||
video_url = self._html_search_regex(
|
video_url = self._html_search_regex(
|
||||||
r'data-video-url="([^"]+)"',
|
r'data-video-url="([^"]+)"',
|
||||||
|
@@ -263,29 +263,35 @@ class VidmeListBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
class VidmeUserIE(VidmeListBaseIE):
|
class VidmeUserIE(VidmeListBaseIE):
|
||||||
IE_NAME = 'vidme:user'
|
IE_NAME = 'vidme:user'
|
||||||
_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{6,})(?!/likes)(?:[^\da-zA-Z]|$)'
|
_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z_-]{6,})(?!/likes)(?:[^\da-zA-Z_-]|$)'
|
||||||
_API_ITEM = 'list'
|
_API_ITEM = 'list'
|
||||||
_TITLE = 'Videos'
|
_TITLE = 'Videos'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://vid.me/EFARCHIVE',
|
'url': 'https://vid.me/MasakoX',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3834632',
|
'id': '16112341',
|
||||||
'title': 'EFARCHIVE - %s' % _TITLE,
|
'title': 'MasakoX - %s' % _TITLE,
|
||||||
},
|
},
|
||||||
'playlist_mincount': 238,
|
'playlist_mincount': 191,
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://vid.me/unsQuare_netWork',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
|
||||||
class VidmeUserLikesIE(VidmeListBaseIE):
|
class VidmeUserLikesIE(VidmeListBaseIE):
|
||||||
IE_NAME = 'vidme:user:likes'
|
IE_NAME = 'vidme:user:likes'
|
||||||
_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{6,})/likes'
|
_VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z_-]{6,})/likes'
|
||||||
_API_ITEM = 'likes'
|
_API_ITEM = 'likes'
|
||||||
_TITLE = 'Likes'
|
_TITLE = 'Likes'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://vid.me/ErinAlexis/likes',
|
'url': 'https://vid.me/ErinAlexis/likes',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '6483530',
|
'id': '6483530',
|
||||||
'title': 'ErinAlexis - %s' % _TITLE,
|
'title': 'ErinAlexis - %s' % _TITLE,
|
||||||
},
|
},
|
||||||
'playlist_mincount': 415,
|
'playlist_mincount': 415,
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://vid.me/Kaleidoscope-Ish/likes',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
@@ -4,12 +4,14 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urlparse,
|
compat_HTTPError,
|
||||||
compat_str,
|
compat_str,
|
||||||
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
parse_duration,
|
ExtractorError,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -128,9 +130,16 @@ class ViideaIE(InfoExtractor):
|
|||||||
|
|
||||||
base_url = self._proto_relative_url(cfg['livepipe'], 'http:')
|
base_url = self._proto_relative_url(cfg['livepipe'], 'http:')
|
||||||
|
|
||||||
lecture_data = self._download_json(
|
try:
|
||||||
'%s/site/api/lecture/%s?format=json' % (base_url, lecture_id),
|
lecture_data = self._download_json(
|
||||||
lecture_id)['lecture'][0]
|
'%s/site/api/lecture/%s?format=json' % (base_url, lecture_id),
|
||||||
|
lecture_id)['lecture'][0]
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||||
|
msg = self._parse_json(
|
||||||
|
e.cause.read().decode('utf-8'), lecture_id)
|
||||||
|
raise ExtractorError(msg['detail'], expected=True)
|
||||||
|
raise
|
||||||
|
|
||||||
lecture_info = {
|
lecture_info = {
|
||||||
'id': lecture_id,
|
'id': lecture_id,
|
||||||
|
@@ -25,6 +25,7 @@ from ..utils import (
|
|||||||
from .dailymotion import DailymotionIE
|
from .dailymotion import DailymotionIE
|
||||||
from .pladform import PladformIE
|
from .pladform import PladformIE
|
||||||
from .vimeo import VimeoIE
|
from .vimeo import VimeoIE
|
||||||
|
from .youtube import YoutubeIE
|
||||||
|
|
||||||
|
|
||||||
class VKBaseIE(InfoExtractor):
|
class VKBaseIE(InfoExtractor):
|
||||||
@@ -345,11 +346,9 @@ class VKIE(VKBaseIE):
|
|||||||
if re.search(error_re, info_page):
|
if re.search(error_re, info_page):
|
||||||
raise ExtractorError(error_msg % video_id, expected=True)
|
raise ExtractorError(error_msg % video_id, expected=True)
|
||||||
|
|
||||||
youtube_url = self._search_regex(
|
youtube_url = YoutubeIE._extract_url(info_page)
|
||||||
r'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"',
|
|
||||||
info_page, 'youtube iframe', default=None)
|
|
||||||
if youtube_url:
|
if youtube_url:
|
||||||
return self.url_result(youtube_url, 'Youtube')
|
return self.url_result(youtube_url, ie=YoutubeIE.ie_key())
|
||||||
|
|
||||||
vimeo_url = VimeoIE._extract_url(url, info_page)
|
vimeo_url = VimeoIE._extract_url(url, info_page)
|
||||||
if vimeo_url is not None:
|
if vimeo_url is not None:
|
||||||
|
@@ -16,6 +16,7 @@ from ..jsinterp import JSInterpreter
|
|||||||
from ..swfinterp import SWFInterpreter
|
from ..swfinterp import SWFInterpreter
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_chr,
|
compat_chr,
|
||||||
|
compat_kwargs,
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
compat_urllib_parse_unquote_plus,
|
compat_urllib_parse_unquote_plus,
|
||||||
@@ -245,6 +246,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
def _download_webpage(self, *args, **kwargs):
|
||||||
|
kwargs.setdefault('query', {})['disable_polymer'] = 'true'
|
||||||
|
return super(YoutubeBaseInfoExtractor, self)._download_webpage(
|
||||||
|
*args, **compat_kwargs(kwargs))
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
if self._downloader is None:
|
if self._downloader is None:
|
||||||
return
|
return
|
||||||
@@ -1003,6 +1009,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'Skipping DASH manifest',
|
'Skipping DASH manifest',
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# The following content has been identified by the YouTube community
|
||||||
|
# as inappropriate or offensive to some audiences.
|
||||||
|
'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6SJNVb0GnPI',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Race Differences in Intelligence',
|
||||||
|
'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
|
||||||
|
'duration': 965,
|
||||||
|
'upload_date': '20140124',
|
||||||
|
'uploader': 'New Century Foundation',
|
||||||
|
'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
|
||||||
|
'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
|
||||||
|
'license': 'Standard YouTube License',
|
||||||
|
'view_count': int,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
# itag 212
|
# itag 212
|
||||||
'url': '1t24XAntNCY',
|
'url': '1t24XAntNCY',
|
||||||
@@ -1347,6 +1374,43 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
playback_url, video_id, 'Marking watched',
|
playback_url, video_id, 'Marking watched',
|
||||||
'Unable to mark watched', fatal=False)
|
'Unable to mark watched', fatal=False)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_urls(webpage):
|
||||||
|
# Embedded YouTube player
|
||||||
|
entries = [
|
||||||
|
unescapeHTML(mobj.group('url'))
|
||||||
|
for mobj in re.finditer(r'''(?x)
|
||||||
|
(?:
|
||||||
|
<iframe[^>]+?src=|
|
||||||
|
data-video-url=|
|
||||||
|
<embed[^>]+?src=|
|
||||||
|
embedSWF\(?:\s*|
|
||||||
|
<object[^>]+data=|
|
||||||
|
new\s+SWFObject\(
|
||||||
|
)
|
||||||
|
(["\'])
|
||||||
|
(?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
|
||||||
|
(?:embed|v|p)/.+?)
|
||||||
|
\1''', webpage)]
|
||||||
|
|
||||||
|
# lazyYT YouTube embed
|
||||||
|
entries.extend(list(map(
|
||||||
|
unescapeHTML,
|
||||||
|
re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
|
||||||
|
|
||||||
|
# Wordpress "YouTube Video Importer" plugin
|
||||||
|
matches = re.findall(r'''(?x)<div[^>]+
|
||||||
|
class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
|
||||||
|
data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
|
||||||
|
entries.extend(m[-1] for m in matches)
|
||||||
|
|
||||||
|
return entries
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_url(webpage):
|
||||||
|
urls = YoutubeIE._extract_urls(webpage)
|
||||||
|
return urls[0] if urls else None
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def extract_id(cls, url):
|
def extract_id(cls, url):
|
||||||
mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
|
mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
|
||||||
@@ -1437,9 +1501,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
if dash_mpd and dash_mpd[0] not in dash_mpds:
|
if dash_mpd and dash_mpd[0] not in dash_mpds:
|
||||||
dash_mpds.append(dash_mpd[0])
|
dash_mpds.append(dash_mpd[0])
|
||||||
|
|
||||||
|
is_live = None
|
||||||
|
view_count = None
|
||||||
|
|
||||||
|
def extract_view_count(v_info):
|
||||||
|
return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
|
||||||
|
|
||||||
# Get video info
|
# Get video info
|
||||||
embed_webpage = None
|
embed_webpage = None
|
||||||
is_live = None
|
|
||||||
if re.search(r'player-age-gate-content">', video_webpage) is not None:
|
if re.search(r'player-age-gate-content">', video_webpage) is not None:
|
||||||
age_gate = True
|
age_gate = True
|
||||||
# We simulate the access to the video from www.youtube.com/v/{video_id}
|
# We simulate the access to the video from www.youtube.com/v/{video_id}
|
||||||
@@ -1509,6 +1578,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
continue
|
continue
|
||||||
get_video_info = compat_parse_qs(video_info_webpage)
|
get_video_info = compat_parse_qs(video_info_webpage)
|
||||||
add_dash_mpd(get_video_info)
|
add_dash_mpd(get_video_info)
|
||||||
|
if view_count is None:
|
||||||
|
view_count = extract_view_count(get_video_info)
|
||||||
if not video_info:
|
if not video_info:
|
||||||
video_info = get_video_info
|
video_info = get_video_info
|
||||||
if 'token' in get_video_info:
|
if 'token' in get_video_info:
|
||||||
@@ -1592,10 +1663,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
return self.playlist_result(entries, video_id, video_title, video_description)
|
return self.playlist_result(entries, video_id, video_title, video_description)
|
||||||
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
|
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
|
||||||
|
|
||||||
if 'view_count' in video_info:
|
if view_count is None:
|
||||||
view_count = int(video_info['view_count'][0])
|
view_count = extract_view_count(video_info)
|
||||||
else:
|
|
||||||
view_count = None
|
|
||||||
|
|
||||||
# Check for "rental" videos
|
# Check for "rental" videos
|
||||||
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
|
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
|
||||||
@@ -1639,10 +1708,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
if not upload_date:
|
if not upload_date:
|
||||||
upload_date = self._search_regex(
|
upload_date = self._search_regex(
|
||||||
[r'(?s)id="eow-date.*?>(.*?)</span>',
|
[r'(?s)id="eow-date.*?>(.*?)</span>',
|
||||||
r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
|
r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
|
||||||
video_webpage, 'upload date', default=None)
|
video_webpage, 'upload date', default=None)
|
||||||
if upload_date:
|
|
||||||
upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
|
|
||||||
upload_date = unified_strdate(upload_date)
|
upload_date = unified_strdate(upload_date)
|
||||||
|
|
||||||
video_license = self._html_search_regex(
|
video_license = self._html_search_regex(
|
||||||
@@ -2028,7 +2095,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
|||||||
|
|
|
|
||||||
(%(playlist_id)s)
|
(%(playlist_id)s)
|
||||||
)""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
|
)""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
|
||||||
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true'
|
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
|
||||||
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
|
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
|
||||||
IE_NAME = 'youtube:playlist'
|
IE_NAME = 'youtube:playlist'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
@@ -596,7 +596,7 @@ def unescapeHTML(s):
|
|||||||
assert type(s) == compat_str
|
assert type(s) == compat_str
|
||||||
|
|
||||||
return re.sub(
|
return re.sub(
|
||||||
r'&([^;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
|
r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
|
||||||
|
|
||||||
|
|
||||||
def get_subprocess_encoding():
|
def get_subprocess_encoding():
|
||||||
@@ -1815,6 +1815,10 @@ def float_or_none(v, scale=1, invscale=1, default=None):
|
|||||||
return default
|
return default
|
||||||
|
|
||||||
|
|
||||||
|
def bool_or_none(v, default=None):
|
||||||
|
return v if isinstance(v, bool) else default
|
||||||
|
|
||||||
|
|
||||||
def strip_or_none(v):
|
def strip_or_none(v):
|
||||||
return None if v is None else v.strip()
|
return None if v is None else v.strip()
|
||||||
|
|
||||||
|
@@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2017.08.18'
|
__version__ = '2017.09.11'
|
||||||
|
Reference in New Issue
Block a user