Compare commits
228 Commits
2014.11.24
...
2014.12.12
Author | SHA1 | Date | |
---|---|---|---|
|
da3a2d8137 | ||
|
13dcfd41bd | ||
|
e56190b378 | ||
|
a79553f39f | ||
|
b3efb3ebae | ||
|
68d301ffd4 | ||
|
3b0bec8d11 | ||
|
412c617d0f | ||
|
751536f5c8 | ||
|
025f30ba38 | ||
|
0d2fb1d193 | ||
|
82b34105d3 | ||
|
73aeb2dc56 | ||
|
c6973bd412 | ||
|
f8780e6d11 | ||
|
e2f89ec7aa | ||
|
62651c556a | ||
|
bf94e38d3d | ||
|
4f97852316 | ||
|
16040f46d6 | ||
|
d068ba24f3 | ||
|
f5e43bc695 | ||
|
6a5308ab49 | ||
|
63e0f29564 | ||
|
42bdd9d051 | ||
|
4e40de6e2a | ||
|
0fa2b899d1 | ||
|
f17e4c9c28 | ||
|
807962f4a1 | ||
|
9c1aa1d668 | ||
|
69f491f14e | ||
|
cb007f47c1 | ||
|
9abd500a74 | ||
|
cf68bcaeff | ||
|
cbe2bd914d | ||
|
75111274ed | ||
|
624dcebff6 | ||
|
9684f17cde | ||
|
e52a40abf7 | ||
|
0daa05961b | ||
|
158731f83e | ||
|
24270b0301 | ||
|
3c1b81b957 | ||
|
45c24df512 | ||
|
bf671b605e | ||
|
09c82fbc9a | ||
|
3bca0409fe | ||
|
d6f78a354d | ||
|
e0b9d47387 | ||
|
f8795e102b | ||
|
4bb4a18876 | ||
|
8560c61842 | ||
|
a81bbebf44 | ||
|
72e3ffeb74 | ||
|
2fc9f2b41d | ||
|
5f3544baa3 | ||
|
da27660014 | ||
|
b8a6114309 | ||
|
774e208f94 | ||
|
f20b52778b | ||
|
83e865a370 | ||
|
b89a938687 | ||
|
e89a2aabed | ||
|
f58766ce5c | ||
|
15644a40df | ||
|
d4800f3c3f | ||
|
09a5dd2d3b | ||
|
819039ee63 | ||
|
ce36339575 | ||
|
684712076f | ||
|
603c92080f | ||
|
16ae61f655 | ||
|
0ef4d4ab7e | ||
|
4542535f94 | ||
|
6a52eed80e | ||
|
acf5cbfe93 | ||
|
8d1c8cae9c | ||
|
c84890f708 | ||
|
6d0886204a | ||
|
04d02a9d57 | ||
|
6ac4e8065a | ||
|
b82f815f37 | ||
|
158f8cadc0 | ||
|
7d70cf4157 | ||
|
6591fdf51f | ||
|
47d7c64274 | ||
|
db175341c7 | ||
|
9ff6772790 | ||
|
5f9b83944d | ||
|
f6735be4da | ||
|
6a3e0103bb | ||
|
0b5cc1983e | ||
|
1a9f8b1ad4 | ||
|
7115599121 | ||
|
0df23ba9f9 | ||
|
58daf5ebed | ||
|
1a7c6c69d3 | ||
|
045c48847a | ||
|
e638e83662 | ||
|
90644a6843 | ||
|
d958fa9ff9 | ||
|
ebb6419960 | ||
|
122c2f87c1 | ||
|
a154eb3d15 | ||
|
81028ff9eb | ||
|
e8df5cee12 | ||
|
ab07963b5c | ||
|
7e26084d09 | ||
|
4349c07dd7 | ||
|
1139a54d9b | ||
|
b128c9ed68 | ||
|
9776bc7f57 | ||
|
e703fc66c2 | ||
|
39c52bbd32 | ||
|
6219802165 | ||
|
8b97115358 | ||
|
810fb84d5e | ||
|
5f5e993dc6 | ||
|
191cc41ba4 | ||
|
abe70fa044 | ||
|
7f142293df | ||
|
d4e06d4a83 | ||
|
ecd7ea1e6b | ||
|
b92c548693 | ||
|
eecd6a467d | ||
|
dce2a3cf9e | ||
|
9095aa38ac | ||
|
0403b06985 | ||
|
de9bd74bc2 | ||
|
233d37fb6b | ||
|
c627f7d48c | ||
|
163c8babaa | ||
|
6708542099 | ||
|
ea2ee40357 | ||
|
62d8b56655 | ||
|
c492970b4b | ||
|
ac5633592a | ||
|
706d7d4ee7 | ||
|
752c8c9b76 | ||
|
b1399a144d | ||
|
05177b34a6 | ||
|
c41a9650c3 | ||
|
df015c69ea | ||
|
1434bffa1f | ||
|
94aa25b995 | ||
|
d128cfe393 | ||
|
954f36f890 | ||
|
19e92770c9 | ||
|
95c673a148 | ||
|
a196a53265 | ||
|
3266f0c68e | ||
|
1940fadd53 | ||
|
03fd72d996 | ||
|
f2b44a2513 | ||
|
c522adb1f0 | ||
|
7160532d41 | ||
|
4e62ebe250 | ||
|
4472f84f0c | ||
|
b766eb2707 | ||
|
10a404c335 | ||
|
c056efa2e3 | ||
|
283ac8d592 | ||
|
313d4572ce | ||
|
42939b6129 | ||
|
37ea8164d3 | ||
|
8c810a7db3 | ||
|
248a0b890f | ||
|
96b7c7fe3f | ||
|
e987e91fcc | ||
|
cb6444e197 | ||
|
93b8a10e3b | ||
|
4207558e8b | ||
|
ad0d800fc3 | ||
|
e232f787f6 | ||
|
155f9550c0 | ||
|
72476fcc42 | ||
|
29e950f7c8 | ||
|
7c8ea53b96 | ||
|
dcddc10a50 | ||
|
a1008af412 | ||
|
61c0663c1e | ||
|
81a7a521c5 | ||
|
e293711802 | ||
|
ceb3367320 | ||
|
a03aaaed2e | ||
|
e075a44afb | ||
|
8865bdeb37 | ||
|
3aa578cad2 | ||
|
d3b5101a91 | ||
|
5c32110114 | ||
|
24144e3b8d | ||
|
b3034f9df7 | ||
|
4c6d2ff8dc | ||
|
faf3494894 | ||
|
535a66ef66 | ||
|
5c40bba82f | ||
|
855dc479c2 | ||
|
0792d5634e | ||
|
e91cdcae1a | ||
|
27e1400f55 | ||
|
e0938e7731 | ||
|
b72823a0a4 | ||
|
673cf0e773 | ||
|
f8aace93cd | ||
|
80310134e0 | ||
|
4d2d638df4 | ||
|
0e44f90e18 | ||
|
15938ab67a | ||
|
ab4ee31eb1 | ||
|
b061ea6e9f | ||
|
4aae94f9d0 | ||
|
acda92f6bc | ||
|
ddfd0f2727 | ||
|
d0720e7118 | ||
|
4e262a8838 | ||
|
b9ed3af343 | ||
|
63c9b2c1d9 | ||
|
65f3a228b1 | ||
|
3004ae2c3a | ||
|
d9836a5917 | ||
|
be64b5b098 | ||
|
c3e74731c2 | ||
|
c920d7f00d | ||
|
0bbf12239c | ||
|
70d68eb46f | ||
|
c553fe5d29 | ||
|
f0c3d729d7 | ||
|
1cdedfee10 |
6
AUTHORS
6
AUTHORS
@@ -86,3 +86,9 @@ Mauroy Sébastien
|
||||
William Sewell
|
||||
Dao Hoang Son
|
||||
Oskar Jauch
|
||||
Matthew Rayfield
|
||||
t0mm0
|
||||
Tithen-Firion
|
||||
Zack Fernandes
|
||||
cryptonaut
|
||||
Adrian Kretz
|
||||
|
136
CONTRIBUTING.md
Normal file
136
CONTRIBUTING.md
Normal file
@@ -0,0 +1,136 @@
|
||||
Please include the full output of the command when run with `--verbose`. The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
|
||||
|
||||
Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist):
|
||||
|
||||
### Is the description of the issue itself sufficient?
|
||||
|
||||
We often get issue reports that we cannot really decipher. While in most cases we eventually get the required information after asking back multiple times, this poses an unnecessary drain on our resources. Many contributors, including myself, are also not native speakers, so we may misread some parts.
|
||||
|
||||
So please elaborate on what feature you are requesting, or what bug you want to be fixed. Make sure that it's obvious
|
||||
|
||||
- What the problem is
|
||||
- How it could be fixed
|
||||
- What your proposed solution would look like
|
||||
|
||||
If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a committer myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
|
||||
|
||||
For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
|
||||
|
||||
Site support requests **must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
|
||||
|
||||
### Are you using the latest version?
|
||||
|
||||
Before reporting any issue, type youtube-dl -U. This should report that you're up-to-date. About 20% of the reports we receive are already fixed, but people are using outdated versions. This goes for feature requests as well.
|
||||
|
||||
### Is the issue already documented?
|
||||
|
||||
Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or at https://github.com/rg3/youtube-dl/search?type=Issues . If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
|
||||
|
||||
### Why are existing options not enough?
|
||||
|
||||
Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#synopsis). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
|
||||
|
||||
### Is there enough context in your bug report?
|
||||
|
||||
People want to solve problems, and often think they do us a favor by breaking down their larger problems (e.g. wanting to skip already downloaded files) to a specific request (e.g. requesting us to look whether the file exists before downloading the info page). However, what often happens is that they break down the problem into two steps: One simple, and one impossible (or extremely complicated one).
|
||||
|
||||
We are then presented with a very complicated request when the original problem could be solved far easier, e.g. by recording the downloaded video IDs in a separate file. To avoid this, you must include the greater context where it is non-obvious. In particular, every feature request that does not consist of adding support for a new site should contain a use case scenario that explains in what situation the missing feature would be useful.
|
||||
|
||||
### Does the issue involve one problem, and one problem only?
|
||||
|
||||
Some of our users seem to think there is a limit of issues they can or should open. There is no limit of issues they can or should open. While it may seem appealing to be able to dump all your issues into one ticket, that means that someone who solves one of your issues cannot mark the issue as closed. Typically, reporting a bunch of issues leads to the ticket lingering since nobody wants to attack that behemoth, until someone mercifully splits the issue into multiple ones.
|
||||
|
||||
In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, Whitehouse podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of youtube-dl that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service.
|
||||
|
||||
### Is anyone going to need the feature?
|
||||
|
||||
Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
|
||||
|
||||
### Is your question about youtube-dl?
|
||||
|
||||
It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different or even the reporter's own application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.
|
||||
|
||||
# DEVELOPER INSTRUCTIONS
|
||||
|
||||
Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
|
||||
|
||||
To run youtube-dl as a developer, you don't need to build anything either. Simply execute
|
||||
|
||||
python -m youtube_dl
|
||||
|
||||
To run the test, simply invoke your favorite test runner, or execute a test file directly; any of the following work:
|
||||
|
||||
python -m unittest discover
|
||||
python test/test_download.py
|
||||
nosetests
|
||||
|
||||
If you want to create a build of youtube-dl yourself, you'll need
|
||||
|
||||
* python
|
||||
* make
|
||||
* pandoc
|
||||
* zip
|
||||
* nosetests
|
||||
|
||||
### Adding support for a new site
|
||||
|
||||
If you want to add support for a new site, you can follow this quick list (assuming your service is called `yourextractor`):
|
||||
|
||||
1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
|
||||
2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git`
|
||||
3. Start a new git branch with `cd youtube-dl; git checkout -b yourextractor`
|
||||
4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
|
||||
```python
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class YourExtractorIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://yourextractor.com/watch/42',
|
||||
'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
|
||||
'info_dict': {
|
||||
'id': '42',
|
||||
'ext': 'mp4',
|
||||
'title': 'Video title goes here',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
# TODO more properties, either as:
|
||||
# * A value
|
||||
# * MD5 checksum; start the string with md5:
|
||||
# * A regular expression; start the string with re:
|
||||
# * Any Python type (for example int or float)
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# TODO more code goes here, for example ...
|
||||
title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
# TODO more properties (see youtube_dl/extractor/common.py)
|
||||
}
|
||||
```
|
||||
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want.
|
||||
8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501).
|
||||
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||
|
||||
$ git add youtube_dl/extractor/__init__.py
|
||||
$ git add youtube_dl/extractor/yourextractor.py
|
||||
$ git commit -m '[yourextractor] Add new extractor'
|
||||
$ git push origin yourextractor
|
||||
|
||||
10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
|
||||
In any case, thank you very much for your contributions!
|
||||
|
7
Makefile
7
Makefile
@@ -1,7 +1,7 @@
|
||||
all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
|
||||
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
|
||||
|
||||
clean:
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json CONTRIBUTING.md.tmp
|
||||
|
||||
cleanall: clean
|
||||
rm -f youtube-dl youtube-dl.exe
|
||||
@@ -56,6 +56,9 @@ youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
|
||||
README.md: youtube_dl/*.py youtube_dl/*/*.py
|
||||
COLUMNS=80 python -m youtube_dl --help | python devscripts/make_readme.py
|
||||
|
||||
CONTRIBUTING.md: README.md
|
||||
python devscripts/make_contributing.py README.md CONTRIBUTING.md
|
||||
|
||||
README.txt: README.md
|
||||
pandoc -f markdown -t plain README.md -o README.txt
|
||||
|
||||
|
22
README.md
22
README.md
@@ -30,7 +30,7 @@ Alternatively, refer to the developer instructions below for how to check out an
|
||||
# DESCRIPTION
|
||||
**youtube-dl** is a small command-line program to download videos from
|
||||
YouTube.com and a few more sites. It requires the Python interpreter, version
|
||||
2.6, 2.7, or 3.3+, and it is not platform specific. It should work on
|
||||
2.6, 2.7, or 3.2+, and it is not platform specific. It should work on
|
||||
your Unix box, on Windows or on Mac OS X. It is released to the public domain,
|
||||
which means you can modify it, redistribute it or use it however you like.
|
||||
|
||||
@@ -65,10 +65,10 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
this is not possible instead of searching.
|
||||
--ignore-config Do not read configuration files. When given
|
||||
in the global configuration file /etc
|
||||
/youtube-dl.conf: do not read the user
|
||||
configuration in ~/.config/youtube-dl.conf
|
||||
(%APPDATA%/youtube-dl/config.txt on
|
||||
Windows)
|
||||
/youtube-dl.conf: Do not read the user
|
||||
configuration in ~/.config/youtube-
|
||||
dl/config (%APPDATA%/youtube-dl/config.txt
|
||||
on Windows)
|
||||
--flat-playlist Do not extract the videos of a playlist,
|
||||
only list them.
|
||||
|
||||
@@ -93,7 +93,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
COUNT views
|
||||
--max-views COUNT Do not download any videos with more than
|
||||
COUNT views
|
||||
--no-playlist download only the currently playing video
|
||||
--no-playlist If the URL refers to a video and a
|
||||
playlist, download only the video.
|
||||
--age-limit YEARS download only videos suitable for the given
|
||||
age
|
||||
--download-archive FILE Download only videos not listed in the
|
||||
@@ -492,14 +493,15 @@ If you want to add support for a new site, you can follow this quick list (assum
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# TODO more code goes here, for example ...
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
# TODO more properties (see youtube_dl/extractor/common.py)
|
||||
}
|
||||
```
|
||||
@@ -534,13 +536,11 @@ Most likely, you'll want to use various options. For a list of what can be done,
|
||||
|
||||
# BUGS
|
||||
|
||||
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues> . Unless you were prompted so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email.
|
||||
Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues> . Unless you were prompted so or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the irc channel #youtube-dl on freenode.
|
||||
|
||||
Please include the full output of the command when run with `--verbose`. The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
|
||||
|
||||
For discussions, join us in the irc channel #youtube-dl on freenode.
|
||||
|
||||
When you submit a request, please re-read it once to avoid a couple of mistakes (you can and should use this as a checklist):
|
||||
Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist):
|
||||
|
||||
### Is the description of the issue itself sufficient?
|
||||
|
||||
|
@@ -1,4 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
from os.path import dirname as dirn
|
||||
import sys
|
||||
|
@@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
"""
|
||||
This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check
|
||||
|
@@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import sys
|
||||
|
@@ -1,4 +1,6 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import urllib.request
|
||||
import json
|
||||
|
@@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import unicode_literals, with_statement
|
||||
|
||||
import rsa
|
||||
import json
|
||||
@@ -29,4 +30,5 @@ signature = hexlify(rsa.pkcs1.sign(json.dumps(versions_info, sort_keys=True).enc
|
||||
print('signature: ' + signature)
|
||||
|
||||
versions_info['signature'] = signature
|
||||
json.dump(versions_info, open('update/versions.json', 'w'), indent=4, sort_keys=True)
|
||||
with open('update/versions.json', 'w') as versionsf:
|
||||
json.dump(versions_info, versionsf, indent=4, sort_keys=True)
|
||||
|
@@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import with_statement
|
||||
from __future__ import with_statement, unicode_literals
|
||||
|
||||
import datetime
|
||||
import glob
|
||||
@@ -13,7 +13,7 @@ year = str(datetime.datetime.now().year)
|
||||
for fn in glob.glob('*.html*'):
|
||||
with io.open(fn, encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
newc = re.sub(u'(?P<copyright>Copyright © 2006-)(?P<year>[0-9]{4})', u'Copyright © 2006-' + year, content)
|
||||
newc = re.sub(r'(?P<copyright>Copyright © 2006-)(?P<year>[0-9]{4})', 'Copyright © 2006-' + year, content)
|
||||
if content != newc:
|
||||
tmpFn = fn + '.part'
|
||||
with io.open(tmpFn, 'wt', encoding='utf-8') as outf:
|
||||
|
@@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import io
|
||||
|
@@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
32
devscripts/make_contributing.py
Executable file
32
devscripts/make_contributing.py
Executable file
@@ -0,0 +1,32 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import argparse
|
||||
import io
|
||||
import re
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
'INFILE', help='README.md file name to read from')
|
||||
parser.add_argument(
|
||||
'OUTFILE', help='CONTRIBUTING.md file name to write to')
|
||||
args = parser.parse_args()
|
||||
|
||||
with io.open(args.INFILE, encoding='utf-8') as inf:
|
||||
readme = inf.read()
|
||||
|
||||
bug_text = re.search(
|
||||
r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1)
|
||||
dev_text = re.search(
|
||||
r'(?s)(#\s*DEVELOPER INSTRUCTIONS.*?)#\s*EMBEDDING YOUTUBE-DL',
|
||||
readme).group(1)
|
||||
|
||||
out = bug_text + dev_text
|
||||
|
||||
with io.open(args.OUTFILE, 'w', encoding='utf-8') as outf:
|
||||
outf.write(out)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import io
|
||||
import sys
|
||||
import re
|
||||
|
@@ -1,3 +1,4 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import io
|
||||
import os.path
|
||||
|
@@ -1,4 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
from os.path import dirname as dirn
|
||||
import sys
|
||||
|
4
setup.py
4
setup.py
@@ -102,7 +102,9 @@ setup(
|
||||
"Programming Language :: Python :: 2.6",
|
||||
"Programming Language :: Python :: 2.7",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.3"
|
||||
"Programming Language :: Python :: 3.2",
|
||||
"Programming Language :: Python :: 3.3",
|
||||
"Programming Language :: Python :: 3.4",
|
||||
],
|
||||
|
||||
**params
|
||||
|
@@ -141,7 +141,7 @@ def expect_info_dict(self, expected_dict, got_dict):
|
||||
if missing_keys:
|
||||
def _repr(v):
|
||||
if isinstance(v, compat_str):
|
||||
return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'")
|
||||
return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n')
|
||||
else:
|
||||
return repr(v)
|
||||
info_dict_str = ''.join(
|
||||
|
@@ -97,7 +97,7 @@ def generator(test_case):
|
||||
return
|
||||
for other_ie in other_ies:
|
||||
if not other_ie.working():
|
||||
print_skipping(u'test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
|
||||
print_skipping('test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
|
||||
return
|
||||
|
||||
params = get_params(test_case.get('params', {}))
|
||||
@@ -143,7 +143,7 @@ def generator(test_case):
|
||||
raise
|
||||
|
||||
if try_num == RETRIES:
|
||||
report_warning(u'Failed due to network errors, skipping...')
|
||||
report_warning('Failed due to network errors, skipping...')
|
||||
return
|
||||
|
||||
print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num))
|
||||
|
@@ -238,7 +238,7 @@ class TestVimeoSubtitles(BaseTestSubtitles):
|
||||
def test_subtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
|
||||
self.assertEqual(md5(subtitles['en']), '26399116d23ae3cf2c087cea94bc43b4')
|
||||
|
||||
def test_subtitles_lang(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
|
@@ -9,14 +9,13 @@ rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
IGNORED_FILES = [
|
||||
'setup.py', # http://bugs.python.org/issue13943
|
||||
'conf.py',
|
||||
'buildserver.py',
|
||||
]
|
||||
|
||||
|
||||
class TestUnicodeLiterals(unittest.TestCase):
|
||||
def test_all_files(self):
|
||||
print('Skipping this test (not yet fully implemented)')
|
||||
return
|
||||
|
||||
for dirpath, _, filenames in os.walk(rootDir):
|
||||
for basename in filenames:
|
||||
if not basename.endswith('.py'):
|
||||
@@ -30,10 +29,10 @@ class TestUnicodeLiterals(unittest.TestCase):
|
||||
|
||||
if "'" not in code and '"' not in code:
|
||||
continue
|
||||
imps = 'from __future__ import unicode_literals'
|
||||
self.assertTrue(
|
||||
imps in code,
|
||||
' %s missing in %s' % (imps, fn))
|
||||
self.assertRegexpMatches(
|
||||
code,
|
||||
r'(?:#.*\n*)?from __future__ import (?:[a-z_]+,\s*)*unicode_literals',
|
||||
'unicode_literals import missing in %s' % fn)
|
||||
|
||||
m = re.search(r'(?<=\s)u[\'"](?!\)|,|$)', code)
|
||||
if m is not None:
|
||||
|
@@ -47,6 +47,8 @@ from youtube_dl.utils import (
|
||||
js_to_json,
|
||||
intlist_to_bytes,
|
||||
args_to_str,
|
||||
parse_filesize,
|
||||
version_tuple,
|
||||
)
|
||||
|
||||
|
||||
@@ -142,6 +144,9 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
|
||||
self.assertEqual(unified_strdate('1968-12-10'), '19681210')
|
||||
self.assertEqual(unified_strdate('28/01/2014 21:00:00 +0100'), '20140128')
|
||||
self.assertEqual(
|
||||
unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False),
|
||||
'20141126')
|
||||
|
||||
def test_find_xpath_attr(self):
|
||||
testxml = '''<root>
|
||||
@@ -170,7 +175,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
|
||||
|
||||
def test_smuggle_url(self):
|
||||
data = {u"ö": u"ö", u"abc": [3]}
|
||||
data = {"ö": "ö", "abc": [3]}
|
||||
url = 'https://foo.bar/baz?x=y#a'
|
||||
smug_url = smuggle_url(url, data)
|
||||
unsmug_url, unsmug_data = unsmuggle_url(smug_url)
|
||||
@@ -219,6 +224,9 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(parse_duration('0s'), 0)
|
||||
self.assertEqual(parse_duration('01:02:03.05'), 3723.05)
|
||||
self.assertEqual(parse_duration('T30M38S'), 1838)
|
||||
self.assertEqual(parse_duration('5 s'), 5)
|
||||
self.assertEqual(parse_duration('3 min'), 180)
|
||||
self.assertEqual(parse_duration('2.5 hours'), 9000)
|
||||
|
||||
def test_fix_xml_ampersands(self):
|
||||
self.assertEqual(
|
||||
@@ -367,5 +375,20 @@ class TestUtil(unittest.TestCase):
|
||||
'foo ba/r -baz \'2 be\' \'\''
|
||||
)
|
||||
|
||||
def test_parse_filesize(self):
|
||||
self.assertEqual(parse_filesize(None), None)
|
||||
self.assertEqual(parse_filesize(''), None)
|
||||
self.assertEqual(parse_filesize('91 B'), 91)
|
||||
self.assertEqual(parse_filesize('foobar'), None)
|
||||
self.assertEqual(parse_filesize('2 MiB'), 2097152)
|
||||
self.assertEqual(parse_filesize('5 GB'), 5000000000)
|
||||
self.assertEqual(parse_filesize('1.2Tb'), 1200000000000)
|
||||
self.assertEqual(parse_filesize('1,24 KB'), 1240)
|
||||
|
||||
def test_version_tuple(self):
|
||||
self.assertEqual(version_tuple('1'), (1,))
|
||||
self.assertEqual(version_tuple('10.23.344'), (10, 23, 344))
|
||||
self.assertEqual(version_tuple('10.1-6'), (10, 1, 6)) # avconv style
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
|
@@ -1,5 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
@@ -32,7 +33,7 @@ params = get_params({
|
||||
TEST_ID = 'BaW_jenozKc'
|
||||
INFO_JSON_FILE = TEST_ID + '.info.json'
|
||||
DESCRIPTION_FILE = TEST_ID + '.mp4.description'
|
||||
EXPECTED_DESCRIPTION = u'''test chars: "'/\ä↭𝕐
|
||||
EXPECTED_DESCRIPTION = '''test chars: "'/\ä↭𝕐
|
||||
test URL: https://github.com/rg3/youtube-dl/issues/1892
|
||||
|
||||
This is a test video for youtube-dl.
|
||||
@@ -53,11 +54,11 @@ class TestInfoJSON(unittest.TestCase):
|
||||
self.assertTrue(os.path.exists(INFO_JSON_FILE))
|
||||
with io.open(INFO_JSON_FILE, 'r', encoding='utf-8') as jsonf:
|
||||
jd = json.load(jsonf)
|
||||
self.assertEqual(jd['upload_date'], u'20121002')
|
||||
self.assertEqual(jd['upload_date'], '20121002')
|
||||
self.assertEqual(jd['description'], EXPECTED_DESCRIPTION)
|
||||
self.assertEqual(jd['id'], TEST_ID)
|
||||
self.assertEqual(jd['extractor'], 'youtube')
|
||||
self.assertEqual(jd['title'], u'''youtube-dl test video "'/\ä↭𝕐''')
|
||||
self.assertEqual(jd['title'], '''youtube-dl test video "'/\ä↭𝕐''')
|
||||
self.assertEqual(jd['uploader'], 'Philipp Hagemeister')
|
||||
|
||||
self.assertTrue(os.path.exists(DESCRIPTION_FILE))
|
||||
|
@@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
|
@@ -7,6 +7,7 @@ import collections
|
||||
import datetime
|
||||
import errno
|
||||
import io
|
||||
import itertools
|
||||
import json
|
||||
import locale
|
||||
import os
|
||||
@@ -621,23 +622,15 @@ class YoutubeDL(object):
|
||||
ie_result['url'], ie_key=ie_result.get('ie_key'),
|
||||
extra_info=extra_info, download=False, process=False)
|
||||
|
||||
def make_result(embedded_info):
|
||||
new_result = ie_result.copy()
|
||||
for f in ('_type', 'url', 'ext', 'player_url', 'formats',
|
||||
'entries', 'ie_key', 'duration',
|
||||
'subtitles', 'annotations', 'format',
|
||||
'thumbnail', 'thumbnails'):
|
||||
if f in new_result:
|
||||
del new_result[f]
|
||||
if f in embedded_info:
|
||||
new_result[f] = embedded_info[f]
|
||||
return new_result
|
||||
new_result = make_result(info)
|
||||
force_properties = dict(
|
||||
(k, v) for k, v in ie_result.items() if v is not None)
|
||||
for f in ('_type', 'url'):
|
||||
if f in force_properties:
|
||||
del force_properties[f]
|
||||
new_result = info.copy()
|
||||
new_result.update(force_properties)
|
||||
|
||||
assert new_result.get('_type') != 'url_transparent'
|
||||
if new_result.get('_type') == 'compat_list':
|
||||
new_result['entries'] = [
|
||||
make_result(e) for e in new_result['entries']]
|
||||
|
||||
return self.process_ie_result(
|
||||
new_result, download=download, extra_info=extra_info)
|
||||
@@ -654,21 +647,28 @@ class YoutubeDL(object):
|
||||
if playlistend == -1:
|
||||
playlistend = None
|
||||
|
||||
if isinstance(ie_result['entries'], list):
|
||||
n_all_entries = len(ie_result['entries'])
|
||||
entries = ie_result['entries'][playliststart:playlistend]
|
||||
ie_entries = ie_result['entries']
|
||||
if isinstance(ie_entries, list):
|
||||
n_all_entries = len(ie_entries)
|
||||
entries = ie_entries[playliststart:playlistend]
|
||||
n_entries = len(entries)
|
||||
self.to_screen(
|
||||
"[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
|
||||
(ie_result['extractor'], playlist, n_all_entries, n_entries))
|
||||
else:
|
||||
assert isinstance(ie_result['entries'], PagedList)
|
||||
entries = ie_result['entries'].getslice(
|
||||
elif isinstance(ie_entries, PagedList):
|
||||
entries = ie_entries.getslice(
|
||||
playliststart, playlistend)
|
||||
n_entries = len(entries)
|
||||
self.to_screen(
|
||||
"[%s] playlist %s: Downloading %d videos" %
|
||||
(ie_result['extractor'], playlist, n_entries))
|
||||
else: # iterable
|
||||
entries = list(itertools.islice(
|
||||
ie_entries, playliststart, playlistend))
|
||||
n_entries = len(entries)
|
||||
self.to_screen(
|
||||
"[%s] playlist %s: Downloading %d videos" %
|
||||
(ie_result['extractor'], playlist, n_entries))
|
||||
|
||||
for i, entry in enumerate(entries, 1):
|
||||
self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
|
||||
@@ -787,6 +787,10 @@ class YoutubeDL(object):
|
||||
info_dict['display_id'] = info_dict['id']
|
||||
|
||||
if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
|
||||
# Working around negative timestamps in Windows
|
||||
# (see http://bugs.python.org/issue1646728)
|
||||
if info_dict['timestamp'] < 0 and os.name == 'nt':
|
||||
info_dict['timestamp'] = 0
|
||||
upload_date = datetime.datetime.utcfromtimestamp(
|
||||
info_dict['timestamp'])
|
||||
info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
|
||||
@@ -930,8 +934,12 @@ class YoutubeDL(object):
|
||||
if self.params.get('forceid', False):
|
||||
self.to_stdout(info_dict['id'])
|
||||
if self.params.get('forceurl', False):
|
||||
# For RTMP URLs, also include the playpath
|
||||
self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
|
||||
if info_dict.get('requested_formats') is not None:
|
||||
for f in info_dict['requested_formats']:
|
||||
self.to_stdout(f['url'] + f.get('play_path', ''))
|
||||
else:
|
||||
# For RTMP URLs, also include the playpath
|
||||
self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
|
||||
if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
|
||||
self.to_stdout(info_dict['thumbnail'])
|
||||
if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
|
||||
|
@@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Execute with
|
||||
# $ python youtube_dl/__main__.py (2.6+)
|
||||
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
|
||||
|
||||
import base64
|
||||
|
@@ -247,7 +247,7 @@ else:
|
||||
userhome = compat_getenv('HOME')
|
||||
elif 'USERPROFILE' in os.environ:
|
||||
userhome = compat_getenv('USERPROFILE')
|
||||
elif not 'HOMEPATH' in os.environ:
|
||||
elif 'HOMEPATH' not in os.environ:
|
||||
return path
|
||||
else:
|
||||
try:
|
||||
@@ -270,7 +270,7 @@ if sys.version_info < (3, 0):
|
||||
print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
|
||||
else:
|
||||
def compat_print(s):
|
||||
assert type(s) == type(u'')
|
||||
assert isinstance(s, compat_str)
|
||||
print(s)
|
||||
|
||||
|
||||
@@ -297,7 +297,9 @@ else:
|
||||
|
||||
# Old 2.6 and 2.7 releases require kwargs to be bytes
|
||||
try:
|
||||
(lambda x: x)(**{'x': 0})
|
||||
def _testfunc(x):
|
||||
pass
|
||||
_testfunc(**{'x': 0})
|
||||
except TypeError:
|
||||
def compat_kwargs(kwargs):
|
||||
return dict((bytes(k), v) for k, v in kwargs.items())
|
||||
|
@@ -4,6 +4,7 @@ import os
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
from ..postprocessor.ffmpeg import FFmpegPostProcessor
|
||||
from .common import FileDownloader
|
||||
from ..utils import (
|
||||
compat_urlparse,
|
||||
@@ -28,14 +29,17 @@ class HlsFD(FileDownloader):
|
||||
if check_executable(program, ['-version']):
|
||||
break
|
||||
else:
|
||||
self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
|
||||
self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
|
||||
return False
|
||||
cmd = [program] + args
|
||||
|
||||
ffpp = FFmpegPostProcessor(downloader=self)
|
||||
ffpp.check_version()
|
||||
|
||||
retval = subprocess.call(cmd)
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'\r[%s] %s bytes' % (cmd[0], fsize))
|
||||
self.to_screen('\r[%s] %s bytes' % (cmd[0], fsize))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
@@ -45,8 +49,8 @@ class HlsFD(FileDownloader):
|
||||
})
|
||||
return True
|
||||
else:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'%s exited with code %d' % (program, retval))
|
||||
self.to_stderr('\n')
|
||||
self.report_error('%s exited with code %d' % (program, retval))
|
||||
return False
|
||||
|
||||
|
||||
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import time
|
||||
|
||||
@@ -106,7 +108,7 @@ class HttpFD(FileDownloader):
|
||||
self.report_retry(count, retries)
|
||||
|
||||
if count > retries:
|
||||
self.report_error(u'giving up after %s retries' % retries)
|
||||
self.report_error('giving up after %s retries' % retries)
|
||||
return False
|
||||
|
||||
data_len = data.info().get('Content-length', None)
|
||||
@@ -124,10 +126,10 @@ class HttpFD(FileDownloader):
|
||||
min_data_len = self.params.get("min_filesize", None)
|
||||
max_data_len = self.params.get("max_filesize", None)
|
||||
if min_data_len is not None and data_len < min_data_len:
|
||||
self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
|
||||
self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
|
||||
return False
|
||||
if max_data_len is not None and data_len > max_data_len:
|
||||
self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
||||
self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
||||
return False
|
||||
|
||||
data_len_str = format_bytes(data_len)
|
||||
@@ -151,13 +153,13 @@ class HttpFD(FileDownloader):
|
||||
filename = self.undo_temp_name(tmpfilename)
|
||||
self.report_destination(filename)
|
||||
except (OSError, IOError) as err:
|
||||
self.report_error(u'unable to open for writing: %s' % str(err))
|
||||
self.report_error('unable to open for writing: %s' % str(err))
|
||||
return False
|
||||
try:
|
||||
stream.write(data_block)
|
||||
except (IOError, OSError) as err:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'unable to write data: %s' % str(err))
|
||||
self.to_stderr('\n')
|
||||
self.report_error('unable to write data: %s' % str(err))
|
||||
return False
|
||||
if not self.params.get('noresizebuffer', False):
|
||||
block_size = self.best_block_size(after - before, len(data_block))
|
||||
@@ -188,10 +190,10 @@ class HttpFD(FileDownloader):
|
||||
self.slow_down(start, byte_counter - resume_len)
|
||||
|
||||
if stream is None:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'Did not get any data blocks')
|
||||
self.to_stderr('\n')
|
||||
self.report_error('Did not get any data blocks')
|
||||
return False
|
||||
if tmpfilename != u'-':
|
||||
if tmpfilename != '-':
|
||||
stream.close()
|
||||
self.report_finish(data_len_str, (time.time() - start))
|
||||
if data_len is not None and byte_counter != data_len:
|
||||
|
@@ -1,7 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..compat import compat_subprocess_get_DEVNULL
|
||||
from ..utils import (
|
||||
encodeFilename,
|
||||
)
|
||||
@@ -13,19 +16,23 @@ class MplayerFD(FileDownloader):
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
||||
args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url]
|
||||
args = [
|
||||
'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
|
||||
'-dumpstream', '-dumpfile', tmpfilename, url]
|
||||
# Check for mplayer first
|
||||
try:
|
||||
subprocess.call(['mplayer', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
||||
subprocess.call(
|
||||
['mplayer', '-h'],
|
||||
stdout=compat_subprocess_get_DEVNULL(), stderr=subprocess.STDOUT)
|
||||
except (OSError, IOError):
|
||||
self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0])
|
||||
self.report_error('MMS or RTSP download detected but "%s" could not be run' % args[0])
|
||||
return False
|
||||
|
||||
# Download using mplayer.
|
||||
retval = subprocess.call(args)
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
|
||||
self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
@@ -35,6 +42,6 @@ class MplayerFD(FileDownloader):
|
||||
})
|
||||
return True
|
||||
else:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'mplayer exited with code %d' % retval)
|
||||
self.to_stderr('\n')
|
||||
self.report_error('mplayer exited with code %d' % retval)
|
||||
return False
|
||||
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .abc import ABCIE
|
||||
from .academicearth import AcademicEarthCourseIE
|
||||
from .addanime import AddAnimeIE
|
||||
@@ -22,11 +24,13 @@ from .arte import (
|
||||
)
|
||||
from .audiomack import AudiomackIE
|
||||
from .auengine import AUEngineIE
|
||||
from .azubu import AzubuIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||
from .bbccouk import BBCCoUkIE
|
||||
from .beeg import BeegIE
|
||||
from .behindkink import BehindKinkIE
|
||||
from .bet import BetIE
|
||||
from .bild import BildIE
|
||||
from .bilibili import BiliBiliIE
|
||||
from .blinkx import BlinkxIE
|
||||
@@ -36,6 +40,7 @@ from .bpb import BpbIE
|
||||
from .br import BRIE
|
||||
from .breakcom import BreakIE
|
||||
from .brightcove import BrightcoveIE
|
||||
from .buzzfeed import BuzzFeedIE
|
||||
from .byutv import BYUtvIE
|
||||
from .c56 import C56IE
|
||||
from .canal13cl import Canal13clIE
|
||||
@@ -46,7 +51,7 @@ from .cbsnews import CBSNewsIE
|
||||
from .ceskatelevize import CeskaTelevizeIE
|
||||
from .channel9 import Channel9IE
|
||||
from .chilloutzone import ChilloutzoneIE
|
||||
from .cinemassacre import CinemassacreIE
|
||||
from .cinchcast import CinchcastIE
|
||||
from .clipfish import ClipfishIE
|
||||
from .cliphunter import CliphunterIE
|
||||
from .clipsyndicate import ClipsyndicateIE
|
||||
@@ -118,6 +123,8 @@ from .fktv import (
|
||||
from .flickr import FlickrIE
|
||||
from .folketinget import FolketingetIE
|
||||
from .fourtube import FourTubeIE
|
||||
from .foxgay import FoxgayIE
|
||||
from .foxnews import FoxNewsIE
|
||||
from .franceculture import FranceCultureIE
|
||||
from .franceinter import FranceInterIE
|
||||
from .francetv import (
|
||||
@@ -141,6 +148,7 @@ from .gamestar import GameStarIE
|
||||
from .gametrailers import GametrailersIE
|
||||
from .gdcvault import GDCVaultIE
|
||||
from .generic import GenericIE
|
||||
from .giantbomb import GiantBombIE
|
||||
from .glide import GlideIE
|
||||
from .globo import GloboIE
|
||||
from .godtube import GodTubeIE
|
||||
@@ -213,6 +221,7 @@ from .mdr import MDRIE
|
||||
from .metacafe import MetacafeIE
|
||||
from .metacritic import MetacriticIE
|
||||
from .mgoon import MgoonIE
|
||||
from .minhateca import MinhatecaIE
|
||||
from .ministrygrid import MinistryGridIE
|
||||
from .mit import TechTVMITIE, MITIE, OCWMITIE
|
||||
from .mitele import MiTeleIE
|
||||
@@ -239,9 +248,10 @@ from .muenchentv import MuenchenTVIE
|
||||
from .musicplayon import MusicPlayOnIE
|
||||
from .musicvault import MusicVaultIE
|
||||
from .muzu import MuzuTVIE
|
||||
from .myspace import MySpaceIE
|
||||
from .myspace import MySpaceIE, MySpaceAlbumIE
|
||||
from .myspass import MySpassIE
|
||||
from .myvideo import MyVideoIE
|
||||
from .myvidster import MyVidsterIE
|
||||
from .naver import NaverIE
|
||||
from .nba import NBAIE
|
||||
from .nbc import (
|
||||
@@ -299,6 +309,7 @@ from .promptfile import PromptFileIE
|
||||
from .prosiebensat1 import ProSiebenSat1IE
|
||||
from .pyvideo import PyvideoIE
|
||||
from .quickvid import QuickVidIE
|
||||
from .radiode import RadioDeIE
|
||||
from .radiofrance import RadioFranceIE
|
||||
from .rai import RaiIE
|
||||
from .rbmaradio import RBMARadioIE
|
||||
@@ -326,6 +337,7 @@ from .savefrom import SaveFromIE
|
||||
from .sbs import SBSIE
|
||||
from .scivee import SciVeeIE
|
||||
from .screencast import ScreencastIE
|
||||
from .screenwavemedia import CinemassacreIE, ScreenwaveMediaIE, TeamFourIE
|
||||
from .servingsys import ServingSysIE
|
||||
from .sexu import SexuIE
|
||||
from .sexykarma import SexyKarmaIE
|
||||
@@ -373,6 +385,7 @@ from .syfy import SyfyIE
|
||||
from .sztvhu import SztvHuIE
|
||||
from .tagesschau import TagesschauIE
|
||||
from .tapely import TapelyIE
|
||||
from .tass import TassIE
|
||||
from .teachertube import (
|
||||
TeacherTubeIE,
|
||||
TeacherTubeUserIE,
|
||||
@@ -393,6 +406,7 @@ from .thesixtyone import TheSixtyOneIE
|
||||
from .thisav import ThisAVIE
|
||||
from .tinypic import TinyPicIE
|
||||
from .tlc import TlcIE, TlcDeIE
|
||||
from .tmz import TMZIE
|
||||
from .tnaflix import TNAFlixIE
|
||||
from .thvideo import (
|
||||
THVideoIE,
|
||||
@@ -412,6 +426,7 @@ from .tutv import TutvIE
|
||||
from .tvigle import TvigleIE
|
||||
from .tvp import TvpIE
|
||||
from .tvplay import TVPlayIE
|
||||
from .twentyfourvideo import TwentyFourVideoIE
|
||||
from .twitch import TwitchIE
|
||||
from .ubu import UbuIE
|
||||
from .udemy import (
|
||||
@@ -483,6 +498,7 @@ from .wrzuta import WrzutaIE
|
||||
from .xbef import XBefIE
|
||||
from .xboxclips import XboxClipsIE
|
||||
from .xhamster import XHamsterIE
|
||||
from .xminus import XMinusIE
|
||||
from .xnxx import XNXXIE
|
||||
from .xvideos import XVideosIE
|
||||
from .xtube import XTubeUserIE, XTubeIE
|
||||
@@ -512,7 +528,7 @@ from .youtube import (
|
||||
YoutubeUserIE,
|
||||
YoutubeWatchLaterIE,
|
||||
)
|
||||
from .zdf import ZDFIE
|
||||
from .zdf import ZDFIE, ZDFChannelIE
|
||||
from .zingmp3 import (
|
||||
ZingMp3SongIE,
|
||||
ZingMp3AlbumIE,
|
||||
|
@@ -1,4 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -18,15 +19,14 @@ class AcademicEarthCourseIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
playlist_id = m.group('id')
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
title = self._html_search_regex(
|
||||
r'<h1 class="playlist-name"[^>]*?>(.*?)</h1>', webpage, u'title')
|
||||
r'<h1 class="playlist-name"[^>]*?>(.*?)</h1>', webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'<p class="excerpt"[^>]*?>(.*?)</p>',
|
||||
webpage, u'description', fatal=False)
|
||||
webpage, 'description', fatal=False)
|
||||
urls = re.findall(
|
||||
r'<li class="lecture-preview">\s*?<a target="_blank" href="([^"]+)">',
|
||||
webpage)
|
||||
|
@@ -15,8 +15,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class AddAnimeIE(InfoExtractor):
|
||||
|
||||
_VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
|
||||
_VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<id>[\w_]+)(?:.*)'
|
||||
_TEST = {
|
||||
'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
|
||||
'md5': '72954ea10bc979ab5e2eb288b21425a0',
|
||||
@@ -29,9 +28,9 @@ class AddAnimeIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
try:
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('video_id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
except ExtractorError as ee:
|
||||
if not isinstance(ee.cause, compat_HTTPError) or \
|
||||
@@ -49,7 +48,7 @@ class AddAnimeIE(InfoExtractor):
|
||||
r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
|
||||
redir_webpage)
|
||||
if av is None:
|
||||
raise ExtractorError(u'Cannot find redirect math task')
|
||||
raise ExtractorError('Cannot find redirect math task')
|
||||
av_res = int(av.group(1)) + int(av.group(2)) * int(av.group(3))
|
||||
|
||||
parsed_url = compat_urllib_parse_urlparse(url)
|
||||
|
@@ -2,123 +2,147 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class AdultSwimIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video\.adultswim\.com/(?P<path>.+?)(?:\.html)?(?:\?.*)?(?:#.*)?$'
|
||||
_TEST = {
|
||||
'url': 'http://video.adultswim.com/rick-and-morty/close-rick-counters-of-the-rick-kind.html?x=y#title',
|
||||
_VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<is_playlist>playlists/)?(?P<show_path>[^/]+)/(?P<episode_path>[^/?#]+)/?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://adultswim.com/videos/rick-and-morty/pilot',
|
||||
'playlist': [
|
||||
{
|
||||
'md5': '4da359ec73b58df4575cd01a610ba5dc',
|
||||
'md5': '247572debc75c7652f253c8daa51a14d',
|
||||
'info_dict': {
|
||||
'id': '8a250ba1450996e901453d7f02ca02f5',
|
||||
'id': 'rQxZvXQ4ROaSOqq-or2Mow-0',
|
||||
'ext': 'flv',
|
||||
'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 1',
|
||||
'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
|
||||
'uploader': 'Rick and Morty',
|
||||
'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
|
||||
}
|
||||
'title': 'Rick and Morty - Pilot Part 1',
|
||||
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
|
||||
},
|
||||
},
|
||||
{
|
||||
'md5': 'ffbdf55af9331c509d95350bd0cc1819',
|
||||
'md5': '77b0e037a4b20ec6b98671c4c379f48d',
|
||||
'info_dict': {
|
||||
'id': '8a250ba1450996e901453d7f4bd102f6',
|
||||
'id': 'rQxZvXQ4ROaSOqq-or2Mow-3',
|
||||
'ext': 'flv',
|
||||
'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 2',
|
||||
'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
|
||||
'uploader': 'Rick and Morty',
|
||||
'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
|
||||
}
|
||||
'title': 'Rick and Morty - Pilot Part 4',
|
||||
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
|
||||
},
|
||||
},
|
||||
],
|
||||
'info_dict': {
|
||||
'title': 'Rick and Morty - Pilot',
|
||||
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.adultswim.com/videos/playlists/american-parenting/putting-francine-out-of-business/',
|
||||
'playlist': [
|
||||
{
|
||||
'md5': 'b92409635540304280b4b6c36bd14a0a',
|
||||
'md5': '2eb5c06d0f9a1539da3718d897f13ec5',
|
||||
'info_dict': {
|
||||
'id': '8a250ba1450996e901453d7fa73c02f7',
|
||||
'id': '-t8CamQlQ2aYZ49ItZCFog-0',
|
||||
'ext': 'flv',
|
||||
'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 3',
|
||||
'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
|
||||
'uploader': 'Rick and Morty',
|
||||
'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
|
||||
}
|
||||
},
|
||||
{
|
||||
'md5': 'e8818891d60e47b29cd89d7b0278156d',
|
||||
'info_dict': {
|
||||
'id': '8a250ba1450996e901453d7fc8ba02f8',
|
||||
'ext': 'flv',
|
||||
'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 4',
|
||||
'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
|
||||
'uploader': 'Rick and Morty',
|
||||
'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
|
||||
}
|
||||
'title': 'American Dad - Putting Francine Out of Business',
|
||||
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
|
||||
},
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
'info_dict': {
|
||||
'title': 'American Dad - Putting Francine Out of Business',
|
||||
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
|
||||
},
|
||||
}]
|
||||
|
||||
_video_extensions = {
|
||||
'3500': 'flv',
|
||||
'640': 'mp4',
|
||||
'150': 'mp4',
|
||||
'ipad': 'm3u8',
|
||||
'iphone': 'm3u8'
|
||||
}
|
||||
_video_dimensions = {
|
||||
'3500': (1280, 720),
|
||||
'640': (480, 270),
|
||||
'150': (320, 180)
|
||||
}
|
||||
@staticmethod
|
||||
def find_video_info(collection, slug):
|
||||
for video in collection.get('videos'):
|
||||
if video.get('slug') == slug:
|
||||
return video
|
||||
|
||||
@staticmethod
|
||||
def find_collection_by_linkURL(collections, linkURL):
|
||||
for collection in collections:
|
||||
if collection.get('linkURL') == linkURL:
|
||||
return collection
|
||||
|
||||
@staticmethod
|
||||
def find_collection_containing_video(collections, slug):
|
||||
for collection in collections:
|
||||
for video in collection.get('videos'):
|
||||
if video.get('slug') == slug:
|
||||
return collection, video
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_path = mobj.group('path')
|
||||
show_path = mobj.group('show_path')
|
||||
episode_path = mobj.group('episode_path')
|
||||
is_playlist = True if mobj.group('is_playlist') else False
|
||||
|
||||
webpage = self._download_webpage(url, video_path)
|
||||
episode_id = self._html_search_regex(
|
||||
r'<link rel="video_src" href="http://i\.adultswim\.com/adultswim/adultswimtv/tools/swf/viralplayer.swf\?id=([0-9a-f]+?)"\s*/?\s*>',
|
||||
webpage, 'episode_id')
|
||||
title = self._og_search_title(webpage)
|
||||
webpage = self._download_webpage(url, episode_path)
|
||||
|
||||
index_url = 'http://asfix.adultswim.com/asfix-svc/episodeSearch/getEpisodesByIDs?networkName=AS&ids=%s' % episode_id
|
||||
idoc = self._download_xml(index_url, title, 'Downloading episode index', 'Unable to download episode index')
|
||||
# Extract the value of `bootstrappedData` from the Javascript in the page.
|
||||
bootstrappedDataJS = self._search_regex(r'var bootstrappedData = ({.*});', webpage, episode_path)
|
||||
|
||||
episode_el = idoc.find('.//episode')
|
||||
show_title = episode_el.attrib.get('collectionTitle')
|
||||
episode_title = episode_el.attrib.get('title')
|
||||
thumbnail = episode_el.attrib.get('thumbnailUrl')
|
||||
description = episode_el.find('./description').text.strip()
|
||||
try:
|
||||
bootstrappedData = json.loads(bootstrappedDataJS)
|
||||
except ValueError as ve:
|
||||
errmsg = '%s: Failed to parse JSON ' % episode_path
|
||||
raise ExtractorError(errmsg, cause=ve)
|
||||
|
||||
# Downloading videos from a /videos/playlist/ URL needs to be handled differently.
|
||||
# NOTE: We are only downloading one video (the current one) not the playlist
|
||||
if is_playlist:
|
||||
collections = bootstrappedData['playlists']['collections']
|
||||
collection = self.find_collection_by_linkURL(collections, show_path)
|
||||
video_info = self.find_video_info(collection, episode_path)
|
||||
|
||||
show_title = video_info['showTitle']
|
||||
segment_ids = [video_info['videoPlaybackID']]
|
||||
else:
|
||||
collections = bootstrappedData['show']['collections']
|
||||
collection, video_info = self.find_collection_containing_video(collections, episode_path)
|
||||
|
||||
show = bootstrappedData['show']
|
||||
show_title = show['title']
|
||||
segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
|
||||
|
||||
episode_id = video_info['id']
|
||||
episode_title = video_info['title']
|
||||
episode_description = video_info['description']
|
||||
episode_duration = video_info.get('duration')
|
||||
|
||||
entries = []
|
||||
segment_els = episode_el.findall('./segments/segment')
|
||||
for part_num, segment_id in enumerate(segment_ids):
|
||||
segment_url = 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=mobile' % segment_id
|
||||
|
||||
for part_num, segment_el in enumerate(segment_els):
|
||||
segment_id = segment_el.attrib.get('id')
|
||||
segment_title = '%s %s part %d' % (show_title, episode_title, part_num + 1)
|
||||
thumbnail = segment_el.attrib.get('thumbnailUrl')
|
||||
duration = segment_el.attrib.get('duration')
|
||||
segment_title = '%s - %s' % (show_title, episode_title)
|
||||
if len(segment_ids) > 1:
|
||||
segment_title += ' Part %d' % (part_num + 1)
|
||||
|
||||
segment_url = 'http://asfix.adultswim.com/asfix-svc/episodeservices/getCvpPlaylist?networkName=AS&id=%s' % segment_id
|
||||
idoc = self._download_xml(
|
||||
segment_url, segment_title,
|
||||
'Downloading segment information', 'Unable to download segment information')
|
||||
|
||||
segment_duration = idoc.find('.//trt').text.strip()
|
||||
|
||||
formats = []
|
||||
file_els = idoc.findall('.//files/file')
|
||||
|
||||
for file_el in file_els:
|
||||
bitrate = file_el.attrib.get('bitrate')
|
||||
type = file_el.attrib.get('type')
|
||||
width, height = self._video_dimensions.get(bitrate, (None, None))
|
||||
ftype = file_el.attrib.get('type')
|
||||
|
||||
formats.append({
|
||||
'format_id': '%s-%s' % (bitrate, type),
|
||||
'url': file_el.text,
|
||||
'ext': self._video_extensions.get(bitrate, 'mp4'),
|
||||
'format_id': '%s_%s' % (bitrate, ftype),
|
||||
'url': file_el.text.strip(),
|
||||
# The bitrate may not be a number (for example: 'iphone')
|
||||
'tbr': int(bitrate) if bitrate.isdigit() else None,
|
||||
'height': height,
|
||||
'width': width
|
||||
'quality': 1 if ftype == 'hd' else -1
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
@@ -127,18 +151,16 @@ class AdultSwimIE(InfoExtractor):
|
||||
'id': segment_id,
|
||||
'title': segment_title,
|
||||
'formats': formats,
|
||||
'uploader': show_title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'description': description
|
||||
'duration': segment_duration,
|
||||
'description': episode_description
|
||||
})
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': episode_id,
|
||||
'display_id': video_path,
|
||||
'display_id': episode_path,
|
||||
'entries': entries,
|
||||
'title': '%s %s' % (show_title, episode_title),
|
||||
'description': description,
|
||||
'thumbnail': thumbnail
|
||||
'title': '%s - %s' % (show_title, episode_title),
|
||||
'description': episode_description,
|
||||
'duration': episode_duration
|
||||
}
|
||||
|
@@ -1,5 +1,4 @@
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@@ -26,8 +25,7 @@ class AparatIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
video_id = m.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
# Note: There is an easier-to-parse configuration at
|
||||
# http://www.aparat.com/video/video/config/videohash/%video_id
|
||||
@@ -40,15 +38,15 @@ class AparatIE(InfoExtractor):
|
||||
for i, video_url in enumerate(video_urls):
|
||||
req = HEADRequest(video_url)
|
||||
res = self._request_webpage(
|
||||
req, video_id, note=u'Testing video URL %d' % i, errnote=False)
|
||||
req, video_id, note='Testing video URL %d' % i, errnote=False)
|
||||
if res:
|
||||
break
|
||||
else:
|
||||
raise ExtractorError(u'No working video URLs found')
|
||||
raise ExtractorError('No working video URLs found')
|
||||
|
||||
title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, u'title')
|
||||
title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
|
||||
thumbnail = self._search_regex(
|
||||
r'\s+image:\s*"([^"]+)"', webpage, u'thumbnail', fatal=False)
|
||||
r'\s+image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -80,7 +80,7 @@ class AppleTrailersIE(InfoExtractor):
|
||||
def _clean_json(m):
|
||||
return 'iTunes.playURL(%s);' % m.group(1).replace('\'', ''')
|
||||
s = re.sub(self._JSON_RE, _clean_json, s)
|
||||
s = '<html>' + s + u'</html>'
|
||||
s = '<html>%s</html>' % s
|
||||
return s
|
||||
doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
|
||||
|
||||
|
@@ -24,17 +24,17 @@ class AudiomackIE(InfoExtractor):
|
||||
},
|
||||
# hosted on soundcloud via audiomack
|
||||
{
|
||||
'add_ie': ['Soundcloud'],
|
||||
'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare',
|
||||
'file': '172419696.mp3',
|
||||
'info_dict':
|
||||
{
|
||||
'info_dict': {
|
||||
'id': '172419696',
|
||||
'ext': 'mp3',
|
||||
'description': 'md5:1fc3272ed7a635cce5be1568c2822997',
|
||||
'title': 'Young Thug ft Lil Wayne - Take Kare',
|
||||
"upload_date": "20141016",
|
||||
"description": "New track produced by London On Da Track called “Take Kare\"\n\nhttp://instagram.com/theyoungthugworld\nhttps://www.facebook.com/ThuggerThuggerCashMoney\n",
|
||||
"uploader": "Young Thug World"
|
||||
'uploader': 'Young Thug World',
|
||||
'upload_date': '20141016',
|
||||
}
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
93
youtube_dl/extractor/azubu.py
Normal file
93
youtube_dl/extractor/azubu.py
Normal file
@@ -0,0 +1,93 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none
|
||||
|
||||
|
||||
class AzubuIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?azubu\.tv/[^/]+#!/play/(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.azubu.tv/GSL#!/play/15575/2014-hot6-cup-last-big-match-ro8-day-1',
|
||||
'md5': 'a88b42fcf844f29ad6035054bd9ecaf4',
|
||||
'info_dict': {
|
||||
'id': '15575',
|
||||
'ext': 'mp4',
|
||||
'title': '2014 HOT6 CUP LAST BIG MATCH Ro8 Day 1',
|
||||
'description': 'md5:d06bdea27b8cc4388a90ad35b5c66c01',
|
||||
'thumbnail': 're:^https?://.*\.jpe?g',
|
||||
'timestamp': 1417523507.334,
|
||||
'upload_date': '20141202',
|
||||
'duration': 9988.7,
|
||||
'uploader': 'GSL',
|
||||
'uploader_id': 414310,
|
||||
'view_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.azubu.tv/FnaticTV#!/play/9344/-fnatic-at-worlds-2014:-toyz---%22i-love-rekkles,-he-has-amazing-mechanics%22-',
|
||||
'md5': 'b72a871fe1d9f70bd7673769cdb3b925',
|
||||
'info_dict': {
|
||||
'id': '9344',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fnatic at Worlds 2014: Toyz - "I love Rekkles, he has amazing mechanics"',
|
||||
'description': 'md5:4a649737b5f6c8b5c5be543e88dc62af',
|
||||
'thumbnail': 're:^https?://.*\.jpe?g',
|
||||
'timestamp': 1410530893.320,
|
||||
'upload_date': '20140912',
|
||||
'duration': 172.385,
|
||||
'uploader': 'FnaticTV',
|
||||
'uploader_id': 272749,
|
||||
'view_count': int,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
data = self._download_json(
|
||||
'http://www.azubu.tv/api/video/%s' % video_id, video_id)['data']
|
||||
|
||||
title = data['title'].strip()
|
||||
description = data['description']
|
||||
thumbnail = data['thumbnail']
|
||||
view_count = data['view_count']
|
||||
uploader = data['user']['username']
|
||||
uploader_id = data['user']['id']
|
||||
|
||||
stream_params = json.loads(data['stream_params'])
|
||||
|
||||
timestamp = float_or_none(stream_params['creationDate'], 1000)
|
||||
duration = float_or_none(stream_params['length'], 1000)
|
||||
|
||||
renditions = stream_params.get('renditions') or []
|
||||
video = stream_params.get('FLVFullLength') or stream_params.get('videoFullLength')
|
||||
if video:
|
||||
renditions.append(video)
|
||||
|
||||
formats = [{
|
||||
'url': fmt['url'],
|
||||
'width': fmt['frameWidth'],
|
||||
'height': fmt['frameHeight'],
|
||||
'vbr': float_or_none(fmt['encodingRate'], 1000),
|
||||
'filesize': fmt['size'],
|
||||
'vcodec': fmt['videoCodec'],
|
||||
'container': fmt['videoContainer'],
|
||||
} for fmt in renditions if fmt['url']]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
}
|
@@ -18,7 +18,7 @@ class BambuserIE(InfoExtractor):
|
||||
_TEST = {
|
||||
'url': 'http://bambuser.com/v/4050584',
|
||||
# MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388
|
||||
# u'md5': 'fba8f7693e48fd4e8641b3fd5539a641',
|
||||
# 'md5': 'fba8f7693e48fd4e8641b3fd5539a641',
|
||||
'info_dict': {
|
||||
'id': '4050584',
|
||||
'ext': 'flv',
|
||||
|
@@ -1,9 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
from ..compat import compat_HTTPError
|
||||
|
||||
|
||||
class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
@@ -55,7 +56,22 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
|
||||
'info_dict': {
|
||||
'id': 'b03k3pb7',
|
||||
'ext': 'flv',
|
||||
'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
|
||||
'description': '2. Invasion',
|
||||
'duration': 3600,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||
},
|
||||
]
|
||||
|
||||
def _extract_asx_playlist(self, connection, programme_id):
|
||||
@@ -102,6 +118,10 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
return playlist.findall('./{http://bbc.co.uk/2008/emp/playlist}item')
|
||||
|
||||
def _extract_medias(self, media_selection):
|
||||
error = media_selection.find('./{http://bbc.co.uk/2008/mp/mediaselection}error')
|
||||
if error is not None:
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, error.get('id')), expected=True)
|
||||
return media_selection.findall('./{http://bbc.co.uk/2008/mp/mediaselection}media')
|
||||
|
||||
def _extract_connections(self, media):
|
||||
@@ -158,54 +178,73 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
subtitles[lang] = srt
|
||||
return subtitles
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
group_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, group_id, 'Downloading video page')
|
||||
if re.search(r'id="emp-error" class="notinuk">', webpage):
|
||||
raise ExtractorError('Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||
expected=True)
|
||||
|
||||
playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
|
||||
'Downloading playlist XML')
|
||||
|
||||
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
|
||||
if no_items is not None:
|
||||
reason = no_items.get('reason')
|
||||
if reason == 'preAvailability':
|
||||
msg = 'Episode %s is not yet available' % group_id
|
||||
elif reason == 'postAvailability':
|
||||
msg = 'Episode %s is no longer available' % group_id
|
||||
def _download_media_selector(self, programme_id):
|
||||
try:
|
||||
media_selection = self._download_xml(
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
|
||||
programme_id, 'Downloading media selection XML')
|
||||
except ExtractorError as ee:
|
||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
|
||||
media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().encode('utf-8'))
|
||||
else:
|
||||
msg = 'Episode %s is not available: %s' % (group_id, reason)
|
||||
raise ExtractorError(msg, expected=True)
|
||||
raise
|
||||
|
||||
formats = []
|
||||
subtitles = None
|
||||
|
||||
for item in self._extract_items(playlist):
|
||||
kind = item.get('kind')
|
||||
if kind != 'programme' and kind != 'radioProgramme':
|
||||
continue
|
||||
title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
|
||||
description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
|
||||
for media in self._extract_medias(media_selection):
|
||||
kind = media.get('kind')
|
||||
if kind == 'audio':
|
||||
formats.extend(self._extract_audio(media, programme_id))
|
||||
elif kind == 'video':
|
||||
formats.extend(self._extract_video(media, programme_id))
|
||||
elif kind == 'captions':
|
||||
subtitles = self._extract_captions(media, programme_id)
|
||||
|
||||
programme_id = item.get('identifier')
|
||||
duration = int(item.get('duration'))
|
||||
return formats, subtitles
|
||||
|
||||
media_selection = self._download_xml(
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s' % programme_id,
|
||||
programme_id, 'Downloading media selection XML')
|
||||
def _real_extract(self, url):
|
||||
group_id = self._match_id(url)
|
||||
|
||||
for media in self._extract_medias(media_selection):
|
||||
kind = media.get('kind')
|
||||
if kind == 'audio':
|
||||
formats.extend(self._extract_audio(media, programme_id))
|
||||
elif kind == 'video':
|
||||
formats.extend(self._extract_video(media, programme_id))
|
||||
elif kind == 'captions':
|
||||
subtitles = self._extract_captions(media, programme_id)
|
||||
webpage = self._download_webpage(url, group_id, 'Downloading video page')
|
||||
|
||||
programme_id = self._search_regex(
|
||||
r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False)
|
||||
if programme_id:
|
||||
player = self._download_json(
|
||||
'http://www.bbc.co.uk/iplayer/episode/%s.json' % group_id,
|
||||
group_id)['jsConf']['player']
|
||||
title = player['title']
|
||||
description = player['subtitle']
|
||||
duration = player['duration']
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
else:
|
||||
playlist = self._download_xml(
|
||||
'http://www.bbc.co.uk/iplayer/playlist/%s' % group_id,
|
||||
group_id, 'Downloading playlist XML')
|
||||
|
||||
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
|
||||
if no_items is not None:
|
||||
reason = no_items.get('reason')
|
||||
if reason == 'preAvailability':
|
||||
msg = 'Episode %s is not yet available' % group_id
|
||||
elif reason == 'postAvailability':
|
||||
msg = 'Episode %s is no longer available' % group_id
|
||||
elif reason == 'noMedia':
|
||||
msg = 'Episode %s is not currently available' % group_id
|
||||
else:
|
||||
msg = 'Episode %s is not available: %s' % (group_id, reason)
|
||||
raise ExtractorError(msg, expected=True)
|
||||
|
||||
for item in self._extract_items(playlist):
|
||||
kind = item.get('kind')
|
||||
if kind != 'programme' and kind != 'radioProgramme':
|
||||
continue
|
||||
title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text
|
||||
description = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary').text
|
||||
programme_id = item.get('identifier')
|
||||
duration = int(item.get('duration'))
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(programme_id, subtitles)
|
||||
|
@@ -10,15 +10,15 @@ from ..utils import url_basename
|
||||
class BehindKinkIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.behindkink.com/2014/08/14/ab1576-performers-voice-finally-heard-the-bill-is-killed/',
|
||||
'md5': '41ad01222b8442089a55528fec43ec01',
|
||||
'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/',
|
||||
'md5': '507b57d8fdcd75a41a9a7bdb7989c762',
|
||||
'info_dict': {
|
||||
'id': '36370',
|
||||
'id': '37127',
|
||||
'ext': 'mp4',
|
||||
'title': 'AB1576 - PERFORMERS VOICE FINALLY HEARD - THE BILL IS KILLED!',
|
||||
'description': 'The adult industry voice was finally heard as Assembly Bill 1576 remained\xa0 in suspense today at the Senate Appropriations Hearing. AB1576 was, among other industry damaging issues, a condom mandate...',
|
||||
'upload_date': '20140814',
|
||||
'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/08/36370_AB1576_Win.jpg',
|
||||
'title': 'What are you passionate about – Marley Blaze',
|
||||
'description': 'md5:aee8e9611b4ff70186f752975d9b94b4',
|
||||
'upload_date': '20141205',
|
||||
'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
@@ -26,26 +26,19 @@ class BehindKinkIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('id')
|
||||
year = mobj.group('year')
|
||||
month = mobj.group('month')
|
||||
day = mobj.group('day')
|
||||
upload_date = year + month + day
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r"'file':\s*'([^']+)'",
|
||||
webpage, 'URL base')
|
||||
|
||||
video_id = url_basename(video_url)
|
||||
video_id = video_id.split('_')[0]
|
||||
r'<source src="([^"]+)"', webpage, 'video URL')
|
||||
video_id = url_basename(video_url).split('_')[0]
|
||||
upload_date = mobj.group('year') + mobj.group('month') + mobj.group('day')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': self._og_search_title(webpage),
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'title': self._og_search_title(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'upload_date': upload_date,
|
||||
|
108
youtube_dl/extractor/bet.py
Normal file
108
youtube_dl/extractor/bet.py
Normal file
@@ -0,0 +1,108 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class BetIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
|
||||
'info_dict': {
|
||||
'id': '417cd61c-c793-4e8e-b006-e445ecc45add',
|
||||
'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
|
||||
'ext': 'flv',
|
||||
'title': 'BET News Presents: A Conversation With President Obama',
|
||||
'description': 'md5:5a88d8ae912c1b33e090290af7ec33c6',
|
||||
'duration': 1534,
|
||||
'timestamp': 1418075340,
|
||||
'upload_date': '20141208',
|
||||
'uploader': 'admin',
|
||||
'thumbnail': 're:(?i)^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html',
|
||||
'info_dict': {
|
||||
'id': '4160e53b-ad41-43b1-980f-8d85f63121f4',
|
||||
'display_id': 'justice-for-ferguson-a-community-reacts',
|
||||
'ext': 'flv',
|
||||
'title': 'Justice for Ferguson: A Community Reacts',
|
||||
'description': 'A BET News special.',
|
||||
'duration': 1696,
|
||||
'timestamp': 1416942360,
|
||||
'upload_date': '20141125',
|
||||
'uploader': 'admin',
|
||||
'thumbnail': 're:(?i)^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
media_url = compat_urllib_parse.unquote(self._search_regex(
|
||||
[r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"],
|
||||
webpage, 'media URL'))
|
||||
|
||||
mrss = self._download_xml(media_url, display_id)
|
||||
|
||||
item = mrss.find('./channel/item')
|
||||
|
||||
NS_MAP = {
|
||||
'dc': 'http://purl.org/dc/elements/1.1/',
|
||||
'media': 'http://search.yahoo.com/mrss/',
|
||||
'ka': 'http://kickapps.com/karss',
|
||||
}
|
||||
|
||||
title = xpath_text(item, './title', 'title')
|
||||
description = xpath_text(
|
||||
item, './description', 'description', fatal=False)
|
||||
|
||||
video_id = xpath_text(item, './guid', 'video id', fatal=False)
|
||||
|
||||
timestamp = parse_iso8601(xpath_text(
|
||||
item, xpath_with_ns('./dc:date', NS_MAP),
|
||||
'upload date', fatal=False))
|
||||
uploader = xpath_text(
|
||||
item, xpath_with_ns('./dc:creator', NS_MAP),
|
||||
'uploader', fatal=False)
|
||||
|
||||
media_content = item.find(
|
||||
xpath_with_ns('./media:content', NS_MAP))
|
||||
duration = int_or_none(media_content.get('duration'))
|
||||
smil_url = media_content.get('url')
|
||||
|
||||
thumbnail = media_content.find(
|
||||
xpath_with_ns('./media:thumbnail', NS_MAP)).get('url')
|
||||
|
||||
formats = self._extract_smil_formats(smil_url, display_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'uploader': uploader,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
@@ -4,13 +4,17 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_request,
|
||||
unescapeHTML,
|
||||
parse_iso8601,
|
||||
compat_urlparse,
|
||||
clean_html,
|
||||
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
@@ -64,7 +68,39 @@ class BlipTVIE(SubtitlesInfoExtractor):
|
||||
'uploader': 'redvsblue',
|
||||
'uploader_id': '792887',
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://blip.tv/play/gbk766dkj4Yn',
|
||||
'md5': 'fe0a33f022d49399a241e84a8ea8b8e3',
|
||||
'info_dict': {
|
||||
'id': '1749452',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20090208',
|
||||
'description': 'Witness the first appearance of the Nostalgia Critic character, as Doug reviews the movie Transformers.',
|
||||
'title': 'Nostalgia Critic: Transformers',
|
||||
'timestamp': 1234068723,
|
||||
'uploader': 'NostalgiaCritic',
|
||||
'uploader_id': '246467',
|
||||
}
|
||||
},
|
||||
{
|
||||
# https://github.com/rg3/youtube-dl/pull/4404
|
||||
'note': 'Audio only',
|
||||
'url': 'http://blip.tv/hilarios-productions/weekly-manga-recap-kingdom-7119982',
|
||||
'md5': '76c0a56f24e769ceaab21fbb6416a351',
|
||||
'info_dict': {
|
||||
'id': '7103299',
|
||||
'ext': 'flv',
|
||||
'title': 'Weekly Manga Recap: Kingdom',
|
||||
'description': 'And then Shin breaks the enemy line, and he&apos;s all like HWAH! And then he slices a guy and it&apos;s all like FWASHING! And... it&apos;s really hard to describe the best parts of this series without breaking down into sound effects, okay?',
|
||||
'timestamp': 1417660321,
|
||||
'upload_date': '20141204',
|
||||
'uploader': 'The Rollo T',
|
||||
'uploader_id': '407429',
|
||||
'duration': 7251,
|
||||
'vcodec': 'none',
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -74,11 +110,13 @@ class BlipTVIE(SubtitlesInfoExtractor):
|
||||
# See https://github.com/rg3/youtube-dl/issues/857 and
|
||||
# https://github.com/rg3/youtube-dl/issues/4197
|
||||
if lookup_id:
|
||||
info_page = self._download_webpage(
|
||||
'http://blip.tv/play/%s.x?p=1' % lookup_id, lookup_id, 'Resolving lookup id')
|
||||
video_id = self._search_regex(r'config\.id\s*=\s*"([0-9]+)', info_page, 'video_id')
|
||||
else:
|
||||
video_id = mobj.group('id')
|
||||
urlh = self._request_webpage(
|
||||
'http://blip.tv/play/%s' % lookup_id, lookup_id, 'Resolving lookup id')
|
||||
url = compat_urlparse.urlparse(urlh.geturl())
|
||||
qs = compat_urlparse.parse_qs(url.query)
|
||||
mobj = re.match(self._VALID_URL, qs['file'][0])
|
||||
|
||||
video_id = mobj.group('id')
|
||||
|
||||
rss = self._download_xml('http://blip.tv/rss/flash/%s' % video_id, video_id, 'Downloading video RSS')
|
||||
|
||||
@@ -114,7 +152,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
|
||||
msg = self._download_webpage(
|
||||
url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url',
|
||||
video_id, 'Resolving URL for %s' % role)
|
||||
real_url = compat_urlparse.parse_qs(msg)['message'][0]
|
||||
real_url = compat_urlparse.parse_qs(msg.strip())['message'][0]
|
||||
|
||||
media_type = media_content.get('type')
|
||||
if media_type == 'text/srt' or url.endswith('.srt'):
|
||||
@@ -129,11 +167,11 @@ class BlipTVIE(SubtitlesInfoExtractor):
|
||||
'url': real_url,
|
||||
'format_id': role,
|
||||
'format_note': media_type,
|
||||
'vcodec': media_content.get(blip('vcodec')),
|
||||
'vcodec': media_content.get(blip('vcodec')) or 'none',
|
||||
'acodec': media_content.get(blip('acodec')),
|
||||
'filesize': media_content.get('filesize'),
|
||||
'width': int(media_content.get('width')),
|
||||
'height': int(media_content.get('height')),
|
||||
'width': int_or_none(media_content.get('width')),
|
||||
'height': int_or_none(media_content.get('height')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@@ -14,7 +14,6 @@ class BreakIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?break\.com/video/(?:[^/]+/)*.+-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056',
|
||||
'md5': '33aa4ff477ecd124d18d7b5d23b87ce5',
|
||||
'info_dict': {
|
||||
'id': '2468056',
|
||||
'ext': 'mp4',
|
||||
|
@@ -265,6 +265,7 @@ class BrightcoveIE(InfoExtractor):
|
||||
url = rend['defaultURL']
|
||||
if not url:
|
||||
continue
|
||||
ext = None
|
||||
if rend['remote']:
|
||||
url_comp = compat_urllib_parse_urlparse(url)
|
||||
if url_comp.path.endswith('.m3u8'):
|
||||
@@ -276,7 +277,7 @@ class BrightcoveIE(InfoExtractor):
|
||||
# akamaihd.net, but they don't use f4m manifests
|
||||
url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB'
|
||||
ext = 'flv'
|
||||
else:
|
||||
if ext is None:
|
||||
ext = determine_ext(url)
|
||||
size = rend.get('size')
|
||||
formats.append({
|
||||
|
74
youtube_dl/extractor/buzzfeed.py
Normal file
74
youtube_dl/extractor/buzzfeed.py
Normal file
@@ -0,0 +1,74 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class BuzzFeedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?buzzfeed\.com/[^?#]*?/(?P<id>[^?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.buzzfeed.com/abagg/this-angry-ram-destroys-a-punching-bag-like-a-boss?utm_term=4ldqpia',
|
||||
'info_dict': {
|
||||
'id': 'this-angry-ram-destroys-a-punching-bag-like-a-boss',
|
||||
'title': 'This Angry Ram Destroys A Punching Bag Like A Boss',
|
||||
'description': 'Rambro!',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'aVCR29aE_OQ',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20141024',
|
||||
'uploader_id': 'Buddhanz1',
|
||||
'description': 'He likes to stay in shape with his heavy bag, he wont stop until its on the ground\n\nFollow Angry Ram on Facebook for regular updates -\nhttps://www.facebook.com/pages/Angry-Ram/1436897249899558?ref=hl',
|
||||
'uploader': 'Buddhanz',
|
||||
'title': 'Angry Ram destroys a punching bag',
|
||||
}
|
||||
}]
|
||||
}, {
|
||||
'url': 'http://www.buzzfeed.com/sheridanwatson/look-at-this-cute-dog-omg?utm_term=4ldqpia',
|
||||
'params': {
|
||||
'skip_download': True, # Got enough YouTube download tests
|
||||
},
|
||||
'info_dict': {
|
||||
'description': 'Munchkin the Teddy Bear is back !',
|
||||
'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'mVmBL8B-In0',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20141124',
|
||||
'uploader_id': 'CindysMunchkin',
|
||||
'description': '© 2014 Munchkin the Shih Tzu\nAll rights reserved\nFacebook: http://facebook.com/MunchkintheShihTzu',
|
||||
'uploader': 'Munchkin the Shih Tzu',
|
||||
'title': 'Munchkin the Teddy Bear gets her exercise',
|
||||
},
|
||||
}]
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
all_buckets = re.findall(
|
||||
r'(?s)<div class="video-embed[^"]*"..*?rel:bf_bucket_data=\'([^\']+)\'',
|
||||
webpage)
|
||||
|
||||
entries = []
|
||||
for bd_json in all_buckets:
|
||||
bd = json.loads(bd_json)
|
||||
video = bd.get('video') or bd.get('progload_video')
|
||||
if not video:
|
||||
continue
|
||||
entries.append(self.url_result(video['url']))
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': playlist_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'entries': entries,
|
||||
}
|
@@ -45,4 +45,4 @@ class CBSIE(InfoExtractor):
|
||||
real_id = self._search_regex(
|
||||
r"video\.settings\.pid\s*=\s*'([^']+)';",
|
||||
webpage, 'real video ID')
|
||||
return self.url_result(u'theplatform:%s' % real_id)
|
||||
return self.url_result('theplatform:%s' % real_id)
|
||||
|
@@ -236,16 +236,17 @@ class Channel9IE(InfoExtractor):
|
||||
if contents is None:
|
||||
return contents
|
||||
|
||||
session_meta = {'session_code': self._extract_session_code(html),
|
||||
'session_day': self._extract_session_day(html),
|
||||
'session_room': self._extract_session_room(html),
|
||||
'session_speakers': self._extract_session_speakers(html),
|
||||
}
|
||||
session_meta = {
|
||||
'session_code': self._extract_session_code(html),
|
||||
'session_day': self._extract_session_day(html),
|
||||
'session_room': self._extract_session_room(html),
|
||||
'session_speakers': self._extract_session_speakers(html),
|
||||
}
|
||||
|
||||
for content in contents:
|
||||
content.update(session_meta)
|
||||
|
||||
return contents
|
||||
return self.playlist_result(contents)
|
||||
|
||||
def _extract_list(self, content_path):
|
||||
rss = self._download_xml(self._RSS_URL % content_path, content_path, 'Downloading RSS')
|
||||
|
52
youtube_dl/extractor/cinchcast.py
Normal file
52
youtube_dl/extractor/cinchcast.py
Normal file
@@ -0,0 +1,52 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class CinchcastIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://player\.cinchcast\.com/.*?assetId=(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
# Actual test is run in generic, look for undergroundwellness
|
||||
'url': 'http://player.cinchcast.com/?platformId=1&assetType=single&assetId=7141703',
|
||||
'only_matching': True,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
doc = self._download_xml(
|
||||
'http://www.blogtalkradio.com/playerasset/mrss?assetType=single&assetId=%s' % video_id,
|
||||
video_id)
|
||||
|
||||
item = doc.find('.//item')
|
||||
title = xpath_text(item, './title', fatal=True)
|
||||
date_str = xpath_text(
|
||||
item, './{http://developer.longtailvideo.com/trac/}date')
|
||||
upload_date = unified_strdate(date_str, day_first=False)
|
||||
# duration is present but wrong
|
||||
formats = []
|
||||
formats.append({
|
||||
'format_id': 'main',
|
||||
'url': item.find(
|
||||
'./{http://search.yahoo.com/mrss/}content').attrib['url'],
|
||||
})
|
||||
backup_url = xpath_text(
|
||||
item, './{http://developer.longtailvideo.com/trac/}backupContent')
|
||||
if backup_url:
|
||||
formats.append({
|
||||
'preference': 2, # seems to be more reliable
|
||||
'format_id': 'backup',
|
||||
'url': backup_url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'upload_date': upload_date,
|
||||
'formats': formats,
|
||||
}
|
@@ -24,7 +24,7 @@ class ClipfishIE(InfoExtractor):
|
||||
'title': 'FIFA 14 - E3 2013 Trailer',
|
||||
'duration': 82,
|
||||
},
|
||||
u'skip': 'Blocked in the US'
|
||||
'skip': 'Blocked in the US'
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -34,7 +34,7 @@ class ClipfishIE(InfoExtractor):
|
||||
info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
|
||||
(video_id, int(time.time())))
|
||||
doc = self._download_xml(
|
||||
info_url, video_id, note=u'Downloading info page')
|
||||
info_url, video_id, note='Downloading info page')
|
||||
title = doc.find('title').text
|
||||
video_url = doc.find('filename').text
|
||||
if video_url is None:
|
||||
|
@@ -15,23 +15,24 @@ class CNETIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/'
|
||||
_TEST = {
|
||||
'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
|
||||
'md5': '041233212a0d06b179c87cbcca1577b8',
|
||||
'info_dict': {
|
||||
'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'Hands-on with Microsoft Windows 8.1 Update',
|
||||
'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
|
||||
'thumbnail': 're:^http://.*/flmswindows8.jpg$',
|
||||
'uploader_id': 'sarah.mitroff@cbsinteractive.com',
|
||||
'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861',
|
||||
'uploader': 'Sarah Mitroff',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'requires rtmpdump',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('id')
|
||||
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
data_json = self._html_search_regex(
|
||||
r"<div class=\"cnetVideoPlayer\"\s+.*?data-cnet-video-options='([^']+)'",
|
||||
webpage, 'data json')
|
||||
@@ -42,37 +43,31 @@ class CNETIE(InfoExtractor):
|
||||
if not vdata:
|
||||
raise ExtractorError('Cannot find video data')
|
||||
|
||||
mpx_account = data['config']['players']['default']['mpx_account']
|
||||
vid = vdata['files']['rtmp']
|
||||
tp_link = 'http://link.theplatform.com/s/%s/%s' % (mpx_account, vid)
|
||||
|
||||
video_id = vdata['id']
|
||||
title = vdata.get('headline')
|
||||
if title is None:
|
||||
title = vdata.get('title')
|
||||
if title is None:
|
||||
raise ExtractorError('Cannot find title!')
|
||||
description = vdata.get('dek')
|
||||
thumbnail = vdata.get('image', {}).get('path')
|
||||
author = vdata.get('author')
|
||||
if author:
|
||||
uploader = '%s %s' % (author['firstName'], author['lastName'])
|
||||
uploader_id = author.get('email')
|
||||
uploader_id = author.get('id')
|
||||
else:
|
||||
uploader = None
|
||||
uploader_id = None
|
||||
|
||||
formats = [{
|
||||
'format_id': '%s-%s-%s' % (
|
||||
f['type'], f['format'],
|
||||
int_or_none(f.get('bitrate'), 1000, default='')),
|
||||
'url': f['uri'],
|
||||
'tbr': int_or_none(f.get('bitrate'), 1000),
|
||||
} for f in vdata['files']['data']]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': tp_link,
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'description': description,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'thumbnail': thumbnail,
|
||||
|
@@ -13,6 +13,7 @@ import time
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from ..compat import (
|
||||
compat_cookiejar,
|
||||
compat_http_client,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_urlparse,
|
||||
@@ -117,6 +118,7 @@ class InfoExtractor(object):
|
||||
|
||||
The following fields are optional:
|
||||
|
||||
alt_title: A secondary title of the video.
|
||||
display_id An alternative identifier for the video, not necessarily
|
||||
unique, but available before title. Typically, id is
|
||||
something like "4234987", title "Dancing naked mole rats",
|
||||
@@ -128,7 +130,7 @@ class InfoExtractor(object):
|
||||
* "resolution" (optional, string "{width}x{height}",
|
||||
deprecated)
|
||||
thumbnail: Full URL to a video thumbnail image.
|
||||
description: One-line video description.
|
||||
description: Full video description.
|
||||
uploader: Full name of the video uploader.
|
||||
timestamp: UNIX timestamp of the moment the video became available.
|
||||
upload_date: Video upload date (YYYYMMDD).
|
||||
@@ -157,8 +159,8 @@ class InfoExtractor(object):
|
||||
|
||||
|
||||
_type "playlist" indicates multiple videos.
|
||||
There must be a key "entries", which is a list or a PagedList object, each
|
||||
element of which is a valid dictionary under this specfication.
|
||||
There must be a key "entries", which is a list, an iterable, or a PagedList
|
||||
object, each element of which is a valid dictionary by this specification.
|
||||
|
||||
Additionally, playlists can have "title" and "id" attributes with the same
|
||||
semantics as videos (see above).
|
||||
@@ -173,9 +175,10 @@ class InfoExtractor(object):
|
||||
_type "url" indicates that the video must be extracted from another
|
||||
location, possibly by a different extractor. Its only required key is:
|
||||
"url" - the next URL to extract.
|
||||
|
||||
Additionally, it may have properties believed to be identical to the
|
||||
resolved entity, for example "title" if the title of the referred video is
|
||||
The key "ie_key" can be set to the class name (minus the trailing "IE",
|
||||
e.g. "Youtube") if the extractor class is known in advance.
|
||||
Additionally, the dictionary may have any properties of the resolved entity
|
||||
known in advance, for example "title" if the title of the referred video is
|
||||
known ahead of time.
|
||||
|
||||
|
||||
@@ -296,9 +299,11 @@ class InfoExtractor(object):
|
||||
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal)
|
||||
return (content, urlh)
|
||||
|
||||
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True):
|
||||
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None):
|
||||
content_type = urlh.headers.get('Content-Type', '')
|
||||
webpage_bytes = urlh.read()
|
||||
if prefix is not None:
|
||||
webpage_bytes = prefix + webpage_bytes
|
||||
m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
|
||||
if m:
|
||||
encoding = m.group(1)
|
||||
@@ -387,6 +392,10 @@ class InfoExtractor(object):
|
||||
url_or_request, video_id, note, errnote, fatal=fatal)
|
||||
if (not fatal) and json_string is False:
|
||||
return None
|
||||
return self._parse_json(
|
||||
json_string, video_id, transform_source=transform_source, fatal=fatal)
|
||||
|
||||
def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):
|
||||
if transform_source:
|
||||
json_string = transform_source(json_string)
|
||||
try:
|
||||
@@ -436,7 +445,7 @@ class InfoExtractor(object):
|
||||
return video_info
|
||||
|
||||
@staticmethod
|
||||
def playlist_result(entries, playlist_id=None, playlist_title=None):
|
||||
def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
|
||||
"""Returns a playlist"""
|
||||
video_info = {'_type': 'playlist',
|
||||
'entries': entries}
|
||||
@@ -444,6 +453,8 @@ class InfoExtractor(object):
|
||||
video_info['id'] = playlist_id
|
||||
if playlist_title:
|
||||
video_info['title'] = playlist_title
|
||||
if playlist_description:
|
||||
video_info['description'] = playlist_description
|
||||
return video_info
|
||||
|
||||
def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
|
||||
@@ -787,6 +798,49 @@ class InfoExtractor(object):
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
# TODO: improve extraction
|
||||
def _extract_smil_formats(self, smil_url, video_id):
|
||||
smil = self._download_xml(
|
||||
smil_url, video_id, 'Downloading SMIL file',
|
||||
'Unable to download SMIL file')
|
||||
|
||||
base = smil.find('./head/meta').get('base')
|
||||
|
||||
formats = []
|
||||
rtmp_count = 0
|
||||
for video in smil.findall('./body/switch/video'):
|
||||
src = video.get('src')
|
||||
if not src:
|
||||
continue
|
||||
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
||||
width = int_or_none(video.get('width'))
|
||||
height = int_or_none(video.get('height'))
|
||||
proto = video.get('proto')
|
||||
if not proto:
|
||||
if base:
|
||||
if base.startswith('rtmp'):
|
||||
proto = 'rtmp'
|
||||
elif base.startswith('http'):
|
||||
proto = 'http'
|
||||
ext = video.get('ext')
|
||||
if proto == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(src, video_id, ext))
|
||||
elif proto == 'rtmp':
|
||||
rtmp_count += 1
|
||||
streamer = video.get('streamer') or base
|
||||
formats.append({
|
||||
'url': streamer,
|
||||
'play_path': src,
|
||||
'ext': 'flv',
|
||||
'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
|
||||
'tbr': bitrate,
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return formats
|
||||
|
||||
def _live_title(self, name):
|
||||
""" Generate the title for a live video """
|
||||
now = datetime.datetime.now()
|
||||
@@ -815,6 +869,12 @@ class InfoExtractor(object):
|
||||
self._downloader.report_warning(msg)
|
||||
return res
|
||||
|
||||
def _set_cookie(self, domain, name, value, expire_time=None):
|
||||
cookie = compat_cookiejar.Cookie(
|
||||
0, name, value, None, None, domain, None,
|
||||
None, '/', True, False, expire_time, '', None, None, None)
|
||||
self._downloader.cookiejar.set_cookie(cookie)
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
|
@@ -125,7 +125,7 @@ class EightTracksIE(InfoExtractor):
|
||||
info = {
|
||||
'id': compat_str(track_data['id']),
|
||||
'url': track_data['track_file_stream_url'],
|
||||
'title': track_data['performer'] + u' - ' + track_data['name'],
|
||||
'title': track_data['performer'] + ' - ' + track_data['name'],
|
||||
'raw_title': track_data['name'],
|
||||
'uploader_id': data['user']['login'],
|
||||
'ext': 'm4a',
|
||||
|
@@ -13,9 +13,10 @@ from ..compat import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
urlencode_postdata,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
limit_length,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
@@ -36,7 +37,6 @@ class FacebookIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '637842556329505',
|
||||
'ext': 'mp4',
|
||||
'duration': 38,
|
||||
'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam',
|
||||
}
|
||||
}, {
|
||||
@@ -107,9 +107,7 @@ class FacebookIE(InfoExtractor):
|
||||
self._login()
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video_id = self._match_id(url)
|
||||
url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
@@ -149,6 +147,6 @@ class FacebookIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'url': video_url,
|
||||
'duration': int(video_data['video_duration']),
|
||||
'thumbnail': video_data['thumbnail_src'],
|
||||
'duration': int_or_none(video_data.get('video_duration')),
|
||||
'thumbnail': video_data.get('thumbnail_src'),
|
||||
}
|
||||
|
48
youtube_dl/extractor/foxgay.py
Normal file
48
youtube_dl/extractor/foxgay.py
Normal file
@@ -0,0 +1,48 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class FoxgayIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml'
|
||||
_TEST = {
|
||||
'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml',
|
||||
'md5': '80d72beab5d04e1655a56ad37afe6841',
|
||||
'info_dict': {
|
||||
'id': '2582',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:6122f7ae0fc6b21ebdf59c5e083ce25a',
|
||||
'description': 'md5:5e51dc4405f1fd315f7927daed2ce5cf',
|
||||
'age_limit': 18,
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>(?P<title>.*?)</title>',
|
||||
webpage, 'title', fatal=False)
|
||||
description = self._html_search_regex(
|
||||
r'<div class="ico_desc"><h2>(?P<description>.*?)</h2>',
|
||||
webpage, 'description', fatal=False)
|
||||
|
||||
# Find the URL for the iFrame which contains the actual video.
|
||||
iframe = self._download_webpage(
|
||||
self._html_search_regex(r'iframe src="(?P<frame>.*?)"', webpage, 'video frame'),
|
||||
video_id)
|
||||
video_url = self._html_search_regex(
|
||||
r"v_path = '(?P<vid>http://.*?)'", iframe, 'url')
|
||||
thumb_url = self._html_search_regex(
|
||||
r"t_path = '(?P<thumb>http://.*?)'", iframe, 'thumbnail', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'description': description,
|
||||
'thumbnail': thumb_url,
|
||||
'age_limit': 18,
|
||||
}
|
94
youtube_dl/extractor/foxnews.py
Normal file
94
youtube_dl/extractor/foxnews.py
Normal file
@@ -0,0 +1,94 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class FoxNewsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video\.foxnews\.com/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips',
|
||||
'md5': '32aaded6ba3ef0d1c04e238d01031e5e',
|
||||
'info_dict': {
|
||||
'id': '3937480',
|
||||
'ext': 'flv',
|
||||
'title': 'Frozen in Time',
|
||||
'description': 'Doctors baffled by 16-year-old girl that is the size of a toddler',
|
||||
'duration': 265,
|
||||
'timestamp': 1304411491,
|
||||
'upload_date': '20110503',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://video.foxnews.com/v/3922535568001/rep-luis-gutierrez-on-if-obamas-immigration-plan-is-legal/#sp=show-clips',
|
||||
'md5': '5846c64a1ea05ec78175421b8323e2df',
|
||||
'info_dict': {
|
||||
'id': '3922535568001',
|
||||
'ext': 'mp4',
|
||||
'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal",
|
||||
'description': "Congressman discusses the president's executive action",
|
||||
'duration': 292,
|
||||
'timestamp': 1417662047,
|
||||
'upload_date': '20141204',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video = self._download_json(
|
||||
'http://video.foxnews.com/v/feed/video/%s.js?template=fox' % video_id, video_id)
|
||||
|
||||
item = video['channel']['item']
|
||||
title = item['title']
|
||||
description = item['description']
|
||||
timestamp = parse_iso8601(item['dc-date'])
|
||||
|
||||
media_group = item['media-group']
|
||||
duration = None
|
||||
formats = []
|
||||
for media in media_group['media-content']:
|
||||
attributes = media['@attributes']
|
||||
video_url = attributes['url']
|
||||
if video_url.endswith('.f4m'):
|
||||
formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', video_id))
|
||||
elif video_url.endswith('.m3u8'):
|
||||
formats.extend(self._extract_m3u8_formats(video_url, video_id, 'flv'))
|
||||
elif not video_url.endswith('.smil'):
|
||||
duration = int_or_none(attributes.get('duration'))
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': media['media-category']['@attributes']['label'],
|
||||
'preference': 1,
|
||||
'vbr': int_or_none(attributes.get('bitrate')),
|
||||
'filesize': int_or_none(attributes.get('fileSize'))
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
media_thumbnail = media_group['media-thumbnail']['@attributes']
|
||||
thumbnails = [{
|
||||
'url': media_thumbnail['url'],
|
||||
'width': int_or_none(media_thumbnail.get('width')),
|
||||
'height': int_or_none(media_thumbnail.get('height')),
|
||||
}] if media_thumbnail else []
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
}
|
@@ -40,8 +40,6 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
else:
|
||||
georestricted = False
|
||||
|
||||
|
||||
|
||||
formats = []
|
||||
for video in info['videos']:
|
||||
if video['statut'] != 'ONLINE':
|
||||
|
@@ -11,7 +11,7 @@ class GamekingsIE(InfoExtractor):
|
||||
'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
|
||||
# MD5 is flaky, seems to change regularly
|
||||
# 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3',
|
||||
u'info_dict': {
|
||||
'info_dict': {
|
||||
'id': '20130811',
|
||||
'ext': 'mp4',
|
||||
'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review',
|
||||
|
@@ -445,6 +445,39 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'Rosetta #CometLanding webcast HL 10',
|
||||
}
|
||||
},
|
||||
# LazyYT
|
||||
{
|
||||
'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
|
||||
'info_dict': {
|
||||
'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
},
|
||||
# Direct link with incorrect MIME type
|
||||
{
|
||||
'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
|
||||
'md5': '4ccbebe5f36706d85221f204d7eb5913',
|
||||
'info_dict': {
|
||||
'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
|
||||
'id': '5_Lennart_Poettering_-_Systemd',
|
||||
'ext': 'webm',
|
||||
'title': '5_Lennart_Poettering_-_Systemd',
|
||||
'upload_date': '20141120',
|
||||
},
|
||||
'expected_warnings': [
|
||||
'URL could be a direct video link, returning it as such.'
|
||||
]
|
||||
},
|
||||
# Cinchcast embed
|
||||
{
|
||||
'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
|
||||
'info_dict': {
|
||||
'id': '7141703',
|
||||
'ext': 'mp3',
|
||||
'upload_date': '20141126',
|
||||
'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def report_following_redirect(self, new_url):
|
||||
@@ -598,10 +631,28 @@ class GenericIE(InfoExtractor):
|
||||
if not self._downloader.params.get('test', False) and not is_intentional:
|
||||
self._downloader.report_warning('Falling back on generic information extractor.')
|
||||
|
||||
if full_response:
|
||||
webpage = self._webpage_read_content(full_response, url, video_id)
|
||||
else:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
if not full_response:
|
||||
full_response = self._request_webpage(url, video_id)
|
||||
|
||||
# Maybe it's a direct link to a video?
|
||||
# Be careful not to download the whole thing!
|
||||
first_bytes = full_response.read(512)
|
||||
if not re.match(r'^\s*<', first_bytes.decode('utf-8', 'replace')):
|
||||
self._downloader.report_warning(
|
||||
'URL could be a direct video link, returning it as such.')
|
||||
upload_date = unified_strdate(
|
||||
head_response.headers.get('Last-Modified'))
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': os.path.splitext(url_basename(url))[0],
|
||||
'direct': True,
|
||||
'url': url,
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
|
||||
webpage = self._webpage_read_content(
|
||||
full_response, url, video_id, prefix=first_bytes)
|
||||
|
||||
self.report_extraction(video_id)
|
||||
|
||||
# Is it an RSS feed?
|
||||
@@ -702,6 +753,12 @@ class GenericIE(InfoExtractor):
|
||||
return _playlist_from_matches(
|
||||
matches, lambda m: unescapeHTML(m[1]))
|
||||
|
||||
# Look for lazyYT YouTube embed
|
||||
matches = re.findall(
|
||||
r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
|
||||
if matches:
|
||||
return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
|
||||
|
||||
# Look for embedded Dailymotion player
|
||||
matches = re.findall(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
|
||||
@@ -914,6 +971,13 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'SBS')
|
||||
|
||||
# Look for embedded Cinchcast player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'Cinchcast')
|
||||
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
|
||||
webpage)
|
||||
|
81
youtube_dl/extractor/giantbomb.py
Normal file
81
youtube_dl/extractor/giantbomb.py
Normal file
@@ -0,0 +1,81 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unescapeHTML,
|
||||
qualities,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class GiantBombIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?giantbomb\.com/videos/(?P<display_id>[^/]+)/(?P<id>\d+-\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.giantbomb.com/videos/quick-look-destiny-the-dark-below/2300-9782/',
|
||||
'md5': '57badeface303ecf6b98b812de1b9018',
|
||||
'info_dict': {
|
||||
'id': '2300-9782',
|
||||
'display_id': 'quick-look-destiny-the-dark-below',
|
||||
'ext': 'mp4',
|
||||
'title': 'Quick Look: Destiny: The Dark Below',
|
||||
'description': 'md5:0aa3aaf2772a41b91d44c63f30dfad24',
|
||||
'duration': 2399,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
video = json.loads(unescapeHTML(self._search_regex(
|
||||
r'data-video="([^"]+)"', webpage, 'data-video')))
|
||||
|
||||
duration = int_or_none(video.get('lengthSeconds'))
|
||||
|
||||
quality = qualities([
|
||||
'f4m_low', 'progressive_low', 'f4m_high',
|
||||
'progressive_high', 'f4m_hd', 'progressive_hd'])
|
||||
|
||||
formats = []
|
||||
for format_id, video_url in video['videoStreams'].items():
|
||||
if format_id == 'f4m_stream':
|
||||
continue
|
||||
if video_url.endswith('.f4m'):
|
||||
f4m_formats = self._extract_f4m_formats(video_url + '?hdcore=3.3.1', display_id)
|
||||
if f4m_formats:
|
||||
f4m_formats[0]['quality'] = quality(format_id)
|
||||
formats.extend(f4m_formats)
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'quality': quality(format_id),
|
||||
})
|
||||
|
||||
if not formats:
|
||||
youtube_id = video.get('youtubeID')
|
||||
if youtube_id:
|
||||
return self.url_result(youtube_id, 'Youtube')
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
@@ -17,7 +17,6 @@ class GoldenMoustacheIE(InfoExtractor):
|
||||
'title': 'Suricate - Le Poker',
|
||||
'description': 'md5:3d1f242f44f8c8cb0a106f1fd08e5dc9',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'view_count': int,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.goldenmoustache.com/le-lab-tout-effacer-mc-fly-et-carlito-55249/',
|
||||
@@ -28,7 +27,6 @@ class GoldenMoustacheIE(InfoExtractor):
|
||||
'title': 'Le LAB - Tout Effacer (Mc Fly et Carlito)',
|
||||
'description': 'md5:9b7fbf11023fb2250bd4b185e3de3b2a',
|
||||
'thumbnail': 're:^https?://.*\.(?:png|jpg)$',
|
||||
'view_count': int,
|
||||
}
|
||||
}]
|
||||
|
||||
@@ -42,9 +40,6 @@ class GoldenMoustacheIE(InfoExtractor):
|
||||
r'<title>(.*?)(?: - Golden Moustache)?</title>', webpage, 'title')
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
view_count = int_or_none(self._html_search_regex(
|
||||
r'<strong>([0-9]+)</strong>\s*VUES</span>',
|
||||
webpage, 'view count', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -53,5 +48,4 @@ class GoldenMoustacheIE(InfoExtractor):
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'view_count': view_count,
|
||||
}
|
||||
|
@@ -9,14 +9,15 @@ from ..utils import (
|
||||
determine_ext,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class GorillaVidIE(InfoExtractor):
|
||||
IE_DESC = 'GorillaVid.in, daclips.in and movpod.in'
|
||||
IE_DESC = 'GorillaVid.in, daclips.in, movpod.in and fastvideo.in'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?P<host>(?:www\.)?
|
||||
(?:daclips\.in|gorillavid\.in|movpod\.in))/
|
||||
(?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in))/
|
||||
(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?
|
||||
'''
|
||||
|
||||
@@ -49,6 +50,16 @@ class GorillaVidIE(InfoExtractor):
|
||||
'title': 'Micro Pig piglets ready on 16th July 2009-bG0PdrCdxUc',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
}
|
||||
}, {
|
||||
# video with countdown timeout
|
||||
'url': 'http://fastvideo.in/1qmdn1lmsmbw',
|
||||
'md5': '8b87ec3f6564a3108a0e8e66594842ba',
|
||||
'info_dict': {
|
||||
'id': '1qmdn1lmsmbw',
|
||||
'ext': 'mp4',
|
||||
'title': 'Man of Steel - Trailer',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://movpod.in/0wguyyxi1yca',
|
||||
'only_matching': True,
|
||||
@@ -71,6 +82,12 @@ class GorillaVidIE(InfoExtractor):
|
||||
''', webpage))
|
||||
|
||||
if fields['op'] == 'download1':
|
||||
countdown = int_or_none(self._search_regex(
|
||||
r'<span id="countdown_str">(?:[Ww]ait)?\s*<span id="cxc">(\d+)</span>\s*(?:seconds?)?</span>',
|
||||
webpage, 'countdown', default=None))
|
||||
if countdown:
|
||||
self._sleep(countdown, video_id)
|
||||
|
||||
post = compat_urllib_parse.urlencode(fields)
|
||||
|
||||
req = compat_urllib_request.Request(url, post)
|
||||
@@ -78,9 +95,13 @@ class GorillaVidIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(req, video_id, 'Downloading video page')
|
||||
|
||||
title = self._search_regex(r'style="z-index: [0-9]+;">([^<]+)</span>', webpage, 'title')
|
||||
video_url = self._search_regex(r'file\s*:\s*\'(http[^\']+)\',', webpage, 'file url')
|
||||
thumbnail = self._search_regex(r'image\s*:\s*\'(http[^\']+)\',', webpage, 'thumbnail', fatal=False)
|
||||
title = self._search_regex(
|
||||
r'style="z-index: [0-9]+;">([^<]+)</span>',
|
||||
webpage, 'title', default=None) or self._og_search_title(webpage)
|
||||
video_url = self._search_regex(
|
||||
r'file\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'file url')
|
||||
thumbnail = self._search_regex(
|
||||
r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', fatal=False)
|
||||
|
||||
formats = [{
|
||||
'format_id': 'sd',
|
||||
|
@@ -2,57 +2,52 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
)
|
||||
from ..utils import (
|
||||
compat_urlparse,
|
||||
ExtractorError,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class GoshgayIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)www.goshgay.com/video(?P<id>\d+?)($|/)'
|
||||
_VALID_URL = r'https?://www\.goshgay\.com/video(?P<id>\d+?)($|/)'
|
||||
_TEST = {
|
||||
'url': 'http://www.goshgay.com/video4116282',
|
||||
'md5': '268b9f3c3229105c57859e166dd72b03',
|
||||
'url': 'http://www.goshgay.com/video299069/diesel_sfw_xxx_video',
|
||||
'md5': '027fcc54459dff0feb0bc06a7aeda680',
|
||||
'info_dict': {
|
||||
'id': '4116282',
|
||||
'id': '299069',
|
||||
'ext': 'flv',
|
||||
'title': 'md5:089833a4790b5e103285a07337f245bf',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'title': 'DIESEL SFW XXX Video',
|
||||
'thumbnail': 're:^http://.*\.jpg$',
|
||||
'duration': 79,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._og_search_title(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h2>(.*?)<', webpage, 'title')
|
||||
duration = parse_duration(self._html_search_regex(
|
||||
r'<span class="duration">\s*-?\s*(.*?)</span>',
|
||||
webpage, 'duration', fatal=False))
|
||||
family_friendly = self._html_search_meta(
|
||||
'isFamilyFriendly', webpage, default='false')
|
||||
config_url = self._search_regex(
|
||||
r"'config'\s*:\s*'([^']+)'", webpage, 'config URL')
|
||||
|
||||
config = self._download_xml(
|
||||
config_url, video_id, 'Downloading player config XML')
|
||||
|
||||
if config is None:
|
||||
raise ExtractorError('Missing config XML')
|
||||
if config.tag != 'config':
|
||||
raise ExtractorError('Missing config attribute')
|
||||
fns = config.findall('file')
|
||||
if len(fns) < 1:
|
||||
raise ExtractorError('Missing media URI')
|
||||
video_url = fns[0].text
|
||||
|
||||
url_comp = compat_urlparse.urlparse(url)
|
||||
ref = "%s://%s%s" % (url_comp[0], url_comp[1], url_comp[2])
|
||||
flashvars = compat_parse_qs(self._html_search_regex(
|
||||
r'<embed.+?id="flash-player-embed".+?flashvars="([^"]+)"',
|
||||
webpage, 'flashvars'))
|
||||
thumbnail = flashvars.get('url_bigthumb', [None])[0]
|
||||
video_url = flashvars['flv_url'][0]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'http_referer': ref,
|
||||
'duration': duration,
|
||||
'age_limit': 0 if family_friendly == 'true' else 18,
|
||||
}
|
||||
|
@@ -2,9 +2,8 @@
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json
|
||||
|
||||
|
||||
class HelsinkiIE(InfoExtractor):
|
||||
@@ -24,39 +23,21 @@ class HelsinkiIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
formats = []
|
||||
|
||||
mobj = re.search(r'file=((\w+):[^&]+)', webpage)
|
||||
if mobj:
|
||||
formats.append({
|
||||
'ext': mobj.group(2),
|
||||
'play_path': mobj.group(1),
|
||||
'url': 'rtmp://flashvideo.it.helsinki.fi/vod/',
|
||||
'player_url': 'http://video.helsinki.fi/player.swf',
|
||||
'format_note': 'sd',
|
||||
'quality': 0,
|
||||
})
|
||||
|
||||
mobj = re.search(r'hd\.file=((\w+):[^&]+)', webpage)
|
||||
if mobj:
|
||||
formats.append({
|
||||
'ext': mobj.group(2),
|
||||
'play_path': mobj.group(1),
|
||||
'url': 'rtmp://flashvideo.it.helsinki.fi/vod/',
|
||||
'player_url': 'http://video.helsinki.fi/player.swf',
|
||||
'format_note': 'hd',
|
||||
'quality': 1,
|
||||
})
|
||||
|
||||
params = self._parse_json(self._html_search_regex(
|
||||
r'(?s)jwplayer\("player"\).setup\((\{.*?\})\);',
|
||||
webpage, 'player code'), video_id, transform_source=js_to_json)
|
||||
formats = [{
|
||||
'url': s['file'],
|
||||
'ext': 'mp4',
|
||||
} for s in params['sources']]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage).replace('Video: ', ''),
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -1,12 +1,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
)
|
||||
@@ -16,25 +17,24 @@ class HotNewHipHopIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html',
|
||||
'file': '1435540.mp3',
|
||||
'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96',
|
||||
'info_dict': {
|
||||
'id': '1435540',
|
||||
'ext': 'mp3',
|
||||
'title': 'Freddie Gibbs - Lay It Down'
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
video_id = m.group('id')
|
||||
|
||||
webpage_src = self._download_webpage(url, video_id)
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url_base64 = self._search_regex(
|
||||
r'data-path="(.*?)"', webpage_src, u'video URL', fatal=False)
|
||||
r'data-path="(.*?)"', webpage, 'video URL', default=None)
|
||||
|
||||
if video_url_base64 is None:
|
||||
video_url = self._search_regex(
|
||||
r'"contentUrl" content="(.*?)"', webpage_src, u'video URL')
|
||||
r'"contentUrl" content="(.*?)"', webpage, 'content URL')
|
||||
return self.url_result(video_url, ie='Youtube')
|
||||
|
||||
reqdata = compat_urllib_parse.urlencode([
|
||||
@@ -59,11 +59,11 @@ class HotNewHipHopIE(InfoExtractor):
|
||||
if video_url.endswith('.html'):
|
||||
raise ExtractorError('Redirect failed')
|
||||
|
||||
video_title = self._og_search_title(webpage_src).strip()
|
||||
video_title = self._og_search_title(webpage).strip()
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': video_title,
|
||||
'thumbnail': self._og_search_thumbnail(webpage_src),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
||||
|
@@ -1,12 +1,12 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
import random
|
||||
import string
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import find_xpath_attr
|
||||
from ..utils import (
|
||||
find_xpath_attr,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class HowStuffWorksIE(InfoExtractor):
|
||||
@@ -16,98 +16,74 @@ class HowStuffWorksIE(InfoExtractor):
|
||||
'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm',
|
||||
'info_dict': {
|
||||
'id': '450221',
|
||||
'display_id': 'cool-jobs-iditarod-musher',
|
||||
'ext': 'flv',
|
||||
'title': 'Cool Jobs - Iditarod Musher',
|
||||
'description': 'md5:82bb58438a88027b8186a1fccb365f90',
|
||||
'description': 'Cold sleds, freezing temps and warm dog breath... an Iditarod musher\'s dream. Kasey-Dee Gardner jumps on a sled to find out what the big deal is.',
|
||||
'display_id': 'cool-jobs-iditarod-musher',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 161,
|
||||
},
|
||||
'params': {
|
||||
# md5 is not consistent
|
||||
'skip_download': True
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://adventure.howstuffworks.com/7199-survival-zone-food-and-water-in-the-savanna-video.htm',
|
||||
'info_dict': {
|
||||
'id': '453464',
|
||||
'display_id': 'survival-zone-food-and-water-in-the-savanna',
|
||||
'ext': 'mp4',
|
||||
'title': 'Survival Zone: Food and Water In the Savanna',
|
||||
'description': 'md5:7e1c89f6411434970c15fa094170c371',
|
||||
'description': 'Learn how to find both food and water while trekking in the African savannah. In this video from the Discovery Channel.',
|
||||
'display_id': 'survival-zone-food-and-water-in-the-savanna',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
# md5 is not consistent
|
||||
'skip_download': True
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://entertainment.howstuffworks.com/arts/2706-sword-swallowing-1-by-dan-meyer-video.htm',
|
||||
'info_dict': {
|
||||
'id': '440011',
|
||||
'display_id': 'sword-swallowing-1-by-dan-meyer',
|
||||
'ext': 'flv',
|
||||
'title': 'Sword Swallowing #1 by Dan Meyer',
|
||||
'description': 'md5:b2409e88172913e2e7d3d1159b0ef735',
|
||||
'description': 'Video footage (1 of 3) used by permission of the owner Dan Meyer through Sword Swallowers Association International <www.swordswallow.org>',
|
||||
'display_id': 'sword-swallowing-1-by-dan-meyer',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
# md5 is not consistent
|
||||
'skip_download': True
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('id')
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
clip_js = self._search_regex(
|
||||
r'(?s)var clip = ({.*?});', webpage, 'clip info')
|
||||
clip_info = self._parse_json(
|
||||
clip_js, display_id, transform_source=js_to_json)
|
||||
|
||||
content_id = self._search_regex(r'var siteSectionId="(\d+)";', webpage, 'content id')
|
||||
|
||||
mp4 = self._search_regex(
|
||||
r'''(?xs)var\s+clip\s*=\s*{\s*
|
||||
.+?\s*
|
||||
content_id\s*:\s*%s\s*,\s*
|
||||
.+?\s*
|
||||
mp4\s*:\s*\[(.*?),?\]\s*
|
||||
};\s*
|
||||
videoData\.push\(clip\);''' % content_id,
|
||||
webpage, 'mp4', fatal=False, default=None)
|
||||
|
||||
smil = self._download_xml(
|
||||
'http://services.media.howstuffworks.com/videos/%s/smil-service.smil' % content_id,
|
||||
content_id, 'Downloading video SMIL')
|
||||
|
||||
http_base = find_xpath_attr(
|
||||
smil,
|
||||
'./{0}head/{0}meta'.format('{http://www.w3.org/2001/SMIL20/Language}'),
|
||||
'name',
|
||||
'httpBase').get('content')
|
||||
|
||||
def random_string(str_len=0):
|
||||
return ''.join([random.choice(string.ascii_uppercase) for _ in range(str_len)])
|
||||
|
||||
URL_SUFFIX = '?v=2.11.3&fp=LNX 11,2,202,356&r=%s&g=%s' % (random_string(5), random_string(12))
|
||||
|
||||
video_id = clip_info['content_id']
|
||||
formats = []
|
||||
m3u8_url = clip_info.get('m3u8')
|
||||
if m3u8_url:
|
||||
formats += self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
|
||||
for video in clip_info.get('mp4', []):
|
||||
formats.append({
|
||||
'url': video['src'],
|
||||
'format_id': video['bitrate'],
|
||||
'vbr': int(video['bitrate'].rstrip('k')),
|
||||
})
|
||||
|
||||
if not formats:
|
||||
smil = self._download_xml(
|
||||
'http://services.media.howstuffworks.com/videos/%s/smil-service.smil' % video_id,
|
||||
video_id, 'Downloading video SMIL')
|
||||
|
||||
http_base = find_xpath_attr(
|
||||
smil,
|
||||
'./{0}head/{0}meta'.format('{http://www.w3.org/2001/SMIL20/Language}'),
|
||||
'name',
|
||||
'httpBase').get('content')
|
||||
|
||||
URL_SUFFIX = '?v=2.11.3&fp=LNX 11,2,202,356&r=A&g=A'
|
||||
|
||||
if mp4:
|
||||
for video in json.loads('[%s]' % mp4):
|
||||
bitrate = video['bitrate']
|
||||
fmt = {
|
||||
'url': video['src'].replace('http://pmd.video.howstuffworks.com', http_base) + URL_SUFFIX,
|
||||
'format_id': bitrate,
|
||||
}
|
||||
m = re.search(r'(?P<vbr>\d+)[Kk]', bitrate)
|
||||
if m:
|
||||
fmt['vbr'] = int(m.group('vbr'))
|
||||
formats.append(fmt)
|
||||
else:
|
||||
for video in smil.findall(
|
||||
'.//{0}body/{0}switch/{0}video'.format('{http://www.w3.org/2001/SMIL20/Language}')):
|
||||
vbr = int(video.attrib['system-bitrate']) / 1000
|
||||
'./{0}body/{0}switch/{0}video'.format('{http://www.w3.org/2001/SMIL20/Language}')):
|
||||
vbr = int_or_none(video.attrib['system-bitrate'], scale=1000)
|
||||
formats.append({
|
||||
'url': '%s/%s%s' % (http_base, video.attrib['src'], URL_SUFFIX),
|
||||
'format_id': '%dk' % vbr,
|
||||
@@ -116,19 +92,12 @@ class HowStuffWorksIE(InfoExtractor):
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
TITLE_SUFFIX = ' : HowStuffWorks'
|
||||
if title.endswith(TITLE_SUFFIX):
|
||||
title = title[:-len(TITLE_SUFFIX)]
|
||||
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
return {
|
||||
'id': content_id,
|
||||
'id': '%s' % video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'title': unescapeHTML(clip_info['clip_title']),
|
||||
'description': unescapeHTML(clip_info.get('caption')),
|
||||
'thumbnail': clip_info.get('video_still_url'),
|
||||
'duration': clip_info.get('duration'),
|
||||
'formats': formats,
|
||||
}
|
||||
|
72
youtube_dl/extractor/minhateca.py
Normal file
72
youtube_dl/extractor/minhateca.py
Normal file
@@ -0,0 +1,72 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_filesize,
|
||||
)
|
||||
|
||||
|
||||
class MinhatecaIE(InfoExtractor):
    """Extractor for single files hosted on minhateca.com.br.

    The numeric file id is read from the page URL.  The direct media URL is
    obtained by POSTing that id, together with the request verification
    token embedded in the page, to the site's ``License/Download`` action
    and reading ``redirectUrl`` from the JSON reply.
    """

    _VALID_URL = r'https?://minhateca\.com\.br/[^?#]+,(?P<id>[0-9]+)\.'
    _TEST = {
        'url': 'http://minhateca.com.br/pereba/misc/youtube-dl+test+video,125848331.mp4(video)',
        'info_dict': {
            'id': '125848331',
            'ext': 'mp4',
            'title': 'youtube-dl test video',
            'thumbnail': r're:^https?://.*\.jpg$',
            'filesize_approx': 1530000,
            'duration': 9,
            'view_count': int,
        }
    }

    def _real_extract(self, url):
        """Download the file page at *url* and return its info dict.

        The request token, the page heading (title) and the download
        metadata are mandatory; file size, duration and view count are
        looked up with ``fatal=False`` and may be missing.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        token = self._html_search_regex(
            r'<input name="__RequestVerificationToken".*?value="([^"]+)"',
            webpage, 'request token')
        token_data = [
            ('fileId', video_id),
            ('__RequestVerificationToken', token),
        ]
        # urllib on Python 3 refuses a str POST body, so send bytes.
        # urlencoded data is plain ASCII, which keeps Python 2 working too.
        req = compat_urllib_request.Request(
            'http://minhateca.com.br/action/License/Download',
            data=compat_urllib_parse.urlencode(token_data).encode('ascii'))
        req.add_header('Content-Type', 'application/x-www-form-urlencoded')
        data = self._download_json(
            req, video_id, note='Downloading metadata')

        video_url = data['redirectUrl']
        title_str = self._html_search_regex(
            r'<h1.*?>(.*?)</h1>', webpage, 'title')
        # The heading is "<name>.<ext>"; split on the last dot only.
        title, dot, ext = title_str.rpartition('.')
        if not dot:
            # No extension in the heading: the whole text is the title.
            title, ext = title_str, None
        filesize_approx = parse_filesize(self._html_search_regex(
            r'<p class="fileSize">(.*?)</p>',
            webpage, 'file size approximation', fatal=False))
        duration = parse_duration(self._html_search_regex(
            r'(?s)<p class="fileLeng[ht][th]">.*?class="bold">(.*?)<',
            webpage, 'duration', fatal=False))
        view_count = int_or_none(self._html_search_regex(
            r'<p class="downloadsCounter">([0-9]+)</p>',
            webpage, 'view count', fatal=False))

        info = {
            'id': video_id,
            'url': video_url,
            'title': title,
            'filesize_approx': filesize_approx,
            'duration': duration,
            'view_count': view_count,
            'thumbnail': self._og_search_thumbnail(webpage),
        }
        if ext:
            # Only report an extension when the heading really carried one.
            info['ext'] = ext
        return info
|
@@ -70,7 +70,7 @@ class MixcloudIE(InfoExtractor):
|
||||
raise ExtractorError('Unable to extract track url')
|
||||
|
||||
PREFIX = (
|
||||
r'<div class="cloudcast-play-button-container[^"]*?"'
|
||||
r'<span class="play-button[^"]*?"'
|
||||
r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
|
||||
title = self._html_search_regex(
|
||||
PREFIX + r'm-title="([^"]+)"', webpage, 'title')
|
||||
|
@@ -49,7 +49,7 @@ class MooshareIE(InfoExtractor):
|
||||
page = self._download_webpage(url, video_id, 'Downloading page')
|
||||
|
||||
if re.search(r'>Video Not Found or Deleted<', page) is not None:
|
||||
raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
hash_key = self._html_search_regex(r'<input type="hidden" name="hash" value="([^"]+)">', page, 'hash')
|
||||
title = self._html_search_regex(r'(?m)<div class="blockTitle">\s*<h2>Watch ([^<]+)</h2>', page, 'title')
|
||||
|
@@ -164,7 +164,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
if mgid is None or ':' not in mgid:
|
||||
mgid = self._search_regex(
|
||||
[r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
|
||||
webpage, u'mgid')
|
||||
webpage, 'mgid')
|
||||
return self._get_videos_info(mgid)
|
||||
|
||||
|
||||
|
@@ -1,64 +1,65 @@
|
||||
import re
|
||||
import json
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
class MuzuTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.muzu\.tv/(.+?)/(.+?)/(?P<id>\d+)'
|
||||
IE_NAME = u'muzu.tv'
|
||||
IE_NAME = 'muzu.tv'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/',
|
||||
u'file': u'1981454.mp4',
|
||||
u'md5': u'98f8b2c7bc50578d6a0364fff2bfb000',
|
||||
u'info_dict': {
|
||||
u'title': u'Cat Walk (Original Mix)',
|
||||
u'description': u'md5:90e868994de201b2570e4e5854e19420',
|
||||
u'uploader': u'MarcAshken featuring SOS',
|
||||
'url': 'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/',
|
||||
'md5': '98f8b2c7bc50578d6a0364fff2bfb000',
|
||||
'info_dict': {
|
||||
'id': '1981454',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cat Walk (Original Mix)',
|
||||
'description': 'md5:90e868994de201b2570e4e5854e19420',
|
||||
'uploader': 'MarcAshken featuring SOS',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
info_data = compat_urllib_parse.urlencode({'format': 'json',
|
||||
'url': url,
|
||||
})
|
||||
video_info_page = self._download_webpage('http://www.muzu.tv/api/oembed/?%s' % info_data,
|
||||
video_id, u'Downloading video info')
|
||||
info = json.loads(video_info_page)
|
||||
info_data = compat_urllib_parse.urlencode({
|
||||
'format': 'json',
|
||||
'url': url,
|
||||
})
|
||||
info = self._download_json(
|
||||
'http://www.muzu.tv/api/oembed/?%s' % info_data,
|
||||
video_id, 'Downloading video info')
|
||||
|
||||
player_info_page = self._download_webpage('http://player.muzu.tv/player/playerInit?ai=%s' % video_id,
|
||||
video_id, u'Downloading player info')
|
||||
video_info = json.loads(player_info_page)['videos'][0]
|
||||
player_info = self._download_json(
|
||||
'http://player.muzu.tv/player/playerInit?ai=%s' % video_id,
|
||||
video_id, 'Downloading player info')
|
||||
video_info = player_info['videos'][0]
|
||||
for quality in ['1080', '720', '480', '360']:
|
||||
if video_info.get('v%s' % quality):
|
||||
break
|
||||
|
||||
data = compat_urllib_parse.urlencode({'ai': video_id,
|
||||
# Even if each time you watch a video the hash changes,
|
||||
# it seems to work for different videos, and it will work
|
||||
# even if you use any non empty string as a hash
|
||||
'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k',
|
||||
'device': 'web',
|
||||
'qv': quality,
|
||||
})
|
||||
video_url_page = self._download_webpage('http://player.muzu.tv/player/requestVideo?%s' % data,
|
||||
video_id, u'Downloading video url')
|
||||
video_url_info = json.loads(video_url_page)
|
||||
data = compat_urllib_parse.urlencode({
|
||||
'ai': video_id,
|
||||
# Even if each time you watch a video the hash changes,
|
||||
# it seems to work for different videos, and it will work
|
||||
# even if you use any non empty string as a hash
|
||||
'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k',
|
||||
'device': 'web',
|
||||
'qv': quality,
|
||||
})
|
||||
video_url_info = self._download_json(
|
||||
'http://player.muzu.tv/player/requestVideo?%s' % data,
|
||||
video_id, 'Downloading video url')
|
||||
video_url = video_url_info['url']
|
||||
|
||||
return {'id': video_id,
|
||||
'title': info['title'],
|
||||
'url': video_url,
|
||||
'ext': determine_ext(video_url),
|
||||
'thumbnail': info['thumbnail_url'],
|
||||
'description': info['description'],
|
||||
'uploader': info['author_name'],
|
||||
}
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info['title'],
|
||||
'url': video_url,
|
||||
'thumbnail': info['thumbnail_url'],
|
||||
'description': info['description'],
|
||||
'uploader': info['author_name'],
|
||||
}
|
||||
|
@@ -1,3 +1,4 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@@ -7,6 +8,7 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class MySpaceIE(InfoExtractor):
|
||||
@@ -14,33 +16,58 @@ class MySpaceIE(InfoExtractor):
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://myspace.com/coldplay/video/viva-la-vida/100008689',
|
||||
'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919',
|
||||
'info_dict': {
|
||||
'id': '100008689',
|
||||
'id': '109594919',
|
||||
'ext': 'flv',
|
||||
'title': 'Viva La Vida',
|
||||
'description': 'The official Viva La Vida video, directed by Hype Williams',
|
||||
'uploader': 'Coldplay',
|
||||
'uploader_id': 'coldplay',
|
||||
'title': 'Little Big Town',
|
||||
'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
|
||||
'uploader': 'Five Minutes to the Stage',
|
||||
'uploader_id': 'fiveminutestothestage',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# song
|
||||
# songs
|
||||
{
|
||||
'url': 'https://myspace.com/spiderbags/music/song/darkness-in-my-heart-39008454-27041242',
|
||||
'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',
|
||||
'info_dict': {
|
||||
'id': '39008454',
|
||||
'id': '93388656',
|
||||
'ext': 'flv',
|
||||
'title': 'Darkness In My Heart',
|
||||
'uploader_id': 'spiderbags',
|
||||
'title': 'Of weakened soul...',
|
||||
'uploader': 'Killsorrow',
|
||||
'uploader_id': 'killsorrow',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'add_ie': ['Vevo'],
|
||||
'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',
|
||||
'info_dict': {
|
||||
'id': 'USZM20600099',
|
||||
'ext': 'mp4',
|
||||
'title': 'Animal I Have Become',
|
||||
'uploader': 'Three Days Grace',
|
||||
'timestamp': int,
|
||||
'upload_date': '20060502',
|
||||
},
|
||||
'skip': 'VEVO is only available in some countries',
|
||||
}, {
|
||||
'add_ie': ['Youtube'],
|
||||
'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
|
||||
'info_dict': {
|
||||
'id': 'ypWvQgnJrSU',
|
||||
'ext': 'mp4',
|
||||
'title': 'Starset - First Light',
|
||||
'description': 'md5:2d5db6c9d11d527683bcda818d332414',
|
||||
'uploader': 'Jacob Soren',
|
||||
'uploader_id': 'SorenPromotions',
|
||||
'upload_date': '20140725',
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
@@ -48,16 +75,41 @@ class MySpaceIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
player_url = self._search_regex(
|
||||
r'playerSwf":"([^"?]*)', webpage, 'player URL')
|
||||
|
||||
if mobj.group('mediatype').startswith('music/song'):
|
||||
# songs don't store any useful info in the 'context' variable
|
||||
song_data = self._search_regex(
|
||||
r'''<button.*data-song-id=(["\'])%s\1.*''' % video_id,
|
||||
webpage, 'song_data', default=None, group=0)
|
||||
if song_data is None:
|
||||
# some songs in an album are not playable
|
||||
self.report_warning(
|
||||
'%s: No downloadable song on this page' % video_id)
|
||||
return
|
||||
|
||||
def search_data(name):
|
||||
return self._search_regex(
|
||||
r'data-%s="(.*?)"' % name, webpage, name)
|
||||
r'''data-%s=([\'"])(?P<data>.*?)\1''' % name,
|
||||
song_data, name, default='', group='data')
|
||||
streamUrl = search_data('stream-url')
|
||||
if not streamUrl:
|
||||
vevo_id = search_data('vevo-id')
|
||||
youtube_id = search_data('youtube-id')
|
||||
if vevo_id:
|
||||
self.to_screen('Vevo video detected: %s' % vevo_id)
|
||||
return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
|
||||
elif youtube_id:
|
||||
self.to_screen('Youtube video detected: %s' % youtube_id)
|
||||
return self.url_result(youtube_id, ie='Youtube')
|
||||
else:
|
||||
raise ExtractorError(
|
||||
'Found song but don\'t know how to download it')
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'uploader': search_data('artist-name'),
|
||||
'uploader_id': search_data('artist-username'),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
||||
@@ -79,6 +131,50 @@ class MySpaceIE(InfoExtractor):
|
||||
info.update({
|
||||
'url': rtmp_url,
|
||||
'play_path': play_path,
|
||||
'player_url': player_url,
|
||||
'ext': 'flv',
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class MySpaceAlbumIE(InfoExtractor):
    """Playlist extractor for MySpace album pages.

    Every ``music:song`` entry found in the album page is handed over to
    ``MySpaceIE`` as a separate playlist entry.
    """

    IE_NAME = 'MySpace:album'
    _VALID_URL = r'https?://myspace\.com/([^/]+)/music/album/(?P<title>.*-)(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://myspace.com/starset2/music/album/transmissions-19455773',
        'info_dict': {
            'title': 'Transmissions',
            'id': '19455773',
        },
        'playlist_count': 14,
        'skip': 'this album is only available in some countries',
    }, {
        'url': 'https://myspace.com/killsorrow/music/album/the-demo-18596029',
        'info_dict': {
            'title': 'The Demo',
            'id': '18596029',
        },
        'playlist_count': 5,
    }]

    def _real_extract(self, url):
        """Return a playlist dict listing the songs of the album at *url*.

        Raises an expected ``ExtractorError`` when the page lists no songs.
        """
        url_match = re.match(self._VALID_URL, url)
        album_id = url_match.group('id')
        # Display id is the URL slug (with its trailing dash) plus the id.
        album_display_id = url_match.group('title') + album_id
        page = self._download_webpage(url, album_display_id)

        song_urls = re.findall(r'"music:song" content="(.*?)"', page)
        if len(song_urls) == 0:
            raise ExtractorError(
                '%s: No songs found, try using proxy' % album_display_id,
                expected=True)

        # Each song is resolved later by the single-track extractor.
        song_entries = []
        for song_url in song_urls:
            song_entries.append(
                self.url_result(song_url, ie=MySpaceIE.ie_key()))

        album_title = self._og_search_title(page)
        return {
            '_type': 'playlist',
            'id': album_id,
            'display_id': album_display_id,
            'title': album_title,
            'entries': song_entries,
        }
|
||||
|
29
youtube_dl/extractor/myvidster.py
Normal file
29
youtube_dl/extractor/myvidster.py
Normal file
@@ -0,0 +1,29 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class MyVidsterIE(InfoExtractor):
    """Extractor for myvidster.com video pages.

    The page itself carries no media; it links to the real video page via
    an anchor marked ``rel="videolink"``.  That link is returned as a URL
    result so that the matching extractor handles it.
    """

    # Accept both http and https page URLs.
    _VALID_URL = r'https?://(?:www\.)?myvidster\.com/video/(?P<id>\d+)/'

    _TEST = {
        'url': 'http://www.myvidster.com/video/32059805/Hot_chemistry_with_raw_love_making',
        'md5': '95296d0231c1363222c3441af62dc4ca',
        'info_dict': {
            'id': '3685814',
            'title': 'md5:7d8427d6d02c4fbcef50fe269980c749',
            'upload_date': '20141027',
            'uploader_id': 'utkualp',
            'ext': 'mp4',
            'age_limit': 18,
        },
        'add_ie': ['XHamster'],
    }

    def _real_extract(self, url):
        """Download the page at *url* and delegate to the linked video URL."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Capture only the attribute value: a greedy ``.*`` could run on to
        # the last ``">`` of the line and swallow unrelated markup.
        real_url = self._html_search_regex(
            r'rel="videolink" href="(?P<real_url>[^"]+)"',
            webpage, 'real video url')
        return self.url_result(real_url)
|
@@ -1,7 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
remove_end,
|
||||
@@ -10,8 +8,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class NBAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)/?(?:/index\.html)?(?:\?.*)?$'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
|
||||
'md5': 'c0edcfc37607344e2ff8f13c378c88a4',
|
||||
'info_dict': {
|
||||
@@ -21,12 +19,13 @@ class NBAIE(InfoExtractor):
|
||||
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
|
||||
'duration': 181,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
|
||||
@@ -37,7 +36,7 @@ class NBAIE(InfoExtractor):
|
||||
|
||||
description = self._og_search_description(webpage)
|
||||
duration = parse_duration(
|
||||
self._html_search_meta('duration', webpage, 'duration', fatal=False))
|
||||
self._html_search_meta('duration', webpage, 'duration'))
|
||||
|
||||
return {
|
||||
'id': shortened_video_id,
|
||||
|
@@ -2,11 +2,15 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
import os
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_urlparse
|
||||
)
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
@@ -22,9 +26,12 @@ class NHLBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
initial_video_url = info['publishPoint']
|
||||
if info['formats'] == '1':
|
||||
parsed_url = compat_urllib_parse_urlparse(initial_video_url)
|
||||
filename, ext = os.path.splitext(parsed_url.path)
|
||||
path = '%s_sd%s' % (filename, ext)
|
||||
data = compat_urllib_parse.urlencode({
|
||||
'type': 'fvod',
|
||||
'path': initial_video_url.replace('.mp4', '_sd.mp4'),
|
||||
'path': compat_urlparse.urlunparse(parsed_url[:2] + (path,) + parsed_url[3:])
|
||||
})
|
||||
path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
|
||||
path_doc = self._download_xml(
|
||||
@@ -71,6 +78,17 @@ class NHLIE(NHLBaseInfoExtractor):
|
||||
'duration': 0,
|
||||
'upload_date': '20141011',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://video.mapleleafs.nhl.com/videocenter/console?id=58665&catid=802',
|
||||
'md5': 'c78fc64ea01777e426cfc202b746c825',
|
||||
'info_dict': {
|
||||
'id': '58665',
|
||||
'ext': 'flv',
|
||||
'title': 'Classic Game In Six - April 22, 1979',
|
||||
'description': 'It was the last playoff game for the Leafs in the decade, and the last time the Leafs and Habs played in the playoffs. Great game, not a great ending.',
|
||||
'duration': 400,
|
||||
'upload_date': '20100129'
|
||||
},
|
||||
}, {
|
||||
'url': 'http://video.flames.nhl.com/videocenter/console?id=630616',
|
||||
'only_matching': True,
|
||||
@@ -88,7 +106,7 @@ class NHLIE(NHLBaseInfoExtractor):
|
||||
class NHLVideocenterIE(NHLBaseInfoExtractor):
|
||||
IE_NAME = 'nhl.com:videocenter'
|
||||
IE_DESC = 'NHL videocenter category'
|
||||
_VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
|
||||
_VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?[^(id=)]*catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
|
||||
_TEST = {
|
||||
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=999',
|
||||
'info_dict': {
|
||||
@@ -122,7 +140,7 @@ class NHLVideocenterIE(NHLBaseInfoExtractor):
|
||||
response = self._download_webpage(request_url, playlist_title)
|
||||
response = self._fix_json(response)
|
||||
if not response.strip():
|
||||
self._downloader.report_warning(u'Got an empty reponse, trying '
|
||||
self._downloader.report_warning('Got an empty reponse, trying '
|
||||
'adding the "newvideos" parameter')
|
||||
response = self._download_webpage(request_url + '&newvideos=true',
|
||||
playlist_title)
|
||||
|
@@ -20,6 +20,7 @@ class NocoIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
|
||||
_LOGIN_URL = 'http://noco.tv/do.php'
|
||||
_API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s'
|
||||
_SUB_LANG_TEMPLATE = '&sub_lang=%s'
|
||||
_NETRC_MACHINE = 'noco'
|
||||
|
||||
_TEST = {
|
||||
@@ -60,10 +61,12 @@ class NocoIE(InfoExtractor):
|
||||
if 'erreur' in login:
|
||||
raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
|
||||
|
||||
def _call_api(self, path, video_id, note):
|
||||
def _call_api(self, path, video_id, note, sub_lang=None):
|
||||
ts = compat_str(int(time.time() * 1000))
|
||||
tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest()
|
||||
url = self._API_URL_TEMPLATE % (path, ts, tk)
|
||||
if sub_lang:
|
||||
url += self._SUB_LANG_TEMPLATE % sub_lang
|
||||
|
||||
resp = self._download_json(url, video_id, note)
|
||||
|
||||
@@ -91,31 +94,34 @@ class NocoIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
|
||||
for format_id, fmt in medias['fr']['video_list']['none']['quality_list'].items():
|
||||
for lang, lang_dict in medias['fr']['video_list'].items():
|
||||
for format_id, fmt in lang_dict['quality_list'].items():
|
||||
format_id_extended = '%s-%s' % (lang, format_id) if lang != 'none' else format_id
|
||||
|
||||
video = self._call_api(
|
||||
'shows/%s/video/%s/fr' % (video_id, format_id.lower()),
|
||||
video_id, 'Downloading %s video JSON' % format_id)
|
||||
video = self._call_api(
|
||||
'shows/%s/video/%s/fr' % (video_id, format_id.lower()),
|
||||
video_id, 'Downloading %s video JSON' % format_id_extended,
|
||||
lang if lang != 'none' else None)
|
||||
|
||||
file_url = video['file']
|
||||
if not file_url:
|
||||
continue
|
||||
file_url = video['file']
|
||||
if not file_url:
|
||||
continue
|
||||
|
||||
if file_url in ['forbidden', 'not found']:
|
||||
popmessage = video['popmessage']
|
||||
self._raise_error(popmessage['title'], popmessage['message'])
|
||||
if file_url in ['forbidden', 'not found']:
|
||||
popmessage = video['popmessage']
|
||||
self._raise_error(popmessage['title'], popmessage['message'])
|
||||
|
||||
formats.append({
|
||||
'url': file_url,
|
||||
'format_id': format_id,
|
||||
'width': fmt['res_width'],
|
||||
'height': fmt['res_lines'],
|
||||
'abr': fmt['audiobitrate'],
|
||||
'vbr': fmt['videobitrate'],
|
||||
'filesize': fmt['filesize'],
|
||||
'format_note': qualities[format_id]['quality_name'],
|
||||
'preference': qualities[format_id]['priority'],
|
||||
})
|
||||
formats.append({
|
||||
'url': file_url,
|
||||
'format_id': format_id_extended,
|
||||
'width': fmt['res_width'],
|
||||
'height': fmt['res_lines'],
|
||||
'abr': fmt['audiobitrate'],
|
||||
'vbr': fmt['videobitrate'],
|
||||
'filesize': fmt['filesize'],
|
||||
'format_note': qualities[format_id]['quality_name'],
|
||||
'preference': qualities[format_id]['priority'],
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@@ -130,7 +130,7 @@ class NTVIE(InfoExtractor):
|
||||
'rtmp_conn': 'B:1',
|
||||
'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128',
|
||||
'page_url': 'http://www.ntv.ru',
|
||||
'flash_ver': 'LNX 11,2,202,341',
|
||||
'flash_version': 'LNX 11,2,202,341',
|
||||
'rtmp_live': True,
|
||||
'ext': 'flv',
|
||||
'filesize': int(size.text),
|
||||
|
@@ -4,6 +4,8 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
@@ -28,6 +30,11 @@ class PlayvidIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
m_error = re.search(
|
||||
r'<div class="block-error">\s*<div class="heading">\s*<div>(?P<msg>.+?)</div>\s*</div>', webpage)
|
||||
if m_error:
|
||||
raise ExtractorError(clean_html(m_error.group('msg')), expected=True)
|
||||
|
||||
video_title = None
|
||||
duration = None
|
||||
video_thumbnail = None
|
||||
|
@@ -8,7 +8,6 @@ from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
qualities,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
@@ -45,13 +44,18 @@ class PornHdIE(InfoExtractor):
|
||||
thumbnail = self._search_regex(
|
||||
r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
|
||||
|
||||
quality = qualities(['SD', 'HD'])
|
||||
formats = [{
|
||||
'url': source['file'],
|
||||
'format_id': '%s-%s' % (source['label'], determine_ext(source['file'])),
|
||||
'quality': quality(source['label']),
|
||||
} for source in json.loads(js_to_json(self._search_regex(
|
||||
r"(?s)'sources'\s*:\s*(\[.+?\])", webpage, 'sources')))]
|
||||
quality = qualities(['sd', 'hd'])
|
||||
sources = json.loads(js_to_json(self._search_regex(
|
||||
r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}\);", webpage, 'sources')))
|
||||
formats = []
|
||||
for container, s in sources.items():
|
||||
for qname, video_url in s.items():
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'container': container,
|
||||
'format_id': '%s-%s' % (container, qname),
|
||||
'quality': quality(qname),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
@@ -85,7 +85,7 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Im Interview: Kai Wiesinger',
|
||||
'description': 'md5:e4e5370652ec63b95023e914190b4eb9',
|
||||
'upload_date': '20140225',
|
||||
'upload_date': '20140203',
|
||||
'duration': 522.56,
|
||||
},
|
||||
'params': {
|
||||
@@ -100,7 +100,7 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Jagd auf Fertigkost im Elsthal - Teil 2',
|
||||
'description': 'md5:2669cde3febe9bce13904f701e774eb6',
|
||||
'upload_date': '20140225',
|
||||
'upload_date': '20141014',
|
||||
'duration': 2410.44,
|
||||
},
|
||||
'params': {
|
||||
@@ -152,12 +152,22 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.prosieben.de/tv/joko-gegen-klaas/videos/playlists/episode-8-ganze-folge-playlist',
|
||||
'info_dict': {
|
||||
'id': '439664',
|
||||
'title': 'Episode 8 - Ganze Folge - Playlist',
|
||||
'description': 'md5:63b8963e71f481782aeea877658dec84',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
},
|
||||
]
|
||||
|
||||
_CLIPID_REGEXES = [
|
||||
r'"clip_id"\s*:\s+"(\d+)"',
|
||||
r'clipid: "(\d+)"',
|
||||
r'clip[iI]d=(\d+)',
|
||||
r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
|
||||
]
|
||||
_TITLE_REGEXES = [
|
||||
r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
|
||||
@@ -178,11 +188,19 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',
|
||||
r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>',
|
||||
]
|
||||
_PAGE_TYPE_REGEXES = [
|
||||
r'<meta name="page_type" content="([^"]+)">',
|
||||
r"'itemType'\s*:\s*'([^']*)'",
|
||||
]
|
||||
_PLAYLIST_ID_REGEXES = [
|
||||
r'content[iI]d=(\d+)',
|
||||
r"'itemId'\s*:\s*'([^']*)'",
|
||||
]
|
||||
_PLAYLIST_CLIP_REGEXES = [
|
||||
r'(?s)data-qvt=.+?<a href="([^"]+)"',
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
def _extract_clip(self, url, webpage):
|
||||
clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id')
|
||||
|
||||
access_token = 'testclient'
|
||||
@@ -281,3 +299,31 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
def _extract_playlist(self, url, webpage):
    """Build a playlist result from a playlist page.

    Tries each pattern in ``_PLAYLIST_CLIP_REGEXES`` in turn and uses the
    first one that finds clip paths in *webpage*.  Clip paths are joined to
    the scheme-and-host prefix of *url*.  Falls through (implicitly
    returning ``None``) when no pattern finds any clip.
    """
    playlist_id = self._html_search_regex(
        self._PLAYLIST_ID_REGEXES, webpage, 'playlist id')

    for clip_regex in self._PLAYLIST_CLIP_REGEXES:
        clip_paths = re.findall(clip_regex, webpage)
        if not clip_paths:
            continue

        playlist_title = self._html_search_regex(
            self._TITLE_REGEXES, webpage, 'title')
        playlist_description = self._html_search_regex(
            self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False)

        # "scheme://host" part of the page URL; clip paths are site-relative.
        site_root = re.match('(.+?//.+?)/', url).group(1)
        entries = []
        for clip_path in clip_paths:
            entries.append(
                self.url_result(site_root + clip_path, 'ProSiebenSat1'))

        return self.playlist_result(
            entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
page_type = self._search_regex(
|
||||
self._PAGE_TYPE_REGEXES, webpage,
|
||||
'page type', default='clip').lower()
|
||||
if page_type == 'clip':
|
||||
return self._extract_clip(url, webpage)
|
||||
elif page_type == 'playlist':
|
||||
return self._extract_playlist(url, webpage)
|
||||
|
55
youtube_dl/extractor/radiode.py
Normal file
55
youtube_dl/extractor/radiode.py
Normal file
@@ -0,0 +1,55 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class RadioDeIE(InfoExtractor):
|
||||
IE_NAME = 'radio.de'
|
||||
_VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)'
|
||||
_TEST = {
|
||||
'url': 'http://ndr2.radio.de/',
|
||||
'md5': '3b4cdd011bc59174596b6145cda474a4',
|
||||
'info_dict': {
|
||||
'id': 'ndr2',
|
||||
'ext': 'mp3',
|
||||
'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': 'md5:591c49c702db1a33751625ebfb67f273',
|
||||
'thumbnail': 're:^https?://.*\.png',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
radio_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, radio_id)
|
||||
|
||||
broadcast = json.loads(self._search_regex(
|
||||
r'_getBroadcast\s*=\s*function\(\s*\)\s*{\s*return\s+({.+?})\s*;\s*}',
|
||||
webpage, 'broadcast'))
|
||||
|
||||
title = self._live_title(broadcast['name'])
|
||||
description = broadcast.get('description') or broadcast.get('shortDescription')
|
||||
thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl')
|
||||
|
||||
formats = [{
|
||||
'url': stream['streamUrl'],
|
||||
'ext': stream['streamContentFormat'].lower(),
|
||||
'acodec': stream['streamContentFormat'],
|
||||
'abr': stream['bitRate'],
|
||||
'asr': stream['sampleRate']
|
||||
} for stream in broadcast['streamUrls']]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': radio_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'is_live': True,
|
||||
'formats': formats,
|
||||
}
|
@@ -1,7 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
@@ -9,32 +7,23 @@ class RedTubeIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.redtube.com/66418',
|
||||
'file': '66418.mp4',
|
||||
# md5 varies from time to time, as in
|
||||
# https://travis-ci.org/rg3/youtube-dl/jobs/14052463#L295
|
||||
#'md5': u'7b8c22b5e7098a3e1c09709df1126d2d',
|
||||
'info_dict': {
|
||||
'id': '66418',
|
||||
'ext': 'mp4',
|
||||
"title": "Sucked on a toilet",
|
||||
"age_limit": 18,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
video_id = mobj.group('id')
|
||||
video_extension = 'mp4'
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
self.report_extraction(video_id)
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'<source src="(.+?)" type="video/mp4">', webpage, u'video URL')
|
||||
|
||||
r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
|
||||
video_title = self._html_search_regex(
|
||||
r'<h1 class="videoTitle[^"]*">(.+?)</h1>',
|
||||
webpage, u'title')
|
||||
|
||||
webpage, 'title')
|
||||
video_thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
# No self-labeling, but they describe themselves as
|
||||
@@ -44,7 +33,7 @@ class RedTubeIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': video_extension,
|
||||
'ext': 'mp4',
|
||||
'title': video_title,
|
||||
'thumbnail': video_thumbnail,
|
||||
'age_limit': age_limit,
|
||||
|
@@ -15,7 +15,7 @@ from ..utils import (
|
||||
|
||||
class RTSIE(InfoExtractor):
|
||||
IE_DESC = 'RTS.ch'
|
||||
_VALID_URL = r'^https?://(?:www\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-.*?\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?rts\.ch/(?:(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html|play/tv/[^/]+/video/(?P<display_id_new>.+?)\?id=(?P<id_new>[0-9]+))'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -23,6 +23,7 @@ class RTSIE(InfoExtractor):
|
||||
'md5': '753b877968ad8afaeddccc374d4256a5',
|
||||
'info_dict': {
|
||||
'id': '3449373',
|
||||
'display_id': 'les-enfants-terribles',
|
||||
'ext': 'mp4',
|
||||
'duration': 1488,
|
||||
'title': 'Les Enfants Terribles',
|
||||
@@ -30,7 +31,8 @@ class RTSIE(InfoExtractor):
|
||||
'uploader': 'Divers',
|
||||
'upload_date': '19680921',
|
||||
'timestamp': -40280400,
|
||||
'thumbnail': 're:^https?://.*\.image'
|
||||
'thumbnail': 're:^https?://.*\.image',
|
||||
'view_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -38,6 +40,7 @@ class RTSIE(InfoExtractor):
|
||||
'md5': 'c148457a27bdc9e5b1ffe081a7a8337b',
|
||||
'info_dict': {
|
||||
'id': '5624067',
|
||||
'display_id': 'entre-ciel-et-mer',
|
||||
'ext': 'mp4',
|
||||
'duration': 3720,
|
||||
'title': 'Les yeux dans les cieux - Mon homard au Canada',
|
||||
@@ -45,7 +48,8 @@ class RTSIE(InfoExtractor):
|
||||
'uploader': 'Passe-moi les jumelles',
|
||||
'upload_date': '20140404',
|
||||
'timestamp': 1396635300,
|
||||
'thumbnail': 're:^https?://.*\.image'
|
||||
'thumbnail': 're:^https?://.*\.image',
|
||||
'view_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -53,6 +57,7 @@ class RTSIE(InfoExtractor):
|
||||
'md5': 'b4326fecd3eb64a458ba73c73e91299d',
|
||||
'info_dict': {
|
||||
'id': '5745975',
|
||||
'display_id': '1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski',
|
||||
'ext': 'mp4',
|
||||
'duration': 48,
|
||||
'title': '1/2, Kloten - Fribourg (5-2): second but pour Gottéron par Kwiatowski',
|
||||
@@ -60,7 +65,8 @@ class RTSIE(InfoExtractor):
|
||||
'uploader': 'Hockey',
|
||||
'upload_date': '20140403',
|
||||
'timestamp': 1396556882,
|
||||
'thumbnail': 're:^https?://.*\.image'
|
||||
'thumbnail': 're:^https?://.*\.image',
|
||||
'view_count': int,
|
||||
},
|
||||
'skip': 'Blocked outside Switzerland',
|
||||
},
|
||||
@@ -69,6 +75,7 @@ class RTSIE(InfoExtractor):
|
||||
'md5': '9bb06503773c07ce83d3cbd793cebb91',
|
||||
'info_dict': {
|
||||
'id': '5745356',
|
||||
'display_id': 'londres-cachee-par-un-epais-smog',
|
||||
'ext': 'mp4',
|
||||
'duration': 33,
|
||||
'title': 'Londres cachée par un épais smog',
|
||||
@@ -76,7 +83,8 @@ class RTSIE(InfoExtractor):
|
||||
'uploader': 'Le Journal en continu',
|
||||
'upload_date': '20140403',
|
||||
'timestamp': 1396537322,
|
||||
'thumbnail': 're:^https?://.*\.image'
|
||||
'thumbnail': 're:^https?://.*\.image',
|
||||
'view_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -84,6 +92,7 @@ class RTSIE(InfoExtractor):
|
||||
'md5': 'dd8ef6a22dff163d063e2a52bc8adcae',
|
||||
'info_dict': {
|
||||
'id': '5706148',
|
||||
'display_id': 'urban-hippie-de-damien-krisl-03-04-2014',
|
||||
'ext': 'mp3',
|
||||
'duration': 123,
|
||||
'title': '"Urban Hippie", de Damien Krisl',
|
||||
@@ -92,22 +101,44 @@ class RTSIE(InfoExtractor):
|
||||
'timestamp': 1396551600,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.rts.ch/play/tv/-/video/le-19h30?id=6348260',
|
||||
'md5': '968777c8779e5aa2434be96c54e19743',
|
||||
'info_dict': {
|
||||
'id': '6348260',
|
||||
'display_id': 'le-19h30',
|
||||
'ext': 'mp4',
|
||||
'duration': 1796,
|
||||
'title': 'Le 19h30',
|
||||
'description': '',
|
||||
'uploader': 'Le 19h30',
|
||||
'upload_date': '20141201',
|
||||
'timestamp': 1417458600,
|
||||
'thumbnail': 're:^https?://.*\.image',
|
||||
'view_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.rts.ch/play/tv/le-19h30/video/le-chantier-du-nouveau-parlement-vaudois-a-permis-une-trouvaille-historique?id=6348280',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
video_id = m.group('id')
|
||||
video_id = m.group('id') or m.group('id_new')
|
||||
display_id = m.group('display_id') or m.group('display_id_new')
|
||||
|
||||
def download_json(internal_id):
|
||||
return self._download_json(
|
||||
'http://www.rts.ch/a/%s.html?f=json/article' % internal_id,
|
||||
video_id)
|
||||
display_id)
|
||||
|
||||
all_info = download_json(video_id)
|
||||
|
||||
# video_id extracted out of URL is not always a real id
|
||||
if 'video' not in all_info and 'audio' not in all_info:
|
||||
page = self._download_webpage(url, video_id)
|
||||
page = self._download_webpage(url, display_id)
|
||||
internal_id = self._html_search_regex(
|
||||
r'<(?:video|audio) data-id="([0-9]+)"', page,
|
||||
'internal video id')
|
||||
@@ -143,6 +174,7 @@ class RTSIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'formats': formats,
|
||||
'title': info['title'],
|
||||
'description': info.get('intro'),
|
||||
|
@@ -53,6 +53,7 @@ class RutubeIE(InfoExtractor):
|
||||
m3u8_url = options['video_balancer'].get('m3u8')
|
||||
if m3u8_url is None:
|
||||
raise ExtractorError('Couldn\'t find m3u8 manifest url')
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
|
||||
|
||||
return {
|
||||
'id': video['id'],
|
||||
@@ -60,8 +61,7 @@ class RutubeIE(InfoExtractor):
|
||||
'description': video['description'],
|
||||
'duration': video['duration'],
|
||||
'view_count': video['hits'],
|
||||
'url': m3u8_url,
|
||||
'ext': 'mp4',
|
||||
'formats': formats,
|
||||
'thumbnail': video['thumbnail_url'],
|
||||
'uploader': author.get('name'),
|
||||
'uploader_id': compat_str(author['id']) if author else None,
|
||||
|
@@ -5,61 +5,27 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class CinemassacreIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
|
||||
'md5': 'fde81fbafaee331785f58cd6c0d46190',
|
||||
'info_dict': {
|
||||
'id': '19911',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20121110',
|
||||
'title': '“Angry Video Game Nerd: The Movie” – Trailer',
|
||||
'description': 'md5:fb87405fcb42a331742a0dce2708560b',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
|
||||
'md5': 'd72f10cd39eac4215048f62ab477a511',
|
||||
'info_dict': {
|
||||
'id': '521be8ef82b16',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20131002',
|
||||
'title': 'The Mummy’s Hand (1940)',
|
||||
},
|
||||
}
|
||||
]
|
||||
class ScreenwaveMediaIE(InfoExtractor):
|
||||
_VALID_URL = r'http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d')
|
||||
mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<full_video_id>(?:Cinemassacre-)?(?P<video_id>.+?)))"', webpage)
|
||||
if not mobj:
|
||||
raise ExtractorError('Can\'t extract embed url and video id')
|
||||
playerdata_url = mobj.group('embed_url')
|
||||
video_id = mobj.group('video_id')
|
||||
full_video_id = mobj.group('full_video_id')
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'<title>(?P<title>.+?)\|', webpage, 'title')
|
||||
video_description = self._html_search_regex(
|
||||
r'<div class="entry-content">(?P<description>.+?)</div>',
|
||||
webpage, 'description', flags=re.DOTALL, fatal=False)
|
||||
video_thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
playerdata = self._download_webpage(playerdata_url, video_id, 'Downloading player webpage')
|
||||
video_id = self._match_id(url)
|
||||
playerdata = self._download_webpage(url, video_id, 'Downloading player webpage')
|
||||
|
||||
vidtitle = self._search_regex(
|
||||
r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/')
|
||||
vidurl = self._search_regex(
|
||||
r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/')
|
||||
r'\'vidurl\'\s*:\s*"([^"]+)"', playerdata, 'vidurl').replace('\\/', '/')
|
||||
|
||||
videolist_url = None
|
||||
|
||||
@@ -67,7 +33,7 @@ class CinemassacreIE(InfoExtractor):
|
||||
if mobj:
|
||||
videoserver = mobj.group('videoserver')
|
||||
mobj = re.search(r'\'vidid\'\s*:\s*"(?P<vidid>[^\']+)"', playerdata)
|
||||
vidid = mobj.group('vidid') if mobj else full_video_id
|
||||
vidid = mobj.group('vidid') if mobj else video_id
|
||||
videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid)
|
||||
else:
|
||||
mobj = re.search(r"file\s*:\s*'(?P<smil>http.+?/jwplayer\.smil)'", playerdata)
|
||||
@@ -85,34 +51,128 @@ class CinemassacreIE(InfoExtractor):
|
||||
file_ = src.partition(':')[-1]
|
||||
width = int_or_none(video.get('width'))
|
||||
height = int_or_none(video.get('height'))
|
||||
bitrate = int_or_none(video.get('system-bitrate'))
|
||||
bitrate = int_or_none(video.get('system-bitrate'), scale=1000)
|
||||
format = {
|
||||
'url': baseurl + file_,
|
||||
'format_id': src.rpartition('.')[0].rpartition('_')[-1],
|
||||
}
|
||||
if width or height:
|
||||
format.update({
|
||||
'tbr': bitrate // 1000 if bitrate else None,
|
||||
'tbr': bitrate,
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
else:
|
||||
format.update({
|
||||
'abr': bitrate // 1000 if bitrate else None,
|
||||
'abr': bitrate,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
formats.append(format)
|
||||
self._sort_formats(formats)
|
||||
else:
|
||||
formats = [{
|
||||
'url': vidurl,
|
||||
}]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'title': vidtitle,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class CinemassacreIE(InfoExtractor):
|
||||
_VALID_URL = 'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
|
||||
'md5': 'fde81fbafaee331785f58cd6c0d46190',
|
||||
'info_dict': {
|
||||
'id': 'Cinemassacre-19911',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20121110',
|
||||
'title': '“Angry Video Game Nerd: The Movie” – Trailer',
|
||||
'description': 'md5:fb87405fcb42a331742a0dce2708560b',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
|
||||
'md5': 'd72f10cd39eac4215048f62ab477a511',
|
||||
'info_dict': {
|
||||
'id': 'Cinemassacre-521be8ef82b16',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20131002',
|
||||
'title': 'The Mummy’s Hand (1940)',
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('display_id')
|
||||
video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
playerdata_url = self._search_regex(
|
||||
r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
|
||||
webpage, 'player data URL')
|
||||
video_title = self._html_search_regex(
|
||||
r'<title>(?P<title>.+?)\|', webpage, 'title')
|
||||
video_description = self._html_search_regex(
|
||||
r'<div class="entry-content">(?P<description>.+?)</div>',
|
||||
webpage, 'description', flags=re.DOTALL, fatal=False)
|
||||
video_thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'display_id': display_id,
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'upload_date': video_date,
|
||||
'thumbnail': video_thumbnail,
|
||||
'url': playerdata_url,
|
||||
}
|
||||
|
||||
|
||||
class TeamFourIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?'
|
||||
_TEST = {
|
||||
'url': 'http://teamfourstar.com/video/a-moment-with-tfs-episode-4/',
|
||||
'info_dict': {
|
||||
'id': 'TeamFourStar-5292a02f20bfa',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20130401',
|
||||
'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar',
|
||||
'title': 'A Moment With TFS Episode 4',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
playerdata_url = self._search_regex(
|
||||
r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
|
||||
webpage, 'player data URL')
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'<div class="heroheadingtitle">(?P<title>.+?)</div>',
|
||||
webpage, 'title')
|
||||
video_date = unified_strdate(self._html_search_regex(
|
||||
r'<div class="heroheadingdate">(?P<date>.+?)</div>',
|
||||
webpage, 'date', fatal=False))
|
||||
video_description = self._html_search_regex(
|
||||
r'(?s)<div class="postcontent">(?P<description>.+?)</div>',
|
||||
webpage, 'description', fatal=False)
|
||||
video_thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'display_id': display_id,
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'upload_date': video_date,
|
||||
'thumbnail': video_thumbnail,
|
||||
'url': playerdata_url,
|
||||
}
|
@@ -39,7 +39,7 @@ class SlideshareIE(InfoExtractor):
|
||||
ext = info['jsplayer']['video_extension']
|
||||
video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
|
||||
description = self._html_search_regex(
|
||||
r'<p\s+(?:style="[^"]*"\s+)?class="description.*?"[^>]*>(.*?)</p>', webpage,
|
||||
r'<p\s+(?:style="[^"]*"\s+)?class=".*?description.*?"[^>]*>(.*?)</p>', webpage,
|
||||
'description', fatal=False)
|
||||
|
||||
return {
|
||||
|
@@ -274,15 +274,18 @@ class SmotriBroadcastIE(InfoExtractor):
|
||||
broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page')
|
||||
|
||||
if re.search('>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None:
|
||||
raise ExtractorError('Broadcast %s does not exist' % broadcast_id, expected=True)
|
||||
raise ExtractorError(
|
||||
'Broadcast %s does not exist' % broadcast_id, expected=True)
|
||||
|
||||
# Adult content
|
||||
if re.search('EroConfirmText">', broadcast_page) is not None:
|
||||
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
raise ExtractorError('Erotic broadcasts allowed only for registered users, '
|
||||
'use --username and --password options to provide account credentials.', expected=True)
|
||||
raise ExtractorError(
|
||||
'Erotic broadcasts allowed only for registered users, '
|
||||
'use --username and --password options to provide account credentials.',
|
||||
expected=True)
|
||||
|
||||
login_form = {
|
||||
'login-hint53': '1',
|
||||
@@ -291,9 +294,11 @@ class SmotriBroadcastIE(InfoExtractor):
|
||||
'password': password,
|
||||
}
|
||||
|
||||
request = compat_urllib_request.Request(broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))
|
||||
request = compat_urllib_request.Request(
|
||||
broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
broadcast_page = self._download_webpage(request, broadcast_id, 'Logging in and confirming age')
|
||||
broadcast_page = self._download_webpage(
|
||||
request, broadcast_id, 'Logging in and confirming age')
|
||||
|
||||
if re.search('>Неверный логин или пароль<', broadcast_page) is not None:
|
||||
raise ExtractorError('Unable to log in: bad username or password', expected=True)
|
||||
@@ -303,7 +308,7 @@ class SmotriBroadcastIE(InfoExtractor):
|
||||
adult_content = False
|
||||
|
||||
ticket = self._html_search_regex(
|
||||
'window\.broadcast_control\.addFlashVar\\(\'file\', \'([^\']+)\'\\);',
|
||||
r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'([^']+)'\)",
|
||||
broadcast_page, 'broadcast ticket')
|
||||
|
||||
url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket
|
||||
@@ -312,26 +317,31 @@ class SmotriBroadcastIE(InfoExtractor):
|
||||
if broadcast_password:
|
||||
url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()
|
||||
|
||||
broadcast_json_page = self._download_webpage(url, broadcast_id, 'Downloading broadcast JSON')
|
||||
broadcast_json_page = self._download_webpage(
|
||||
url, broadcast_id, 'Downloading broadcast JSON')
|
||||
|
||||
try:
|
||||
broadcast_json = json.loads(broadcast_json_page)
|
||||
|
||||
protected_broadcast = broadcast_json['_pass_protected'] == 1
|
||||
if protected_broadcast and not broadcast_password:
|
||||
raise ExtractorError('This broadcast is protected by a password, use the --video-password option', expected=True)
|
||||
raise ExtractorError(
|
||||
'This broadcast is protected by a password, use the --video-password option',
|
||||
expected=True)
|
||||
|
||||
broadcast_offline = broadcast_json['is_play'] == 0
|
||||
if broadcast_offline:
|
||||
raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True)
|
||||
|
||||
rtmp_url = broadcast_json['_server']
|
||||
if not rtmp_url.startswith('rtmp://'):
|
||||
mobj = re.search(r'^rtmp://[^/]+/(?P<app>.+)/?$', rtmp_url)
|
||||
if not mobj:
|
||||
raise ExtractorError('Unexpected broadcast rtmp URL')
|
||||
|
||||
broadcast_playpath = broadcast_json['_streamName']
|
||||
broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL'])
|
||||
broadcast_thumbnail = broadcast_json['_imgURL']
|
||||
broadcast_title = broadcast_json['title']
|
||||
broadcast_title = self._live_title(broadcast_json['title'])
|
||||
broadcast_description = broadcast_json['description']
|
||||
broadcaster_nick = broadcast_json['nick']
|
||||
broadcaster_login = broadcast_json['login']
|
||||
@@ -352,6 +362,9 @@ class SmotriBroadcastIE(InfoExtractor):
|
||||
'age_limit': 18 if adult_content else 0,
|
||||
'ext': 'flv',
|
||||
'play_path': broadcast_playpath,
|
||||
'player_url': 'http://pics.smotri.com/broadcast_play.swf',
|
||||
'app': broadcast_app,
|
||||
'rtmp_live': True,
|
||||
'rtmp_conn': rtmp_conn
|
||||
'rtmp_conn': rtmp_conn,
|
||||
'is_live': True,
|
||||
}
|
||||
|
@@ -1,4 +1,5 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
@@ -11,13 +12,14 @@ class SohuIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?P<mytv>my\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P<id>\d+)\.shtml.*?'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://tv.sohu.com/20130724/n382479172.shtml#super',
|
||||
u'file': u'382479172.mp4',
|
||||
u'md5': u'bde8d9a6ffd82c63a1eefaef4eeefec7',
|
||||
u'info_dict': {
|
||||
u'title': u'MV:Far East Movement《The Illest》',
|
||||
'url': 'http://tv.sohu.com/20130724/n382479172.shtml#super',
|
||||
'md5': 'bde8d9a6ffd82c63a1eefaef4eeefec7',
|
||||
'info_dict': {
|
||||
'id': '382479172',
|
||||
'ext': 'mp4',
|
||||
'title': 'MV:Far East Movement《The Illest》',
|
||||
},
|
||||
u'skip': u'Only available from China',
|
||||
'skip': 'Only available from China',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -26,11 +28,11 @@ class SohuIE(InfoExtractor):
|
||||
if mytv:
|
||||
base_data_url = 'http://my.tv.sohu.com/play/videonew.do?vid='
|
||||
else:
|
||||
base_data_url = u'http://hot.vrs.sohu.com/vrs_flash.action?vid='
|
||||
base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
|
||||
data_url = base_data_url + str(vid_id)
|
||||
data_json = self._download_webpage(
|
||||
data_url, video_id,
|
||||
note=u'Downloading JSON data for ' + str(vid_id))
|
||||
note='Downloading JSON data for ' + str(vid_id))
|
||||
return json.loads(data_json)
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -39,11 +41,11 @@ class SohuIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
raw_title = self._html_search_regex(r'(?s)<title>(.+?)</title>',
|
||||
webpage, u'video title')
|
||||
webpage, 'video title')
|
||||
title = raw_title.partition('-')[0].strip()
|
||||
|
||||
vid = self._html_search_regex(r'var vid ?= ?["\'](\d+)["\']', webpage,
|
||||
u'video path')
|
||||
'video path')
|
||||
data = _fetch_data(vid, mytv)
|
||||
|
||||
QUALITIES = ('ori', 'super', 'high', 'nor')
|
||||
@@ -51,7 +53,7 @@ class SohuIE(InfoExtractor):
|
||||
for q in QUALITIES
|
||||
if data['data'][q + 'Vid'] != 0]
|
||||
if not vid_ids:
|
||||
raise ExtractorError(u'No formats available for this video')
|
||||
raise ExtractorError('No formats available for this video')
|
||||
|
||||
# For now, we just pick the highest available quality
|
||||
vid_id = vid_ids[-1]
|
||||
@@ -69,7 +71,7 @@ class SohuIE(InfoExtractor):
|
||||
(allot, prot, clipsURL[i], su[i]))
|
||||
part_str = self._download_webpage(
|
||||
part_url, video_id,
|
||||
note=u'Downloading part %d of %d' % (i + 1, part_count))
|
||||
note='Downloading part %d of %d' % (i + 1, part_count))
|
||||
|
||||
part_info = part_str.split('|')
|
||||
video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
|
||||
|
@@ -32,7 +32,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
(?P<title>[\w\d-]+)/?
|
||||
(?P<token>[^?]+?)?(?:[?].*)?$)
|
||||
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
|
||||
(?:/?\?secret_token=(?P<secret_token>[^&]+?))?$)
|
||||
(?:/?\?secret_token=(?P<secret_token>[^&]+))?)
|
||||
|(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
|
||||
)
|
||||
'''
|
||||
|
@@ -33,5 +33,6 @@ class SpaceIE(InfoExtractor):
|
||||
# Other videos works fine with the info from the object
|
||||
brightcove_url = BrightcoveIE._extract_brightcove_url(webpage)
|
||||
if brightcove_url is None:
|
||||
raise ExtractorError(u'The webpage does not contain a video', expected=True)
|
||||
raise ExtractorError(
|
||||
'The webpage does not contain a video', expected=True)
|
||||
return self.url_result(brightcove_url, BrightcoveIE.ie_key())
|
||||
|
@@ -1,7 +1,8 @@
|
||||
from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
@@ -17,10 +18,10 @@ class SubtitlesInfoExtractor(InfoExtractor):
|
||||
sub_lang_list = self._get_available_subtitles(video_id, webpage)
|
||||
auto_captions_list = self._get_available_automatic_caption(video_id, webpage)
|
||||
sub_lang = ",".join(list(sub_lang_list.keys()))
|
||||
self.to_screen(u'%s: Available subtitles for video: %s' %
|
||||
self.to_screen('%s: Available subtitles for video: %s' %
|
||||
(video_id, sub_lang))
|
||||
auto_lang = ",".join(auto_captions_list.keys())
|
||||
self.to_screen(u'%s: Available automatic captions for video: %s' %
|
||||
self.to_screen('%s: Available automatic captions for video: %s' %
|
||||
(video_id, auto_lang))
|
||||
|
||||
def extract_subtitles(self, video_id, webpage):
|
||||
@@ -51,7 +52,7 @@ class SubtitlesInfoExtractor(InfoExtractor):
|
||||
sub_lang_list = {}
|
||||
for sub_lang in requested_langs:
|
||||
if sub_lang not in available_subs_list:
|
||||
self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
|
||||
self._downloader.report_warning('no closed captions found in the specified language "%s"' % sub_lang)
|
||||
continue
|
||||
sub_lang_list[sub_lang] = available_subs_list[sub_lang]
|
||||
|
||||
@@ -70,10 +71,10 @@ class SubtitlesInfoExtractor(InfoExtractor):
|
||||
try:
|
||||
sub = self._download_subtitle_url(sub_lang, url)
|
||||
except ExtractorError as err:
|
||||
self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
|
||||
self._downloader.report_warning('unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
|
||||
return
|
||||
if not sub:
|
||||
self._downloader.report_warning(u'Did not fetch video subtitles')
|
||||
self._downloader.report_warning('Did not fetch video subtitles')
|
||||
return
|
||||
return sub
|
||||
|
||||
@@ -94,5 +95,5 @@ class SubtitlesInfoExtractor(InfoExtractor):
|
||||
Must be redefined by the subclasses that support automatic captions,
|
||||
otherwise it will return {}
|
||||
"""
|
||||
self._downloader.report_warning(u'Automatic Captions not supported by this server')
|
||||
self._downloader.report_warning('Automatic Captions not supported by this server')
|
||||
return {}
|
||||
|
@@ -4,10 +4,11 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_filesize
|
||||
|
||||
|
||||
class TagesschauIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/video/video(?P<id>-?[0-9]+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?:sendung/ts|video/video)(?P<id>-?[0-9]+)\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.tagesschau.de/multimedia/video/video1399128.html',
|
||||
@@ -19,6 +20,16 @@ class TagesschauIE(InfoExtractor):
|
||||
'description': 'md5:69da3c61275b426426d711bde96463ab',
|
||||
'thumbnail': 're:^http:.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html',
|
||||
'md5': '3c54c1f6243d279b706bde660ceec633',
|
||||
'info_dict': {
|
||||
'id': '5727',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:695c01bfd98b7e313c501386327aea59',
|
||||
'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr',
|
||||
'thumbnail': 're:^http:.*\.jpg$',
|
||||
}
|
||||
}]
|
||||
|
||||
_FORMATS = {
|
||||
@@ -28,42 +39,82 @@ class TagesschauIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
if video_id.startswith('-'):
|
||||
display_id = video_id.strip('-')
|
||||
else:
|
||||
display_id = video_id
|
||||
|
||||
video_id = self._match_id(url)
|
||||
display_id = video_id.lstrip('-')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
playerpage = self._download_webpage(
|
||||
'http://www.tagesschau.de/multimedia/video/video%s~player_autoplay-true.html' % video_id,
|
||||
display_id, 'Downloading player page')
|
||||
player_url = self._html_search_meta(
|
||||
'twitter:player', webpage, 'player URL', default=None)
|
||||
if player_url:
|
||||
playerpage = self._download_webpage(
|
||||
player_url, display_id, 'Downloading player page')
|
||||
|
||||
medias = re.findall(
|
||||
r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"',
|
||||
playerpage)
|
||||
|
||||
formats = []
|
||||
for url, ext, res in medias:
|
||||
f = {
|
||||
'format_id': res + '_' + ext,
|
||||
'url': url,
|
||||
'ext': ext,
|
||||
}
|
||||
f.update(self._FORMATS.get(res, {}))
|
||||
formats.append(f)
|
||||
medias = re.findall(
|
||||
r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"',
|
||||
playerpage)
|
||||
formats = []
|
||||
for url, ext, res in medias:
|
||||
f = {
|
||||
'format_id': res + '_' + ext,
|
||||
'url': url,
|
||||
'ext': ext,
|
||||
}
|
||||
f.update(self._FORMATS.get(res, {}))
|
||||
formats.append(f)
|
||||
thumbnail_fn = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)[-1]
|
||||
title = self._og_search_title(webpage).strip()
|
||||
description = self._og_search_description(webpage).strip()
|
||||
else:
|
||||
download_text = self._search_regex(
|
||||
r'(?s)<p>Wir bieten dieses Video in folgenden Formaten zum Download an:</p>\s*<div class="controls">(.*?)</div>\s*<p>',
|
||||
webpage, 'download links')
|
||||
links = re.finditer(
|
||||
r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>',
|
||||
download_text)
|
||||
formats = []
|
||||
for l in links:
|
||||
format_id = self._search_regex(
|
||||
r'.*/[^/.]+\.([^/]+)\.[^/.]+', l.group('url'), 'format ID')
|
||||
format = {
|
||||
'format_id': format_id,
|
||||
'url': l.group('url'),
|
||||
'format_name': l.group('name'),
|
||||
}
|
||||
m = re.match(
|
||||
r'''(?x)
|
||||
Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&\#10;
|
||||
(?P<width>[0-9]+)x(?P<height>[0-9]+)px&\#10;
|
||||
(?P<vbr>[0-9]+)kbps&\#10;
|
||||
Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&\#10;
|
||||
Größe:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)''',
|
||||
l.group('title'))
|
||||
if m:
|
||||
format.update({
|
||||
'format_note': m.group('audio_desc'),
|
||||
'vcodec': m.group('vcodec'),
|
||||
'width': int(m.group('width')),
|
||||
'height': int(m.group('height')),
|
||||
'abr': int(m.group('abr')),
|
||||
'vbr': int(m.group('vbr')),
|
||||
'filesize_approx': parse_filesize(m.group('filesize_approx')),
|
||||
})
|
||||
formats.append(format)
|
||||
thumbnail_fn = self._search_regex(
|
||||
r'(?s)<img alt="Sendungsbild".*?src="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<p class="teasertext">(.*?)</p>',
|
||||
webpage, 'description', fatal=False)
|
||||
title = self._html_search_regex(
|
||||
r'<span class="headline".*?>(.*?)</span>', webpage, 'title')
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)[-1]
|
||||
thumbnail = 'http://www.tagesschau.de' + thumbnail_fn
|
||||
|
||||
return {
|
||||
'id': display_id,
|
||||
'title': self._og_search_title(webpage).strip(),
|
||||
'thumbnail': 'http://www.tagesschau.de' + thumbnail,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
'description': self._og_search_description(webpage).strip(),
|
||||
'description': description,
|
||||
}
|
||||
|
62
youtube_dl/extractor/tass.py
Normal file
62
youtube_dl/extractor/tass.py
Normal file
@@ -0,0 +1,62 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class TassIE(InfoExtractor):
    """Information extractor for article pages on tass.ru and itar-tass.com.

    The numeric last path component of the URL is used as the video id.
    """

    _VALID_URL = r'https?://(?:tass\.ru|itar-tass\.com)/[^/]+/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'http://tass.ru/obschestvo/1586870',
            'md5': '3b4cdd011bc59174596b6145cda474a4',
            'info_dict': {
                'id': '1586870',
                'ext': 'mp4',
                'title': 'Посетителям московского зоопарка показали красную панду',
                'description': 'Приехавшую из Дублина Зейну можно увидеть в павильоне "Кошки тропиков"',
                'thumbnail': 're:^https?://.*\.jpg$',
            },
        },
        {
            'url': 'http://itar-tass.com/obschestvo/1600009',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict for its video.

        Returns a dict with the keys 'id', 'title', 'description',
        'thumbnail' and 'formats'.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The page carries a JavaScript ``sources: [...]`` literal; it is
        # converted to JSON before being parsed.
        sources_js = self._search_regex(
            r'(?s)sources\s*:\s*(\[.+?\])', webpage, 'sources')
        sources = json.loads(js_to_json(sources_js))

        # Quality helper built from the ordered labels 'sd', 'hd'.
        rank_label = qualities(['sd', 'hd'])

        formats = []
        for entry in sources:
            file_url = entry.get('file')
            # Only keep entries whose 'file' is an http(s) URL ending in .mp4.
            is_http_mp4 = (
                file_url
                and file_url.startswith('http')
                and file_url.endswith('.mp4'))
            if is_http_mp4:
                quality_label = entry.get('label')
                formats.append({
                    'url': file_url,
                    'format_id': quality_label,
                    'quality': rank_label(quality_label),
                })
        self._sort_formats(formats)

        info = {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'formats': formats,
        }
        return info
|
@@ -199,8 +199,9 @@ class TEDIE(SubtitlesInfoExtractor):
|
||||
webpage = self._download_webpage(url, name)
|
||||
|
||||
config_json = self._html_search_regex(
|
||||
r"data-config='([^']+)", webpage, 'config')
|
||||
config = json.loads(config_json)
|
||||
r'"pages\.jwplayer"\s*,\s*({.+?})\s*\)\s*</script>',
|
||||
webpage, 'config')
|
||||
config = json.loads(config_json)['config']
|
||||
video_url = config['video']['url']
|
||||
thumbnail = config.get('image', {}).get('url')
|
||||
|
||||
|
32
youtube_dl/extractor/tmz.py
Normal file
32
youtube_dl/extractor/tmz.py
Normal file
@@ -0,0 +1,32 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class TMZIE(InfoExtractor):
    """Information extractor for video pages under tmz.com/videos/."""

    # The id group stops at '/', '?' and '#' so that a query string or a
    # fragment appended to the page URL does not end up in the video id.
    _VALID_URL = r'https?://(?:www\.)?tmz\.com/videos/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://www.tmz.com/videos/0_okj015ty/',
        'md5': '791204e3bf790b1426cb2db0706184c0',
        'info_dict': {
            'id': '0_okj015ty',
            'url': 'http://tmz.vo.llnwd.net/o28/2014-03/13/0_okj015ty_0_rt8ro3si_2.mp4',
            'ext': 'mp4',
            'title': 'Kim Kardashian\'s Boobs Unlock a Mystery!',
            'description': 'Did Kim Kardasain try to one-up Khloe by one-upping Kylie??? Or is she just showing off her amazing boobs?',
            'thumbnail': 'http://cdnbakmi.kaltura.com/p/591531/sp/59153100/thumbnail/entry_id/0_okj015ty/version/100002/acv/182/width/640',
        }
    }

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict for its video.

        The media URL is read from the page's 'VideoURL' meta tag (looked up
        with ``fatal=True``) and the thumbnail from the 'ThumbURL' meta tag;
        title and description come from the Open Graph helpers.

        Returns a dict with the keys 'id', 'url', 'title', 'description'
        and 'thumbnail'.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        return {
            'id': video_id,
            'url': self._html_search_meta('VideoURL', webpage, fatal=True),
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            'thumbnail': self._html_search_meta('ThumbURL', webpage),
        }
|
@@ -1,28 +1,28 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class TriluliluIE(InfoExtractor):
|
||||
_VALID_URL = r'(?x)(?:https?://)?(?:www\.)?trilulilu\.ro/video-(?P<category>[^/]+)/(?P<video_id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?trilulilu\.ro/video-[^/]+/(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
u"url": u"http://www.trilulilu.ro/video-animatie/big-buck-bunny-1",
|
||||
u'file': u"big-buck-bunny-1.mp4",
|
||||
u'info_dict': {
|
||||
u"title": u"Big Buck Bunny",
|
||||
u"description": u":) pentru copilul din noi",
|
||||
'url': 'http://www.trilulilu.ro/video-animatie/big-buck-bunny-1',
|
||||
'info_dict': {
|
||||
'id': 'big-buck-bunny-1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Big Buck Bunny',
|
||||
'description': ':) pentru copilul din noi',
|
||||
},
|
||||
# Server ignores Range headers (--test)
|
||||
u"params": {
|
||||
u"skip_download": True
|
||||
'params': {
|
||||
'skip_download': True
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('video_id')
|
||||
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
@@ -30,20 +30,20 @@ class TriluliluIE(InfoExtractor):
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
log_str = self._search_regex(
|
||||
r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, u'log info')
|
||||
r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, 'log info')
|
||||
log = json.loads(log_str)
|
||||
|
||||
format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
|
||||
u'video-formats2' % log)
|
||||
format_url = ('http://fs%(server)s.trilulilu.ro/%(hash)s/'
|
||||
'video-formats2' % log)
|
||||
format_doc = self._download_xml(
|
||||
format_url, video_id,
|
||||
note=u'Downloading formats',
|
||||
errnote=u'Error while downloading formats')
|
||||
note='Downloading formats',
|
||||
errnote='Error while downloading formats')
|
||||
|
||||
video_url_template = (
|
||||
u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
|
||||
u'&source=site&hash=%(hash)s&username=%(userid)s&'
|
||||
u'key=ministhebest&format=%%s&sig=&exp=' %
|
||||
'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
|
||||
'&source=site&hash=%(hash)s&username=%(userid)s&'
|
||||
'key=ministhebest&format=%%s&sig=&exp=' %
|
||||
log)
|
||||
formats = [
|
||||
{
|
||||
|
@@ -73,7 +73,7 @@ class TudouIE(InfoExtractor):
|
||||
result = []
|
||||
len_parts = len(parts)
|
||||
if len_parts > 1:
|
||||
self.to_screen(u'%s: found %s parts' % (video_id, len_parts))
|
||||
self.to_screen('%s: found %s parts' % (video_id, len_parts))
|
||||
for part in parts:
|
||||
part_id = part['k']
|
||||
final_url = self._url_for_id(part_id, quality)
|
||||
|
@@ -19,6 +19,7 @@ class TuneInIE(InfoExtractor):
|
||||
|tun\.in/(?P<redirect_id>[A-Za-z0-9]+)
|
||||
)
|
||||
'''
|
||||
_API_URL_TEMPLATE = 'http://tunein.com/tuner/tune/?stationId={0:}&tuneType=Station'
|
||||
|
||||
_INFO_DICT = {
|
||||
'id': '34682',
|
||||
@@ -56,13 +57,10 @@ class TuneInIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
station_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, station_id, note='Downloading station webpage')
|
||||
station_info = self._download_json(
|
||||
self._API_URL_TEMPLATE.format(station_id),
|
||||
station_id, note='Downloading station JSON')
|
||||
|
||||
payload = self._html_search_regex(
|
||||
r'(?m)TuneIn\.payload\s*=\s*(\{[^$]+?)$', webpage, 'JSON data')
|
||||
json_data = json.loads(payload)
|
||||
station_info = json_data['Station']['broadcast']
|
||||
title = station_info['Title']
|
||||
thumbnail = station_info.get('Logo')
|
||||
location = station_info.get('Location')
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user