Compare commits
197 Commits
2015.01.23
...
2015.02.02
Author | SHA1 | Date | |
---|---|---|---|
3a4cca687f | |||
7d3d06a16c | |||
c21b1fbeeb | |||
f920ce295e | |||
7a7bd19c45 | |||
8f4b58d70e | |||
3fd45e03bf | |||
869b4aeff4 | |||
cc9ca3ba6e | |||
ea71034bd3 | |||
9fffd0469f | |||
ae7773942e | |||
469a64cebf | |||
aae3fdcfae | |||
6a66904f8e | |||
78271e3319 | |||
92bf0bcdf8 | |||
1283204917 | |||
6789defea9 | |||
acf2a6e97b | |||
8cfb6efe6f | |||
04edb9caf5 | |||
044131ba21 | |||
0a7055c90d | |||
9e3f19919a | |||
4a3da4ebdb | |||
027008b14e | |||
c6df692466 | |||
acf757f42e | |||
dd8982f19c | |||
654bd52f58 | |||
a9551e9020 | |||
4e980275b5 | |||
c172440ac5 | |||
e332772531 | |||
437cac8cc1 | |||
9f281cacd2 | |||
748a0fab8a | |||
c1f06d6307 | |||
c4e817ce4a | |||
9a3e5e6955 | |||
228d30ed06 | |||
057c0609fc | |||
17d2712d9c | |||
fc09240e24 | |||
146303136f | |||
96aded8d3d | |||
2886be15aa | |||
ca0f500ecf | |||
29aef5a33c | |||
9158b2b301 | |||
0196149c5b | |||
8f9312c387 | |||
439b9a9e9b | |||
8c72beb25e | |||
1ee94db2d0 | |||
e77d2975af | |||
e41b1f7385 | |||
cd596028d6 | |||
cc57bd33a8 | |||
6d593c3276 | |||
91755ee384 | |||
0692ef86ef | |||
439d9be27d | |||
b80505a409 | |||
e4c17d7274 | |||
2c58674e0e | |||
ef1269fb07 | |||
e525d9a3df | |||
20b4492c71 | |||
dee3f73787 | |||
d543bdc351 | |||
c7ff0c6422 | |||
01c46659c4 | |||
b04b885271 | |||
dc35bfd2d5 | |||
70fca8d694 | |||
a52c633536 | |||
7b6c60393e | |||
83e7a314b4 | |||
749f2ca044 | |||
5468ff4d91 | |||
1d2daaea63 | |||
52585fd6dc | |||
c03844a4ec | |||
6449cd807e | |||
e2a08185c6 | |||
5d6677ca28 | |||
5a8a29cfea | |||
c1708b89c0 | |||
83fddfd493 | |||
1798791df1 | |||
6ebb0dca9f | |||
cf8d6ec865 | |||
f452f72c6b | |||
3198291f26 | |||
02c1d5e285 | |||
ec4161a57d | |||
03d8d4df38 | |||
03d2d6d51b | |||
83fda3c000 | |||
4fe8495a23 | |||
a16f6643f0 | |||
adc0ae3ceb | |||
7bb3ceb4c7 | |||
75a4fc5b72 | |||
87673cd438 | |||
f345fe9db7 | |||
e683a48d0e | |||
a7a14d9586 | |||
219337990b | |||
376a770cc4 | |||
7e500dbd93 | |||
affd04a45d | |||
c84130e865 | |||
4f264c02c7 | |||
d205476103 | |||
367cc95aa7 | |||
206dba27a4 | |||
dcf53d4408 | |||
63be3b8989 | |||
18b4e9e79d | |||
cb454b333d | |||
e0d9f85aee | |||
b04fbd789c | |||
aad9556414 | |||
48a1e5141a | |||
0865f397ae | |||
796df3c631 | |||
a28383834b | |||
3a0d2f520a | |||
6348ad12a0 | |||
fe7710cbcc | |||
2103d038b3 | |||
6ca85be6f8 | |||
9f0df77ab1 | |||
e72c7e4123 | |||
2b1bd292ae | |||
71e7da6533 | |||
80a49d3d7b | |||
d862a4f94f | |||
a57e8ce658 | |||
96a53167fa | |||
6d2749aac4 | |||
b1b0b1ca30 | |||
3dee7826e7 | |||
c9326b38b8 | |||
d4f64cabf4 | |||
fe41ddbb28 | |||
ee69b99af6 | |||
767ff0a2d1 | |||
8604e882a8 | |||
cc1237f484 | |||
37f4ce538a | |||
7d346331b5 | |||
e1ccc04e9f | |||
881e6a1f5c | |||
baeaeffce5 | |||
c14e88f0f5 | |||
8940b8608e | |||
ec82d85acd | |||
cfb56d1af3 | |||
1e10802990 | |||
6695916045 | |||
7906d199a1 | |||
1070711d60 | |||
4b405cfc6e | |||
e5660ee6ae | |||
8011fba3ae | |||
587a9c2749 | |||
e1554a407d | |||
3fcfb8e9fa | |||
384b62028a | |||
b95aab8482 | |||
fc2d6abfe7 | |||
27de5625d4 | |||
6aa4f54d66 | |||
222516d97d | |||
a055469faf | |||
fdaaaaa878 | |||
12d1fb5aa9 | |||
48f00d15b1 | |||
3e055aa5c3 | |||
6896a52721 | |||
5779b3e1fe | |||
62cd676c74 | |||
0c17278843 | |||
d229ee70da | |||
26e274666d | |||
ebd46aed51 | |||
e793f7671c | |||
c2e64f71d0 | |||
0920e5830f | |||
bf7fa94ec7 | |||
5a000b45b3 | |||
40b1cbafac | |||
4231235cda |
@ -4,6 +4,9 @@ python:
|
|||||||
- "2.7"
|
- "2.7"
|
||||||
- "3.3"
|
- "3.3"
|
||||||
- "3.4"
|
- "3.4"
|
||||||
|
before_install:
|
||||||
|
- sudo apt-get update -qq
|
||||||
|
- sudo apt-get install -yqq rtmpdump
|
||||||
script: nosetests test --verbose
|
script: nosetests test --verbose
|
||||||
notifications:
|
notifications:
|
||||||
email:
|
email:
|
||||||
|
4
AUTHORS
4
AUTHORS
@ -104,3 +104,7 @@ Ondřej Caletka
|
|||||||
Dinesh S
|
Dinesh S
|
||||||
Johan K. Jensen
|
Johan K. Jensen
|
||||||
Yen Chi Hsuan
|
Yen Chi Hsuan
|
||||||
|
Enam Mijbah Noor
|
||||||
|
David Luhmer
|
||||||
|
Shaya Goldberg
|
||||||
|
Paul Hartmann
|
||||||
|
50
README.md
50
README.md
@ -93,6 +93,14 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
## Video Selection:
|
## Video Selection:
|
||||||
--playlist-start NUMBER playlist video to start at (default is 1)
|
--playlist-start NUMBER playlist video to start at (default is 1)
|
||||||
--playlist-end NUMBER playlist video to end at (default is last)
|
--playlist-end NUMBER playlist video to end at (default is last)
|
||||||
|
--playlist-items ITEM_SPEC playlist video items to download. Specify
|
||||||
|
indices of the videos in the playlist
|
||||||
|
separated by commas like: "--playlist-items
|
||||||
|
1,2,5,8" if you want to download videos
|
||||||
|
indexed 1, 2, 5, 8 in the playlist. You can
|
||||||
|
specify range: "--playlist-items
|
||||||
|
1-3,7,10-13", it will download the videos
|
||||||
|
at index 1, 2, 3, 7, 10, 11, 12 and 13.
|
||||||
--match-title REGEX download only matching titles (regex or
|
--match-title REGEX download only matching titles (regex or
|
||||||
caseless sub-string)
|
caseless sub-string)
|
||||||
--reject-title REGEX skip download for matching titles (regex or
|
--reject-title REGEX skip download for matching titles (regex or
|
||||||
@ -124,7 +132,8 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
## Download Options:
|
## Download Options:
|
||||||
-r, --rate-limit LIMIT maximum download rate in bytes per second
|
-r, --rate-limit LIMIT maximum download rate in bytes per second
|
||||||
(e.g. 50K or 4.2M)
|
(e.g. 50K or 4.2M)
|
||||||
-R, --retries RETRIES number of retries (default is 10)
|
-R, --retries RETRIES number of retries (default is 10), or
|
||||||
|
"infinite".
|
||||||
--buffer-size SIZE size of download buffer (e.g. 1024 or 16K)
|
--buffer-size SIZE size of download buffer (e.g. 1024 or 16K)
|
||||||
(default is 1024)
|
(default is 1024)
|
||||||
--no-resize-buffer do not automatically adjust the buffer
|
--no-resize-buffer do not automatically adjust the buffer
|
||||||
@ -132,6 +141,11 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
automatically resized from an initial value
|
automatically resized from an initial value
|
||||||
of SIZE.
|
of SIZE.
|
||||||
--playlist-reverse Download playlist videos in reverse order
|
--playlist-reverse Download playlist videos in reverse order
|
||||||
|
--xattr-set-filesize (experimental) set file xattribute
|
||||||
|
ytdl.filesize with expected filesize
|
||||||
|
--external-downloader COMMAND (experimental) Use the specified external
|
||||||
|
downloader. Currently supports
|
||||||
|
aria2c,curl,wget
|
||||||
|
|
||||||
## Filesystem Options:
|
## Filesystem Options:
|
||||||
-a, --batch-file FILE file containing URLs to download ('-' for
|
-a, --batch-file FILE file containing URLs to download ('-' for
|
||||||
@ -191,7 +205,6 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
--write-info-json write video metadata to a .info.json file
|
--write-info-json write video metadata to a .info.json file
|
||||||
--write-annotations write video annotations to a .annotation
|
--write-annotations write video annotations to a .annotation
|
||||||
file
|
file
|
||||||
--write-thumbnail write thumbnail image to disk
|
|
||||||
--load-info FILE json file containing the video information
|
--load-info FILE json file containing the video information
|
||||||
(created with the "--write-info-json" option)
|
(created with the "--write-info-json" option)
|
||||||
--cookies FILE file to read cookies from and dump cookie
|
--cookies FILE file to read cookies from and dump cookie
|
||||||
@ -206,6 +219,12 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
--no-cache-dir Disable filesystem caching
|
--no-cache-dir Disable filesystem caching
|
||||||
--rm-cache-dir Delete all filesystem cache files
|
--rm-cache-dir Delete all filesystem cache files
|
||||||
|
|
||||||
|
## Thumbnail images:
|
||||||
|
--write-thumbnail write thumbnail image to disk
|
||||||
|
--write-all-thumbnails write all thumbnail image formats to disk
|
||||||
|
--list-thumbnails Simulate and list all available thumbnail
|
||||||
|
formats
|
||||||
|
|
||||||
## Verbosity / Simulation Options:
|
## Verbosity / Simulation Options:
|
||||||
-q, --quiet activates quiet mode
|
-q, --quiet activates quiet mode
|
||||||
--no-warnings Ignore warnings
|
--no-warnings Ignore warnings
|
||||||
@ -273,9 +292,9 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
video results by putting a condition in
|
video results by putting a condition in
|
||||||
brackets, as in -f "best[height=720]" (or
|
brackets, as in -f "best[height=720]" (or
|
||||||
-f "[filesize>10M]"). This works for
|
-f "[filesize>10M]"). This works for
|
||||||
filesize, height, width, tbr, abr, and vbr
|
filesize, height, width, tbr, abr, vbr, and
|
||||||
and the comparisons <, <=, >, >=, =, != .
|
fps and the comparisons <, <=, >, >=, =, !=
|
||||||
Formats for which the value is not known
|
. Formats for which the value is not known
|
||||||
are excluded unless you put a question mark
|
are excluded unless you put a question mark
|
||||||
(?) after the operator. You can combine
|
(?) after the operator. You can combine
|
||||||
format filters, so -f "[height <=?
|
format filters, so -f "[height <=?
|
||||||
@ -349,11 +368,11 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
--add-metadata write metadata to the video file
|
--add-metadata write metadata to the video file
|
||||||
--xattrs write metadata to the video file's xattrs
|
--xattrs write metadata to the video file's xattrs
|
||||||
(using dublin core and xdg standards)
|
(using dublin core and xdg standards)
|
||||||
--fixup POLICY (experimental) Automatically correct known
|
--fixup POLICY Automatically correct known faults of the
|
||||||
faults of the file. One of never (do
|
file. One of never (do nothing), warn (only
|
||||||
nothing), warn (only emit a warning),
|
emit a warning), detect_or_warn(the
|
||||||
detect_or_warn(check whether we can do
|
default; fix file if we can, warn
|
||||||
anything about it, warn otherwise
|
otherwise)
|
||||||
--prefer-avconv Prefer avconv over ffmpeg for running the
|
--prefer-avconv Prefer avconv over ffmpeg for running the
|
||||||
postprocessors (default)
|
postprocessors (default)
|
||||||
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
|
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
|
||||||
@ -506,9 +525,16 @@ From then on, after restarting your shell, you will be able to access both youtu
|
|||||||
|
|
||||||
Use the `-o` option to specify an [output template](#output-template), for example `-o "/home/user/videos/%(title)s-%(id)s.%(ext)s"`. If you want this for all of your downloads, put the option into your [configuration file](#configuration).
|
Use the `-o` option to specify an [output template](#output-template), for example `-o "/home/user/videos/%(title)s-%(id)s.%(ext)s"`. If you want this for all of your downloads, put the option into your [configuration file](#configuration).
|
||||||
|
|
||||||
|
### How do I download a video starting with a `-` ?
|
||||||
|
|
||||||
|
Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the options with `--`:
|
||||||
|
|
||||||
|
youtube-dl -- -wNyEUrxzFU
|
||||||
|
youtube-dl "http://www.youtube.com/watch?v=-wNyEUrxzFU"
|
||||||
|
|
||||||
### How can I detect whether a given URL is supported by youtube-dl?
|
### How can I detect whether a given URL is supported by youtube-dl?
|
||||||
|
|
||||||
For one, have a look at the [list of supported sites](docs/supportedsites). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports a URL of a service in that list as unsupported. In that case, simply report a bug.
|
For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports a URL of a service in that list as unsupported. In that case, simply report a bug.
|
||||||
|
|
||||||
It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
|
It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
|
||||||
|
|
||||||
@ -586,7 +612,7 @@ If you want to add support for a new site, you can follow this quick list (assum
|
|||||||
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
|
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
|
||||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
||||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want.
|
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want.
|
||||||
8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501).
|
8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8).
|
||||||
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
|
||||||
|
|
||||||
$ git add youtube_dl/extractor/__init__.py
|
$ git add youtube_dl/extractor/__init__.py
|
||||||
|
@ -2,5 +2,5 @@
|
|||||||
universal = True
|
universal = True
|
||||||
|
|
||||||
[flake8]
|
[flake8]
|
||||||
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,setup.py,build
|
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,setup.py,build,.git
|
||||||
ignore = E501
|
ignore = E501
|
||||||
|
@ -140,7 +140,7 @@ def expect_info_dict(self, got_dict, expected_dict):
|
|||||||
# Are checkable fields missing from the test case definition?
|
# Are checkable fields missing from the test case definition?
|
||||||
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
|
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
|
||||||
for key, value in got_dict.items()
|
for key, value in got_dict.items()
|
||||||
if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
|
if value and key in ('id', 'title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
|
||||||
missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
|
missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
|
||||||
if missing_keys:
|
if missing_keys:
|
||||||
def _repr(v):
|
def _repr(v):
|
||||||
@ -148,9 +148,15 @@ def expect_info_dict(self, got_dict, expected_dict):
|
|||||||
return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n')
|
return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n')
|
||||||
else:
|
else:
|
||||||
return repr(v)
|
return repr(v)
|
||||||
info_dict_str = ''.join(
|
info_dict_str = ''
|
||||||
' %s: %s,\n' % (_repr(k), _repr(v))
|
if len(missing_keys) != len(expected_dict):
|
||||||
for k, v in test_info_dict.items())
|
info_dict_str += ''.join(
|
||||||
|
' %s: %s,\n' % (_repr(k), _repr(v))
|
||||||
|
for k, v in test_info_dict.items() if k not in missing_keys)
|
||||||
|
info_dict_str += '\n'
|
||||||
|
info_dict_str += ''.join(
|
||||||
|
' %s: %s,\n' % (_repr(k), _repr(test_info_dict[k]))
|
||||||
|
for k in missing_keys)
|
||||||
write_string(
|
write_string(
|
||||||
'\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr)
|
'\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr)
|
||||||
self.assertFalse(
|
self.assertFalse(
|
||||||
|
@ -89,7 +89,7 @@ def generator(test_case):
|
|||||||
|
|
||||||
for tc in test_cases:
|
for tc in test_cases:
|
||||||
info_dict = tc.get('info_dict', {})
|
info_dict = tc.get('info_dict', {})
|
||||||
if not tc.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
|
if not (info_dict.get('id') and info_dict.get('ext')):
|
||||||
raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')
|
raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')
|
||||||
|
|
||||||
if 'skip' in test_case:
|
if 'skip' in test_case:
|
||||||
@ -116,7 +116,7 @@ def generator(test_case):
|
|||||||
expect_warnings(ydl, test_case.get('expected_warnings', []))
|
expect_warnings(ydl, test_case.get('expected_warnings', []))
|
||||||
|
|
||||||
def get_tc_filename(tc):
|
def get_tc_filename(tc):
|
||||||
return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
|
return ydl.prepare_filename(tc.get('info_dict', {}))
|
||||||
|
|
||||||
res_dict = None
|
res_dict = None
|
||||||
|
|
||||||
|
72
test/test_http.py
Normal file
72
test/test_http.py
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
# Allow direct execution
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import unittest
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from youtube_dl import YoutubeDL
|
||||||
|
from youtube_dl.compat import compat_http_server
|
||||||
|
import ssl
|
||||||
|
import threading
|
||||||
|
|
||||||
|
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
|
||||||
|
|
||||||
|
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||||
|
def log_message(self, format, *args):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def do_GET(self):
|
||||||
|
if self.path == '/video.html':
|
||||||
|
self.send_response(200)
|
||||||
|
self.send_header('Content-Type', 'text/html; charset=utf-8')
|
||||||
|
self.end_headers()
|
||||||
|
self.wfile.write(b'<html><video src="/vid.mp4" /></html>')
|
||||||
|
elif self.path == '/vid.mp4':
|
||||||
|
self.send_response(200)
|
||||||
|
self.send_header('Content-Type', 'video/mp4')
|
||||||
|
self.end_headers()
|
||||||
|
self.wfile.write(b'\x00\x00\x00\x00\x20\x66\x74[video]')
|
||||||
|
else:
|
||||||
|
assert False
|
||||||
|
|
||||||
|
|
||||||
|
class FakeLogger(object):
|
||||||
|
def debug(self, msg):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def warning(self, msg):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def error(self, msg):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class TestHTTP(unittest.TestCase):
|
||||||
|
def setUp(self):
|
||||||
|
certfn = os.path.join(TEST_DIR, 'testcert.pem')
|
||||||
|
self.httpd = compat_http_server.HTTPServer(
|
||||||
|
('localhost', 0), HTTPTestRequestHandler)
|
||||||
|
self.httpd.socket = ssl.wrap_socket(
|
||||||
|
self.httpd.socket, certfile=certfn, server_side=True)
|
||||||
|
self.port = self.httpd.socket.getsockname()[1]
|
||||||
|
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
|
||||||
|
self.server_thread.daemon = True
|
||||||
|
self.server_thread.start()
|
||||||
|
|
||||||
|
def test_nocheckcertificate(self):
|
||||||
|
if sys.version_info >= (2, 7, 9): # No certificate checking anyways
|
||||||
|
ydl = YoutubeDL({'logger': FakeLogger()})
|
||||||
|
self.assertRaises(
|
||||||
|
Exception,
|
||||||
|
ydl.extract_info, 'https://localhost:%d/video.html' % self.port)
|
||||||
|
|
||||||
|
ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True})
|
||||||
|
r = ydl.extract_info('https://localhost:%d/video.html' % self.port)
|
||||||
|
self.assertEqual(r['url'], 'https://localhost:%d/vid.mp4' % self.port)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
95
test/test_jsinterp.py
Normal file
95
test/test_jsinterp.py
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
# Allow direct execution
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import unittest
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from youtube_dl.jsinterp import JSInterpreter
|
||||||
|
|
||||||
|
|
||||||
|
class TestJSInterpreter(unittest.TestCase):
|
||||||
|
def test_basic(self):
|
||||||
|
jsi = JSInterpreter('function x(){;}')
|
||||||
|
self.assertEqual(jsi.call_function('x'), None)
|
||||||
|
|
||||||
|
jsi = JSInterpreter('function x3(){return 42;}')
|
||||||
|
self.assertEqual(jsi.call_function('x3'), 42)
|
||||||
|
|
||||||
|
def test_calc(self):
|
||||||
|
jsi = JSInterpreter('function x4(a){return 2*a+1;}')
|
||||||
|
self.assertEqual(jsi.call_function('x4', 3), 7)
|
||||||
|
|
||||||
|
def test_empty_return(self):
|
||||||
|
jsi = JSInterpreter('function f(){return; y()}')
|
||||||
|
self.assertEqual(jsi.call_function('f'), None)
|
||||||
|
|
||||||
|
def test_morespace(self):
|
||||||
|
jsi = JSInterpreter('function x (a) { return 2 * a + 1 ; }')
|
||||||
|
self.assertEqual(jsi.call_function('x', 3), 7)
|
||||||
|
|
||||||
|
jsi = JSInterpreter('function f () { x = 2 ; return x; }')
|
||||||
|
self.assertEqual(jsi.call_function('f'), 2)
|
||||||
|
|
||||||
|
def test_strange_chars(self):
|
||||||
|
jsi = JSInterpreter('function $_xY1 ($_axY1) { var $_axY2 = $_axY1 + 1; return $_axY2; }')
|
||||||
|
self.assertEqual(jsi.call_function('$_xY1', 20), 21)
|
||||||
|
|
||||||
|
def test_operators(self):
|
||||||
|
jsi = JSInterpreter('function f(){return 1 << 5;}')
|
||||||
|
self.assertEqual(jsi.call_function('f'), 32)
|
||||||
|
|
||||||
|
jsi = JSInterpreter('function f(){return 19 & 21;}')
|
||||||
|
self.assertEqual(jsi.call_function('f'), 17)
|
||||||
|
|
||||||
|
jsi = JSInterpreter('function f(){return 11 >> 2;}')
|
||||||
|
self.assertEqual(jsi.call_function('f'), 2)
|
||||||
|
|
||||||
|
def test_array_access(self):
|
||||||
|
jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2] = 7; return x;}')
|
||||||
|
self.assertEqual(jsi.call_function('f'), [5, 2, 7])
|
||||||
|
|
||||||
|
def test_parens(self):
|
||||||
|
jsi = JSInterpreter('function f(){return (1) + (2) * ((( (( (((((3)))))) )) ));}')
|
||||||
|
self.assertEqual(jsi.call_function('f'), 7)
|
||||||
|
|
||||||
|
jsi = JSInterpreter('function f(){return (1 + 2) * 3;}')
|
||||||
|
self.assertEqual(jsi.call_function('f'), 9)
|
||||||
|
|
||||||
|
def test_assignments(self):
|
||||||
|
jsi = JSInterpreter('function f(){var x = 20; x = 30 + 1; return x;}')
|
||||||
|
self.assertEqual(jsi.call_function('f'), 31)
|
||||||
|
|
||||||
|
jsi = JSInterpreter('function f(){var x = 20; x += 30 + 1; return x;}')
|
||||||
|
self.assertEqual(jsi.call_function('f'), 51)
|
||||||
|
|
||||||
|
jsi = JSInterpreter('function f(){var x = 20; x -= 30 + 1; return x;}')
|
||||||
|
self.assertEqual(jsi.call_function('f'), -11)
|
||||||
|
|
||||||
|
def test_comments(self):
|
||||||
|
jsi = JSInterpreter('''
|
||||||
|
function x() {
|
||||||
|
var x = /* 1 + */ 2;
|
||||||
|
var y = /* 30
|
||||||
|
* 40 */ 50;
|
||||||
|
return x + y;
|
||||||
|
}
|
||||||
|
''')
|
||||||
|
self.assertEqual(jsi.call_function('x'), 52)
|
||||||
|
|
||||||
|
def test_precedence(self):
|
||||||
|
jsi = JSInterpreter('''
|
||||||
|
function x() {
|
||||||
|
var a = [10, 20, 30, 40, 50];
|
||||||
|
var b = 6;
|
||||||
|
a[0]=a[b%a.length];
|
||||||
|
return a;
|
||||||
|
}''')
|
||||||
|
self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50])
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
@ -52,6 +52,7 @@ from youtube_dl.utils import (
|
|||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
version_tuple,
|
version_tuple,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
|
render_table,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -237,6 +238,8 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(parse_duration('5 s'), 5)
|
self.assertEqual(parse_duration('5 s'), 5)
|
||||||
self.assertEqual(parse_duration('3 min'), 180)
|
self.assertEqual(parse_duration('3 min'), 180)
|
||||||
self.assertEqual(parse_duration('2.5 hours'), 9000)
|
self.assertEqual(parse_duration('2.5 hours'), 9000)
|
||||||
|
self.assertEqual(parse_duration('02:03:04'), 7384)
|
||||||
|
self.assertEqual(parse_duration('01:02:03:04'), 93784)
|
||||||
|
|
||||||
def test_fix_xml_ampersands(self):
|
def test_fix_xml_ampersands(self):
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
@ -370,6 +373,16 @@ class TestUtil(unittest.TestCase):
|
|||||||
on = js_to_json('{"abc": true}')
|
on = js_to_json('{"abc": true}')
|
||||||
self.assertEqual(json.loads(on), {'abc': True})
|
self.assertEqual(json.loads(on), {'abc': True})
|
||||||
|
|
||||||
|
# Ignore JavaScript code as well
|
||||||
|
on = js_to_json('''{
|
||||||
|
"x": 1,
|
||||||
|
y: "a",
|
||||||
|
z: some.code
|
||||||
|
}''')
|
||||||
|
d = json.loads(on)
|
||||||
|
self.assertEqual(d['x'], 1)
|
||||||
|
self.assertEqual(d['y'], 'a')
|
||||||
|
|
||||||
def test_clean_html(self):
|
def test_clean_html(self):
|
||||||
self.assertEqual(clean_html('a:\nb'), 'a: b')
|
self.assertEqual(clean_html('a:\nb'), 'a: b')
|
||||||
self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
|
self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
|
||||||
@ -434,5 +447,15 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
|
|||||||
self.assertTrue(is_html( # UTF-32-LE
|
self.assertTrue(is_html( # UTF-32-LE
|
||||||
b'\xFF\xFE\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4\x00\x00\x00'))
|
b'\xFF\xFE\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4\x00\x00\x00'))
|
||||||
|
|
||||||
|
def test_render_table(self):
|
||||||
|
self.assertEqual(
|
||||||
|
render_table(
|
||||||
|
['a', 'bcd'],
|
||||||
|
[[123, 4], [9999, 51]]),
|
||||||
|
'a bcd\n'
|
||||||
|
'123 4\n'
|
||||||
|
'9999 51')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
52
test/testcert.pem
Normal file
52
test/testcert.pem
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
-----BEGIN PRIVATE KEY-----
|
||||||
|
MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDMF0bAzaHAdIyB
|
||||||
|
HRmnIp4vv40lGqEePmWqicCl0QZ0wsb5dNysSxSa7330M2QeQopGfdaUYF1uTcNp
|
||||||
|
Qx6ECgBSfg+RrOBI7r/u4F+sKX8MUXVaf/5QoBUrGNGSn/pp7HMGOuQqO6BVg4+h
|
||||||
|
A1ySSwUG8mZItLRry1ISyErmW8b9xlqfd97uLME/5tX+sMelRFjUbAx8A4CK58Ev
|
||||||
|
mMguHVTlXzx5RMdYcf1VScYcjlV/qA45uzP8zwI5aigfcmUD+tbGuQRhKxUhmw0J
|
||||||
|
aobtOR6+JSOAULW5gYa/egE4dWLwbyM6b6eFbdnjlQzEA1EW7ChMPAW/Mo83KyiP
|
||||||
|
tKMCSQulAgMBAAECggEALCfBDAexPjU5DNoh6bIorUXxIJzxTNzNHCdvgbCGiA54
|
||||||
|
BBKPh8s6qwazpnjT6WQWDIg/O5zZufqjE4wM9x4+0Zoqfib742ucJO9wY4way6x4
|
||||||
|
Clt0xzbLPabB+MoZ4H7ip+9n2+dImhe7pGdYyOHoNYeOL57BBi1YFW42Hj6u/8pd
|
||||||
|
63YCXisto3Rz1YvRQVjwsrS+cRKZlzAFQRviL30jav7Wh1aWEfcXxjj4zhm8pJdk
|
||||||
|
ITGtq6howz57M0NtX6hZnfe8ywzTnDFIGKIMA2cYHuYJcBh9bc4tCGubTvTKK9UE
|
||||||
|
8fM+f6UbfGqfpKCq1mcgs0XMoFDSzKS9+mSJn0+5JQKBgQD+OCKaeH3Yzw5zGnlw
|
||||||
|
XuQfMJGNcgNr+ImjmvzUAC2fAZUJLAcQueE5kzMv5Fmd+EFE2CEX1Vit3tg0SXvA
|
||||||
|
G+bq609doILHMA03JHnV1npO/YNIhG3AAtJlKYGxQNfWH9mflYj9mEui8ZFxG52o
|
||||||
|
zWhHYuifOjjZszUR+/eio6NPzwKBgQDNhUBTrT8LIX4SE/EFUiTlYmWIvOMgXYvN
|
||||||
|
8Cm3IRNQ/yyphZaXEU0eJzfX5uCDfSVOgd6YM/2pRah+t+1Hvey4H8e0GVTu5wMP
|
||||||
|
gkkqwKPGIR1YOmlw6ippqwvoJD7LuYrm6Q4D6e1PvkjwCq6lEndrOPmPrrXNd0JJ
|
||||||
|
XO60y3U2SwKBgQDLkyZarryQXxcCI6Q10Tc6pskYDMIit095PUbTeiUOXNT9GE28
|
||||||
|
Hi32ziLCakk9kCysNasii81MxtQ54tJ/f5iGbNMMddnkKl2a19Hc5LjjAm4cJzg/
|
||||||
|
98KGEhvyVqvAo5bBDZ06/rcrD+lZOzUglQS5jcIcqCIYa0LHWQ/wJLxFzwKBgFcZ
|
||||||
|
1SRhdSmDfUmuF+S4ZpistflYjC3IV5rk4NkS9HvMWaJS0nqdw4A3AMzItXgkjq4S
|
||||||
|
DkOVLTkTI5Do5HAWRv/VwC5M2hkR4NMu1VGAKSisGiKtRsirBWSZMEenLNHshbjN
|
||||||
|
Jrpz5rZ4H7NT46ZkCCZyFBpX4gb9NyOedjA7Via3AoGARF8RxbYjnEGGFuhnbrJB
|
||||||
|
FTPR0vaL4faY3lOgRZ8jOG9V2c9Hzi/y8a8TU4C11jnJSDqYCXBTd5XN28npYxtD
|
||||||
|
pjRsCwy6ze+yvYXPO7C978eMG3YRyj366NXUxnXN59ibwe/lxi2OD9z8J1LEdF6z
|
||||||
|
VJua1Wn8HKxnXMI61DhTCSo=
|
||||||
|
-----END PRIVATE KEY-----
|
||||||
|
-----BEGIN CERTIFICATE-----
|
||||||
|
MIIEEzCCAvugAwIBAgIJAK1haYi6gmSKMA0GCSqGSIb3DQEBCwUAMIGeMQswCQYD
|
||||||
|
VQQGEwJERTEMMAoGA1UECAwDTlJXMRQwEgYDVQQHDAtEdWVzc2VsZG9yZjEbMBkG
|
||||||
|
A1UECgwSeW91dHViZS1kbCBwcm9qZWN0MRkwFwYDVQQLDBB5b3V0dWJlLWRsIHRl
|
||||||
|
c3RzMRIwEAYDVQQDDAlsb2NhbGhvc3QxHzAdBgkqhkiG9w0BCQEWEHBoaWhhZ0Bw
|
||||||
|
aGloYWcuZGUwIBcNMTUwMTMwMDExNTA4WhgPMjExNTAxMDYwMTE1MDhaMIGeMQsw
|
||||||
|
CQYDVQQGEwJERTEMMAoGA1UECAwDTlJXMRQwEgYDVQQHDAtEdWVzc2VsZG9yZjEb
|
||||||
|
MBkGA1UECgwSeW91dHViZS1kbCBwcm9qZWN0MRkwFwYDVQQLDBB5b3V0dWJlLWRs
|
||||||
|
IHRlc3RzMRIwEAYDVQQDDAlsb2NhbGhvc3QxHzAdBgkqhkiG9w0BCQEWEHBoaWhh
|
||||||
|
Z0BwaGloYWcuZGUwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDMF0bA
|
||||||
|
zaHAdIyBHRmnIp4vv40lGqEePmWqicCl0QZ0wsb5dNysSxSa7330M2QeQopGfdaU
|
||||||
|
YF1uTcNpQx6ECgBSfg+RrOBI7r/u4F+sKX8MUXVaf/5QoBUrGNGSn/pp7HMGOuQq
|
||||||
|
O6BVg4+hA1ySSwUG8mZItLRry1ISyErmW8b9xlqfd97uLME/5tX+sMelRFjUbAx8
|
||||||
|
A4CK58EvmMguHVTlXzx5RMdYcf1VScYcjlV/qA45uzP8zwI5aigfcmUD+tbGuQRh
|
||||||
|
KxUhmw0JaobtOR6+JSOAULW5gYa/egE4dWLwbyM6b6eFbdnjlQzEA1EW7ChMPAW/
|
||||||
|
Mo83KyiPtKMCSQulAgMBAAGjUDBOMB0GA1UdDgQWBBTBUZoqhQkzHQ6xNgZfFxOd
|
||||||
|
ZEVt8TAfBgNVHSMEGDAWgBTBUZoqhQkzHQ6xNgZfFxOdZEVt8TAMBgNVHRMEBTAD
|
||||||
|
AQH/MA0GCSqGSIb3DQEBCwUAA4IBAQCUOCl3T/J9B08Z+ijfOJAtkbUaEHuVZb4x
|
||||||
|
5EpZSy2ZbkLvtsftMFieHVNXn9dDswQc5qjYStCC4o60LKw4M6Y63FRsAZ/DNaqb
|
||||||
|
PY3jyCyuugZ8/sNf50vHYkAcF7SQYqOQFQX4TQsNUk2xMJIt7H0ErQFmkf/u3dg6
|
||||||
|
cy89zkT462IwxzSG7NNhIlRkL9o5qg+Y1mF9eZA1B0rcL6hO24PPTHOd90HDChBu
|
||||||
|
SZ6XMi/LzYQSTf0Vg2R+uMIVlzSlkdcZ6sqVnnqeLL8dFyIa4e9sj/D4ZCYP8Mqe
|
||||||
|
Z73H5/NNhmwCHRqVUTgm307xblQaWGhwAiDkaRvRW2aJQ0qGEdZK
|
||||||
|
-----END CERTIFICATE-----
|
@ -25,6 +25,7 @@ if os.name == 'nt':
|
|||||||
import ctypes
|
import ctypes
|
||||||
|
|
||||||
from .compat import (
|
from .compat import (
|
||||||
|
compat_basestring,
|
||||||
compat_cookiejar,
|
compat_cookiejar,
|
||||||
compat_expanduser,
|
compat_expanduser,
|
||||||
compat_http_client,
|
compat_http_client,
|
||||||
@ -54,8 +55,10 @@ from .utils import (
|
|||||||
PostProcessingError,
|
PostProcessingError,
|
||||||
platform_name,
|
platform_name,
|
||||||
preferredencoding,
|
preferredencoding,
|
||||||
|
render_table,
|
||||||
SameFileError,
|
SameFileError,
|
||||||
sanitize_filename,
|
sanitize_filename,
|
||||||
|
std_headers,
|
||||||
subtitles_filename,
|
subtitles_filename,
|
||||||
takewhile_inclusive,
|
takewhile_inclusive,
|
||||||
UnavailableVideoError,
|
UnavailableVideoError,
|
||||||
@ -73,6 +76,7 @@ from .extractor import get_info_extractor, gen_extractors
|
|||||||
from .downloader import get_suitable_downloader
|
from .downloader import get_suitable_downloader
|
||||||
from .downloader.rtmp import rtmpdump_version
|
from .downloader.rtmp import rtmpdump_version
|
||||||
from .postprocessor import (
|
from .postprocessor import (
|
||||||
|
FFmpegFixupM4aPP,
|
||||||
FFmpegFixupStretchedPP,
|
FFmpegFixupStretchedPP,
|
||||||
FFmpegMergerPP,
|
FFmpegMergerPP,
|
||||||
FFmpegPostProcessor,
|
FFmpegPostProcessor,
|
||||||
@ -134,6 +138,7 @@ class YoutubeDL(object):
|
|||||||
nooverwrites: Prevent overwriting files.
|
nooverwrites: Prevent overwriting files.
|
||||||
playliststart: Playlist item to start at.
|
playliststart: Playlist item to start at.
|
||||||
playlistend: Playlist item to end at.
|
playlistend: Playlist item to end at.
|
||||||
|
playlist_items: Specific indices of playlist to download.
|
||||||
playlistreverse: Download playlist items in reverse order.
|
playlistreverse: Download playlist items in reverse order.
|
||||||
matchtitle: Download only matching titles.
|
matchtitle: Download only matching titles.
|
||||||
rejecttitle: Reject downloads for matching titles.
|
rejecttitle: Reject downloads for matching titles.
|
||||||
@ -143,6 +148,7 @@ class YoutubeDL(object):
|
|||||||
writeinfojson: Write the video description to a .info.json file
|
writeinfojson: Write the video description to a .info.json file
|
||||||
writeannotations: Write the video annotations to a .annotations.xml file
|
writeannotations: Write the video annotations to a .annotations.xml file
|
||||||
writethumbnail: Write the thumbnail image to a file
|
writethumbnail: Write the thumbnail image to a file
|
||||||
|
write_all_thumbnails: Write all thumbnail formats to files
|
||||||
writesubtitles: Write the video subtitles to a file
|
writesubtitles: Write the video subtitles to a file
|
||||||
writeautomaticsub: Write the automatic subtitles to a file
|
writeautomaticsub: Write the automatic subtitles to a file
|
||||||
allsubtitles: Downloads all the subtitles of the video
|
allsubtitles: Downloads all the subtitles of the video
|
||||||
@ -193,11 +199,12 @@ class YoutubeDL(object):
|
|||||||
postprocessor.
|
postprocessor.
|
||||||
progress_hooks: A list of functions that get called on download
|
progress_hooks: A list of functions that get called on download
|
||||||
progress, with a dictionary with the entries
|
progress, with a dictionary with the entries
|
||||||
* filename: The final filename
|
* status: One of "downloading" and "finished".
|
||||||
* status: One of "downloading" and "finished"
|
Check this first and ignore unknown values.
|
||||||
|
|
||||||
The dict may also have some of the following entries:
|
|
||||||
|
|
||||||
|
If status is one of "downloading" or "finished", the
|
||||||
|
following properties may also be present:
|
||||||
|
* filename: The final filename (always present)
|
||||||
* downloaded_bytes: Bytes on disk
|
* downloaded_bytes: Bytes on disk
|
||||||
* total_bytes: Size of the whole file, None if unknown
|
* total_bytes: Size of the whole file, None if unknown
|
||||||
* tmpfilename: The filename we're currently writing to
|
* tmpfilename: The filename we're currently writing to
|
||||||
@ -213,17 +220,21 @@ class YoutubeDL(object):
|
|||||||
- "never": do nothing
|
- "never": do nothing
|
||||||
- "warn": only emit a warning
|
- "warn": only emit a warning
|
||||||
- "detect_or_warn": check whether we can do anything
|
- "detect_or_warn": check whether we can do anything
|
||||||
about it, warn otherwise
|
about it, warn otherwise (default)
|
||||||
source_address: (Experimental) Client-side IP address to bind to.
|
source_address: (Experimental) Client-side IP address to bind to.
|
||||||
call_home: Boolean, true iff we are allowed to contact the
|
call_home: Boolean, true iff we are allowed to contact the
|
||||||
youtube-dl servers for debugging.
|
youtube-dl servers for debugging.
|
||||||
sleep_interval: Number of seconds to sleep before each download.
|
sleep_interval: Number of seconds to sleep before each download.
|
||||||
|
external_downloader: Executable of the external downloader to call.
|
||||||
|
listformats: Print an overview of available video formats and exit.
|
||||||
|
list_thumbnails: Print a table of all thumbnails and exit.
|
||||||
|
|
||||||
|
|
||||||
The following parameters are not used by YoutubeDL itself, they are used by
|
The following parameters are not used by YoutubeDL itself, they are used by
|
||||||
the FileDownloader:
|
the FileDownloader:
|
||||||
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
|
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
|
||||||
noresizebuffer, retries, continuedl, noprogress, consoletitle
|
noresizebuffer, retries, continuedl, noprogress, consoletitle,
|
||||||
|
xattr_set_filesize.
|
||||||
|
|
||||||
The following options are used by the post processors:
|
The following options are used by the post processors:
|
||||||
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
|
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
|
||||||
@ -533,6 +544,11 @@ class YoutubeDL(object):
|
|||||||
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
||||||
tmpl = compat_expanduser(outtmpl)
|
tmpl = compat_expanduser(outtmpl)
|
||||||
filename = tmpl % template_dict
|
filename = tmpl % template_dict
|
||||||
|
# Temporary fix for #4787
|
||||||
|
# 'Treat' all problem characters by passing filename through preferredencoding
|
||||||
|
# to workaround encoding issues with subprocess on python2 @ Windows
|
||||||
|
if sys.version_info < (3, 0) and sys.platform == 'win32':
|
||||||
|
filename = encodeFilename(filename, True).decode(preferredencoding())
|
||||||
return filename
|
return filename
|
||||||
except ValueError as err:
|
except ValueError as err:
|
||||||
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
|
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
|
||||||
@ -696,24 +712,51 @@ class YoutubeDL(object):
|
|||||||
if playlistend == -1:
|
if playlistend == -1:
|
||||||
playlistend = None
|
playlistend = None
|
||||||
|
|
||||||
|
playlistitems_str = self.params.get('playlist_items', None)
|
||||||
|
playlistitems = None
|
||||||
|
if playlistitems_str is not None:
|
||||||
|
def iter_playlistitems(format):
|
||||||
|
for string_segment in format.split(','):
|
||||||
|
if '-' in string_segment:
|
||||||
|
start, end = string_segment.split('-')
|
||||||
|
for item in range(int(start), int(end) + 1):
|
||||||
|
yield int(item)
|
||||||
|
else:
|
||||||
|
yield int(string_segment)
|
||||||
|
playlistitems = iter_playlistitems(playlistitems_str)
|
||||||
|
|
||||||
ie_entries = ie_result['entries']
|
ie_entries = ie_result['entries']
|
||||||
if isinstance(ie_entries, list):
|
if isinstance(ie_entries, list):
|
||||||
n_all_entries = len(ie_entries)
|
n_all_entries = len(ie_entries)
|
||||||
entries = ie_entries[playliststart:playlistend]
|
if playlistitems:
|
||||||
|
entries = [ie_entries[i - 1] for i in playlistitems]
|
||||||
|
else:
|
||||||
|
entries = ie_entries[playliststart:playlistend]
|
||||||
n_entries = len(entries)
|
n_entries = len(entries)
|
||||||
self.to_screen(
|
self.to_screen(
|
||||||
"[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
|
"[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
|
||||||
(ie_result['extractor'], playlist, n_all_entries, n_entries))
|
(ie_result['extractor'], playlist, n_all_entries, n_entries))
|
||||||
elif isinstance(ie_entries, PagedList):
|
elif isinstance(ie_entries, PagedList):
|
||||||
entries = ie_entries.getslice(
|
if playlistitems:
|
||||||
playliststart, playlistend)
|
entries = []
|
||||||
|
for item in playlistitems:
|
||||||
|
entries.extend(ie_entries.getslice(
|
||||||
|
item - 1, item
|
||||||
|
))
|
||||||
|
else:
|
||||||
|
entries = ie_entries.getslice(
|
||||||
|
playliststart, playlistend)
|
||||||
n_entries = len(entries)
|
n_entries = len(entries)
|
||||||
self.to_screen(
|
self.to_screen(
|
||||||
"[%s] playlist %s: Downloading %d videos" %
|
"[%s] playlist %s: Downloading %d videos" %
|
||||||
(ie_result['extractor'], playlist, n_entries))
|
(ie_result['extractor'], playlist, n_entries))
|
||||||
else: # iterable
|
else: # iterable
|
||||||
entries = list(itertools.islice(
|
if playlistitems:
|
||||||
ie_entries, playliststart, playlistend))
|
entry_list = list(ie_entries)
|
||||||
|
entries = [entry_list[i - 1] for i in playlistitems]
|
||||||
|
else:
|
||||||
|
entries = list(itertools.islice(
|
||||||
|
ie_entries, playliststart, playlistend))
|
||||||
n_entries = len(entries)
|
n_entries = len(entries)
|
||||||
self.to_screen(
|
self.to_screen(
|
||||||
"[%s] playlist %s: Downloading %d videos" %
|
"[%s] playlist %s: Downloading %d videos" %
|
||||||
@ -783,7 +826,7 @@ class YoutubeDL(object):
|
|||||||
'!=': operator.ne,
|
'!=': operator.ne,
|
||||||
}
|
}
|
||||||
operator_rex = re.compile(r'''(?x)\s*\[
|
operator_rex = re.compile(r'''(?x)\s*\[
|
||||||
(?P<key>width|height|tbr|abr|vbr|filesize)
|
(?P<key>width|height|tbr|abr|vbr|filesize|fps)
|
||||||
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
|
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
|
||||||
(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
|
(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
|
||||||
\]$
|
\]$
|
||||||
@ -863,6 +906,42 @@ class YoutubeDL(object):
|
|||||||
return matches[-1]
|
return matches[-1]
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def _calc_headers(self, info_dict):
|
||||||
|
res = std_headers.copy()
|
||||||
|
|
||||||
|
add_headers = info_dict.get('http_headers')
|
||||||
|
if add_headers:
|
||||||
|
res.update(add_headers)
|
||||||
|
|
||||||
|
cookies = self._calc_cookies(info_dict)
|
||||||
|
if cookies:
|
||||||
|
res['Cookie'] = cookies
|
||||||
|
|
||||||
|
return res
|
||||||
|
|
||||||
|
def _calc_cookies(self, info_dict):
|
||||||
|
class _PseudoRequest(object):
|
||||||
|
def __init__(self, url):
|
||||||
|
self.url = url
|
||||||
|
self.headers = {}
|
||||||
|
self.unverifiable = False
|
||||||
|
|
||||||
|
def add_unredirected_header(self, k, v):
|
||||||
|
self.headers[k] = v
|
||||||
|
|
||||||
|
def get_full_url(self):
|
||||||
|
return self.url
|
||||||
|
|
||||||
|
def is_unverifiable(self):
|
||||||
|
return self.unverifiable
|
||||||
|
|
||||||
|
def has_header(self, h):
|
||||||
|
return h in self.headers
|
||||||
|
|
||||||
|
pr = _PseudoRequest(info_dict['url'])
|
||||||
|
self.cookiejar.add_cookie_header(pr)
|
||||||
|
return pr.headers.get('Cookie')
|
||||||
|
|
||||||
def process_video_result(self, info_dict, download=True):
|
def process_video_result(self, info_dict, download=True):
|
||||||
assert info_dict.get('_type', 'video') == 'video'
|
assert info_dict.get('_type', 'video') == 'video'
|
||||||
|
|
||||||
@ -877,9 +956,14 @@ class YoutubeDL(object):
|
|||||||
info_dict['playlist_index'] = None
|
info_dict['playlist_index'] = None
|
||||||
|
|
||||||
thumbnails = info_dict.get('thumbnails')
|
thumbnails = info_dict.get('thumbnails')
|
||||||
|
if thumbnails is None:
|
||||||
|
thumbnail = info_dict.get('thumbnail')
|
||||||
|
if thumbnail:
|
||||||
|
info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
|
||||||
if thumbnails:
|
if thumbnails:
|
||||||
thumbnails.sort(key=lambda t: (
|
thumbnails.sort(key=lambda t: (
|
||||||
t.get('width'), t.get('height'), t.get('url')))
|
t.get('preference'), t.get('width'), t.get('height'),
|
||||||
|
t.get('id'), t.get('url')))
|
||||||
for t in thumbnails:
|
for t in thumbnails:
|
||||||
if 'width' in t and 'height' in t:
|
if 'width' in t and 'height' in t:
|
||||||
t['resolution'] = '%dx%d' % (t['width'], t['height'])
|
t['resolution'] = '%dx%d' % (t['width'], t['height'])
|
||||||
@ -931,6 +1015,11 @@ class YoutubeDL(object):
|
|||||||
# Automatically determine file extension if missing
|
# Automatically determine file extension if missing
|
||||||
if 'ext' not in format:
|
if 'ext' not in format:
|
||||||
format['ext'] = determine_ext(format['url']).lower()
|
format['ext'] = determine_ext(format['url']).lower()
|
||||||
|
# Add HTTP headers, so that external programs can use them from the
|
||||||
|
# json output
|
||||||
|
full_format_info = info_dict.copy()
|
||||||
|
full_format_info.update(format)
|
||||||
|
format['http_headers'] = self._calc_headers(full_format_info)
|
||||||
|
|
||||||
format_limit = self.params.get('format_limit', None)
|
format_limit = self.params.get('format_limit', None)
|
||||||
if format_limit:
|
if format_limit:
|
||||||
@ -946,9 +1035,12 @@ class YoutubeDL(object):
|
|||||||
# element in the 'formats' field in info_dict is info_dict itself,
|
# element in the 'formats' field in info_dict is info_dict itself,
|
||||||
# wich can't be exported to json
|
# wich can't be exported to json
|
||||||
info_dict['formats'] = formats
|
info_dict['formats'] = formats
|
||||||
if self.params.get('listformats', None):
|
if self.params.get('listformats'):
|
||||||
self.list_formats(info_dict)
|
self.list_formats(info_dict)
|
||||||
return
|
return
|
||||||
|
if self.params.get('list_thumbnails'):
|
||||||
|
self.list_thumbnails(info_dict)
|
||||||
|
return
|
||||||
|
|
||||||
req_format = self.params.get('format')
|
req_format = self.params.get('format')
|
||||||
if req_format is None:
|
if req_format is None:
|
||||||
@ -983,7 +1075,8 @@ class YoutubeDL(object):
|
|||||||
selected_format = {
|
selected_format = {
|
||||||
'requested_formats': formats_info,
|
'requested_formats': formats_info,
|
||||||
'format': rf,
|
'format': rf,
|
||||||
'ext': formats_info[0]['ext'],
|
'format_id': '%s+%s' % (formats_info[0].get('format_id'),
|
||||||
|
formats_info[1].get('format_id')),
|
||||||
'width': formats_info[0].get('width'),
|
'width': formats_info[0].get('width'),
|
||||||
'height': formats_info[0].get('height'),
|
'height': formats_info[0].get('height'),
|
||||||
'resolution': formats_info[0].get('resolution'),
|
'resolution': formats_info[0].get('resolution'),
|
||||||
@ -1044,7 +1137,7 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
self._num_downloads += 1
|
self._num_downloads += 1
|
||||||
|
|
||||||
filename = self.prepare_filename(info_dict)
|
info_dict['_filename'] = filename = self.prepare_filename(info_dict)
|
||||||
|
|
||||||
# Forced printings
|
# Forced printings
|
||||||
if self.params.get('forcetitle', False):
|
if self.params.get('forcetitle', False):
|
||||||
@ -1069,10 +1162,7 @@ class YoutubeDL(object):
|
|||||||
if self.params.get('forceformat', False):
|
if self.params.get('forceformat', False):
|
||||||
self.to_stdout(info_dict['format'])
|
self.to_stdout(info_dict['format'])
|
||||||
if self.params.get('forcejson', False):
|
if self.params.get('forcejson', False):
|
||||||
info_dict['_filename'] = filename
|
|
||||||
self.to_stdout(json.dumps(info_dict))
|
self.to_stdout(json.dumps(info_dict))
|
||||||
if self.params.get('dump_single_json', False):
|
|
||||||
info_dict['_filename'] = filename
|
|
||||||
|
|
||||||
# Do nothing else if in simulate mode
|
# Do nothing else if in simulate mode
|
||||||
if self.params.get('simulate', False):
|
if self.params.get('simulate', False):
|
||||||
@ -1155,35 +1245,18 @@ class YoutubeDL(object):
|
|||||||
self.report_error('Cannot write metadata to JSON file ' + infofn)
|
self.report_error('Cannot write metadata to JSON file ' + infofn)
|
||||||
return
|
return
|
||||||
|
|
||||||
if self.params.get('writethumbnail', False):
|
self._write_thumbnails(info_dict, filename)
|
||||||
if info_dict.get('thumbnail') is not None:
|
|
||||||
thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
|
|
||||||
thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
|
|
||||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
|
|
||||||
self.to_screen('[%s] %s: Thumbnail is already present' %
|
|
||||||
(info_dict['extractor'], info_dict['id']))
|
|
||||||
else:
|
|
||||||
self.to_screen('[%s] %s: Downloading thumbnail ...' %
|
|
||||||
(info_dict['extractor'], info_dict['id']))
|
|
||||||
try:
|
|
||||||
uf = self.urlopen(info_dict['thumbnail'])
|
|
||||||
with open(thumb_filename, 'wb') as thumbf:
|
|
||||||
shutil.copyfileobj(uf, thumbf)
|
|
||||||
self.to_screen('[%s] %s: Writing thumbnail to: %s' %
|
|
||||||
(info_dict['extractor'], info_dict['id'], thumb_filename))
|
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
|
||||||
self.report_warning('Unable to download thumbnail "%s": %s' %
|
|
||||||
(info_dict['thumbnail'], compat_str(err)))
|
|
||||||
|
|
||||||
if not self.params.get('skip_download', False):
|
if not self.params.get('skip_download', False):
|
||||||
try:
|
try:
|
||||||
def dl(name, info):
|
def dl(name, info):
|
||||||
fd = get_suitable_downloader(info)(self, self.params)
|
fd = get_suitable_downloader(info, self.params)(self, self.params)
|
||||||
for ph in self._progress_hooks:
|
for ph in self._progress_hooks:
|
||||||
fd.add_progress_hook(ph)
|
fd.add_progress_hook(ph)
|
||||||
if self.params.get('verbose'):
|
if self.params.get('verbose'):
|
||||||
self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
|
self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
|
||||||
return fd.download(name, info)
|
return fd.download(name, info)
|
||||||
|
|
||||||
if info_dict.get('requested_formats') is not None:
|
if info_dict.get('requested_formats') is not None:
|
||||||
downloaded = []
|
downloaded = []
|
||||||
success = True
|
success = True
|
||||||
@ -1219,11 +1292,12 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
if success:
|
if success:
|
||||||
# Fixup content
|
# Fixup content
|
||||||
|
fixup_policy = self.params.get('fixup')
|
||||||
|
if fixup_policy is None:
|
||||||
|
fixup_policy = 'detect_or_warn'
|
||||||
|
|
||||||
stretched_ratio = info_dict.get('stretched_ratio')
|
stretched_ratio = info_dict.get('stretched_ratio')
|
||||||
if stretched_ratio is not None and stretched_ratio != 1:
|
if stretched_ratio is not None and stretched_ratio != 1:
|
||||||
fixup_policy = self.params.get('fixup')
|
|
||||||
if fixup_policy is None:
|
|
||||||
fixup_policy = 'detect_or_warn'
|
|
||||||
if fixup_policy == 'warn':
|
if fixup_policy == 'warn':
|
||||||
self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
|
self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
|
||||||
info_dict['id'], stretched_ratio))
|
info_dict['id'], stretched_ratio))
|
||||||
@ -1237,7 +1311,23 @@ class YoutubeDL(object):
|
|||||||
'%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
|
'%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
|
||||||
info_dict['id'], stretched_ratio))
|
info_dict['id'], stretched_ratio))
|
||||||
else:
|
else:
|
||||||
assert fixup_policy == 'ignore'
|
assert fixup_policy in ('ignore', 'never')
|
||||||
|
|
||||||
|
if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
|
||||||
|
if fixup_policy == 'warn':
|
||||||
|
self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
|
||||||
|
info_dict['id']))
|
||||||
|
elif fixup_policy == 'detect_or_warn':
|
||||||
|
fixup_pp = FFmpegFixupM4aPP(self)
|
||||||
|
if fixup_pp.available:
|
||||||
|
info_dict.setdefault('__postprocessors', [])
|
||||||
|
info_dict['__postprocessors'].append(fixup_pp)
|
||||||
|
else:
|
||||||
|
self.report_warning(
|
||||||
|
'%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
|
||||||
|
info_dict['id']))
|
||||||
|
else:
|
||||||
|
assert fixup_policy in ('ignore', 'never')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.post_process(filename, info_dict)
|
self.post_process(filename, info_dict)
|
||||||
@ -1439,8 +1529,26 @@ class YoutubeDL(object):
|
|||||||
header_line = line({
|
header_line = line({
|
||||||
'format_id': 'format code', 'ext': 'extension',
|
'format_id': 'format code', 'ext': 'extension',
|
||||||
'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
|
'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
|
||||||
self.to_screen('[info] Available formats for %s:\n%s\n%s' %
|
self.to_screen(
|
||||||
(info_dict['id'], header_line, '\n'.join(formats_s)))
|
'[info] Available formats for %s:\n%s\n%s' %
|
||||||
|
(info_dict['id'], header_line, '\n'.join(formats_s)))
|
||||||
|
|
||||||
|
def list_thumbnails(self, info_dict):
|
||||||
|
thumbnails = info_dict.get('thumbnails')
|
||||||
|
if not thumbnails:
|
||||||
|
tn_url = info_dict.get('thumbnail')
|
||||||
|
if tn_url:
|
||||||
|
thumbnails = [{'id': '0', 'url': tn_url}]
|
||||||
|
else:
|
||||||
|
self.to_screen(
|
||||||
|
'[info] No thumbnails present for %s' % info_dict['id'])
|
||||||
|
return
|
||||||
|
|
||||||
|
self.to_screen(
|
||||||
|
'[info] Thumbnails for %s:' % info_dict['id'])
|
||||||
|
self.to_screen(render_table(
|
||||||
|
['ID', 'width', 'height', 'URL'],
|
||||||
|
[[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
|
||||||
|
|
||||||
def urlopen(self, req):
|
def urlopen(self, req):
|
||||||
""" Start an HTTP download """
|
""" Start an HTTP download """
|
||||||
@ -1451,7 +1559,7 @@ class YoutubeDL(object):
|
|||||||
# urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
|
# urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
|
||||||
# To work around aforementioned issue we will replace request's original URL with
|
# To work around aforementioned issue we will replace request's original URL with
|
||||||
# percent-encoded one
|
# percent-encoded one
|
||||||
req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
|
req_is_string = isinstance(req, compat_basestring)
|
||||||
url = req if req_is_string else req.get_full_url()
|
url = req if req_is_string else req.get_full_url()
|
||||||
url_escaped = escape_url(url)
|
url_escaped = escape_url(url)
|
||||||
|
|
||||||
@ -1586,3 +1694,39 @@ class YoutubeDL(object):
|
|||||||
if encoding is None:
|
if encoding is None:
|
||||||
encoding = preferredencoding()
|
encoding = preferredencoding()
|
||||||
return encoding
|
return encoding
|
||||||
|
|
||||||
|
def _write_thumbnails(self, info_dict, filename):
|
||||||
|
if self.params.get('writethumbnail', False):
|
||||||
|
thumbnails = info_dict.get('thumbnails')
|
||||||
|
if thumbnails:
|
||||||
|
thumbnails = [thumbnails[-1]]
|
||||||
|
elif self.params.get('write_all_thumbnails', False):
|
||||||
|
thumbnails = info_dict.get('thumbnails')
|
||||||
|
else:
|
||||||
|
return
|
||||||
|
|
||||||
|
if not thumbnails:
|
||||||
|
# No thumbnails present, so return immediately
|
||||||
|
return
|
||||||
|
|
||||||
|
for t in thumbnails:
|
||||||
|
thumb_ext = determine_ext(t['url'], 'jpg')
|
||||||
|
suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
|
||||||
|
thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
|
||||||
|
thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
|
||||||
|
|
||||||
|
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
|
||||||
|
self.to_screen('[%s] %s: Thumbnail %sis already present' %
|
||||||
|
(info_dict['extractor'], info_dict['id'], thumb_display_id))
|
||||||
|
else:
|
||||||
|
self.to_screen('[%s] %s: Downloading thumbnail %s...' %
|
||||||
|
(info_dict['extractor'], info_dict['id'], thumb_display_id))
|
||||||
|
try:
|
||||||
|
uf = self.urlopen(t['url'])
|
||||||
|
with open(thumb_filename, 'wb') as thumbf:
|
||||||
|
shutil.copyfileobj(uf, thumbf)
|
||||||
|
self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
|
||||||
|
(info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
|
||||||
|
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||||
|
self.report_warning('Unable to download thumbnail "%s": %s' %
|
||||||
|
(t['url'], compat_str(err)))
|
||||||
|
@ -143,10 +143,13 @@ def _real_main(argv=None):
|
|||||||
parser.error('invalid max_filesize specified')
|
parser.error('invalid max_filesize specified')
|
||||||
opts.max_filesize = numeric_limit
|
opts.max_filesize = numeric_limit
|
||||||
if opts.retries is not None:
|
if opts.retries is not None:
|
||||||
try:
|
if opts.retries in ('inf', 'infinite'):
|
||||||
opts.retries = int(opts.retries)
|
opts_retries = float('inf')
|
||||||
except (TypeError, ValueError):
|
else:
|
||||||
parser.error('invalid retry count specified')
|
try:
|
||||||
|
opts_retries = int(opts.retries)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
parser.error('invalid retry count specified')
|
||||||
if opts.buffersize is not None:
|
if opts.buffersize is not None:
|
||||||
numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
|
numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
|
||||||
if numeric_buffersize is None:
|
if numeric_buffersize is None:
|
||||||
@ -238,6 +241,12 @@ def _real_main(argv=None):
|
|||||||
'verboseOutput': opts.verbose,
|
'verboseOutput': opts.verbose,
|
||||||
'exec_cmd': opts.exec_cmd,
|
'exec_cmd': opts.exec_cmd,
|
||||||
})
|
})
|
||||||
|
if opts.xattr_set_filesize:
|
||||||
|
try:
|
||||||
|
import xattr
|
||||||
|
xattr # Confuse flake8
|
||||||
|
except ImportError:
|
||||||
|
parser.error('setting filesize xattr requested but python-xattr is not available')
|
||||||
|
|
||||||
ydl_opts = {
|
ydl_opts = {
|
||||||
'usenetrc': opts.usenetrc,
|
'usenetrc': opts.usenetrc,
|
||||||
@ -268,7 +277,7 @@ def _real_main(argv=None):
|
|||||||
'ignoreerrors': opts.ignoreerrors,
|
'ignoreerrors': opts.ignoreerrors,
|
||||||
'ratelimit': opts.ratelimit,
|
'ratelimit': opts.ratelimit,
|
||||||
'nooverwrites': opts.nooverwrites,
|
'nooverwrites': opts.nooverwrites,
|
||||||
'retries': opts.retries,
|
'retries': opts_retries,
|
||||||
'buffersize': opts.buffersize,
|
'buffersize': opts.buffersize,
|
||||||
'noresizebuffer': opts.noresizebuffer,
|
'noresizebuffer': opts.noresizebuffer,
|
||||||
'continuedl': opts.continue_dl,
|
'continuedl': opts.continue_dl,
|
||||||
@ -286,6 +295,7 @@ def _real_main(argv=None):
|
|||||||
'writeannotations': opts.writeannotations,
|
'writeannotations': opts.writeannotations,
|
||||||
'writeinfojson': opts.writeinfojson,
|
'writeinfojson': opts.writeinfojson,
|
||||||
'writethumbnail': opts.writethumbnail,
|
'writethumbnail': opts.writethumbnail,
|
||||||
|
'write_all_thumbnails': opts.write_all_thumbnails,
|
||||||
'writesubtitles': opts.writesubtitles,
|
'writesubtitles': opts.writesubtitles,
|
||||||
'writeautomaticsub': opts.writeautomaticsub,
|
'writeautomaticsub': opts.writeautomaticsub,
|
||||||
'allsubtitles': opts.allsubtitles,
|
'allsubtitles': opts.allsubtitles,
|
||||||
@ -330,6 +340,10 @@ def _real_main(argv=None):
|
|||||||
'source_address': opts.source_address,
|
'source_address': opts.source_address,
|
||||||
'call_home': opts.call_home,
|
'call_home': opts.call_home,
|
||||||
'sleep_interval': opts.sleep_interval,
|
'sleep_interval': opts.sleep_interval,
|
||||||
|
'external_downloader': opts.external_downloader,
|
||||||
|
'list_thumbnails': opts.list_thumbnails,
|
||||||
|
'playlist_items': opts.playlist_items,
|
||||||
|
'xattr_set_filesize': opts.xattr_set_filesize,
|
||||||
}
|
}
|
||||||
|
|
||||||
with YoutubeDL(ydl_opts) as ydl:
|
with YoutubeDL(ydl_opts) as ydl:
|
||||||
@ -347,7 +361,9 @@ def _real_main(argv=None):
|
|||||||
sys.exit()
|
sys.exit()
|
||||||
|
|
||||||
ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
|
ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
|
||||||
parser.error('you must provide at least one URL')
|
parser.error(
|
||||||
|
'You must provide at least one URL.\n'
|
||||||
|
'Type youtube-dl --help to see a list of all options.')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if opts.load_info_filename is not None:
|
if opts.load_info_filename is not None:
|
||||||
|
@ -71,6 +71,11 @@ try:
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
|
compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
|
||||||
|
|
||||||
|
try:
|
||||||
|
import http.server as compat_http_server
|
||||||
|
except ImportError:
|
||||||
|
import BaseHTTPServer as compat_http_server
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from urllib.parse import unquote as compat_urllib_parse_unquote
|
from urllib.parse import unquote as compat_urllib_parse_unquote
|
||||||
except ImportError:
|
except ImportError:
|
||||||
@ -109,6 +114,26 @@ except ImportError:
|
|||||||
string += pct_sequence.decode(encoding, errors)
|
string += pct_sequence.decode(encoding, errors)
|
||||||
return string
|
return string
|
||||||
|
|
||||||
|
try:
|
||||||
|
compat_str = unicode # Python 2
|
||||||
|
except NameError:
|
||||||
|
compat_str = str
|
||||||
|
|
||||||
|
try:
|
||||||
|
compat_basestring = basestring # Python 2
|
||||||
|
except NameError:
|
||||||
|
compat_basestring = str
|
||||||
|
|
||||||
|
try:
|
||||||
|
compat_chr = unichr # Python 2
|
||||||
|
except NameError:
|
||||||
|
compat_chr = chr
|
||||||
|
|
||||||
|
try:
|
||||||
|
from xml.etree.ElementTree import ParseError as compat_xml_parse_error
|
||||||
|
except ImportError: # Python 2.6
|
||||||
|
from xml.parsers.expat import ExpatError as compat_xml_parse_error
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from urllib.parse import parse_qs as compat_parse_qs
|
from urllib.parse import parse_qs as compat_parse_qs
|
||||||
@ -118,7 +143,7 @@ except ImportError: # Python 2
|
|||||||
|
|
||||||
def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
|
def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
|
||||||
encoding='utf-8', errors='replace'):
|
encoding='utf-8', errors='replace'):
|
||||||
qs, _coerce_result = qs, unicode
|
qs, _coerce_result = qs, compat_str
|
||||||
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
|
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
|
||||||
r = []
|
r = []
|
||||||
for name_value in pairs:
|
for name_value in pairs:
|
||||||
@ -157,21 +182,6 @@ except ImportError: # Python 2
|
|||||||
parsed_result[name] = [value]
|
parsed_result[name] = [value]
|
||||||
return parsed_result
|
return parsed_result
|
||||||
|
|
||||||
try:
|
|
||||||
compat_str = unicode # Python 2
|
|
||||||
except NameError:
|
|
||||||
compat_str = str
|
|
||||||
|
|
||||||
try:
|
|
||||||
compat_chr = unichr # Python 2
|
|
||||||
except NameError:
|
|
||||||
compat_chr = chr
|
|
||||||
|
|
||||||
try:
|
|
||||||
from xml.etree.ElementTree import ParseError as compat_xml_parse_error
|
|
||||||
except ImportError: # Python 2.6
|
|
||||||
from xml.parsers.expat import ExpatError as compat_xml_parse_error
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from shlex import quote as shlex_quote
|
from shlex import quote as shlex_quote
|
||||||
except ImportError: # Python < 3.3
|
except ImportError: # Python < 3.3
|
||||||
@ -357,6 +367,7 @@ def workaround_optparse_bug9161():
|
|||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
'compat_HTTPError',
|
'compat_HTTPError',
|
||||||
|
'compat_basestring',
|
||||||
'compat_chr',
|
'compat_chr',
|
||||||
'compat_cookiejar',
|
'compat_cookiejar',
|
||||||
'compat_expanduser',
|
'compat_expanduser',
|
||||||
@ -365,6 +376,7 @@ __all__ = [
|
|||||||
'compat_html_entities',
|
'compat_html_entities',
|
||||||
'compat_html_parser',
|
'compat_html_parser',
|
||||||
'compat_http_client',
|
'compat_http_client',
|
||||||
|
'compat_http_server',
|
||||||
'compat_kwargs',
|
'compat_kwargs',
|
||||||
'compat_ord',
|
'compat_ord',
|
||||||
'compat_parse_qs',
|
'compat_parse_qs',
|
||||||
|
@ -1,35 +1,41 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import FileDownloader
|
from .common import FileDownloader
|
||||||
|
from .external import get_external_downloader
|
||||||
|
from .f4m import F4mFD
|
||||||
from .hls import HlsFD
|
from .hls import HlsFD
|
||||||
from .hls import NativeHlsFD
|
from .hls import NativeHlsFD
|
||||||
from .http import HttpFD
|
from .http import HttpFD
|
||||||
from .mplayer import MplayerFD
|
from .mplayer import MplayerFD
|
||||||
from .rtmp import RtmpFD
|
from .rtmp import RtmpFD
|
||||||
from .f4m import F4mFD
|
|
||||||
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_protocol,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
PROTOCOL_MAP = {
|
||||||
|
'rtmp': RtmpFD,
|
||||||
|
'm3u8_native': NativeHlsFD,
|
||||||
|
'm3u8': HlsFD,
|
||||||
|
'mms': MplayerFD,
|
||||||
|
'rtsp': MplayerFD,
|
||||||
|
'f4m': F4mFD,
|
||||||
|
}
|
||||||
|
|
||||||
def get_suitable_downloader(info_dict):
|
|
||||||
|
def get_suitable_downloader(info_dict, params={}):
|
||||||
"""Get the downloader class that can handle the info dict."""
|
"""Get the downloader class that can handle the info dict."""
|
||||||
url = info_dict['url']
|
protocol = determine_protocol(info_dict)
|
||||||
protocol = info_dict.get('protocol')
|
info_dict['protocol'] = protocol
|
||||||
|
|
||||||
|
external_downloader = params.get('external_downloader')
|
||||||
|
if external_downloader is not None:
|
||||||
|
ed = get_external_downloader(external_downloader)
|
||||||
|
if ed.supports(info_dict):
|
||||||
|
return ed
|
||||||
|
|
||||||
|
return PROTOCOL_MAP.get(protocol, HttpFD)
|
||||||
|
|
||||||
if url.startswith('rtmp'):
|
|
||||||
return RtmpFD
|
|
||||||
if protocol == 'm3u8_native':
|
|
||||||
return NativeHlsFD
|
|
||||||
if (protocol == 'm3u8') or (protocol is None and determine_ext(url) == 'm3u8'):
|
|
||||||
return HlsFD
|
|
||||||
if url.startswith('mms') or url.startswith('rtsp'):
|
|
||||||
return MplayerFD
|
|
||||||
if determine_ext(url) == 'f4m':
|
|
||||||
return F4mFD
|
|
||||||
else:
|
|
||||||
return HttpFD
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
'get_suitable_downloader',
|
'get_suitable_downloader',
|
||||||
|
@ -25,21 +25,23 @@ class FileDownloader(object):
|
|||||||
|
|
||||||
Available options:
|
Available options:
|
||||||
|
|
||||||
verbose: Print additional info to stdout.
|
verbose: Print additional info to stdout.
|
||||||
quiet: Do not print messages to stdout.
|
quiet: Do not print messages to stdout.
|
||||||
ratelimit: Download speed limit, in bytes/sec.
|
ratelimit: Download speed limit, in bytes/sec.
|
||||||
retries: Number of times to retry for HTTP error 5xx
|
retries: Number of times to retry for HTTP error 5xx
|
||||||
buffersize: Size of download buffer in bytes.
|
buffersize: Size of download buffer in bytes.
|
||||||
noresizebuffer: Do not automatically resize the download buffer.
|
noresizebuffer: Do not automatically resize the download buffer.
|
||||||
continuedl: Try to continue downloads if possible.
|
continuedl: Try to continue downloads if possible.
|
||||||
noprogress: Do not print the progress bar.
|
noprogress: Do not print the progress bar.
|
||||||
logtostderr: Log messages to stderr instead of stdout.
|
logtostderr: Log messages to stderr instead of stdout.
|
||||||
consoletitle: Display progress in console window's titlebar.
|
consoletitle: Display progress in console window's titlebar.
|
||||||
nopart: Do not use temporary .part files.
|
nopart: Do not use temporary .part files.
|
||||||
updatetime: Use the Last-modified header to set output file timestamps.
|
updatetime: Use the Last-modified header to set output file timestamps.
|
||||||
test: Download only first bytes to test the downloader.
|
test: Download only first bytes to test the downloader.
|
||||||
min_filesize: Skip files smaller than this size
|
min_filesize: Skip files smaller than this size
|
||||||
max_filesize: Skip files larger than this size
|
max_filesize: Skip files larger than this size
|
||||||
|
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
|
||||||
|
(experimenatal)
|
||||||
|
|
||||||
Subclasses of this one must re-define the real_download method.
|
Subclasses of this one must re-define the real_download method.
|
||||||
"""
|
"""
|
||||||
@ -325,3 +327,24 @@ class FileDownloader(object):
|
|||||||
# See YoutubeDl.py (search for progress_hooks) for a description of
|
# See YoutubeDl.py (search for progress_hooks) for a description of
|
||||||
# this interface
|
# this interface
|
||||||
self._progress_hooks.append(ph)
|
self._progress_hooks.append(ph)
|
||||||
|
|
||||||
|
def _debug_cmd(self, args, subprocess_encoding, exe=None):
|
||||||
|
if not self.params.get('verbose', False):
|
||||||
|
return
|
||||||
|
|
||||||
|
if exe is None:
|
||||||
|
exe = os.path.basename(args[0])
|
||||||
|
|
||||||
|
if subprocess_encoding:
|
||||||
|
str_args = [
|
||||||
|
a.decode(subprocess_encoding) if isinstance(a, bytes) else a
|
||||||
|
for a in args]
|
||||||
|
else:
|
||||||
|
str_args = args
|
||||||
|
try:
|
||||||
|
import pipes
|
||||||
|
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
|
||||||
|
except ImportError:
|
||||||
|
shell_quote = repr
|
||||||
|
self.to_screen('[debug] %s command line: %s' % (
|
||||||
|
exe, shell_quote(str_args)))
|
||||||
|
126
youtube_dl/downloader/external.py
Normal file
126
youtube_dl/downloader/external.py
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import os.path
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from .common import FileDownloader
|
||||||
|
from ..utils import (
|
||||||
|
encodeFilename,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ExternalFD(FileDownloader):
|
||||||
|
def real_download(self, filename, info_dict):
|
||||||
|
self.report_destination(filename)
|
||||||
|
tmpfilename = self.temp_name(filename)
|
||||||
|
|
||||||
|
retval = self._call_downloader(tmpfilename, info_dict)
|
||||||
|
if retval == 0:
|
||||||
|
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||||
|
self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
|
||||||
|
self.try_rename(tmpfilename, filename)
|
||||||
|
self._hook_progress({
|
||||||
|
'downloaded_bytes': fsize,
|
||||||
|
'total_bytes': fsize,
|
||||||
|
'filename': filename,
|
||||||
|
'status': 'finished',
|
||||||
|
})
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
self.to_stderr('\n')
|
||||||
|
self.report_error('%s exited with code %d' % (
|
||||||
|
self.get_basename(), retval))
|
||||||
|
return False
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_basename(cls):
|
||||||
|
return cls.__name__[:-2].lower()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def exe(self):
|
||||||
|
return self.params.get('external_downloader')
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def supports(cls, info_dict):
|
||||||
|
return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
|
||||||
|
|
||||||
|
def _source_address(self, command_option):
|
||||||
|
source_address = self.params.get('source_address')
|
||||||
|
if source_address is None:
|
||||||
|
return []
|
||||||
|
return [command_option, source_address]
|
||||||
|
|
||||||
|
def _call_downloader(self, tmpfilename, info_dict):
|
||||||
|
""" Either overwrite this or implement _make_cmd """
|
||||||
|
cmd = self._make_cmd(tmpfilename, info_dict)
|
||||||
|
|
||||||
|
if sys.platform == 'win32' and sys.version_info < (3, 0):
|
||||||
|
# Windows subprocess module does not actually support Unicode
|
||||||
|
# on Python 2.x
|
||||||
|
# See http://stackoverflow.com/a/9951851/35070
|
||||||
|
subprocess_encoding = sys.getfilesystemencoding()
|
||||||
|
cmd = [a.encode(subprocess_encoding, 'ignore') for a in cmd]
|
||||||
|
else:
|
||||||
|
subprocess_encoding = None
|
||||||
|
self._debug_cmd(cmd, subprocess_encoding)
|
||||||
|
|
||||||
|
p = subprocess.Popen(
|
||||||
|
cmd, stderr=subprocess.PIPE)
|
||||||
|
_, stderr = p.communicate()
|
||||||
|
if p.returncode != 0:
|
||||||
|
self.to_stderr(stderr)
|
||||||
|
return p.returncode
|
||||||
|
|
||||||
|
|
||||||
|
class CurlFD(ExternalFD):
|
||||||
|
def _make_cmd(self, tmpfilename, info_dict):
|
||||||
|
cmd = [self.exe, '-o', tmpfilename]
|
||||||
|
for key, val in info_dict['http_headers'].items():
|
||||||
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
|
cmd += self._source_address('--interface')
|
||||||
|
cmd += ['--', info_dict['url']]
|
||||||
|
return cmd
|
||||||
|
|
||||||
|
|
||||||
|
class WgetFD(ExternalFD):
|
||||||
|
def _make_cmd(self, tmpfilename, info_dict):
|
||||||
|
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
|
||||||
|
for key, val in info_dict['http_headers'].items():
|
||||||
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
|
cmd += self._source_address('--bind-address')
|
||||||
|
cmd += ['--', info_dict['url']]
|
||||||
|
return cmd
|
||||||
|
|
||||||
|
|
||||||
|
class Aria2cFD(ExternalFD):
|
||||||
|
def _make_cmd(self, tmpfilename, info_dict):
|
||||||
|
cmd = [
|
||||||
|
self.exe, '-c',
|
||||||
|
'--min-split-size', '1M', '--max-connection-per-server', '4']
|
||||||
|
dn = os.path.dirname(tmpfilename)
|
||||||
|
if dn:
|
||||||
|
cmd += ['--dir', dn]
|
||||||
|
cmd += ['--out', os.path.basename(tmpfilename)]
|
||||||
|
for key, val in info_dict['http_headers'].items():
|
||||||
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
|
cmd += self._source_address('--interface')
|
||||||
|
cmd += ['--', info_dict['url']]
|
||||||
|
return cmd
|
||||||
|
|
||||||
|
_BY_NAME = dict(
|
||||||
|
(klass.get_basename(), klass)
|
||||||
|
for name, klass in globals().items()
|
||||||
|
if name.endswith('FD') and name != 'ExternalFD'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def list_external_downloaders():
|
||||||
|
return sorted(_BY_NAME.keys())
|
||||||
|
|
||||||
|
|
||||||
|
def get_external_downloader(external_downloader):
|
||||||
|
""" Given the name of the executable, see whether we support the given
|
||||||
|
downloader . """
|
||||||
|
bn = os.path.basename(external_downloader)
|
||||||
|
return _BY_NAME[bn]
|
@ -177,13 +177,12 @@ def build_fragments_list(boot_info):
|
|||||||
""" Return a list of (segment, fragment) for each fragment in the video """
|
""" Return a list of (segment, fragment) for each fragment in the video """
|
||||||
res = []
|
res = []
|
||||||
segment_run_table = boot_info['segments'][0]
|
segment_run_table = boot_info['segments'][0]
|
||||||
# I've only found videos with one segment
|
|
||||||
segment_run_entry = segment_run_table['segment_run'][0]
|
|
||||||
n_frags = segment_run_entry[1]
|
|
||||||
fragment_run_entry_table = boot_info['fragments'][0]['fragments']
|
fragment_run_entry_table = boot_info['fragments'][0]['fragments']
|
||||||
first_frag_number = fragment_run_entry_table[0]['first']
|
first_frag_number = fragment_run_entry_table[0]['first']
|
||||||
for (i, frag_number) in zip(range(1, n_frags + 1), itertools.count(first_frag_number)):
|
fragments_counter = itertools.count(first_frag_number)
|
||||||
res.append((1, frag_number))
|
for segment, fragments_count in segment_run_table['segment_run']:
|
||||||
|
for _ in range(fragments_count):
|
||||||
|
res.append((segment, next(fragments_counter)))
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
@ -231,6 +230,23 @@ class F4mFD(FileDownloader):
|
|||||||
A downloader for f4m manifests or AdobeHDS.
|
A downloader for f4m manifests or AdobeHDS.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
def _get_unencrypted_media(self, doc):
|
||||||
|
media = doc.findall(_add_ns('media'))
|
||||||
|
if not media:
|
||||||
|
self.report_error('No media found')
|
||||||
|
for e in (doc.findall(_add_ns('drmAdditionalHeader')) +
|
||||||
|
doc.findall(_add_ns('drmAdditionalHeaderSet'))):
|
||||||
|
# If id attribute is missing it's valid for all media nodes
|
||||||
|
# without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
|
||||||
|
if 'id' not in e.attrib:
|
||||||
|
self.report_error('Missing ID in f4m DRM')
|
||||||
|
media = list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and
|
||||||
|
'drmAdditionalHeaderSetId' not in e.attrib,
|
||||||
|
media))
|
||||||
|
if not media:
|
||||||
|
self.report_error('Unsupported DRM')
|
||||||
|
return media
|
||||||
|
|
||||||
def real_download(self, filename, info_dict):
|
def real_download(self, filename, info_dict):
|
||||||
man_url = info_dict['url']
|
man_url = info_dict['url']
|
||||||
requested_bitrate = info_dict.get('tbr')
|
requested_bitrate = info_dict.get('tbr')
|
||||||
@ -249,7 +265,8 @@ class F4mFD(FileDownloader):
|
|||||||
)
|
)
|
||||||
|
|
||||||
doc = etree.fromstring(manifest)
|
doc = etree.fromstring(manifest)
|
||||||
formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
|
formats = [(int(f.attrib.get('bitrate', -1)), f)
|
||||||
|
for f in self._get_unencrypted_media(doc)]
|
||||||
if requested_bitrate is None:
|
if requested_bitrate is None:
|
||||||
# get the best format
|
# get the best format
|
||||||
formats = sorted(formats, key=lambda f: f[0])
|
formats = sorted(formats, key=lambda f: f[0])
|
||||||
|
@ -11,6 +11,7 @@ from ..compat import (
|
|||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
encodeArgument,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -21,23 +22,22 @@ class HlsFD(FileDownloader):
|
|||||||
self.report_destination(filename)
|
self.report_destination(filename)
|
||||||
tmpfilename = self.temp_name(filename)
|
tmpfilename = self.temp_name(filename)
|
||||||
|
|
||||||
args = [
|
|
||||||
'-y', '-i', url, '-f', 'mp4', '-c', 'copy',
|
|
||||||
'-bsf:a', 'aac_adtstoasc',
|
|
||||||
encodeFilename(tmpfilename, for_subprocess=True)]
|
|
||||||
|
|
||||||
ffpp = FFmpegPostProcessor(downloader=self)
|
ffpp = FFmpegPostProcessor(downloader=self)
|
||||||
program = ffpp._executable
|
program = ffpp._executable
|
||||||
if program is None:
|
if program is None:
|
||||||
self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
|
self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
|
||||||
return False
|
return False
|
||||||
ffpp.check_version()
|
ffpp.check_version()
|
||||||
cmd = [program] + args
|
|
||||||
|
|
||||||
retval = subprocess.call(cmd)
|
args = [
|
||||||
|
encodeArgument(opt)
|
||||||
|
for opt in (program, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
|
||||||
|
args.append(encodeFilename(tmpfilename, True))
|
||||||
|
|
||||||
|
retval = subprocess.call(args)
|
||||||
if retval == 0:
|
if retval == 0:
|
||||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||||
self.to_screen('\r[%s] %s bytes' % (cmd[0], fsize))
|
self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
|
||||||
self.try_rename(tmpfilename, filename)
|
self.try_rename(tmpfilename, filename)
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
'downloaded_bytes': fsize,
|
'downloaded_bytes': fsize,
|
||||||
|
@ -3,6 +3,9 @@ from __future__ import unicode_literals
|
|||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
|
|
||||||
|
from socket import error as SocketError
|
||||||
|
import errno
|
||||||
|
|
||||||
from .common import FileDownloader
|
from .common import FileDownloader
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
@ -24,10 +27,6 @@ class HttpFD(FileDownloader):
|
|||||||
|
|
||||||
# Do not include the Accept-Encoding header
|
# Do not include the Accept-Encoding header
|
||||||
headers = {'Youtubedl-no-compression': 'True'}
|
headers = {'Youtubedl-no-compression': 'True'}
|
||||||
if 'user_agent' in info_dict:
|
|
||||||
headers['Youtubedl-user-agent'] = info_dict['user_agent']
|
|
||||||
if 'http_referer' in info_dict:
|
|
||||||
headers['Referer'] = info_dict['http_referer']
|
|
||||||
add_headers = info_dict.get('http_headers')
|
add_headers = info_dict.get('http_headers')
|
||||||
if add_headers:
|
if add_headers:
|
||||||
headers.update(add_headers)
|
headers.update(add_headers)
|
||||||
@ -103,6 +102,11 @@ class HttpFD(FileDownloader):
|
|||||||
resume_len = 0
|
resume_len = 0
|
||||||
open_mode = 'wb'
|
open_mode = 'wb'
|
||||||
break
|
break
|
||||||
|
except SocketError as e:
|
||||||
|
if e.errno != errno.ECONNRESET:
|
||||||
|
# Connection reset is no problem, just retry
|
||||||
|
raise
|
||||||
|
|
||||||
# Retry
|
# Retry
|
||||||
count += 1
|
count += 1
|
||||||
if count <= retries:
|
if count <= retries:
|
||||||
@ -161,6 +165,14 @@ class HttpFD(FileDownloader):
|
|||||||
except (OSError, IOError) as err:
|
except (OSError, IOError) as err:
|
||||||
self.report_error('unable to open for writing: %s' % str(err))
|
self.report_error('unable to open for writing: %s' % str(err))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
if self.params.get('xattr_set_filesize', False) and data_len is not None:
|
||||||
|
try:
|
||||||
|
import xattr
|
||||||
|
xattr.setxattr(tmpfilename, 'user.ytdl.filesize', str(data_len))
|
||||||
|
except(OSError, IOError, ImportError) as err:
|
||||||
|
self.report_error('unable to set filesize xattr: %s' % str(err))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
stream.write(data_block)
|
stream.write(data_block)
|
||||||
except (IOError, OSError) as err:
|
except (IOError, OSError) as err:
|
||||||
|
@ -104,6 +104,9 @@ class RtmpFD(FileDownloader):
|
|||||||
live = info_dict.get('rtmp_live', False)
|
live = info_dict.get('rtmp_live', False)
|
||||||
conn = info_dict.get('rtmp_conn', None)
|
conn = info_dict.get('rtmp_conn', None)
|
||||||
protocol = info_dict.get('rtmp_protocol', None)
|
protocol = info_dict.get('rtmp_protocol', None)
|
||||||
|
real_time = info_dict.get('rtmp_real_time', False)
|
||||||
|
no_resume = info_dict.get('no_resume', False)
|
||||||
|
continue_dl = info_dict.get('continuedl', False)
|
||||||
|
|
||||||
self.report_destination(filename)
|
self.report_destination(filename)
|
||||||
tmpfilename = self.temp_name(filename)
|
tmpfilename = self.temp_name(filename)
|
||||||
@ -141,7 +144,14 @@ class RtmpFD(FileDownloader):
|
|||||||
basic_args += ['--conn', conn]
|
basic_args += ['--conn', conn]
|
||||||
if protocol is not None:
|
if protocol is not None:
|
||||||
basic_args += ['--protocol', protocol]
|
basic_args += ['--protocol', protocol]
|
||||||
args = basic_args + [[], ['--resume', '--skip', '1']][not live and self.params.get('continuedl', False)]
|
if real_time:
|
||||||
|
basic_args += ['--realtime']
|
||||||
|
|
||||||
|
args = basic_args
|
||||||
|
if not no_resume and continue_dl and not live:
|
||||||
|
args += ['--resume']
|
||||||
|
if not live and continue_dl:
|
||||||
|
args += ['--skip', '1']
|
||||||
|
|
||||||
if sys.platform == 'win32' and sys.version_info < (3, 0):
|
if sys.platform == 'win32' and sys.version_info < (3, 0):
|
||||||
# Windows subprocess module does not actually support Unicode
|
# Windows subprocess module does not actually support Unicode
|
||||||
@ -152,19 +162,7 @@ class RtmpFD(FileDownloader):
|
|||||||
else:
|
else:
|
||||||
subprocess_encoding = None
|
subprocess_encoding = None
|
||||||
|
|
||||||
if self.params.get('verbose', False):
|
self._debug_cmd(args, subprocess_encoding, exe='rtmpdump')
|
||||||
if subprocess_encoding:
|
|
||||||
str_args = [
|
|
||||||
a.decode(subprocess_encoding) if isinstance(a, bytes) else a
|
|
||||||
for a in args]
|
|
||||||
else:
|
|
||||||
str_args = args
|
|
||||||
try:
|
|
||||||
import pipes
|
|
||||||
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
|
|
||||||
except ImportError:
|
|
||||||
shell_quote = repr
|
|
||||||
self.to_screen('[debug] rtmpdump command line: ' + shell_quote(str_args))
|
|
||||||
|
|
||||||
RD_SUCCESS = 0
|
RD_SUCCESS = 0
|
||||||
RD_FAILED = 1
|
RD_FAILED = 1
|
||||||
|
@ -29,7 +29,6 @@ from .arte import (
|
|||||||
from .atresplayer import AtresPlayerIE
|
from .atresplayer import AtresPlayerIE
|
||||||
from .atttechchannel import ATTTechChannelIE
|
from .atttechchannel import ATTTechChannelIE
|
||||||
from .audiomack import AudiomackIE, AudiomackAlbumIE
|
from .audiomack import AudiomackIE, AudiomackAlbumIE
|
||||||
from .auengine import AUEngineIE
|
|
||||||
from .azubu import AzubuIE
|
from .azubu import AzubuIE
|
||||||
from .bambuser import BambuserIE, BambuserChannelIE
|
from .bambuser import BambuserIE, BambuserChannelIE
|
||||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||||
@ -83,6 +82,7 @@ from .crunchyroll import (
|
|||||||
CrunchyrollShowPlaylistIE
|
CrunchyrollShowPlaylistIE
|
||||||
)
|
)
|
||||||
from .cspan import CSpanIE
|
from .cspan import CSpanIE
|
||||||
|
from .ctsnews import CtsNewsIE
|
||||||
from .dailymotion import (
|
from .dailymotion import (
|
||||||
DailymotionIE,
|
DailymotionIE,
|
||||||
DailymotionPlaylistIE,
|
DailymotionPlaylistIE,
|
||||||
@ -90,6 +90,7 @@ from .dailymotion import (
|
|||||||
)
|
)
|
||||||
from .daum import DaumIE
|
from .daum import DaumIE
|
||||||
from .dbtv import DBTVIE
|
from .dbtv import DBTVIE
|
||||||
|
from .dctp import DctpTvIE
|
||||||
from .deezer import DeezerPlaylistIE
|
from .deezer import DeezerPlaylistIE
|
||||||
from .dfb import DFBIE
|
from .dfb import DFBIE
|
||||||
from .dotsub import DotsubIE
|
from .dotsub import DotsubIE
|
||||||
@ -181,6 +182,7 @@ from .heise import HeiseIE
|
|||||||
from .hellporno import HellPornoIE
|
from .hellporno import HellPornoIE
|
||||||
from .helsinki import HelsinkiIE
|
from .helsinki import HelsinkiIE
|
||||||
from .hentaistigma import HentaiStigmaIE
|
from .hentaistigma import HentaiStigmaIE
|
||||||
|
from .historicfilms import HistoricFilmsIE
|
||||||
from .hitbox import HitboxIE, HitboxLiveIE
|
from .hitbox import HitboxIE, HitboxLiveIE
|
||||||
from .hornbunny import HornBunnyIE
|
from .hornbunny import HornBunnyIE
|
||||||
from .hostingbulk import HostingBulkIE
|
from .hostingbulk import HostingBulkIE
|
||||||
@ -285,9 +287,19 @@ from .netzkino import NetzkinoIE
|
|||||||
from .nerdcubed import NerdCubedFeedIE
|
from .nerdcubed import NerdCubedFeedIE
|
||||||
from .newgrounds import NewgroundsIE
|
from .newgrounds import NewgroundsIE
|
||||||
from .newstube import NewstubeIE
|
from .newstube import NewstubeIE
|
||||||
|
from .nextmedia import (
|
||||||
|
NextMediaIE,
|
||||||
|
NextMediaActionNewsIE,
|
||||||
|
AppleDailyRealtimeNewsIE,
|
||||||
|
AppleDailyAnimationNewsIE
|
||||||
|
)
|
||||||
from .nfb import NFBIE
|
from .nfb import NFBIE
|
||||||
from .nfl import NFLIE
|
from .nfl import NFLIE
|
||||||
from .nhl import NHLIE, NHLVideocenterIE
|
from .nhl import (
|
||||||
|
NHLIE,
|
||||||
|
NHLNewsIE,
|
||||||
|
NHLVideocenterIE,
|
||||||
|
)
|
||||||
from .niconico import NiconicoIE, NiconicoPlaylistIE
|
from .niconico import NiconicoIE, NiconicoPlaylistIE
|
||||||
from .ninegag import NineGagIE
|
from .ninegag import NineGagIE
|
||||||
from .noco import NocoIE
|
from .noco import NocoIE
|
||||||
@ -305,7 +317,8 @@ from .nrk import (
|
|||||||
NRKIE,
|
NRKIE,
|
||||||
NRKTVIE,
|
NRKTVIE,
|
||||||
)
|
)
|
||||||
from .ntv import NTVIE
|
from .ntvde import NTVDeIE
|
||||||
|
from .ntvru import NTVRuIE
|
||||||
from .nytimes import NYTimesIE
|
from .nytimes import NYTimesIE
|
||||||
from .nuvid import NuvidIE
|
from .nuvid import NuvidIE
|
||||||
from .oktoberfesttv import OktoberfestTVIE
|
from .oktoberfesttv import OktoberfestTVIE
|
||||||
@ -350,6 +363,7 @@ from .rtbf import RTBFIE
|
|||||||
from .rte import RteIE
|
from .rte import RteIE
|
||||||
from .rtlnl import RtlXlIE
|
from .rtlnl import RtlXlIE
|
||||||
from .rtlnow import RTLnowIE
|
from .rtlnow import RTLnowIE
|
||||||
|
from .rtl2 import RTL2IE
|
||||||
from .rtp import RTPIE
|
from .rtp import RTPIE
|
||||||
from .rts import RTSIE
|
from .rts import RTSIE
|
||||||
from .rtve import RTVEALaCartaIE, RTVELiveIE
|
from .rtve import RTVEALaCartaIE, RTVELiveIE
|
||||||
@ -467,6 +481,7 @@ from .twitch import (
|
|||||||
TwitchVodIE,
|
TwitchVodIE,
|
||||||
TwitchProfileIE,
|
TwitchProfileIE,
|
||||||
TwitchPastBroadcastsIE,
|
TwitchPastBroadcastsIE,
|
||||||
|
TwitchBookmarksIE,
|
||||||
TwitchStreamIE,
|
TwitchStreamIE,
|
||||||
)
|
)
|
||||||
from .ubu import UbuIE
|
from .ubu import UbuIE
|
||||||
@ -545,6 +560,7 @@ from .xminus import XMinusIE
|
|||||||
from .xnxx import XNXXIE
|
from .xnxx import XNXXIE
|
||||||
from .xvideos import XVideosIE
|
from .xvideos import XVideosIE
|
||||||
from .xtube import XTubeUserIE, XTubeIE
|
from .xtube import XTubeUserIE, XTubeIE
|
||||||
|
from .xuite import XuiteIE
|
||||||
from .xxxymovies import XXXYMoviesIE
|
from .xxxymovies import XXXYMoviesIE
|
||||||
from .yahoo import (
|
from .yahoo import (
|
||||||
YahooIE,
|
YahooIE,
|
||||||
|
@ -122,14 +122,15 @@ class AppleTrailersIE(InfoExtractor):
|
|||||||
playlist.append({
|
playlist.append({
|
||||||
'_type': 'video',
|
'_type': 'video',
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'title': title,
|
'title': title,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
'uploader_id': uploader_id,
|
'uploader_id': uploader_id,
|
||||||
'user_agent': 'QuickTime compatible (youtube-dl)',
|
'http_headers': {
|
||||||
|
'User-Agent': 'QuickTime compatible (youtube-dl)',
|
||||||
|
},
|
||||||
})
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -23,13 +23,7 @@ class ARDMediathekIE(InfoExtractor):
|
|||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
|
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
|
||||||
'file': '22429276.mp4',
|
'only_matching': True,
|
||||||
'md5': '469751912f1de0816a9fc9df8336476c',
|
|
||||||
'info_dict': {
|
|
||||||
'title': 'Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?',
|
|
||||||
'description': 'Das Erste Mediathek [ARD]: Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?, Anne Will, Über die Spionage-Affäre diskutieren Clemens Binninger, Katrin Göring-Eckardt, Georg Mascolo, Andrew B. Denison und Constanze Kurz.. Das Video zur Sendung Anne Will am Mittwoch, 16.07.2014',
|
|
||||||
},
|
|
||||||
'skip': 'Blocked outside of Germany',
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.ardmediathek.de/tv/Tatort/Das-Wunder-von-Wolbeck-Video-tgl-ab-20/Das-Erste/Video?documentId=22490580&bcastId=602916',
|
'url': 'http://www.ardmediathek.de/tv/Tatort/Das-Wunder-von-Wolbeck-Video-tgl-ab-20/Das-Erste/Video?documentId=22490580&bcastId=602916',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
|||||||
import time
|
import time
|
||||||
import hmac
|
import hmac
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
@ -17,7 +17,7 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class AtresPlayerIE(InfoExtractor):
|
class AtresPlayerIE(SubtitlesInfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
|
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
@ -95,7 +95,7 @@ class AtresPlayerIE(InfoExtractor):
|
|||||||
for fmt in ['windows', 'android_tablet']:
|
for fmt in ['windows', 'android_tablet']:
|
||||||
request = compat_urllib_request.Request(
|
request = compat_urllib_request.Request(
|
||||||
self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token))
|
self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token))
|
||||||
request.add_header('Youtubedl-user-agent', self._USER_AGENT)
|
request.add_header('User-Agent', self._USER_AGENT)
|
||||||
|
|
||||||
fmt_json = self._download_json(
|
fmt_json = self._download_json(
|
||||||
request, video_id, 'Downloading %s video JSON' % fmt)
|
request, video_id, 'Downloading %s video JSON' % fmt)
|
||||||
@ -105,13 +105,22 @@ class AtresPlayerIE(InfoExtractor):
|
|||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'%s returned error: %s' % (self.IE_NAME, result), expected=True)
|
'%s returned error: %s' % (self.IE_NAME, result), expected=True)
|
||||||
|
|
||||||
for _, video_url in fmt_json['resultObject'].items():
|
for format_id, video_url in fmt_json['resultObject'].items():
|
||||||
|
if format_id == 'token' or not video_url.startswith('http'):
|
||||||
|
continue
|
||||||
if video_url.endswith('/Manifest'):
|
if video_url.endswith('/Manifest'):
|
||||||
formats.extend(self._extract_f4m_formats(video_url[:-9] + '/manifest.f4m', video_id))
|
if 'geodeswowsmpra3player' in video_url:
|
||||||
|
f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
|
||||||
|
f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
|
||||||
|
# this videos are protected by DRM, the f4m downloader doesn't support them
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
f4m_url = video_url[:-9] + '/manifest.f4m'
|
||||||
|
formats.extend(self._extract_f4m_formats(f4m_url, video_id))
|
||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'format_id': 'android',
|
'format_id': 'android-%s' % format_id,
|
||||||
'preference': 1,
|
'preference': 1,
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
@ -134,6 +143,15 @@ class AtresPlayerIE(InfoExtractor):
|
|||||||
description = xpath_text(art, './description', 'description')
|
description = xpath_text(art, './description', 'description')
|
||||||
thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')
|
thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
subtitle = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
|
||||||
|
if subtitle:
|
||||||
|
subtitles['es'] = subtitle
|
||||||
|
|
||||||
|
if self._downloader.params.get('listsubtitles', False):
|
||||||
|
self._list_available_subtitles(video_id, subtitles)
|
||||||
|
return
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
@ -141,4 +159,5 @@ class AtresPlayerIE(InfoExtractor):
|
|||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'subtitles': self.extract_subtitles(video_id, subtitles),
|
||||||
}
|
}
|
||||||
|
@ -88,16 +88,21 @@ class AudiomackAlbumIE(InfoExtractor):
|
|||||||
# Album playlist ripped from fakeshoredrive with no metadata
|
# Album playlist ripped from fakeshoredrive with no metadata
|
||||||
{
|
{
|
||||||
'url': 'http://www.audiomack.com/album/fakeshoredrive/ppp-pistol-p-project',
|
'url': 'http://www.audiomack.com/album/fakeshoredrive/ppp-pistol-p-project',
|
||||||
|
'info_dict': {
|
||||||
|
'title': 'PPP (Pistol P Project)',
|
||||||
|
'id': '837572',
|
||||||
|
},
|
||||||
'playlist': [{
|
'playlist': [{
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': '9.-heaven-or-hell-chimaca-ft-zuse-prod-by-dj-fu',
|
'title': 'PPP (Pistol P Project) - 9. Heaven or Hell (CHIMACA) ft Zuse (prod by DJ FU)',
|
||||||
'id': '9.-heaven-or-hell-chimaca-ft-zuse-prod-by-dj-fu',
|
'id': '837577',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
|
'uploader': 'Lil Herb a.k.a. G Herbo',
|
||||||
}
|
}
|
||||||
}],
|
}],
|
||||||
'params': {
|
'params': {
|
||||||
'playliststart': 8,
|
'playliststart': 9,
|
||||||
'playlistend': 8,
|
'playlistend': 9,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
@ -1,50 +0,0 @@
|
|||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..compat import compat_urllib_parse
|
|
||||||
from ..utils import (
|
|
||||||
determine_ext,
|
|
||||||
ExtractorError,
|
|
||||||
remove_end,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class AUEngineIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'http://(?:www\.)?auengine\.com/embed\.php\?.*?file=(?P<id>[^&]+).*?'
|
|
||||||
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://auengine.com/embed.php?file=lfvlytY6&w=650&h=370',
|
|
||||||
'md5': '48972bdbcf1a3a2f5533e62425b41d4f',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'lfvlytY6',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': '[Commie]The Legend of the Legendary Heroes - 03 - Replication Eye (Alpha Stigma)[F9410F5A]'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
title = self._html_search_regex(
|
|
||||||
r'<title>\s*(?P<title>.+?)\s*</title>', webpage, 'title')
|
|
||||||
video_urls = re.findall(r'http://\w+.auengine.com/vod/.*[^\W]', webpage)
|
|
||||||
video_url = compat_urllib_parse.unquote(video_urls[0])
|
|
||||||
thumbnails = re.findall(r'http://\w+.auengine.com/thumb/.*[^\W]', webpage)
|
|
||||||
thumbnail = compat_urllib_parse.unquote(thumbnails[0])
|
|
||||||
|
|
||||||
if not video_url:
|
|
||||||
raise ExtractorError('Could not find video URL')
|
|
||||||
|
|
||||||
ext = '.' + determine_ext(video_url)
|
|
||||||
title = remove_end(title, ext)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'url': video_url,
|
|
||||||
'title': title,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'http_referer': 'http://www.auengine.com/flowplayer/flowplayer.commercial-3.2.14.swf',
|
|
||||||
}
|
|
@ -10,7 +10,7 @@ from ..compat import compat_HTTPError
|
|||||||
class BBCCoUkIE(SubtitlesInfoExtractor):
|
class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||||
IE_NAME = 'bbc.co.uk'
|
IE_NAME = 'bbc.co.uk'
|
||||||
IE_DESC = 'BBC iPlayer'
|
IE_DESC = 'BBC iPlayer'
|
||||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
|
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
@ -118,6 +118,9 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
|
'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
|
||||||
|
'only_matching': True,
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -199,7 +199,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
|
|||||||
# For some weird reason, blip.tv serves a video instead of subtitles
|
# For some weird reason, blip.tv serves a video instead of subtitles
|
||||||
# when we request with a common UA
|
# when we request with a common UA
|
||||||
req = compat_urllib_request.Request(url)
|
req = compat_urllib_request.Request(url)
|
||||||
req.add_header('Youtubedl-user-agent', 'youtube-dl')
|
req.add_header('User-Agent', 'youtube-dl')
|
||||||
return self._download_webpage(req, None, note=False)
|
return self._download_webpage(req, None, note=False)
|
||||||
|
|
||||||
|
|
||||||
|
@ -28,12 +28,10 @@ class CinchcastIE(InfoExtractor):
|
|||||||
item, './{http://developer.longtailvideo.com/trac/}date')
|
item, './{http://developer.longtailvideo.com/trac/}date')
|
||||||
upload_date = unified_strdate(date_str, day_first=False)
|
upload_date = unified_strdate(date_str, day_first=False)
|
||||||
# duration is present but wrong
|
# duration is present but wrong
|
||||||
formats = []
|
formats = [{
|
||||||
formats.append({
|
|
||||||
'format_id': 'main',
|
'format_id': 'main',
|
||||||
'url': item.find(
|
'url': item.find('./{http://search.yahoo.com/mrss/}content').attrib['url'],
|
||||||
'./{http://search.yahoo.com/mrss/}content').attrib['url'],
|
}]
|
||||||
})
|
|
||||||
backup_url = xpath_text(
|
backup_url = xpath_text(
|
||||||
item, './{http://developer.longtailvideo.com/trac/}backupContent')
|
item, './{http://developer.longtailvideo.com/trac/}backupContent')
|
||||||
if backup_url:
|
if backup_url:
|
||||||
|
@ -1,9 +1,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import determine_ext
|
||||||
|
|
||||||
|
|
||||||
_translation_table = {
|
_translation_table = {
|
||||||
@ -27,10 +25,10 @@ class CliphunterIE(InfoExtractor):
|
|||||||
'''
|
'''
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo',
|
'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo',
|
||||||
'md5': 'a2ba71eebf523859fe527a61018f723e',
|
'md5': 'b7c9bbd4eb3a226ab91093714dcaa480',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1012420',
|
'id': '1012420',
|
||||||
'ext': 'mp4',
|
'ext': 'flv',
|
||||||
'title': 'Fun Jynx Maze solo',
|
'title': 'Fun Jynx Maze solo',
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
@ -44,39 +42,31 @@ class CliphunterIE(InfoExtractor):
|
|||||||
video_title = self._search_regex(
|
video_title = self._search_regex(
|
||||||
r'mediaTitle = "([^"]+)"', webpage, 'title')
|
r'mediaTitle = "([^"]+)"', webpage, 'title')
|
||||||
|
|
||||||
pl_fiji = self._search_regex(
|
fmts = {}
|
||||||
r'pl_fiji = \'([^\']+)\'', webpage, 'video data')
|
for fmt in ('mp4', 'flv'):
|
||||||
pl_c_qual = self._search_regex(
|
fmt_list = self._parse_json(self._search_regex(
|
||||||
r'pl_c_qual = "(.)"', webpage, 'video quality')
|
r'var %sjson\s*=\s*(\[.*?\]);' % fmt, webpage, '%s formats' % fmt), video_id)
|
||||||
video_url = _decode(pl_fiji)
|
for f in fmt_list:
|
||||||
formats = [{
|
fmts[f['fname']] = _decode(f['sUrl'])
|
||||||
'url': video_url,
|
|
||||||
'format_id': 'default-%s' % pl_c_qual,
|
|
||||||
}]
|
|
||||||
|
|
||||||
qualities_json = self._search_regex(
|
qualities = self._parse_json(self._search_regex(
|
||||||
r'var pl_qualities\s*=\s*(.*?);\n', webpage, 'quality info')
|
r'var player_btns\s*=\s*(.*?);\n', webpage, 'quality info'), video_id)
|
||||||
qualities_data = json.loads(qualities_json)
|
|
||||||
|
|
||||||
for i, t in enumerate(
|
formats = []
|
||||||
re.findall(r"pl_fiji_([a-z0-9]+)\s*=\s*'([^']+')", webpage)):
|
for fname, url in fmts.items():
|
||||||
quality_id, crypted_url = t
|
|
||||||
video_url = _decode(crypted_url)
|
|
||||||
f = {
|
f = {
|
||||||
'format_id': quality_id,
|
'url': url,
|
||||||
'url': video_url,
|
|
||||||
'quality': i,
|
|
||||||
}
|
}
|
||||||
if quality_id in qualities_data:
|
if fname in qualities:
|
||||||
qd = qualities_data[quality_id]
|
qual = qualities[fname]
|
||||||
m = re.match(
|
f.update({
|
||||||
r'''(?x)<b>(?P<width>[0-9]+)x(?P<height>[0-9]+)<\\/b>
|
'format_id': '%s_%sp' % (determine_ext(url), qual['h']),
|
||||||
\s*\(\s*(?P<tbr>[0-9]+)\s*kb\\/s''', qd)
|
'width': qual['w'],
|
||||||
if m:
|
'height': qual['h'],
|
||||||
f['width'] = int(m.group('width'))
|
'tbr': qual['br'],
|
||||||
f['height'] = int(m.group('height'))
|
})
|
||||||
f['tbr'] = int(m.group('tbr'))
|
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
thumbnail = self._search_regex(
|
thumbnail = self._search_regex(
|
||||||
|
@ -49,7 +49,9 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
|||||||
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
|
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
|
||||||
)|
|
)|
|
||||||
(?P<interview>
|
(?P<interview>
|
||||||
extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?)))
|
extended-interviews/(?P<interID>[0-9a-z]+)/
|
||||||
|
(?:playlist_tds_extended_)?(?P<interview_title>[^/?#]*?)
|
||||||
|
(?:/[^/?#]?|[?#]|$))))
|
||||||
'''
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
|
'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
|
||||||
@ -62,6 +64,38 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
|||||||
'uploader': 'thedailyshow',
|
'uploader': 'thedailyshow',
|
||||||
'title': 'thedailyshow kristen-stewart part 1',
|
'title': 'thedailyshow kristen-stewart part 1',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://thedailyshow.cc.com/extended-interviews/b6364d/sarah-chayes-extended-interview',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'sarah-chayes-extended-interview',
|
||||||
|
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
|
||||||
|
'title': 'thedailyshow Sarah Chayes Extended Interview',
|
||||||
|
},
|
||||||
|
'playlist': [
|
||||||
|
{
|
||||||
|
'info_dict': {
|
||||||
|
'id': '0baad492-cbec-4ec1-9e50-ad91c291127f',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20150129',
|
||||||
|
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
|
||||||
|
'uploader': 'thedailyshow',
|
||||||
|
'title': 'thedailyshow sarah-chayes-extended-interview part 1',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1e4fb91b-8ce7-4277-bd7c-98c9f1bbd283',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20150129',
|
||||||
|
'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
|
||||||
|
'uploader': 'thedailyshow',
|
||||||
|
'title': 'thedailyshow sarah-chayes-extended-interview part 2',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
|
'url': 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -230,6 +264,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
|
'id': epTitle,
|
||||||
'entries': entries,
|
'entries': entries,
|
||||||
'title': show_name + ' ' + title,
|
'title': show_name + ' ' + title,
|
||||||
'description': description,
|
'description': description,
|
||||||
|
@ -14,6 +14,7 @@ import xml.etree.ElementTree
|
|||||||
|
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_cookiejar,
|
compat_cookiejar,
|
||||||
|
compat_HTTPError,
|
||||||
compat_http_client,
|
compat_http_client,
|
||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
@ -26,6 +27,7 @@ from ..utils import (
|
|||||||
compiled_regex_type,
|
compiled_regex_type,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
HEADRequest,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
RegexNotFoundError,
|
RegexNotFoundError,
|
||||||
sanitize_filename,
|
sanitize_filename,
|
||||||
@ -87,7 +89,8 @@ class InfoExtractor(object):
|
|||||||
* player_url SWF Player URL (used for rtmpdump).
|
* player_url SWF Player URL (used for rtmpdump).
|
||||||
* protocol The protocol that will be used for the actual
|
* protocol The protocol that will be used for the actual
|
||||||
download, lower-case.
|
download, lower-case.
|
||||||
"http", "https", "rtsp", "rtmp", "m3u8" or so.
|
"http", "https", "rtsp", "rtmp", "rtmpe",
|
||||||
|
"m3u8", or "m3u8_native".
|
||||||
* preference Order number of this format. If this field is
|
* preference Order number of this format. If this field is
|
||||||
present and not None, the formats get sorted
|
present and not None, the formats get sorted
|
||||||
by this field, regardless of all other values.
|
by this field, regardless of all other values.
|
||||||
@ -108,15 +111,17 @@ class InfoExtractor(object):
|
|||||||
(quality takes higher priority)
|
(quality takes higher priority)
|
||||||
-1 for default (order by other properties),
|
-1 for default (order by other properties),
|
||||||
-2 or smaller for less than default.
|
-2 or smaller for less than default.
|
||||||
* http_referer HTTP Referer header value to set.
|
|
||||||
* http_method HTTP method to use for the download.
|
* http_method HTTP method to use for the download.
|
||||||
* http_headers A dictionary of additional HTTP headers
|
* http_headers A dictionary of additional HTTP headers
|
||||||
to add to the request.
|
to add to the request.
|
||||||
* http_post_data Additional data to send with a POST
|
* http_post_data Additional data to send with a POST
|
||||||
request.
|
request.
|
||||||
* stretched_ratio If given and not 1, indicates that the
|
* stretched_ratio If given and not 1, indicates that the
|
||||||
video's pixels are not square.
|
video's pixels are not square.
|
||||||
width : height ratio as float.
|
width : height ratio as float.
|
||||||
|
* no_resume The server does not support resuming the
|
||||||
|
(HTTP or RTMP) download. Boolean.
|
||||||
|
|
||||||
url: Final video URL.
|
url: Final video URL.
|
||||||
ext: Video filename extension.
|
ext: Video filename extension.
|
||||||
format: The video format, defaults to ext (used for --get-format)
|
format: The video format, defaults to ext (used for --get-format)
|
||||||
@ -130,7 +135,9 @@ class InfoExtractor(object):
|
|||||||
something like "4234987", title "Dancing naked mole rats",
|
something like "4234987", title "Dancing naked mole rats",
|
||||||
and display_id "dancing-naked-mole-rats"
|
and display_id "dancing-naked-mole-rats"
|
||||||
thumbnails: A list of dictionaries, with the following entries:
|
thumbnails: A list of dictionaries, with the following entries:
|
||||||
|
* "id" (optional, string) - Thumbnail format ID
|
||||||
* "url"
|
* "url"
|
||||||
|
* "preference" (optional, int) - quality of the image
|
||||||
* "width" (optional, int)
|
* "width" (optional, int)
|
||||||
* "height" (optional, int)
|
* "height" (optional, int)
|
||||||
* "resolution" (optional, string "{width}x{height"},
|
* "resolution" (optional, string "{width}x{height"},
|
||||||
@ -712,6 +719,27 @@ class InfoExtractor(object):
|
|||||||
)
|
)
|
||||||
formats.sort(key=_formats_key)
|
formats.sort(key=_formats_key)
|
||||||
|
|
||||||
|
def _check_formats(self, formats, video_id):
|
||||||
|
if formats:
|
||||||
|
formats[:] = filter(
|
||||||
|
lambda f: self._is_valid_url(
|
||||||
|
f['url'], video_id,
|
||||||
|
item='%s video format' % f.get('format_id') if f.get('format_id') else 'video'),
|
||||||
|
formats)
|
||||||
|
|
||||||
|
def _is_valid_url(self, url, video_id, item='video'):
|
||||||
|
try:
|
||||||
|
self._request_webpage(
|
||||||
|
HEADRequest(url), video_id,
|
||||||
|
'Checking %s URL' % item)
|
||||||
|
return True
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, compat_HTTPError):
|
||||||
|
self.report_warning(
|
||||||
|
'%s URL is invalid, skipping' % item, video_id)
|
||||||
|
return False
|
||||||
|
raise
|
||||||
|
|
||||||
def http_scheme(self):
|
def http_scheme(self):
|
||||||
""" Either "http:" or "https:", depending on the user's preferences """
|
""" Either "http:" or "https:", depending on the user's preferences """
|
||||||
return (
|
return (
|
||||||
|
93
youtube_dl/extractor/ctsnews.py
Normal file
93
youtube_dl/extractor/ctsnews.py
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import parse_iso8601, ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class CtsNewsIE(InfoExtractor):
|
||||||
|
# https connection failed (Connection reset)
|
||||||
|
_VALID_URL = r'http://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://news.cts.com.tw/cts/international/201501/201501291578109.html',
|
||||||
|
'md5': 'a9875cb790252b08431186d741beaabe',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '201501291578109',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '以色列.真主黨交火 3人死亡',
|
||||||
|
'description': 'md5:95e9b295c898b7ff294f09d450178d7d',
|
||||||
|
'timestamp': 1422528540,
|
||||||
|
'upload_date': '20150129',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
# News count not appear on page but still available in database
|
||||||
|
'url': 'http://news.cts.com.tw/cts/international/201309/201309031304098.html',
|
||||||
|
'md5': '3aee7e0df7cdff94e43581f54c22619e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '201309031304098',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '韓國31歲童顏男 貌如十多歲小孩',
|
||||||
|
'description': 'md5:f183feeba3752b683827aab71adad584',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'timestamp': 1378205880,
|
||||||
|
'upload_date': '20130903',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
# With Youtube embedded video
|
||||||
|
'url': 'http://news.cts.com.tw/cts/money/201501/201501291578003.html',
|
||||||
|
'md5': '1d842c771dc94c8c3bca5af2cc1db9c5',
|
||||||
|
'add_ie': ['Youtube'],
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'OVbfO7d0_hQ',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'iPhone6熱銷 蘋果財報亮眼',
|
||||||
|
'description': 'md5:f395d4f485487bb0f992ed2c4b07aa7d',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'upload_date': '20150128',
|
||||||
|
'uploader_id': 'TBSCTS',
|
||||||
|
'uploader': '中華電視公司',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
news_id = self._match_id(url)
|
||||||
|
page = self._download_webpage(url, news_id)
|
||||||
|
|
||||||
|
if self._search_regex(r'(CTSPlayer2)', page, 'CTSPlayer2 identifier', default=None):
|
||||||
|
feed_url = self._html_search_regex(
|
||||||
|
r'(http://news\.cts\.com\.tw/action/mp4feed\.php\?news_id=\d+)',
|
||||||
|
page, 'feed url')
|
||||||
|
video_url = self._download_webpage(
|
||||||
|
feed_url, news_id, note='Fetching feed')
|
||||||
|
else:
|
||||||
|
self.to_screen('Not CTSPlayer video, trying Youtube...')
|
||||||
|
youtube_url = self._search_regex(
|
||||||
|
r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url',
|
||||||
|
default=None)
|
||||||
|
if not youtube_url:
|
||||||
|
raise ExtractorError('The news includes no videos!', expected=True)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'url',
|
||||||
|
'url': youtube_url,
|
||||||
|
'ie_key': 'Youtube',
|
||||||
|
}
|
||||||
|
|
||||||
|
description = self._html_search_meta('description', page)
|
||||||
|
title = self._html_search_meta('title', page)
|
||||||
|
thumbnail = self._html_search_meta('image', page)
|
||||||
|
|
||||||
|
datetime_str = self._html_search_regex(
|
||||||
|
r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})', page, 'date and time')
|
||||||
|
# Transform into ISO 8601 format with timezone info
|
||||||
|
datetime_str = datetime_str.replace('/', '-') + ':00+0800'
|
||||||
|
timestamp = parse_iso8601(datetime_str, delimiter=' ')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': news_id,
|
||||||
|
'url': video_url,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
}
|
57
youtube_dl/extractor/dctp.py
Normal file
57
youtube_dl/extractor/dctp.py
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
|
||||||
|
|
||||||
|
class DctpTvIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http://www.dctp.tv/(#/)?filme/(?P<id>.+?)/$'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1324',
|
||||||
|
'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Videoinstallation für eine Kaufhausfassade'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
base_url = 'http://dctp-ivms2-restapi.s3.amazonaws.com/'
|
||||||
|
version_json = self._download_json(
|
||||||
|
base_url + 'version.json',
|
||||||
|
video_id, note='Determining file version')
|
||||||
|
version = version_json['version_name']
|
||||||
|
info_json = self._download_json(
|
||||||
|
'{0}{1}/restapi/slugs/{2}.json'.format(base_url, version, video_id),
|
||||||
|
video_id, note='Fetching object ID')
|
||||||
|
object_id = compat_str(info_json['object_id'])
|
||||||
|
meta_json = self._download_json(
|
||||||
|
'{0}{1}/restapi/media/{2}.json'.format(base_url, version, object_id),
|
||||||
|
video_id, note='Downloading metadata')
|
||||||
|
uuid = meta_json['uuid']
|
||||||
|
title = meta_json['title']
|
||||||
|
wide = meta_json['is_wide']
|
||||||
|
if wide:
|
||||||
|
ratio = '16x9'
|
||||||
|
else:
|
||||||
|
ratio = '4x3'
|
||||||
|
play_path = 'mp4:{0}_dctp_0500_{1}.m4v'.format(uuid, ratio)
|
||||||
|
|
||||||
|
servers_json = self._download_json(
|
||||||
|
'http://www.dctp.tv/streaming_servers/',
|
||||||
|
video_id, note='Downloading server list')
|
||||||
|
url = servers_json[0]['endpoint']
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': object_id,
|
||||||
|
'title': title,
|
||||||
|
'format': 'rtmp',
|
||||||
|
'url': url,
|
||||||
|
'play_path': play_path,
|
||||||
|
'rtmp_real_time': True,
|
||||||
|
'ext': 'flv',
|
||||||
|
'display_id': video_id
|
||||||
|
}
|
@ -1,40 +1,38 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class DefenseGouvFrIE(InfoExtractor):
|
class DefenseGouvFrIE(InfoExtractor):
|
||||||
IE_NAME = 'defense.gouv.fr'
|
IE_NAME = 'defense.gouv.fr'
|
||||||
_VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/'
|
_VALID_URL = r'http://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)'
|
||||||
r'ligthboxvideo/base-de-medias/webtv/(.*)')
|
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1',
|
'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1',
|
||||||
'file': '11213.mp4',
|
|
||||||
'md5': '75bba6124da7e63d2d60b5244ec9430c',
|
'md5': '75bba6124da7e63d2d60b5244ec9430c',
|
||||||
"info_dict": {
|
'info_dict': {
|
||||||
"title": "attaque-chimique-syrienne-du-21-aout-2013-1"
|
'id': '11213',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'attaque-chimique-syrienne-du-21-aout-2013-1'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
title = re.match(self._VALID_URL, url).group(1)
|
title = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, title)
|
webpage = self._download_webpage(url, title)
|
||||||
|
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
r"flashvars.pvg_id=\"(\d+)\";",
|
r"flashvars.pvg_id=\"(\d+)\";",
|
||||||
webpage, 'ID')
|
webpage, 'ID')
|
||||||
|
|
||||||
json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
|
json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
|
||||||
+ video_id)
|
+ video_id)
|
||||||
info = self._download_webpage(json_url, title,
|
info = self._download_json(json_url, title, 'Downloading JSON config')
|
||||||
'Downloading JSON config')
|
video_url = info['renditions'][0]['url']
|
||||||
video_url = json.loads(info)['renditions'][0]['url']
|
|
||||||
|
|
||||||
return {'id': video_id,
|
return {
|
||||||
'ext': 'mp4',
|
'id': video_id,
|
||||||
'url': video_url,
|
'ext': 'mp4',
|
||||||
'title': title,
|
'url': video_url,
|
||||||
}
|
'title': title,
|
||||||
|
}
|
||||||
|
@ -6,7 +6,7 @@ from ..utils import parse_iso8601
|
|||||||
|
|
||||||
|
|
||||||
class DRTVIE(SubtitlesInfoExtractor):
|
class DRTVIE(SubtitlesInfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)+(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
_VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8',
|
'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8',
|
||||||
@ -25,9 +25,15 @@ class DRTVIE(SubtitlesInfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
programcard = self._download_json(
|
webpage = self._download_webpage(url, video_id)
|
||||||
'http://www.dr.dk/mu/programcard/expanded/%s' % video_id, video_id, 'Downloading video JSON')
|
|
||||||
|
|
||||||
|
video_id = self._search_regex(
|
||||||
|
r'data-(?:material-identifier|episode-slug)="([^"]+)"',
|
||||||
|
webpage, 'video id')
|
||||||
|
|
||||||
|
programcard = self._download_json(
|
||||||
|
'http://www.dr.dk/mu/programcard/expanded/%s' % video_id,
|
||||||
|
video_id, 'Downloading video JSON')
|
||||||
data = programcard['Data'][0]
|
data = programcard['Data'][0]
|
||||||
|
|
||||||
title = data['Title']
|
title = data['Title']
|
||||||
@ -48,14 +54,20 @@ class DRTVIE(SubtitlesInfoExtractor):
|
|||||||
elif asset['Kind'] == 'VideoResource':
|
elif asset['Kind'] == 'VideoResource':
|
||||||
duration = asset['DurationInMilliseconds'] / 1000.0
|
duration = asset['DurationInMilliseconds'] / 1000.0
|
||||||
restricted_to_denmark = asset['RestrictedToDenmark']
|
restricted_to_denmark = asset['RestrictedToDenmark']
|
||||||
|
spoken_subtitles = asset['Target'] == 'SpokenSubtitles'
|
||||||
for link in asset['Links']:
|
for link in asset['Links']:
|
||||||
target = link['Target']
|
target = link['Target']
|
||||||
uri = link['Uri']
|
uri = link['Uri']
|
||||||
|
format_id = target
|
||||||
|
preference = -1 if target == 'HDS' else -2
|
||||||
|
if spoken_subtitles:
|
||||||
|
preference -= 2
|
||||||
|
format_id += '-spoken-subtitles'
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43' if target == 'HDS' else uri,
|
'url': uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43' if target == 'HDS' else uri,
|
||||||
'format_id': target,
|
'format_id': format_id,
|
||||||
'ext': link['FileFormat'],
|
'ext': link['FileFormat'],
|
||||||
'preference': -1 if target == 'HDS' else -2,
|
'preference': preference,
|
||||||
})
|
})
|
||||||
subtitles_list = asset.get('SubtitlesList')
|
subtitles_list = asset.get('SubtitlesList')
|
||||||
if isinstance(subtitles_list, list):
|
if isinstance(subtitles_list, list):
|
||||||
|
@ -5,6 +5,7 @@ import hashlib
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
compat_urllib_parse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
@ -16,7 +17,8 @@ from ..utils import (
|
|||||||
class FC2IE(InfoExtractor):
|
class FC2IE(InfoExtractor):
|
||||||
_VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)?content/(?P<id>[^/]+)'
|
_VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)?content/(?P<id>[^/]+)'
|
||||||
IE_NAME = 'fc2'
|
IE_NAME = 'fc2'
|
||||||
_TEST = {
|
_NETRC_MACHINE = 'fc2'
|
||||||
|
_TESTS = [{
|
||||||
'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
|
'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
|
||||||
'md5': 'a6ebe8ebe0396518689d963774a54eb7',
|
'md5': 'a6ebe8ebe0396518689d963774a54eb7',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -24,12 +26,57 @@ class FC2IE(InfoExtractor):
|
|||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Boxing again with Puff',
|
'title': 'Boxing again with Puff',
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://video.fc2.com/en/content/20150125cEva0hDn/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '20150125cEva0hDn',
|
||||||
|
'ext': 'mp4',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'username': 'ytdl@yt-dl.org',
|
||||||
|
'password': '(snip)',
|
||||||
|
'skip': 'requires actual password'
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _login(self):
|
||||||
|
(username, password) = self._get_login_info()
|
||||||
|
if username is None or password is None:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Log in
|
||||||
|
login_form_strs = {
|
||||||
|
'email': username,
|
||||||
|
'password': password,
|
||||||
|
'done': 'video',
|
||||||
|
'Submit': ' Login ',
|
||||||
|
}
|
||||||
|
|
||||||
|
# Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
|
||||||
|
# chokes on unicode
|
||||||
|
login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
|
||||||
|
login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
|
||||||
|
request = compat_urllib_request.Request(
|
||||||
|
'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)
|
||||||
|
|
||||||
|
login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in')
|
||||||
|
if 'mode=redirect&login=done' not in login_results:
|
||||||
|
self.report_warning('unable to log in: bad username or password')
|
||||||
|
return False
|
||||||
|
|
||||||
|
# this is also needed
|
||||||
|
login_redir = compat_urllib_request.Request('http://id.fc2.com/?mode=redirect&login=done')
|
||||||
|
self._download_webpage(
|
||||||
|
login_redir, None, note='Login redirect', errnote='Login redirect failed')
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
self._login()
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
self._downloader.cookiejar.clear_session_cookies() # must clear
|
self._downloader.cookiejar.clear_session_cookies() # must clear
|
||||||
|
self._login()
|
||||||
|
|
||||||
title = self._og_search_title(webpage)
|
title = self._og_search_title(webpage)
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
@ -46,7 +93,12 @@ class FC2IE(InfoExtractor):
|
|||||||
info = compat_urlparse.parse_qs(info_webpage)
|
info = compat_urlparse.parse_qs(info_webpage)
|
||||||
|
|
||||||
if 'err_code' in info:
|
if 'err_code' in info:
|
||||||
raise ExtractorError('Error code: %s' % info['err_code'][0])
|
# most of the time we can still download video even if err_code is 403 or 602
|
||||||
|
self.report_warning(
|
||||||
|
'Error code was: %s... but still trying' % info['err_code'][0])
|
||||||
|
|
||||||
|
if 'filepath' not in info:
|
||||||
|
raise ExtractorError('Cannot download file. Are you logged in?')
|
||||||
|
|
||||||
video_url = info['filepath'][0] + '?mid=' + info['mid'][0]
|
video_url = info['filepath'][0] + '?mid=' + info['mid'][0]
|
||||||
title_info = info.get('title')
|
title_info = info.get('title')
|
||||||
|
@ -16,6 +16,7 @@ class FolketingetIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:www\.)?ft\.dk/webtv/video/[^?#]*?\.(?P<id>[0-9]+)\.aspx'
|
_VALID_URL = r'https?://(?:www\.)?ft\.dk/webtv/video/[^?#]*?\.(?P<id>[0-9]+)\.aspx'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.ft.dk/webtv/video/20141/eru/td.1165642.aspx?as=1#player',
|
'url': 'http://www.ft.dk/webtv/video/20141/eru/td.1165642.aspx?as=1#player',
|
||||||
|
'md5': '6269e8626fa1a891bf5369b386ae996a',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1165642',
|
'id': '1165642',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -29,9 +30,6 @@ class FolketingetIE(InfoExtractor):
|
|||||||
'upload_date': '20141120',
|
'upload_date': '20141120',
|
||||||
'duration': 3960,
|
'duration': 3960,
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
'skip_download': 'rtmpdump required',
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -230,12 +230,13 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
|
|||||||
|
|
||||||
class GenerationQuoiIE(InfoExtractor):
|
class GenerationQuoiIE(InfoExtractor):
|
||||||
IE_NAME = 'france2.fr:generation-quoi'
|
IE_NAME = 'france2.fr:generation-quoi'
|
||||||
_VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*)(\?|$)'
|
_VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<id>[^/?#]+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous',
|
'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous',
|
||||||
'file': 'k7FJX8VBcvvLmX4wA5Q.mp4',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'k7FJX8VBcvvLmX4wA5Q',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Génération Quoi - Garde à Vous',
|
'title': 'Génération Quoi - Garde à Vous',
|
||||||
'uploader': 'Génération Quoi',
|
'uploader': 'Génération Quoi',
|
||||||
},
|
},
|
||||||
@ -243,14 +244,12 @@ class GenerationQuoiIE(InfoExtractor):
|
|||||||
# It uses Dailymotion
|
# It uses Dailymotion
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'skip': 'Only available from France',
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
display_id = self._match_id(url)
|
||||||
name = mobj.group('name')
|
info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % display_id)
|
||||||
info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % name)
|
info_json = self._download_webpage(info_url, display_id)
|
||||||
info_json = self._download_webpage(info_url, name)
|
|
||||||
info = json.loads(info_json)
|
info = json.loads(info_json)
|
||||||
return self.url_result('http://www.dailymotion.com/video/%s' % info['id'],
|
return self.url_result('http://www.dailymotion.com/video/%s' % info['id'],
|
||||||
ie='Dailymotion')
|
ie='Dailymotion')
|
||||||
|
@ -362,7 +362,7 @@ class GenericIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
|
'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
|
||||||
'title': 'Zero Punctuation',
|
'title': 'Zero Punctuation',
|
||||||
'description': 're:'
|
'description': 're:.*groundbreaking video review series.*'
|
||||||
},
|
},
|
||||||
'playlist_mincount': 11,
|
'playlist_mincount': 11,
|
||||||
},
|
},
|
||||||
@ -498,6 +498,19 @@ class GenericIE(InfoExtractor):
|
|||||||
'uploader': 'www.abc.net.au',
|
'uploader': 'www.abc.net.au',
|
||||||
'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
|
'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
# embedded viddler video
|
||||||
|
{
|
||||||
|
'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '4d03aad9',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'uploader': 'deadspin',
|
||||||
|
'title': 'WALL-TO-GORTAT',
|
||||||
|
'timestamp': 1422285291,
|
||||||
|
'upload_date': '20150126',
|
||||||
|
},
|
||||||
|
'add_ie': ['Viddler'],
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -860,9 +873,16 @@ class GenericIE(InfoExtractor):
|
|||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return self.url_result(mobj.group('url'))
|
return self.url_result(mobj.group('url'))
|
||||||
|
|
||||||
|
# Look for embedded Viddler player
|
||||||
|
mobj = re.search(
|
||||||
|
r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
|
||||||
|
webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result(mobj.group('url'))
|
||||||
|
|
||||||
# Look for Ooyala videos
|
# Look for Ooyala videos
|
||||||
mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
|
mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
|
||||||
re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
|
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return OoyalaIE._build_url_result(mobj.group('ec'))
|
return OoyalaIE._build_url_result(mobj.group('ec'))
|
||||||
|
|
||||||
@ -1053,7 +1073,7 @@ class GenericIE(InfoExtractor):
|
|||||||
found = filter_video(re.findall(r'''(?xs)
|
found = filter_video(re.findall(r'''(?xs)
|
||||||
flowplayer\("[^"]+",\s*
|
flowplayer\("[^"]+",\s*
|
||||||
\{[^}]+?\}\s*,
|
\{[^}]+?\}\s*,
|
||||||
\s*{[^}]+? ["']?clip["']?\s*:\s*\{\s*
|
\s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
|
||||||
["']?url["']?\s*:\s*["']([^"']+)["']
|
["']?url["']?\s*:\s*["']([^"']+)["']
|
||||||
''', webpage))
|
''', webpage))
|
||||||
if not found:
|
if not found:
|
||||||
|
@ -70,6 +70,19 @@ class GloboIE(InfoExtractor):
|
|||||||
'like_count': int,
|
'like_count': int,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/',
|
||||||
|
'md5': 'c1defca721ce25b2354e927d3e4b3dec',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3928201',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Ator e diretor argentino, Ricado Darín fala sobre utopias e suas perdas',
|
||||||
|
'duration': 1472.906,
|
||||||
|
'uploader': 'Canal Brasil',
|
||||||
|
'uploader_id': 705,
|
||||||
|
'like_count': int,
|
||||||
|
}
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
class MD5():
|
class MD5():
|
||||||
@ -381,11 +394,16 @@ class GloboIE(InfoExtractor):
|
|||||||
signed_md5 = self.MD5.b64_md5(received_md5 + compat_str(sign_time) + padding)
|
signed_md5 = self.MD5.b64_md5(received_md5 + compat_str(sign_time) + padding)
|
||||||
signed_hash = hash_code + compat_str(received_time) + received_random + compat_str(sign_time) + padding + signed_md5
|
signed_hash = hash_code + compat_str(received_time) + received_random + compat_str(sign_time) + padding + signed_md5
|
||||||
|
|
||||||
formats.append({
|
resource_url = resource['url']
|
||||||
'url': '%s?h=%s&k=%s' % (resource['url'], signed_hash, 'flash'),
|
signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash')
|
||||||
'format_id': resource_id,
|
if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'):
|
||||||
'height': resource['height']
|
formats.extend(self._extract_m3u8_formats(signed_url, resource_id, 'mp4'))
|
||||||
})
|
else:
|
||||||
|
formats.append({
|
||||||
|
'url': signed_url,
|
||||||
|
'format_id': resource_id,
|
||||||
|
'height': resource.get('height'),
|
||||||
|
})
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
@ -83,7 +83,7 @@ class GroovesharkIE(InfoExtractor):
|
|||||||
return compat_urlparse.urlunparse((uri.scheme, uri.netloc, obj['attrs']['data'], None, None, None))
|
return compat_urlparse.urlunparse((uri.scheme, uri.netloc, obj['attrs']['data'], None, None, None))
|
||||||
|
|
||||||
def _transform_bootstrap(self, js):
|
def _transform_bootstrap(self, js):
|
||||||
return re.split('(?m)^\s*try\s*{', js)[0] \
|
return re.split('(?m)^\s*try\s*\{', js)[0] \
|
||||||
.split(' = ', 1)[1].strip().rstrip(';')
|
.split(' = ', 1)[1].strip().rstrip(';')
|
||||||
|
|
||||||
def _transform_meta(self, js):
|
def _transform_meta(self, js):
|
||||||
|
46
youtube_dl/extractor/historicfilms.py
Normal file
46
youtube_dl/extractor/historicfilms.py
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import parse_duration
|
||||||
|
|
||||||
|
|
||||||
|
class HistoricFilmsIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?historicfilms\.com/(?:tapes/|play)(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.historicfilms.com/tapes/4728',
|
||||||
|
'md5': 'd4a437aec45d8d796a38a215db064e9a',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '4728',
|
||||||
|
'ext': 'mov',
|
||||||
|
'title': 'Historic Films: GP-7',
|
||||||
|
'description': 'md5:1a86a0f3ac54024e419aba97210d959a',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'duration': 2096,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
tape_id = self._search_regex(
|
||||||
|
r'class="tapeId">([^<]+)<', webpage, 'tape id')
|
||||||
|
|
||||||
|
title = self._og_search_title(webpage)
|
||||||
|
description = self._og_search_description(webpage)
|
||||||
|
thumbnail = self._html_search_meta(
|
||||||
|
'thumbnailUrl', webpage, 'thumbnails') or self._og_search_thumbnail(webpage)
|
||||||
|
duration = parse_duration(self._html_search_meta(
|
||||||
|
'duration', webpage, 'duration'))
|
||||||
|
|
||||||
|
video_url = 'http://www.historicfilms.com/video/%s_%s_web.mov' % (tape_id, video_id)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'url': video_url,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'duration': duration,
|
||||||
|
}
|
@ -16,7 +16,7 @@ from ..utils import (
|
|||||||
class IviIE(InfoExtractor):
|
class IviIE(InfoExtractor):
|
||||||
IE_DESC = 'ivi.ru'
|
IE_DESC = 'ivi.ru'
|
||||||
IE_NAME = 'ivi'
|
IE_NAME = 'ivi'
|
||||||
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<videoid>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
# Single movie
|
# Single movie
|
||||||
@ -63,29 +63,34 @@ class IviIE(InfoExtractor):
|
|||||||
return int(m.group('commentcount')) if m is not None else 0
|
return int(m.group('commentcount')) if m is not None else 0
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('videoid')
|
|
||||||
|
|
||||||
api_url = 'http://api.digitalaccess.ru/api/json/'
|
api_url = 'http://api.digitalaccess.ru/api/json/'
|
||||||
|
|
||||||
data = {'method': 'da.content.get',
|
data = {
|
||||||
'params': [video_id, {'site': 's183',
|
'method': 'da.content.get',
|
||||||
'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
|
'params': [
|
||||||
'contentid': video_id
|
video_id, {
|
||||||
}
|
'site': 's183',
|
||||||
]
|
'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
|
||||||
|
'contentid': video_id
|
||||||
}
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
request = compat_urllib_request.Request(api_url, json.dumps(data))
|
request = compat_urllib_request.Request(api_url, json.dumps(data))
|
||||||
|
|
||||||
video_json_page = self._download_webpage(request, video_id, 'Downloading video JSON')
|
video_json_page = self._download_webpage(
|
||||||
|
request, video_id, 'Downloading video JSON')
|
||||||
video_json = json.loads(video_json_page)
|
video_json = json.loads(video_json_page)
|
||||||
|
|
||||||
if 'error' in video_json:
|
if 'error' in video_json:
|
||||||
error = video_json['error']
|
error = video_json['error']
|
||||||
if error['origin'] == 'NoRedisValidData':
|
if error['origin'] == 'NoRedisValidData':
|
||||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||||
raise ExtractorError('Unable to download video %s: %s' % (video_id, error['message']), expected=True)
|
raise ExtractorError(
|
||||||
|
'Unable to download video %s: %s' % (video_id, error['message']),
|
||||||
|
expected=True)
|
||||||
|
|
||||||
result = video_json['result']
|
result = video_json['result']
|
||||||
|
|
||||||
|
@ -13,17 +13,17 @@ class KankanIE(InfoExtractor):
|
|||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://yinyue.kankan.com/vod/48/48863.shtml',
|
'url': 'http://yinyue.kankan.com/vod/48/48863.shtml',
|
||||||
'file': '48863.flv',
|
|
||||||
'md5': '29aca1e47ae68fc28804aca89f29507e',
|
'md5': '29aca1e47ae68fc28804aca89f29507e',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '48863',
|
||||||
|
'ext': 'flv',
|
||||||
'title': 'Ready To Go',
|
'title': 'Ready To Go',
|
||||||
},
|
},
|
||||||
'skip': 'Only available from China',
|
'skip': 'Only available from China',
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, 'video title')
|
title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, 'video title')
|
||||||
|
@ -7,10 +7,6 @@ from .common import InfoExtractor
|
|||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urllib_parse,
|
|
||||||
)
|
|
||||||
from ..aes import (
|
|
||||||
aes_decrypt_text
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -18,9 +14,10 @@ class KeezMoviesIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P<id>[0-9]+)(?:[/?&]|$)'
|
_VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P<id>[0-9]+)(?:[/?&]|$)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
|
'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
|
||||||
'file': '1214711.mp4',
|
|
||||||
'md5': '6e297b7e789329923fcf83abb67c9289',
|
'md5': '6e297b7e789329923fcf83abb67c9289',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '1214711',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Petite Asian Lady Mai Playing In Bathtub',
|
'title': 'Petite Asian Lady Mai Playing In Bathtub',
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
@ -39,11 +36,10 @@ class KeezMoviesIE(InfoExtractor):
|
|||||||
embedded_url = mobj.group(1)
|
embedded_url = mobj.group(1)
|
||||||
return self.url_result(embedded_url)
|
return self.url_result(embedded_url)
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<h1 [^>]*>([^<]+)', webpage, 'title')
|
video_title = self._html_search_regex(
|
||||||
video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&', webpage, 'video_url'))
|
r'<h1 [^>]*>([^<]+)', webpage, 'title')
|
||||||
if 'encrypted=true' in webpage:
|
video_url = self._html_search_regex(
|
||||||
password = self._html_search_regex(r'video_title=(.+?)&', webpage, 'password')
|
r'(?s)html5VideoPlayer = .*?src="([^"]+)"', webpage, 'video URL')
|
||||||
video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8')
|
|
||||||
path = compat_urllib_parse_urlparse(video_url).path
|
path = compat_urllib_parse_urlparse(video_url).path
|
||||||
extension = os.path.splitext(path)[1][1:]
|
extension = os.path.splitext(path)[1][1:]
|
||||||
format = path.split('/')[4].split('_')[:2]
|
format = path.split('/')[4].split('_')[:2]
|
||||||
|
@ -2,18 +2,17 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
unescapeHTML,
|
js_to_json,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class KrasViewIE(InfoExtractor):
|
class KrasViewIE(InfoExtractor):
|
||||||
IE_DESC = 'Красвью'
|
IE_DESC = 'Красвью'
|
||||||
_VALID_URL = r'https?://krasview\.ru/video/(?P<id>\d+)'
|
_VALID_URL = r'https?://krasview\.ru/(?:video|embed)/(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://krasview.ru/video/512228',
|
'url': 'http://krasview.ru/video/512228',
|
||||||
@ -29,20 +28,18 @@ class KrasViewIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
flashvars = json.loads(self._search_regex(
|
flashvars = json.loads(js_to_json(self._search_regex(
|
||||||
r'flashvars\s*:\s*({.+?})\s*}\);', webpage, 'flashvars'))
|
r'video_Init\(({.+?})', webpage, 'flashvars')))
|
||||||
|
|
||||||
video_url = flashvars['url']
|
video_url = flashvars['url']
|
||||||
title = unescapeHTML(flashvars['title'])
|
title = self._og_search_title(webpage)
|
||||||
description = unescapeHTML(flashvars.get('subtitle') or self._og_search_description(webpage, default=None))
|
description = self._og_search_description(webpage, default=None)
|
||||||
thumbnail = flashvars['image']
|
thumbnail = flashvars.get('image') or self._og_search_thumbnail(webpage)
|
||||||
duration = int(flashvars['duration'])
|
duration = int_or_none(flashvars.get('duration'))
|
||||||
filesize = int(flashvars['size'])
|
|
||||||
width = int_or_none(self._og_search_property('video:width', webpage, 'video width'))
|
width = int_or_none(self._og_search_property('video:width', webpage, 'video width'))
|
||||||
height = int_or_none(self._og_search_property('video:height', webpage, 'video height'))
|
height = int_or_none(self._og_search_property('video:height', webpage, 'video height'))
|
||||||
|
|
||||||
@ -53,7 +50,6 @@ class KrasViewIE(InfoExtractor):
|
|||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'filesize': filesize,
|
|
||||||
'width': width,
|
'width': width,
|
||||||
'height': height,
|
'height': height,
|
||||||
}
|
}
|
||||||
|
@ -1,7 +1,5 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
parse_duration,
|
parse_duration,
|
||||||
@ -20,9 +18,10 @@ class LA7IE(InfoExtractor):
|
|||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.la7.tv/richplayer/?assetid=50355319',
|
'url': 'http://www.la7.tv/richplayer/?assetid=50355319',
|
||||||
'file': '50355319.mp4',
|
|
||||||
'md5': 'ec7d1f0224d20ba293ab56cf2259651f',
|
'md5': 'ec7d1f0224d20ba293ab56cf2259651f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '50355319',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'IL DIVO',
|
'title': 'IL DIVO',
|
||||||
'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti e Flavio Bucci',
|
'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti e Flavio Bucci',
|
||||||
'duration': 6254,
|
'duration': 6254,
|
||||||
@ -31,9 +30,7 @@ class LA7IE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
xml_url = 'http://www.la7.tv/repliche/content/index.php?contentId=%s' % video_id
|
xml_url = 'http://www.la7.tv/repliche/content/index.php?contentId=%s' % video_id
|
||||||
doc = self._download_xml(xml_url, video_id)
|
doc = self._download_xml(xml_url, video_id)
|
||||||
|
|
||||||
|
@ -8,20 +8,20 @@ from ..utils import int_or_none
|
|||||||
|
|
||||||
|
|
||||||
class LiveLeakIE(InfoExtractor):
|
class LiveLeakIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
|
_VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<id>[\w_]+)(?:.*)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.liveleak.com/view?i=757_1364311680',
|
'url': 'http://www.liveleak.com/view?i=757_1364311680',
|
||||||
'md5': '0813c2430bea7a46bf13acf3406992f4',
|
'md5': '50f79e05ba149149c1b4ea961223d5b3',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '757_1364311680',
|
'id': '757_1364311680',
|
||||||
'ext': 'mp4',
|
'ext': 'flv',
|
||||||
'description': 'extremely bad day for this guy..!',
|
'description': 'extremely bad day for this guy..!',
|
||||||
'uploader': 'ljfriel2',
|
'uploader': 'ljfriel2',
|
||||||
'title': 'Most unlucky car accident'
|
'title': 'Most unlucky car accident'
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.liveleak.com/view?i=f93_1390833151',
|
'url': 'http://www.liveleak.com/view?i=f93_1390833151',
|
||||||
'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
|
'md5': 'b13a29626183c9d33944e6a04f41aafc',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'f93_1390833151',
|
'id': 'f93_1390833151',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -43,8 +43,7 @@ class LiveLeakIE(InfoExtractor):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('video_id')
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
|
video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
|
||||||
@ -81,9 +80,19 @@ class LiveLeakIE(InfoExtractor):
|
|||||||
sources = json.loads(sources_json)
|
sources = json.loads(sources_json)
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
|
'format_id': '%s' % i,
|
||||||
'format_note': s.get('label'),
|
'format_note': s.get('label'),
|
||||||
'url': s['file'],
|
'url': s['file'],
|
||||||
} for s in sources]
|
} for i, s in enumerate(sources)]
|
||||||
|
for i, s in enumerate(sources):
|
||||||
|
orig_url = s['file'].replace('.h264_base.mp4', '')
|
||||||
|
if s['file'] != orig_url:
|
||||||
|
formats.append({
|
||||||
|
'format_id': 'original-%s' % i,
|
||||||
|
'format_note': s.get('label'),
|
||||||
|
'url': orig_url,
|
||||||
|
'preference': 1,
|
||||||
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -6,13 +6,12 @@ import re
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class LnkGoIE(InfoExtractor):
|
class LnkGoIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?lnkgo\.alfa\.lt/visi\-video/(?P<show>[^/]+)/ziurek\-(?P<display_id>[A-Za-z0-9\-]+)'
|
_VALID_URL = r'https?://(?:www\.)?lnkgo\.alfa\.lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://lnkgo.alfa.lt/visi-video/yra-kaip-yra/ziurek-yra-kaip-yra-162',
|
'url': 'http://lnkgo.alfa.lt/visi-video/yra-kaip-yra/ziurek-yra-kaip-yra-162',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -51,8 +50,7 @@ class LnkGoIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
display_id = self._match_id(url)
|
||||||
display_id = mobj.group('display_id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
url, display_id, 'Downloading player webpage')
|
url, display_id, 'Downloading player webpage')
|
||||||
@ -61,6 +59,8 @@ class LnkGoIE(InfoExtractor):
|
|||||||
r'data-ep="([^"]+)"', webpage, 'video ID')
|
r'data-ep="([^"]+)"', webpage, 'video ID')
|
||||||
title = self._og_search_title(webpage)
|
title = self._og_search_title(webpage)
|
||||||
description = self._og_search_description(webpage)
|
description = self._og_search_description(webpage)
|
||||||
|
upload_date = unified_strdate(self._search_regex(
|
||||||
|
r'class="[^"]*meta-item[^"]*air-time[^"]*">.*?<strong>([^<]+)</strong>', webpage, 'upload date', fatal=False))
|
||||||
|
|
||||||
thumbnail_w = int_or_none(
|
thumbnail_w = int_or_none(
|
||||||
self._og_search_property('image:width', webpage, 'thumbnail width', fatal=False))
|
self._og_search_property('image:width', webpage, 'thumbnail width', fatal=False))
|
||||||
@ -75,39 +75,28 @@ class LnkGoIE(InfoExtractor):
|
|||||||
'height': thumbnail_h,
|
'height': thumbnail_h,
|
||||||
})
|
})
|
||||||
|
|
||||||
upload_date = unified_strdate(self._search_regex(
|
config = self._parse_json(self._search_regex(
|
||||||
r'class="meta-item\sair-time">.*?<strong>([^<]+)</strong>', webpage, 'upload date', fatal=False))
|
r'episodePlayer\((\{.*?\}),\s*\{', webpage, 'sources'), video_id)
|
||||||
duration = int_or_none(self._search_regex(
|
|
||||||
r'VideoDuration = "([^"]+)"', webpage, 'duration', fatal=False))
|
|
||||||
|
|
||||||
pg_rating = self._search_regex(
|
if config.get('pGeo'):
|
||||||
r'pgrating="([^"]+)"', webpage, 'PG rating', fatal=False, default='')
|
self.report_warning(
|
||||||
age_limit = self._AGE_LIMITS.get(pg_rating.upper(), 0)
|
'This content might not be available in your country due to copyright reasons')
|
||||||
|
|
||||||
sources_js = self._search_regex(
|
formats = [{
|
||||||
r'(?s)sources:\s(\[.*?\]),', webpage, 'sources')
|
'format_id': 'hls',
|
||||||
sources = self._parse_json(
|
'ext': 'mp4',
|
||||||
sources_js, video_id, transform_source=js_to_json)
|
'url': config['EpisodeVideoLink_HLS'],
|
||||||
|
}]
|
||||||
|
|
||||||
formats = []
|
m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<play_path>.+)$', config['EpisodeVideoLink'])
|
||||||
for source in sources:
|
if m:
|
||||||
if source.get('provider') == 'rtmp':
|
formats.append({
|
||||||
m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<play_path>.+)$', source['file'])
|
'format_id': 'rtmp',
|
||||||
if not m:
|
'ext': 'flv',
|
||||||
continue
|
'url': m.group('url'),
|
||||||
formats.append({
|
'play_path': m.group('play_path'),
|
||||||
'format_id': 'rtmp',
|
'page_url': url,
|
||||||
'ext': 'flv',
|
})
|
||||||
'url': m.group('url'),
|
|
||||||
'play_path': m.group('play_path'),
|
|
||||||
'page_url': url,
|
|
||||||
})
|
|
||||||
elif source.get('file').endswith('.m3u8'):
|
|
||||||
formats.append({
|
|
||||||
'format_id': 'hls',
|
|
||||||
'ext': source.get('type', 'mp4'),
|
|
||||||
'url': source['file'],
|
|
||||||
})
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
@ -117,8 +106,8 @@ class LnkGoIE(InfoExtractor):
|
|||||||
'title': title,
|
'title': title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnails': [thumbnail],
|
'thumbnails': [thumbnail],
|
||||||
'duration': duration,
|
'duration': int_or_none(config.get('VideoTime')),
|
||||||
'description': description,
|
'description': description,
|
||||||
'age_limit': age_limit,
|
'age_limit': self._AGE_LIMITS.get(config.get('PGRating'), 0),
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
}
|
}
|
||||||
|
@ -85,6 +85,7 @@ class LyndaIE(SubtitlesInfoExtractor):
|
|||||||
} for format_id, video_url in prioritized_streams['0'].items()
|
} for format_id, video_url in prioritized_streams['0'].items()
|
||||||
])
|
])
|
||||||
|
|
||||||
|
self._check_formats(formats, video_id)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
if self._downloader.params.get('listsubtitles', False):
|
if self._downloader.params.get('listsubtitles', False):
|
||||||
|
@ -1,7 +1,5 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import ExtractorError
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
@ -13,21 +11,22 @@ class MacGameStoreIE(InfoExtractor):
|
|||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450',
|
'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450',
|
||||||
'file': '2450.m4v',
|
|
||||||
'md5': '8649b8ea684b6666b4c5be736ecddc61',
|
'md5': '8649b8ea684b6666b4c5be736ecddc61',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '2450',
|
||||||
|
'ext': 'm4v',
|
||||||
'title': 'Crow',
|
'title': 'Crow',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
webpage = self._download_webpage(
|
||||||
|
url, video_id, 'Downloading trailer page')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id, 'Downloading trailer page')
|
if '>Missing Media<' in webpage:
|
||||||
|
raise ExtractorError(
|
||||||
if re.search(r'>Missing Media<', webpage) is not None:
|
'Trailer %s does not exist' % video_id, expected=True)
|
||||||
raise ExtractorError('Trailer %s does not exist' % video_id, expected=True)
|
|
||||||
|
|
||||||
video_title = self._html_search_regex(
|
video_title = self._html_search_regex(
|
||||||
r'<title>MacGameStore: (.*?) Trailer</title>', webpage, 'title')
|
r'<title>MacGameStore: (.*?) Trailer</title>', webpage, 'title')
|
||||||
|
@ -9,7 +9,7 @@ from ..compat import (
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
HEADRequest,
|
HEADRequest,
|
||||||
int_or_none,
|
str_to_int,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -85,15 +85,17 @@ class MixcloudIE(InfoExtractor):
|
|||||||
uploader_id = self._search_regex(
|
uploader_id = self._search_regex(
|
||||||
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
|
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
|
||||||
description = self._og_search_description(webpage)
|
description = self._og_search_description(webpage)
|
||||||
like_count = int_or_none(self._search_regex(
|
like_count = str_to_int(self._search_regex(
|
||||||
r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"',
|
[r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"',
|
||||||
|
r'/favorites/?">([0-9]+)<'],
|
||||||
webpage, 'like count', fatal=False))
|
webpage, 'like count', fatal=False))
|
||||||
view_count = int_or_none(self._search_regex(
|
view_count = str_to_int(self._search_regex(
|
||||||
r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
|
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
|
||||||
|
r'/listeners/?">([0-9,.]+)</a>'],
|
||||||
webpage, 'play count', fatal=False))
|
webpage, 'play count', fatal=False))
|
||||||
timestamp = parse_iso8601(self._search_regex(
|
timestamp = parse_iso8601(self._search_regex(
|
||||||
r'<time itemprop="dateCreated" datetime="([^"]+)">',
|
r'<time itemprop="dateCreated" datetime="([^"]+)">',
|
||||||
webpage, 'upload date'))
|
webpage, 'upload date', default=None))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': track_id,
|
'id': track_id,
|
||||||
|
@ -1,21 +1,19 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import int_or_none
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
class MporaIE(InfoExtractor):
|
class MporaIE(InfoExtractor):
|
||||||
_VALID_URL = r'^https?://(www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
|
_VALID_URL = r'https?://(www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'
|
||||||
IE_NAME = 'MPORA'
|
IE_NAME = 'MPORA'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://mpora.de/videos/AAdo8okx4wiz/embed?locale=de',
|
'url': 'http://mpora.de/videos/AAdo8okx4wiz/embed?locale=de',
|
||||||
'file': 'AAdo8okx4wiz.mp4',
|
|
||||||
'md5': 'a7a228473eedd3be741397cf452932eb',
|
'md5': 'a7a228473eedd3be741397cf452932eb',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'AAdo8okx4wiz',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Katy Curd - Winter in the Forest',
|
'title': 'Katy Curd - Winter in the Forest',
|
||||||
'duration': 416,
|
'duration': 416,
|
||||||
'uploader': 'Peter Newman Media',
|
'uploader': 'Peter Newman Media',
|
||||||
@ -23,14 +21,12 @@ class MporaIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
m = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = m.group('id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
data_json = self._search_regex(
|
data_json = self._search_regex(
|
||||||
r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json')
|
r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json')
|
||||||
|
data = self._parse_json(data_json, video_id)
|
||||||
data = json.loads(data_json)
|
|
||||||
|
|
||||||
uploader = data['info_overlay'].get('username')
|
uploader = data['info_overlay'].get('username')
|
||||||
duration = data['video']['duration'] // 1000
|
duration = data['video']['duration'] // 1000
|
||||||
|
@ -2,10 +2,11 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
|
compat_str,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
@ -22,7 +23,7 @@ def _media_xml_tag(tag):
|
|||||||
return '{http://search.yahoo.com/mrss/}%s' % tag
|
return '{http://search.yahoo.com/mrss/}%s' % tag
|
||||||
|
|
||||||
|
|
||||||
class MTVServicesInfoExtractor(InfoExtractor):
|
class MTVServicesInfoExtractor(SubtitlesInfoExtractor):
|
||||||
_MOBILE_TEMPLATE = None
|
_MOBILE_TEMPLATE = None
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -53,7 +54,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
webpage_url = self._MOBILE_TEMPLATE % mtvn_id
|
webpage_url = self._MOBILE_TEMPLATE % mtvn_id
|
||||||
req = compat_urllib_request.Request(webpage_url)
|
req = compat_urllib_request.Request(webpage_url)
|
||||||
# Otherwise we get a webpage that would execute some javascript
|
# Otherwise we get a webpage that would execute some javascript
|
||||||
req.add_header('Youtubedl-user-agent', 'curl/7')
|
req.add_header('User-Agent', 'curl/7')
|
||||||
webpage = self._download_webpage(req, mtvn_id,
|
webpage = self._download_webpage(req, mtvn_id,
|
||||||
'Downloading mobile page')
|
'Downloading mobile page')
|
||||||
metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url'))
|
metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url'))
|
||||||
@ -78,17 +79,42 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
try:
|
try:
|
||||||
_, _, ext = rendition.attrib['type'].partition('/')
|
_, _, ext = rendition.attrib['type'].partition('/')
|
||||||
rtmp_video_url = rendition.find('./src').text
|
rtmp_video_url = rendition.find('./src').text
|
||||||
formats.append({'ext': ext,
|
if rtmp_video_url.endswith('siteunavail.png'):
|
||||||
'url': self._transform_rtmp_url(rtmp_video_url),
|
continue
|
||||||
'format_id': rendition.get('bitrate'),
|
formats.append({
|
||||||
'width': int(rendition.get('width')),
|
'ext': ext,
|
||||||
'height': int(rendition.get('height')),
|
'url': self._transform_rtmp_url(rtmp_video_url),
|
||||||
})
|
'format_id': rendition.get('bitrate'),
|
||||||
|
'width': int(rendition.get('width')),
|
||||||
|
'height': int(rendition.get('height')),
|
||||||
|
})
|
||||||
except (KeyError, TypeError):
|
except (KeyError, TypeError):
|
||||||
raise ExtractorError('Invalid rendition field.')
|
raise ExtractorError('Invalid rendition field.')
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
|
def _extract_subtitles(self, mdoc, mtvn_id):
    """Collect caption URLs from a mediagen document, keyed by language.

    For every ``<transcript kind="captions">`` element the first
    ``<typographic>`` child whose ``format`` attribute equals the wanted
    format is used. The wanted format comes from the ``subtitlesformat``
    downloader parameter (translated through the alias table below) and
    falls back to ``'ttml'``.

    When the ``listsubtitles`` parameter is set, the collected subtitles
    are listed before the result of ``extract_subtitles`` is returned.
    """
    # User-facing format names -> value of the ``format`` attribute.
    format_aliases = {
        'scc': 'cea-608',
        'eia-608': 'cea-608',
        'xml': 'ttml',
    }
    params = self._downloader.params
    wanted_format = format_aliases.get(params.get('subtitlesformat'), 'ttml')

    subtitles = {}
    caption_transcripts = (
        node for node in mdoc.findall('.//transcript')
        if node.get('kind') == 'captions')
    for transcript in caption_transcripts:
        lang = transcript.get('srclang')
        for typographic in transcript.findall('./typographic'):
            if typographic.get('format') != wanted_format:
                continue
            # Only the first matching rendition per transcript is kept.
            subtitles[lang] = compat_str(typographic.get('src'))
            break

    if params.get('listsubtitles', False):
        self._list_available_subtitles(mtvn_id, subtitles)
    return self.extract_subtitles(mtvn_id, subtitles)
|
||||||
|
|
||||||
def _get_video_info(self, itemdoc):
|
def _get_video_info(self, itemdoc):
|
||||||
uri = itemdoc.find('guid').text
|
uri = itemdoc.find('guid').text
|
||||||
video_id = self._id_from_uri(uri)
|
video_id = self._id_from_uri(uri)
|
||||||
@ -135,6 +161,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
return {
|
return {
|
||||||
'title': title,
|
'title': title,
|
||||||
'formats': self._extract_video_formats(mediagen_doc, mtvn_id),
|
'formats': self._extract_video_formats(mediagen_doc, mtvn_id),
|
||||||
|
'subtitles': self._extract_subtitles(mediagen_doc, mtvn_id),
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'thumbnail': self._get_thumbnail_url(uri, itemdoc),
|
'thumbnail': self._get_thumbnail_url(uri, itemdoc),
|
||||||
'description': description,
|
'description': description,
|
||||||
@ -167,7 +194,11 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
|||||||
mgid = self._search_regex(
|
mgid = self._search_regex(
|
||||||
[r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
|
[r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
|
||||||
webpage, 'mgid')
|
webpage, 'mgid')
|
||||||
return self._get_videos_info(mgid)
|
|
||||||
|
videos_info = self._get_videos_info(mgid)
|
||||||
|
if self._downloader.params.get('listsubtitles', False):
|
||||||
|
return
|
||||||
|
return videos_info
|
||||||
|
|
||||||
|
|
||||||
class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
|
class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
|
||||||
@ -212,25 +243,14 @@ class MTVIE(MTVServicesInfoExtractor):
|
|||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
|
'url': 'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
|
||||||
'file': '853555.mp4',
|
|
||||||
'md5': '850f3f143316b1e71fa56a4edfd6e0f8',
|
'md5': '850f3f143316b1e71fa56a4edfd6e0f8',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '853555',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Taylor Swift - "Ours (VH1 Storytellers)"',
|
'title': 'Taylor Swift - "Ours (VH1 Storytellers)"',
|
||||||
'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
|
'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
|
||||||
'add_ie': ['Vevo'],
|
|
||||||
'url': 'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
|
|
||||||
'file': 'USCJY1331283.mp4',
|
|
||||||
'md5': '73b4e7fcadd88929292fe52c3ced8caf',
|
|
||||||
'info_dict': {
|
|
||||||
'title': 'Everything Has Changed',
|
|
||||||
'upload_date': '20130606',
|
|
||||||
'uploader': 'Taylor Swift',
|
|
||||||
},
|
|
||||||
'skip': 'VEVO is only available in some countries',
|
|
||||||
},
|
|
||||||
]
|
]
|
||||||
|
|
||||||
def _get_thumbnail_url(self, uri, itemdoc):
|
def _get_thumbnail_url(self, uri, itemdoc):
|
||||||
@ -244,8 +264,8 @@ class MTVIE(MTVServicesInfoExtractor):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
# Some videos come from Vevo.com
|
# Some videos come from Vevo.com
|
||||||
m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";',
|
m_vevo = re.search(
|
||||||
webpage, re.DOTALL)
|
r'(?s)isVevoVideo = true;.*?vevoVideoId = "(.*?)";', webpage)
|
||||||
if m_vevo:
|
if m_vevo:
|
||||||
vevo_id = m_vevo.group(1)
|
vevo_id = m_vevo.group(1)
|
||||||
self.to_screen('Vevo video detected: %s' % vevo_id)
|
self.to_screen('Vevo video detected: %s' % vevo_id)
|
||||||
|
@ -11,6 +11,7 @@ class NerdCubedFeedIE(InfoExtractor):
|
|||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.nerdcubed.co.uk/feed.json',
|
'url': 'http://www.nerdcubed.co.uk/feed.json',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'nerdcubed-feed',
|
||||||
'title': 'nerdcubed.co.uk feed',
|
'title': 'nerdcubed.co.uk feed',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 1300,
|
'playlist_mincount': 1300,
|
||||||
|
163
youtube_dl/extractor/nextmedia.py
Normal file
163
youtube_dl/extractor/nextmedia.py
Normal file
@ -0,0 +1,163 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import parse_iso8601
|
||||||
|
|
||||||
|
|
||||||
|
class NextMediaIE(InfoExtractor):
    """Extractor for news pages on hk.apple.nextmedia.com.

    The page-parsing steps are split into small ``_fetch_*`` hooks and the
    ``_URL_PATTERN`` attribute so that subclasses for sibling sites can
    override only the parts that differ.
    """
    # Dots in the host name are escaped so they only match a literal '.'.
    _VALID_URL = r'http://hk\.apple\.nextmedia\.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://hk.apple.nextmedia.com/realtime/news/20141108/53109199',
        'md5': 'dff9fad7009311c421176d1ac90bfe4f',
        'info_dict': {
            'id': '53109199',
            'ext': 'mp4',
            'title': '【佔領金鐘】50外國領事議員撐場 讚學生勇敢香港有希望',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:28222b9912b6665a21011b034c70fcc7',
            'timestamp': 1415456273,
            'upload_date': '20141108',
        }
    }]

    # Regex whose first group captures the media URL inside the page source.
    _URL_PATTERN = r'\{ url: \'(.+)\' \}'

    def _real_extract(self, url):
        """Download the page at *url* and extract the video it embeds."""
        news_id = self._match_id(url)
        page = self._download_webpage(url, news_id)
        return self._extract_from_nextmedia_page(news_id, url, page)

    def _extract_from_nextmedia_page(self, news_id, url, page):
        """Build the info dict for *news_id* from an already downloaded page.

        *url* is only used to derive ``upload_date`` when the page yields
        no timestamp.
        """
        title = self._fetch_title(page)
        video_url = self._search_regex(self._URL_PATTERN, page, 'video url')

        attrs = {
            'id': news_id,
            'title': title,
            'url': video_url,  # ext can be inferred from url
            'thumbnail': self._fetch_thumbnail(page),
            'description': self._fetch_description(page),
        }

        # Prefer the timestamp from the page; otherwise fall back to the
        # date component of the URL.
        timestamp = self._fetch_timestamp(page)
        if timestamp:
            attrs['timestamp'] = timestamp
        else:
            attrs['upload_date'] = self._fetch_upload_date(url)

        return attrs

    def _fetch_title(self, page):
        """Return the title taken from the page's Open Graph data."""
        return self._og_search_title(page)

    def _fetch_thumbnail(self, page):
        """Return the thumbnail URL taken from the page's Open Graph data."""
        return self._og_search_thumbnail(page)

    def _fetch_timestamp(self, page):
        """Return the parsed ``dateCreated`` value, or None if it is absent.

        The lookup is non-fatal: the timestamp is optional metadata and the
        caller falls back to the date in the URL when nothing is returned.
        """
        date_created = self._search_regex(
            r'"dateCreated":"([^"]+)"', page, 'created time', fatal=False)
        return parse_iso8601(date_created)

    def _fetch_upload_date(self, url):
        """Return the ``date`` group of ``_VALID_URL`` matched against *url*."""
        return self._search_regex(self._VALID_URL, url, 'upload date', group='date')

    def _fetch_description(self, page):
        """Return the description taken from the page's Open Graph data."""
        return self._og_search_property('description', page)
|
||||||
|
|
||||||
|
|
||||||
|
class NextMediaActionNewsIE(NextMediaIE):
    """Extractor for hk.dv.nextmedia.com "actionnews" pages.

    The actionnews page is only used to find the article URL (its
    ``og:url``); the article page is then parsed by the inherited
    ``_extract_from_nextmedia_page``.
    """
    # Dots in the host name are escaped so they only match a literal '.'.
    _VALID_URL = r'http://hk\.dv\.nextmedia\.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+'
    _TESTS = [{
        'url': 'http://hk.dv.nextmedia.com/actionnews/hit/20150121/19009428/20061460',
        'md5': '05fce8ffeed7a5e00665d4b7cf0f9201',
        'info_dict': {
            'id': '19009428',
            'ext': 'mp4',
            'title': '【壹週刊】細10年男友偷食 50歲邵美琪再失戀',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:cd802fad1f40fd9ea178c1e2af02d659',
            'timestamp': 1421791200,
            'upload_date': '20150120',
        }
    }]

    def _real_extract(self, url):
        """Follow the actionnews page to its article page and extract from it."""
        news_id = self._match_id(url)
        actionnews_page = self._download_webpage(url, news_id)
        article_url = self._og_search_url(actionnews_page)
        article_page = self._download_webpage(article_url, news_id)
        # The original URL (not the article URL) is passed on; it is the one
        # matching this class's _VALID_URL for the upload-date fallback.
        return self._extract_from_nextmedia_page(news_id, url, article_page)
|
||||||
|
|
||||||
|
|
||||||
|
class AppleDailyRealtimeNewsIE(NextMediaIE):
    """Extractor for realtimenews/enews articles on appledaily.com.tw.

    Reuses the NextMediaIE flow with a different media-URL pattern and
    page-specific title/thumbnail lookups. No timestamp is read from the
    page, so the upload date comes from the URL.
    """
    # Dots are escaped and the alternations are non-capturing; only the
    # named groups ``date`` and ``id`` are consumed.
    _VALID_URL = r'http://(?:www|ent)\.appledaily\.com\.tw/(?:realtimenews|enews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(?:/.*)?'
    _TESTS = [{
        'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
        'md5': 'a843ab23d150977cc55ef94f1e2c1e4d',
        'info_dict': {
            'id': '36354694',
            'ext': 'mp4',
            'title': '周亭羽走過摩鐵陰霾2男陪吃 九把刀孤寒看醫生',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:b23787119933404ce515c6356a8c355c',
            'upload_date': '20150128',
        }
    }, {
        'url': 'http://www.appledaily.com.tw/realtimenews/article/strange/20150128/550549/%E4%B8%8D%E6%BB%BF%E8%A2%AB%E8%B8%A9%E8%85%B3%E3%80%80%E5%B1%B1%E6%9D%B1%E5%85%A9%E5%A4%A7%E5%AA%BD%E4%B8%80%E8%B7%AF%E6%89%93%E4%B8%8B%E8%BB%8A',
        'md5': '86b4e9132d158279c7883822d94ccc49',
        'info_dict': {
            'id': '550549',
            'ext': 'mp4',
            'title': '不滿被踩腳 山東兩大媽一路打下車',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:2648aaf6fc4f401f6de35a91d111aa1d',
            'upload_date': '20150128',
        }
    }]

    # Same idea as the parent's pattern, but without inner spaces.
    _URL_PATTERN = r'\{url: \'(.+)\'\}'

    def _fetch_title(self, page):
        """Return the text of the ``<h1 id="h1">`` heading."""
        return self._html_search_regex(r'<h1 id="h1">([^<>]+)</h1>', page, 'news title')

    def _fetch_thumbnail(self, page):
        """Return the ``setInitialImage('...')`` argument (non-fatal lookup)."""
        return self._html_search_regex(r"setInitialImage\(\'([^']+)'\)", page, 'video thumbnail', fatal=False)

    def _fetch_timestamp(self, page):
        """Return None so the caller falls back to the date in the URL."""
        return None
|
||||||
|
|
||||||
|
|
||||||
|
class AppleDailyAnimationNewsIE(AppleDailyRealtimeNewsIE):
    """Extractor for /animation/ articles on www.appledaily.com.tw.

    Both the title and the description are read from the page's
    ``description`` meta tag.
    """
    # Raw string: the pattern contains ``\d``, which is an invalid escape
    # sequence in a normal string literal. Dots are escaped as well.
    _VALID_URL = r'http://www\.appledaily\.com\.tw/animation/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(?:/.*)?'
    _TESTS = [{
        'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003671',
        'md5': '03df296d95dedc2d5886debbb80cb43f',
        'info_dict': {
            'id': '5003671',
            'ext': 'mp4',
            'title': '20正妹熱舞 《刀龍傳說Online》火辣上市',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:23c0aac567dc08c9c16a3161a2c2e3cd',
            'upload_date': '20150128',
        }
    }, {
        # No thumbnail
        'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003673/',
        'md5': 'b06182cd386ea7bc6115ec7ff0f72aeb',
        'info_dict': {
            'id': '5003673',
            'ext': 'mp4',
            'title': '半夜尿尿 好像會看到___',
            'description': 'md5:61d2da7fe117fede148706cdb85ac066',
            'upload_date': '20150128',
        },
        'expected_warnings': [
            'video thumbnail',
        ]
    }]

    def _fetch_title(self, page):
        """Return the content of the ``description`` meta tag as the title."""
        return self._html_search_meta('description', page, 'news title')

    def _fetch_description(self, page):
        """Return the content of the ``description`` meta tag."""
        return self._html_search_meta('description', page, 'news description')
|
@ -20,6 +20,12 @@ class NHLBaseInfoExtractor(InfoExtractor):
|
|||||||
def _fix_json(json_string):
|
def _fix_json(json_string):
|
||||||
return json_string.replace('\\\'', '\'')
|
return json_string.replace('\\\'', '\'')
|
||||||
|
|
||||||
|
def _real_extract_video(self, video_id):
    """Fetch the videocenter playlist for *video_id* and extract its first item.

    The response is passed through ``_fix_json`` before being parsed.
    """
    playlist = self._download_json(
        'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id,
        video_id, transform_source=self._fix_json)
    first_entry = playlist[0]
    return self._extract_video(first_entry)
|
||||||
|
|
||||||
def _extract_video(self, info):
|
def _extract_video(self, info):
|
||||||
video_id = info['id']
|
video_id = info['id']
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
@ -54,7 +60,7 @@ class NHLBaseInfoExtractor(InfoExtractor):
|
|||||||
|
|
||||||
class NHLIE(NHLBaseInfoExtractor):
|
class NHLIE(NHLBaseInfoExtractor):
|
||||||
IE_NAME = 'nhl.com'
|
IE_NAME = 'nhl.com'
|
||||||
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
|
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console)?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
|
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
|
||||||
@ -92,15 +98,41 @@ class NHLIE(NHLBaseInfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://video.flames.nhl.com/videocenter/console?id=630616',
|
'url': 'http://video.flames.nhl.com/videocenter/console?id=630616',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://video.nhl.com/videocenter/?id=736722',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
return self._real_extract_video(video_id)
|
||||||
json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
|
|
||||||
data = self._download_json(
|
|
||||||
json_url, video_id, transform_source=self._fix_json)
|
class NHLNewsIE(NHLBaseInfoExtractor):
|
||||||
return self._extract_video(data[0])
|
IE_NAME = 'nhl.com:news'
|
||||||
|
IE_DESC = 'NHL news'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?nhl\.com/ice/news\.html?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.nhl.com/ice/news.htm?id=750727',
|
||||||
|
'md5': '4b3d1262e177687a3009937bd9ec0be8',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '736722',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Cal Clutterbuck has been fined $2,000',
|
||||||
|
'description': 'md5:45fe547d30edab88b23e0dd0ab1ed9e6',
|
||||||
|
'duration': 37,
|
||||||
|
'upload_date': '20150128',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
news_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, news_id)
|
||||||
|
video_id = self._search_regex(
|
||||||
|
[r'pVid(\d+)', r"nlid\s*:\s*'(\d+)'"],
|
||||||
|
webpage, 'video id')
|
||||||
|
return self._real_extract_video(video_id)
|
||||||
|
|
||||||
|
|
||||||
class NHLVideocenterIE(NHLBaseInfoExtractor):
|
class NHLVideocenterIE(NHLBaseInfoExtractor):
|
||||||
|
68
youtube_dl/extractor/ntvde.py
Normal file
68
youtube_dl/extractor/ntvde.py
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
parse_duration,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class NTVDeIE(InfoExtractor):
    """Extractor for videos in the n-tv.de Mediathek."""
    IE_NAME = 'n-tv.de'
    _VALID_URL = r'https?://(?:www\.)?n-tv\.de/mediathek/videos/[^/?#]+/[^/?#]+-article(?P<id>.+)\.html'

    _TESTS = [{
        'url': 'http://www.n-tv.de/mediathek/videos/panorama/Schnee-und-Glaette-fuehren-zu-zahlreichen-Unfaellen-und-Staus-article14438086.html',
        'md5': '6ef2514d4b1e8e03ca24b49e2f167153',
        'info_dict': {
            'id': '14438086',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
            'title': 'Schnee und Glätte führen zu zahlreichen Unfällen und Staus',
            'alt_title': 'Winterchaos auf deutschen Straßen',
            'description': 'Schnee und Glätte sorgen deutschlandweit für einen chaotischen Start in die Woche: Auf den Straßen kommt es zu kilometerlangen Staus und Dutzenden Glätteunfällen. In Düsseldorf und München wirbelt der Schnee zudem den Flugplan durcheinander. Dutzende Flüge landen zu spät, einige fallen ganz aus.',
            'duration': 4020,
            'timestamp': 1422892797,
            'upload_date': '20150202',
        },
    }]

    def _real_extract(self, url):
        """Extract metadata and formats from an n-tv.de video page.

        Two JavaScript object literals are read from the page: the article
        info (``ntv.pageInfo.article``) and the player configuration passed
        to ``$("#player").data("player", ...)``. Both are converted with
        ``js_to_json`` before being parsed.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Dots are escaped so only the literal ``ntv.pageInfo.article`` matches.
        info = self._parse_json(self._search_regex(
            r'(?s)ntv\.pageInfo\.article =\s(\{.*?\});', webpage, 'info'),
            video_id, transform_source=js_to_json)
        timestamp = int_or_none(info.get('publishedDateAsUnixTimeStamp'))
        vdata = self._parse_json(self._search_regex(
            r'(?s)\$\(\s*"\#player"\s*\)\s*\.data\(\s*"player",\s*(\{.*?\})\);',
            webpage, 'player data'),
            video_id, transform_source=js_to_json)
        duration = parse_duration(vdata.get('duration'))

        # Each stream variant is optional: a missing key skips only that
        # variant instead of aborting the whole extraction with a KeyError.
        formats = []
        if vdata.get('video'):
            formats.append({
                'format_id': 'flash',
                'url': 'rtmp://fms.n-tv.de/' + vdata['video'],
            })
        if vdata.get('videoMp4'):
            formats.append({
                'format_id': 'mobile',
                'url': 'http://video.n-tv.de' + vdata['videoMp4'],
                'tbr': 400,  # estimation
            })
        if vdata.get('videoM3u8'):
            m3u8_url = 'http://video.n-tv.de' + vdata['videoM3u8']
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, ext='mp4',
                entry_protocol='m3u8_native', preference=0))
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': info['headline'],
            'description': info.get('intro'),
            'alt_title': info.get('kicker'),
            'timestamp': timestamp,
            'thumbnail': vdata.get('html5VideoPoster'),
            'duration': duration,
            'formats': formats,
        }
|
@ -1,15 +1,14 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
unescapeHTML
|
unescapeHTML
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class NTVIE(InfoExtractor):
|
class NTVRuIE(InfoExtractor):
|
||||||
|
IE_NAME = 'ntv.ru'
|
||||||
_VALID_URL = r'http://(?:www\.)?ntv\.ru/(?P<id>.+)'
|
_VALID_URL = r'http://(?:www\.)?ntv\.ru/(?P<id>.+)'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
@ -92,9 +91,7 @@ class NTVIE(InfoExtractor):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
page = self._download_webpage(url, video_id)
|
page = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, page, 'video id')
|
video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, page, 'video id')
|
@ -6,12 +6,13 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class RingTVIE(InfoExtractor):
|
class RingTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)'
|
_VALID_URL = r'http://(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
"url": "http://ringtv.craveonline.com/news/310833-luis-collazo-says-victor-ortiz-better-not-quit-on-jan-30",
|
"url": "http://ringtv.craveonline.com/news/310833-luis-collazo-says-victor-ortiz-better-not-quit-on-jan-30",
|
||||||
"file": "857645.mp4",
|
|
||||||
"md5": "d25945f5df41cdca2d2587165ac28720",
|
"md5": "d25945f5df41cdca2d2587165ac28720",
|
||||||
"info_dict": {
|
"info_dict": {
|
||||||
|
'id': '857645',
|
||||||
|
'ext': 'mp4',
|
||||||
"title": 'Video: Luis Collazo says Victor Ortiz "better not quit on Jan. 30" - Ring TV',
|
"title": 'Video: Luis Collazo says Victor Ortiz "better not quit on Jan. 30" - Ring TV',
|
||||||
"description": 'Luis Collazo is excited about his Jan. 30 showdown with fellow former welterweight titleholder Victor Ortiz at Barclays Center in his hometown of Brooklyn. The SuperBowl week fight headlines a Golden Boy Live! card on Fox Sports 1.',
|
"description": 'Luis Collazo is excited about his Jan. 30 showdown with fellow former welterweight titleholder Victor Ortiz at Barclays Center in his hometown of Brooklyn. The SuperBowl week fight headlines a Golden Boy Live! card on Fox Sports 1.',
|
||||||
}
|
}
|
||||||
|
@ -10,8 +10,9 @@ class RottenTomatoesIE(VideoDetectiveIE):
|
|||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/',
|
'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/',
|
||||||
'file': '613340.mp4',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '613340',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'TOY STORY 3',
|
'title': 'TOY STORY 3',
|
||||||
'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.',
|
'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.',
|
||||||
},
|
},
|
||||||
|
72
youtube_dl/extractor/rtl2.py
Normal file
72
youtube_dl/extractor/rtl2.py
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class RTL2IE(InfoExtractor):
    """Extractor for videos on rtl2.de."""
    # ``https?`` (not ``http?``): accept both http and https URLs.
    _VALID_URL = r'https?://(?:www\.)?rtl2\.de/[^?#]*?/(?P<id>[^?#/]*?)(?:$|/(?:$|[?#]))'
    _TESTS = [{
        'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0',
        'md5': 'bfcc179030535b08dc2b36b469b5adc7',
        'info_dict': {
            'id': 'folge-203-0',
            'ext': 'f4v',
            'title': 'GRIP sucht den Sommerkönig',
            'description': 'Matthias, Det und Helge treten gegeneinander an.'
        },
    }, {
        'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/',
        'md5': 'ffcd517d2805b57ce11a58a2980c2b02',
        'info_dict': {
            'id': '21040-anna-erwischt-alex',
            'ext': 'mp4',
            'title': 'Anna erwischt Alex!',
            'description': 'Anna ist Alex\' Tochter bei Köln 50667.'
        },
    }]

    def _real_extract(self, url):
        """Extract a single RTMP format plus metadata for an rtl2.de video page.

        The page supplies two numeric ids (``vico_id`` and ``vivi_id``)
        which are used to query ``get_video.php`` for the video metadata
        and stream URL.
        """
        # Some rtl2 urls have no slash at the end, so append it.
        if not url.endswith('/'):
            url += '/'

        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vico_id = self._html_search_regex(
            r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
        vivi_id = self._html_search_regex(
            r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
        info_url = 'http://www.rtl2.de/video/php/get_video.php?vico_id=' + vico_id + '&vivi_id=' + vivi_id

        # The info URL is fetched once, as JSON; no separate raw download
        # of the same resource is needed.
        info = self._download_json(info_url, video_id)
        video_info = info['video']
        title = video_info['titel']
        description = video_info.get('beschreibung')
        thumbnail = video_info.get('image')

        download_url = video_info['streamurl']
        # Strip backslashes from the stream URL.
        download_url = download_url.replace('\\', '')
        # The RTMP play path is everything after "ondemand/" in the stream URL.
        stream_url = 'mp4:' + self._html_search_regex(r'ondemand/(.*)', download_url, 'stream URL')
        rtmp_conn = ["S:connect", "O:1", "NS:pageUrl:" + url, "NB:fpad:0", "NN:videoFunction:1", "O:0"]

        formats = [{
            'url': download_url,
            'play_path': stream_url,
            'player_url': 'http://www.rtl2.de/flashplayer/vipo_player.swf',
            'page_url': url,
            'flash_version': 'LNX 11,2,202,429',
            'rtmp_conn': rtmp_conn,
            'no_resume': True,
        }]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'description': description,
            'formats': formats,
        }
|
@ -57,7 +57,7 @@ def _decrypt_url(png):
|
|||||||
class RTVEALaCartaIE(InfoExtractor):
|
class RTVEALaCartaIE(InfoExtractor):
|
||||||
IE_NAME = 'rtve.es:alacarta'
|
IE_NAME = 'rtve.es:alacarta'
|
||||||
IE_DESC = 'RTVE a la carta'
|
IE_DESC = 'RTVE a la carta'
|
||||||
_VALID_URL = r'http://www\.rtve\.es/alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'
|
_VALID_URL = r'http://www\.rtve\.es/(m/)?alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
|
'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
|
||||||
@ -74,7 +74,11 @@ class RTVEALaCartaIE(InfoExtractor):
|
|||||||
'id': '1694255',
|
'id': '1694255',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'TODO',
|
'title': 'TODO',
|
||||||
}
|
},
|
||||||
|
'skip': 'The f4m manifest can\'t be used yet',
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -86,6 +90,18 @@ class RTVEALaCartaIE(InfoExtractor):
|
|||||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
|
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
|
||||||
png = self._download_webpage(png_url, video_id, 'Downloading url information')
|
png = self._download_webpage(png_url, video_id, 'Downloading url information')
|
||||||
video_url = _decrypt_url(png)
|
video_url = _decrypt_url(png)
|
||||||
|
if not video_url.endswith('.f4m'):
|
||||||
|
auth_url = video_url.replace(
|
||||||
|
'resources/', 'auth/resources/'
|
||||||
|
).replace('.net.rtve', '.multimedia.cdn.rtve')
|
||||||
|
video_path = self._download_webpage(
|
||||||
|
auth_url, video_id, 'Getting video url')
|
||||||
|
# Use mvod.akcdn instead of flash.akamaihd.multimedia.cdn to get
|
||||||
|
# the right Content-Length header and the mp4 format
|
||||||
|
video_url = (
|
||||||
|
'http://mvod.akcdn.rtve.es/{0}&v=2.6.8'
|
||||||
|
'&fp=MAC%2016,0,0,296&r=MRUGG&g=OEOJWFXNFGCP'.format(video_path)
|
||||||
|
)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -162,10 +162,8 @@ class RUTVIE(InfoExtractor):
|
|||||||
'vbr': int(quality),
|
'vbr': int(quality),
|
||||||
}
|
}
|
||||||
elif transport == 'm3u8':
|
elif transport == 'm3u8':
|
||||||
fmt = {
|
formats.extend(self._extract_m3u8_formats(url, video_id, 'mp4'))
|
||||||
'url': url,
|
continue
|
||||||
'ext': 'mp4',
|
|
||||||
}
|
|
||||||
else:
|
else:
|
||||||
fmt = {
|
fmt = {
|
||||||
'url': url
|
'url': url
|
||||||
|
@ -1,7 +1,5 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
@ -13,10 +11,15 @@ class ServingSysIE(InfoExtractor):
|
|||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://bs.serving-sys.com/BurstingPipe/adServer.bs?cn=is&c=23&pl=VAST&pli=5349193&PluID=0&pos=7135&ord=[timestamp]&cim=1?',
|
'url': 'http://bs.serving-sys.com/BurstingPipe/adServer.bs?cn=is&c=23&pl=VAST&pli=5349193&PluID=0&pos=7135&ord=[timestamp]&cim=1?',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5349193',
|
||||||
|
'title': 'AdAPPter_Hyundai_demo',
|
||||||
|
},
|
||||||
'playlist': [{
|
'playlist': [{
|
||||||
'file': '29955898.flv',
|
|
||||||
'md5': 'baed851342df6846eb8677a60a011a0f',
|
'md5': 'baed851342df6846eb8677a60a011a0f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '29955898',
|
||||||
|
'ext': 'flv',
|
||||||
'title': 'AdAPPter_Hyundai_demo (1)',
|
'title': 'AdAPPter_Hyundai_demo (1)',
|
||||||
'duration': 74,
|
'duration': 74,
|
||||||
'tbr': 1378,
|
'tbr': 1378,
|
||||||
@ -24,9 +27,10 @@ class ServingSysIE(InfoExtractor):
|
|||||||
'height': 400,
|
'height': 400,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'file': '29907998.flv',
|
|
||||||
'md5': '979b4da2655c4bc2d81aeb915a8c5014',
|
'md5': '979b4da2655c4bc2d81aeb915a8c5014',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '29907998',
|
||||||
|
'ext': 'flv',
|
||||||
'title': 'AdAPPter_Hyundai_demo (2)',
|
'title': 'AdAPPter_Hyundai_demo (2)',
|
||||||
'duration': 34,
|
'duration': 34,
|
||||||
'width': 854,
|
'width': 854,
|
||||||
@ -37,14 +41,13 @@ class ServingSysIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'playlistend': 2,
|
'playlistend': 2,
|
||||||
},
|
},
|
||||||
'skip': 'Blocked in the US [sic]',
|
'_skip': 'Blocked in the US [sic]',
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
pl_id = self._match_id(url)
|
||||||
pl_id = mobj.group('id')
|
|
||||||
|
|
||||||
vast_doc = self._download_xml(url, pl_id)
|
vast_doc = self._download_xml(url, pl_id)
|
||||||
|
|
||||||
title = vast_doc.find('.//AdTitle').text
|
title = vast_doc.find('.//AdTitle').text
|
||||||
media = vast_doc.find('.//MediaFile').text
|
media = vast_doc.find('.//MediaFile').text
|
||||||
info_url = self._search_regex(r'&adData=([^&]+)&', media, 'info URL')
|
info_url = self._search_regex(r'&adData=([^&]+)&', media, 'info URL')
|
||||||
|
@ -11,7 +11,7 @@ from ..compat import (
|
|||||||
|
|
||||||
|
|
||||||
class SinaIE(InfoExtractor):
|
class SinaIE(InfoExtractor):
|
||||||
_VALID_URL = r'''https?://(.*?\.)?video\.sina\.com\.cn/
|
_VALID_URL = r'''(?x)https?://(.*?\.)?video\.sina\.com\.cn/
|
||||||
(
|
(
|
||||||
(.+?/(((?P<pseudo_id>\d+).html)|(.*?(\#|(vid=)|b/)(?P<id>\d+?)($|&|\-))))
|
(.+?/(((?P<pseudo_id>\d+).html)|(.*?(\#|(vid=)|b/)(?P<id>\d+?)($|&|\-))))
|
||||||
|
|
|
|
||||||
@ -23,9 +23,10 @@ class SinaIE(InfoExtractor):
|
|||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898',
|
'url': 'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898',
|
||||||
'file': '110028898.flv',
|
|
||||||
'md5': 'd65dd22ddcf44e38ce2bf58a10c3e71f',
|
'md5': 'd65dd22ddcf44e38ce2bf58a10c3e71f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '110028898',
|
||||||
|
'ext': 'flv',
|
||||||
'title': '《中国新闻》 朝鲜要求巴拿马立即释放被扣船员',
|
'title': '《中国新闻》 朝鲜要求巴拿马立即释放被扣船员',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@ -39,10 +40,6 @@ class SinaIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def suitable(cls, url):
|
|
||||||
return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None
|
|
||||||
|
|
||||||
def _extract_video(self, video_id):
|
def _extract_video(self, video_id):
|
||||||
data = compat_urllib_parse.urlencode({'vid': video_id})
|
data = compat_urllib_parse.urlencode({'vid': video_id})
|
||||||
url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
|
url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
|
||||||
@ -59,7 +56,7 @@ class SinaIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
if mobj.group('token') is not None:
|
if mobj.group('token') is not None:
|
||||||
# The video id is in the redirected url
|
# The video id is in the redirected url
|
||||||
|
@ -102,12 +102,13 @@ class SmotriIE(InfoExtractor):
|
|||||||
'uploader_id': 'mopeder',
|
'uploader_id': 'mopeder',
|
||||||
'duration': 71,
|
'duration': 71,
|
||||||
'thumbnail': 'http://frame9.loadup.ru/d7/32/2888853.2.3.jpg',
|
'thumbnail': 'http://frame9.loadup.ru/d7/32/2888853.2.3.jpg',
|
||||||
|
'upload_date': '20150114',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# swf player
|
# swf player
|
||||||
{
|
{
|
||||||
'url': 'http://pics.smotri.com/scrubber_custom8.swf?file=v9188090500',
|
'url': 'http://pics.smotri.com/scrubber_custom8.swf?file=v9188090500',
|
||||||
'md5': '4d47034979d9390d14acdf59c4935bc2',
|
'md5': '31099eeb4bc906712c5f40092045108d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'v9188090500',
|
'id': 'v9188090500',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -138,9 +139,6 @@ class SmotriIE(InfoExtractor):
|
|||||||
def _search_meta(self, name, html, display_name=None):
|
def _search_meta(self, name, html, display_name=None):
|
||||||
if display_name is None:
|
if display_name is None:
|
||||||
display_name = name
|
display_name = name
|
||||||
return self._html_search_regex(
|
|
||||||
r'<meta itemprop="%s" content="([^"]+)" />' % re.escape(name),
|
|
||||||
html, display_name, fatal=False)
|
|
||||||
return self._html_search_meta(name, html, display_name)
|
return self._html_search_meta(name, html, display_name)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -246,6 +246,7 @@ class SoundcloudSetIE(SoundcloudIE):
|
|||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
|
'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '2284613',
|
||||||
'title': 'The Royal Concept EP',
|
'title': 'The Royal Concept EP',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 6,
|
'playlist_mincount': 6,
|
||||||
@ -279,7 +280,7 @@ class SoundcloudSetIE(SoundcloudIE):
|
|||||||
return {
|
return {
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
'entries': [self._extract_info_dict(track, secret_token=token) for track in info['tracks']],
|
'entries': [self._extract_info_dict(track, secret_token=token) for track in info['tracks']],
|
||||||
'id': info['id'],
|
'id': '%s' % info['id'],
|
||||||
'title': info['title'],
|
'title': info['title'],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4,14 +4,7 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import compat_urlparse
|
||||||
compat_urlparse,
|
|
||||||
compat_HTTPError,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
|
||||||
HEADRequest,
|
|
||||||
ExtractorError,
|
|
||||||
)
|
|
||||||
from .spiegeltv import SpiegeltvIE
|
from .spiegeltv import SpiegeltvIE
|
||||||
|
|
||||||
|
|
||||||
@ -72,16 +65,6 @@ class SpiegelIE(InfoExtractor):
|
|||||||
if n.tag.startswith('type') and n.tag != 'type6':
|
if n.tag.startswith('type') and n.tag != 'type6':
|
||||||
format_id = n.tag.rpartition('type')[2]
|
format_id = n.tag.rpartition('type')[2]
|
||||||
video_url = base_url + n.find('./filename').text
|
video_url = base_url + n.find('./filename').text
|
||||||
# Test video URLs beforehand as some of them are invalid
|
|
||||||
try:
|
|
||||||
self._request_webpage(
|
|
||||||
HEADRequest(video_url), video_id,
|
|
||||||
'Checking %s video URL' % format_id)
|
|
||||||
except ExtractorError as e:
|
|
||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
|
||||||
self.report_warning(
|
|
||||||
'%s video URL is invalid, skipping' % format_id, video_id)
|
|
||||||
continue
|
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
@ -94,6 +77,7 @@ class SpiegelIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
duration = float(idoc[0].findall('./duration')[0].text)
|
duration = float(idoc[0].findall('./duration')[0].text)
|
||||||
|
|
||||||
|
self._check_formats(formats, video_id)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -1,14 +1,12 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .mtv import MTVServicesInfoExtractor
|
from .mtv import MTVServicesInfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class SpikeIE(MTVServicesInfoExtractor):
|
class SpikeIE(MTVServicesInfoExtractor):
|
||||||
_VALID_URL = r'''(?x)https?://
|
_VALID_URL = r'''(?x)https?://
|
||||||
(www\.spike\.com/(video-clips|episodes)/.+|
|
(?:www\.spike\.com/(?:video-clips|(?:full-)?episodes)/.+|
|
||||||
m\.spike\.com/videos/video.rbml\?id=(?P<mobile_id>[^&]+))
|
m\.spike\.com/videos/video\.rbml\?id=(?P<id>[^&]+))
|
||||||
'''
|
'''
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle',
|
'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle',
|
||||||
@ -25,8 +23,7 @@ class SpikeIE(MTVServicesInfoExtractor):
|
|||||||
_MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s'
|
_MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.search(self._VALID_URL, url)
|
mobile_id = self._match_id(url)
|
||||||
mobile_id = mobj.group('mobile_id')
|
if mobile_id:
|
||||||
if mobile_id is not None:
|
|
||||||
url = 'http://www.spike.com/video-clips/%s' % mobile_id
|
url = 'http://www.spike.com/video-clips/%s' % mobile_id
|
||||||
return super(SpikeIE, self)._real_extract(url)
|
return super(SpikeIE, self)._real_extract(url)
|
||||||
|
@ -8,7 +8,7 @@ from ..utils import js_to_json
|
|||||||
|
|
||||||
|
|
||||||
class SRMediathekIE(InfoExtractor):
|
class SRMediathekIE(InfoExtractor):
|
||||||
IE_DESC = 'Süddeutscher Rundfunk'
|
IE_DESC = 'Saarländischer Rundfunk'
|
||||||
_VALID_URL = r'https?://sr-mediathek\.sr-online\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://sr-mediathek\.sr-online\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
|
@ -10,17 +10,19 @@ class TeamcocoIE(InfoExtractor):
|
|||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
|
'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
|
||||||
'file': '80187.mp4',
|
|
||||||
'md5': '3f7746aa0dc86de18df7539903d399ea',
|
'md5': '3f7746aa0dc86de18df7539903d399ea',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '80187',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': 'Conan Becomes A Mary Kay Beauty Consultant',
|
'title': 'Conan Becomes A Mary Kay Beauty Consultant',
|
||||||
'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
|
'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
|
'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
|
||||||
'file': '19705.mp4',
|
|
||||||
'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
|
'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '19705',
|
||||||
|
'ext': 'mp4',
|
||||||
"description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
|
"description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
|
||||||
"title": "Louis C.K. Interview Pt. 1 11/3/11"
|
"title": "Louis C.K. Interview Pt. 1 11/3/11"
|
||||||
}
|
}
|
||||||
@ -36,7 +38,7 @@ class TeamcocoIE(InfoExtractor):
|
|||||||
video_id = mobj.group("video_id")
|
video_id = mobj.group("video_id")
|
||||||
if not video_id:
|
if not video_id:
|
||||||
video_id = self._html_search_regex(
|
video_id = self._html_search_regex(
|
||||||
r'data-node-id="(\d+?)"',
|
r'<div\s+class="player".*?data-id="(\d+?)"',
|
||||||
webpage, 'video id')
|
webpage, 'video id')
|
||||||
|
|
||||||
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
|
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
|
||||||
|
@ -11,6 +11,7 @@ class TeleTaskIE(InfoExtractor):
|
|||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.tele-task.de/archive/video/html5/26168/',
|
'url': 'http://www.tele-task.de/archive/video/html5/26168/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '26168',
|
||||||
'title': 'Duplicate Detection',
|
'title': 'Duplicate Detection',
|
||||||
},
|
},
|
||||||
'playlist': [{
|
'playlist': [{
|
||||||
@ -34,7 +35,6 @@ class TeleTaskIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
lecture_id = self._match_id(url)
|
lecture_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, lecture_id)
|
webpage = self._download_webpage(url, lecture_id)
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
|
@ -1,7 +1,10 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import int_or_none
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
qualities,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestTubeIE(InfoExtractor):
|
class TestTubeIE(InfoExtractor):
|
||||||
@ -46,13 +49,22 @@ class TestTubeIE(InfoExtractor):
|
|||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
duration = int_or_none(info.get('duration'))
|
duration = int_or_none(info.get('duration'))
|
||||||
|
images = info.get('images')
|
||||||
|
thumbnails = None
|
||||||
|
preference = qualities(['mini', 'small', 'medium', 'large'])
|
||||||
|
if images:
|
||||||
|
thumbnails = [{
|
||||||
|
'id': thumbnail_id,
|
||||||
|
'url': img_url,
|
||||||
|
'preference': preference(thumbnail_id)
|
||||||
|
} for thumbnail_id, img_url in images.items()]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': info['title'],
|
'title': info['title'],
|
||||||
'description': info.get('summary'),
|
'description': info.get('summary'),
|
||||||
'thumbnail': info.get('images', {}).get('large'),
|
'thumbnails': thumbnails,
|
||||||
'uploader': info.get('show', {}).get('name'),
|
'uploader': info.get('show', {}).get('name'),
|
||||||
'uploader_id': info.get('show', {}).get('slug'),
|
'uploader_id': info.get('show', {}).get('slug'),
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
|
@ -16,8 +16,9 @@ class TouTvIE(InfoExtractor):
|
|||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.tou.tv/30-vies/S04E41',
|
'url': 'http://www.tou.tv/30-vies/S04E41',
|
||||||
'file': '30-vies_S04E41.mp4',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '30-vies_S04E41',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': '30 vies Saison 4 / Épisode 41',
|
'title': '30 vies Saison 4 / Épisode 41',
|
||||||
'description': 'md5:da363002db82ccbe4dafeb9cab039b09',
|
'description': 'md5:da363002db82ccbe4dafeb9cab039b09',
|
||||||
'age_limit': 8,
|
'age_limit': 8,
|
||||||
|
@ -220,12 +220,18 @@ class TwitchPlaylistBaseIE(TwitchBaseIE):
|
|||||||
response = self._download_json(
|
response = self._download_json(
|
||||||
self._PLAYLIST_URL % (channel_id, offset, limit),
|
self._PLAYLIST_URL % (channel_id, offset, limit),
|
||||||
channel_id, 'Downloading %s videos JSON page %d' % (self._PLAYLIST_TYPE, counter))
|
channel_id, 'Downloading %s videos JSON page %d' % (self._PLAYLIST_TYPE, counter))
|
||||||
videos = response['videos']
|
page_entries = self._extract_playlist_page(response)
|
||||||
if not videos:
|
if not page_entries:
|
||||||
break
|
break
|
||||||
entries.extend([self.url_result(video['url']) for video in videos])
|
entries.extend(page_entries)
|
||||||
offset += limit
|
offset += limit
|
||||||
return self.playlist_result(entries, channel_id, channel_name)
|
return self.playlist_result(
|
||||||
|
[self.url_result(entry) for entry in set(entries)],
|
||||||
|
channel_id, channel_name)
|
||||||
|
|
||||||
|
def _extract_playlist_page(self, response):
|
||||||
|
videos = response.get('videos')
|
||||||
|
return [video['url'] for video in videos] if videos else []
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
return self._extract_playlist(self._match_id(url))
|
return self._extract_playlist(self._match_id(url))
|
||||||
@ -262,6 +268,31 @@ class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class TwitchBookmarksIE(TwitchPlaylistBaseIE):
|
||||||
|
IE_NAME = 'twitch:bookmarks'
|
||||||
|
_VALID_URL = r'%s/(?P<id>[^/]+)/profile/bookmarks/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
|
||||||
|
_PLAYLIST_URL = '%s/api/bookmark/?user=%%s&offset=%%d&limit=%%d' % TwitchBaseIE._API_BASE
|
||||||
|
_PLAYLIST_TYPE = 'bookmarks'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.twitch.tv/ognos/profile/bookmarks',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'ognos',
|
||||||
|
'title': 'Ognos',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 3,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _extract_playlist_page(self, response):
|
||||||
|
entries = []
|
||||||
|
for bookmark in response.get('bookmarks', []):
|
||||||
|
video = bookmark.get('video')
|
||||||
|
if not video:
|
||||||
|
continue
|
||||||
|
entries.append(video['url'])
|
||||||
|
return entries
|
||||||
|
|
||||||
|
|
||||||
class TwitchStreamIE(TwitchBaseIE):
|
class TwitchStreamIE(TwitchBaseIE):
|
||||||
IE_NAME = 'twitch:stream'
|
IE_NAME = 'twitch:stream'
|
||||||
_VALID_URL = r'%s/(?P<id>[^/]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
|
_VALID_URL = r'%s/(?P<id>[^/]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
|
||||||
|
@ -3,50 +3,51 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import int_or_none
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
qualities,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class UbuIE(InfoExtractor):
|
class UbuIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://(?:www\.)?ubu\.com/film/(?P<id>[\da-z_-]+)\.html'
|
_VALID_URL = r'http://(?:www\.)?ubu\.com/film/(?P<id>[\da-z_-]+)\.html'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://ubu.com/film/her_noise.html',
|
'url': 'http://ubu.com/film/her_noise.html',
|
||||||
'md5': '8edd46ee8aa6b265fb5ed6cf05c36bc9',
|
'md5': '138d5652618bf0f03878978db9bef1ee',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'her_noise',
|
'id': 'her_noise',
|
||||||
'ext': 'mp4',
|
'ext': 'm4v',
|
||||||
'title': 'Her Noise - The Making Of (2007)',
|
'title': 'Her Noise - The Making Of (2007)',
|
||||||
'duration': 3600,
|
'duration': 3600,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<title>.+?Film & Video: ([^<]+)</title>', webpage, 'title')
|
r'<title>.+?Film & Video: ([^<]+)</title>', webpage, 'title')
|
||||||
|
|
||||||
duration = int_or_none(self._html_search_regex(
|
duration = int_or_none(self._html_search_regex(
|
||||||
r'Duration: (\d+) minutes', webpage, 'duration', fatal=False, default=None))
|
r'Duration: (\d+) minutes', webpage, 'duration', fatal=False),
|
||||||
if duration:
|
invscale=60)
|
||||||
duration *= 60
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
FORMAT_REGEXES = [
|
FORMAT_REGEXES = [
|
||||||
['sq', r"'flashvars'\s*,\s*'file=([^']+)'"],
|
('sq', r"'flashvars'\s*,\s*'file=([^']+)'"),
|
||||||
['hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"']
|
('hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"'),
|
||||||
]
|
]
|
||||||
|
preference = qualities([fid for fid, _ in FORMAT_REGEXES])
|
||||||
for format_id, format_regex in FORMAT_REGEXES:
|
for format_id, format_regex in FORMAT_REGEXES:
|
||||||
m = re.search(format_regex, webpage)
|
m = re.search(format_regex, webpage)
|
||||||
if m:
|
if m:
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': m.group(1),
|
'url': m.group(1),
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
|
'preference': preference(format_id),
|
||||||
})
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -9,6 +9,7 @@ from ..compat import (
|
|||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -192,9 +193,29 @@ class VevoIE(InfoExtractor):
|
|||||||
# Download via HLS API
|
# Download via HLS API
|
||||||
formats.extend(self._download_api_formats(video_id))
|
formats.extend(self._download_api_formats(video_id))
|
||||||
|
|
||||||
|
# Download SMIL
|
||||||
|
smil_blocks = sorted((
|
||||||
|
f for f in video_info['videoVersions']
|
||||||
|
if f['sourceType'] == 13),
|
||||||
|
key=lambda f: f['version'])
|
||||||
|
smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
|
||||||
|
self._SMIL_BASE_URL, video_id, video_id.lower())
|
||||||
|
if smil_blocks:
|
||||||
|
smil_url_m = self._search_regex(
|
||||||
|
r'url="([^"]+)"', smil_blocks[-1]['data'], 'SMIL URL',
|
||||||
|
default=None)
|
||||||
|
if smil_url_m is not None:
|
||||||
|
smil_url = smil_url_m
|
||||||
|
if smil_url:
|
||||||
|
smil_xml = self._download_webpage(
|
||||||
|
smil_url, video_id, 'Downloading SMIL info', fatal=False)
|
||||||
|
if smil_xml:
|
||||||
|
formats.extend(self._formats_from_smil(smil_xml))
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
timestamp_ms = int(self._search_regex(
|
timestamp_ms = int_or_none(self._search_regex(
|
||||||
r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date'))
|
r'/Date\((\d+)\)/',
|
||||||
|
video_info['launchDate'], 'launch date', fatal=False))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -5,27 +5,58 @@ from ..utils import (
|
|||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
)
|
)
|
||||||
|
from ..compat import (
|
||||||
|
compat_urllib_request
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class ViddlerIE(InfoExtractor):
|
class ViddlerIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
"url": "http://www.viddler.com/v/43903784",
|
'url': 'http://www.viddler.com/v/43903784',
|
||||||
'md5': 'ae43ad7cb59431ce043f0ff7fa13cbf4',
|
'md5': 'ae43ad7cb59431ce043f0ff7fa13cbf4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '43903784',
|
'id': '43903784',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
"title": "Video Made Easy",
|
'title': 'Video Made Easy',
|
||||||
'description': 'You don\'t need to be a professional to make high-quality video content. Viddler provides some quick and easy tips on how to produce great video content with limited resources. ',
|
'description': 'md5:6a697ebd844ff3093bd2e82c37b409cd',
|
||||||
"uploader": "viddler",
|
'uploader': 'viddler',
|
||||||
'timestamp': 1335371429,
|
'timestamp': 1335371429,
|
||||||
'upload_date': '20120425',
|
'upload_date': '20120425',
|
||||||
"duration": 100.89,
|
'duration': 100.89,
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
|
'comment_count': int,
|
||||||
'categories': ['video content', 'high quality video', 'video made easy', 'how to produce video with limited resources', 'viddler'],
|
'categories': ['video content', 'high quality video', 'video made easy', 'how to produce video with limited resources', 'viddler'],
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://www.viddler.com/v/4d03aad9/',
|
||||||
|
'md5': 'faa71fbf70c0bee7ab93076fd007f4b0',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '4d03aad9',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'WALL-TO-GORTAT',
|
||||||
|
'upload_date': '20150126',
|
||||||
|
'uploader': 'deadspin',
|
||||||
|
'timestamp': 1422285291,
|
||||||
|
'view_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.viddler.com/player/221ebbbd/0/',
|
||||||
|
'md5': '0defa2bd0ea613d14a6e9bd1db6be326',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '221ebbbd',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'LETeens-Grammar-snack-third-conditional',
|
||||||
|
'description': ' ',
|
||||||
|
'upload_date': '20140929',
|
||||||
|
'uploader': 'BCLETeens',
|
||||||
|
'timestamp': 1411997190,
|
||||||
|
'view_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
@ -33,14 +64,17 @@ class ViddlerIE(InfoExtractor):
|
|||||||
json_url = (
|
json_url = (
|
||||||
'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?video_id=%s&key=v0vhrt7bg2xq1vyxhkct' %
|
'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?video_id=%s&key=v0vhrt7bg2xq1vyxhkct' %
|
||||||
video_id)
|
video_id)
|
||||||
data = self._download_json(json_url, video_id)['video']
|
headers = {'Referer': 'http://static.cdn-ec.viddler.com/js/arpeggio/v2/embed.html'}
|
||||||
|
request = compat_urllib_request.Request(json_url, None, headers)
|
||||||
|
data = self._download_json(request, video_id)['video']
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for filed in data['files']:
|
for filed in data['files']:
|
||||||
if filed.get('status', 'ready') != 'ready':
|
if filed.get('status', 'ready') != 'ready':
|
||||||
continue
|
continue
|
||||||
|
format_id = filed.get('profile_id') or filed['profile_name']
|
||||||
f = {
|
f = {
|
||||||
'format_id': filed['profile_id'],
|
'format_id': format_id,
|
||||||
'format_note': filed['profile_name'],
|
'format_note': filed['profile_name'],
|
||||||
'url': self._proto_relative_url(filed['url']),
|
'url': self._proto_relative_url(filed['url']),
|
||||||
'width': int_or_none(filed.get('width')),
|
'width': int_or_none(filed.get('width')),
|
||||||
@ -53,16 +87,15 @@ class ViddlerIE(InfoExtractor):
|
|||||||
|
|
||||||
if filed.get('cdn_url'):
|
if filed.get('cdn_url'):
|
||||||
f = f.copy()
|
f = f.copy()
|
||||||
f['url'] = self._proto_relative_url(filed['cdn_url'])
|
f['url'] = self._proto_relative_url(filed['cdn_url'], 'http:')
|
||||||
f['format_id'] = filed['profile_id'] + '-cdn'
|
f['format_id'] = format_id + '-cdn'
|
||||||
f['source_preference'] = 1
|
f['source_preference'] = 1
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
|
|
||||||
if filed.get('html5_video_source'):
|
if filed.get('html5_video_source'):
|
||||||
f = f.copy()
|
f = f.copy()
|
||||||
f['url'] = self._proto_relative_url(
|
f['url'] = self._proto_relative_url(filed['html5_video_source'])
|
||||||
filed['html5_video_source'])
|
f['format_id'] = format_id + '-html5'
|
||||||
f['format_id'] = filed['profile_id'] + '-html5'
|
|
||||||
f['source_preference'] = 0
|
f['source_preference'] = 0
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
@ -71,7 +104,6 @@ class ViddlerIE(InfoExtractor):
|
|||||||
t.get('text') for t in data.get('tags', []) if 'text' in t]
|
t.get('text') for t in data.get('tags', []) if 'text' in t]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'video',
|
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': data['title'],
|
'title': data['title'],
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
@ -81,5 +113,6 @@ class ViddlerIE(InfoExtractor):
|
|||||||
'uploader': data.get('author'),
|
'uploader': data.get('author'),
|
||||||
'duration': float_or_none(data.get('length')),
|
'duration': float_or_none(data.get('length')),
|
||||||
'view_count': int_or_none(data.get('view_count')),
|
'view_count': int_or_none(data.get('view_count')),
|
||||||
|
'comment_count': int_or_none(data.get('comment_count')),
|
||||||
'categories': categories,
|
'categories': categories,
|
||||||
}
|
}
|
||||||
|
@ -62,5 +62,7 @@ class VideoMegaIE(InfoExtractor):
|
|||||||
'title': title,
|
'title': title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'http_referer': iframe_url,
|
'http_headers': {
|
||||||
|
'Referer': iframe_url,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
@ -13,9 +13,9 @@ from ..utils import (
|
|||||||
class VideoTtIE(InfoExtractor):
|
class VideoTtIE(InfoExtractor):
|
||||||
ID_NAME = 'video.tt'
|
ID_NAME = 'video.tt'
|
||||||
IE_DESC = 'video.tt - Your True Tube'
|
IE_DESC = 'video.tt - Your True Tube'
|
||||||
_VALID_URL = r'http://(?:www\.)?video\.tt/(?:video/|watch_video\.php\?v=)(?P<id>[\da-zA-Z]{9})'
|
_VALID_URL = r'http://(?:www\.)?video\.tt/(?:(?:video|embed)/|watch_video\.php\?v=)(?P<id>[\da-zA-Z]{9})'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.video.tt/watch_video.php?v=amd5YujV8',
|
'url': 'http://www.video.tt/watch_video.php?v=amd5YujV8',
|
||||||
'md5': 'b13aa9e2f267effb5d1094443dff65ba',
|
'md5': 'b13aa9e2f267effb5d1094443dff65ba',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -26,7 +26,10 @@ class VideoTtIE(InfoExtractor):
|
|||||||
'upload_date': '20130827',
|
'upload_date': '20130827',
|
||||||
'uploader': 'joseph313',
|
'uploader': 'joseph313',
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'http://video.tt/embed/amd5YujV8',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
@ -501,9 +501,10 @@ class VimeoReviewIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
|
_VALID_URL = r'https?://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
|
'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
|
||||||
'file': '75524534.mp4',
|
|
||||||
'md5': 'c507a72f780cacc12b2248bb4006d253',
|
'md5': 'c507a72f780cacc12b2248bb4006d253',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '75524534',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': "DICK HARDWICK 'Comedian'",
|
'title': "DICK HARDWICK 'Comedian'",
|
||||||
'uploader': 'Richard Hardwick',
|
'uploader': 'Richard Hardwick',
|
||||||
}
|
}
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
@ -11,9 +12,10 @@ from ..utils import (
|
|||||||
|
|
||||||
class WashingtonPostIE(InfoExtractor):
|
class WashingtonPostIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
|
_VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
|
'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'sinkhole-of-bureaucracy',
|
||||||
'title': 'Sinkhole of bureaucracy',
|
'title': 'Sinkhole of bureaucracy',
|
||||||
},
|
},
|
||||||
'playlist': [{
|
'playlist': [{
|
||||||
@ -40,15 +42,38 @@ class WashingtonPostIE(InfoExtractor):
|
|||||||
'upload_date': '20140322',
|
'upload_date': '20140322',
|
||||||
'uploader': 'The Washington Post',
|
'uploader': 'The Washington Post',
|
||||||
},
|
},
|
||||||
|
}],
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.washingtonpost.com/blogs/wonkblog/wp/2014/12/31/one-airline-figured-out-how-to-make-sure-its-airplanes-never-disappear/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'one-airline-figured-out-how-to-make-sure-its-airplanes-never-disappear',
|
||||||
|
'title': 'One airline figured out how to make sure its airplanes never disappear',
|
||||||
|
},
|
||||||
|
'playlist': [{
|
||||||
|
'md5': 'a7c1b5634ba5e57a6a82cdffa5b1e0d0',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '0e4bb54c-9065-11e4-a66f-0ca5037a597d',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'description': 'Washington Post transportation reporter Ashley Halsey III explains why a plane\'s black box needs to be recovered from a crash site instead of having its information streamed in real time throughout the flight.',
|
||||||
|
'upload_date': '20141230',
|
||||||
|
'uploader': 'The Washington Post',
|
||||||
|
'timestamp': 1419974765,
|
||||||
|
'title': 'Why black boxes don’t transmit data in real time',
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
}
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
page_id = self._match_id(url)
|
page_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, page_id)
|
webpage = self._download_webpage(url, page_id)
|
||||||
|
|
||||||
title = self._og_search_title(webpage)
|
title = self._og_search_title(webpage)
|
||||||
uuids = re.findall(r'data-video-uuid="([^"]+)"', webpage)
|
|
||||||
|
uuids = re.findall(r'''(?x)
|
||||||
|
(?:
|
||||||
|
<div\s+class="posttv-video-embed[^>]*?data-uuid=|
|
||||||
|
data-video-uuid=
|
||||||
|
)"([^"]+)"''', webpage)
|
||||||
entries = []
|
entries = []
|
||||||
for i, uuid in enumerate(uuids, start=1):
|
for i, uuid in enumerate(uuids, start=1):
|
||||||
vinfo_all = self._download_json(
|
vinfo_all = self._download_json(
|
||||||
@ -75,10 +100,11 @@ class WashingtonPostIE(InfoExtractor):
|
|||||||
'filesize': s.get('fileSize'),
|
'filesize': s.get('fileSize'),
|
||||||
'url': s.get('url'),
|
'url': s.get('url'),
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
'preference': -100 if s.get('type') == 'smil' else None,
|
||||||
'protocol': {
|
'protocol': {
|
||||||
'MP4': 'http',
|
'MP4': 'http',
|
||||||
'F4F': 'f4m',
|
'F4F': 'f4m',
|
||||||
}.get(s.get('type'))
|
}.get(s.get('type')),
|
||||||
} for s in vinfo.get('streams', [])]
|
} for s in vinfo.get('streams', [])]
|
||||||
source_media_url = vinfo.get('sourceMediaURL')
|
source_media_url = vinfo.get('sourceMediaURL')
|
||||||
if source_media_url:
|
if source_media_url:
|
||||||
|
@ -71,6 +71,9 @@ class WDRIE(InfoExtractor):
|
|||||||
{
|
{
|
||||||
'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html',
|
'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html',
|
||||||
'playlist_mincount': 146,
|
'playlist_mincount': 146,
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100',
|
||||||
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -169,7 +172,9 @@ class WDRMobileIE(InfoExtractor):
|
|||||||
'title': mobj.group('title'),
|
'title': mobj.group('title'),
|
||||||
'age_limit': int(mobj.group('age_limit')),
|
'age_limit': int(mobj.group('age_limit')),
|
||||||
'url': url,
|
'url': url,
|
||||||
'user_agent': 'mobile',
|
'http_headers': {
|
||||||
|
'User-Agent': 'mobile',
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
142
youtube_dl/extractor/xuite.py
Normal file
142
youtube_dl/extractor/xuite.py
Normal file
@ -0,0 +1,142 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import base64
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_urllib_parse_unquote
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
parse_iso8601,
|
||||||
|
parse_duration,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class XuiteIE(InfoExtractor):
|
||||||
|
_REGEX_BASE64 = r'(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?'
|
||||||
|
_VALID_URL = r'https?://vlog\.xuite\.net/(?:play|embed)/(?P<id>%s)' % _REGEX_BASE64
|
||||||
|
_TESTS = [{
|
||||||
|
# Audio
|
||||||
|
'url': 'http://vlog.xuite.net/play/RGkzc1ZULTM4NjA5MTQuZmx2',
|
||||||
|
'md5': '63a42c705772aa53fd4c1a0027f86adf',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3860914',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': '孤單南半球-歐德陽',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'duration': 247.246,
|
||||||
|
'timestamp': 1314932940,
|
||||||
|
'upload_date': '20110902',
|
||||||
|
'uploader': '阿能',
|
||||||
|
'uploader_id': '15973816',
|
||||||
|
'categories': ['個人短片'],
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# Video with only one format
|
||||||
|
'url': 'http://vlog.xuite.net/play/TkRZNjhULTM0NDE2MjkuZmx2',
|
||||||
|
'md5': 'c45737fc8ac5dc8ac2f92ecbcecf505e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3441629',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '孫燕姿 - 眼淚成詩',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'duration': 217.399,
|
||||||
|
'timestamp': 1299383640,
|
||||||
|
'upload_date': '20110306',
|
||||||
|
'uploader': 'Valen',
|
||||||
|
'uploader_id': '10400126',
|
||||||
|
'categories': ['影視娛樂'],
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# Video with two formats
|
||||||
|
'url': 'http://vlog.xuite.net/play/bWo1N1pLLTIxMzAxMTcwLmZsdg==',
|
||||||
|
'md5': '1166e0f461efe55b62e26a2d2a68e6de',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '21301170',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '暗殺教室 02',
|
||||||
|
'description': '字幕:【極影字幕社】',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'duration': 1384.907,
|
||||||
|
'timestamp': 1421481240,
|
||||||
|
'upload_date': '20150117',
|
||||||
|
'uploader': '我只是想認真點',
|
||||||
|
'uploader_id': '242127761',
|
||||||
|
'categories': ['電玩動漫'],
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://vlog.xuite.net/play/S1dDUjdyLTMyOTc3NjcuZmx2/%E5%AD%AB%E7%87%95%E5%A7%BF-%E7%9C%BC%E6%B7%9A%E6%88%90%E8%A9%A9',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _extract_flv_config(self, media_id):
|
||||||
|
base64_media_id = base64.b64encode(media_id.encode('utf-8')).decode('utf-8')
|
||||||
|
flv_config = self._download_xml(
|
||||||
|
'http://vlog.xuite.net/flash/player?media=%s' % base64_media_id,
|
||||||
|
'flv config')
|
||||||
|
prop_dict = {}
|
||||||
|
for prop in flv_config.findall('./property'):
|
||||||
|
prop_id = base64.b64decode(prop.attrib['id']).decode('utf-8')
|
||||||
|
# CDATA may be empty in flv config
|
||||||
|
if not prop.text:
|
||||||
|
continue
|
||||||
|
encoded_content = base64.b64decode(prop.text).decode('utf-8')
|
||||||
|
prop_dict[prop_id] = compat_urllib_parse_unquote(encoded_content)
|
||||||
|
return prop_dict
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
error_msg = self._search_regex(
|
||||||
|
r'<div id="error-message-content">([^<]+)',
|
||||||
|
webpage, 'error message', default=None)
|
||||||
|
if error_msg:
|
||||||
|
raise ExtractorError(
|
||||||
|
'%s returned error: %s' % (self.IE_NAME, error_msg),
|
||||||
|
expected=True)
|
||||||
|
|
||||||
|
video_id = self._html_search_regex(
|
||||||
|
r'data-mediaid="(\d+)"', webpage, 'media id')
|
||||||
|
flv_config = self._extract_flv_config(video_id)
|
||||||
|
|
||||||
|
FORMATS = {
|
||||||
|
'audio': 'mp3',
|
||||||
|
'video': 'mp4',
|
||||||
|
}
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for format_tag in ('src', 'hq_src'):
|
||||||
|
video_url = flv_config.get(format_tag)
|
||||||
|
if not video_url:
|
||||||
|
continue
|
||||||
|
format_id = self._search_regex(
|
||||||
|
r'\bq=(.+?)\b', video_url, 'format id', default=format_tag)
|
||||||
|
formats.append({
|
||||||
|
'url': video_url,
|
||||||
|
'ext': FORMATS.get(flv_config['type'], 'mp4'),
|
||||||
|
'format_id': format_id,
|
||||||
|
'height': int(format_id) if format_id.isnumeric() else None,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
timestamp = flv_config.get('publish_datetime')
|
||||||
|
if timestamp:
|
||||||
|
timestamp = parse_iso8601(timestamp + ' +0800', ' ')
|
||||||
|
|
||||||
|
category = flv_config.get('category')
|
||||||
|
categories = [category] if category else []
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': flv_config['title'],
|
||||||
|
'description': flv_config.get('description'),
|
||||||
|
'thumbnail': flv_config.get('thumb'),
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'uploader': flv_config.get('author_name'),
|
||||||
|
'uploader_id': flv_config.get('author_id'),
|
||||||
|
'duration': parse_duration(flv_config.get('duration')),
|
||||||
|
'categories': categories,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@ -264,9 +264,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},
|
'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},
|
||||||
|
|
||||||
# Dash mp4 audio
|
# Dash mp4 audio
|
||||||
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50},
|
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
|
||||||
'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50},
|
'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
|
||||||
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50},
|
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
|
||||||
|
|
||||||
# Dash webm
|
# Dash webm
|
||||||
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
|
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
|
||||||
@ -809,6 +809,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
player_url = None
|
player_url = None
|
||||||
|
|
||||||
# Get video info
|
# Get video info
|
||||||
|
embed_webpage = None
|
||||||
if re.search(r'player-age-gate-content">', video_webpage) is not None:
|
if re.search(r'player-age-gate-content">', video_webpage) is not None:
|
||||||
age_gate = True
|
age_gate = True
|
||||||
# We simulate the access to the video from www.youtube.com/v/{video_id}
|
# We simulate the access to the video from www.youtube.com/v/{video_id}
|
||||||
@ -1016,10 +1017,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
url += '&signature=' + url_data['sig'][0]
|
url += '&signature=' + url_data['sig'][0]
|
||||||
elif 's' in url_data:
|
elif 's' in url_data:
|
||||||
encrypted_sig = url_data['s'][0]
|
encrypted_sig = url_data['s'][0]
|
||||||
|
ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
|
||||||
|
|
||||||
jsplayer_url_json = self._search_regex(
|
jsplayer_url_json = self._search_regex(
|
||||||
r'"assets":.+?"js":\s*("[^"]+")',
|
ASSETS_RE,
|
||||||
embed_webpage if age_gate else video_webpage, 'JS player URL')
|
embed_webpage if age_gate else video_webpage,
|
||||||
|
'JS player URL (1)', default=None)
|
||||||
|
if not jsplayer_url_json and not age_gate:
|
||||||
|
# We need the embed website after all
|
||||||
|
if embed_webpage is None:
|
||||||
|
embed_url = proto + '://www.youtube.com/embed/%s' % video_id
|
||||||
|
embed_webpage = self._download_webpage(
|
||||||
|
embed_url, video_id, 'Downloading embed webpage')
|
||||||
|
jsplayer_url_json = self._search_regex(
|
||||||
|
ASSETS_RE, embed_webpage, 'JS player URL')
|
||||||
|
|
||||||
player_url = json.loads(jsplayer_url_json)
|
player_url = json.loads(jsplayer_url_json)
|
||||||
if player_url is None:
|
if player_url is None:
|
||||||
player_url_json = self._search_regex(
|
player_url_json = self._search_regex(
|
||||||
@ -1148,6 +1160,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
|
'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
|
||||||
'title': 'YDL_Empty_List',
|
'title': 'YDL_Empty_List',
|
||||||
},
|
},
|
||||||
'playlist_count': 0,
|
'playlist_count': 0,
|
||||||
@ -1156,6 +1169,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
|
'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': '29C3: Not my department',
|
'title': '29C3: Not my department',
|
||||||
|
'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
|
||||||
},
|
},
|
||||||
'playlist_count': 95,
|
'playlist_count': 95,
|
||||||
}, {
|
}, {
|
||||||
@ -1163,6 +1177,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
'url': 'PLBB231211A4F62143',
|
'url': 'PLBB231211A4F62143',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': '[OLD]Team Fortress 2 (Class-based LP)',
|
'title': '[OLD]Team Fortress 2 (Class-based LP)',
|
||||||
|
'id': 'PLBB231211A4F62143',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 26,
|
'playlist_mincount': 26,
|
||||||
}, {
|
}, {
|
||||||
@ -1170,12 +1185,14 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
|
'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'Uploads from Cauchemar',
|
'title': 'Uploads from Cauchemar',
|
||||||
|
'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 799,
|
'playlist_mincount': 799,
|
||||||
}, {
|
}, {
|
||||||
'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
|
'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'YDL_safe_search',
|
'title': 'YDL_safe_search',
|
||||||
|
'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
|
||||||
},
|
},
|
||||||
'playlist_count': 2,
|
'playlist_count': 2,
|
||||||
}, {
|
}, {
|
||||||
@ -1184,6 +1201,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
'playlist_count': 4,
|
'playlist_count': 4,
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'JODA15',
|
'title': 'JODA15',
|
||||||
|
'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'note': 'Embedded SWF player',
|
'note': 'Embedded SWF player',
|
||||||
@ -1191,12 +1209,14 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
'playlist_count': 4,
|
'playlist_count': 4,
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'JODA7',
|
'title': 'JODA7',
|
||||||
|
'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
|
'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
|
||||||
'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
|
'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'Uploads from Interstellar Movie',
|
'title': 'Uploads from Interstellar Movie',
|
||||||
|
'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
|
||||||
},
|
},
|
||||||
'playlist_mincout': 21,
|
'playlist_mincout': 21,
|
||||||
}]
|
}]
|
||||||
@ -1302,6 +1322,9 @@ class YoutubeChannelIE(InfoExtractor):
|
|||||||
'note': 'paginated channel',
|
'note': 'paginated channel',
|
||||||
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
|
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
|
||||||
'playlist_mincount': 91,
|
'playlist_mincount': 91,
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def extract_videos_from_page(self, page):
|
def extract_videos_from_page(self, page):
|
||||||
@ -1682,11 +1705,18 @@ class YoutubeTruncatedURLIE(InfoExtractor):
|
|||||||
IE_NAME = 'youtube:truncated_url'
|
IE_NAME = 'youtube:truncated_url'
|
||||||
IE_DESC = False # Do not list
|
IE_DESC = False # Do not list
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
(?:https?://)?[^/]+/watch\?(?:
|
(?:https?://)?
|
||||||
|
(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
|
||||||
|
(?:watch\?(?:
|
||||||
feature=[a-z_]+|
|
feature=[a-z_]+|
|
||||||
annotation_id=annotation_[^&]+
|
annotation_id=annotation_[^&]+|
|
||||||
)?$|
|
x-yt-cl=[0-9]+|
|
||||||
(?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$
|
hl=[^&]*|
|
||||||
|
)?
|
||||||
|
|
|
||||||
|
attribution_link\?a=[^&]+
|
||||||
|
)
|
||||||
|
$
|
||||||
'''
|
'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@ -1695,6 +1725,15 @@ class YoutubeTruncatedURLIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.youtube.com/watch?',
|
'url': 'http://www.youtube.com/watch?',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.youtube.com/watch?feature=foo',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.youtube.com/watch?hl=en-GB',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -1710,7 +1749,7 @@ class YoutubeTruncatedURLIE(InfoExtractor):
|
|||||||
class YoutubeTruncatedIDIE(InfoExtractor):
|
class YoutubeTruncatedIDIE(InfoExtractor):
|
||||||
IE_NAME = 'youtube:truncated_id'
|
IE_NAME = 'youtube:truncated_id'
|
||||||
IE_DESC = False # Do not list
|
IE_DESC = False # Do not list
|
||||||
_VALID_URL = r'https?://(?:www\.)youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
|
_VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
|
'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
|
||||||
|
@ -1,59 +1,122 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import operator
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .utils import (
|
from .utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
_OPERATORS = [
|
||||||
|
('|', operator.or_),
|
||||||
|
('^', operator.xor),
|
||||||
|
('&', operator.and_),
|
||||||
|
('>>', operator.rshift),
|
||||||
|
('<<', operator.lshift),
|
||||||
|
('-', operator.sub),
|
||||||
|
('+', operator.add),
|
||||||
|
('%', operator.mod),
|
||||||
|
('/', operator.truediv),
|
||||||
|
('*', operator.mul),
|
||||||
|
]
|
||||||
|
_ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in _OPERATORS]
|
||||||
|
_ASSIGN_OPERATORS.append(('=', lambda cur, right: right))
|
||||||
|
|
||||||
|
_NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*'
|
||||||
|
|
||||||
|
|
||||||
class JSInterpreter(object):
|
class JSInterpreter(object):
|
||||||
def __init__(self, code):
|
def __init__(self, code, objects=None):
|
||||||
self.code = code
|
if objects is None:
|
||||||
|
objects = {}
|
||||||
|
self.code = self._remove_comments(code)
|
||||||
self._functions = {}
|
self._functions = {}
|
||||||
self._objects = {}
|
self._objects = objects
|
||||||
|
|
||||||
def interpret_statement(self, stmt, local_vars, allow_recursion=20):
|
def _remove_comments(self, code):
|
||||||
|
return re.sub(r'(?s)/\*.*?\*/', '', code)
|
||||||
|
|
||||||
|
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
|
||||||
if allow_recursion < 0:
|
if allow_recursion < 0:
|
||||||
raise ExtractorError('Recursion limit reached')
|
raise ExtractorError('Recursion limit reached')
|
||||||
|
|
||||||
if stmt.startswith('var '):
|
should_abort = False
|
||||||
stmt = stmt[len('var '):]
|
stmt = stmt.lstrip()
|
||||||
ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
|
stmt_m = re.match(r'var\s', stmt)
|
||||||
r'=(?P<expr>.*)$', stmt)
|
if stmt_m:
|
||||||
if ass_m:
|
expr = stmt[len(stmt_m.group(0)):]
|
||||||
if ass_m.groupdict().get('index'):
|
|
||||||
def assign(val):
|
|
||||||
lvar = local_vars[ass_m.group('out')]
|
|
||||||
idx = self.interpret_expression(
|
|
||||||
ass_m.group('index'), local_vars, allow_recursion)
|
|
||||||
assert isinstance(idx, int)
|
|
||||||
lvar[idx] = val
|
|
||||||
return val
|
|
||||||
expr = ass_m.group('expr')
|
|
||||||
else:
|
|
||||||
def assign(val):
|
|
||||||
local_vars[ass_m.group('out')] = val
|
|
||||||
return val
|
|
||||||
expr = ass_m.group('expr')
|
|
||||||
elif stmt.startswith('return '):
|
|
||||||
assign = lambda v: v
|
|
||||||
expr = stmt[len('return '):]
|
|
||||||
else:
|
else:
|
||||||
# Try interpreting it as an expression
|
return_m = re.match(r'return(?:\s+|$)', stmt)
|
||||||
expr = stmt
|
if return_m:
|
||||||
assign = lambda v: v
|
expr = stmt[len(return_m.group(0)):]
|
||||||
|
should_abort = True
|
||||||
|
else:
|
||||||
|
# Try interpreting it as an expression
|
||||||
|
expr = stmt
|
||||||
|
|
||||||
v = self.interpret_expression(expr, local_vars, allow_recursion)
|
v = self.interpret_expression(expr, local_vars, allow_recursion)
|
||||||
return assign(v)
|
return v, should_abort
|
||||||
|
|
||||||
def interpret_expression(self, expr, local_vars, allow_recursion):
|
def interpret_expression(self, expr, local_vars, allow_recursion):
|
||||||
|
expr = expr.strip()
|
||||||
|
|
||||||
|
if expr == '': # Empty expression
|
||||||
|
return None
|
||||||
|
|
||||||
|
if expr.startswith('('):
|
||||||
|
parens_count = 0
|
||||||
|
for m in re.finditer(r'[()]', expr):
|
||||||
|
if m.group(0) == '(':
|
||||||
|
parens_count += 1
|
||||||
|
else:
|
||||||
|
parens_count -= 1
|
||||||
|
if parens_count == 0:
|
||||||
|
sub_expr = expr[1:m.start()]
|
||||||
|
sub_result = self.interpret_expression(
|
||||||
|
sub_expr, local_vars, allow_recursion)
|
||||||
|
remaining_expr = expr[m.end():].strip()
|
||||||
|
if not remaining_expr:
|
||||||
|
return sub_result
|
||||||
|
else:
|
||||||
|
expr = json.dumps(sub_result) + remaining_expr
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
raise ExtractorError('Premature end of parens in %r' % expr)
|
||||||
|
|
||||||
|
for op, opfunc in _ASSIGN_OPERATORS:
|
||||||
|
m = re.match(r'''(?x)
|
||||||
|
(?P<out>%s)(?:\[(?P<index>[^\]]+?)\])?
|
||||||
|
\s*%s
|
||||||
|
(?P<expr>.*)$''' % (_NAME_RE, re.escape(op)), expr)
|
||||||
|
if not m:
|
||||||
|
continue
|
||||||
|
right_val = self.interpret_expression(
|
||||||
|
m.group('expr'), local_vars, allow_recursion - 1)
|
||||||
|
|
||||||
|
if m.groupdict().get('index'):
|
||||||
|
lvar = local_vars[m.group('out')]
|
||||||
|
idx = self.interpret_expression(
|
||||||
|
m.group('index'), local_vars, allow_recursion)
|
||||||
|
assert isinstance(idx, int)
|
||||||
|
cur = lvar[idx]
|
||||||
|
val = opfunc(cur, right_val)
|
||||||
|
lvar[idx] = val
|
||||||
|
return val
|
||||||
|
else:
|
||||||
|
cur = local_vars.get(m.group('out'))
|
||||||
|
val = opfunc(cur, right_val)
|
||||||
|
local_vars[m.group('out')] = val
|
||||||
|
return val
|
||||||
|
|
||||||
if expr.isdigit():
|
if expr.isdigit():
|
||||||
return int(expr)
|
return int(expr)
|
||||||
|
|
||||||
if expr.isalpha():
|
var_m = re.match(
|
||||||
return local_vars[expr]
|
r'(?!if|return|true|false)(?P<name>%s)$' % _NAME_RE,
|
||||||
|
expr)
|
||||||
|
if var_m:
|
||||||
|
return local_vars[var_m.group('name')]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return json.loads(expr)
|
return json.loads(expr)
|
||||||
@ -61,7 +124,7 @@ class JSInterpreter(object):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
m = re.match(
|
m = re.match(
|
||||||
r'^(?P<var>[$a-zA-Z0-9_]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$',
|
r'(?P<var>%s)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$' % _NAME_RE,
|
||||||
expr)
|
expr)
|
||||||
if m:
|
if m:
|
||||||
variable = m.group('var')
|
variable = m.group('var')
|
||||||
@ -114,23 +177,31 @@ class JSInterpreter(object):
|
|||||||
return obj[member](argvals)
|
return obj[member](argvals)
|
||||||
|
|
||||||
m = re.match(
|
m = re.match(
|
||||||
r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
|
r'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE, expr)
|
||||||
if m:
|
if m:
|
||||||
val = local_vars[m.group('in')]
|
val = local_vars[m.group('in')]
|
||||||
idx = self.interpret_expression(
|
idx = self.interpret_expression(
|
||||||
m.group('idx'), local_vars, allow_recursion - 1)
|
m.group('idx'), local_vars, allow_recursion - 1)
|
||||||
return val[idx]
|
return val[idx]
|
||||||
|
|
||||||
m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
|
for op, opfunc in _OPERATORS:
|
||||||
if m:
|
m = re.match(r'(?P<x>.+?)%s(?P<y>.+)' % re.escape(op), expr)
|
||||||
a = self.interpret_expression(
|
if not m:
|
||||||
m.group('a'), local_vars, allow_recursion)
|
continue
|
||||||
b = self.interpret_expression(
|
x, abort = self.interpret_statement(
|
||||||
m.group('b'), local_vars, allow_recursion)
|
m.group('x'), local_vars, allow_recursion - 1)
|
||||||
return a % b
|
if abort:
|
||||||
|
raise ExtractorError(
|
||||||
|
'Premature left-side return of %s in %r' % (op, expr))
|
||||||
|
y, abort = self.interpret_statement(
|
||||||
|
m.group('y'), local_vars, allow_recursion - 1)
|
||||||
|
if abort:
|
||||||
|
raise ExtractorError(
|
||||||
|
'Premature right-side return of %s in %r' % (op, expr))
|
||||||
|
return opfunc(x, y)
|
||||||
|
|
||||||
m = re.match(
|
m = re.match(
|
||||||
r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
|
r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]+)\)$' % _NAME_RE, expr)
|
||||||
if m:
|
if m:
|
||||||
fname = m.group('func')
|
fname = m.group('func')
|
||||||
argvals = tuple([
|
argvals = tuple([
|
||||||
@ -139,6 +210,7 @@ class JSInterpreter(object):
|
|||||||
if fname not in self._functions:
|
if fname not in self._functions:
|
||||||
self._functions[fname] = self.extract_function(fname)
|
self._functions[fname] = self.extract_function(fname)
|
||||||
return self._functions[fname](argvals)
|
return self._functions[fname](argvals)
|
||||||
|
|
||||||
raise ExtractorError('Unsupported JS expression %r' % expr)
|
raise ExtractorError('Unsupported JS expression %r' % expr)
|
||||||
|
|
||||||
def extract_object(self, objname):
|
def extract_object(self, objname):
|
||||||
@ -162,9 +234,11 @@ class JSInterpreter(object):
|
|||||||
|
|
||||||
def extract_function(self, funcname):
|
def extract_function(self, funcname):
|
||||||
func_m = re.search(
|
func_m = re.search(
|
||||||
(r'(?:function %s|[{;]%s\s*=\s*function)' % (
|
r'''(?x)
|
||||||
re.escape(funcname), re.escape(funcname))) +
|
(?:function\s+%s|[{;]%s\s*=\s*function)\s*
|
||||||
r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
|
\((?P<args>[^)]*)\)\s*
|
||||||
|
\{(?P<code>[^}]+)\}''' % (
|
||||||
|
re.escape(funcname), re.escape(funcname)),
|
||||||
self.code)
|
self.code)
|
||||||
if func_m is None:
|
if func_m is None:
|
||||||
raise ExtractorError('Could not find JS function %r' % funcname)
|
raise ExtractorError('Could not find JS function %r' % funcname)
|
||||||
@ -172,10 +246,16 @@ class JSInterpreter(object):
|
|||||||
|
|
||||||
return self.build_function(argnames, func_m.group('code'))
|
return self.build_function(argnames, func_m.group('code'))
|
||||||
|
|
||||||
|
def call_function(self, funcname, *args):
|
||||||
|
f = self.extract_function(funcname)
|
||||||
|
return f(args)
|
||||||
|
|
||||||
def build_function(self, argnames, code):
|
def build_function(self, argnames, code):
|
||||||
def resf(args):
|
def resf(args):
|
||||||
local_vars = dict(zip(argnames, args))
|
local_vars = dict(zip(argnames, args))
|
||||||
for stmt in code.split(';'):
|
for stmt in code.split(';'):
|
||||||
res = self.interpret_statement(stmt, local_vars)
|
res, abort = self.interpret_statement(stmt, local_vars)
|
||||||
|
if abort:
|
||||||
|
break
|
||||||
return res
|
return res
|
||||||
return resf
|
return resf
|
||||||
|
@ -5,6 +5,7 @@ import optparse
|
|||||||
import shlex
|
import shlex
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
from .downloader.external import list_external_downloaders
|
||||||
from .compat import (
|
from .compat import (
|
||||||
compat_expanduser,
|
compat_expanduser,
|
||||||
compat_getenv,
|
compat_getenv,
|
||||||
@ -199,6 +200,10 @@ def parseOpts(overrideArguments=None):
|
|||||||
'--playlist-end',
|
'--playlist-end',
|
||||||
dest='playlistend', metavar='NUMBER', default=None, type=int,
|
dest='playlistend', metavar='NUMBER', default=None, type=int,
|
||||||
help='playlist video to end at (default is last)')
|
help='playlist video to end at (default is last)')
|
||||||
|
selection.add_option(
|
||||||
|
'--playlist-items',
|
||||||
|
dest='playlist_items', metavar='ITEM_SPEC', default=None,
|
||||||
|
help='playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.')
|
||||||
selection.add_option(
|
selection.add_option(
|
||||||
'--match-title',
|
'--match-title',
|
||||||
dest='matchtitle', metavar='REGEX',
|
dest='matchtitle', metavar='REGEX',
|
||||||
@ -292,7 +297,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
' You can filter the video results by putting a condition in'
|
' You can filter the video results by putting a condition in'
|
||||||
' brackets, as in -f "best[height=720]"'
|
' brackets, as in -f "best[height=720]"'
|
||||||
' (or -f "[filesize>10M]"). '
|
' (or -f "[filesize>10M]"). '
|
||||||
' This works for filesize, height, width, tbr, abr, and vbr'
|
' This works for filesize, height, width, tbr, abr, vbr, and fps'
|
||||||
' and the comparisons <, <=, >, >=, =, != .'
|
' and the comparisons <, <=, >, >=, =, != .'
|
||||||
' Formats for which the value is not known are excluded unless you'
|
' Formats for which the value is not known are excluded unless you'
|
||||||
' put a question mark (?) after the operator.'
|
' put a question mark (?) after the operator.'
|
||||||
@ -372,7 +377,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
downloader.add_option(
|
downloader.add_option(
|
||||||
'-R', '--retries',
|
'-R', '--retries',
|
||||||
dest='retries', metavar='RETRIES', default=10,
|
dest='retries', metavar='RETRIES', default=10,
|
||||||
help='number of retries (default is %default)')
|
help='number of retries (default is %default), or "infinite".')
|
||||||
downloader.add_option(
|
downloader.add_option(
|
||||||
'--buffer-size',
|
'--buffer-size',
|
||||||
dest='buffersize', metavar='SIZE', default='1024',
|
dest='buffersize', metavar='SIZE', default='1024',
|
||||||
@ -389,6 +394,15 @@ def parseOpts(overrideArguments=None):
|
|||||||
'--playlist-reverse',
|
'--playlist-reverse',
|
||||||
action='store_true',
|
action='store_true',
|
||||||
help='Download playlist videos in reverse order')
|
help='Download playlist videos in reverse order')
|
||||||
|
downloader.add_option(
|
||||||
|
'--xattr-set-filesize',
|
||||||
|
dest='xattr_set_filesize', action='store_true',
|
||||||
|
help='(experimental) set file xattribute ytdl.filesize with expected filesize')
|
||||||
|
downloader.add_option(
|
||||||
|
'--external-downloader',
|
||||||
|
dest='external_downloader', metavar='COMMAND',
|
||||||
|
help='(experimental) Use the specified external downloader. '
|
||||||
|
'Currently supports %s' % ','.join(list_external_downloaders()))
|
||||||
|
|
||||||
workarounds = optparse.OptionGroup(parser, 'Workarounds')
|
workarounds = optparse.OptionGroup(parser, 'Workarounds')
|
||||||
workarounds.add_option(
|
workarounds.add_option(
|
||||||
@ -608,10 +622,6 @@ def parseOpts(overrideArguments=None):
|
|||||||
'--write-annotations',
|
'--write-annotations',
|
||||||
action='store_true', dest='writeannotations', default=False,
|
action='store_true', dest='writeannotations', default=False,
|
||||||
help='write video annotations to a .annotation file')
|
help='write video annotations to a .annotation file')
|
||||||
filesystem.add_option(
|
|
||||||
'--write-thumbnail',
|
|
||||||
action='store_true', dest='writethumbnail', default=False,
|
|
||||||
help='write thumbnail image to disk')
|
|
||||||
filesystem.add_option(
|
filesystem.add_option(
|
||||||
'--load-info',
|
'--load-info',
|
||||||
dest='load_info_filename', metavar='FILE',
|
dest='load_info_filename', metavar='FILE',
|
||||||
@ -631,6 +641,20 @@ def parseOpts(overrideArguments=None):
|
|||||||
action='store_true', dest='rm_cachedir',
|
action='store_true', dest='rm_cachedir',
|
||||||
help='Delete all filesystem cache files')
|
help='Delete all filesystem cache files')
|
||||||
|
|
||||||
|
thumbnail = optparse.OptionGroup(parser, 'Thumbnail images')
|
||||||
|
thumbnail.add_option(
|
||||||
|
'--write-thumbnail',
|
||||||
|
action='store_true', dest='writethumbnail', default=False,
|
||||||
|
help='write thumbnail image to disk')
|
||||||
|
thumbnail.add_option(
|
||||||
|
'--write-all-thumbnails',
|
||||||
|
action='store_true', dest='write_all_thumbnails', default=False,
|
||||||
|
help='write all thumbnail image formats to disk')
|
||||||
|
thumbnail.add_option(
|
||||||
|
'--list-thumbnails',
|
||||||
|
action='store_true', dest='list_thumbnails', default=False,
|
||||||
|
help='Simulate and list all available thumbnail formats')
|
||||||
|
|
||||||
postproc = optparse.OptionGroup(parser, 'Post-processing Options')
|
postproc = optparse.OptionGroup(parser, 'Post-processing Options')
|
||||||
postproc.add_option(
|
postproc.add_option(
|
||||||
'-x', '--extract-audio',
|
'-x', '--extract-audio',
|
||||||
@ -674,10 +698,9 @@ def parseOpts(overrideArguments=None):
|
|||||||
postproc.add_option(
|
postproc.add_option(
|
||||||
'--fixup',
|
'--fixup',
|
||||||
metavar='POLICY', dest='fixup', default='detect_or_warn',
|
metavar='POLICY', dest='fixup', default='detect_or_warn',
|
||||||
help='(experimental) Automatically correct known faults of the file. '
|
help='Automatically correct known faults of the file. '
|
||||||
'One of never (do nothing), warn (only emit a warning), '
|
'One of never (do nothing), warn (only emit a warning), '
|
||||||
'detect_or_warn(check whether we can do anything about it, warn '
|
'detect_or_warn(the default; fix file if we can, warn otherwise)')
|
||||||
'otherwise')
|
|
||||||
postproc.add_option(
|
postproc.add_option(
|
||||||
'--prefer-avconv',
|
'--prefer-avconv',
|
||||||
action='store_false', dest='prefer_ffmpeg',
|
action='store_false', dest='prefer_ffmpeg',
|
||||||
@ -696,6 +719,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
parser.add_option_group(selection)
|
parser.add_option_group(selection)
|
||||||
parser.add_option_group(downloader)
|
parser.add_option_group(downloader)
|
||||||
parser.add_option_group(filesystem)
|
parser.add_option_group(filesystem)
|
||||||
|
parser.add_option_group(thumbnail)
|
||||||
parser.add_option_group(verbosity)
|
parser.add_option_group(verbosity)
|
||||||
parser.add_option_group(workarounds)
|
parser.add_option_group(workarounds)
|
||||||
parser.add_option_group(video_format)
|
parser.add_option_group(video_format)
|
||||||
|
@ -7,6 +7,7 @@ from .ffmpeg import (
|
|||||||
FFmpegEmbedSubtitlePP,
|
FFmpegEmbedSubtitlePP,
|
||||||
FFmpegExtractAudioPP,
|
FFmpegExtractAudioPP,
|
||||||
FFmpegFixupStretchedPP,
|
FFmpegFixupStretchedPP,
|
||||||
|
FFmpegFixupM4aPP,
|
||||||
FFmpegMergerPP,
|
FFmpegMergerPP,
|
||||||
FFmpegMetadataPP,
|
FFmpegMetadataPP,
|
||||||
FFmpegVideoConvertorPP,
|
FFmpegVideoConvertorPP,
|
||||||
@ -25,6 +26,7 @@ __all__ = [
|
|||||||
'FFmpegAudioFixPP',
|
'FFmpegAudioFixPP',
|
||||||
'FFmpegEmbedSubtitlePP',
|
'FFmpegEmbedSubtitlePP',
|
||||||
'FFmpegExtractAudioPP',
|
'FFmpegExtractAudioPP',
|
||||||
|
'FFmpegFixupM4aPP',
|
||||||
'FFmpegFixupStretchedPP',
|
'FFmpegFixupStretchedPP',
|
||||||
'FFmpegMergerPP',
|
'FFmpegMergerPP',
|
||||||
'FFmpegMetadataPP',
|
'FFmpegMetadataPP',
|
||||||
|
@ -564,7 +564,7 @@ class FFmpegFixupStretchedPP(FFmpegPostProcessor):
|
|||||||
def run(self, info):
|
def run(self, info):
|
||||||
stretched_ratio = info.get('stretched_ratio')
|
stretched_ratio = info.get('stretched_ratio')
|
||||||
if stretched_ratio is None or stretched_ratio == 1:
|
if stretched_ratio is None or stretched_ratio == 1:
|
||||||
return
|
return True, info
|
||||||
|
|
||||||
filename = info['filepath']
|
filename = info['filepath']
|
||||||
temp_filename = prepend_extension(filename, 'temp')
|
temp_filename = prepend_extension(filename, 'temp')
|
||||||
@ -577,3 +577,21 @@ class FFmpegFixupStretchedPP(FFmpegPostProcessor):
|
|||||||
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
|
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
|
||||||
|
|
||||||
return True, info
|
return True, info
|
||||||
|
|
||||||
|
|
||||||
|
class FFmpegFixupM4aPP(FFmpegPostProcessor):
|
||||||
|
def run(self, info):
|
||||||
|
if info.get('container') != 'm4a_dash':
|
||||||
|
return True, info
|
||||||
|
|
||||||
|
filename = info['filepath']
|
||||||
|
temp_filename = prepend_extension(filename, 'temp')
|
||||||
|
|
||||||
|
options = ['-c', 'copy', '-f', 'mp4']
|
||||||
|
self._downloader.to_screen('[ffmpeg] Correcting container in "%s"' % filename)
|
||||||
|
self.run_ffmpeg(filename, temp_filename, options)
|
||||||
|
|
||||||
|
os.remove(encodeFilename(filename))
|
||||||
|
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
|
||||||
|
|
||||||
|
return True, info
|
||||||
|
@ -32,6 +32,7 @@ import xml.etree.ElementTree
|
|||||||
import zlib
|
import zlib
|
||||||
|
|
||||||
from .compat import (
|
from .compat import (
|
||||||
|
compat_basestring,
|
||||||
compat_chr,
|
compat_chr,
|
||||||
compat_getenv,
|
compat_getenv,
|
||||||
compat_html_entities,
|
compat_html_entities,
|
||||||
@ -140,7 +141,7 @@ else:
|
|||||||
def find_xpath_attr(node, xpath, key, val):
|
def find_xpath_attr(node, xpath, key, val):
|
||||||
# Here comes the crazy part: In 2.6, if the xpath is a unicode,
|
# Here comes the crazy part: In 2.6, if the xpath is a unicode,
|
||||||
# .//node does not match if a node is a direct child of . !
|
# .//node does not match if a node is a direct child of . !
|
||||||
if isinstance(xpath, unicode):
|
if isinstance(xpath, compat_str):
|
||||||
xpath = xpath.encode('ascii')
|
xpath = xpath.encode('ascii')
|
||||||
|
|
||||||
for f in node.findall(xpath):
|
for f in node.findall(xpath):
|
||||||
@ -606,11 +607,6 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
|||||||
if 'Accept-encoding' in req.headers:
|
if 'Accept-encoding' in req.headers:
|
||||||
del req.headers['Accept-encoding']
|
del req.headers['Accept-encoding']
|
||||||
del req.headers['Youtubedl-no-compression']
|
del req.headers['Youtubedl-no-compression']
|
||||||
if 'Youtubedl-user-agent' in req.headers:
|
|
||||||
if 'User-agent' in req.headers:
|
|
||||||
del req.headers['User-agent']
|
|
||||||
req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
|
|
||||||
del req.headers['Youtubedl-user-agent']
|
|
||||||
|
|
||||||
if sys.version_info < (2, 7) and '#' in req.get_full_url():
|
if sys.version_info < (2, 7) and '#' in req.get_full_url():
|
||||||
# Python 2.6 is brain-dead when it comes to fragments
|
# Python 2.6 is brain-dead when it comes to fragments
|
||||||
@ -659,9 +655,14 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
|
|||||||
self._params = params
|
self._params = params
|
||||||
|
|
||||||
def https_open(self, req):
|
def https_open(self, req):
|
||||||
|
kwargs = {}
|
||||||
|
if hasattr(self, '_context'): # python > 2.6
|
||||||
|
kwargs['context'] = self._context
|
||||||
|
if hasattr(self, '_check_hostname'): # python 3.x
|
||||||
|
kwargs['check_hostname'] = self._check_hostname
|
||||||
return self.do_open(functools.partial(
|
return self.do_open(functools.partial(
|
||||||
_create_http_connection, self, self._https_conn_class, True),
|
_create_http_connection, self, self._https_conn_class, True),
|
||||||
req)
|
req, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
def parse_iso8601(date_str, delimiter='T'):
|
def parse_iso8601(date_str, delimiter='T'):
|
||||||
@ -1262,7 +1263,7 @@ def float_or_none(v, scale=1, invscale=1, default=None):
|
|||||||
|
|
||||||
|
|
||||||
def parse_duration(s):
|
def parse_duration(s):
|
||||||
if not isinstance(s, basestring if sys.version_info < (3, 0) else compat_str):
|
if not isinstance(s, compat_basestring):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
s = s.strip()
|
s = s.strip()
|
||||||
@ -1274,7 +1275,10 @@ def parse_duration(s):
|
|||||||
(?P<only_hours>[0-9.]+)\s*(?:hours?)|
|
(?P<only_hours>[0-9.]+)\s*(?:hours?)|
|
||||||
|
|
||||||
(?:
|
(?:
|
||||||
(?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?
|
(?:
|
||||||
|
(?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
|
||||||
|
(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
|
||||||
|
)?
|
||||||
(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
|
(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
|
||||||
)?
|
)?
|
||||||
(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
|
(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
|
||||||
@ -1292,6 +1296,8 @@ def parse_duration(s):
|
|||||||
res += int(m.group('mins')) * 60
|
res += int(m.group('mins')) * 60
|
||||||
if m.group('hours'):
|
if m.group('hours'):
|
||||||
res += int(m.group('hours')) * 60 * 60
|
res += int(m.group('hours')) * 60 * 60
|
||||||
|
if m.group('days'):
|
||||||
|
res += int(m.group('days')) * 24 * 60 * 60
|
||||||
if m.group('ms'):
|
if m.group('ms'):
|
||||||
res += float(m.group('ms'))
|
res += float(m.group('ms'))
|
||||||
return res
|
return res
|
||||||
@ -1426,7 +1432,7 @@ def uppercase_escape(s):
|
|||||||
|
|
||||||
def escape_rfc3986(s):
|
def escape_rfc3986(s):
|
||||||
"""Escape non-ASCII characters as suggested by RFC 3986"""
|
"""Escape non-ASCII characters as suggested by RFC 3986"""
|
||||||
if sys.version_info < (3, 0) and isinstance(s, unicode):
|
if sys.version_info < (3, 0) and isinstance(s, compat_str):
|
||||||
s = s.encode('utf-8')
|
s = s.encode('utf-8')
|
||||||
return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
|
return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
|
||||||
|
|
||||||
@ -1542,7 +1548,7 @@ def js_to_json(code):
|
|||||||
res = re.sub(r'''(?x)
|
res = re.sub(r'''(?x)
|
||||||
"(?:[^"\\]*(?:\\\\|\\")?)*"|
|
"(?:[^"\\]*(?:\\\\|\\")?)*"|
|
||||||
'(?:[^'\\]*(?:\\\\|\\')?)*'|
|
'(?:[^'\\]*(?:\\\\|\\')?)*'|
|
||||||
[a-zA-Z_][a-zA-Z_0-9]*
|
[a-zA-Z_][.a-zA-Z_0-9]*
|
||||||
''', fix_kv, code)
|
''', fix_kv, code)
|
||||||
res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
|
res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
|
||||||
return res
|
return res
|
||||||
@ -1642,3 +1648,33 @@ def is_html(first_bytes):
|
|||||||
s = first_bytes.decode('utf-8', 'replace')
|
s = first_bytes.decode('utf-8', 'replace')
|
||||||
|
|
||||||
return re.match(r'^\s*<', s)
|
return re.match(r'^\s*<', s)
|
||||||
|
|
||||||
|
|
||||||
|
def determine_protocol(info_dict):
|
||||||
|
protocol = info_dict.get('protocol')
|
||||||
|
if protocol is not None:
|
||||||
|
return protocol
|
||||||
|
|
||||||
|
url = info_dict['url']
|
||||||
|
if url.startswith('rtmp'):
|
||||||
|
return 'rtmp'
|
||||||
|
elif url.startswith('mms'):
|
||||||
|
return 'mms'
|
||||||
|
elif url.startswith('rtsp'):
|
||||||
|
return 'rtsp'
|
||||||
|
|
||||||
|
ext = determine_ext(url)
|
||||||
|
if ext == 'm3u8':
|
||||||
|
return 'm3u8'
|
||||||
|
elif ext == 'f4m':
|
||||||
|
return 'f4m'
|
||||||
|
|
||||||
|
return compat_urllib_parse_urlparse(url).scheme
|
||||||
|
|
||||||
|
|
||||||
|
def render_table(header_row, data):
|
||||||
|
""" Render a list of rows, each as a list of values """
|
||||||
|
table = [header_row] + data
|
||||||
|
max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
|
||||||
|
format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
|
||||||
|
return '\n'.join(format_str % tuple(row) for row in table)
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2015.01.23.3'
|
__version__ = '2015.02.02.3'
|
||||||
|
Reference in New Issue
Block a user