Compare commits
212 Commits
2014.09.14
...
2014.10.02
Author | SHA1 | Date | |
---|---|---|---|
10606050bc | |||
d9bf465200 | |||
01384d6e4b | |||
08d5230945 | |||
852f8641e8 | |||
18937a50a4 | |||
d5feab9aaa | |||
9e77c60c9a | |||
1414df5ce2 | |||
e80f40e5ca | |||
d3c9af84fc | |||
59d206ca2d | |||
e7b6d12254 | |||
410f3e73ab | |||
07e764439a | |||
f8fb4a7ca8 | |||
e497a7f2ca | |||
a3b6be104d | |||
b7bb0df21e | |||
4dc19c0982 | |||
58ea7ec81e | |||
c0f64ac689 | |||
7a08ad7d59 | |||
2d29ac4f23 | |||
a7a747d687 | |||
fdb4d278bf | |||
59c03a9bfb | |||
e7db973328 | |||
99b67fecc5 | |||
89294b5f50 | |||
72d53356f6 | |||
9e1e67fc15 | |||
1e60e5546e | |||
457749a703 | |||
937f935db0 | |||
80bcefcd77 | |||
8c23945c72 | |||
989b4b2b86 | |||
2a7b4681c6 | |||
8157ae3904 | |||
e50e2fcd4d | |||
6be451f422 | |||
5e4f06197f | |||
761e1645e0 | |||
8ff14175e2 | |||
dbe3043cd6 | |||
a8eb5a8e61 | |||
6043f1df4e | |||
12548cd933 | |||
2593039522 | |||
35d3e63d24 | |||
27aede9074 | |||
f5b7e6a842 | |||
a1f934b171 | |||
a43ee88c6f | |||
e2dce53781 | |||
1770ed9e86 | |||
457ac58cc7 | |||
9c44d2429b | |||
d2e32f7df5 | |||
67077b182b | |||
5f4c318844 | |||
dfee83234b | |||
7f5c0c4a19 | |||
4bc77c8417 | |||
22dd3fad86 | |||
d6e6a42256 | |||
76e7d1e74b | |||
38c4d41b74 | |||
f0b8e3607d | |||
51ee08c4bb | |||
c841789772 | |||
c121a75b36 | |||
5a8b77551d | |||
0217aee154 | |||
b14f3a4c1d | |||
92f7963f6e | |||
88fbe4c2cc | |||
394599f422 | |||
ed9266db90 | |||
f4b1c7adb8 | |||
c95eeb7b80 | |||
5e43e3803c | |||
a89435a7a8 | |||
a0a90b3ba1 | |||
c664182323 | |||
6be1cd4ddb | |||
ee0d90707a | |||
f776d8f608 | |||
b3ac3a51ac | |||
0b75c2a88b | |||
7b7518124e | |||
68b0973046 | |||
3a203b8bfa | |||
70752ccefd | |||
0155549d6c | |||
b66745288e | |||
2a1325fdde | |||
2f9e8776df | |||
497339fa0e | |||
8e6f8051f0 | |||
11b3ce8509 | |||
6a5af6acb9 | |||
9a0d98bb40 | |||
fbd3162e49 | |||
54e9a4af95 | |||
8a32b82e46 | |||
fec02bcc90 | |||
c6e90caaa6 | |||
4bbf157794 | |||
6b08cdf626 | |||
b686fc18da | |||
0b97f3a936 | |||
eb73f2649f | |||
f0b5d6af74 | |||
2f771f6c99 | |||
3b2f933b01 | |||
cc746841e7 | |||
ac7553d031 | |||
cdc628a498 | |||
69ea8ca42c | |||
4bc3a23ec5 | |||
bd5650ac64 | |||
86916dae4b | |||
f7d159cf95 | |||
632e5684ce | |||
094d42fe44 | |||
63cddb6477 | |||
273dea4248 | |||
f90d95edeb | |||
45c85d7ba1 | |||
d0df92928b | |||
df8f53f752 | |||
e35cb78c40 | |||
3ef7d11acd | |||
224ce0d872 | |||
dd41e8c82b | |||
b509a4b176 | |||
b28c8403b2 | |||
7bd4b4229a | |||
72e450c555 | |||
522c55b7f2 | |||
58e7071a2c | |||
516812df41 | |||
752297631f | |||
34e14a9beb | |||
ffb5b05db1 | |||
3e8fcd9fa1 | |||
746c67d72f | |||
5aa38e75b2 | |||
532f5bff70 | |||
f566d9f1d5 | |||
7267bd536f | |||
589d3d7c7a | |||
46f74bcf5c | |||
37bfe8ace4 | |||
0529eef5a4 | |||
fd78a4d3e6 | |||
1de33fafd9 | |||
e2e5dae64d | |||
09b23c902b | |||
109a540e7a | |||
2914e5f00f | |||
2f834e9381 | |||
9296738f20 | |||
0e59b9fffb | |||
67abbe9527 | |||
944a3de278 | |||
5a13fe9ed2 | |||
6b6096d0b7 | |||
d0246d07f1 | |||
727a98c3ee | |||
997987d568 | |||
c001f939e4 | |||
e825c38082 | |||
a04aa7a9e6 | |||
7cdd5339b3 | |||
38349518f1 | |||
64892c0b79 | |||
dc9f356846 | |||
ed86ee3b4a | |||
7bb5df1cda | |||
37a81dff04 | |||
fc96eb4e21 | |||
ae369738b0 | |||
e2037b3f7d | |||
5419033935 | |||
2eebf060af | |||
acd9db5902 | |||
d0e8b3d59b | |||
c15dd15388 | |||
0003a5c416 | |||
21f2927f70 | |||
e5a79071a5 | |||
ca0e7a2b17 | |||
b523bb71ab | |||
a020a0dc20 | |||
6d1f2431bd | |||
fdea3abdf8 | |||
59d284c316 | |||
98703c7fbf | |||
b04c8f7358 | |||
56d1912f1d | |||
eb3bd7ba8d | |||
3b11e86eeb | |||
2bca84e345 | |||
984e8e14ea | |||
d05cfe0600 | |||
37419b4f99 | |||
a8aa99442f | |||
94b539d155 | |||
b8874d4d4e |
1
.gitignore
vendored
1
.gitignore
vendored
@ -11,6 +11,7 @@ MANIFEST
|
||||
README.txt
|
||||
youtube-dl.1
|
||||
youtube-dl.bash-completion
|
||||
youtube-dl.fish
|
||||
youtube-dl
|
||||
youtube-dl.exe
|
||||
youtube-dl.tar.gz
|
||||
|
@ -2,5 +2,6 @@ include README.md
|
||||
include test/*.py
|
||||
include test/*.json
|
||||
include youtube-dl.bash-completion
|
||||
include youtube-dl.fish
|
||||
include youtube-dl.1
|
||||
recursive-include docs Makefile conf.py *.rst
|
||||
|
20
Makefile
20
Makefile
@ -1,7 +1,7 @@
|
||||
all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion
|
||||
all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.fish
|
||||
|
||||
clean:
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.fish
|
||||
|
||||
cleanall: clean
|
||||
rm -f youtube-dl youtube-dl.exe
|
||||
@ -29,6 +29,8 @@ install: youtube-dl youtube-dl.1 youtube-dl.bash-completion
|
||||
install -m 644 youtube-dl.1 $(DESTDIR)$(MANDIR)/man1
|
||||
install -d $(DESTDIR)$(SYSCONFDIR)/bash_completion.d
|
||||
install -m 644 youtube-dl.bash-completion $(DESTDIR)$(SYSCONFDIR)/bash_completion.d/youtube-dl
|
||||
install -d $(DESTDIR)$(SYSCONFDIR)/fish/completions
|
||||
install -m 644 youtube-dl.fish $(DESTDIR)$(SYSCONFDIR)/fish/completions/youtube-dl.fish
|
||||
|
||||
test:
|
||||
#nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose --processes 4 test
|
||||
@ -36,9 +38,9 @@ test:
|
||||
|
||||
tar: youtube-dl.tar.gz
|
||||
|
||||
.PHONY: all clean install test tar bash-completion pypi-files
|
||||
.PHONY: all clean install test tar bash-completion pypi-files fish-completion
|
||||
|
||||
pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1
|
||||
pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 youtube-dl.fish
|
||||
|
||||
youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
|
||||
zip --quiet youtube-dl youtube_dl/*.py youtube_dl/*/*.py
|
||||
@ -64,7 +66,12 @@ youtube-dl.bash-completion: youtube_dl/*.py youtube_dl/*/*.py devscripts/bash-co
|
||||
|
||||
bash-completion: youtube-dl.bash-completion
|
||||
|
||||
youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion
|
||||
youtube-dl.fish: youtube_dl/*.py youtube_dl/*/*.py devscripts/fish-completion.in
|
||||
python devscripts/fish-completion.py
|
||||
|
||||
fish-completion: youtube-dl.fish
|
||||
|
||||
youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.fish
|
||||
@tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
|
||||
--exclude '*.DS_Store' \
|
||||
--exclude '*.kate-swp' \
|
||||
@ -78,5 +85,6 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
|
||||
-- \
|
||||
bin devscripts test youtube_dl docs \
|
||||
LICENSE README.md README.txt \
|
||||
Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \
|
||||
Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion \
|
||||
youtube-dl.fish setup.py \
|
||||
youtube-dl
|
||||
|
45
README.md
45
README.md
@ -227,12 +227,15 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
|
||||
## Video Format Options:
|
||||
-f, --format FORMAT video format code, specify the order of
|
||||
preference using slashes: "-f 22/17/18".
|
||||
"-f mp4" and "-f flv" are also supported.
|
||||
You can also use the special names "best",
|
||||
"bestvideo", "bestaudio", "worst",
|
||||
"worstvideo" and "worstaudio". By default,
|
||||
youtube-dl will pick the best quality.
|
||||
preference using slashes: -f 22/17/18 . -f
|
||||
mp4 , -f m4a and -f flv are also
|
||||
supported. You can also use the special
|
||||
names "best", "bestvideo", "bestaudio",
|
||||
"worst", "worstvideo" and "worstaudio". By
|
||||
default, youtube-dl will pick the best
|
||||
quality. Use commas to download multiple
|
||||
audio formats, such as -f
|
||||
136/137/mp4/bestvideo,140/m4a/bestaudio
|
||||
--all-formats download all available video formats
|
||||
--prefer-free-formats prefer free video formats unless a specific
|
||||
one is requested
|
||||
@ -345,21 +348,34 @@ $ youtube-dl --dateafter 20000101 --datebefore 20091231
|
||||
|
||||
# FAQ
|
||||
|
||||
### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists
|
||||
### How do I update youtube-dl?
|
||||
|
||||
YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos.
|
||||
If you've followed [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`).
|
||||
|
||||
If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to report bugs to the Ubuntu packaging guys - all they have to do is update the package to a somewhat recent version.
|
||||
If you have used pip, a simple `sudo pip install -U youtube-dl` is sufficient to update.
|
||||
|
||||
Alternatively, uninstall the youtube-dl package and follow [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html). In a pinch, this should do if you used `apt-get` before to install youtube-dl:
|
||||
If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to http://yt-dl.org/ to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distributions serves a really outdated version. You can (and should) complain to your distribution in their bugtracker or support forum.
|
||||
|
||||
As a last resort, you can also uninstall the version installed by your package manager and follow our manual installation instructions. For that, remove the distribution's package, with a line like
|
||||
|
||||
sudo apt-get remove -y youtube-dl
|
||||
|
||||
Afterwards, simply follow [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html):
|
||||
|
||||
```
|
||||
sudo apt-get remove -y youtube-dl
|
||||
sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl
|
||||
sudo chmod a+x /usr/local/bin/youtube-dl
|
||||
hash -r
|
||||
```
|
||||
|
||||
Again, from then on you'll be able to update with `sudo youtube-dl -U`.
|
||||
|
||||
### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists
|
||||
|
||||
YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos.
|
||||
|
||||
If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to report bugs to the Ubuntu packaging guys - all they have to do is update the package to a somewhat recent version. See above for a way to update.
|
||||
|
||||
### Do I always have to pass in `--max-quality FORMAT`, or `-citw`?
|
||||
|
||||
By default, youtube-dl intends to have the best options (incidentally, if you have a convincing case that these should be different, [please file an issue where you explain that](https://yt-dl.org/bug)). Therefore, it is unnecessary and sometimes harmful to copy long option strings from webpages. In particular, `--max-quality` *limits* the video quality (so if you want the best quality, do NOT pass it in), and the only option out of `-citw` that is regularly useful is `-i`.
|
||||
@ -439,8 +455,6 @@ If you want to add support for a new site, you can follow this quick list (assum
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
@ -448,7 +462,7 @@ If you want to add support for a new site, you can follow this quick list (assum
|
||||
_VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://yourextractor.com/watch/42',
|
||||
'md5': 'TODO: md5 sum of the first 10KiB of the video file',
|
||||
'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
|
||||
'info_dict': {
|
||||
'id': '42',
|
||||
'ext': 'mp4',
|
||||
@ -463,8 +477,7 @@ If you want to add support for a new site, you can follow this quick list (assum
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
# TODO more code goes here, for example ...
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
5
devscripts/fish-completion.in
Normal file
5
devscripts/fish-completion.in
Normal file
@ -0,0 +1,5 @@
|
||||
|
||||
{{commands}}
|
||||
|
||||
|
||||
complete --command youtube-dl --arguments ":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
|
48
devscripts/fish-completion.py
Executable file
48
devscripts/fish-completion.py
Executable file
@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import optparse
|
||||
import os
|
||||
from os.path import dirname as dirn
|
||||
import sys
|
||||
|
||||
sys.path.append(dirn(dirn((os.path.abspath(__file__)))))
|
||||
import youtube_dl
|
||||
from youtube_dl.utils import shell_quote
|
||||
|
||||
FISH_COMPLETION_FILE = 'youtube-dl.fish'
|
||||
FISH_COMPLETION_TEMPLATE = 'devscripts/fish-completion.in'
|
||||
|
||||
EXTRA_ARGS = {
|
||||
'recode-video': ['--arguments', 'mp4 flv ogg webm mkv', '--exclusive'],
|
||||
|
||||
# Options that need a file parameter
|
||||
'download-archive': ['--require-parameter'],
|
||||
'cookies': ['--require-parameter'],
|
||||
'load-info': ['--require-parameter'],
|
||||
'batch-file': ['--require-parameter'],
|
||||
}
|
||||
|
||||
def build_completion(opt_parser):
|
||||
commands = []
|
||||
|
||||
for group in opt_parser.option_groups:
|
||||
for option in group.option_list:
|
||||
long_option = option.get_opt_string().strip('-')
|
||||
help_msg = shell_quote([option.help])
|
||||
complete_cmd = ['complete', '--command', 'youtube-dl', '--long-option', long_option]
|
||||
if option._short_opts:
|
||||
complete_cmd += ['--short-option', option._short_opts[0].strip('-')]
|
||||
if option.help != optparse.SUPPRESS_HELP:
|
||||
complete_cmd += ['--description', option.help]
|
||||
complete_cmd.extend(EXTRA_ARGS.get(long_option, []))
|
||||
commands.append(shell_quote(complete_cmd))
|
||||
|
||||
with open(FISH_COMPLETION_TEMPLATE) as f:
|
||||
template = f.read()
|
||||
filled_template = template.replace('{{commands}}', '\n'.join(commands))
|
||||
with open(FISH_COMPLETION_FILE, 'w') as f:
|
||||
f.write(filled_template)
|
||||
|
||||
parser = youtube_dl.parseOpts()[0]
|
||||
build_completion(parser)
|
1
setup.py
1
setup.py
@ -48,6 +48,7 @@ if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
|
||||
else:
|
||||
files_spec = [
|
||||
('etc/bash_completion.d', ['youtube-dl.bash-completion']),
|
||||
('etc/fish/completions', ['youtube-dl.fish']),
|
||||
('share/doc/youtube_dl', ['README.txt']),
|
||||
('share/man/man1', ['youtube-dl.1'])
|
||||
]
|
||||
|
@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import errno
|
||||
import io
|
||||
import hashlib
|
||||
@ -12,6 +14,7 @@ from youtube_dl import YoutubeDL
|
||||
from youtube_dl.utils import (
|
||||
compat_str,
|
||||
preferredencoding,
|
||||
write_string,
|
||||
)
|
||||
|
||||
|
||||
@ -40,10 +43,10 @@ def report_warning(message):
|
||||
If stderr is a tty file the 'WARNING:' will be colored
|
||||
'''
|
||||
if sys.stderr.isatty() and os.name != 'nt':
|
||||
_msg_header = u'\033[0;33mWARNING:\033[0m'
|
||||
_msg_header = '\033[0;33mWARNING:\033[0m'
|
||||
else:
|
||||
_msg_header = u'WARNING:'
|
||||
output = u'%s %s\n' % (_msg_header, message)
|
||||
_msg_header = 'WARNING:'
|
||||
output = '%s %s\n' % (_msg_header, message)
|
||||
if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
|
||||
output = output.encode(preferredencoding())
|
||||
sys.stderr.write(output)
|
||||
@ -103,22 +106,22 @@ def expect_info_dict(self, expected_dict, got_dict):
|
||||
|
||||
self.assertTrue(
|
||||
isinstance(got, compat_str),
|
||||
u'Expected a %s object, but got %s for field %s' % (
|
||||
'Expected a %s object, but got %s for field %s' % (
|
||||
compat_str.__name__, type(got).__name__, info_field))
|
||||
self.assertTrue(
|
||||
match_rex.match(got),
|
||||
u'field %s (value: %r) should match %r' % (info_field, got, match_str))
|
||||
'field %s (value: %r) should match %r' % (info_field, got, match_str))
|
||||
elif isinstance(expected, type):
|
||||
got = got_dict.get(info_field)
|
||||
self.assertTrue(isinstance(got, expected),
|
||||
u'Expected type %r for field %s, but got value %r of type %r' % (expected, info_field, got, type(got)))
|
||||
'Expected type %r for field %s, but got value %r of type %r' % (expected, info_field, got, type(got)))
|
||||
else:
|
||||
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
||||
got = 'md5:' + md5(got_dict.get(info_field))
|
||||
else:
|
||||
got = got_dict.get(info_field)
|
||||
self.assertEqual(expected, got,
|
||||
u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
|
||||
'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
|
||||
|
||||
# Check for the presence of mandatory fields
|
||||
if got_dict.get('_type') != 'playlist':
|
||||
@ -126,7 +129,7 @@ def expect_info_dict(self, expected_dict, got_dict):
|
||||
self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
|
||||
# Check for mandatory fields that are automatically set by YoutubeDL
|
||||
for key in ['webpage_url', 'extractor', 'extractor_key']:
|
||||
self.assertTrue(got_dict.get(key), u'Missing field: %s' % key)
|
||||
self.assertTrue(got_dict.get(key), 'Missing field: %s' % key)
|
||||
|
||||
# Are checkable fields missing from the test case definition?
|
||||
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
|
||||
@ -134,7 +137,15 @@ def expect_info_dict(self, expected_dict, got_dict):
|
||||
if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
|
||||
missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
|
||||
if missing_keys:
|
||||
sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')
|
||||
def _repr(v):
|
||||
if isinstance(v, compat_str):
|
||||
return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'")
|
||||
else:
|
||||
return repr(v)
|
||||
info_dict_str = ''.join(
|
||||
' %s: %s,\n' % (_repr(k), _repr(v))
|
||||
for k, v in test_info_dict.items())
|
||||
write_string('\n"info_dict": {' + info_dict_str + '}\n', out=sys.stderr)
|
||||
self.assertFalse(
|
||||
missing_keys,
|
||||
'Missing keys in test definition: %s' % (
|
||||
|
@ -139,7 +139,9 @@ def generator(test_case):
|
||||
|
||||
if is_playlist:
|
||||
self.assertEqual(res_dict['_type'], 'playlist')
|
||||
self.assertTrue('entries' in res_dict)
|
||||
expect_info_dict(self, test_case.get('info_dict', {}), res_dict)
|
||||
|
||||
if 'playlist_mincount' in test_case:
|
||||
assertGreaterEqual(
|
||||
self,
|
||||
@ -188,7 +190,7 @@ def generator(test_case):
|
||||
expect_info_dict(self, tc.get('info_dict', {}), info_dict)
|
||||
finally:
|
||||
try_rm_tcs_files()
|
||||
if is_playlist and res_dict is not None:
|
||||
if is_playlist and res_dict is not None and res_dict.get('entries'):
|
||||
# Remove all other files that may have been extracted if the
|
||||
# extractor returns full results even with extract_flat
|
||||
res_tcs = [{'info_dict': e} for e in res_dict['entries']]
|
||||
|
@ -22,7 +22,8 @@ from youtube_dl.utils import (
|
||||
fix_xml_ampersands,
|
||||
get_meta_content,
|
||||
orderedSet,
|
||||
PagedList,
|
||||
OnDemandPagedList,
|
||||
InAdvancePagedList,
|
||||
parse_duration,
|
||||
read_batch_urls,
|
||||
sanitize_filename,
|
||||
@ -40,6 +41,10 @@ from youtube_dl.utils import (
|
||||
parse_iso8601,
|
||||
strip_jsonp,
|
||||
uppercase_escape,
|
||||
limit_length,
|
||||
escape_rfc3986,
|
||||
escape_url,
|
||||
js_to_json,
|
||||
)
|
||||
|
||||
|
||||
@ -134,6 +139,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214')
|
||||
self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
|
||||
self.assertEqual(unified_strdate('1968-12-10'), '19681210')
|
||||
self.assertEqual(unified_strdate('28/01/2014 21:00:00 +0100'), '20140128')
|
||||
|
||||
def test_find_xpath_attr(self):
|
||||
testxml = '''<root>
|
||||
@ -243,10 +249,14 @@ class TestUtil(unittest.TestCase):
|
||||
for i in range(firstid, upto):
|
||||
yield i
|
||||
|
||||
pl = PagedList(get_page, pagesize)
|
||||
pl = OnDemandPagedList(get_page, pagesize)
|
||||
got = pl.getslice(*sliceargs)
|
||||
self.assertEqual(got, expected)
|
||||
|
||||
iapl = InAdvancePagedList(get_page, size // pagesize + 1, pagesize)
|
||||
got = iapl.getslice(*sliceargs)
|
||||
self.assertEqual(got, expected)
|
||||
|
||||
testPL(5, 2, (), [0, 1, 2, 3, 4])
|
||||
testPL(5, 2, (1,), [1, 2, 3, 4])
|
||||
testPL(5, 2, (2,), [2, 3, 4])
|
||||
@ -286,5 +296,64 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(uppercase_escape('aä'), 'aä')
|
||||
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
|
||||
|
||||
def test_limit_length(self):
|
||||
self.assertEqual(limit_length(None, 12), None)
|
||||
self.assertEqual(limit_length('foo', 12), 'foo')
|
||||
self.assertTrue(
|
||||
limit_length('foo bar baz asd', 12).startswith('foo bar'))
|
||||
self.assertTrue('...' in limit_length('foo bar baz asd', 12))
|
||||
|
||||
def test_escape_rfc3986(self):
|
||||
reserved = "!*'();:@&=+$,/?#[]"
|
||||
unreserved = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~'
|
||||
self.assertEqual(escape_rfc3986(reserved), reserved)
|
||||
self.assertEqual(escape_rfc3986(unreserved), unreserved)
|
||||
self.assertEqual(escape_rfc3986('тест'), '%D1%82%D0%B5%D1%81%D1%82')
|
||||
self.assertEqual(escape_rfc3986('%D1%82%D0%B5%D1%81%D1%82'), '%D1%82%D0%B5%D1%81%D1%82')
|
||||
self.assertEqual(escape_rfc3986('foo bar'), 'foo%20bar')
|
||||
self.assertEqual(escape_rfc3986('foo%20bar'), 'foo%20bar')
|
||||
|
||||
def test_escape_url(self):
|
||||
self.assertEqual(
|
||||
escape_url('http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavré_FD.mp4'),
|
||||
'http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavre%CC%81_FD.mp4'
|
||||
)
|
||||
self.assertEqual(
|
||||
escape_url('http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erklärt/Das-Erste/Video?documentId=22673108&bcastId=5290'),
|
||||
'http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erkl%C3%A4rt/Das-Erste/Video?documentId=22673108&bcastId=5290'
|
||||
)
|
||||
self.assertEqual(
|
||||
escape_url('http://тест.рф/фрагмент'),
|
||||
'http://тест.рф/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82'
|
||||
)
|
||||
self.assertEqual(
|
||||
escape_url('http://тест.рф/абв?абв=абв#абв'),
|
||||
'http://тест.рф/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2'
|
||||
)
|
||||
self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
|
||||
|
||||
def test_js_to_json_realworld(self):
|
||||
inp = '''{
|
||||
'clip':{'provider':'pseudo'}
|
||||
}'''
|
||||
self.assertEqual(js_to_json(inp), '''{
|
||||
"clip":{"provider":"pseudo"}
|
||||
}''')
|
||||
json.loads(js_to_json(inp))
|
||||
|
||||
inp = '''{
|
||||
'playlist':[{'controls':{'all':null}}]
|
||||
}'''
|
||||
self.assertEqual(js_to_json(inp), '''{
|
||||
"playlist":[{"controls":{"all":null}}]
|
||||
}''')
|
||||
|
||||
def test_js_to_json_edgecases(self):
|
||||
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
|
||||
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
|
||||
|
||||
on = js_to_json('{"abc": true}')
|
||||
self.assertEqual(json.loads(on), {'abc': True})
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -10,7 +10,6 @@ from test.helper import FakeYDL
|
||||
|
||||
|
||||
from youtube_dl.extractor import (
|
||||
YoutubeUserIE,
|
||||
YoutubePlaylistIE,
|
||||
YoutubeIE,
|
||||
YoutubeChannelIE,
|
||||
@ -43,28 +42,6 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
self.assertEqual(len(entries), 25)
|
||||
self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
||||
|
||||
def test_youtube_channel(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubeChannelIE(dl)
|
||||
#test paginated channel
|
||||
result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')
|
||||
self.assertTrue(len(result['entries']) > 90)
|
||||
#test autogenerated channel
|
||||
result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
|
||||
self.assertTrue(len(result['entries']) >= 18)
|
||||
|
||||
def test_youtube_user(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubeUserIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
|
||||
self.assertTrue(len(result['entries']) >= 320)
|
||||
|
||||
def test_youtube_show(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubeShowIE(dl)
|
||||
result = ie.extract('http://www.youtube.com/show/airdisasters')
|
||||
self.assertTrue(len(result) >= 3)
|
||||
|
||||
def test_youtube_mix(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
@ -83,21 +60,5 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
entries = result['entries']
|
||||
self.assertEqual(len(entries), 100)
|
||||
|
||||
def test_youtube_toplist(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubeTopListIE(dl)
|
||||
result = ie.extract('yttoplist:music:Trending')
|
||||
entries = result['entries']
|
||||
self.assertTrue(len(entries) >= 5)
|
||||
|
||||
def test_youtube_search_url(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubeSearchURLIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video')
|
||||
entries = result['entries']
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['title'], 'youtube-dl test video')
|
||||
self.assertTrue(len(entries) >= 5)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -47,18 +47,6 @@ _TESTS = [
|
||||
'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
|
||||
'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
|
||||
),
|
||||
(
|
||||
'http://s.ytimg.com/yts/swfbin/player-vfl5vIhK2/watch_as3.swf',
|
||||
'swf',
|
||||
86,
|
||||
'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVWXY\\!"#$%&\'()*+,-./:;<=>?'
|
||||
),
|
||||
(
|
||||
'http://s.ytimg.com/yts/swfbin/player-vflmDyk47/watch_as3.swf',
|
||||
'swf',
|
||||
'F375F75BF2AFDAAF2666E43868D46816F83F13E81C46.3725A8218E446A0DECD33F79DC282994D6AA92C92C9',
|
||||
'9C29AA6D499282CD97F33DCED0A644E8128A5273.64C18E31F38361864D86834E6662FAADFA2FB57F'
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
|
||||
'js',
|
||||
|
@ -28,6 +28,7 @@ from .utils import (
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_request,
|
||||
escape_url,
|
||||
ContentTooShortError,
|
||||
date_from_str,
|
||||
DateRange,
|
||||
@ -707,7 +708,7 @@ class YoutubeDL(object):
|
||||
if video_formats:
|
||||
return video_formats[0]
|
||||
else:
|
||||
extensions = ['mp4', 'flv', 'webm', '3gp']
|
||||
extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
|
||||
if format_spec in extensions:
|
||||
filter_f = lambda f: f['ext'] == format_spec
|
||||
else:
|
||||
@ -808,28 +809,29 @@ class YoutubeDL(object):
|
||||
if req_format in ('-1', 'all'):
|
||||
formats_to_download = formats
|
||||
else:
|
||||
# We can accept formats requested in the format: 34/5/best, we pick
|
||||
# the first that is available, starting from left
|
||||
req_formats = req_format.split('/')
|
||||
for rf in req_formats:
|
||||
if re.match(r'.+?\+.+?', rf) is not None:
|
||||
# Two formats have been requested like '137+139'
|
||||
format_1, format_2 = rf.split('+')
|
||||
formats_info = (self.select_format(format_1, formats),
|
||||
self.select_format(format_2, formats))
|
||||
if all(formats_info):
|
||||
selected_format = {
|
||||
'requested_formats': formats_info,
|
||||
'format': rf,
|
||||
'ext': formats_info[0]['ext'],
|
||||
}
|
||||
for rfstr in req_format.split(','):
|
||||
# We can accept formats requested in the format: 34/5/best, we pick
|
||||
# the first that is available, starting from left
|
||||
req_formats = rfstr.split('/')
|
||||
for rf in req_formats:
|
||||
if re.match(r'.+?\+.+?', rf) is not None:
|
||||
# Two formats have been requested like '137+139'
|
||||
format_1, format_2 = rf.split('+')
|
||||
formats_info = (self.select_format(format_1, formats),
|
||||
self.select_format(format_2, formats))
|
||||
if all(formats_info):
|
||||
selected_format = {
|
||||
'requested_formats': formats_info,
|
||||
'format': rf,
|
||||
'ext': formats_info[0]['ext'],
|
||||
}
|
||||
else:
|
||||
selected_format = None
|
||||
else:
|
||||
selected_format = None
|
||||
else:
|
||||
selected_format = self.select_format(rf, formats)
|
||||
if selected_format is not None:
|
||||
formats_to_download = [selected_format]
|
||||
break
|
||||
selected_format = self.select_format(rf, formats)
|
||||
if selected_format is not None:
|
||||
formats_to_download.append(selected_format)
|
||||
break
|
||||
if not formats_to_download:
|
||||
raise ExtractorError('requested format not available',
|
||||
expected=True)
|
||||
@ -1241,6 +1243,26 @@ class YoutubeDL(object):
|
||||
|
||||
def urlopen(self, req):
|
||||
""" Start an HTTP download """
|
||||
|
||||
# According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
|
||||
# always respected by websites, some tend to give out URLs with non percent-encoded
|
||||
# non-ASCII characters (see telemb.py, ard.py [#3412])
|
||||
# urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
|
||||
# To work around aforementioned issue we will replace request's original URL with
|
||||
# percent-encoded one
|
||||
req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
|
||||
url = req if req_is_string else req.get_full_url()
|
||||
url_escaped = escape_url(url)
|
||||
|
||||
# Substitute URL if any change after escaping
|
||||
if url != url_escaped:
|
||||
if req_is_string:
|
||||
req = url_escaped
|
||||
else:
|
||||
req = compat_urllib_request.Request(
|
||||
url_escaped, data=req.data, headers=req.headers,
|
||||
origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
|
||||
|
||||
return self._opener.open(req, timeout=self._socket_timeout)
|
||||
|
||||
def print_debug_header(self):
|
||||
|
@ -75,6 +75,10 @@ __authors__ = (
|
||||
'Ole Ernst',
|
||||
'Aaron McDaniel (mcd1992)',
|
||||
'Magnus Kolstad',
|
||||
'Hari Padmanaban',
|
||||
'Carlos Ramos',
|
||||
'5moufl',
|
||||
'lenaten',
|
||||
)
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
|
@ -2,6 +2,7 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import FileDownloader
|
||||
from .hls import HlsFD
|
||||
from .hls import NativeHlsFD
|
||||
from .http import HttpFD
|
||||
from .mplayer import MplayerFD
|
||||
from .rtmp import RtmpFD
|
||||
@ -19,6 +20,8 @@ def get_suitable_downloader(info_dict):
|
||||
|
||||
if url.startswith('rtmp'):
|
||||
return RtmpFD
|
||||
if protocol == 'm3u8_native':
|
||||
return NativeHlsFD
|
||||
if (protocol == 'm3u8') or (protocol is None and determine_ext(url) == 'm3u8'):
|
||||
return HlsFD
|
||||
if url.startswith('mms') or url.startswith('rtsp'):
|
||||
|
@ -42,6 +42,7 @@ class FileDownloader(object):
|
||||
Subclasses of this one must re-define the real_download method.
|
||||
"""
|
||||
|
||||
_TEST_FILE_SIZE = 10241
|
||||
params = None
|
||||
|
||||
def __init__(self, ydl, params):
|
||||
|
@ -16,6 +16,7 @@ from ..utils import (
|
||||
format_bytes,
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
@ -251,6 +252,8 @@ class F4mFD(FileDownloader):
|
||||
# We only download the first fragment
|
||||
fragments_list = fragments_list[:1]
|
||||
total_frags = len(fragments_list)
|
||||
# For some akamai manifests we'll need to add a query to the fragment url
|
||||
akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
|
||||
|
||||
tmpfilename = self.temp_name(filename)
|
||||
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
|
||||
@ -290,6 +293,8 @@ class F4mFD(FileDownloader):
|
||||
for (seg_i, frag_i) in fragments_list:
|
||||
name = 'Seg%d-Frag%d' % (seg_i, frag_i)
|
||||
url = base_url + name
|
||||
if akamai_pv:
|
||||
url += '?' + akamai_pv.strip(';')
|
||||
frag_filename = '%s-%s' % (tmpfilename, name)
|
||||
success = http_dl.download(frag_filename, {'url': url})
|
||||
if not success:
|
||||
|
@ -1,8 +1,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..utils import (
|
||||
compat_urlparse,
|
||||
compat_urllib_request,
|
||||
check_executable,
|
||||
encodeFilename,
|
||||
)
|
||||
@ -43,3 +48,57 @@ class HlsFD(FileDownloader):
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'%s exited with code %d' % (program, retval))
|
||||
return False
|
||||
|
||||
|
||||
class NativeHlsFD(FileDownloader):
|
||||
""" A more limited implementation that does not require ffmpeg """
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
url = info_dict['url']
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
||||
self.to_screen(
|
||||
'[hlsnative] %s: Downloading m3u8 manifest' % info_dict['id'])
|
||||
data = self.ydl.urlopen(url).read()
|
||||
s = data.decode('utf-8', 'ignore')
|
||||
segment_urls = []
|
||||
for line in s.splitlines():
|
||||
line = line.strip()
|
||||
if line and not line.startswith('#'):
|
||||
segment_url = (
|
||||
line
|
||||
if re.match(r'^https?://', line)
|
||||
else compat_urlparse.urljoin(url, line))
|
||||
segment_urls.append(segment_url)
|
||||
|
||||
is_test = self.params.get('test', False)
|
||||
remaining_bytes = self._TEST_FILE_SIZE if is_test else None
|
||||
byte_counter = 0
|
||||
with open(tmpfilename, 'wb') as outf:
|
||||
for i, segurl in enumerate(segment_urls):
|
||||
self.to_screen(
|
||||
'[hlsnative] %s: Downloading segment %d / %d' %
|
||||
(info_dict['id'], i + 1, len(segment_urls)))
|
||||
seg_req = compat_urllib_request.Request(segurl)
|
||||
if remaining_bytes is not None:
|
||||
seg_req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1))
|
||||
|
||||
segment = self.ydl.urlopen(seg_req).read()
|
||||
if remaining_bytes is not None:
|
||||
segment = segment[:remaining_bytes]
|
||||
remaining_bytes -= len(segment)
|
||||
outf.write(segment)
|
||||
byte_counter += len(segment)
|
||||
if remaining_bytes is not None and remaining_bytes <= 0:
|
||||
break
|
||||
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': byte_counter,
|
||||
'total_bytes': byte_counter,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
self.try_rename(tmpfilename, filename)
|
||||
return True
|
||||
|
||||
|
@ -14,8 +14,6 @@ from ..utils import (
|
||||
|
||||
|
||||
class HttpFD(FileDownloader):
|
||||
_TEST_FILE_SIZE = 10241
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
url = info_dict['url']
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
@ -25,6 +25,7 @@ from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||
from .bbccouk import BBCCoUkIE
|
||||
from .beeg import BeegIE
|
||||
from .behindkink import BehindKinkIE
|
||||
from .bilibili import BiliBiliIE
|
||||
from .blinkx import BlinkxIE
|
||||
from .bliptv import BlipTVIE, BlipTVUserIE
|
||||
@ -83,6 +84,7 @@ from .dropbox import DropboxIE
|
||||
from .ebaumsworld import EbaumsWorldIE
|
||||
from .ehow import EHowIE
|
||||
from .eighttracks import EightTracksIE
|
||||
from .einthusan import EinthusanIE
|
||||
from .eitb import EitbIE
|
||||
from .ellentv import (
|
||||
EllenTVIE,
|
||||
@ -133,12 +135,14 @@ from .gametrailers import GametrailersIE
|
||||
from .gdcvault import GDCVaultIE
|
||||
from .generic import GenericIE
|
||||
from .godtube import GodTubeIE
|
||||
from .golem import GolemIE
|
||||
from .googleplus import GooglePlusIE
|
||||
from .googlesearch import GoogleSearchIE
|
||||
from .gorillavid import GorillaVidIE
|
||||
from .goshgay import GoshgayIE
|
||||
from .grooveshark import GroovesharkIE
|
||||
from .hark import HarkIE
|
||||
from .heise import HeiseIE
|
||||
from .helsinki import HelsinkiIE
|
||||
from .hentaistigma import HentaiStigmaIE
|
||||
from .hornbunny import HornBunnyIE
|
||||
@ -186,6 +190,7 @@ from .livestream import (
|
||||
LivestreamOriginalIE,
|
||||
LivestreamShortenerIE,
|
||||
)
|
||||
from .lrt import LRTIE
|
||||
from .lynda import (
|
||||
LyndaIE,
|
||||
LyndaCourseIE
|
||||
@ -197,6 +202,7 @@ from .malemotion import MalemotionIE
|
||||
from .mdr import MDRIE
|
||||
from .metacafe import MetacafeIE
|
||||
from .metacritic import MetacriticIE
|
||||
from .mgoon import MgoonIE
|
||||
from .ministrygrid import MinistryGridIE
|
||||
from .mit import TechTVMITIE, MITIE, OCWMITIE
|
||||
from .mitele import MiTeleIE
|
||||
@ -206,6 +212,7 @@ from .mpora import MporaIE
|
||||
from .moevideo import MoeVideoIE
|
||||
from .mofosex import MofosexIE
|
||||
from .mojvideo import MojvideoIE
|
||||
from .moniker import MonikerIE
|
||||
from .mooshare import MooshareIE
|
||||
from .morningstar import MorningstarIE
|
||||
from .motherless import MotherlessIE
|
||||
@ -218,6 +225,7 @@ from .mtv import (
|
||||
MTVServicesEmbeddedIE,
|
||||
MTVIggyIE,
|
||||
)
|
||||
from .muenchentv import MuenchenTVIE
|
||||
from .musicplayon import MusicPlayOnIE
|
||||
from .musicvault import MusicVaultIE
|
||||
from .muzu import MuzuTVIE
|
||||
@ -235,6 +243,7 @@ from .ndtv import NDTVIE
|
||||
from .newgrounds import NewgroundsIE
|
||||
from .newstube import NewstubeIE
|
||||
from .nfb import NFBIE
|
||||
from .nfl import NFLIE
|
||||
from .nhl import NHLIE, NHLVideocenterIE
|
||||
from .niconico import NiconicoIE
|
||||
from .ninegag import NineGagIE
|
||||
@ -244,7 +253,10 @@ from .nosvideo import NosVideoIE
|
||||
from .novamov import NovaMovIE
|
||||
from .nowness import NownessIE
|
||||
from .nowvideo import NowVideoIE
|
||||
from .npo import NPOIE
|
||||
from .npo import (
|
||||
NPOIE,
|
||||
TegenlichtVproIE,
|
||||
)
|
||||
from .nrk import (
|
||||
NRKIE,
|
||||
NRKTVIE,
|
||||
@ -252,6 +264,7 @@ from .nrk import (
|
||||
from .ntv import NTVIE
|
||||
from .nytimes import NYTimesIE
|
||||
from .nuvid import NuvidIE
|
||||
from .oktoberfesttv import OktoberfestTVIE
|
||||
from .ooyala import OoyalaIE
|
||||
from .orf import (
|
||||
ORFTVthekIE,
|
||||
@ -262,6 +275,7 @@ from .parliamentliveuk import ParliamentLiveUKIE
|
||||
from .patreon import PatreonIE
|
||||
from .pbs import PBSIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .played import PlayedIE
|
||||
from .playfm import PlayFMIE
|
||||
from .playvid import PlayvidIE
|
||||
from .podomatic import PodomaticIE
|
||||
@ -330,6 +344,7 @@ from .spankwire import SpankwireIE
|
||||
from .spiegel import SpiegelIE, SpiegelArticleIE
|
||||
from .spiegeltv import SpiegeltvIE
|
||||
from .spike import SpikeIE
|
||||
from .sport5 import Sport5IE
|
||||
from .sportdeutschland import SportDeutschlandIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .steam import SteamIE
|
||||
@ -340,6 +355,7 @@ from .swrmediathek import SWRMediathekIE
|
||||
from .syfy import SyfyIE
|
||||
from .sztvhu import SztvHuIE
|
||||
from .tagesschau import TagesschauIE
|
||||
from .tapely import TapelyIE
|
||||
from .teachertube import (
|
||||
TeacherTubeIE,
|
||||
TeacherTubeUserIE,
|
||||
@ -357,6 +373,10 @@ from .thisav import ThisAVIE
|
||||
from .tinypic import TinyPicIE
|
||||
from .tlc import TlcIE, TlcDeIE
|
||||
from .tnaflix import TNAFlixIE
|
||||
from .thvideo import (
|
||||
THVideoIE,
|
||||
THVideoPlaylistIE
|
||||
)
|
||||
from .toutv import TouTvIE
|
||||
from .toypics import ToypicsUserIE, ToypicsIE
|
||||
from .traileraddict import TrailerAddictIE
|
||||
@ -365,6 +385,7 @@ from .trutube import TruTubeIE
|
||||
from .tube8 import Tube8IE
|
||||
from .tudou import TudouIE
|
||||
from .tumblr import TumblrIE
|
||||
from .turbo import TurboIE
|
||||
from .tutv import TutvIE
|
||||
from .tvigle import TvigleIE
|
||||
from .tvp import TvpIE
|
||||
@ -389,17 +410,19 @@ from .videobam import VideoBamIE
|
||||
from .videodetective import VideoDetectiveIE
|
||||
from .videolecturesnet import VideoLecturesNetIE
|
||||
from .videofyme import VideofyMeIE
|
||||
from .videomega import VideoMegaIE
|
||||
from .videopremium import VideoPremiumIE
|
||||
from .videott import VideoTtIE
|
||||
from .videoweed import VideoWeedIE
|
||||
from .vidme import VidmeIE
|
||||
from .vimeo import (
|
||||
VimeoIE,
|
||||
VimeoChannelIE,
|
||||
VimeoUserIE,
|
||||
VimeoAlbumIE,
|
||||
VimeoChannelIE,
|
||||
VimeoGroupsIE,
|
||||
VimeoLikesIE,
|
||||
VimeoReviewIE,
|
||||
VimeoUserIE,
|
||||
VimeoWatchLaterIE,
|
||||
)
|
||||
from .vimple import VimpleIE
|
||||
@ -438,9 +461,11 @@ from .yahoo import (
|
||||
YahooNewsIE,
|
||||
YahooSearchIE,
|
||||
)
|
||||
from .ynet import YnetIE
|
||||
from .youjizz import YouJizzIE
|
||||
from .youku import YoukuIE
|
||||
from .youporn import YouPornIE
|
||||
from .yourupload import YourUploadIE
|
||||
from .youtube import (
|
||||
YoutubeIE,
|
||||
YoutubeChannelIE,
|
||||
|
@ -22,8 +22,7 @@ class ABCIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
urls_info_json = self._search_regex(
|
||||
|
@ -35,7 +35,7 @@ class AnySexIE(InfoExtractor):
|
||||
|
||||
title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False)
|
||||
r'<div class="description"[^>]*>([^<]+)</div>', webpage, 'description', fatal=False)
|
||||
thumbnail = self._html_search_regex(
|
||||
r'preview_url\s*:\s*\'(.*?)\'', webpage, 'thumbnail', fatal=False)
|
||||
|
||||
@ -43,7 +43,7 @@ class AnySexIE(InfoExtractor):
|
||||
r'<a href="http://anysex\.com/categories/[^"]+" title="[^"]*">([^<]+)</a>', webpage)
|
||||
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'<b>Duration:</b> (\d+:\d+)', webpage, 'duration', fatal=False))
|
||||
r'<b>Duration:</b> (?:<q itemprop="duration">)?(\d+:\d+)', webpage, 'duration', fatal=False))
|
||||
view_count = int_or_none(self._html_search_regex(
|
||||
r'<b>Views:</b> (\d+)', webpage, 'view count', fatal=False))
|
||||
|
||||
|
@ -8,8 +8,6 @@ from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
qualities,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_parse,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
@ -51,9 +49,6 @@ class ARDMediathekIE(InfoExtractor):
|
||||
else:
|
||||
video_id = m.group('video_id')
|
||||
|
||||
urlp = compat_urllib_parse_urlparse(url)
|
||||
url = urlp._replace(path=compat_urllib_parse.quote(urlp.path.encode('utf-8'))).geturl()
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
|
||||
|
@ -86,11 +86,15 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
info = self._download_json(json_url, video_id)
|
||||
player_info = info['videoJsonPlayer']
|
||||
|
||||
upload_date_str = player_info.get('shootingDate')
|
||||
if not upload_date_str:
|
||||
upload_date_str = player_info.get('VDA', '').split(' ')[0]
|
||||
|
||||
info_dict = {
|
||||
'id': player_info['VID'],
|
||||
'title': player_info['VTI'],
|
||||
'description': player_info.get('VDE'),
|
||||
'upload_date': unified_strdate(player_info.get('VDA', '').split(' ')[0]),
|
||||
'upload_date': unified_strdate(upload_date_str),
|
||||
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
|
||||
}
|
||||
|
||||
|
@ -15,13 +15,23 @@ class BandcampIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>.*)'
|
||||
_TESTS = [{
|
||||
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
||||
'file': '1812978515.mp3',
|
||||
'md5': 'c557841d5e50261777a6585648adf439',
|
||||
'info_dict': {
|
||||
"title": "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
|
||||
"duration": 9.8485,
|
||||
'id': '1812978515',
|
||||
'ext': 'mp3',
|
||||
'title': "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
|
||||
'duration': 9.8485,
|
||||
},
|
||||
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
||||
}, {
|
||||
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
||||
'md5': '2b68e5851514c20efdff2afc5603b8b4',
|
||||
'info_dict': {
|
||||
'id': '2650410135',
|
||||
'ext': 'mp3',
|
||||
'title': 'Lanius (Battle)',
|
||||
'uploader': 'Ben Prunty Music',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -59,9 +69,9 @@ class BandcampIE(InfoExtractor):
|
||||
raise ExtractorError('No free songs found')
|
||||
|
||||
download_link = m_download.group(1)
|
||||
video_id = re.search(
|
||||
r'var TralbumData = {(.*?)id: (?P<id>\d*?)$',
|
||||
webpage, re.MULTILINE | re.DOTALL).group('id')
|
||||
video_id = self._search_regex(
|
||||
r'var TralbumData = {.*?id: (?P<id>\d+),?$',
|
||||
webpage, 'video id', flags=re.MULTILINE | re.DOTALL)
|
||||
|
||||
download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page')
|
||||
# We get the dictionary of the track from some javascript code
|
||||
|
53
youtube_dl/extractor/behindkink.py
Normal file
53
youtube_dl/extractor/behindkink.py
Normal file
@ -0,0 +1,53 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import url_basename
|
||||
|
||||
|
||||
class BehindKinkIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.behindkink.com/2014/08/14/ab1576-performers-voice-finally-heard-the-bill-is-killed/',
|
||||
'md5': '41ad01222b8442089a55528fec43ec01',
|
||||
'info_dict': {
|
||||
'id': '36370',
|
||||
'ext': 'mp4',
|
||||
'title': 'AB1576 - PERFORMERS VOICE FINALLY HEARD - THE BILL IS KILLED!',
|
||||
'description': 'The adult industry voice was finally heard as Assembly Bill 1576 remained\xa0 in suspense today at the Senate Appropriations Hearing. AB1576 was, among other industry damaging issues, a condom mandate...',
|
||||
'upload_date': '20140814',
|
||||
'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/08/36370_AB1576_Win.jpg',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('id')
|
||||
year = mobj.group('year')
|
||||
month = mobj.group('month')
|
||||
day = mobj.group('day')
|
||||
upload_date = year + month + day
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r"'file':\s*'([^']+)'",
|
||||
webpage, 'URL base')
|
||||
|
||||
video_id = url_basename(video_url)
|
||||
video_id = video_id.split('_')[0]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': self._og_search_title(webpage),
|
||||
'display_id': display_id,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'upload_date': upload_date,
|
||||
'age_limit': 18,
|
||||
}
|
@ -26,6 +26,8 @@ class BRIE(InfoExtractor):
|
||||
'title': 'Wenn das Traditions-Theater wackelt',
|
||||
'description': 'Heimatsound-Festival 2014: Wenn das Traditions-Theater wackelt',
|
||||
'duration': 34,
|
||||
'uploader': 'BR',
|
||||
'upload_date': '20140802',
|
||||
}
|
||||
},
|
||||
{
|
||||
@ -66,8 +68,7 @@ class BRIE(InfoExtractor):
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('id')
|
||||
display_id = self._match_id(url)
|
||||
page = self._download_webpage(url, display_id)
|
||||
xml_url = self._search_regex(
|
||||
r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL')
|
||||
|
@ -35,7 +35,6 @@ class CliphunterIE(InfoExtractor):
|
||||
'title': 'Fun Jynx Maze solo',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'age_limit': 18,
|
||||
'duration': 1317,
|
||||
}
|
||||
}
|
||||
|
||||
@ -86,14 +85,11 @@ class CliphunterIE(InfoExtractor):
|
||||
thumbnail = self._search_regex(
|
||||
r"var\s+mov_thumb\s*=\s*'([^']+)';",
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
duration = int_or_none(self._search_regex(
|
||||
r'pl_dur\s*=\s*([0-9]+)', webpage, 'duration', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'formats': formats,
|
||||
'duration': duration,
|
||||
'age_limit': self._rta_search(webpage),
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
@ -9,6 +9,8 @@ from ..utils import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse,
|
||||
remove_end,
|
||||
HEADRequest,
|
||||
compat_HTTPError,
|
||||
)
|
||||
|
||||
|
||||
@ -21,6 +23,7 @@ class CloudyIE(InfoExtractor):
|
||||
'''
|
||||
_EMBED_URL = 'http://www.%s/embed.php?id=%s'
|
||||
_API_URL = 'http://www.%s/api/player.api.php?%s'
|
||||
_MAX_TRIES = 2
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://www.cloudy.ec/v/af511e2527aac',
|
||||
@ -42,24 +45,30 @@ class CloudyIE(InfoExtractor):
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_host = mobj.group('host')
|
||||
video_id = mobj.group('id')
|
||||
def _extract_video(self, video_host, video_id, file_key, error_url=None, try_num=0):
|
||||
|
||||
url = self._EMBED_URL % (video_host, video_id)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
if try_num > self._MAX_TRIES - 1:
|
||||
raise ExtractorError('Unable to extract video URL', expected=True)
|
||||
|
||||
file_key = self._search_regex(
|
||||
r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key')
|
||||
data_url = self._API_URL % (video_host, compat_urllib_parse.urlencode({
|
||||
form = {
|
||||
'file': video_id,
|
||||
'key': file_key,
|
||||
}))
|
||||
}
|
||||
|
||||
if error_url:
|
||||
form.update({
|
||||
'numOfErrors': try_num,
|
||||
'errorCode': '404',
|
||||
'errorUrl': error_url,
|
||||
})
|
||||
|
||||
data_url = self._API_URL % (video_host, compat_urllib_parse.urlencode(form))
|
||||
player_data = self._download_webpage(
|
||||
data_url, video_id, 'Downloading player data')
|
||||
data = compat_parse_qs(player_data)
|
||||
|
||||
try_num += 1
|
||||
|
||||
if 'error' in data:
|
||||
raise ExtractorError(
|
||||
'%s error: %s' % (self.IE_NAME, ' '.join(data['error_msg'])),
|
||||
@ -69,16 +78,31 @@ class CloudyIE(InfoExtractor):
|
||||
if title:
|
||||
title = remove_end(title, '&asdasdas').strip()
|
||||
|
||||
formats = []
|
||||
video_url = data.get('url', [None])[0]
|
||||
|
||||
if video_url:
|
||||
formats.append({
|
||||
'format_id': 'sd',
|
||||
'url': video_url,
|
||||
})
|
||||
try:
|
||||
self._request_webpage(HEADRequest(video_url), video_id, 'Checking video URL')
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]:
|
||||
self.report_warning('Invalid video URL, requesting another', video_id)
|
||||
return self._extract_video(video_host, video_id, file_key, video_url, try_num)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_host = mobj.group('host')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
url = self._EMBED_URL % (video_host, video_id)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
file_key = self._search_regex(
|
||||
r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key')
|
||||
|
||||
return self._extract_video(video_host, video_id, file_key)
|
||||
|
@ -1,6 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import datetime
|
||||
import hashlib
|
||||
import json
|
||||
import netrc
|
||||
@ -15,11 +16,13 @@ from ..utils import (
|
||||
compat_http_client,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
compat_str,
|
||||
|
||||
clean_html,
|
||||
compiled_regex_type,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
RegexNotFoundError,
|
||||
sanitize_filename,
|
||||
@ -130,6 +133,8 @@ class InfoExtractor(object):
|
||||
by YoutubeDL if it's missing)
|
||||
categories: A list of categories that the video falls in, for example
|
||||
["Sports", "Berlin"]
|
||||
is_live: True, False, or None (=unknown). Whether this video is a
|
||||
live stream that goes on instead of a fixed-length video.
|
||||
|
||||
Unless mentioned otherwise, the fields should be Unicode strings.
|
||||
|
||||
@ -161,6 +166,14 @@ class InfoExtractor(object):
|
||||
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
|
||||
return cls._VALID_URL_RE.match(url) is not None
|
||||
|
||||
@classmethod
|
||||
def _match_id(cls, url):
|
||||
if '_VALID_URL_RE' not in cls.__dict__:
|
||||
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
|
||||
m = cls._VALID_URL_RE.match(url)
|
||||
assert m
|
||||
return m.group('id')
|
||||
|
||||
@classmethod
|
||||
def working(cls):
|
||||
"""Getter method for _WORKING."""
|
||||
@ -321,7 +334,11 @@ class InfoExtractor(object):
|
||||
try:
|
||||
return json.loads(json_string)
|
||||
except ValueError as ve:
|
||||
raise ExtractorError('Failed to download JSON', cause=ve)
|
||||
errmsg = '%s: Failed to parse JSON ' % video_id
|
||||
if fatal:
|
||||
raise ExtractorError(errmsg, cause=ve)
|
||||
else:
|
||||
self.report_warning(errmsg + str(ve))
|
||||
|
||||
def report_warning(self, msg, video_id=None):
|
||||
idstr = '' if video_id is None else '%s: ' % video_id
|
||||
@ -638,7 +655,9 @@ class InfoExtractor(object):
|
||||
|
||||
return formats
|
||||
|
||||
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None):
|
||||
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
||||
entry_protocol='m3u8', preference=None):
|
||||
|
||||
formats = [{
|
||||
'format_id': 'm3u8-meta',
|
||||
'url': m3u8_url,
|
||||
@ -649,6 +668,11 @@ class InfoExtractor(object):
|
||||
'format_note': 'Quality selection URL',
|
||||
}]
|
||||
|
||||
format_url = lambda u: (
|
||||
u
|
||||
if re.match(r'^https?://', u)
|
||||
else compat_urlparse.urljoin(m3u8_url, u))
|
||||
|
||||
m3u8_doc = self._download_webpage(m3u8_url, video_id)
|
||||
last_info = None
|
||||
kv_rex = re.compile(
|
||||
@ -665,15 +689,17 @@ class InfoExtractor(object):
|
||||
continue
|
||||
else:
|
||||
if last_info is None:
|
||||
formats.append({'url': line})
|
||||
formats.append({'url': format_url(line)})
|
||||
continue
|
||||
tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
|
||||
|
||||
f = {
|
||||
'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)),
|
||||
'url': line.strip(),
|
||||
'url': format_url(line.strip()),
|
||||
'tbr': tbr,
|
||||
'ext': ext,
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
}
|
||||
codecs = last_info.get('CODECS')
|
||||
if codecs:
|
||||
@ -693,6 +719,34 @@ class InfoExtractor(object):
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
def _live_title(self, name):
|
||||
""" Generate the title for a live video """
|
||||
now = datetime.datetime.now()
|
||||
now_str = now.strftime("%Y-%m-%d %H:%M")
|
||||
return name + ' ' + now_str
|
||||
|
||||
def _int(self, v, name, fatal=False, **kwargs):
|
||||
res = int_or_none(v, **kwargs)
|
||||
if 'get_attr' in kwargs:
|
||||
print(getattr(v, kwargs['get_attr']))
|
||||
if res is None:
|
||||
msg = 'Failed to extract %s: Could not parse value %r' % (name, v)
|
||||
if fatal:
|
||||
raise ExtractorError(msg)
|
||||
else:
|
||||
self._downloader.report_warning(msg)
|
||||
return res
|
||||
|
||||
def _float(self, v, name, fatal=False, **kwargs):
|
||||
res = float_or_none(v, **kwargs)
|
||||
if res is None:
|
||||
msg = 'Failed to extract %s: Could not parse value %r' % (name, v)
|
||||
if fatal:
|
||||
raise ExtractorError(msg)
|
||||
else:
|
||||
self._downloader.report_warning(msg)
|
||||
return res
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
|
@ -9,7 +9,7 @@ import xml.etree.ElementTree
|
||||
|
||||
from hashlib import sha1
|
||||
from math import pow, sqrt, floor
|
||||
from .common import InfoExtractor
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
compat_urllib_parse,
|
||||
@ -26,7 +26,7 @@ from ..aes import (
|
||||
)
|
||||
|
||||
|
||||
class CrunchyrollIE(InfoExtractor):
|
||||
class CrunchyrollIE(SubtitlesInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||
_TEST = {
|
||||
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||
@ -271,6 +271,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
else:
|
||||
subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle)
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, subtitles)
|
||||
return
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
|
@ -11,10 +11,10 @@ from ..utils import (
|
||||
|
||||
|
||||
class DaumIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:v/|.*?clipid=)(?P<id>[^?#&]+)'
|
||||
IE_NAME = 'daum.net'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
|
||||
'info_dict': {
|
||||
'id': '52554690',
|
||||
@ -24,11 +24,17 @@ class DaumIE(InfoExtractor):
|
||||
'upload_date': '20130831',
|
||||
'duration': 3868,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://tvpot.daum.net/v/vab4dyeDBysyBssyukBUjBz',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group(1)
|
||||
video_id = mobj.group('id')
|
||||
canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
|
||||
webpage = self._download_webpage(canonical_url, video_id)
|
||||
full_id = self._search_regex(
|
||||
@ -42,7 +48,6 @@ class DaumIE(InfoExtractor):
|
||||
'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
|
||||
video_id, 'Downloading video formats info')
|
||||
|
||||
self.to_screen(u'%s: Getting video urls' % video_id)
|
||||
formats = []
|
||||
for format_el in urls.findall('result/output_list/output_list'):
|
||||
profile = format_el.attrib['profile']
|
||||
@ -52,7 +57,7 @@ class DaumIE(InfoExtractor):
|
||||
})
|
||||
url_doc = self._download_xml(
|
||||
'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
|
||||
video_id, note=False)
|
||||
video_id, note='Downloading video data for %s format' % profile)
|
||||
format_url = url_doc.find('result/url').text
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
|
@ -7,7 +7,7 @@ class DivxStageIE(NovaMovIE):
|
||||
IE_NAME = 'divxstage'
|
||||
IE_DESC = 'DivxStage'
|
||||
|
||||
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'divxstage\.(?:eu|net|ch|co|at|ag)'}
|
||||
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'divxstage\.(?:eu|net|ch|co|at|ag|to)'}
|
||||
|
||||
_HOST = 'www.divxstage.eu'
|
||||
|
||||
@ -24,4 +24,4 @@ class DivxStageIE(NovaMovIE):
|
||||
'title': 'youtubedl test video',
|
||||
'description': 'This is a test video for youtubedl.',
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -5,28 +5,32 @@ import os.path
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import compat_urllib_parse_unquote
|
||||
from ..utils import compat_urllib_parse_unquote, url_basename
|
||||
|
||||
|
||||
class DropboxIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dropbox[.]com/s/(?P<id>[a-zA-Z0-9]{15})/(?P<title>[^?#]*)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?dropbox[.]com/sh?/(?P<id>[a-zA-Z0-9]{15})/.*'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
|
||||
'info_dict': {
|
||||
'id': 'nelirfsxnmcfbfh',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl test video \'ä"BaW_jenozKc'
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
fn = compat_urllib_parse_unquote(mobj.group('title'))
|
||||
fn = compat_urllib_parse_unquote(url_basename(url))
|
||||
title = os.path.splitext(fn)[0]
|
||||
video_url = (
|
||||
re.sub(r'[?&]dl=0', '', url) +
|
||||
('?' if '?' in url else '&') + 'dl=1')
|
||||
video_url = re.sub(r'[?&]dl=0', '', url)
|
||||
video_url += ('?' if '?' not in video_url else '&') + 'dl=1'
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@ -19,7 +19,7 @@ class DrTuberIE(InfoExtractor):
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'categories': list, # NSFW
|
||||
'categories': ['Babe', 'Blonde', 'Erotic', 'Outdoor', 'Softcore', 'Solo'],
|
||||
'thumbnail': 're:https?://.*\.jpg$',
|
||||
'age_limit': 18,
|
||||
}
|
||||
@ -52,9 +52,9 @@ class DrTuberIE(InfoExtractor):
|
||||
r'<span class="comments_count">([\d,\.]+)</span>',
|
||||
webpage, 'comment count', fatal=False))
|
||||
|
||||
cats_str = self._html_search_regex(
|
||||
r'<meta name="keywords" content="([^"]+)"', webpage, 'categories', fatal=False)
|
||||
categories = None if cats_str is None else cats_str.split(' ')
|
||||
cats_str = self._search_regex(
|
||||
r'<span>Categories:</span><div>(.+?)</div>', webpage, 'categories', fatal=False)
|
||||
categories = [] if not cats_str else re.findall(r'<a title="([^"]+)"', cats_str)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
61
youtube_dl/extractor/einthusan.py
Normal file
61
youtube_dl/extractor/einthusan.py
Normal file
@ -0,0 +1,61 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class EinthusanIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?einthusan\.com/movies/watch.php\?([^#]*?)id=(?P<id>[0-9]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.einthusan.com/movies/watch.php?id=2447',
|
||||
'md5': 'af244f4458cd667205e513d75da5b8b1',
|
||||
'info_dict': {
|
||||
'id': '2447',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ek Villain',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 'md5:9d29fc91a7abadd4591fb862fa560d93',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.einthusan.com/movies/watch.php?id=1671',
|
||||
'md5': 'ef63c7a803e22315880ed182c10d1c5c',
|
||||
'info_dict': {
|
||||
'id': '1671',
|
||||
'ext': 'mp4',
|
||||
'title': 'Soodhu Kavvuum',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 'md5:05d8a0c0281a4240d86d76e14f2f4d51',
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'<h1><a class="movie-title".*?>(.*?)</a></h1>', webpage, 'title')
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'''(?s)jwplayer\("mediaplayer"\)\.setup\({.*?'file': '([^']+)'.*?}\);''',
|
||||
webpage, 'video url')
|
||||
|
||||
description = self._html_search_meta('description', webpage)
|
||||
thumbnail = self._html_search_regex(
|
||||
r'''<a class="movie-cover-wrapper".*?><img src=["'](.*?)["'].*?/></a>''',
|
||||
webpage, "thumbnail url", fatal=False)
|
||||
if thumbnail is not None:
|
||||
thumbnail = thumbnail.replace('..', 'http://www.einthusan.com')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'url': video_url,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
}
|
@ -1,4 +1,6 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@ -7,20 +9,20 @@ from ..utils import ExtractorError
|
||||
|
||||
|
||||
class EitbIE(InfoExtractor):
|
||||
IE_NAME = u'eitb.tv'
|
||||
IE_NAME = 'eitb.tv'
|
||||
_VALID_URL = r'https?://www\.eitb\.tv/(eu/bideoa|es/video)/[^/]+/(?P<playlist_id>\d+)/(?P<chapter_id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
u'add_ie': ['Brightcove'],
|
||||
u'url': u'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/2677100210001/2743577154001/lasa-y-zabala-30-anos/',
|
||||
u'md5': u'edf4436247185adee3ea18ce64c47998',
|
||||
u'info_dict': {
|
||||
u'id': u'2743577154001',
|
||||
u'ext': u'mp4',
|
||||
u'title': u'60 minutos (Lasa y Zabala, 30 años)',
|
||||
'add_ie': ['Brightcove'],
|
||||
'url': 'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/2677100210001/2743577154001/lasa-y-zabala-30-anos/',
|
||||
'md5': 'edf4436247185adee3ea18ce64c47998',
|
||||
'info_dict': {
|
||||
'id': '2743577154001',
|
||||
'ext': 'mp4',
|
||||
'title': '60 minutos (Lasa y Zabala, 30 años)',
|
||||
# All videos from eitb has this description in the brightcove info
|
||||
u'description': u'.',
|
||||
u'uploader': u'Euskal Telebista',
|
||||
'description': '.',
|
||||
'uploader': 'Euskal Telebista',
|
||||
},
|
||||
}
|
||||
|
||||
@ -30,7 +32,7 @@ class EitbIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, chapter_id)
|
||||
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
|
||||
if bc_url is None:
|
||||
raise ExtractorError(u'Could not extract the Brightcove url')
|
||||
raise ExtractorError('Could not extract the Brightcove url')
|
||||
# The BrightcoveExperience object doesn't contain the video id, we set
|
||||
# it manually
|
||||
bc_url += '&%40videoPlayer={0}'.format(chapter_id)
|
||||
|
@ -14,11 +14,11 @@ class EpornerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\d+)/(?P<display_id>[\w-]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
|
||||
'md5': '3b427ae4b9d60619106de3185c2987cd',
|
||||
'md5': '39d486f046212d8e1b911c52ab4691f8',
|
||||
'info_dict': {
|
||||
'id': '95008',
|
||||
'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Infamous Tiffany Teen Strip Tease Video',
|
||||
'duration': 194,
|
||||
'view_count': int,
|
||||
|
@ -7,6 +7,7 @@ from ..utils import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
@ -20,6 +21,7 @@ class ExtremeTubeIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Music Video 14 british euro brit european cumshots swallow',
|
||||
'uploader': 'unknown',
|
||||
'view_count': int,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}, {
|
||||
@ -39,8 +41,12 @@ class ExtremeTubeIE(InfoExtractor):
|
||||
video_title = self._html_search_regex(
|
||||
r'<h1 [^>]*?title="([^"]+)"[^>]*>', webpage, 'title')
|
||||
uploader = self._html_search_regex(
|
||||
r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, 'uploader',
|
||||
fatal=False)
|
||||
r'Uploaded by:\s*</strong>\s*(.+?)\s*</div>',
|
||||
webpage, 'uploader', fatal=False)
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
r'Views:\s*</strong>\s*<span>([\d,\.]+)</span>',
|
||||
webpage, 'view count', fatal=False))
|
||||
|
||||
video_url = compat_urllib_parse.unquote(self._html_search_regex(
|
||||
r'video_url=(.+?)&', webpage, 'video_url'))
|
||||
path = compat_urllib_parse_urlparse(video_url).path
|
||||
@ -51,6 +57,7 @@ class ExtremeTubeIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'uploader': uploader,
|
||||
'view_count': view_count,
|
||||
'url': video_url,
|
||||
'format': format,
|
||||
'format_id': format,
|
||||
|
@ -12,8 +12,8 @@ from ..utils import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
urlencode_postdata,
|
||||
|
||||
ExtractorError,
|
||||
limit_length,
|
||||
)
|
||||
|
||||
|
||||
@ -35,7 +35,15 @@ class FacebookIE(InfoExtractor):
|
||||
'id': '637842556329505',
|
||||
'ext': 'mp4',
|
||||
'duration': 38,
|
||||
'title': 'Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam fin...',
|
||||
'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam',
|
||||
}
|
||||
}, {
|
||||
'note': 'Video without discernible title',
|
||||
'url': 'https://www.facebook.com/video.php?v=274175099429670',
|
||||
'info_dict': {
|
||||
'id': '274175099429670',
|
||||
'ext': 'mp4',
|
||||
'title': 'Facebook video #274175099429670',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
||||
@ -131,8 +139,7 @@ class FacebookIE(InfoExtractor):
|
||||
video_title = self._html_search_regex(
|
||||
r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
|
||||
webpage, 'alternative title', default=None)
|
||||
if len(video_title) > 80 + 3:
|
||||
video_title = video_title[:80] + '...'
|
||||
video_title = limit_length(video_title, 80)
|
||||
if not video_title:
|
||||
video_title = 'Facebook video #%s' % video_id
|
||||
|
||||
|
@ -10,13 +10,13 @@ from ..utils import (
|
||||
|
||||
|
||||
class FlickrIE(InfoExtractor):
|
||||
"""Information Extractor for Flickr videos"""
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
|
||||
_VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
|
||||
_TEST = {
|
||||
'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',
|
||||
'file': '5645318632.mp4',
|
||||
'md5': '6fdc01adbc89d72fc9c4f15b4a4ba87b',
|
||||
'info_dict': {
|
||||
'id': '5645318632',
|
||||
'ext': 'mp4',
|
||||
"description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.",
|
||||
"uploader_id": "forestwander-nature-pictures",
|
||||
"title": "Dark Hollow Waterfalls"
|
||||
@ -49,12 +49,12 @@ class FlickrIE(InfoExtractor):
|
||||
raise ExtractorError('Unable to extract video url')
|
||||
video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
|
||||
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': self._og_search_title(webpage),
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'uploader_id': video_uploader_id,
|
||||
}]
|
||||
}
|
||||
|
@ -4,16 +4,21 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class FranceInterIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]{6})'
|
||||
_VALID_URL = r'http://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.franceinter.fr/player/reecouter?play=793962',
|
||||
'file': '793962.mp3',
|
||||
'md5': '4764932e466e6f6c79c317d2e74f6884',
|
||||
"info_dict": {
|
||||
"title": "L’Histoire dans les jeux vidéo",
|
||||
'id': '793962',
|
||||
'ext': 'mp3',
|
||||
'title': 'L’Histoire dans les jeux vidéo',
|
||||
'description': 'md5:7e93ddb4451e7530022792240a3049c7',
|
||||
'timestamp': 1387369800,
|
||||
'upload_date': '20131218',
|
||||
},
|
||||
}
|
||||
|
||||
@ -22,17 +27,26 @@ class FranceInterIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._html_search_regex(
|
||||
r'<span class="roll_overflow">(.*?)</span></h1>', webpage, 'title')
|
||||
|
||||
path = self._search_regex(
|
||||
r'&urlAOD=(.*?)&startTime', webpage, 'video url')
|
||||
r'<a id="player".+?href="([^"]+)"', webpage, 'video url')
|
||||
video_url = 'http://www.franceinter.fr/' + path
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<span class="title">(.+?)</span>', webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'<span class="description">(.*?)</span>',
|
||||
webpage, 'description', fatal=False)
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'data-date="(\d+)"', webpage, 'upload date', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'formats': [{
|
||||
'url': video_url,
|
||||
'vcodec': 'none',
|
||||
}],
|
||||
'title': title,
|
||||
}
|
||||
|
@ -8,45 +8,68 @@ import json
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urlparse,
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
parse_duration,
|
||||
compat_urllib_parse_urlparse,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
def _extract_video(self, video_id):
|
||||
info = self._download_xml(
|
||||
'http://www.francetvinfo.fr/appftv/webservices/video/'
|
||||
'getInfosOeuvre.php?id-diffusion='
|
||||
+ video_id, video_id, 'Downloading XML config')
|
||||
def _extract_video(self, video_id, catalogue):
|
||||
info = self._download_json(
|
||||
'http://webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=%s&catalogue=%s'
|
||||
% (video_id, catalogue),
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
manifest_url = info.find('videos/video/url').text
|
||||
manifest_url = manifest_url.replace('/z/', '/i/')
|
||||
|
||||
if manifest_url.startswith('rtmp'):
|
||||
formats = [{'url': manifest_url, 'ext': 'flv'}]
|
||||
else:
|
||||
formats = []
|
||||
available_formats = self._search_regex(r'/[^,]*,(.*?),k\.mp4', manifest_url, 'available formats')
|
||||
for index, format_descr in enumerate(available_formats.split(',')):
|
||||
format_info = {
|
||||
'url': manifest_url.replace('manifest.f4m', 'index_%d_av.m3u8' % index),
|
||||
'ext': 'mp4',
|
||||
}
|
||||
m_resolution = re.search(r'(?P<width>\d+)x(?P<height>\d+)', format_descr)
|
||||
if m_resolution is not None:
|
||||
format_info.update({
|
||||
'width': int(m_resolution.group('width')),
|
||||
'height': int(m_resolution.group('height')),
|
||||
})
|
||||
formats.append(format_info)
|
||||
if info.get('status') == 'NOK':
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, info['message']), expected=True)
|
||||
|
||||
thumbnail_path = info.find('image').text
|
||||
formats = []
|
||||
for video in info['videos']:
|
||||
if video['statut'] != 'ONLINE':
|
||||
continue
|
||||
video_url = video['url']
|
||||
if not video_url:
|
||||
continue
|
||||
format_id = video['format']
|
||||
if video_url.endswith('.f4m'):
|
||||
video_url_parsed = compat_urllib_parse_urlparse(video_url)
|
||||
f4m_url = self._download_webpage(
|
||||
'http://hdfauth.francetv.fr/esi/urltokengen2.html?url=%s' % video_url_parsed.path,
|
||||
video_id, 'Downloading f4m manifest token', fatal=False)
|
||||
if f4m_url:
|
||||
f4m_formats = self._extract_f4m_formats(f4m_url, video_id)
|
||||
for f4m_format in f4m_formats:
|
||||
f4m_format['preference'] = 1
|
||||
formats.extend(f4m_formats)
|
||||
elif video_url.endswith('.m3u8'):
|
||||
formats.extend(self._extract_m3u8_formats(video_url, video_id))
|
||||
elif video_url.startswith('rtmp'):
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': 'rtmp-%s' % format_id,
|
||||
'ext': 'flv',
|
||||
'preference': 1,
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'preference': 2,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info.find('titre').text,
|
||||
'title': info['titre'],
|
||||
'description': clean_html(info['synopsis']),
|
||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
|
||||
'duration': parse_duration(info['duree']),
|
||||
'timestamp': int_or_none(info['diffusion']['timestamp']),
|
||||
'formats': formats,
|
||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
|
||||
'description': info.find('synopsis').text,
|
||||
}
|
||||
|
||||
|
||||
@ -61,7 +84,7 @@ class PluzzIE(FranceTVBaseInfoExtractor):
|
||||
webpage = self._download_webpage(url, title)
|
||||
video_id = self._search_regex(
|
||||
r'data-diffusion="(\d+)"', webpage, 'ID')
|
||||
return self._extract_video(video_id)
|
||||
return self._extract_video(video_id, 'Pluzz')
|
||||
|
||||
|
||||
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
@ -70,13 +93,13 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
|
||||
'md5': '9cecf35f99c4079c199e9817882a9a1c',
|
||||
'info_dict': {
|
||||
'id': '84981923',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'Soir 3',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'upload_date': '20130826',
|
||||
'timestamp': 1377548400,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
|
||||
@ -88,15 +111,17 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'HLS (reqires ffmpeg)'
|
||||
}
|
||||
},
|
||||
'skip': 'Ce direct est terminé et sera disponible en rattrapage dans quelques minutes.',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
page_title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, page_title)
|
||||
video_id = self._search_regex(r'id-video=((?:[^0-9]*?_)?[0-9]+)[@"]', webpage, 'video id')
|
||||
return self._extract_video(video_id)
|
||||
video_id, catalogue = self._search_regex(
|
||||
r'id-video=([^@]+@[^"]+)', webpage, 'video id').split('@')
|
||||
return self._extract_video(video_id, catalogue)
|
||||
|
||||
|
||||
class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||
@ -112,91 +137,77 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
|
||||
# france2
|
||||
{
|
||||
'url': 'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
|
||||
'file': '75540104.mp4',
|
||||
'md5': 'c03fc87cb85429ffd55df32b9fc05523',
|
||||
'info_dict': {
|
||||
'title': '13h15, le samedi...',
|
||||
'description': 'md5:2e5b58ba7a2d3692b35c792be081a03d',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'id': '109169362',
|
||||
'ext': 'flv',
|
||||
'title': '13h15, le dimanche...',
|
||||
'description': 'md5:9a0932bb465f22d377a449be9d1a0ff7',
|
||||
'upload_date': '20140914',
|
||||
'timestamp': 1410693600,
|
||||
},
|
||||
},
|
||||
# france3
|
||||
{
|
||||
'url': 'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
|
||||
'md5': '679bb8f8921f8623bd658fa2f8364da0',
|
||||
'info_dict': {
|
||||
'id': '000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le scandale du prix des médicaments',
|
||||
'description': 'md5:1384089fbee2f04fc6c9de025ee2e9ce',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
'upload_date': '20131113',
|
||||
'timestamp': 1384380000,
|
||||
},
|
||||
},
|
||||
# france4
|
||||
{
|
||||
'url': 'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
|
||||
'md5': 'a182bf8d2c43d88d46ec48fbdd260c1c',
|
||||
'info_dict': {
|
||||
'id': 'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hero Corp Making of - Extrait 1',
|
||||
'description': 'md5:c87d54871b1790679aec1197e73d650a',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
'upload_date': '20131106',
|
||||
'timestamp': 1383766500,
|
||||
},
|
||||
},
|
||||
# france5
|
||||
{
|
||||
'url': 'http://www.france5.fr/emissions/c-a-dire/videos/92837968',
|
||||
'md5': '78f0f4064f9074438e660785bbf2c5d9',
|
||||
'info_dict': {
|
||||
'id': '92837968',
|
||||
'ext': 'mp4',
|
||||
'id': '108961659',
|
||||
'ext': 'flv',
|
||||
'title': 'C à dire ?!',
|
||||
'description': 'md5:fb1db1cbad784dcce7c7a7bd177c8e2f',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'description': 'md5:1a4aeab476eb657bf57c4ff122129f81',
|
||||
'upload_date': '20140915',
|
||||
'timestamp': 1410795000,
|
||||
},
|
||||
},
|
||||
# franceo
|
||||
{
|
||||
'url': 'http://www.franceo.fr/jt/info-afrique/04-12-2013',
|
||||
'md5': '52f0bfe202848b15915a2f39aaa8981b',
|
||||
'info_dict': {
|
||||
'id': '92327925',
|
||||
'ext': 'mp4',
|
||||
'title': 'Infô-Afrique',
|
||||
'id': '108634970',
|
||||
'ext': 'flv',
|
||||
'title': 'Infô Afrique',
|
||||
'description': 'md5:ebf346da789428841bee0fd2a935ea55',
|
||||
'upload_date': '20140915',
|
||||
'timestamp': 1410822000,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'The id changes frequently',
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj.group('key'):
|
||||
webpage = self._download_webpage(url, mobj.group('key'))
|
||||
id_res = [
|
||||
(r'''(?x)<div\s+class="video-player">\s*
|
||||
<a\s+href="http://videos.francetv.fr/video/([0-9]+)"\s+
|
||||
class="francetv-video-player">'''),
|
||||
(r'<a id="player_direct" href="http://info\.francetelevisions'
|
||||
'\.fr/\?id-video=([^"/&]+)'),
|
||||
(r'<a class="video" id="ftv_player_(.+?)"'),
|
||||
]
|
||||
video_id = self._html_search_regex(id_res, webpage, 'video ID')
|
||||
else:
|
||||
video_id = mobj.group('id')
|
||||
return self._extract_video(video_id)
|
||||
webpage = self._download_webpage(url, mobj.group('key') or mobj.group('id'))
|
||||
video_id, catalogue = self._html_search_regex(
|
||||
r'href="http://videos\.francetv\.fr/video/([^@]+@[^"]+)"',
|
||||
webpage, 'video ID').split('@')
|
||||
return self._extract_video(video_id, catalogue)
|
||||
|
||||
|
||||
class GenerationQuoiIE(InfoExtractor):
|
||||
@ -232,16 +243,15 @@ class CultureboxIE(FranceTVBaseInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813',
|
||||
'url': 'http://culturebox.francetvinfo.fr/festivals/dans-les-jardins-de-william-christie/dans-les-jardins-de-william-christie-le-camus-162553',
|
||||
'md5': '5ad6dec1ffb2a3fbcb20cc4b744be8d6',
|
||||
'info_dict': {
|
||||
'id': 'EV_6785',
|
||||
'ext': 'mp4',
|
||||
'title': 'Einstein on the beach au Théâtre du Châtelet',
|
||||
'description': 'md5:9ce2888b1efefc617b5e58b3f6200eeb',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'id': 'EV_22853',
|
||||
'ext': 'flv',
|
||||
'title': 'Dans les jardins de William Christie - Le Camus',
|
||||
'description': 'md5:4710c82315c40f0c865ca8b9a68b5299',
|
||||
'upload_date': '20140829',
|
||||
'timestamp': 1409317200,
|
||||
},
|
||||
}
|
||||
|
||||
@ -249,5 +259,7 @@ class CultureboxIE(FranceTVBaseInfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
name = mobj.group('name')
|
||||
webpage = self._download_webpage(url, name)
|
||||
video_id = self._search_regex(r'"http://videos\.francetv\.fr/video/(.*?)"', webpage, 'video id')
|
||||
return self._extract_video(video_id)
|
||||
video_id, catalogue = self._search_regex(
|
||||
r'"http://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id').split('@')
|
||||
|
||||
return self._extract_video(video_id, catalogue)
|
||||
|
@ -21,7 +21,7 @@ class FunnyOrDieIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.funnyordie.com/embed/e402820827',
|
||||
'md5': 'ff4d83318f89776ed0250634cfaa8d36',
|
||||
'md5': '29f4c5e5a61ca39dfd7e8348a75d0aad',
|
||||
'info_dict': {
|
||||
'id': 'e402820827',
|
||||
'ext': 'mp4',
|
||||
|
@ -155,7 +155,6 @@ class GenericIE(InfoExtractor):
|
||||
# funnyordie embed
|
||||
{
|
||||
'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
|
||||
'md5': '7cf780be104d40fea7bae52eed4a470e',
|
||||
'info_dict': {
|
||||
'id': '18e820ec3f',
|
||||
'ext': 'mp4',
|
||||
@ -180,13 +179,13 @@ class GenericIE(InfoExtractor):
|
||||
# Embedded TED video
|
||||
{
|
||||
'url': 'http://en.support.wordpress.com/videos/ted-talks/',
|
||||
'md5': 'deeeabcc1085eb2ba205474e7235a3d5',
|
||||
'md5': '65fdff94098e4a607385a60c5177c638',
|
||||
'info_dict': {
|
||||
'id': '981',
|
||||
'id': '1969',
|
||||
'ext': 'mp4',
|
||||
'title': 'My web playroom',
|
||||
'uploader': 'Ze Frank',
|
||||
'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',
|
||||
'title': 'Hidden miracles of the natural world',
|
||||
'uploader': 'Louie Schwartzberg',
|
||||
'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
|
||||
}
|
||||
},
|
||||
# Embeded Ustream video
|
||||
@ -226,21 +225,6 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': 'Requires rtmpdump'
|
||||
}
|
||||
},
|
||||
# smotri embed
|
||||
{
|
||||
'url': 'http://rbctv.rbc.ru/archive/news/562949990879132.shtml',
|
||||
'md5': 'ec40048448e9284c9a1de77bb188108b',
|
||||
'info_dict': {
|
||||
'id': 'v27008541fad',
|
||||
'ext': 'mp4',
|
||||
'title': 'Крым и Севастополь вошли в состав России',
|
||||
'description': 'md5:fae01b61f68984c7bd2fa741e11c3175',
|
||||
'duration': 900,
|
||||
'upload_date': '20140318',
|
||||
'uploader': 'rbctv_2012_4',
|
||||
'uploader_id': 'rbctv_2012_4',
|
||||
},
|
||||
},
|
||||
# Condé Nast embed
|
||||
{
|
||||
'url': 'http://www.wired.com/2014/04/honda-asimo/',
|
||||
@ -295,13 +279,13 @@ class GenericIE(InfoExtractor):
|
||||
{
|
||||
'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
|
||||
'info_dict': {
|
||||
'id': 'jpSGZsgga_I',
|
||||
'id': '4vAffPZIT44',
|
||||
'ext': 'mp4',
|
||||
'title': 'Asphalt 8: Airborne - Launch Trailer',
|
||||
'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
|
||||
'uploader': 'Gameloft',
|
||||
'uploader_id': 'gameloft',
|
||||
'upload_date': '20130821',
|
||||
'description': 'md5:87bd95f13d8be3e7da87a5f2c443106a',
|
||||
'upload_date': '20140828',
|
||||
'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -382,14 +366,21 @@ class GenericIE(InfoExtractor):
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
},
|
||||
# Wistia embed
|
||||
{
|
||||
'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
|
||||
'md5': '8788b683c777a5cf25621eaf286d0c23',
|
||||
'info_dict': {
|
||||
'id': '1cfaf6b7ea',
|
||||
'ext': 'mov',
|
||||
'title': 'md5:51364a8d3d009997ba99656004b5e20d',
|
||||
'duration': 643.0,
|
||||
'filesize': 182808282,
|
||||
'uploader': 'education-portal.com',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def report_download_webpage(self, video_id):
|
||||
"""Report webpage download."""
|
||||
if not self._downloader.params.get('test', False):
|
||||
self._downloader.report_warning('Falling back on generic information extractor.')
|
||||
super(GenericIE, self).report_download_webpage(video_id)
|
||||
|
||||
def report_following_redirect(self, new_url):
|
||||
"""Report information extraction."""
|
||||
self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
|
||||
@ -489,6 +480,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
url, smuggled_data = unsmuggle_url(url)
|
||||
force_videoid = None
|
||||
is_intentional = smuggled_data and smuggled_data.get('to_generic')
|
||||
if smuggled_data and 'force_videoid' in smuggled_data:
|
||||
force_videoid = smuggled_data['force_videoid']
|
||||
video_id = force_videoid
|
||||
@ -531,6 +523,9 @@ class GenericIE(InfoExtractor):
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
|
||||
if not self._downloader.params.get('test', False) and not is_intentional:
|
||||
self._downloader.report_warning('Falling back on generic information extractor.')
|
||||
|
||||
try:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
except ValueError:
|
||||
@ -584,7 +579,9 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Helper method
|
||||
def _playlist_from_matches(matches, getter, ie=None):
|
||||
urlrs = orderedSet(self.url_result(getter(m), ie) for m in matches)
|
||||
urlrs = orderedSet(
|
||||
self.url_result(self._proto_relative_url(getter(m)), ie)
|
||||
for m in matches)
|
||||
return self.playlist_result(
|
||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||
|
||||
@ -629,11 +626,11 @@ class GenericIE(InfoExtractor):
|
||||
)
|
||||
(["\'])
|
||||
(?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
|
||||
(?:embed|v)/.+?)
|
||||
(?:embed|v|p)/.+?)
|
||||
\1''', webpage)
|
||||
if matches:
|
||||
return _playlist_from_matches(
|
||||
matches, lambda m: unescapeHTML(m[1]), ie='Youtube')
|
||||
matches, lambda m: unescapeHTML(m[1]))
|
||||
|
||||
# Look for embedded Dailymotion player
|
||||
matches = re.findall(
|
||||
@ -654,6 +651,16 @@ class GenericIE(InfoExtractor):
|
||||
'title': video_title,
|
||||
'id': video_id,
|
||||
}
|
||||
match = re.search(r'(?:id=["\']wistia_|data-wistiaid=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
|
||||
if match:
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
|
||||
'ie_key': 'Wistia',
|
||||
'uploader': video_uploader,
|
||||
'title': video_title,
|
||||
'id': match.group('id')
|
||||
}
|
||||
|
||||
# Look for embedded blip.tv player
|
||||
mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
|
||||
@ -877,7 +884,7 @@ class GenericIE(InfoExtractor):
|
||||
if not found:
|
||||
found = re.search(
|
||||
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
|
||||
r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'([^\']+)\'"',
|
||||
r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'?([^\'"]+)',
|
||||
webpage)
|
||||
if found:
|
||||
new_url = found.group(1)
|
||||
|
@ -36,16 +36,16 @@ class GodTubeIE(InfoExtractor):
|
||||
'http://www.godtube.com/resource/mediaplayer/%s.xml' % video_id.lower(),
|
||||
video_id, 'Downloading player config XML')
|
||||
|
||||
video_url = config.find('.//file').text
|
||||
uploader = config.find('.//author').text
|
||||
timestamp = parse_iso8601(config.find('.//date').text)
|
||||
duration = parse_duration(config.find('.//duration').text)
|
||||
thumbnail = config.find('.//image').text
|
||||
video_url = config.find('file').text
|
||||
uploader = config.find('author').text
|
||||
timestamp = parse_iso8601(config.find('date').text)
|
||||
duration = parse_duration(config.find('duration').text)
|
||||
thumbnail = config.find('image').text
|
||||
|
||||
media = self._download_xml(
|
||||
'http://www.godtube.com/media/xml/?v=%s' % video_id, video_id, 'Downloading media XML')
|
||||
|
||||
title = media.find('.//title').text
|
||||
title = media.find('title').text
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
69
youtube_dl/extractor/golem.py
Normal file
69
youtube_dl/extractor/golem.py
Normal file
@ -0,0 +1,69 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urlparse,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
class GolemIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://video\.golem\.de/.+?/(?P<id>.+?)/'
|
||||
_TEST = {
|
||||
'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html',
|
||||
'md5': 'c1a2c0a3c863319651c7c992c5ee29bf',
|
||||
'info_dict': {
|
||||
'id': '14095',
|
||||
'format_id': 'high',
|
||||
'ext': 'mp4',
|
||||
'title': 'iPhone 6 und 6 Plus - Test',
|
||||
'duration': 300.44,
|
||||
'filesize': 65309548,
|
||||
}
|
||||
}
|
||||
|
||||
_PREFIX = 'http://video.golem.de'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
config = self._download_xml(
|
||||
'https://video.golem.de/xml/{0}.xml'.format(video_id), video_id)
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': config.findtext('./title', 'golem'),
|
||||
'duration': self._float(config.findtext('./playtime'), 'duration'),
|
||||
}
|
||||
|
||||
formats = []
|
||||
for e in config:
|
||||
url = e.findtext('./url')
|
||||
if not url:
|
||||
continue
|
||||
|
||||
formats.append({
|
||||
'format_id': e.tag,
|
||||
'url': compat_urlparse.urljoin(self._PREFIX, url),
|
||||
'height': self._int(e.get('height'), 'height'),
|
||||
'width': self._int(e.get('width'), 'width'),
|
||||
'filesize': self._int(e.findtext('filesize'), 'filesize'),
|
||||
'ext': determine_ext(e.findtext('./filename')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
info['formats'] = formats
|
||||
|
||||
thumbnails = []
|
||||
for e in config.findall('.//teaser'):
|
||||
url = e.findtext('./url')
|
||||
if not url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': compat_urlparse.urljoin(self._PREFIX, url),
|
||||
'width': self._int(e.get('width'), 'thumbnail width'),
|
||||
'height': self._int(e.get('height'), 'thumbnail height'),
|
||||
})
|
||||
info['thumbnails'] = thumbnails
|
||||
|
||||
return info
|
81
youtube_dl/extractor/heise.py
Normal file
81
youtube_dl/extractor/heise.py
Normal file
@ -0,0 +1,81 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
get_meta_content,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class HeiseIE(InfoExtractor):
    """Extractor for videos published on heise.de video articles."""

    _VALID_URL = r'''(?x)
        https?://(?:www\.)?heise\.de/video/artikel/
        .+?(?P<id>[0-9]+)\.html(?:$|[?#])
    '''
    _TEST = {
        'url': (
            'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html'
        ),
        'md5': 'ffed432483e922e88545ad9f2f15d30e',
        'info_dict': {
            'id': '2404147',
            'ext': 'mp4',
            'title': (
                "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone"
            ),
            'format_id': 'mp4_720',
            'timestamp': 1411812600,
            'upload_date': '20140927',
            'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.',
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        # The article page embeds a player whose JSON config URL is stored
        # in a 'json_url: "..."' JavaScript assignment.
        webpage = self._download_webpage(url, video_id)
        json_url = self._search_regex(
            r'json_url:\s*"([^"]+)"', webpage, 'json URL')
        config = self._download_json(json_url, video_id)

        info = {
            'id': video_id,
            'thumbnail': config.get('poster'),
            'timestamp': parse_iso8601(get_meta_content('date', webpage)),
            'description': self._og_search_description(webpage),
        }

        # Title fallback chain: <meta name="fulltitle"> first, then the
        # player config, then the OpenGraph title.
        title = get_meta_content('fulltitle', webpage)
        if title:
            info['title'] = title
        elif config.get('title'):
            info['title'] = config['title']
        else:
            info['title'] = self._og_search_title(webpage)

        # config['formats'] maps a container type (e.g. 'mp4') to a dict of
        # height -> stream object; malformed entries are skipped with a warning
        # rather than aborting the whole extraction.
        formats = []
        for t, rs in config['formats'].items():
            if not rs or not hasattr(rs, 'items'):
                self._downloader.report_warning(
                    'formats: {0}: no resolutions'.format(t))
                continue

            for height_str, obj in rs.items():
                # Format ids look like 'mp4_720' (container + height).
                format_id = '{0}_{1}'.format(t, height_str)

                if not obj or not obj.get('url'):
                    self._downloader.report_warning(
                        'formats: {0}: no url'.format(format_id))
                    continue

                formats.append({
                    'url': obj['url'],
                    'format_id': format_id,
                    'height': self._int(height_str, 'height'),
                })

        self._sort_formats(formats)
        info['formats'] = formats

        return info
|
@ -71,6 +71,7 @@ class IGNIE(InfoExtractor):
|
||||
|
||||
def _find_video_id(self, webpage):
|
||||
res_id = [
|
||||
r'"video_id"\s*:\s*"(.*?)"',
|
||||
r'data-video-id="(.+?)"',
|
||||
r'<object id="vid_(.+?)"',
|
||||
r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
|
||||
@ -85,10 +86,15 @@ class IGNIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, name_or_id)
|
||||
if page_type != 'video':
|
||||
multiple_urls = re.findall(
|
||||
'<param name="flashvars" value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]',
|
||||
'<param name="flashvars"[^>]*value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]',
|
||||
webpage)
|
||||
if multiple_urls:
|
||||
return [self.url_result(u, ie='IGN') for u in multiple_urls]
|
||||
entries = [self.url_result(u, ie='IGN') for u in multiple_urls]
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': name_or_id,
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
video_id = self._find_video_id(webpage)
|
||||
result = self._get_video_info(video_id)
|
||||
@ -111,13 +117,13 @@ class IGNIE(InfoExtractor):
|
||||
|
||||
|
||||
class OneUPIE(IGNIE):
|
||||
_VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)'
|
||||
_VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)\.html'
|
||||
IE_NAME = '1up.com'
|
||||
|
||||
_DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://gamevideos.1up.com/video/id/34976',
|
||||
'url': 'http://gamevideos.1up.com/video/id/34976.html',
|
||||
'md5': '68a54ce4ebc772e4b71e3123d413163d',
|
||||
'info_dict': {
|
||||
'id': '34976',
|
||||
|
@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@ -12,12 +14,13 @@ class InternetVideoArchiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247',
|
||||
u'file': u'452693.mp4',
|
||||
u'info_dict': {
|
||||
u'title': u'SKYFALL',
|
||||
u'description': u'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.',
|
||||
u'duration': 153,
|
||||
'url': 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247',
|
||||
'info_dict': {
|
||||
'id': '452693',
|
||||
'ext': 'mp4',
|
||||
'title': 'SKYFALL',
|
||||
'description': 'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.',
|
||||
'duration': 149,
|
||||
},
|
||||
}
|
||||
|
||||
@ -42,7 +45,7 @@ class InternetVideoArchiveIE(InfoExtractor):
|
||||
url = self._build_url(query)
|
||||
|
||||
flashconfiguration = self._download_xml(url, video_id,
|
||||
u'Downloading flash configuration')
|
||||
'Downloading flash configuration')
|
||||
file_url = flashconfiguration.find('file').text
|
||||
file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
|
||||
# Replace some of the parameters in the query to get the best quality
|
||||
@ -51,7 +54,7 @@ class InternetVideoArchiveIE(InfoExtractor):
|
||||
lambda m: self._clean_query(m.group()),
|
||||
file_url)
|
||||
info = self._download_xml(file_url, video_id,
|
||||
u'Downloading video info')
|
||||
'Downloading video info')
|
||||
item = info.find('channel/item')
|
||||
|
||||
def _bp(p):
|
||||
|
@ -63,7 +63,8 @@ class IzleseneIE(InfoExtractor):
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
thumbnail = self._proto_relative_url(
|
||||
self._og_search_thumbnail(webpage), scheme='http:')
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
r"adduserUsername\s*=\s*'([^']+)';",
|
||||
|
@ -1,8 +1,6 @@
|
||||
# coding=utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
@ -12,14 +10,14 @@ from ..utils import (
|
||||
|
||||
class JpopsukiIE(InfoExtractor):
|
||||
IE_NAME = 'jpopsuki.tv'
|
||||
_VALID_URL = r'https?://(?:www\.)?jpopsuki\.tv/video/(.*?)/(?P<id>\S+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?jpopsuki\.tv/(?:category/)?video/[^/]+/(?P<id>\S+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.jpopsuki.tv/video/ayumi-hamasaki---evolution/00be659d23b0b40508169cdee4545771',
|
||||
'md5': '88018c0c1a9b1387940e90ec9e7e198e',
|
||||
'file': '00be659d23b0b40508169cdee4545771.mp4',
|
||||
'info_dict': {
|
||||
'id': '00be659d23b0b40508169cdee4545771',
|
||||
'ext': 'mp4',
|
||||
'title': 'ayumi hamasaki - evolution',
|
||||
'description': 'Release date: 2001.01.31\r\n浜崎あゆみ - evolution',
|
||||
'thumbnail': 'http://www.jpopsuki.tv/cache/89722c74d2a2ebe58bcac65321c115b2.jpg',
|
||||
@ -30,8 +28,7 @@ class JpopsukiIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
@ -47,11 +44,9 @@ class JpopsukiIE(InfoExtractor):
|
||||
uploader_id = self._html_search_regex(
|
||||
r'<li>from: <a href="/user/view/user/\S*?/uid/(\d*)',
|
||||
webpage, 'video uploader_id', fatal=False)
|
||||
upload_date = self._html_search_regex(
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'<li>uploaded: (.*?)</li>', webpage, 'video upload_date',
|
||||
fatal=False)
|
||||
if upload_date is not None:
|
||||
upload_date = unified_strdate(upload_date)
|
||||
fatal=False))
|
||||
view_count_str = self._html_search_regex(
|
||||
r'<li>Hits: ([0-9]+?)</li>', webpage, 'video view_count',
|
||||
fatal=False)
|
||||
|
@ -11,10 +11,9 @@ from ..utils import (
|
||||
|
||||
|
||||
class JukeboxIE(InfoExtractor):
|
||||
_VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html'
|
||||
_VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<id>[a-z0-9\-]+)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.jukebox.es/kosheen/videoclip,pride,r303r.html',
|
||||
'md5': '1574e9b4d6438446d5b7dbcdf2786276',
|
||||
'info_dict': {
|
||||
'id': 'r303r',
|
||||
'ext': 'flv',
|
||||
@ -24,8 +23,7 @@ class JukeboxIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('video_id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
html = self._download_webpage(url, video_id)
|
||||
iframe_url = unescapeHTML(self._search_regex(r'<iframe .*src="([^"]*)"', html, 'iframe url'))
|
||||
|
69
youtube_dl/extractor/lrt.py
Normal file
69
youtube_dl/extractor/lrt.py
Normal file
@ -0,0 +1,69 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
js_to_json,
|
||||
parse_duration,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class LRTIE(InfoExtractor):
    """Extractor for the media archive of Lithuanian national TV (lrt.lt)."""

    IE_NAME = 'lrt.lt'
    _VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/irasas/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.lrt.lt/mediateka/irasas/54391/',
        'info_dict': {
            'id': '54391',
            'ext': 'mp4',
            'title': 'Septynios Kauno dienos',
            'description': 'Kauno miesto ir apskrities naujienos',
            'duration': 1783,
        },
        'params': {
            'skip_download': True,  # HLS download
        },

    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # The page embeds one or more JS player configs; each one yields
        # either an RTMP stream or an HLS (m3u8) master playlist.
        formats = []
        for config_js in re.findall(r'(?s)config:\s*(\{.*?\})', webpage):
            cfg = json.loads(js_to_json(config_js))
            if cfg['provider'] != 'rtmp':
                formats.extend(
                    self._extract_m3u8_formats(cfg['file'], video_id, 'mp4'))
            else:
                formats.append({
                    'format_id': 'rtmp',
                    'ext': determine_ext(cfg['file']),
                    'url': cfg['streamer'],
                    'play_path': 'mp4:%s' % cfg['file'],
                    # RTMP is ranked below the HLS variants.
                    'preference': -1,
                })

        return {
            'id': video_id,
            # The OG title carries a ' - LRT' site suffix; strip it.
            'title': remove_end(self._og_search_title(webpage), ' - LRT'),
            'formats': formats,
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': self._og_search_description(webpage),
            'duration': parse_duration(self._search_regex(
                r"'duration':\s*'([^']+)',", webpage,
                'duration', fatal=False, default=None)),
        }
|
87
youtube_dl/extractor/mgoon.py
Normal file
87
youtube_dl/extractor/mgoon.py
Normal file
@ -0,0 +1,87 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
qualities,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class MgoonIE(InfoExtractor):
    """Extractor for videos on mgoon.com (mobile, desktop and video subdomains)."""

    _VALID_URL = r'''(?x)https?://(?:www\.)?
        (?:(:?m\.)?mgoon\.com/(?:ch/(?:.+)/v|play/view)|
            video\.mgoon\.com)/(?P<id>[0-9]+)'''
    # JSON API returning video metadata, uploader info and format list.
    _API_URL = 'http://mpos.mgoon.com/player/video?id={0:}'
    _TESTS = [
        {
            'url': 'http://m.mgoon.com/ch/hi6618/v/5582148',
            'md5': 'dd46bb66ab35cf6d51cc812fd82da79d',
            'info_dict': {
                'id': '5582148',
                'uploader_id': 'hi6618',
                'duration': 240.419,
                'upload_date': '20131220',
                'ext': 'mp4',
                'title': 'md5:543aa4c27a4931d371c3f433e8cebebc',
                'thumbnail': 're:^https?://.*\.jpg$',
            }
        },
        {
            'url': 'http://www.mgoon.com/play/view/5582148',
            'only_matching': True,
        },
        {
            'url': 'http://video.mgoon.com/5582148',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        data = self._download_json(self._API_URL.format(video_id), video_id)

        # The API signals success with errorInfo.code == 'NONE'; anything
        # else carries a human-readable message we surface to the user.
        if data.get('errorInfo', {}).get('code') != 'NONE':
            raise ExtractorError('%s encountered an error: %s' % (
                self.IE_NAME, data['errorInfo']['message']), expected=True)

        v_info = data['videoInfo']
        title = v_info.get('v_title')
        thumbnail = v_info.get('v_thumbnail')
        duration = v_info.get('v_duration')
        upload_date = unified_strdate(v_info.get('v_reg_date'))
        uploader_id = data.get('userInfo', {}).get('u_alias')
        # API reports duration in milliseconds; normalize to seconds.
        if duration:
            duration /= 1000.0

        age_limit = None
        if data.get('accessInfo', {}).get('code') == 'VIDEO_STATUS_ADULT':
            age_limit = 18

        formats = []
        # Rank formats by their resolution label (ascending quality).
        get_quality = qualities(['360p', '480p', '720p', '1080p'])
        for fmt in data['videoFiles']:
            formats.append({
                'format_id': fmt['label'],
                'quality': get_quality(fmt['label']),
                'url': fmt['url'],
                'ext': fmt['format'],

            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'duration': duration,
            'upload_date': upload_date,
            'uploader_id': uploader_id,
            'age_limit': age_limit,
        }
|
70
youtube_dl/extractor/moniker.py
Normal file
70
youtube_dl/extractor/moniker.py
Normal file
@ -0,0 +1,70 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os.path
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
|
||||
|
||||
class MonikerIE(InfoExtractor):
    """Extractor for the Moniker family of file hosts (allmyvideos.net, vidspot.net)."""

    IE_DESC = 'allmyvideos.net and vidspot.net'
    _VALID_URL = r'https?://(?:www\.)?(?:allmyvideos|vidspot)\.net/(?P<id>[a-zA-Z0-9_-]+)'

    _TESTS = [{
        'url': 'http://allmyvideos.net/jih3nce3x6wn',
        'md5': '710883dee1bfc370ecf9fa6a89307c88',
        'info_dict': {
            'id': 'jih3nce3x6wn',
            'ext': 'mp4',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'http://vidspot.net/l2ngsmhs8ci5',
        'md5': '710883dee1bfc370ecf9fa6a89307c88',
        'info_dict': {
            'id': 'l2ngsmhs8ci5',
            'ext': 'mp4',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.vidspot.net/l2ngsmhs8ci5',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        # The landing page contains a hidden-field form that must be POSTed
        # back to the same URL to obtain the page with the stream links.
        orig_webpage = self._download_webpage(url, video_id)
        fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)
        data = dict(fields)

        # urlencode() returns a text string; Request data must be bytes on
        # Python 3, so encode explicitly (on Python 2 this is a no-op copy).
        post = compat_urllib_parse.urlencode(data).encode('utf-8')
        headers = {
            b'Content-Type': b'application/x-www-form-urlencoded',
        }
        req = compat_urllib_request.Request(url, post, headers)
        webpage = self._download_webpage(
            req, video_id, note='Downloading video page ...')

        # The 'fname' hidden field carries the original file name; strip the
        # extension to use it as the title.
        title = os.path.splitext(data['fname'])[0]

        # Could be several links with different quality
        links = re.findall(r'"file" : "?(.+?)",', webpage)
        # Assume the links are ordered in quality
        formats = [{
            'url': l,
            'quality': i,
        } for i, l in enumerate(links)]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
        }
|
76
youtube_dl/extractor/muenchentv.py
Normal file
76
youtube_dl/extractor/muenchentv.py
Normal file
@ -0,0 +1,76 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
)
|
||||
|
||||
|
||||
class MuenchenTVIE(InfoExtractor):
    """Extractor for the münchen.tv live stream."""

    _VALID_URL = r'https?://(?:www\.)?muenchen\.tv/livestream'
    IE_DESC = 'münchen.tv'
    _TEST = {
        'url': 'http://www.muenchen.tv/livestream/',
        'info_dict': {
            'id': '5334',
            'display_id': 'live',
            'ext': 'mp4',
            'title': 're:^münchen.tv-Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'is_live': True,
            'thumbnail': 're:^https?://.*\.jpg$'
        },
        'params': {
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        # The page serves a single permanent live stream.
        display_id = 'live'
        webpage = self._download_webpage(url, display_id)

        title = self._live_title(self._og_search_title(webpage))

        # Player setup is a JS array literal; convert it to JSON and use the
        # first playlist entry.
        playlist_js = self._search_regex(
            r'(?s)\nplaylist:\s*(\[.*?}\]),related:',
            webpage, 'playlist configuration')
        entry = json.loads(js_to_json(playlist_js))[0]

        video_id = entry['mediaid']
        thumbnail = entry.get('image')

        formats = []
        for source_num, source in enumerate(entry['sources']):
            ext = determine_ext(source['file'], None)
            # Fall back to a positional label when the source has none.
            label = source.get('label')
            if label is None:
                label = '_%d' % source_num

            format_id = label if ext is None else '%s-%s' % (ext, label)

            formats.append({
                'url': source['file'],
                # The label doubles as a bitrate where it is numeric.
                'tbr': int_or_none(source.get('label')),
                'ext': 'mp4',
                'format_id': format_id,
                # SMIL manifests are deprioritized heavily.
                'preference': -100 if '.smil' in source['file'] else 0,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'is_live': True,
            'thumbnail': thumbnail,
        }
|
||||
|
@ -16,9 +16,9 @@ class NBCIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
|
||||
'md5': '54d0fbc33e0b853a65d7b4de5c06d64e',
|
||||
# md5 checksum is not stable
|
||||
'info_dict': {
|
||||
'id': 'u1RInQZRN7QJ',
|
||||
'id': 'bTmnLCvIbaaH',
|
||||
'ext': 'flv',
|
||||
'title': 'I Am a Firefighter',
|
||||
'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.',
|
||||
|
144
youtube_dl/extractor/nfl.py
Normal file
144
youtube_dl/extractor/nfl.py
Normal file
@ -0,0 +1,144 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
compat_urllib_parse_urlparse,
|
||||
int_or_none,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class NFLIE(InfoExtractor):
    """Extractor for videos on nfl.com and team sites (*.clubs.nfl.com)."""

    IE_NAME = 'nfl.com'
    _VALID_URL = r'''(?x)https?://
        (?P<host>(?:www\.)?(?:nfl\.com|.*?\.clubs\.nfl\.com))/
        (?:.+?/)*
        (?P<id>(?:\d[a-z]{2}\d{13}|\w{8}\-(?:\w{4}\-){3}\w{12}))'''
    _TESTS = [
        {
            'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
            'md5': '394ef771ddcd1354f665b471d78ec4c6',
            'info_dict': {
                'id': '0ap3000000398478',
                'ext': 'mp4',
                'title': 'Week 3: Redskins vs. Eagles highlights',
                'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478',
                'upload_date': '20140921',
                'timestamp': 1411337580,
                'thumbnail': 're:^https?://.*\.jpg$',
            }
        },
        {
            'url': 'http://prod.www.steelers.clubs.nfl.com/video-and-audio/videos/LIVE_Post_Game_vs_Browns/9d72f26a-9e2b-4718-84d3-09fb4046c266',
            'md5': 'cf85bdb4bc49f6e9d3816d130c78279c',
            'info_dict': {
                'id': '9d72f26a-9e2b-4718-84d3-09fb4046c266',
                'ext': 'mp4',
                'title': 'LIVE: Post Game vs. Browns',
                'description': 'md5:6a97f7e5ebeb4c0e69a418a89e0636e8',
                'upload_date': '20131229',
                'timestamp': 1388354455,
                'thumbnail': 're:^https?://.*\.jpg$',
            }
        }
    ]

    @staticmethod
    def prepend_host(host, url):
        """Resolve a possibly host-relative URL against the given host."""
        if not url.startswith('http'):
            if not url.startswith('/'):
                url = '/%s' % url
            url = 'http://{0:}{1:}'.format(host, url)
        return url

    @staticmethod
    def format_from_stream(stream, protocol, host, path_prefix='',
                           preference=0, note=None):
        """Build a format dict from one CDN stream entry.

        The stream's 'rate' is given in kbit/s context and converted to
        bit/s via int_or_none(..., 1000).
        """
        url = '{protocol:}://{host:}/{prefix:}{path:}'.format(
            protocol=protocol,
            host=host,
            prefix=path_prefix,
            path=stream.get('path'),
        )
        return {
            'url': url,
            'vbr': int_or_none(stream.get('rate', 0), 1000),
            'preference': preference,
            'format_note': note,
        }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id, host = mobj.group('id'), mobj.group('host')

        webpage = self._download_webpage(url, video_id)

        # The page references a player config JSON whose contentURLTemplate
        # in turn yields the per-video metadata URL.
        config_url = NFLIE.prepend_host(host, self._search_regex(
            r'(?:config|configURL)\s*:\s*"([^"]+)"', webpage, 'config URL'))
        config = self._download_json(config_url, video_id,
                                     note='Downloading player config')
        url_template = NFLIE.prepend_host(
            host, '{contentURLTemplate:}'.format(**config))
        video_data = self._download_json(
            url_template.format(id=video_id), video_id)

        formats = []
        cdn_data = video_data.get('cdnData', {})
        streams = cdn_data.get('bitrateInfo', [])
        if cdn_data.get('format') == 'EXTERNAL_HTTP_STREAM':
            # Single external CDN: protocol and host come from its URI.
            parts = compat_urllib_parse_urlparse(cdn_data.get('uri'))
            protocol, host = parts.scheme, parts.netloc
            for stream in streams:
                formats.append(
                    NFLIE.format_from_stream(stream, protocol, host))
        else:
            # Otherwise every CDN from the player config serves the same
            # bitrate list under its own protocol/host/prefix.
            cdns = config.get('cdns')
            if not cdns:
                raise ExtractorError('Failed to get CDN data', expected=True)

            for name, cdn in cdns.items():
                # LimeLight streams don't seem to work
                if cdn.get('name') == 'LIMELIGHT':
                    continue

                protocol = cdn.get('protocol')
                host = remove_end(cdn.get('host', ''), '/')
                if not (protocol and host):
                    continue

                prefix = cdn.get('pathprefix', '')
                if prefix and not prefix.endswith('/'):
                    prefix = '%s/' % prefix

                # Prefer progressive HTTP over RTMP.
                preference = 0
                if protocol == 'rtmp':
                    preference = -2
                elif 'prog' in name.lower():
                    preference = 1

                for stream in streams:
                    formats.append(
                        NFLIE.format_from_stream(stream, protocol, host,
                                                 prefix, preference, name))

        self._sort_formats(formats)

        # Pick the largest available thumbnail variant.
        thumbnail = None
        for q in ('xl', 'l', 'm', 's', 'xs'):
            thumbnail = video_data.get('imagePaths', {}).get(q)
            if thumbnail:
                break

        return {
            'id': video_id,
            'title': video_data.get('headline'),
            'formats': formats,
            'description': video_data.get('caption'),
            'duration': video_data.get('duration'),
            'thumbnail': thumbnail,
            # 'posted' is in milliseconds; int_or_none(..., 1000) scales to seconds.
            'timestamp': int_or_none(video_data.get('posted'), 1000),
        }
|
@ -46,9 +46,9 @@ class NHLBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
class NHLIE(NHLBaseInfoExtractor):
|
||||
IE_NAME = 'nhl.com'
|
||||
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?:[?&])id=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console(?:\?(?:.*?[?&])?)id=(?P<id>[0-9]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
|
||||
'info_dict': {
|
||||
'id': '453614',
|
||||
@ -58,7 +58,10 @@ class NHLIE(NHLBaseInfoExtractor):
|
||||
'duration': 18,
|
||||
'upload_date': '20131006',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://video.flames.nhl.com/videocenter/console?id=630616',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@ -2,6 +2,8 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@ -17,6 +19,7 @@ from ..utils import (
|
||||
class NocoIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
|
||||
_LOGIN_URL = 'http://noco.tv/do.php'
|
||||
_API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s'
|
||||
_NETRC_MACHINE = 'noco'
|
||||
|
||||
_TEST = {
|
||||
@ -55,33 +58,52 @@ class NocoIE(InfoExtractor):
|
||||
login = self._download_json(request, None, 'Logging in as %s' % username)
|
||||
|
||||
if 'erreur' in login:
|
||||
raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
|
||||
raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
|
||||
|
||||
def _call_api(self, path, video_id, note):
|
||||
ts = compat_str(int(time.time() * 1000))
|
||||
tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest()
|
||||
url = self._API_URL_TEMPLATE % (path, ts, tk)
|
||||
|
||||
resp = self._download_json(url, video_id, note)
|
||||
|
||||
if isinstance(resp, dict) and resp.get('error'):
|
||||
self._raise_error(resp['error'], resp['description'])
|
||||
|
||||
return resp
|
||||
|
||||
def _raise_error(self, error, description):
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s - %s' % (self.IE_NAME, error, description),
|
||||
expected=True)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
medias = self._download_json(
|
||||
'https://api.noco.tv/1.0/video/medias/%s' % video_id, video_id, 'Downloading video JSON')
|
||||
medias = self._call_api(
|
||||
'shows/%s/medias' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
qualities = self._call_api(
|
||||
'qualities',
|
||||
video_id, 'Downloading qualities JSON')
|
||||
|
||||
formats = []
|
||||
|
||||
for fmt in medias['fr']['video_list']['default']['quality_list']:
|
||||
format_id = fmt['quality_key']
|
||||
for format_id, fmt in medias['fr']['video_list']['none']['quality_list'].items():
|
||||
|
||||
file = self._download_json(
|
||||
'https://api.noco.tv/1.0/video/file/%s/fr/%s' % (format_id.lower(), video_id),
|
||||
video = self._call_api(
|
||||
'shows/%s/video/%s/fr' % (video_id, format_id.lower()),
|
||||
video_id, 'Downloading %s video JSON' % format_id)
|
||||
|
||||
file_url = file['file']
|
||||
file_url = video['file']
|
||||
if not file_url:
|
||||
continue
|
||||
|
||||
if file_url == 'forbidden':
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s - %s' % (
|
||||
self.IE_NAME, file['popmessage']['title'], file['popmessage']['message']),
|
||||
expected=True)
|
||||
if file_url in ['forbidden', 'not found']:
|
||||
popmessage = video['popmessage']
|
||||
self._raise_error(popmessage['title'], popmessage['message'])
|
||||
|
||||
formats.append({
|
||||
'url': file_url,
|
||||
@ -91,20 +113,31 @@ class NocoIE(InfoExtractor):
|
||||
'abr': fmt['audiobitrate'],
|
||||
'vbr': fmt['videobitrate'],
|
||||
'filesize': fmt['filesize'],
|
||||
'format_note': fmt['quality_name'],
|
||||
'preference': fmt['priority'],
|
||||
'format_note': qualities[format_id]['quality_name'],
|
||||
'preference': qualities[format_id]['priority'],
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
show = self._download_json(
|
||||
'https://api.noco.tv/1.0/shows/show/%s' % video_id, video_id, 'Downloading show JSON')[0]
|
||||
show = self._call_api(
|
||||
'shows/by_id/%s' % video_id,
|
||||
video_id, 'Downloading show JSON')[0]
|
||||
|
||||
upload_date = unified_strdate(show['indexed'])
|
||||
upload_date = unified_strdate(show['online_date_start_utc'])
|
||||
uploader = show['partner_name']
|
||||
uploader_id = show['partner_key']
|
||||
duration = show['duration_ms'] / 1000.0
|
||||
thumbnail = show['screenshot']
|
||||
|
||||
thumbnails = []
|
||||
for thumbnail_key, thumbnail_url in show.items():
|
||||
m = re.search(r'^screenshot_(?P<width>\d+)x(?P<height>\d+)$', thumbnail_key)
|
||||
if not m:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': thumbnail_url,
|
||||
'width': int(m.group('width')),
|
||||
'height': int(m.group('height')),
|
||||
})
|
||||
|
||||
episode = show.get('show_TT') or show.get('show_OT')
|
||||
family = show.get('family_TT') or show.get('family_OT')
|
||||
@ -124,7 +157,7 @@ class NocoIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'thumbnails': thumbnails,
|
||||
'upload_date': upload_date,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
|
@ -8,11 +8,11 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
compat_urllib_request,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
)
|
||||
|
||||
_x = lambda p: xpath_with_ns(p, {'xspf': 'http://xspf.org/ns/0/'})
|
||||
_find = lambda el, p: el.find(_x(p)).text.strip()
|
||||
|
||||
|
||||
class NosVideoIE(InfoExtractor):
|
||||
@ -53,9 +53,15 @@ class NosVideoIE(InfoExtractor):
|
||||
playlist = self._download_xml(playlist_url, video_id)
|
||||
|
||||
track = playlist.find(_x('.//xspf:track'))
|
||||
title = _find(track, './xspf:title')
|
||||
url = _find(track, './xspf:file')
|
||||
thumbnail = _find(track, './xspf:image')
|
||||
if track is None:
|
||||
raise ExtractorError(
|
||||
'XML playlist is missing the \'track\' element',
|
||||
expected=True)
|
||||
title = xpath_text(track, _x('./xspf:title'), 'title')
|
||||
url = xpath_text(track, _x('./xspf:file'), 'URL', fatal=True)
|
||||
thumbnail = xpath_text(track, _x('./xspf:image'), 'thumbnail')
|
||||
if title is not None:
|
||||
title = title.strip()
|
||||
|
||||
formats = [{
|
||||
'format_id': 'sd',
|
||||
|
@ -5,7 +5,9 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
parse_duration,
|
||||
qualities,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
@ -13,22 +15,50 @@ class NPOIE(InfoExtractor):
|
||||
IE_NAME = 'npo.nl'
|
||||
_VALID_URL = r'https?://www\.npo\.nl/[^/]+/[^/]+/(?P<id>[^/?]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719',
|
||||
'md5': '4b3f9c429157ec4775f2c9cb7b911016',
|
||||
'info_dict': {
|
||||
'id': 'VPWON_1220719',
|
||||
'ext': 'm4v',
|
||||
'title': 'Nieuwsuur',
|
||||
'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.',
|
||||
'upload_date': '20140622',
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719',
|
||||
'md5': '4b3f9c429157ec4775f2c9cb7b911016',
|
||||
'info_dict': {
|
||||
'id': 'VPWON_1220719',
|
||||
'ext': 'm4v',
|
||||
'title': 'Nieuwsuur',
|
||||
'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.',
|
||||
'upload_date': '20140622',
|
||||
},
|
||||
},
|
||||
}
|
||||
{
|
||||
'url': 'http://www.npo.nl/de-mega-mike-mega-thomas-show/27-02-2009/VARA_101191800',
|
||||
'md5': 'da50a5787dbfc1603c4ad80f31c5120b',
|
||||
'info_dict': {
|
||||
'id': 'VARA_101191800',
|
||||
'ext': 'm4v',
|
||||
'title': 'De Mega Mike & Mega Thomas show',
|
||||
'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4',
|
||||
'upload_date': '20090227',
|
||||
'duration': 2400,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.npo.nl/tegenlicht/25-02-2013/VPWON_1169289',
|
||||
'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
|
||||
'info_dict': {
|
||||
'id': 'VPWON_1169289',
|
||||
'ext': 'm4v',
|
||||
'title': 'Tegenlicht',
|
||||
'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1',
|
||||
'upload_date': '20130225',
|
||||
'duration': 3000,
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
return self._get_info(video_id)
|
||||
|
||||
def _get_info(self, video_id):
|
||||
metadata = self._download_json(
|
||||
'http://e.omroep.nl/metadata/aflevering/%s' % video_id,
|
||||
video_id,
|
||||
@ -43,19 +73,28 @@ class NPOIE(InfoExtractor):
|
||||
token = self._search_regex(r'npoplayer\.token = "(.+?)"', token_page, 'token')
|
||||
|
||||
formats = []
|
||||
quality = qualities(['adaptive', 'h264_sb', 'h264_bb', 'h264_std'])
|
||||
quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std'])
|
||||
for format_id in metadata['pubopties']:
|
||||
streams_info = self._download_json(
|
||||
format_info = self._download_json(
|
||||
'http://ida.omroep.nl/odi/?prid=%s&puboptions=%s&adaptive=yes&token=%s' % (video_id, format_id, token),
|
||||
video_id, 'Downloading %s streams info' % format_id)
|
||||
stream_info = self._download_json(
|
||||
streams_info['streams'][0] + '&type=json',
|
||||
video_id, 'Downloading %s stream info' % format_id)
|
||||
video_id, 'Downloading %s JSON' % format_id)
|
||||
if format_info.get('error_code', 0) or format_info.get('errorcode', 0):
|
||||
continue
|
||||
streams = format_info.get('streams')
|
||||
if streams:
|
||||
video_info = self._download_json(
|
||||
streams[0] + '&type=json',
|
||||
video_id, 'Downloading %s stream JSON' % format_id)
|
||||
else:
|
||||
video_info = format_info
|
||||
video_url = video_info.get('url')
|
||||
if not video_url:
|
||||
continue
|
||||
if format_id == 'adaptive':
|
||||
formats.extend(self._extract_m3u8_formats(stream_info['url'], video_id))
|
||||
formats.extend(self._extract_m3u8_formats(video_url, video_id))
|
||||
else:
|
||||
formats.append({
|
||||
'url': stream_info['url'],
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'quality': quality(format_id),
|
||||
})
|
||||
@ -65,7 +104,35 @@ class NPOIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'title': metadata['titel'],
|
||||
'description': metadata['info'],
|
||||
'thumbnail': metadata['images'][-1]['url'],
|
||||
'upload_date': unified_strdate(metadata['gidsdatum']),
|
||||
'thumbnail': metadata.get('images', [{'url': None}])[-1]['url'],
|
||||
'upload_date': unified_strdate(metadata.get('gidsdatum')),
|
||||
'duration': parse_duration(metadata.get('tijdsduur')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class TegenlichtVproIE(NPOIE):
|
||||
IE_NAME = 'tegenlicht.vpro.nl'
|
||||
_VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://tegenlicht.vpro.nl/afleveringen/2012-2013/de-toekomst-komt-uit-afrika.html',
|
||||
'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
|
||||
'info_dict': {
|
||||
'id': 'VPWON_1169289',
|
||||
'ext': 'm4v',
|
||||
'title': 'Tegenlicht',
|
||||
'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1',
|
||||
'upload_date': '20130225',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
name = url_basename(url)
|
||||
webpage = self._download_webpage(url, name)
|
||||
urn = self._html_search_meta('mediaurn', webpage)
|
||||
info_page = self._download_json(
|
||||
'http://rs.vpro.nl/v2/api/media/%s.json' % urn, name)
|
||||
return self._get_info(info_page['mid'])
|
||||
|
47
youtube_dl/extractor/oktoberfesttv.py
Normal file
47
youtube_dl/extractor/oktoberfesttv.py
Normal file
@ -0,0 +1,47 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class OktoberfestTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.oktoberfest-tv\.de/[^/]+/[^/]+/video/(?P<id>[^/?#]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.oktoberfest-tv.de/de/kameras/video/hb-zelt',
|
||||
'info_dict': {
|
||||
'id': 'hb-zelt',
|
||||
'ext': 'mp4',
|
||||
'title': 're:^Live-Kamera: Hofbräuzelt [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._live_title(self._html_search_regex(
|
||||
r'<h1><strong>.*?</strong>(.*?)</h1>', webpage, 'title'))
|
||||
|
||||
clip = self._search_regex(
|
||||
r"clip:\s*\{\s*url:\s*'([^']+)'", webpage, 'clip')
|
||||
ncurl = self._search_regex(
|
||||
r"netConnectionUrl:\s*'([^']+)'", webpage, 'rtmp base')
|
||||
video_url = ncurl + clip
|
||||
thumbnail = self._search_regex(
|
||||
r"canvas:\s*\{\s*backgroundImage:\s*'url\(([^)]+)\)'", webpage,
|
||||
'thumbnail', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'is_live': True,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
@ -4,6 +4,7 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
US_RATINGS,
|
||||
)
|
||||
|
||||
@ -11,10 +12,10 @@ from ..utils import (
|
||||
class PBSIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:
|
||||
# Direct video URL
|
||||
video\.pbs\.org/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
|
||||
# Article with embedded player
|
||||
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
|
||||
# Direct video URL
|
||||
video\.pbs\.org/(?:viralplayer|video)/(?P<id>[0-9]+)/? |
|
||||
# Article with embedded player (or direct video)
|
||||
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
|
||||
# Player
|
||||
video\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
|
||||
)
|
||||
@ -65,10 +66,25 @@ class PBSIE(InfoExtractor):
|
||||
'duration': 6559,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.pbs.org/wgbh/nova/earth/killer-typhoon.html',
|
||||
'md5': '908f3e5473a693b266b84e25e1cf9703',
|
||||
'info_dict': {
|
||||
'id': '2365160389',
|
||||
'display_id': 'killer-typhoon',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:c741d14e979fc53228c575894094f157',
|
||||
'title': 'Killer Typhoon',
|
||||
'duration': 3172,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'upload_date': '20140122',
|
||||
}
|
||||
}
|
||||
|
||||
]
|
||||
|
||||
def _extract_ids(self, url):
|
||||
def _extract_webpage(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
presumptive_id = mobj.group('presumptive_id')
|
||||
@ -76,15 +92,20 @@ class PBSIE(InfoExtractor):
|
||||
if presumptive_id:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'<input type="hidden" id="air_date_[0-9]+" value="([^"]+)"',
|
||||
webpage, 'upload date', default=None))
|
||||
|
||||
MEDIA_ID_REGEXES = [
|
||||
r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", # frontline video embed
|
||||
r'class="coveplayerid">([^<]+)<', # coveplayer
|
||||
r'<input type="hidden" id="pbs_video_id_[0-9]+" value="([0-9]+)"/>', # jwplayer
|
||||
]
|
||||
|
||||
media_id = self._search_regex(
|
||||
MEDIA_ID_REGEXES, webpage, 'media ID', fatal=False, default=None)
|
||||
if media_id:
|
||||
return media_id, presumptive_id
|
||||
return media_id, presumptive_id, upload_date
|
||||
|
||||
url = self._search_regex(
|
||||
r'<iframe\s+(?:class|id)=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
|
||||
@ -104,10 +125,10 @@ class PBSIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
display_id = video_id
|
||||
|
||||
return video_id, display_id
|
||||
return video_id, display_id, None
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, display_id = self._extract_ids(url)
|
||||
video_id, display_id, upload_date = self._extract_webpage(url)
|
||||
|
||||
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
|
||||
info = self._download_json(info_url, display_id)
|
||||
@ -119,6 +140,7 @@ class PBSIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': info['title'],
|
||||
'url': info['alternate_encoding']['url'],
|
||||
'ext': 'mp4',
|
||||
@ -126,4 +148,5 @@ class PBSIE(InfoExtractor):
|
||||
'thumbnail': info.get('image_url'),
|
||||
'duration': info.get('duration'),
|
||||
'age_limit': age_limit,
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
|
55
youtube_dl/extractor/played.py
Normal file
55
youtube_dl/extractor/played.py
Normal file
@ -0,0 +1,55 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import os.path
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
|
||||
|
||||
class PlayedIE(InfoExtractor):
|
||||
IE_NAME = 'played.to'
|
||||
_VALID_URL = r'https?://(?:www\.)?played\.to/(?P<id>[a-zA-Z0-9_-]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://played.to/j2f2sfiiukgt',
|
||||
'md5': 'c2bd75a368e82980e7257bf500c00637',
|
||||
'info_dict': {
|
||||
'id': 'j2f2sfiiukgt',
|
||||
'ext': 'flv',
|
||||
'title': 'youtube-dl_test_video.mp4',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
orig_webpage = self._download_webpage(url, video_id)
|
||||
fields = re.findall(
|
||||
r'type="hidden" name="([^"]+)"\s+value="([^"]+)">', orig_webpage)
|
||||
data = dict(fields)
|
||||
|
||||
self._sleep(2, video_id)
|
||||
|
||||
post = compat_urllib_parse.urlencode(data)
|
||||
headers = {
|
||||
b'Content-Type': b'application/x-www-form-urlencoded',
|
||||
}
|
||||
req = compat_urllib_request.Request(url, post, headers)
|
||||
webpage = self._download_webpage(
|
||||
req, video_id, note='Downloading video page ...')
|
||||
|
||||
title = os.path.splitext(data['fname'])[0]
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'file: "?(.+?)",', webpage, 'video URL')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
}
|
@ -10,6 +10,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
@ -29,6 +30,7 @@ class PlayFMIE(InfoExtractor):
|
||||
'duration': 5627.428,
|
||||
'upload_date': '20140712',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
}
|
||||
@ -51,7 +53,8 @@ class PlayFMIE(InfoExtractor):
|
||||
|
||||
recording = rec_doc.find('./recording')
|
||||
title = recording.find('./title').text
|
||||
view_count = int_or_none(recording.find('./stats/playcount').text)
|
||||
view_count = str_to_int(recording.find('./stats/playcount').text)
|
||||
comment_count = str_to_int(recording.find('./stats/comments').text)
|
||||
duration = float_or_none(recording.find('./duration').text, scale=1000)
|
||||
thumbnail = recording.find('./image').text
|
||||
|
||||
@ -75,6 +78,7 @@ class PlayFMIE(InfoExtractor):
|
||||
'title': title,
|
||||
'upload_date': upload_date,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
|
@ -144,7 +144,7 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
'id': '2156342',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kurztrips zum Valentinstag',
|
||||
'description': 'md5:8ba6301e70351ae0bedf8da00f7ba528',
|
||||
'description': 'Romantischer Kurztrip zum Valentinstag? Wir verraten, was sich hier wirklich lohnt.',
|
||||
'duration': 307.24,
|
||||
},
|
||||
'params': {
|
||||
@ -180,12 +180,10 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
page = self._download_webpage(url, video_id, 'Downloading page')
|
||||
|
||||
clip_id = self._html_search_regex(self._CLIPID_REGEXES, page, 'clip id')
|
||||
clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id')
|
||||
|
||||
access_token = 'testclient'
|
||||
client_name = 'kolibri-1.2.5'
|
||||
@ -234,12 +232,12 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
|
||||
urls = self._download_json(url_api_url, clip_id, 'Downloading urls JSON')
|
||||
|
||||
title = self._html_search_regex(self._TITLE_REGEXES, page, 'title')
|
||||
description = self._html_search_regex(self._DESCRIPTION_REGEXES, page, 'description', fatal=False)
|
||||
thumbnail = self._og_search_thumbnail(page)
|
||||
title = self._html_search_regex(self._TITLE_REGEXES, webpage, 'title')
|
||||
description = self._html_search_regex(self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
self._UPLOAD_DATE_REGEXES, page, 'upload date', default=None))
|
||||
self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None))
|
||||
|
||||
formats = []
|
||||
|
||||
|
@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
class SBSIE(InfoExtractor):
|
||||
IE_DESC = 'sbs.com.au'
|
||||
_VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/ondemand/video/single/(?P<id>[0-9]+)/'
|
||||
_VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/ondemand/video/(?:single/)?(?P<id>[0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# Original URL is handled by the generic IE which finds the iframe:
|
||||
@ -21,12 +21,16 @@ class SBSIE(InfoExtractor):
|
||||
'md5': '3150cf278965eeabb5b4cea1c963fe0a',
|
||||
'info_dict': {
|
||||
'id': '320403011771',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dingo Conservation',
|
||||
'description': 'Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. This family run a dingo conservation park to prevent their extinction',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
'add_ies': ['generic'],
|
||||
},
|
||||
{
|
||||
'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -31,7 +31,8 @@ class SoundcloudIE(InfoExtractor):
|
||||
(?!sets/|likes/?(?:$|[?#]))
|
||||
(?P<title>[\w\d-]+)/?
|
||||
(?P<token>[^?]+?)?(?:[?].*)?$)
|
||||
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
|
||||
|(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
|
||||
(?:/?\?secret_token=(?P<secret_token>[^&]+?))?$)
|
||||
|(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
|
||||
)
|
||||
'''
|
||||
@ -80,6 +81,20 @@ class SoundcloudIE(InfoExtractor):
|
||||
'duration': 9,
|
||||
},
|
||||
},
|
||||
# private link (alt format)
|
||||
{
|
||||
'url': 'https://api.soundcloud.com/tracks/123998367?secret_token=s-8Pjrp',
|
||||
'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
|
||||
'info_dict': {
|
||||
'id': '123998367',
|
||||
'ext': 'mp3',
|
||||
'title': 'Youtube - Dl Test Video \'\' Ä↭',
|
||||
'uploader': 'jaimeMF',
|
||||
'description': 'test chars: \"\'/\\ä↭',
|
||||
'upload_date': '20131209',
|
||||
'duration': 9,
|
||||
},
|
||||
},
|
||||
# downloadable song
|
||||
{
|
||||
'url': 'https://soundcloud.com/oddsamples/bus-brakes',
|
||||
@ -197,6 +212,9 @@ class SoundcloudIE(InfoExtractor):
|
||||
if track_id is not None:
|
||||
info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
|
||||
full_title = track_id
|
||||
token = mobj.group('secret_token')
|
||||
if token:
|
||||
info_json_url += "&secret_token=" + token
|
||||
elif mobj.group('player'):
|
||||
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
return self.url_result(query['url'][0])
|
||||
@ -220,7 +238,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
|
||||
|
||||
class SoundcloudSetIE(SoundcloudIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
|
||||
IE_NAME = 'soundcloud:set'
|
||||
_TESTS = [{
|
||||
'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
|
||||
@ -234,14 +252,19 @@ class SoundcloudSetIE(SoundcloudIE):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
# extract uploader (which is in the url)
|
||||
uploader = mobj.group(1)
|
||||
uploader = mobj.group('uploader')
|
||||
# extract simple title (uploader + slug of song title)
|
||||
slug_title = mobj.group(2)
|
||||
slug_title = mobj.group('slug_title')
|
||||
full_title = '%s/sets/%s' % (uploader, slug_title)
|
||||
url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
|
||||
|
||||
token = mobj.group('token')
|
||||
if token:
|
||||
full_title += '/' + token
|
||||
url += '/' + token
|
||||
|
||||
self.report_resolve(full_title)
|
||||
|
||||
url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
|
||||
resolv_url = self._resolv_url(url)
|
||||
info = self._download_json(resolv_url, full_title)
|
||||
|
||||
@ -252,7 +275,7 @@ class SoundcloudSetIE(SoundcloudIE):
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': [self._extract_info_dict(track) for track in info['tracks']],
|
||||
'entries': [self._extract_info_dict(track, secret_token=token) for track in info['tracks']],
|
||||
'id': info['id'],
|
||||
'title': info['title'],
|
||||
}
|
||||
@ -315,34 +338,38 @@ class SoundcloudUserIE(SoundcloudIE):
|
||||
|
||||
|
||||
class SoundcloudPlaylistIE(SoundcloudIE):
|
||||
_VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
|
||||
IE_NAME = 'soundcloud:playlist'
|
||||
_TESTS = [
|
||||
|
||||
{
|
||||
'url': 'http://api.soundcloud.com/playlists/4110309',
|
||||
'info_dict': {
|
||||
'id': '4110309',
|
||||
'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
|
||||
'description': 're:.*?TILT Brass - Bowery Poetry Club',
|
||||
},
|
||||
'playlist_count': 6,
|
||||
}
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'http://api.soundcloud.com/playlists/4110309',
|
||||
'info_dict': {
|
||||
'id': '4110309',
|
||||
'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
|
||||
'description': 're:.*?TILT Brass - Bowery Poetry Club',
|
||||
},
|
||||
'playlist_count': 6,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
playlist_id = mobj.group('id')
|
||||
base_url = '%s//api.soundcloud.com/playlists/%s.json?' % (self.http_scheme(), playlist_id)
|
||||
|
||||
data = compat_urllib_parse.urlencode({
|
||||
data_dict = {
|
||||
'client_id': self._CLIENT_ID,
|
||||
})
|
||||
}
|
||||
token = mobj.group('token')
|
||||
|
||||
if token:
|
||||
data_dict['secret_token'] = token
|
||||
|
||||
data = compat_urllib_parse.urlencode(data_dict)
|
||||
data = self._download_json(
|
||||
base_url + data, playlist_id, 'Downloading playlist')
|
||||
|
||||
entries = [
|
||||
self._extract_info_dict(t, quiet=True) for t in data['tracks']]
|
||||
self._extract_info_dict(t, quiet=True, secret_token=token)
|
||||
for t in data['tracks']]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
|
@ -9,7 +9,6 @@ from ..utils import (
|
||||
compat_urllib_parse,
|
||||
unified_strdate,
|
||||
str_to_int,
|
||||
int_or_none,
|
||||
)
|
||||
from ..aes import aes_decrypt_text
|
||||
|
||||
@ -40,31 +39,42 @@ class SpankwireIE(InfoExtractor):
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
title = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
|
||||
title = self._html_search_regex(
|
||||
r'<h1>([^<]+)', webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'<div\s+id="descriptionContent">([^<]+)<', webpage, 'description', fatal=False)
|
||||
r'<div\s+id="descriptionContent">([^<]+)<',
|
||||
webpage, 'description', fatal=False)
|
||||
thumbnail = self._html_search_regex(
|
||||
r'flashvars\.image_url = "([^"]+)', webpage, 'thumbnail', fatal=False)
|
||||
r'playerData\.screenShot\s*=\s*["\']([^"\']+)["\']',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
r'by:\s*<a [^>]*>(.+?)</a>', webpage, 'uploader', fatal=False)
|
||||
r'by:\s*<a [^>]*>(.+?)</a>',
|
||||
webpage, 'uploader', fatal=False)
|
||||
uploader_id = self._html_search_regex(
|
||||
r'by:\s*<a href="/Profile\.aspx\?.*?UserId=(\d+).*?"', webpage, 'uploader id', fatal=False)
|
||||
upload_date = self._html_search_regex(r'</a> on (.+?) at \d+:\d+', webpage, 'upload date', fatal=False)
|
||||
if upload_date:
|
||||
upload_date = unified_strdate(upload_date)
|
||||
|
||||
view_count = self._html_search_regex(
|
||||
r'<div id="viewsCounter"><span>([^<]+)</span> views</div>', webpage, 'view count', fatal=False)
|
||||
if view_count:
|
||||
view_count = str_to_int(view_count)
|
||||
comment_count = int_or_none(self._html_search_regex(
|
||||
r'<span id="spCommentCount">\s*(\d+)</span> Comments</div>', webpage, 'comment count', fatal=False))
|
||||
r'by:\s*<a href="/Profile\.aspx\?.*?UserId=(\d+).*?"',
|
||||
webpage, 'uploader id', fatal=False)
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'</a> on (.+?) at \d+:\d+',
|
||||
webpage, 'upload date', fatal=False))
|
||||
|
||||
video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage)))
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
r'<div id="viewsCounter"><span>([\d,\.]+)</span> views</div>',
|
||||
webpage, 'view count', fatal=False))
|
||||
comment_count = str_to_int(self._html_search_regex(
|
||||
r'Comments<span[^>]+>\s*\(([\d,\.]+)\)</span>',
|
||||
webpage, 'comment count', fatal=False))
|
||||
|
||||
video_urls = list(map(
|
||||
compat_urllib_parse.unquote,
|
||||
re.findall(r'playerData\.cdnPath[0-9]{3,}\s*=\s*["\']([^"\']+)["\']', webpage)))
|
||||
if webpage.find('flashvars\.encrypted = "true"') != -1:
|
||||
password = self._html_search_regex(r'flashvars\.video_title = "([^"]+)', webpage, 'password').replace('+', ' ')
|
||||
video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
|
||||
password = self._html_search_regex(
|
||||
r'flashvars\.video_title = "([^"]+)',
|
||||
webpage, 'password').replace('+', ' ')
|
||||
video_urls = list(map(
|
||||
lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'),
|
||||
video_urls))
|
||||
|
||||
formats = []
|
||||
for video_url in video_urls:
|
||||
|
92
youtube_dl/extractor/sport5.py
Normal file
92
youtube_dl/extractor/sport5.py
Normal file
@ -0,0 +1,92 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class Sport5IE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www|vod)?\.sport5\.co\.il/.*\b(?:Vi|docID)=(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://vod.sport5.co.il/?Vc=147&Vi=176331&Page=1',
|
||||
'info_dict': {
|
||||
'id': 's5-Y59xx1-GUh2',
|
||||
'ext': 'mp4',
|
||||
'title': 'ולנסיה-קורדובה 0:3',
|
||||
'description': 'אלקאסר, גאייה ופגולי סידרו לקבוצה של נונו ניצחון על קורדובה ואת המקום הראשון בליגה',
|
||||
'duration': 228,
|
||||
'categories': list,
|
||||
},
|
||||
'skip': 'Blocked outside of Israel',
|
||||
}, {
|
||||
'url': 'http://www.sport5.co.il/articles.aspx?FolderID=3075&docID=176372&lang=HE',
|
||||
'info_dict': {
|
||||
'id': 's5-SiXxx1-hKh2',
|
||||
'ext': 'mp4',
|
||||
'title': 'GOALS_CELTIC_270914.mp4',
|
||||
'description': '',
|
||||
'duration': 87,
|
||||
'categories': list,
|
||||
},
|
||||
'skip': 'Blocked outside of Israel',
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
media_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, media_id)
|
||||
|
||||
video_id = self._html_search_regex('clipId=([\w-]+)', webpage, 'video id')
|
||||
|
||||
metadata = self._download_xml(
|
||||
'http://sport5-metadata-rr-d.nsacdn.com/vod/vod/%s/HDS/metadata.xml' % video_id,
|
||||
video_id)
|
||||
|
||||
error = metadata.find('./Error')
|
||||
if error is not None:
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s - %s' % (
|
||||
self.IE_NAME,
|
||||
error.find('./Name').text,
|
||||
error.find('./Description').text),
|
||||
expected=True)
|
||||
|
||||
title = metadata.find('./Title').text
|
||||
description = metadata.find('./Description').text
|
||||
duration = int(metadata.find('./Duration').text)
|
||||
|
||||
posters_el = metadata.find('./PosterLinks')
|
||||
thumbnails = [{
|
||||
'url': thumbnail.text,
|
||||
'width': int(thumbnail.get('width')),
|
||||
'height': int(thumbnail.get('height')),
|
||||
} for thumbnail in posters_el.findall('./PosterIMG')] if posters_el is not None else []
|
||||
|
||||
categories_el = metadata.find('./Categories')
|
||||
categories = [
|
||||
cat.get('name') for cat in categories_el.findall('./Category')
|
||||
] if categories_el is not None else []
|
||||
|
||||
formats = [{
|
||||
'url': fmt.text,
|
||||
'ext': 'mp4',
|
||||
'vbr': int(fmt.get('bitrate')),
|
||||
'width': int(fmt.get('width')),
|
||||
'height': int(fmt.get('height')),
|
||||
} for fmt in metadata.findall('./PlaybackLinks/FileURL')]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnails': thumbnails,
|
||||
'duration': duration,
|
||||
'categories': categories,
|
||||
'formats': formats,
|
||||
}
|
@ -17,11 +17,11 @@ class SportDeutschlandIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen',
|
||||
'ext': 'mp4',
|
||||
'title': 'LIVE: Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen',
|
||||
'title': 're:Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen',
|
||||
'categories': ['Badminton'],
|
||||
'view_count': int,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 're:^Die Badminton-WM 2014 aus Kopenhagen LIVE',
|
||||
'description': 're:Die Badminton-WM 2014 aus Kopenhagen bei Sportdeutschland\.TV',
|
||||
'timestamp': int,
|
||||
'upload_date': 're:^201408[23][0-9]$',
|
||||
},
|
||||
|
@ -39,10 +39,10 @@ class SunPornoIE(InfoExtractor):
|
||||
r'poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
|
||||
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'<span>Duration: (\d+:\d+)</span>', webpage, 'duration', fatal=False))
|
||||
r'Duration:\s*(\d+:\d+)\s*<', webpage, 'duration', fatal=False))
|
||||
|
||||
view_count = int_or_none(self._html_search_regex(
|
||||
r'<span class="views">(\d+)</span>', webpage, 'view count', fatal=False))
|
||||
r'class="views">\s*(\d+)\s*<', webpage, 'view count', fatal=False))
|
||||
comment_count = int_or_none(self._html_search_regex(
|
||||
r'(\d+)</b> Comments?', webpage, 'comment count', fatal=False))
|
||||
|
||||
|
104
youtube_dl/extractor/tapely.py
Normal file
104
youtube_dl/extractor/tapely.py
Normal file
@ -0,0 +1,104 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
compat_urllib_request,
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class TapelyIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tape\.ly/(?P<id>[A-Za-z0-9\-_]+)(?:/(?P<songnr>\d+))?'
|
||||
_API_URL = 'http://tape.ly/showtape?id={0:}'
|
||||
_S3_SONG_URL = 'http://mytape.s3.amazonaws.com/{0:}'
|
||||
_SOUNDCLOUD_SONG_URL = 'http://api.soundcloud.com{0:}'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://tape.ly/my-grief-as-told-by-water',
|
||||
'info_dict': {
|
||||
'id': 23952,
|
||||
'title': 'my grief as told by water',
|
||||
'thumbnail': 're:^https?://.*\.png$',
|
||||
'uploader_id': 16484,
|
||||
'timestamp': 1411848286,
|
||||
'description': 'For Robin and Ponkers, whom the tides of life have taken out to sea.',
|
||||
},
|
||||
'playlist_count': 13,
|
||||
},
|
||||
{
|
||||
'url': 'http://tape.ly/my-grief-as-told-by-water/1',
|
||||
'md5': '79031f459fdec6530663b854cbc5715c',
|
||||
'info_dict': {
|
||||
'id': 258464,
|
||||
'title': 'Dreaming Awake (My Brightest Diamond)',
|
||||
'ext': 'm4a',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('id')
|
||||
|
||||
playlist_url = self._API_URL.format(display_id)
|
||||
request = compat_urllib_request.Request(playlist_url)
|
||||
request.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||
request.add_header('Accept', 'application/json')
|
||||
|
||||
playlist = self._download_json(request, display_id)
|
||||
|
||||
tape = playlist['tape']
|
||||
|
||||
entries = []
|
||||
for s in tape['songs']:
|
||||
song = s['song']
|
||||
entry = {
|
||||
'id': song['id'],
|
||||
'duration': float_or_none(song.get('songduration'), 1000),
|
||||
'title': song['title'],
|
||||
}
|
||||
if song['source'] == 'S3':
|
||||
entry.update({
|
||||
'url': self._S3_SONG_URL.format(song['filename']),
|
||||
})
|
||||
entries.append(entry)
|
||||
elif song['source'] == 'YT':
|
||||
self.to_screen('YouTube video detected')
|
||||
yt_id = song['filename'].replace('/youtube/', '')
|
||||
entry.update(self.url_result(yt_id, 'Youtube', video_id=yt_id))
|
||||
entries.append(entry)
|
||||
elif song['source'] == 'SC':
|
||||
self.to_screen('SoundCloud song detected')
|
||||
sc_url = self._SOUNDCLOUD_SONG_URL.format(song['filename'])
|
||||
entry.update(self.url_result(sc_url, 'Soundcloud'))
|
||||
entries.append(entry)
|
||||
else:
|
||||
self.report_warning('Unknown song source: %s' % song['source'])
|
||||
|
||||
if mobj.group('songnr'):
|
||||
songnr = int(mobj.group('songnr')) - 1
|
||||
try:
|
||||
return entries[songnr]
|
||||
except IndexError:
|
||||
raise ExtractorError(
|
||||
'No song with index: %s' % mobj.group('songnr'),
|
||||
expected=True)
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': tape['id'],
|
||||
'display_id': display_id,
|
||||
'title': tape['name'],
|
||||
'entries': entries,
|
||||
'thumbnail': tape.get('image_url'),
|
||||
'description': clean_html(tape.get('subtext')),
|
||||
'like_count': tape.get('likescount'),
|
||||
'uploader_id': tape.get('user_id'),
|
||||
'timestamp': parse_iso8601(tape.get('published_at')),
|
||||
}
|
@ -149,7 +149,7 @@ class TEDIE(SubtitlesInfoExtractor):
|
||||
thumbnail = 'http://' + thumbnail
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': talk_info['title'],
|
||||
'title': talk_info['title'].strip(),
|
||||
'uploader': talk_info['speaker'],
|
||||
'thumbnail': thumbnail,
|
||||
'description': self._og_search_description(webpage),
|
||||
|
@ -5,6 +5,7 @@ import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
ExtractorError,
|
||||
xpath_with_ns,
|
||||
)
|
||||
@ -55,36 +56,44 @@ class ThePlatformIE(InfoExtractor):
|
||||
body = meta.find(_x('smil:body'))
|
||||
|
||||
f4m_node = body.find(_x('smil:seq//smil:video'))
|
||||
if f4m_node is not None:
|
||||
if f4m_node is not None and '.f4m' in f4m_node.attrib['src']:
|
||||
f4m_url = f4m_node.attrib['src']
|
||||
if 'manifest.f4m?' not in f4m_url:
|
||||
f4m_url += '?'
|
||||
# the parameters are from syfy.com, other sites may use others,
|
||||
# they also work for nbc.com
|
||||
f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3'
|
||||
formats = [{
|
||||
'ext': 'flv',
|
||||
'url': f4m_url,
|
||||
}]
|
||||
formats = self._extract_f4m_formats(f4m_url, video_id)
|
||||
else:
|
||||
base_url = head.find(_x('smil:meta')).attrib['base']
|
||||
switch = body.find(_x('smil:switch'))
|
||||
formats = []
|
||||
for f in switch.findall(_x('smil:video')):
|
||||
attr = f.attrib
|
||||
width = int(attr['width'])
|
||||
height = int(attr['height'])
|
||||
vbr = int(attr['system-bitrate']) // 1000
|
||||
format_id = '%dx%d_%dk' % (width, height, vbr)
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': base_url,
|
||||
'play_path': 'mp4:' + attr['src'],
|
||||
'ext': 'flv',
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vbr': vbr,
|
||||
})
|
||||
switch = body.find(_x('smil:switch'))
|
||||
if switch is not None:
|
||||
base_url = head.find(_x('smil:meta')).attrib['base']
|
||||
for f in switch.findall(_x('smil:video')):
|
||||
attr = f.attrib
|
||||
width = int(attr['width'])
|
||||
height = int(attr['height'])
|
||||
vbr = int(attr['system-bitrate']) // 1000
|
||||
format_id = '%dx%d_%dk' % (width, height, vbr)
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': base_url,
|
||||
'play_path': 'mp4:' + attr['src'],
|
||||
'ext': 'flv',
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vbr': vbr,
|
||||
})
|
||||
else:
|
||||
switch = body.find(_x('smil:seq//smil:switch'))
|
||||
for f in switch.findall(_x('smil:video')):
|
||||
attr = f.attrib
|
||||
vbr = int(attr['system-bitrate']) // 1000
|
||||
formats.append({
|
||||
'format_id': compat_str(vbr),
|
||||
'url': attr['src'],
|
||||
'vbr': vbr,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
84
youtube_dl/extractor/thvideo.py
Normal file
84
youtube_dl/extractor/thvideo.py
Normal file
@ -0,0 +1,84 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate
|
||||
)
|
||||
|
||||
|
||||
class THVideoIE(InfoExtractor):
    """Extractor for single videos hosted on thvideo.tv.

    The direct media URL is only exposed by the mobile player page, while
    all of the displayed metadata lives on the regular desktop page, so
    both pages are fetched.
    """
    _VALID_URL = r'http://(?:www\.)?thvideo\.tv/(?:v/th|mobile\.php\?cid=)(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://thvideo.tv/v/th1987/',
        'md5': 'fa107b1f73817e325e9433505a70db50',
        'info_dict': {
            'id': '1987',
            'ext': 'mp4',
            'title': '【动画】秘封活动记录 ~ The Sealed Esoteric History.分镜稿预览',
            'display_id': 'th1987',
            'thumbnail': 'http://thvideo.tv/uploadfile/2014/0722/20140722013459856.jpg',
            'description': '社团京都幻想剧团的第一个东方二次同人动画作品「秘封活动记录 ~ The Sealed Esoteric History.」 本视频是该动画第一期的分镜草稿...',
            'upload_date': '20140722'
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        # The mobile player page contains a plain <source> tag with the
        # downloadable media URL.
        player_page = self._download_webpage(
            'http://thvideo.tv/mobile.php?cid=%s-0' % (video_id),
            video_id, note='Downloading video source page')
        video_url = self._html_search_regex(
            r'<source src="(.*?)" type', player_page, 'video url')

        # The desktop page carries the OpenGraph metadata and publish date.
        main_page = self._download_webpage(
            'http://thvideo.tv/v/th%s' % (video_id), video_id)
        upload_date = unified_strdate(self._html_search_regex(
            r'span itemprop="datePublished" content="(.*?)">', main_page,
            'upload date', fatal=False))

        return {
            'id': video_id,
            'ext': 'mp4',
            'url': video_url,
            'title': self._og_search_title(main_page),
            'display_id': 'th%s' % video_id,
            'thumbnail': self._og_search_thumbnail(main_page),
            'description': self._og_search_description(main_page),
            'upload_date': upload_date
        }
|
||||
|
||||
|
||||
class THVideoPlaylistIE(InfoExtractor):
    """Extractor for thvideo.tv "mylist" playlist pages.

    Collects every linked video on the playlist page and hands each one
    off to THVideoIE via url_result.
    """
    # BUG FIX: the pattern previously read r'http?://', which makes the
    # final 'p' optional (matching the malformed scheme 'htt') while
    # rejecting 'https'.  'https?' is the intended scheme alternation.
    _VALID_URL = r'https?://(?:www\.)?thvideo\.tv/mylist(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://thvideo.tv/mylist2',
        'info_dict': {
            'id': '2',
            'title': '幻想万華鏡',
        },
        'playlist_mincount': 23,
    }

    def _real_extract(self, url):
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)
        # Title is best-effort; a playlist without one is still usable.
        list_title = self._html_search_regex(
            r'<h1 class="show_title">(.*?)<b id', webpage, 'playlist title',
            fatal=False)

        # 'video_id' instead of 'id' to avoid shadowing the builtin.
        entries = [
            self.url_result('http://thvideo.tv/v/th' + video_id, 'THVideo')
            for video_id in re.findall(
                r'<dd><a href="http://thvideo.tv/v/th(\d+)/" target=', webpage)]

        return self.playlist_result(entries, playlist_id, list_title)
|
@ -14,27 +14,35 @@ from ..aes import aes_decrypt_text
|
||||
|
||||
|
||||
class Tube8IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/){2}(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.tube8.com/teen/kasia-music-video/229795/',
|
||||
'md5': '44bf12b98313827dd52d35b8706a4ea0',
|
||||
'info_dict': {
|
||||
'id': '229795',
|
||||
'ext': 'mp4',
|
||||
'description': 'hot teen Kasia grinding',
|
||||
'uploader': 'unknown',
|
||||
'title': 'Kasia music video',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
_VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.tube8.com/teen/kasia-music-video/229795/',
|
||||
'md5': '44bf12b98313827dd52d35b8706a4ea0',
|
||||
'info_dict': {
|
||||
'id': '229795',
|
||||
'display_id': 'kasia-music-video',
|
||||
'ext': 'mp4',
|
||||
'description': 'hot teen Kasia grinding',
|
||||
'uploader': 'unknown',
|
||||
'title': 'Kasia music video',
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
req = compat_urllib_request.Request(url)
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
webpage = self._download_webpage(req, display_id)
|
||||
|
||||
flashvars = json.loads(self._html_search_regex(
|
||||
r'var flashvars\s*=\s*({.+?})', webpage, 'flashvars'))
|
||||
@ -70,6 +78,7 @@ class Tube8IE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
|
67
youtube_dl/extractor/turbo.py
Normal file
67
youtube_dl/extractor/turbo.py
Normal file
@ -0,0 +1,67 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
qualities,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class TurboIE(InfoExtractor):
    """Extractor for videos on turbo.fr.

    Metadata and stream URLs come from an XML API endpoint; only the
    description is scraped from the HTML page itself.
    """
    _VALID_URL = r'https?://(?:www\.)?turbo\.fr/videos-voiture/(?P<id>[0-9]+)-'
    _API_URL = 'http://www.turbo.fr/api/tv/xml.php?player_generique=player_generique&id={0:}'
    _TEST = {
        'url': 'http://www.turbo.fr/videos-voiture/454443-turbo-du-07-09-2014-renault-twingo-3-bentley-continental-gt-speed-ces-guide-achat-dacia.html',
        'md5': '33f4b91099b36b5d5a91f84b5bcba600',
        'info_dict': {
            'id': '454443',
            'ext': 'mp4',
            'duration': 3715,
            'title': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia... ',
            'description': 'Retrouvez dans cette rubrique toutes les vidéos de l\'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia... ',
            'thumbnail': 're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        playlist = self._download_xml(self._API_URL.format(video_id), video_id)
        item = playlist.find('./channel/item')
        if item is None:
            raise ExtractorError('Playlist item was not found', expected=True)

        # Stream URLs are stored in <url_video_XXX> child elements, one
        # per quality tier.
        formats = []
        quality_key = qualities(['3g', 'sd', 'hq'])
        for child in item:
            tag_match = re.search(r'url_video_(?P<quality>.+)', child.tag)
            if not tag_match:
                continue
            quality = tag_match.group('quality')
            formats.append({
                'format_id': quality,
                'url': child.text,
                'quality': quality_key(quality),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            # './durate' is the API's actual (misspelled) element name.
            'title': xpath_text(item, './title', 'title'),
            'duration': int_or_none(xpath_text(item, './durate', 'duration')),
            'thumbnail': xpath_text(item, './visuel_clip', 'thumbnail'),
            'description': self._og_search_description(webpage),
            'formats': formats,
        }
|
@ -17,16 +17,16 @@ class TvigleIE(InfoExtractor):
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.tvigle.ru/video/brat-2/',
|
||||
'md5': '72cb7eab33e54314e1790da402d3c9c3',
|
||||
'url': 'http://www.tvigle.ru/video/brat/',
|
||||
'md5': 'ff4344a4894b0524441fb6f8218dc716',
|
||||
'info_dict': {
|
||||
'id': '5119390',
|
||||
'display_id': 'brat-2',
|
||||
'id': '5118490',
|
||||
'display_id': 'brat',
|
||||
'ext': 'mp4',
|
||||
'title': 'Брат 2 ',
|
||||
'description': 'md5:5751f4fe345a58e1692585c361294bd8',
|
||||
'duration': 7356.369,
|
||||
'age_limit': 0,
|
||||
'title': 'Брат',
|
||||
'description': 'md5:d16ac7c0b47052ea51fddb92c4e413eb',
|
||||
'duration': 5722.6,
|
||||
'age_limit': 16,
|
||||
},
|
||||
},
|
||||
{
|
||||
@ -71,6 +71,7 @@ class TvigleIE(InfoExtractor):
|
||||
'format_id': '%s-%s' % (vcodec, quality),
|
||||
'vcodec': vcodec,
|
||||
'height': int(quality[:-1]),
|
||||
'filesize': item['video_files_size'][vcodec][quality],
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@ -19,7 +19,7 @@ class Vbox7IE(InfoExtractor):
|
||||
'md5': '99f65c0c9ef9b682b97313e052734c3f',
|
||||
'info_dict': {
|
||||
'id': '249bb972c2',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Смях! Чудо - чист за секунди - Скрита камера',
|
||||
},
|
||||
}
|
||||
@ -50,7 +50,6 @@ class Vbox7IE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': final_url,
|
||||
'ext': 'flv',
|
||||
'title': title,
|
||||
'thumbnail': thumbnail_url,
|
||||
}
|
||||
|
@ -5,7 +5,7 @@ import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_HTTPError,
|
||||
compat_urllib_request,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
@ -24,7 +24,7 @@ class VevoIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
||||
"md5": "06bea460acb744eab74a9d7dcb4bfd61",
|
||||
"md5": "95ee28ee45e70130e3ab02b0f579ae23",
|
||||
'info_dict': {
|
||||
'id': 'GB1101300280',
|
||||
'ext': 'mp4',
|
||||
@ -40,7 +40,7 @@ class VevoIE(InfoExtractor):
|
||||
}, {
|
||||
'note': 'v3 SMIL format',
|
||||
'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
|
||||
'md5': '893ec0e0d4426a1d96c01de8f2bdff58',
|
||||
'md5': 'f6ab09b034f8c22969020b042e5ac7fc',
|
||||
'info_dict': {
|
||||
'id': 'USUV71302923',
|
||||
'ext': 'mp4',
|
||||
@ -69,6 +69,21 @@ class VevoIE(InfoExtractor):
|
||||
}]
|
||||
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
|
||||
|
||||
def _real_initialize(self):
|
||||
req = compat_urllib_request.Request(
|
||||
'http://www.vevo.com/auth', data=b'')
|
||||
webpage = self._download_webpage(
|
||||
req, None,
|
||||
note='Retrieving oauth token',
|
||||
errnote='Unable to retrieve oauth token',
|
||||
fatal=False)
|
||||
if webpage is False:
|
||||
self._oauth_token = None
|
||||
else:
|
||||
self._oauth_token = self._search_regex(
|
||||
r'access_token":\s*"([^"]+)"',
|
||||
webpage, 'access token', fatal=False)
|
||||
|
||||
def _formats_from_json(self, video_info):
|
||||
last_version = {'version': -1}
|
||||
for version in video_info['videoVersions']:
|
||||
@ -129,6 +144,26 @@ class VevoIE(InfoExtractor):
|
||||
})
|
||||
return formats
|
||||
|
||||
def _download_api_formats(self, video_id):
|
||||
if not self._oauth_token:
|
||||
self._downloader.report_warning(
|
||||
'No oauth token available, skipping API HLS download')
|
||||
return []
|
||||
|
||||
api_url = 'https://apiv2.vevo.com/video/%s/streams/hls?token=%s' % (
|
||||
video_id, self._oauth_token)
|
||||
api_data = self._download_json(
|
||||
api_url, video_id,
|
||||
note='Downloading HLS formats',
|
||||
errnote='Failed to download HLS format list', fatal=False)
|
||||
if api_data is None:
|
||||
return []
|
||||
|
||||
m3u8_url = api_data[0]['url']
|
||||
return self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, entry_protocol='m3u8_native', ext='mp4',
|
||||
preference=0)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
@ -152,30 +187,8 @@ class VevoIE(InfoExtractor):
|
||||
else:
|
||||
age_limit = None
|
||||
|
||||
# Download SMIL
|
||||
smil_blocks = sorted((
|
||||
f for f in video_info['videoVersions']
|
||||
if f['sourceType'] == 13),
|
||||
key=lambda f: f['version'])
|
||||
|
||||
smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
|
||||
self._SMIL_BASE_URL, video_id, video_id.lower())
|
||||
if smil_blocks:
|
||||
smil_url_m = self._search_regex(
|
||||
r'url="([^"]+)"', smil_blocks[-1]['data'], 'SMIL URL',
|
||||
fatal=False)
|
||||
if smil_url_m is not None:
|
||||
smil_url = smil_url_m
|
||||
|
||||
try:
|
||||
smil_xml = self._download_webpage(smil_url, video_id,
|
||||
'Downloading SMIL info')
|
||||
formats.extend(self._formats_from_smil(smil_xml))
|
||||
except ExtractorError as ee:
|
||||
if not isinstance(ee.cause, compat_HTTPError):
|
||||
raise
|
||||
self._downloader.report_warning(
|
||||
'Cannot download SMIL information, falling back to JSON ..')
|
||||
# Download via HLS API
|
||||
formats.extend(self._download_api_formats(video_id))
|
||||
|
||||
self._sort_formats(formats)
|
||||
timestamp_ms = int(self._search_regex(
|
||||
|
@ -31,7 +31,7 @@ class VGTVIE(InfoExtractor):
|
||||
'url': 'http://www.vgtv.no/#!/live/100764/opptak-vgtv-foelger-em-kvalifiseringen',
|
||||
'info_dict': {
|
||||
'id': '100764',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'OPPTAK: VGTV følger EM-kvalifiseringen',
|
||||
'description': 'md5:3772d9c0dc2dff92a886b60039a7d4d3',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
@ -50,7 +50,7 @@ class VGTVIE(InfoExtractor):
|
||||
'url': 'http://www.vgtv.no/#!/live/100015/direkte-her-kan-du-se-laksen-live-fra-suldalslaagen',
|
||||
'info_dict': {
|
||||
'id': '100015',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'DIREKTE: Her kan du se laksen live fra Suldalslågen!',
|
||||
'description': 'md5:9a60cc23fa349f761628924e56eeec2d',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
|
57
youtube_dl/extractor/videomega.py
Normal file
57
youtube_dl/extractor/videomega.py
Normal file
@ -0,0 +1,57 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
remove_start,
|
||||
)
|
||||
|
||||
|
||||
class VideoMegaIE(InfoExtractor):
    """Extractor for videomega.tv.

    The iframe player embeds a URL-escaped JS playlist blob inside an
    unescape("...") call; the media URL and thumbnail are pulled out of
    the unquoted blob.
    """
    _VALID_URL = r'''(?x)https?://
        (?:www\.)?videomega\.tv/
        (?:iframe\.php)?\?ref=(?P<id>[A-Za-z0-9]+)
        '''
    _TEST = {
        'url': 'http://videomega.tv/?ref=GKeGPVedBe',
        'md5': '240fb5bcf9199961f48eb17839b084d6',
        'info_dict': {
            'id': 'GKeGPVedBe',
            'ext': 'mp4',
            'title': 'XXL - All Sports United',
            'thumbnail': 're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        # Always fetch the iframe page, whichever URL form was given.
        iframe_url = 'http://videomega.tv/iframe.php?ref={0:}'.format(video_id)
        webpage = self._download_webpage(iframe_url, video_id)

        escaped_data = self._search_regex(
            r'unescape\("([^"]+)"\)', webpage, 'escaped data')
        playlist = compat_urllib_parse.unquote(escaped_data)

        thumbnail = self._search_regex(
            r'image:\s*"([^"]+)"', playlist, 'thumbnail', fatal=False)
        video_url = self._search_regex(
            r'file:\s*"([^"]+)"', playlist, 'URL')
        # The page <title> is prefixed with the site name; strip it.
        title = remove_start(self._html_search_regex(
            r'<title>(.*?)</title>', webpage, 'title'), 'VideoMega.tv - ')

        formats = [{
            'format_id': 'sd',
            'url': video_url,
        }]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
        }
|
@ -8,17 +8,19 @@ import itertools
|
||||
from .common import InfoExtractor
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
compat_HTTPError,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
clean_html,
|
||||
get_element_by_attribute,
|
||||
compat_urlparse,
|
||||
ExtractorError,
|
||||
get_element_by_attribute,
|
||||
InAdvancePagedList,
|
||||
int_or_none,
|
||||
RegexNotFoundError,
|
||||
std_headers,
|
||||
unsmuggle_url,
|
||||
urlencode_postdata,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -89,6 +91,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
'uploader_id': 'openstreetmapus',
|
||||
'uploader': 'OpenStreetMap US',
|
||||
'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
|
||||
'description': 'md5:380943ec71b89736ff4bf27183233d09',
|
||||
'duration': 1595,
|
||||
},
|
||||
},
|
||||
@ -103,6 +106,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
'uploader': 'The BLN & Business of Software',
|
||||
'uploader_id': 'theblnbusinessofsoftware',
|
||||
'duration': 3610,
|
||||
'description': None,
|
||||
},
|
||||
},
|
||||
{
|
||||
@ -117,6 +121,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
'uploader_id': 'user18948128',
|
||||
'uploader': 'Jaime Marquínez Ferrándiz',
|
||||
'duration': 10,
|
||||
'description': 'This is "youtube-dl password protected test video" by Jaime Marquínez Ferrándiz on Vimeo, the home for high quality videos and the people who love them.',
|
||||
},
|
||||
'params': {
|
||||
'videopassword': 'youtube-dl',
|
||||
@ -203,6 +208,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
# Extract ID from URL
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
orig_url = url
|
||||
if mobj.group('pro') or mobj.group('player'):
|
||||
url = 'http://player.vimeo.com/video/' + video_id
|
||||
|
||||
@ -273,18 +279,23 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
_, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1]
|
||||
|
||||
# Extract video description
|
||||
video_description = None
|
||||
try:
|
||||
video_description = get_element_by_attribute("class", "description_wrapper", webpage)
|
||||
if video_description:
|
||||
video_description = clean_html(video_description)
|
||||
except AssertionError as err:
|
||||
# On some pages like (http://player.vimeo.com/video/54469442) the
|
||||
# html tags are not closed, python 2.6 cannot handle it
|
||||
if err.args[0] == 'we should not get here!':
|
||||
pass
|
||||
else:
|
||||
raise
|
||||
|
||||
video_description = self._html_search_regex(
|
||||
r'(?s)<div\s+class="[^"]*description[^"]*"[^>]*>(.*?)</div>',
|
||||
webpage, 'description', default=None)
|
||||
if not video_description:
|
||||
video_description = self._html_search_meta(
|
||||
'description', webpage, default=None)
|
||||
if not video_description and mobj.group('pro'):
|
||||
orig_webpage = self._download_webpage(
|
||||
orig_url, video_id,
|
||||
note='Downloading webpage for description',
|
||||
fatal=False)
|
||||
if orig_webpage:
|
||||
video_description = self._html_search_meta(
|
||||
'description', orig_webpage, default=None)
|
||||
if not video_description and not mobj.group('player'):
|
||||
self._downloader.report_warning('Cannot find video description')
|
||||
|
||||
# Extract video duration
|
||||
video_duration = int_or_none(config["video"].get("duration"))
|
||||
@ -529,3 +540,58 @@ class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self._extract_videos('watchlater', 'https://vimeo.com/home/watchlater')
|
||||
|
||||
|
||||
class VimeoLikesIE(InfoExtractor):
    # Matches a Vimeo user's "likes" listing, optionally ending at a
    # query string, fragment, or a "sort:" path segment.
    _VALID_URL = r'https?://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes/?(?:$|[?#]|sort:)'
    IE_NAME = 'vimeo:likes'
    IE_DESC = 'Vimeo user likes'
    _TEST = {
        'url': 'https://vimeo.com/user755559/likes/',
        'playlist_mincount': 293,
        "info_dict": {
            "description": "See all the videos urza likes",
            "title": 'Videos urza likes',
        },
    }

    def _real_extract(self, url):
        """Return a lazily-paginated playlist of all videos the user liked."""
        user_id = self._match_id(url)
        webpage = self._download_webpage(url, user_id)
        # The last numbered link of the pagination widget (the one just
        # before the "next" button) carries the total page count.
        page_count = self._int(
            self._search_regex(
                r'''(?x)<li><a\s+href="[^"]+"\s+data-page="([0-9]+)">
                    .*?</a></li>\s*<li\s+class="pagination_next">
                ''', webpage, 'page count'),
            'page count', fatal=True)
        PAGE_SIZE = 12  # videos per likes page on vimeo.com
        title = self._html_search_regex(
            r'(?s)<h1>(.+?)</h1>', webpage, 'title', fatal=False)
        description = self._html_search_meta('description', webpage)

        def _get_page(idx):
            # Download one likes page (pages are 1-based in the URL) and
            # yield a url result for every video linked from it.
            # NOTE: `webpage` here deliberately shadows the outer one.
            page_url = '%s//vimeo.com/user%s/likes/page:%d/sort:date' % (
                self.http_scheme(), user_id, idx + 1)
            webpage = self._download_webpage(
                page_url, user_id,
                note='Downloading page %d/%d' % (idx + 1, page_count))
            video_list = self._search_regex(
                r'(?s)<ol class="js-browse_list[^"]+"[^>]*>(.*?)</ol>',
                webpage, 'video content')
            paths = re.findall(
                r'<li[^>]*>\s*<a\s+href="([^"]+)"', video_list)
            for path in paths:
                yield {
                    '_type': 'url',
                    # Resolve relative video paths against the page URL.
                    'url': compat_urlparse.urljoin(page_url, path),
                }

        # Pages are fetched on demand as the downloader consumes entries.
        pl = InAdvancePagedList(_get_page, page_count, PAGE_SIZE)

        return {
            '_type': 'playlist',
            'id': 'user%s_likes' % user_id,
            'title': title,
            'description': description,
            'entries': pl,
        }
|
||||
|
@ -11,22 +11,48 @@ from ..utils import (
|
||||
|
||||
class VpornIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?vporn\.com/[^/]+/(?P<display_id>[^/]+)/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/',
|
||||
'md5': 'facf37c1b86546fa0208058546842c55',
|
||||
'info_dict': {
|
||||
'id': '497944',
|
||||
'display_id': 'violet-on-her-th-birthday',
|
||||
'ext': 'mp4',
|
||||
'title': 'Violet on her 19th birthday',
|
||||
'description': 'Violet dances in front of the camera which is sure to get you horny.',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'kileyGrope',
|
||||
'categories': ['Masturbation', 'Teen'],
|
||||
'duration': 393,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/',
|
||||
'md5': 'facf37c1b86546fa0208058546842c55',
|
||||
'info_dict': {
|
||||
'id': '497944',
|
||||
'display_id': 'violet-on-her-th-birthday',
|
||||
'ext': 'mp4',
|
||||
'title': 'Violet on her 19th birthday',
|
||||
'description': 'Violet dances in front of the camera which is sure to get you horny.',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'kileyGrope',
|
||||
'categories': ['Masturbation', 'Teen'],
|
||||
'duration': 393,
|
||||
'age_limit': 18,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.vporn.com/female/hana-shower/523564/',
|
||||
'md5': 'ced35a4656198a1664cf2cda1575a25f',
|
||||
'info_dict': {
|
||||
'id': '523564',
|
||||
'display_id': 'hana-shower',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hana Shower',
|
||||
'description': 'Hana showers at the bathroom.',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'Hmmmmm',
|
||||
'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female'],
|
||||
'duration': 588,
|
||||
'age_limit': 18,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@ -64,7 +90,7 @@ class VpornIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
|
||||
for video in re.findall(r'flashvars\.videoUrl([^=]+?)\s*=\s*"([^"]+)"', webpage):
|
||||
for video in re.findall(r'flashvars\.videoUrl([^=]+?)\s*=\s*"(https?://[^"]+)"', webpage):
|
||||
video_url = video[1]
|
||||
fmt = {
|
||||
'url': video_url,
|
||||
|
@ -6,6 +6,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
compat_str,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
@ -16,6 +17,24 @@ class VubeIE(InfoExtractor):
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://vube.com/trending/William+Wei/Y8NUZ69Tf7?t=s',
|
||||
'md5': 'e7aabe1f8f1aa826b9e4735e1f9cee42',
|
||||
'info_dict': {
|
||||
'id': 'Y8NUZ69Tf7',
|
||||
'ext': 'mp4',
|
||||
'title': 'Best Drummer Ever [HD]',
|
||||
'description': 'md5:2d63c4b277b85c2277761c2cf7337d71',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'uploader': 'William',
|
||||
'timestamp': 1406876915,
|
||||
'upload_date': '20140801',
|
||||
'duration': 258.051,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'categories': ['amazing', 'hd', 'best drummer ever', 'william wei', 'bucket drumming', 'street drummer', 'epic street drumming'],
|
||||
},
|
||||
}, {
|
||||
'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon',
|
||||
'md5': 'db7aba89d4603dadd627e9d1973946fe',
|
||||
'info_dict': {
|
||||
@ -32,7 +51,8 @@ class VubeIE(InfoExtractor):
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'categories': ['pop', 'music', 'cover', 'singing', 'jessie j', 'price tag', 'chiara grispo'],
|
||||
}
|
||||
},
|
||||
'skip': 'Removed due to DMCA',
|
||||
},
|
||||
{
|
||||
'url': 'http://vube.com/SerainaMusic/my-7-year-old-sister-and-i-singing-alive-by-krewella/UeBhTudbfS?t=s&n=1',
|
||||
@ -51,7 +71,8 @@ class VubeIE(InfoExtractor):
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'categories': ['seraina', 'jessica', 'krewella', 'alive'],
|
||||
}
|
||||
},
|
||||
'skip': 'Removed due to DMCA',
|
||||
}, {
|
||||
'url': 'http://vube.com/vote/Siren+Gene/0nmsMY5vEq?n=2&t=s',
|
||||
'md5': '0584fc13b50f887127d9d1007589d27f',
|
||||
@ -69,7 +90,8 @@ class VubeIE(InfoExtractor):
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'categories': ['let it go', 'cover', 'idina menzel', 'frozen', 'singing', 'disney', 'siren gene'],
|
||||
}
|
||||
},
|
||||
'skip': 'Removed due to DMCA',
|
||||
}
|
||||
]
|
||||
|
||||
@ -102,6 +124,11 @@ class VubeIE(InfoExtractor):
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
if not formats and video.get('vst') == 'dmca':
|
||||
raise ExtractorError(
|
||||
'This video has been removed in response to a complaint received under the US Digital Millennium Copyright Act.',
|
||||
expected=True)
|
||||
|
||||
title = video['title']
|
||||
description = video.get('description')
|
||||
thumbnail = self._proto_relative_url(video.get('thumbnail_src'), scheme='http:')
|
||||
|
@ -5,6 +5,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse_urlparse,
|
||||
ExtractorError,
|
||||
parse_duration,
|
||||
qualities,
|
||||
)
|
||||
@ -14,13 +15,12 @@ class VuClipIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:m\.)?vuclip\.com/w\?.*?cid=(?P<id>[0-9]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://m.vuclip.com/w?cid=843902317&fid=63532&z=1007&nvar&frm=index.html&bu=4757321434',
|
||||
'md5': '92ac9d1ccefec4f0bb474661ab144fcf',
|
||||
'url': 'http://m.vuclip.com/w?cid=922692425&fid=70295&z=1010&nvar&frm=index.html',
|
||||
'info_dict': {
|
||||
'id': '843902317',
|
||||
'id': '922692425',
|
||||
'ext': '3gp',
|
||||
'title': 'Movie Trailer: Noah',
|
||||
'duration': 139,
|
||||
'title': 'The Toy Soldiers - Hollywood Movie Trailer',
|
||||
'duration': 180,
|
||||
}
|
||||
}
|
||||
|
||||
@ -37,16 +37,32 @@ class VuClipIE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
adfree_url, video_id, note='Download post-ad page')
|
||||
|
||||
error_msg = self._html_search_regex(
|
||||
r'<p class="message">(.*?)</p>', webpage, 'error message',
|
||||
default=None)
|
||||
if error_msg:
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, error_msg), expected=True)
|
||||
|
||||
# These clowns alternate between two page types
|
||||
links_code = self._search_regex(
|
||||
r'(?s)<div class="social align_c".*?>(.*?)<hr\s*/?>', webpage,
|
||||
'links')
|
||||
r'''(?xs)
|
||||
(?:
|
||||
<img\s+src="/im/play.gif".*?>|
|
||||
<!--\ player\ end\ -->\s*</div><!--\ thumb\ end-->
|
||||
)
|
||||
(.*?)
|
||||
(?:
|
||||
<a\s+href="fblike|<div\s+class="social">
|
||||
)
|
||||
''', webpage, 'links')
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.*?)-\s*Vuclip</title>', webpage, 'title').strip()
|
||||
|
||||
quality_order = qualities(['Reg', 'Hi'])
|
||||
formats = []
|
||||
for url, q in re.findall(
|
||||
r'<a href="(?P<url>[^"]+)".*?>(?P<q>[^<]+)</a>', links_code):
|
||||
r'<a\s+href="(?P<url>[^"]+)".*?>(?:<button[^>]*>)?(?P<q>[^<]+)(?:</button>)?</a>', links_code):
|
||||
format_id = compat_urllib_parse_urlparse(url).scheme + '-' + q
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
@ -56,7 +72,7 @@ class VuClipIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'\(([0-9:]+)\)</span></h1>', webpage, 'duration', fatal=False))
|
||||
r'\(([0-9:]+)\)</span>', webpage, 'duration', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@ -5,7 +5,10 @@ import re
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class WatIE(InfoExtractor):
|
||||
@ -37,6 +40,7 @@ class WatIE(InfoExtractor):
|
||||
'upload_date': '20140816',
|
||||
'duration': 2910,
|
||||
},
|
||||
'skip': "Ce contenu n'est pas disponible pour l'instant.",
|
||||
},
|
||||
]
|
||||
|
||||
@ -57,6 +61,11 @@ class WatIE(InfoExtractor):
|
||||
|
||||
video_info = self.download_video_info(real_id)
|
||||
|
||||
error_desc = video_info.get('error_desc')
|
||||
if error_desc:
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, error_desc), expected=True)
|
||||
|
||||
geo_list = video_info.get('geoList')
|
||||
country = geo_list[0] if geo_list else ''
|
||||
|
||||
|
@ -1,13 +1,14 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, compat_urllib_request
|
||||
|
||||
|
||||
class WistiaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)'
|
||||
_API_URL = 'http://fast.wistia.com/embed/medias/{0:}.json'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
|
||||
@ -24,11 +25,13 @@ class WistiaIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
data_json = self._html_search_regex(
|
||||
r'Wistia\.iframeInit\((.*?), {}\);', webpage, 'video data')
|
||||
|
||||
data = json.loads(data_json)
|
||||
request = compat_urllib_request.Request(self._API_URL.format(video_id))
|
||||
request.add_header('Referer', url) # Some videos require this.
|
||||
data_json = self._download_json(request, video_id)
|
||||
if data_json.get('error'):
|
||||
raise ExtractorError('Error while getting the playlist',
|
||||
expected=True)
|
||||
data = data_json['media']
|
||||
|
||||
formats = []
|
||||
thumbnails = []
|
||||
|
@ -13,37 +13,35 @@ class WorldStarHipHopIE(InfoExtractor):
|
||||
"info_dict": {
|
||||
"id": "wshh6a7q1ny0G34ZwuIO",
|
||||
"ext": "mp4",
|
||||
"title": "Video: KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!"
|
||||
"title": "KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!"
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
video_id = m.group('id')
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
webpage_src = self._download_webpage(url, video_id)
|
||||
|
||||
m_vevo_id = re.search(r'videoId=(.*?)&?',
|
||||
webpage_src)
|
||||
m_vevo_id = re.search(r'videoId=(.*?)&?', webpage)
|
||||
if m_vevo_id is not None:
|
||||
return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'so\.addVariable\("file","(.*?)"\)', webpage_src, 'video URL')
|
||||
r'so\.addVariable\("file","(.*?)"\)', webpage, 'video URL')
|
||||
|
||||
if 'youtube' in video_url:
|
||||
return self.url_result(video_url, ie='Youtube')
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r"<title>(.*)</title>", webpage_src, 'title')
|
||||
r'(?s)<div class="content-heading">\s*<h1>(.*?)</h1>',
|
||||
webpage, 'title')
|
||||
|
||||
# Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.
|
||||
thumbnail = self._html_search_regex(
|
||||
r'rel="image_src" href="(.*)" />', webpage_src, 'thumbnail',
|
||||
r'rel="image_src" href="(.*)" />', webpage, 'thumbnail',
|
||||
fatal=False)
|
||||
if not thumbnail:
|
||||
_title = r"""candytitles.*>(.*)</span>"""
|
||||
mobj = re.search(_title, webpage_src)
|
||||
_title = r'candytitles.*>(.*)</span>'
|
||||
mobj = re.search(_title, webpage)
|
||||
if mobj is not None:
|
||||
video_title = mobj.group(1)
|
||||
|
||||
|
@ -37,16 +37,6 @@ class YahooIE(InfoExtractor):
|
||||
'description': 'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://movies.yahoo.com/video/world-loves-spider-man-190819223.html',
|
||||
'md5': '410b7104aa9893b765bc22787a22f3d9',
|
||||
'info_dict': {
|
||||
'id': '516ed8e2-2c4f-339f-a211-7a8b49d30845',
|
||||
'ext': 'mp4',
|
||||
'title': 'The World Loves Spider-Man',
|
||||
'description': '''People all over the world are celebrating the release of \"The Amazing Spider-Man 2.\" We're taking a look at the enthusiastic response Spider-Man has received from viewers all over the world.''',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed',
|
||||
'md5': '60e8ac193d8fb71997caa8fce54c6460',
|
||||
|
52
youtube_dl/extractor/ynet.py
Normal file
52
youtube_dl/extractor/ynet.py
Normal file
@ -0,0 +1,52 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import compat_urllib_parse
|
||||
|
||||
|
||||
class YnetIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:.+?\.)?ynet\.co\.il/(?:.+?/)?0,7340,(?P<id>L(?:-[0-9]+)+),00\.html'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://hot.ynet.co.il/home/0,7340,L-11659-99244,00.html',
|
||||
'md5': '4b29cb57c3dddd57642b3f051f535b07',
|
||||
'info_dict': {
|
||||
'id': 'L-11659-99244',
|
||||
'ext': 'flv',
|
||||
'title': 'איש לא יודע מאיפה באנו',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://hot.ynet.co.il/home/0,7340,L-8859-84418,00.html',
|
||||
'md5': '8194c2ea221e9a639cac96b6b0753dc5',
|
||||
'info_dict': {
|
||||
'id': 'L-8859-84418',
|
||||
'ext': 'flv',
|
||||
'title': "צפו: הנשיקה הלוהטת של תורגי' ויוליה פלוטקין",
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
content = compat_urllib_parse.unquote_plus(self._og_search_video_url(webpage))
|
||||
config = json.loads(self._search_regex(r'config=({.+?})$', content, 'video config'))
|
||||
f4m_url = config['clip']['url']
|
||||
title = self._og_search_title(webpage)
|
||||
m = re.search(r'ynet - HOT -- (["\']+)(?P<title>.+?)\1', title)
|
||||
if m:
|
||||
title = m.group('title')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': self._extract_f4m_formats(f4m_url, video_id),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
@ -1,6 +1,7 @@
|
||||
# coding: utf-8
|
||||
|
||||
import json
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import math
|
||||
import random
|
||||
import re
|
||||
@ -13,18 +14,25 @@ from ..utils import (
|
||||
|
||||
|
||||
class YoukuIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:(?:http://)?(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|youku:)(?P<ID>[A-Za-z0-9]+)(?:\.html|/v\.swf|)'
|
||||
_TEST = {
|
||||
u"url": u"http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html",
|
||||
u"file": u"XNDgyMDQ2NTQw_part00.flv",
|
||||
u"md5": u"ffe3f2e435663dc2d1eea34faeff5b5b",
|
||||
u"params": {u"test": False},
|
||||
u"info_dict": {
|
||||
u"title": u"youtube-dl test video \"'/\\ä↭𝕐"
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
|
||||
youku:)
|
||||
(?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
|
||||
'''
|
||||
_TEST = {
|
||||
'url': 'http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html',
|
||||
'md5': 'ffe3f2e435663dc2d1eea34faeff5b5b',
|
||||
'params': {
|
||||
'test': False
|
||||
},
|
||||
'info_dict': {
|
||||
'id': 'XNDgyMDQ2NTQw_part00',
|
||||
'ext': 'flv',
|
||||
'title': 'youtube-dl test video "\'/\\ä↭𝕐'
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def _gen_sid(self):
|
||||
nowTime = int(time.time() * 1000)
|
||||
random1 = random.randint(1000,1998)
|
||||
@ -55,49 +63,42 @@ class YoukuIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
video_id = mobj.group('ID')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
info_url = 'http://v.youku.com/player/getPlayList/VideoIDS/' + video_id
|
||||
|
||||
jsondata = self._download_webpage(info_url, video_id)
|
||||
config = self._download_json(info_url, video_id)
|
||||
|
||||
self.report_extraction(video_id)
|
||||
try:
|
||||
config = json.loads(jsondata)
|
||||
error_code = config['data'][0].get('error_code')
|
||||
if error_code:
|
||||
# -8 means blocked outside China.
|
||||
error = config['data'][0].get('error') # Chinese and English, separated by newline.
|
||||
raise ExtractorError(error or u'Server reported error %i' % error_code,
|
||||
expected=True)
|
||||
error_code = config['data'][0].get('error_code')
|
||||
if error_code:
|
||||
# -8 means blocked outside China.
|
||||
error = config['data'][0].get('error') # Chinese and English, separated by newline.
|
||||
raise ExtractorError(error or 'Server reported error %i' % error_code,
|
||||
expected=True)
|
||||
|
||||
video_title = config['data'][0]['title']
|
||||
seed = config['data'][0]['seed']
|
||||
video_title = config['data'][0]['title']
|
||||
seed = config['data'][0]['seed']
|
||||
|
||||
format = self._downloader.params.get('format', None)
|
||||
supported_format = list(config['data'][0]['streamfileids'].keys())
|
||||
format = self._downloader.params.get('format', None)
|
||||
supported_format = list(config['data'][0]['streamfileids'].keys())
|
||||
|
||||
if format is None or format == 'best':
|
||||
if 'hd2' in supported_format:
|
||||
format = 'hd2'
|
||||
else:
|
||||
format = 'flv'
|
||||
ext = u'flv'
|
||||
elif format == 'worst':
|
||||
format = 'mp4'
|
||||
ext = u'mp4'
|
||||
# TODO proper format selection
|
||||
if format is None or format == 'best':
|
||||
if 'hd2' in supported_format:
|
||||
format = 'hd2'
|
||||
else:
|
||||
format = 'flv'
|
||||
ext = u'flv'
|
||||
ext = 'flv'
|
||||
elif format == 'worst':
|
||||
format = 'mp4'
|
||||
ext = 'mp4'
|
||||
else:
|
||||
format = 'flv'
|
||||
ext = 'flv'
|
||||
|
||||
|
||||
fileid = config['data'][0]['streamfileids'][format]
|
||||
keys = [s['k'] for s in config['data'][0]['segs'][format]]
|
||||
# segs is usually a dictionary, but an empty *list* if an error occured.
|
||||
except (UnicodeDecodeError, ValueError, KeyError):
|
||||
raise ExtractorError(u'Unable to extract info section')
|
||||
fileid = config['data'][0]['streamfileids'][format]
|
||||
keys = [s['k'] for s in config['data'][0]['segs'][format]]
|
||||
# segs is usually a dictionary, but an empty *list* if an error occured.
|
||||
|
||||
files_info=[]
|
||||
sid = self._gen_sid()
|
||||
@ -106,9 +107,8 @@ class YoukuIE(InfoExtractor):
|
||||
#column 8,9 of fileid represent the segment number
|
||||
#fileid[7:9] should be changed
|
||||
for index, key in enumerate(keys):
|
||||
|
||||
temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:])
|
||||
download_url = 'http://f.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key)
|
||||
download_url = 'http://k.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key)
|
||||
|
||||
info = {
|
||||
'id': '%s_part%02d' % (video_id, index),
|
||||
|
58
youtube_dl/extractor/yourupload.py
Normal file
58
youtube_dl/extractor/yourupload.py
Normal file
@ -0,0 +1,58 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class YourUploadIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?
|
||||
(?:yourupload\.com/watch|
|
||||
embed\.yourupload\.com|
|
||||
embed\.yucache\.net
|
||||
)/(?P<id>[A-Za-z0-9]+)
|
||||
'''
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://yourupload.com/watch/14i14h',
|
||||
'md5': 'bf5c2f95c4c917536e80936af7bc51e1',
|
||||
'info_dict': {
|
||||
'id': '14i14h',
|
||||
'ext': 'mp4',
|
||||
'title': 'BigBuckBunny_320x180.mp4',
|
||||
'thumbnail': 're:^https?://.*\.jpe?g',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://embed.yourupload.com/14i14h',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://embed.yucache.net/14i14h?client_file_id=803349',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
url = 'http://embed.yucache.net/{0:}'.format(video_id)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
url = self._og_search_video_url(webpage)
|
||||
|
||||
formats = [{
|
||||
'format_id': 'sd',
|
||||
'url': url,
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
@ -26,7 +26,7 @@ from ..utils import (
|
||||
get_element_by_attribute,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
PagedList,
|
||||
OnDemandPagedList,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
orderedSet,
|
||||
@ -46,7 +46,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
def _set_language(self):
|
||||
return bool(self._download_webpage(
|
||||
self._LANG_URL, None,
|
||||
note=u'Setting language', errnote='unable to set language',
|
||||
note='Setting language', errnote='unable to set language',
|
||||
fatal=False))
|
||||
|
||||
def _login(self):
|
||||
@ -61,13 +61,13 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
# No authentication to be performed
|
||||
if username is None:
|
||||
if self._LOGIN_REQUIRED:
|
||||
raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
|
||||
raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
|
||||
return True
|
||||
|
||||
login_page = self._download_webpage(
|
||||
self._LOGIN_URL, None,
|
||||
note=u'Downloading login page',
|
||||
errnote=u'unable to fetch login page', fatal=False)
|
||||
note='Downloading login page',
|
||||
errnote='unable to fetch login page', fatal=False)
|
||||
if login_page is False:
|
||||
return
|
||||
|
||||
@ -105,12 +105,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
|
||||
login_results = self._download_webpage(
|
||||
req, None,
|
||||
note=u'Logging in', errnote=u'unable to log in', fatal=False)
|
||||
note='Logging in', errnote='unable to log in', fatal=False)
|
||||
if login_results is False:
|
||||
return False
|
||||
|
||||
if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
|
||||
raise ExtractorError(u'Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
|
||||
raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
|
||||
|
||||
# Two-Factor
|
||||
# TODO add SMS and phone call support - these require making a request and then prompting the user
|
||||
@ -119,19 +119,19 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
tfa_code = self._get_tfa_info()
|
||||
|
||||
if tfa_code is None:
|
||||
self._downloader.report_warning(u'Two-factor authentication required. Provide it with --twofactor <code>')
|
||||
self._downloader.report_warning(u'(Note that only TOTP (Google Authenticator App) codes work at this time.)')
|
||||
self._downloader.report_warning('Two-factor authentication required. Provide it with --twofactor <code>')
|
||||
self._downloader.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)')
|
||||
return False
|
||||
|
||||
# Unlike the first login form, secTok and timeStmp are both required for the TFA form
|
||||
|
||||
match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
|
||||
if match is None:
|
||||
self._downloader.report_warning(u'Failed to get secTok - did the page structure change?')
|
||||
self._downloader.report_warning('Failed to get secTok - did the page structure change?')
|
||||
secTok = match.group(1)
|
||||
match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
|
||||
if match is None:
|
||||
self._downloader.report_warning(u'Failed to get timeStmp - did the page structure change?')
|
||||
self._downloader.report_warning('Failed to get timeStmp - did the page structure change?')
|
||||
timeStmp = match.group(1)
|
||||
|
||||
tfa_form_strs = {
|
||||
@ -155,23 +155,23 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
|
||||
tfa_results = self._download_webpage(
|
||||
tfa_req, None,
|
||||
note=u'Submitting TFA code', errnote=u'unable to submit tfa', fatal=False)
|
||||
note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
|
||||
|
||||
if tfa_results is False:
|
||||
return False
|
||||
|
||||
if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None:
|
||||
self._downloader.report_warning(u'Two-factor code expired. Please try again, or use a one-use backup code instead.')
|
||||
self._downloader.report_warning('Two-factor code expired. Please try again, or use a one-use backup code instead.')
|
||||
return False
|
||||
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
|
||||
self._downloader.report_warning(u'unable to log in - did the page structure change?')
|
||||
self._downloader.report_warning('unable to log in - did the page structure change?')
|
||||
return False
|
||||
if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
|
||||
self._downloader.report_warning(u'Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
|
||||
self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
|
||||
return False
|
||||
|
||||
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
|
||||
self._downloader.report_warning(u'unable to log in: bad username or password')
|
||||
self._downloader.report_warning('unable to log in: bad username or password')
|
||||
return False
|
||||
return True
|
||||
|
||||
@ -185,7 +185,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
self._download_webpage(
|
||||
req, None,
|
||||
note=u'Confirming age', errnote=u'Unable to confirm age')
|
||||
note='Confirming age', errnote='Unable to confirm age')
|
||||
return True
|
||||
|
||||
def _real_initialize(self):
|
||||
@ -211,7 +211,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
|
||||
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
||||
(?: # the various things that can precede the ID:
|
||||
(?:(?:v|embed|e)/) # v/ or embed/ or e/
|
||||
(?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
|
||||
|(?: # or the v= param in all its forms
|
||||
(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
|
||||
(?:\?|\#!?) # the params delimiter ? or # or #!
|
||||
@ -307,69 +307,74 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
IE_NAME = 'youtube'
|
||||
_TESTS = [
|
||||
{
|
||||
u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc",
|
||||
u"file": u"BaW_jenozKc.mp4",
|
||||
u"info_dict": {
|
||||
u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
|
||||
u"uploader": u"Philipp Hagemeister",
|
||||
u"uploader_id": u"phihag",
|
||||
u"upload_date": u"20121002",
|
||||
u"description": u"test chars: \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .",
|
||||
u"categories": [u'Science & Technology'],
|
||||
'url': 'http://www.youtube.com/watch?v=BaW_jenozKc',
|
||||
'info_dict': {
|
||||
'id': 'BaW_jenozKc',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl test video "\'/\\ä↭𝕐',
|
||||
'uploader': 'Philipp Hagemeister',
|
||||
'uploader_id': 'phihag',
|
||||
'upload_date': '20121002',
|
||||
'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
|
||||
'categories': ['Science & Technology'],
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
}
|
||||
},
|
||||
{
|
||||
u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
|
||||
u"file": u"UxxajLWwzqY.mp4",
|
||||
u"note": u"Test generic use_cipher_signature video (#897)",
|
||||
u"info_dict": {
|
||||
u"upload_date": u"20120506",
|
||||
u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
|
||||
u"description": u"md5:fea86fda2d5a5784273df5c7cc994d9f",
|
||||
u"uploader": u"Icona Pop",
|
||||
u"uploader_id": u"IconaPop"
|
||||
'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
|
||||
'note': 'Test generic use_cipher_signature video (#897)',
|
||||
'info_dict': {
|
||||
'id': 'UxxajLWwzqY',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20120506',
|
||||
'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
|
||||
'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f',
|
||||
'uploader': 'Icona Pop',
|
||||
'uploader_id': 'IconaPop',
|
||||
}
|
||||
},
|
||||
{
|
||||
u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ",
|
||||
u"file": u"07FYdnEawAQ.mp4",
|
||||
u"note": u"Test VEVO video with age protection (#956)",
|
||||
u"info_dict": {
|
||||
u"upload_date": u"20130703",
|
||||
u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
|
||||
u"description": u"md5:64249768eec3bc4276236606ea996373",
|
||||
u"uploader": u"justintimberlakeVEVO",
|
||||
u"uploader_id": u"justintimberlakeVEVO"
|
||||
'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
|
||||
'note': 'Test VEVO video with age protection (#956)',
|
||||
'info_dict': {
|
||||
'id': '07FYdnEawAQ',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20130703',
|
||||
'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
|
||||
'description': 'md5:64249768eec3bc4276236606ea996373',
|
||||
'uploader': 'justintimberlakeVEVO',
|
||||
'uploader_id': 'justintimberlakeVEVO',
|
||||
}
|
||||
},
|
||||
{
|
||||
u"url": u"//www.YouTube.com/watch?v=yZIXLfi8CZQ",
|
||||
u"file": u"yZIXLfi8CZQ.mp4",
|
||||
u"note": u"Embed-only video (#1746)",
|
||||
u"info_dict": {
|
||||
u"upload_date": u"20120608",
|
||||
u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012",
|
||||
u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7",
|
||||
u"uploader": u"SET India",
|
||||
u"uploader_id": u"setindia"
|
||||
'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
|
||||
'note': 'Embed-only video (#1746)',
|
||||
'info_dict': {
|
||||
'id': 'yZIXLfi8CZQ',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20120608',
|
||||
'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
|
||||
'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
|
||||
'uploader': 'SET India',
|
||||
'uploader_id': 'setindia'
|
||||
}
|
||||
},
|
||||
{
|
||||
u"url": u"http://www.youtube.com/watch?v=a9LDPn-MO4I",
|
||||
u"file": u"a9LDPn-MO4I.m4a",
|
||||
u"note": u"256k DASH audio (format 141) via DASH manifest",
|
||||
u"info_dict": {
|
||||
u"upload_date": "20121002",
|
||||
u"uploader_id": "8KVIDEO",
|
||||
u"description": '',
|
||||
u"uploader": "8KVIDEO",
|
||||
u"title": "UHDTV TEST 8K VIDEO.mp4"
|
||||
'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
|
||||
'note': '256k DASH audio (format 141) via DASH manifest',
|
||||
'info_dict': {
|
||||
'id': 'a9LDPn-MO4I',
|
||||
'ext': 'm4a',
|
||||
'upload_date': '20121002',
|
||||
'uploader_id': '8KVIDEO',
|
||||
'description': '',
|
||||
'uploader': '8KVIDEO',
|
||||
'title': 'UHDTV TEST 8K VIDEO.mp4'
|
||||
},
|
||||
u"params": {
|
||||
u"youtube_include_dash_manifest": True,
|
||||
u"format": "141",
|
||||
'params': {
|
||||
'youtube_include_dash_manifest': True,
|
||||
'format': '141',
|
||||
},
|
||||
},
|
||||
# DASH manifest with encrypted signature
|
||||
@ -384,7 +389,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
'uploader_id': 'AfrojackVEVO',
|
||||
'upload_date': '20131011',
|
||||
},
|
||||
u"params": {
|
||||
'params': {
|
||||
'youtube_include_dash_manifest': True,
|
||||
'format': '141',
|
||||
},
|
||||
@ -397,19 +402,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
|
||||
def report_video_info_webpage_download(self, video_id):
|
||||
"""Report attempt to download video info webpage."""
|
||||
self.to_screen(u'%s: Downloading video info webpage' % video_id)
|
||||
self.to_screen('%s: Downloading video info webpage' % video_id)
|
||||
|
||||
def report_information_extraction(self, video_id):
|
||||
"""Report attempt to extract video information."""
|
||||
self.to_screen(u'%s: Extracting video information' % video_id)
|
||||
self.to_screen('%s: Extracting video information' % video_id)
|
||||
|
||||
def report_unavailable_format(self, video_id, format):
|
||||
"""Report extracted video URL."""
|
||||
self.to_screen(u'%s: Format %s not available' % (video_id, format))
|
||||
self.to_screen('%s: Format %s not available' % (video_id, format))
|
||||
|
||||
def report_rtmp_download(self):
|
||||
"""Indicate the download will use the RTMP protocol."""
|
||||
self.to_screen(u'RTMP download detected')
|
||||
self.to_screen('RTMP download detected')
|
||||
|
||||
def _signature_cache_id(self, example_sig):
|
||||
""" Return a string representation of a signature """
|
||||
@ -429,21 +434,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
player_type, player_id, self._signature_cache_id(example_sig))
|
||||
assert os.path.basename(func_id) == func_id
|
||||
|
||||
cache_spec = self._downloader.cache.load(u'youtube-sigfuncs', func_id)
|
||||
cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
|
||||
if cache_spec is not None:
|
||||
return lambda s: ''.join(s[i] for i in cache_spec)
|
||||
|
||||
if player_type == 'js':
|
||||
code = self._download_webpage(
|
||||
player_url, video_id,
|
||||
note=u'Downloading %s player %s' % (player_type, player_id),
|
||||
errnote=u'Download of %s failed' % player_url)
|
||||
note='Downloading %s player %s' % (player_type, player_id),
|
||||
errnote='Download of %s failed' % player_url)
|
||||
res = self._parse_sig_js(code)
|
||||
elif player_type == 'swf':
|
||||
urlh = self._request_webpage(
|
||||
player_url, video_id,
|
||||
note=u'Downloading %s player %s' % (player_type, player_id),
|
||||
errnote=u'Download of %s failed' % player_url)
|
||||
note='Downloading %s player %s' % (player_type, player_id),
|
||||
errnote='Download of %s failed' % player_url)
|
||||
code = urlh.read()
|
||||
res = self._parse_sig_swf(code)
|
||||
else:
|
||||
@ -454,15 +459,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
cache_res = res(test_string)
|
||||
cache_spec = [ord(c) for c in cache_res]
|
||||
|
||||
self._downloader.cache.store(u'youtube-sigfuncs', func_id, cache_spec)
|
||||
self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
|
||||
return res
|
||||
|
||||
def _print_sig_code(self, func, example_sig):
|
||||
def gen_sig_code(idxs):
|
||||
def _genslice(start, end, step):
|
||||
starts = '' if start == 0 else str(start)
|
||||
ends = (u':%d' % (end+step)) if end + step >= 0 else ':'
|
||||
steps = '' if step == 1 else (u':%d' % step)
|
||||
ends = (':%d' % (end+step)) if end + step >= 0 else ':'
|
||||
steps = '' if step == 1 else (':%d' % step)
|
||||
return 's[%s%s%s]' % (starts, ends, steps)
|
||||
|
||||
step = None
|
||||
@ -492,9 +497,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
expr_code = ' + '.join(gen_sig_code(cache_spec))
|
||||
signature_id_tuple = '(%s)' % (
|
||||
', '.join(compat_str(len(p)) for p in example_sig.split('.')))
|
||||
code = (u'if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
|
||||
code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
|
||||
' return %s\n') % (signature_id_tuple, expr_code)
|
||||
self.to_screen(u'Extracted signature function:\n' + code)
|
||||
self.to_screen('Extracted signature function:\n' + code)
|
||||
|
||||
def _parse_sig_js(self, jscode):
|
||||
funcname = self._search_regex(
|
||||
@ -516,9 +521,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
"""Turn the encrypted s field into a working signature"""
|
||||
|
||||
if player_url is None:
|
||||
raise ExtractorError(u'Cannot decrypt signature without player_url')
|
||||
raise ExtractorError('Cannot decrypt signature without player_url')
|
||||
|
||||
if player_url.startswith(u'//'):
|
||||
if player_url.startswith('//'):
|
||||
player_url = 'https:' + player_url
|
||||
try:
|
||||
player_id = (player_url, self._signature_cache_id(s))
|
||||
@ -542,7 +547,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
|
||||
video_id, note=False)
|
||||
except ExtractorError as err:
|
||||
self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
|
||||
self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
|
||||
return {}
|
||||
lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
|
||||
|
||||
@ -560,7 +565,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
url = 'https://www.youtube.com/api/timedtext?' + params
|
||||
sub_lang_list[lang] = url
|
||||
if not sub_lang_list:
|
||||
self._downloader.report_warning(u'video doesn\'t have subtitles')
|
||||
self._downloader.report_warning('video doesn\'t have subtitles')
|
||||
return {}
|
||||
return sub_lang_list
|
||||
|
||||
@ -568,7 +573,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
"""We need the webpage for getting the captions url, pass it as an
|
||||
argument to speed up the process."""
|
||||
sub_format = self._downloader.params.get('subtitlesformat', 'srt')
|
||||
self.to_screen(u'%s: Looking for automatic captions' % video_id)
|
||||
self.to_screen('%s: Looking for automatic captions' % video_id)
|
||||
mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
|
||||
err_msg = 'Couldn\'t find automatic captions for %s' % video_id
|
||||
if mobj is None:
|
||||
@ -589,7 +594,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
caption_list = self._download_xml(list_url, video_id)
|
||||
original_lang_node = caption_list.find('track')
|
||||
if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
|
||||
self._downloader.report_warning(u'Video doesn\'t have automatic captions')
|
||||
self._downloader.report_warning('Video doesn\'t have automatic captions')
|
||||
return {}
|
||||
original_lang = original_lang_node.attrib['lang_code']
|
||||
|
||||
@ -615,7 +620,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
def extract_id(cls, url):
|
||||
mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
raise ExtractorError('Invalid URL: %s' % url)
|
||||
video_id = mobj.group(2)
|
||||
return video_id
|
||||
|
||||
@ -635,7 +640,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
|
||||
def _extract_annotations(self, video_id):
|
||||
url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
|
||||
return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
|
||||
return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
|
||||
|
||||
def _real_extract(self, url):
|
||||
proto = (
|
||||
@ -650,6 +655,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
|
||||
# Get video webpage
|
||||
url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
|
||||
pref_cookies = [
|
||||
c for c in self._downloader.cookiejar
|
||||
if c.domain == '.youtube.com' and c.name == 'PREF']
|
||||
for pc in pref_cookies:
|
||||
if 'hl=' in pc.value:
|
||||
pc.value = re.sub(r'hl=[^&]+', 'hl=en', pc.value)
|
||||
else:
|
||||
if pc.value:
|
||||
pc.value += '&'
|
||||
pc.value += 'hl=en'
|
||||
video_webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# Attempt to extract SWF player URL
|
||||
@ -705,14 +720,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
|
||||
# Check for "rental" videos
|
||||
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
|
||||
raise ExtractorError(u'"rental" videos not supported')
|
||||
raise ExtractorError('"rental" videos not supported')
|
||||
|
||||
# Start extracting information
|
||||
self.report_information_extraction(video_id)
|
||||
|
||||
# uploader
|
||||
if 'author' not in video_info:
|
||||
raise ExtractorError(u'Unable to extract uploader name')
|
||||
raise ExtractorError('Unable to extract uploader name')
|
||||
video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
|
||||
|
||||
# uploader_id
|
||||
@ -721,13 +736,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
if mobj is not None:
|
||||
video_uploader_id = mobj.group(1)
|
||||
else:
|
||||
self._downloader.report_warning(u'unable to extract uploader nickname')
|
||||
self._downloader.report_warning('unable to extract uploader nickname')
|
||||
|
||||
# title
|
||||
if 'title' in video_info:
|
||||
video_title = video_info['title'][0]
|
||||
else:
|
||||
self._downloader.report_warning(u'Unable to extract video title')
|
||||
self._downloader.report_warning('Unable to extract video title')
|
||||
video_title = '_'
|
||||
|
||||
# thumbnail image
|
||||
@ -737,7 +752,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
if m_thumb is not None:
|
||||
video_thumbnail = m_thumb.group(1)
|
||||
elif 'thumbnail_url' not in video_info:
|
||||
self._downloader.report_warning(u'unable to extract video thumbnail')
|
||||
self._downloader.report_warning('unable to extract video thumbnail')
|
||||
video_thumbnail = None
|
||||
else: # don't panic if we can't find it
|
||||
video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
|
||||
@ -791,8 +806,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
if count is not None:
|
||||
return int(count.replace(',', ''))
|
||||
return None
|
||||
like_count = _extract_count(u'like')
|
||||
dislike_count = _extract_count(u'dislike')
|
||||
like_count = _extract_count('like')
|
||||
dislike_count = _extract_count('dislike')
|
||||
|
||||
# subtitles
|
||||
video_subtitles = self.extract_subtitles(video_id, video_webpage)
|
||||
@ -802,7 +817,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
return
|
||||
|
||||
if 'length_seconds' not in video_info:
|
||||
self._downloader.report_warning(u'unable to extract video duration')
|
||||
self._downloader.report_warning('unable to extract video duration')
|
||||
video_duration = None
|
||||
else:
|
||||
video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))
|
||||
@ -823,11 +838,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
# Easy way to know if the 's' value is in url_encoded_fmt_stream_map
|
||||
# this signatures are encrypted
|
||||
if 'url_encoded_fmt_stream_map' not in args:
|
||||
raise ValueError(u'No stream_map present') # caught below
|
||||
raise ValueError('No stream_map present') # caught below
|
||||
re_signature = re.compile(r'[&,]s=')
|
||||
m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
|
||||
if m_s is not None:
|
||||
self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
|
||||
self.to_screen('%s: Encrypted signatures detected.' % video_id)
|
||||
video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
|
||||
m_s = re_signature.search(args.get('adaptive_fmts', ''))
|
||||
if m_s is not None:
|
||||
@ -905,7 +920,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
player_desc = 'html5 player %s' % player_version
|
||||
|
||||
parts_sizes = self._signature_cache_id(encrypted_sig)
|
||||
self.to_screen(u'{%s} signature length %s, %s' %
|
||||
self.to_screen('{%s} signature length %s, %s' %
|
||||
(format_id, parts_sizes, player_desc))
|
||||
|
||||
signature = self._decrypt_signature(
|
||||
@ -920,7 +935,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
url_map = self._extract_from_m3u8(manifest_url, video_id)
|
||||
formats = _map_to_format_list(url_map)
|
||||
else:
|
||||
raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
|
||||
raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
|
||||
|
||||
# Look for the DASH manifest
|
||||
if (self._downloader.params.get('youtube_include_dash_manifest', False)):
|
||||
@ -941,9 +956,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
|
||||
dash_doc = self._download_xml(
|
||||
dash_manifest_url, video_id,
|
||||
note=u'Downloading DASH manifest',
|
||||
errnote=u'Could not download DASH manifest')
|
||||
for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
|
||||
note='Downloading DASH manifest',
|
||||
errnote='Could not download DASH manifest')
|
||||
for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
|
||||
url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
|
||||
if url_el is None:
|
||||
continue
|
||||
@ -969,7 +984,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
existing_format.update(f)
|
||||
|
||||
except (ExtractorError, KeyError) as e:
|
||||
self.report_warning(u'Skipping DASH manifest: %s' % e, video_id)
|
||||
self.report_warning('Skipping DASH manifest: %s' % e, video_id)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
@ -1000,7 +1015,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
(?:\w+\.)?
|
||||
youtube\.com/
|
||||
(?:
|
||||
(?:course|view_play_list|my_playlists|artist|playlist|watch)
|
||||
(?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
|
||||
\? (?:.*?&)*? (?:p|a|list)=
|
||||
| p/
|
||||
)
|
||||
@ -1056,6 +1071,20 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
'title': 'YDL_safe_search',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
'note': 'embedded',
|
||||
'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
|
||||
'playlist_count': 4,
|
||||
'info_dict': {
|
||||
'title': 'JODA15',
|
||||
}
|
||||
}, {
|
||||
'note': 'Embedded SWF player',
|
||||
'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
|
||||
'playlist_count': 4,
|
||||
'info_dict': {
|
||||
'title': 'JODA7',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
@ -1090,7 +1119,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
# Extract playlist id
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
raise ExtractorError('Invalid URL: %s' % url)
|
||||
playlist_id = mobj.group(1) or mobj.group(2)
|
||||
|
||||
# Check if it's a video-specific URL
|
||||
@ -1098,16 +1127,16 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
if 'v' in query_dict:
|
||||
video_id = query_dict['v'][0]
|
||||
if self._downloader.params.get('noplaylist'):
|
||||
self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
|
||||
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
|
||||
return self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||
else:
|
||||
self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
||||
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
||||
|
||||
if playlist_id.startswith('RD'):
|
||||
# Mixes require a custom extraction process
|
||||
return self._extract_mix(playlist_id)
|
||||
if playlist_id.startswith('TL'):
|
||||
raise ExtractorError(u'For downloading YouTube.com top lists, use '
|
||||
raise ExtractorError('For downloading YouTube.com top lists, use '
|
||||
'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
|
||||
|
||||
url = self._TEMPLATE_URL % playlist_id
|
||||
@ -1152,19 +1181,28 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
class YoutubeTopListIE(YoutubePlaylistIE):
|
||||
IE_NAME = 'youtube:toplist'
|
||||
IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"'
|
||||
IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"'
|
||||
' (Example: "yttoplist:music:Top Tracks")')
|
||||
_VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
|
||||
_TESTS = []
|
||||
_TESTS = [{
|
||||
'url': 'yttoplist:music:Trending',
|
||||
'playlist_mincount': 5,
|
||||
'skip': 'Only works for logged-in users',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
channel = mobj.group('chann')
|
||||
title = mobj.group('title')
|
||||
query = compat_urllib_parse.urlencode({'title': title})
|
||||
playlist_re = 'href="([^"]+?%s.*?)"' % re.escape(query)
|
||||
channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title)
|
||||
link = self._html_search_regex(playlist_re, channel_page, 'list')
|
||||
channel_page = self._download_webpage(
|
||||
'https://www.youtube.com/%s' % channel, title)
|
||||
link = self._html_search_regex(
|
||||
r'''(?x)
|
||||
<a\s+href="([^"]+)".*?>\s*
|
||||
<span\s+class="branded-page-module-title-text">\s*
|
||||
<span[^>]*>.*?%s.*?</span>''' % re.escape(query),
|
||||
channel_page, 'list')
|
||||
url = compat_urlparse.urljoin('https://www.youtube.com/', link)
|
||||
|
||||
video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
|
||||
@ -1190,6 +1228,11 @@ class YoutubeChannelIE(InfoExtractor):
|
||||
_MORE_PAGES_INDICATOR = 'yt-uix-load-more'
|
||||
_MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
|
||||
IE_NAME = 'youtube:channel'
|
||||
_TESTS = [{
|
||||
'note': 'paginated channel',
|
||||
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
|
||||
'playlist_mincount': 91,
|
||||
}]
|
||||
|
||||
def extract_videos_from_page(self, page):
|
||||
ids_in_page = []
|
||||
@ -1202,7 +1245,7 @@ class YoutubeChannelIE(InfoExtractor):
|
||||
# Extract channel id
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
raise ExtractorError('Invalid URL: %s' % url)
|
||||
|
||||
# Download channel page
|
||||
channel_id = mobj.group(1)
|
||||
@ -1224,7 +1267,7 @@ class YoutubeChannelIE(InfoExtractor):
|
||||
for pagenum in itertools.count(1):
|
||||
url = self._MORE_PAGES_URL % (pagenum, channel_id)
|
||||
page = self._download_json(
|
||||
url, channel_id, note=u'Downloading page #%s' % pagenum,
|
||||
url, channel_id, note='Downloading page #%s' % pagenum,
|
||||
transform_source=uppercase_escape)
|
||||
|
||||
ids_in_page = self.extract_videos_from_page(page['content_html'])
|
||||
@ -1233,7 +1276,7 @@ class YoutubeChannelIE(InfoExtractor):
|
||||
if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
|
||||
break
|
||||
|
||||
self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
|
||||
self._downloader.to_screen('[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
|
||||
|
||||
url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||
for video_id in video_ids]
|
||||
@ -1248,6 +1291,17 @@ class YoutubeUserIE(InfoExtractor):
|
||||
_GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
|
||||
IE_NAME = 'youtube:user'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.youtube.com/user/TheLinuxFoundation',
|
||||
'playlist_mincount': 320,
|
||||
'info_dict': {
|
||||
'title': 'TheLinuxFoundation',
|
||||
}
|
||||
}, {
|
||||
'url': 'ytuser:phihag',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
# Don't return True if the url can be extracted with other youtube
|
||||
@ -1260,7 +1314,7 @@ class YoutubeUserIE(InfoExtractor):
|
||||
# Extract username
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
raise ExtractorError('Invalid URL: %s' % url)
|
||||
|
||||
username = mobj.group(1)
|
||||
|
||||
@ -1281,7 +1335,7 @@ class YoutubeUserIE(InfoExtractor):
|
||||
try:
|
||||
response = json.loads(page)
|
||||
except ValueError as err:
|
||||
raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
|
||||
raise ExtractorError('Invalid JSON in API response: ' + compat_str(err))
|
||||
if 'entry' not in response['feed']:
|
||||
return
|
||||
|
||||
@ -1297,7 +1351,7 @@ class YoutubeUserIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
}
|
||||
url_results = PagedList(download_page, self._GDATA_PAGE_SIZE)
|
||||
url_results = OnDemandPagedList(download_page, self._GDATA_PAGE_SIZE)
|
||||
|
||||
return self.playlist_result(url_results, playlist_title=username)
|
||||
|
||||
@ -1322,9 +1376,9 @@ class YoutubeSearchIE(SearchInfoExtractor):
|
||||
compat_urllib_parse.quote_plus(query.encode('utf-8')),
|
||||
(PAGE_SIZE * pagenum) + 1)
|
||||
data_json = self._download_webpage(
|
||||
result_url, video_id=u'query "%s"' % query,
|
||||
note=u'Downloading page %s' % (pagenum + 1),
|
||||
errnote=u'Unable to download API page')
|
||||
result_url, video_id='query "%s"' % query,
|
||||
note='Downloading page %s' % (pagenum + 1),
|
||||
errnote='Unable to download API page')
|
||||
data = json.loads(data_json)
|
||||
api_response = data['data']
|
||||
|
||||
@ -1356,6 +1410,13 @@ class YoutubeSearchURLIE(InfoExtractor):
|
||||
IE_DESC = 'YouTube.com search URLs'
|
||||
IE_NAME = 'youtube:search_url'
|
||||
_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
|
||||
'playlist_mincount': 5,
|
||||
'info_dict': {
|
||||
'title': 'youtube-dl test video',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@ -1390,17 +1451,38 @@ class YoutubeSearchURLIE(InfoExtractor):
|
||||
|
||||
class YoutubeShowIE(InfoExtractor):
|
||||
IE_DESC = 'YouTube.com (multi-season) shows'
|
||||
_VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
|
||||
_VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
|
||||
IE_NAME = 'youtube:show'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.youtube.com/show/airdisasters',
|
||||
'playlist_mincount': 3,
|
||||
'info_dict': {
|
||||
'id': 'airdisasters',
|
||||
'title': 'Air Disasters',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
show_name = mobj.group(1)
|
||||
webpage = self._download_webpage(url, show_name, 'Downloading show webpage')
|
||||
playlist_id = mobj.group('id')
|
||||
webpage = self._download_webpage(
|
||||
url, playlist_id, 'Downloading show webpage')
|
||||
# There's one playlist for each season of the show
|
||||
m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
|
||||
self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
|
||||
return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
|
||||
self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons)))
|
||||
entries = [
|
||||
self.url_result(
|
||||
'https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
|
||||
for season in m_seasons
|
||||
]
|
||||
title = self._og_search_title(webpage, fatal=False)
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': playlist_id,
|
||||
'title': title,
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
|
||||
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
||||
|
@ -87,7 +87,7 @@ def parseOpts(overrideArguments=None):
|
||||
for private_opt in ['-p', '--password', '-u', '--username', '--video-password']:
|
||||
try:
|
||||
i = opts.index(private_opt)
|
||||
opts[i+1] = '<PRIVATE>'
|
||||
opts[i+1] = 'PRIVATE'
|
||||
except ValueError:
|
||||
pass
|
||||
return opts
|
||||
@ -218,7 +218,7 @@ def parseOpts(overrideArguments=None):
|
||||
|
||||
video_format.add_option('-f', '--format',
|
||||
action='store', dest='format', metavar='FORMAT', default=None,
|
||||
help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality.')
|
||||
help='video format code, specify the order of preference using slashes: -f 22/17/18 . -f mp4 , -f m4a and -f flv are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality. Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio')
|
||||
video_format.add_option('--all-formats',
|
||||
action='store_const', dest='format', help='download all available video formats', const='all')
|
||||
video_format.add_option('--prefer-free-formats',
|
||||
|
@ -673,6 +673,8 @@ class ExtractorError(Exception):
|
||||
expected = True
|
||||
if video_id is not None:
|
||||
msg = video_id + ': ' + msg
|
||||
if cause:
|
||||
msg += u' (caused by %r)' % cause
|
||||
if not expected:
|
||||
msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
|
||||
super(ExtractorError, self).__init__(msg)
|
||||
@ -799,6 +801,12 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
||||
del req.headers['User-agent']
|
||||
req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
|
||||
del req.headers['Youtubedl-user-agent']
|
||||
|
||||
if sys.version_info < (2, 7) and '#' in req.get_full_url():
|
||||
# Python 2.6 is brain-dead when it comes to fragments
|
||||
req._Request__original = req._Request__original.partition('#')[0]
|
||||
req._Request__r_type = req._Request__r_type.partition('#')[0]
|
||||
|
||||
return req
|
||||
|
||||
def http_response(self, req, resp):
|
||||
@ -884,6 +892,7 @@ def unified_strdate(date_str):
|
||||
'%d/%m/%Y',
|
||||
'%d/%m/%y',
|
||||
'%Y/%m/%d %H:%M:%S',
|
||||
'%d/%m/%Y %H:%M:%S',
|
||||
'%Y-%m-%d %H:%M:%S',
|
||||
'%d.%m.%Y %H:%M',
|
||||
'%d.%m.%Y %H.%M',
|
||||
@ -1384,14 +1393,16 @@ def check_executable(exe, args=[]):
|
||||
|
||||
|
||||
class PagedList(object):
|
||||
def __init__(self, pagefunc, pagesize):
|
||||
self._pagefunc = pagefunc
|
||||
self._pagesize = pagesize
|
||||
|
||||
def __len__(self):
|
||||
# This is only useful for tests
|
||||
return len(self.getslice())
|
||||
|
||||
|
||||
class OnDemandPagedList(PagedList):
|
||||
def __init__(self, pagefunc, pagesize):
|
||||
self._pagefunc = pagefunc
|
||||
self._pagesize = pagesize
|
||||
|
||||
def getslice(self, start=0, end=None):
|
||||
res = []
|
||||
for pagenum in itertools.count(start // self._pagesize):
|
||||
@ -1430,6 +1441,35 @@ class PagedList(object):
|
||||
return res
|
||||
|
||||
|
||||
class InAdvancePagedList(PagedList):
|
||||
def __init__(self, pagefunc, pagecount, pagesize):
|
||||
self._pagefunc = pagefunc
|
||||
self._pagecount = pagecount
|
||||
self._pagesize = pagesize
|
||||
|
||||
def getslice(self, start=0, end=None):
|
||||
res = []
|
||||
start_page = start // self._pagesize
|
||||
end_page = (
|
||||
self._pagecount if end is None else (end // self._pagesize + 1))
|
||||
skip_elems = start - start_page * self._pagesize
|
||||
only_more = None if end is None else end - start
|
||||
for pagenum in range(start_page, end_page):
|
||||
page = list(self._pagefunc(pagenum))
|
||||
if skip_elems:
|
||||
page = page[skip_elems:]
|
||||
skip_elems = None
|
||||
if only_more is not None:
|
||||
if len(page) < only_more:
|
||||
only_more -= len(page)
|
||||
else:
|
||||
page = page[:only_more]
|
||||
res.extend(page)
|
||||
break
|
||||
res.extend(page)
|
||||
return res
|
||||
|
||||
|
||||
def uppercase_escape(s):
|
||||
unicode_escape = codecs.getdecoder('unicode_escape')
|
||||
return re.sub(
|
||||
@ -1437,6 +1477,24 @@ def uppercase_escape(s):
|
||||
lambda m: unicode_escape(m.group(0))[0],
|
||||
s)
|
||||
|
||||
|
||||
def escape_rfc3986(s):
|
||||
"""Escape non-ASCII characters as suggested by RFC 3986"""
|
||||
if sys.version_info < (3, 0) and isinstance(s, unicode):
|
||||
s = s.encode('utf-8')
|
||||
return compat_urllib_parse.quote(s, "%/;:@&=+$,!~*'()?#[]")
|
||||
|
||||
|
||||
def escape_url(url):
|
||||
"""Escape URL as suggested by RFC 3986"""
|
||||
url_parsed = compat_urllib_parse_urlparse(url)
|
||||
return url_parsed._replace(
|
||||
path=escape_rfc3986(url_parsed.path),
|
||||
params=escape_rfc3986(url_parsed.params),
|
||||
query=escape_rfc3986(url_parsed.query),
|
||||
fragment=escape_rfc3986(url_parsed.fragment)
|
||||
).geturl()
|
||||
|
||||
try:
|
||||
struct.pack(u'!I', 0)
|
||||
except TypeError:
|
||||
@ -1522,27 +1580,24 @@ def strip_jsonp(code):
|
||||
|
||||
def js_to_json(code):
|
||||
def fix_kv(m):
|
||||
key = m.group(2)
|
||||
if key.startswith("'"):
|
||||
assert key.endswith("'")
|
||||
assert '"' not in key
|
||||
key = '"%s"' % key[1:-1]
|
||||
elif not key.startswith('"'):
|
||||
key = '"%s"' % key
|
||||
|
||||
value = m.group(4)
|
||||
if value.startswith("'"):
|
||||
assert value.endswith("'")
|
||||
assert '"' not in value
|
||||
value = '"%s"' % value[1:-1]
|
||||
|
||||
return m.group(1) + key + m.group(3) + value
|
||||
v = m.group(0)
|
||||
if v in ('true', 'false', 'null'):
|
||||
return v
|
||||
if v.startswith('"'):
|
||||
return v
|
||||
if v.startswith("'"):
|
||||
v = v[1:-1]
|
||||
v = re.sub(r"\\\\|\\'|\"", lambda m: {
|
||||
'\\\\': '\\\\',
|
||||
"\\'": "'",
|
||||
'"': '\\"',
|
||||
}[m.group(0)], v)
|
||||
return '"%s"' % v
|
||||
|
||||
res = re.sub(r'''(?x)
|
||||
([{,]\s*)
|
||||
("[^"]*"|\'[^\']*\'|[a-z0-9A-Z]+)
|
||||
(:\s*)
|
||||
([0-9.]+|true|false|"[^"]*"|\'[^\']*\'|\[|\{)
|
||||
"(?:[^"\\]*(?:\\\\|\\")?)*"|
|
||||
'(?:[^'\\]*(?:\\\\|\\')?)*'|
|
||||
[a-zA-Z_][a-zA-Z_0-9]*
|
||||
''', fix_kv, code)
|
||||
res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
|
||||
return res
|
||||
@ -1571,3 +1626,13 @@ except AttributeError:
|
||||
if ret:
|
||||
raise subprocess.CalledProcessError(ret, p.args, output=output)
|
||||
return output
|
||||
|
||||
|
||||
def limit_length(s, length):
|
||||
""" Add ellipses to overly long strings """
|
||||
if s is None:
|
||||
return None
|
||||
ELLIPSES = '...'
|
||||
if len(s) > length:
|
||||
return s[:length - len(ELLIPSES)] + ELLIPSES
|
||||
return s
|
||||
|
@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2014.09.14.1'
|
||||
__version__ = '2014.10.02'
|
||||
|
Reference in New Issue
Block a user