Compare commits
377 Commits
2014.04.30
...
2014.07.11
Author | SHA1 | Date | |
---|---|---|---|
|
fada438acf | ||
|
1df0ae2170 | ||
|
d96b9d40f0 | ||
|
fa19dfccf9 | ||
|
cdc22cb886 | ||
|
04c77a54b0 | ||
|
64a8c39a1f | ||
|
3d55f2806e | ||
|
1eb867f33f | ||
|
e93f4f7578 | ||
|
45ead916d1 | ||
|
3a0879c8c8 | ||
|
ebf361ce18 | ||
|
953b358668 | ||
|
3dfd25b3aa | ||
|
6f66eedc5d | ||
|
4094b6e36d | ||
|
c09cbf0ed9 | ||
|
391d53e1dd | ||
|
f64ebfe3e5 | ||
|
fc040bfd05 | ||
|
c8bf86d50d | ||
|
61989fb5e9 | ||
|
6f9d4d542f | ||
|
b3a8878080 | ||
|
f4d66a99cf | ||
|
537ba6f381 | ||
|
411f691b21 | ||
|
d6aa1967ad | ||
|
6e1e0e4b5b | ||
|
3941669d69 | ||
|
1aac03797e | ||
|
459af43494 | ||
|
f4f7e3cf41 | ||
|
1fd015516e | ||
|
76bafa8ffe | ||
|
8d5797b00f | ||
|
7571c02c8a | ||
|
49cbe7c8e3 | ||
|
ba4133c9eb | ||
|
b67f1840a1 | ||
|
165c46690f | ||
|
16bc9ab601 | ||
|
15ce1338b4 | ||
|
0ff30c5333 | ||
|
6feb2d5e80 | ||
|
1e07fea200 | ||
|
7aeb67b39b | ||
|
93881db22a | ||
|
64ed7a38f9 | ||
|
2fd466fcfc | ||
|
dc2fc73691 | ||
|
c4808c6009 | ||
|
c67f584eb3 | ||
|
29f6ed78e8 | ||
|
7807ee664d | ||
|
d518d06efd | ||
|
25a0cc44b9 | ||
|
825cdcec3c | ||
|
41b610acab | ||
|
0364fa8b65 | ||
|
849086a1ae | ||
|
36fbc6887f | ||
|
a8a98e43f2 | ||
|
57bdc730e2 | ||
|
31a196d7f5 | ||
|
9b27e6c3b4 | ||
|
62f1f9507f | ||
|
ee8dda41ae | ||
|
01ba178097 | ||
|
78ff59d052 | ||
|
f3f1cd6b3b | ||
|
803540e811 | ||
|
458ade6361 | ||
|
a69969ee05 | ||
|
f2b8db57eb | ||
|
331ae266ff | ||
|
4242001863 | ||
|
78338f71ca | ||
|
f5172a3084 | ||
|
c7df67edbd | ||
|
d410fee91d | ||
|
ba7aa464de | ||
|
8333034dce | ||
|
637b6af80f | ||
|
1044f8afd2 | ||
|
2f775107f9 | ||
|
85342674b2 | ||
|
fd69098a45 | ||
|
8867f908fc | ||
|
b7c33124c8 | ||
|
89a8c423c7 | ||
|
cea2582df2 | ||
|
e423e0baaa | ||
|
60b2dd1285 | ||
|
36ddd8b3f7 | ||
|
7575d52a73 | ||
|
9a2dc4f7ac | ||
|
c5cd249e41 | ||
|
8940c1c058 | ||
|
27ec04b232 | ||
|
d2824416aa | ||
|
18061bbab0 | ||
|
4ecbbcbcea | ||
|
55c97a03e1 | ||
|
98aeac6ea9 | ||
|
8bfb6723cb | ||
|
a20575e8ae | ||
|
7724572519 | ||
|
d763637f6a | ||
|
c26e9ac4b2 | ||
|
896bf55352 | ||
|
a23ba9b53c | ||
|
38a9339baf | ||
|
def8b4039f | ||
|
a14e1538fe | ||
|
5f28a1acad | ||
|
25e9953c6f | ||
|
f9df094ca5 | ||
|
b60a469023 | ||
|
7012631257 | ||
|
e6c9f80c48 | ||
|
895ce482b1 | ||
|
e5da4021eb | ||
|
2371053565 | ||
|
33bf9033e0 | ||
|
35eacd0dae | ||
|
96bef88f5f | ||
|
5524b242a7 | ||
|
a013eba65f | ||
|
36755d40b4 | ||
|
7d568f5ab8 | ||
|
a7207cd580 | ||
|
e8ef659cd9 | ||
|
b0adbe98fb | ||
|
0c361c41b8 | ||
|
e66ab17a36 | ||
|
cb437dc2ad | ||
|
0d933b2ad5 | ||
|
c5469e046a | ||
|
4d2f143ce5 | ||
|
8f93030c85 | ||
|
fdb9aebead | ||
|
3141feb73b | ||
|
9706f3f802 | ||
|
d5e944359e | ||
|
826ec77fb2 | ||
|
2656f4eb6a | ||
|
2b88feedf7 | ||
|
23566e0d78 | ||
|
828553b614 | ||
|
3048e82a94 | ||
|
09ffa08ba1 | ||
|
e0b4cc489f | ||
|
15e423407f | ||
|
702e522044 | ||
|
77abae55df | ||
|
617c0b2239 | ||
|
814d4257df | ||
|
23ae281b31 | ||
|
94128d6b0d | ||
|
059009c592 | ||
|
9cc977f104 | ||
|
1c0ade7afa | ||
|
f2741c8d3a | ||
|
6ab8f3584a | ||
|
8ae5ce1726 | ||
|
eb92077720 | ||
|
90e0fd4bad | ||
|
05741e05d9 | ||
|
9aa6637644 | ||
|
d30d28156d | ||
|
be6d722904 | ||
|
d551980823 | ||
|
f0a6c3d2bc | ||
|
4e0fb1280a | ||
|
24f5251cce | ||
|
ac1390eee8 | ||
|
4a5b4d34dc | ||
|
63adb0cc61 | ||
|
3c80377b69 | ||
|
24577db241 | ||
|
566bd96da8 | ||
|
ebdb64d605 | ||
|
a6ffb92f0b | ||
|
3217377b3c | ||
|
24da5893fc | ||
|
087ca2cb07 | ||
|
b4e7447458 | ||
|
a45e6aadd7 | ||
|
70e322695d | ||
|
6a15923b77 | ||
|
7ffad0af5a | ||
|
0e3ae92441 | ||
|
b3ae826f7a | ||
|
dede691aca | ||
|
fb6a5b965b | ||
|
6340716b3a | ||
|
b675b32e6b | ||
|
6a3fa81ffb | ||
|
df53a98f2b | ||
|
db23d8d2a2 | ||
|
0d69795014 | ||
|
3374f3fdc2 | ||
|
4bf0727b1f | ||
|
263bd4ec50 | ||
|
b7e8b6e37a | ||
|
ceb7a17f34 | ||
|
1a2f2e1e66 | ||
|
6803016858 | ||
|
9b7c4fd981 | ||
|
dc31942f42 | ||
|
1f6b8f3115 | ||
|
9c7b79acd9 | ||
|
9168308579 | ||
|
7e8fdb1aae | ||
|
386ba39cac | ||
|
236d0cd07c | ||
|
ed86f38a11 | ||
|
6db80ad2db | ||
|
14470ac87b | ||
|
0cdf576d86 | ||
|
4ffeca4ea2 | ||
|
211fd6c674 | ||
|
6ebb46c106 | ||
|
0f97c9a06f | ||
|
77fb72646f | ||
|
aae74e3832 | ||
|
894e730911 | ||
|
63961d87a6 | ||
|
87fe568c28 | ||
|
46531b374d | ||
|
9e8753911c | ||
|
5c6b1e578c | ||
|
8f0c8fb452 | ||
|
b702ecebf0 | ||
|
950dc95e97 | ||
|
d9dd3584e1 | ||
|
15a9f36849 | ||
|
d0087d4ff2 | ||
|
cc5ada6f4c | ||
|
dfb2e1a325 | ||
|
65bab327b4 | ||
|
9eeb7abc6b | ||
|
c70df21099 | ||
|
418424e5f5 | ||
|
8477466125 | ||
|
865dbd4a26 | ||
|
b1e6f55912 | ||
|
4d78f3b770 | ||
|
7f739999e9 | ||
|
0f8a01d4f3 | ||
|
e2bf499b14 | ||
|
7cf4547ab6 | ||
|
8ae980807a | ||
|
eec4d8ef96 | ||
|
1c783bca88 | ||
|
ac73651f66 | ||
|
e5ceb3bfda | ||
|
c2ef29234c | ||
|
1a1826c1af | ||
|
c7c6d43fe1 | ||
|
2902d44f99 | ||
|
d6e4ba287b | ||
|
e5c3a4b549 | ||
|
f50ee8d1c3 | ||
|
0e67ab0d8e | ||
|
1d0668ed5a | ||
|
77541837e5 | ||
|
e3a6576f35 | ||
|
89bb8e97ee | ||
|
375696b1b1 | ||
|
4ea5c7b70d | ||
|
305d068362 | ||
|
a231ce87b5 | ||
|
a84d20fc14 | ||
|
9e30092361 | ||
|
10d5c7aa5f | ||
|
412f356e04 | ||
|
8dfa187b8a | ||
|
c1ed1f7055 | ||
|
1514f74967 | ||
|
2e8323e3f7 | ||
|
69f8364042 | ||
|
79981f039b | ||
|
34d863f3fc | ||
|
91994c2c81 | ||
|
3ee4b60d56 | ||
|
76e92371ac | ||
|
08af0205f9 | ||
|
a725fb1f43 | ||
|
05ee2b6dad | ||
|
b74feacac5 | ||
|
426b52fc5d | ||
|
5c30b26846 | ||
|
f07b74fc18 | ||
|
a5a45015ba | ||
|
beee53de06 | ||
|
8712f2bea7 | ||
|
ea102818c9 | ||
|
0a871f6880 | ||
|
481efc84a8 | ||
|
01ed5c9be3 | ||
|
ad3bc6acd5 | ||
|
5afa7f8bee | ||
|
ec8deefc27 | ||
|
a2d5a4ee64 | ||
|
dffcc2ea0c | ||
|
1800eeefed | ||
|
d7e7dedbde | ||
|
d19bb9c0aa | ||
|
3ef79a974a | ||
|
bc6800fbed | ||
|
65314dccf8 | ||
|
feb7221209 | ||
|
56a94d8cbb | ||
|
24e6ec8ac8 | ||
|
87724af7a8 | ||
|
b65c3e77e8 | ||
|
5301304bf2 | ||
|
948bcc60df | ||
|
25dfe0eb10 | ||
|
8e71456a81 | ||
|
ccdd34ed78 | ||
|
26d886354f | ||
|
a172b258ac | ||
|
7b93c2c204 | ||
|
57c7411f46 | ||
|
d0a122348e | ||
|
e4cbb5f382 | ||
|
c1bce22f23 | ||
|
e3abbbe301 | ||
|
55b36e3710 | ||
|
877bea9ce1 | ||
|
33c7ff861e | ||
|
749fe60c1e | ||
|
63b31b059c | ||
|
1476b497eb | ||
|
e399853d0c | ||
|
fdb205b19e | ||
|
fbe8053120 | ||
|
ea783d01e1 | ||
|
b7d73595dc | ||
|
e97e53eeed | ||
|
342f630dbf | ||
|
69c8fb9e5d | ||
|
5f0f8013ac | ||
|
b5368acee8 | ||
|
f71959fcf5 | ||
|
5c9f3b8b16 | ||
|
bebd6f9308 | ||
|
84a2806c16 | ||
|
d0111a7409 | ||
|
aab8874c55 | ||
|
fcf5b01746 | ||
|
4de9e9a6db | ||
|
0067d6c4be | ||
|
2099125333 | ||
|
b48f147d5a | ||
|
4f3e943080 | ||
|
7558830fa3 | ||
|
867274e997 | ||
|
6515778305 | ||
|
3b1dfc0f2f | ||
|
d664de44b7 | ||
|
bbe99d26ec | ||
|
50fc59968e | ||
|
b8b01bb92a | ||
|
eb45133451 | ||
|
10c0e2d818 | ||
|
669f0e7cda | ||
|
32fd27ec98 | ||
|
0c13f378de | ||
|
0049594efb | ||
|
113c7d3eb0 | ||
|
549371fc99 | ||
|
957f27e5bb |
@@ -3,6 +3,7 @@ python:
|
||||
- "2.6"
|
||||
- "2.7"
|
||||
- "3.3"
|
||||
- "3.4"
|
||||
script: nosetests test --verbose
|
||||
notifications:
|
||||
email:
|
||||
|
14
CHANGELOG
14
CHANGELOG
@@ -1,14 +0,0 @@
|
||||
2013.01.02 Codename: GIULIA
|
||||
|
||||
* Add support for ComedyCentral clips <nto>
|
||||
* Corrected Vimeo description fetching <Nick Daniels>
|
||||
* Added the --no-post-overwrites argument <Barbu Paul - Gheorghe>
|
||||
* --verbose offers more environment info
|
||||
* New info_dict field: uploader_id
|
||||
* New updates system, with signature checking
|
||||
* New IEs: NBA, JustinTV, FunnyOrDie, TweetReel, Steam, Ustream
|
||||
* Fixed IEs: BlipTv
|
||||
* Fixed for Python 3 IEs: Xvideo, Youku, XNXX, Dailymotion, Vimeo, InfoQ
|
||||
* Simplified IEs and test code
|
||||
* Various (Python 3 and other) fixes
|
||||
* Revamped and expanded tests
|
8
Makefile
8
Makefile
@@ -1,7 +1,7 @@
|
||||
all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion
|
||||
|
||||
clean:
|
||||
rm -rf youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz
|
||||
|
||||
cleanall: clean
|
||||
rm -f youtube-dl youtube-dl.exe
|
||||
@@ -55,7 +55,9 @@ README.txt: README.md
|
||||
pandoc -f markdown -t plain README.md -o README.txt
|
||||
|
||||
youtube-dl.1: README.md
|
||||
pandoc -s -f markdown -t man README.md -o youtube-dl.1
|
||||
python devscripts/prepare_manpage.py >youtube-dl.1.temp.md
|
||||
pandoc -s -f markdown -t man youtube-dl.1.temp.md -o youtube-dl.1
|
||||
rm -f youtube-dl.1.temp.md
|
||||
|
||||
youtube-dl.bash-completion: youtube_dl/*.py youtube_dl/*/*.py devscripts/bash-completion.in
|
||||
python devscripts/bash-completion.py
|
||||
@@ -75,6 +77,6 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
|
||||
--exclude 'docs/_build' \
|
||||
-- \
|
||||
bin devscripts test youtube_dl docs \
|
||||
CHANGELOG LICENSE README.md README.txt \
|
||||
LICENSE README.md README.txt \
|
||||
Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \
|
||||
youtube-dl
|
||||
|
26
README.md
26
README.md
@@ -1,11 +1,24 @@
|
||||
% YOUTUBE-DL(1)
|
||||
|
||||
# NAME
|
||||
youtube-dl - download videos from youtube.com or other video platforms
|
||||
|
||||
# SYNOPSIS
|
||||
**youtube-dl** [OPTIONS] URL [URL...]
|
||||
|
||||
# INSTALLATION
|
||||
|
||||
To install it right away for all UNIX users (Linux, OS X, etc.), type:
|
||||
|
||||
sudo curl https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl
|
||||
sudo chmod a+x /usr/local/bin/youtube-dl
|
||||
|
||||
If you do not have curl, you can alternatively use a recent wget:
|
||||
|
||||
sudo wget https://yt-dl.org/downloads/2014.05.13/youtube-dl -O /usr/local/bin/youtube-dl
|
||||
sudo chmod a+x /usr/local/bin/youtube-dl
|
||||
|
||||
Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29).
|
||||
|
||||
Alternatively, refer to the developer instructions below for how to check out and work with the git repository. For further options, including PGP signatures, see https://rg3.github.io/youtube-dl/download.html .
|
||||
|
||||
# DESCRIPTION
|
||||
**youtube-dl** is a small command-line program to download videos from
|
||||
YouTube.com and a few more sites. It requires the Python interpreter, version
|
||||
@@ -57,8 +70,9 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--default-search PREFIX Use this prefix for unqualified URLs. For
|
||||
example "gvsearch2:" downloads two videos
|
||||
from google videos for youtube-dl "large
|
||||
apple". By default (with value "auto")
|
||||
youtube-dl guesses.
|
||||
apple". Use the value "auto" to let
|
||||
youtube-dl guess. The default value "error"
|
||||
just throws an error.
|
||||
--ignore-config Do not read configuration files. When given
|
||||
in the global configuration file /etc
|
||||
/youtube-dl.conf: do not read the user
|
||||
@@ -458,7 +472,7 @@ If your report is shorter than two lines, it is almost certainly missing some of
|
||||
|
||||
For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
|
||||
|
||||
Site support requests must contain an example URL. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
|
||||
Site support requests **must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
|
||||
|
||||
### Are you using the latest version?
|
||||
|
||||
|
@@ -15,7 +15,7 @@ header = oldreadme[:oldreadme.index('# OPTIONS')]
|
||||
footer = oldreadme[oldreadme.index('# CONFIGURATION'):]
|
||||
|
||||
options = helptext[helptext.index(' General Options:') + 19:]
|
||||
options = re.sub(r'^ (\w.+)$', r'## \1', options, flags=re.M)
|
||||
options = re.sub(r'(?m)^ (\w.+)$', r'## \1', options)
|
||||
options = '# OPTIONS\n' + options + '\n'
|
||||
|
||||
with io.open(README_FILE, 'w', encoding='utf-8') as f:
|
||||
|
20
devscripts/prepare_manpage.py
Normal file
20
devscripts/prepare_manpage.py
Normal file
@@ -0,0 +1,20 @@
|
||||
|
||||
import io
|
||||
import os.path
|
||||
import sys
|
||||
import re
|
||||
|
||||
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
README_FILE = os.path.join(ROOT_DIR, 'README.md')
|
||||
|
||||
with io.open(README_FILE, encoding='utf-8') as f:
|
||||
readme = f.read()
|
||||
|
||||
PREFIX = '%YOUTUBE-DL(1)\n\n# NAME\n'
|
||||
readme = re.sub(r'(?s)# INSTALLATION.*?(?=# DESCRIPTION)', '', readme)
|
||||
readme = PREFIX + readme
|
||||
|
||||
if sys.version_info < (3, 0):
|
||||
print(readme.encode('utf-8'))
|
||||
else:
|
||||
print(readme)
|
@@ -45,9 +45,9 @@ fi
|
||||
/bin/echo -e "\n### Changing version in version.py..."
|
||||
sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
|
||||
|
||||
/bin/echo -e "\n### Committing CHANGELOG README.md and youtube_dl/version.py..."
|
||||
/bin/echo -e "\n### Committing README.md and youtube_dl/version.py..."
|
||||
make README.md
|
||||
git add CHANGELOG README.md youtube_dl/version.py
|
||||
git add README.md youtube_dl/version.py
|
||||
git commit -m "release $version"
|
||||
|
||||
/bin/echo -e "\n### Now tagging, signing and pushing..."
|
||||
|
@@ -107,7 +107,7 @@ def expect_info_dict(self, expected_dict, got_dict):
|
||||
elif isinstance(expected, type):
|
||||
got = got_dict.get(info_field)
|
||||
self.assertTrue(isinstance(got, expected),
|
||||
u'Expected type %r, but got value %r of type %r' % (expected, got, type(got)))
|
||||
u'Expected type %r for field %s, but got value %r of type %r' % (expected, info_field, got, type(got)))
|
||||
else:
|
||||
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
||||
got = 'md5:' + md5(got_dict.get(info_field))
|
||||
|
@@ -67,7 +67,7 @@ class TestFormatSelection(unittest.TestCase):
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['ext'], 'mp4')
|
||||
|
||||
# No prefer_free_formats => prefer mp4 and flv for greater compatibilty
|
||||
# No prefer_free_formats => prefer mp4 and flv for greater compatibility
|
||||
ydl = YDL()
|
||||
ydl.params['prefer_free_formats'] = False
|
||||
formats = [
|
||||
@@ -279,7 +279,7 @@ class TestFormatSelection(unittest.TestCase):
|
||||
self.assertEqual(ydl._format_note({}), '')
|
||||
assertRegexpMatches(self, ydl._format_note({
|
||||
'vbr': 10,
|
||||
}), '^x\s*10k$')
|
||||
}), '^\s*10k$')
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -13,7 +13,7 @@ from youtube_dl import YoutubeDL
|
||||
|
||||
|
||||
def _download_restricted(url, filename, age):
|
||||
""" Returns true iff the file has been downloaded """
|
||||
""" Returns true if the file has been downloaded """
|
||||
|
||||
params = {
|
||||
'age_limit': age,
|
||||
|
@@ -69,9 +69,6 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
def test_youtube_show_matching(self):
|
||||
self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show'])
|
||||
|
||||
def test_youtube_truncated(self):
|
||||
self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url'])
|
||||
|
||||
def test_youtube_search_matching(self):
|
||||
self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
|
||||
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
|
||||
|
@@ -10,6 +10,7 @@ import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import (
|
||||
assertRegexpMatches,
|
||||
expect_info_dict,
|
||||
FakeYDL,
|
||||
)
|
||||
@@ -22,10 +23,14 @@ from youtube_dl.extractor import (
|
||||
VimeoUserIE,
|
||||
VimeoAlbumIE,
|
||||
VimeoGroupsIE,
|
||||
VineUserIE,
|
||||
UstreamChannelIE,
|
||||
SoundcloudSetIE,
|
||||
SoundcloudUserIE,
|
||||
SoundcloudPlaylistIE,
|
||||
TeacherTubeUserIE,
|
||||
LivestreamIE,
|
||||
LivestreamOriginalIE,
|
||||
NHLVideocenterIE,
|
||||
BambuserChannelIE,
|
||||
BandcampAlbumIE,
|
||||
@@ -36,6 +41,7 @@ from youtube_dl.extractor import (
|
||||
KhanAcademyIE,
|
||||
EveryonesMixtapeIE,
|
||||
RutubeChannelIE,
|
||||
RutubePersonIE,
|
||||
GoogleSearchIE,
|
||||
GenericIE,
|
||||
TEDIE,
|
||||
@@ -100,13 +106,20 @@ class TestPlaylists(unittest.TestCase):
|
||||
self.assertEqual(result['title'], 'Rolex Awards for Enterprise')
|
||||
self.assertTrue(len(result['entries']) > 72)
|
||||
|
||||
def test_vine_user(self):
|
||||
dl = FakeYDL()
|
||||
ie = VineUserIE(dl)
|
||||
result = ie.extract('https://vine.co/Visa')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertTrue(len(result['entries']) >= 50)
|
||||
|
||||
def test_ustream_channel(self):
|
||||
dl = FakeYDL()
|
||||
ie = UstreamChannelIE(dl)
|
||||
result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty')
|
||||
result = ie.extract('http://www.ustream.tv/channel/channeljapan')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], '5124905')
|
||||
self.assertTrue(len(result['entries']) >= 6)
|
||||
self.assertEqual(result['id'], '10874166')
|
||||
self.assertTrue(len(result['entries']) >= 54)
|
||||
|
||||
def test_soundcloud_set(self):
|
||||
dl = FakeYDL()
|
||||
@@ -124,6 +137,25 @@ class TestPlaylists(unittest.TestCase):
|
||||
self.assertEqual(result['id'], '9615865')
|
||||
self.assertTrue(len(result['entries']) >= 12)
|
||||
|
||||
def test_soundcloud_likes(self):
|
||||
dl = FakeYDL()
|
||||
ie = SoundcloudUserIE(dl)
|
||||
result = ie.extract('https://soundcloud.com/the-concept-band/likes')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], '9615865')
|
||||
self.assertTrue(len(result['entries']) >= 1)
|
||||
|
||||
def test_soundcloud_playlist(self):
|
||||
dl = FakeYDL()
|
||||
ie = SoundcloudPlaylistIE(dl)
|
||||
result = ie.extract('http://api.soundcloud.com/playlists/4110309')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], '4110309')
|
||||
self.assertEqual(result['title'], 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]')
|
||||
assertRegexpMatches(
|
||||
self, result['description'], r'TILT Brass - Bowery Poetry Club')
|
||||
self.assertEqual(len(result['entries']), 6)
|
||||
|
||||
def test_livestream_event(self):
|
||||
dl = FakeYDL()
|
||||
ie = LivestreamIE(dl)
|
||||
@@ -132,6 +164,14 @@ class TestPlaylists(unittest.TestCase):
|
||||
self.assertEqual(result['title'], 'TEDCity2.0 (English)')
|
||||
self.assertTrue(len(result['entries']) >= 4)
|
||||
|
||||
def test_livestreamoriginal_folder(self):
|
||||
dl = FakeYDL()
|
||||
ie = LivestreamOriginalIE(dl)
|
||||
result = ie.extract('https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], 'a07bf706-d0e4-4e75-a747-b021d84f2fd3')
|
||||
self.assertTrue(len(result['entries']) >= 28)
|
||||
|
||||
def test_nhl_videocenter(self):
|
||||
dl = FakeYDL()
|
||||
ie = NHLVideocenterIE(dl)
|
||||
@@ -188,20 +228,20 @@ class TestPlaylists(unittest.TestCase):
|
||||
def test_ivi_compilation(self):
|
||||
dl = FakeYDL()
|
||||
ie = IviCompilationIE(dl)
|
||||
result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel')
|
||||
result = ie.extract('http://www.ivi.ru/watch/dvoe_iz_lartsa')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], 'dezhurnyi_angel')
|
||||
self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012)')
|
||||
self.assertTrue(len(result['entries']) >= 23)
|
||||
self.assertEqual(result['id'], 'dvoe_iz_lartsa')
|
||||
self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008)')
|
||||
self.assertTrue(len(result['entries']) >= 24)
|
||||
|
||||
def test_ivi_compilation_season(self):
|
||||
dl = FakeYDL()
|
||||
ie = IviCompilationIE(dl)
|
||||
result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel/season2')
|
||||
result = ie.extract('http://www.ivi.ru/watch/dvoe_iz_lartsa/season1')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], 'dezhurnyi_angel/season2')
|
||||
self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012) 2 сезон')
|
||||
self.assertTrue(len(result['entries']) >= 7)
|
||||
self.assertEqual(result['id'], 'dvoe_iz_lartsa/season1')
|
||||
self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008) 1 сезон')
|
||||
self.assertTrue(len(result['entries']) >= 12)
|
||||
|
||||
def test_imdb_list(self):
|
||||
dl = FakeYDL()
|
||||
@@ -234,10 +274,18 @@ class TestPlaylists(unittest.TestCase):
|
||||
def test_rutube_channel(self):
|
||||
dl = FakeYDL()
|
||||
ie = RutubeChannelIE(dl)
|
||||
result = ie.extract('http://rutube.ru/tags/video/1409')
|
||||
result = ie.extract('http://rutube.ru/tags/video/1800/')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], '1409')
|
||||
self.assertTrue(len(result['entries']) >= 34)
|
||||
self.assertEqual(result['id'], '1800')
|
||||
self.assertTrue(len(result['entries']) >= 68)
|
||||
|
||||
def test_rutube_person(self):
|
||||
dl = FakeYDL()
|
||||
ie = RutubePersonIE(dl)
|
||||
result = ie.extract('http://rutube.ru/video/person/313878/')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], '313878')
|
||||
self.assertTrue(len(result['entries']) >= 37)
|
||||
|
||||
def test_multiple_brightcove_videos(self):
|
||||
# https://github.com/rg3/youtube-dl/issues/2283
|
||||
@@ -339,5 +387,13 @@ class TestPlaylists(unittest.TestCase):
|
||||
result['title'], 'Brace Yourself - Today\'s Weirdest News')
|
||||
self.assertTrue(len(result['entries']) >= 10)
|
||||
|
||||
def test_TeacherTubeUser(self):
|
||||
dl = FakeYDL()
|
||||
ie = TeacherTubeUserIE(dl)
|
||||
result = ie.extract('http://www.teachertube.com/user/profile/rbhagwati2')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], 'rbhagwati2')
|
||||
self.assertTrue(len(result['entries']) >= 179)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -112,11 +112,11 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
def test_youtube_mix(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('http://www.youtube.com/watch?v=lLJf9qJHR3E&list=RDrjFaenf1T-Y')
|
||||
result = ie.extract('https://www.youtube.com/watch?v=W01L70IGBgE&index=2&list=RDOQpdSVF_k_w')
|
||||
entries = result['entries']
|
||||
self.assertTrue(len(entries) >= 20)
|
||||
original_video = entries[0]
|
||||
self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
|
||||
self.assertEqual(original_video['id'], 'OQpdSVF_k_w')
|
||||
|
||||
def test_youtube_toptracks(self):
|
||||
print('Skipping: The playlist page gives error 500')
|
||||
|
@@ -33,6 +33,12 @@ _TESTS = [
|
||||
90,
|
||||
u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
|
||||
),
|
||||
(
|
||||
u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
|
||||
u'js',
|
||||
u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
|
||||
u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
@@ -44,7 +50,7 @@ class TestSignature(unittest.TestCase):
|
||||
os.mkdir(self.TESTDATA_DIR)
|
||||
|
||||
|
||||
def make_tfunc(url, stype, sig_length, expected_sig):
|
||||
def make_tfunc(url, stype, sig_input, expected_sig):
|
||||
basename = url.rpartition('/')[2]
|
||||
m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename)
|
||||
assert m, '%r should follow URL format' % basename
|
||||
@@ -66,7 +72,9 @@ def make_tfunc(url, stype, sig_length, expected_sig):
|
||||
with open(fn, 'rb') as testf:
|
||||
swfcode = testf.read()
|
||||
func = ie._parse_sig_swf(swfcode)
|
||||
src_sig = compat_str(string.printable[:sig_length])
|
||||
src_sig = (
|
||||
compat_str(string.printable[:sig_input])
|
||||
if isinstance(sig_input, int) else sig_input)
|
||||
got_sig = func(src_sig)
|
||||
self.assertEqual(got_sig, expected_sig)
|
||||
|
||||
|
@@ -717,6 +717,17 @@ class YoutubeDL(object):
|
||||
info_dict['playlist'] = None
|
||||
info_dict['playlist_index'] = None
|
||||
|
||||
thumbnails = info_dict.get('thumbnails')
|
||||
if thumbnails:
|
||||
thumbnails.sort(key=lambda t: (
|
||||
t.get('width'), t.get('height'), t.get('url')))
|
||||
for t in thumbnails:
|
||||
if 'width' in t and 'height' in t:
|
||||
t['resolution'] = '%dx%d' % (t['width'], t['height'])
|
||||
|
||||
if thumbnails and 'thumbnail' not in info_dict:
|
||||
info_dict['thumbnail'] = thumbnails[-1]['url']
|
||||
|
||||
if 'display_id' not in info_dict and 'id' in info_dict:
|
||||
info_dict['display_id'] = info_dict['id']
|
||||
|
||||
@@ -982,6 +993,8 @@ class YoutubeDL(object):
|
||||
fd = get_suitable_downloader(info)(self, self.params)
|
||||
for ph in self._progress_hooks:
|
||||
fd.add_progress_hook(ph)
|
||||
if self.params.get('verbose'):
|
||||
self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
|
||||
return fd.download(name, info)
|
||||
if info_dict.get('requested_formats') is not None:
|
||||
downloaded = []
|
||||
|
@@ -53,6 +53,16 @@ __authors__ = (
|
||||
'Mattias Harrysson',
|
||||
'phaer',
|
||||
'Sainyam Kapoor',
|
||||
'Nicolas Évrard',
|
||||
'Jason Normore',
|
||||
'Hoje Lee',
|
||||
'Adam Thalhammer',
|
||||
'Georg Jähnig',
|
||||
'Ralf Haring',
|
||||
'Koki Takahashi',
|
||||
'Ariset Llerena',
|
||||
'Adam Malcontenti-Wilson',
|
||||
'Tobias Bell',
|
||||
)
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
@@ -263,7 +273,7 @@ def parseOpts(overrideArguments=None):
|
||||
general.add_option(
|
||||
'--default-search',
|
||||
dest='default_search', metavar='PREFIX',
|
||||
help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.')
|
||||
help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess. The default value "error" just throws an error.')
|
||||
general.add_option(
|
||||
'--ignore-config',
|
||||
action='store_true',
|
||||
@@ -677,7 +687,7 @@ def _real_main(argv=None):
|
||||
if not opts.audioquality.isdigit():
|
||||
parser.error(u'invalid audio quality specified')
|
||||
if opts.recodevideo is not None:
|
||||
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg']:
|
||||
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']:
|
||||
parser.error(u'invalid video recode format specified')
|
||||
if opts.date is not None:
|
||||
date = DateRange.day(opts.date)
|
||||
|
@@ -25,7 +25,7 @@ class HlsFD(FileDownloader):
|
||||
except (OSError, IOError):
|
||||
pass
|
||||
else:
|
||||
self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found')
|
||||
self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
|
||||
cmd = [program] + args
|
||||
|
||||
retval = subprocess.call(cmd)
|
||||
|
@@ -110,7 +110,7 @@ class HttpFD(FileDownloader):
|
||||
# However, for a test we still would like to download just a piece of a file.
|
||||
# To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
|
||||
# block size when downloading a file.
|
||||
if is_test and data_len > self._TEST_FILE_SIZE:
|
||||
if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
|
||||
data_len = self._TEST_FILE_SIZE
|
||||
|
||||
if data_len is not None:
|
||||
|
@@ -10,6 +10,7 @@ from .common import FileDownloader
|
||||
from ..utils import (
|
||||
encodeFilename,
|
||||
format_bytes,
|
||||
compat_str,
|
||||
)
|
||||
|
||||
|
||||
@@ -95,6 +96,7 @@ class RtmpFD(FileDownloader):
|
||||
flash_version = info_dict.get('flash_version', None)
|
||||
live = info_dict.get('rtmp_live', False)
|
||||
conn = info_dict.get('rtmp_conn', None)
|
||||
protocol = info_dict.get('rtmp_protocol', None)
|
||||
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
@@ -104,7 +106,7 @@ class RtmpFD(FileDownloader):
|
||||
try:
|
||||
subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
||||
except (OSError, IOError):
|
||||
self.report_error('RTMP download detected but "rtmpdump" could not be run')
|
||||
self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install it.')
|
||||
return False
|
||||
|
||||
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
||||
@@ -127,8 +129,13 @@ class RtmpFD(FileDownloader):
|
||||
basic_args += ['--flashVer', flash_version]
|
||||
if live:
|
||||
basic_args += ['--live']
|
||||
if conn:
|
||||
if isinstance(conn, list):
|
||||
for entry in conn:
|
||||
basic_args += ['--conn', entry]
|
||||
elif isinstance(conn, compat_str):
|
||||
basic_args += ['--conn', conn]
|
||||
if protocol is not None:
|
||||
basic_args += ['--protocol', protocol]
|
||||
args = basic_args + [[], ['--resume', '--skip', '1']][not live and self.params.get('continuedl', False)]
|
||||
|
||||
if sys.platform == 'win32' and sys.version_info < (3, 0):
|
||||
|
@@ -3,6 +3,7 @@ from .addanime import AddAnimeIE
|
||||
from .aftonbladet import AftonbladetIE
|
||||
from .anitube import AnitubeIE
|
||||
from .aol import AolIE
|
||||
from .allocine import AllocineIE
|
||||
from .aparat import AparatIE
|
||||
from .appletrailers import AppleTrailersIE
|
||||
from .archiveorg import ArchiveOrgIE
|
||||
@@ -63,6 +64,7 @@ from .dailymotion import (
|
||||
from .daum import DaumIE
|
||||
from .dotsub import DotsubIE
|
||||
from .dreisat import DreiSatIE
|
||||
from .drtv import DRTVIE
|
||||
from .defense import DefenseGouvFrIE
|
||||
from .discovery import DiscoveryIE
|
||||
from .divxstage import DivxStageIE
|
||||
@@ -72,6 +74,7 @@ from .ehow import EHowIE
|
||||
from .eighttracks import EightTracksIE
|
||||
from .eitb import EitbIE
|
||||
from .elpais import ElPaisIE
|
||||
from .empflix import EmpflixIE
|
||||
from .engadget import EngadgetIE
|
||||
from .escapist import EscapistIE
|
||||
from .everyonesmixtape import EveryonesMixtapeIE
|
||||
@@ -79,6 +82,7 @@ from .exfm import ExfmIE
|
||||
from .extremetube import ExtremeTubeIE
|
||||
from .facebook import FacebookIE
|
||||
from .faz import FazIE
|
||||
from .fc2 import FC2IE
|
||||
from .firstpost import FirstpostIE
|
||||
from .firsttv import FirstTVIE
|
||||
from .fivemin import FiveMinIE
|
||||
@@ -101,18 +105,23 @@ from .freesound import FreesoundIE
|
||||
from .freespeech import FreespeechIE
|
||||
from .funnyordie import FunnyOrDieIE
|
||||
from .gamekings import GamekingsIE
|
||||
from .gameone import GameOneIE
|
||||
from .gamespot import GameSpotIE
|
||||
from .gametrailers import GametrailersIE
|
||||
from .gdcvault import GDCVaultIE
|
||||
from .generic import GenericIE
|
||||
from .googleplus import GooglePlusIE
|
||||
from .googlesearch import GoogleSearchIE
|
||||
from .gorillavid import GorillaVidIE
|
||||
from .goshgay import GoshgayIE
|
||||
from .hark import HarkIE
|
||||
from .helsinki import HelsinkiIE
|
||||
from .hentaistigma import HentaiStigmaIE
|
||||
from .hotnewhiphop import HotNewHipHopIE
|
||||
from .howcast import HowcastIE
|
||||
from .huffpost import HuffPostIE
|
||||
from .hypem import HypemIE
|
||||
from .iconosquare import IconosquareIE
|
||||
from .ign import IGNIE, OneUPIE
|
||||
from .imdb import (
|
||||
ImdbIE,
|
||||
@@ -138,10 +147,15 @@ from .khanacademy import KhanAcademyIE
|
||||
from .kickstarter import KickStarterIE
|
||||
from .keek import KeekIE
|
||||
from .kontrtube import KontrTubeIE
|
||||
from .ku6 import Ku6IE
|
||||
from .la7 import LA7IE
|
||||
from .lifenews import LifeNewsIE
|
||||
from .liveleak import LiveLeakIE
|
||||
from .livestream import LivestreamIE, LivestreamOriginalIE
|
||||
from .livestream import (
|
||||
LivestreamIE,
|
||||
LivestreamOriginalIE,
|
||||
LivestreamShortenerIE,
|
||||
)
|
||||
from .lynda import (
|
||||
LyndaIE,
|
||||
LyndaCourseIE
|
||||
@@ -159,11 +173,13 @@ from .mpora import MporaIE
|
||||
from .mofosex import MofosexIE
|
||||
from .mooshare import MooshareIE
|
||||
from .morningstar import MorningstarIE
|
||||
from .motherless import MotherlessIE
|
||||
from .motorsport import MotorsportIE
|
||||
from .moviezine import MoviezineIE
|
||||
from .movshare import MovShareIE
|
||||
from .mtv import (
|
||||
MTVIE,
|
||||
MTVServicesEmbeddedIE,
|
||||
MTVIggyIE,
|
||||
)
|
||||
from .musicplayon import MusicPlayOnIE
|
||||
@@ -180,6 +196,7 @@ from .nbc import (
|
||||
from .ndr import NDRIE
|
||||
from .ndtv import NDTVIE
|
||||
from .newgrounds import NewgroundsIE
|
||||
from .newstube import NewstubeIE
|
||||
from .nfb import NFBIE
|
||||
from .nhl import NHLIE, NHLVideocenterIE
|
||||
from .niconico import NiconicoIE
|
||||
@@ -189,8 +206,14 @@ from .normalboots import NormalbootsIE
|
||||
from .novamov import NovaMovIE
|
||||
from .nowness import NownessIE
|
||||
from .nowvideo import NowVideoIE
|
||||
from .nrk import NRKIE
|
||||
from .npo import NPOIE
|
||||
from .nrk import (
|
||||
NRKIE,
|
||||
NRKTVIE,
|
||||
)
|
||||
from .ntv import NTVIE
|
||||
from .nytimes import NYTimesIE
|
||||
from .nuvid import NuvidIE
|
||||
from .oe1 import OE1IE
|
||||
from .ooyala import OoyalaIE
|
||||
from .orf import ORFIE
|
||||
@@ -205,6 +228,7 @@ from .pornotube import PornotubeIE
|
||||
from .prosiebensat1 import ProSiebenSat1IE
|
||||
from .pyvideo import PyvideoIE
|
||||
from .radiofrance import RadioFranceIE
|
||||
from .rai import RaiIE
|
||||
from .rbmaradio import RBMARadioIE
|
||||
from .redtube import RedTubeIE
|
||||
from .ringtv import RingTVIE
|
||||
@@ -227,6 +251,7 @@ from .scivee import SciVeeIE
|
||||
from .servingsys import ServingSysIE
|
||||
from .sina import SinaIE
|
||||
from .slideshare import SlideshareIE
|
||||
from .slutload import SlutloadIE
|
||||
from .smotri import (
|
||||
SmotriIE,
|
||||
SmotriCommunityIE,
|
||||
@@ -234,7 +259,13 @@ from .smotri import (
|
||||
SmotriBroadcastIE,
|
||||
)
|
||||
from .sohu import SohuIE
|
||||
from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
|
||||
from .soundcloud import (
|
||||
SoundcloudIE,
|
||||
SoundcloudSetIE,
|
||||
SoundcloudUserIE,
|
||||
SoundcloudPlaylistIE
|
||||
)
|
||||
from .soundgasm import SoundgasmIE
|
||||
from .southparkstudios import (
|
||||
SouthParkStudiosIE,
|
||||
SouthparkDeIE,
|
||||
@@ -242,17 +273,25 @@ from .southparkstudios import (
|
||||
from .space import SpaceIE
|
||||
from .spankwire import SpankwireIE
|
||||
from .spiegel import SpiegelIE
|
||||
from .spiegeltv import SpiegeltvIE
|
||||
from .spike import SpikeIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .statigram import StatigramIE
|
||||
from .steam import SteamIE
|
||||
from .streamcloud import StreamcloudIE
|
||||
from .streamcz import StreamCZIE
|
||||
from .swrmediathek import SWRMediathekIE
|
||||
from .syfy import SyfyIE
|
||||
from .sztvhu import SztvHuIE
|
||||
from .tagesschau import TagesschauIE
|
||||
from .teachertube import (
|
||||
TeacherTubeIE,
|
||||
TeacherTubeUserIE,
|
||||
)
|
||||
from .teachingchannel import TeachingChannelIE
|
||||
from .teamcoco import TeamcocoIE
|
||||
from .techtalks import TechTalksIE
|
||||
from .ted import TEDIE
|
||||
from .tenplay import TenPlayIE
|
||||
from .testurl import TestURLIE
|
||||
from .tf1 import TF1IE
|
||||
from .theplatform import ThePlatformIE
|
||||
@@ -282,12 +321,14 @@ from .veehd import VeeHDIE
|
||||
from .veoh import VeohIE
|
||||
from .vesti import VestiIE
|
||||
from .vevo import VevoIE
|
||||
from .vh1 import VH1IE
|
||||
from .viddler import ViddlerIE
|
||||
from .videobam import VideoBamIE
|
||||
from .videodetective import VideoDetectiveIE
|
||||
from .videolecturesnet import VideoLecturesNetIE
|
||||
from .videofyme import VideofyMeIE
|
||||
from .videopremium import VideoPremiumIE
|
||||
from .videott import VideoTtIE
|
||||
from .videoweed import VideoWeedIE
|
||||
from .vimeo import (
|
||||
VimeoIE,
|
||||
@@ -298,21 +339,29 @@ from .vimeo import (
|
||||
VimeoReviewIE,
|
||||
VimeoWatchLaterIE,
|
||||
)
|
||||
from .vine import VineIE
|
||||
from .vimple import VimpleIE
|
||||
from .vine import (
|
||||
VineIE,
|
||||
VineUserIE,
|
||||
)
|
||||
from .viki import VikiIE
|
||||
from .vk import VKIE
|
||||
from .vodlocker import VodlockerIE
|
||||
from .vube import VubeIE
|
||||
from .vuclip import VuClipIE
|
||||
from .vulture import VultureIE
|
||||
from .washingtonpost import WashingtonPostIE
|
||||
from .wat import WatIE
|
||||
from .wdr import (
|
||||
WDRIE,
|
||||
WDRMobileIE,
|
||||
WDRMausIE,
|
||||
)
|
||||
from .weibo import WeiboIE
|
||||
from .wimp import WimpIE
|
||||
from .wistia import WistiaIE
|
||||
from .worldstarhiphop import WorldStarHipHopIE
|
||||
from .wrzuta import WrzutaIE
|
||||
from .xbef import XBefIE
|
||||
from .xhamster import XHamsterIE
|
||||
from .xnxx import XNXXIE
|
||||
|
@@ -1,7 +1,6 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -16,6 +15,7 @@ class AftonbladetIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna',
|
||||
'description': 'Jupiters måne mest aktiv av alla himlakroppar',
|
||||
'timestamp': 1394142732,
|
||||
'upload_date': '20140306',
|
||||
},
|
||||
}
|
||||
@@ -27,17 +27,17 @@ class AftonbladetIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# find internal video meta data
|
||||
META_URL = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json'
|
||||
meta_url = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json'
|
||||
internal_meta_id = self._html_search_regex(
|
||||
r'data-aptomaId="([\w\d]+)"', webpage, 'internal_meta_id')
|
||||
internal_meta_url = META_URL % internal_meta_id
|
||||
internal_meta_url = meta_url % internal_meta_id
|
||||
internal_meta_json = self._download_json(
|
||||
internal_meta_url, video_id, 'Downloading video meta data')
|
||||
|
||||
# find internal video formats
|
||||
FORMATS_URL = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s'
|
||||
format_url = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s'
|
||||
internal_video_id = internal_meta_json['videoId']
|
||||
internal_formats_url = FORMATS_URL % internal_video_id
|
||||
internal_formats_url = format_url % internal_video_id
|
||||
internal_formats_json = self._download_json(
|
||||
internal_formats_url, video_id, 'Downloading video formats')
|
||||
|
||||
@@ -54,16 +54,13 @@ class AftonbladetIE(InfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
timestamp = datetime.datetime.fromtimestamp(internal_meta_json['timePublished'])
|
||||
upload_date = timestamp.strftime('%Y%m%d')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': internal_meta_json['title'],
|
||||
'formats': formats,
|
||||
'thumbnail': internal_meta_json['imageUrl'],
|
||||
'description': internal_meta_json['shortPreamble'],
|
||||
'upload_date': upload_date,
|
||||
'timestamp': internal_meta_json['timePublished'],
|
||||
'duration': internal_meta_json['duration'],
|
||||
'view_count': internal_meta_json['views'],
|
||||
}
|
||||
|
89
youtube_dl/extractor/allocine.py
Normal file
89
youtube_dl/extractor/allocine.py
Normal file
@@ -0,0 +1,89 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
qualities,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
class AllocineIE(InfoExtractor):
    """Information extractor for allocine.fr article, video and film pages."""

    # 'typ' tells _real_extract how to locate the media id inside the page;
    # 'id' is the numeric id found in the URL and is only used as display id.
    _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?P<typ>article|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=)(?P<id>[0-9]+)(?:\.html)?'

    _TESTS = [{
        'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html',
        'md5': '0c9fcf59a841f65635fa300ac43d8269',
        'info_dict': {
            'id': '19546517',
            'ext': 'mp4',
            'title': 'Astérix - Le Domaine des Dieux Teaser VF',
            'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:http://.*\.jpg',
        },
    }, {
        'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html',
        'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0',
        'info_dict': {
            'id': '19540403',
            'ext': 'mp4',
            'title': 'Planes 2 Bande-annonce VF',
            'description': 'md5:c4b1f7bd682a91de6491ada267ec0f4d',
            'thumbnail': r're:http://.*\.jpg',
        },
    }, {
        'url': 'http://www.allocine.fr/film/fichefilm_gen_cfilm=181290.html',
        'md5': '101250fb127ef9ca3d73186ff22a47ce',
        'info_dict': {
            'id': '19544709',
            'ext': 'mp4',
            'title': 'Dragons 2 - Bande annonce finale VF',
            'description': 'md5:e74a4dc750894bac300ece46c7036490',
            'thumbnail': r're:http://.*\.jpg',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the allocine.fr page at ``url``.

        The media id is read from the downloaded page (a player link for
        'film' pages, the JSON in the ``data-player`` attribute otherwise),
        then the AcVisiondataV4 XML for that id supplies the title and the
        per-quality file URLs.

        Returns a dict with 'id', 'title', 'thumbnail', 'formats' and
        'description'.
        """
        mobj = re.match(self._VALID_URL, url)
        typ = mobj.group('typ')
        display_id = mobj.group('id')

        webpage = self._download_webpage(url, display_id)

        if typ == 'film':
            # Film pages only link to the player page; take the id from
            # that link.
            video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id')
        else:
            # Article and video pages carry a JSON blob in a single-quoted
            # data-player attribute.
            player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player')

            player_data = json.loads(player)
            video_id = compat_str(player_data['refMedia'])

        xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id)

        # All metadata used below lives in the attributes of one element.
        video = xml.find('.//AcVisionVideo').attrib
        # NOTE(review): presumably ranks format ids in ascending preference
        # ('ld' < 'md' < 'hd') -- confirm against utils.qualities.
        quality = qualities(['ld', 'md', 'hd'])

        formats = []
        for k, v in video.items():
            # Attributes named like '<format>_path' hold the media URLs.
            if re.match(r'.+_path', k):
                format_id = k.split('_')[0]
                formats.append({
                    'format_id': format_id,
                    'quality': quality(format_id),
                    'url': v,
                    'ext': determine_ext(v),
                })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video['videoTitle'],
            'thumbnail': self._og_search_thumbnail(webpage),
            'formats': formats,
            'description': self._og_search_description(webpage),
        }
|
@@ -1,22 +1,24 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class AnitubeIE(InfoExtractor):
|
||||
IE_NAME = u'anitube.se'
|
||||
IE_NAME = 'anitube.se'
|
||||
_VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.anitube.se/video/36621',
|
||||
u'md5': u'59d0eeae28ea0bc8c05e7af429998d43',
|
||||
u'file': u'36621.mp4',
|
||||
u'info_dict': {
|
||||
u'id': u'36621',
|
||||
u'ext': u'mp4',
|
||||
u'title': u'Recorder to Randoseru 01',
|
||||
'url': 'http://www.anitube.se/video/36621',
|
||||
'md5': '59d0eeae28ea0bc8c05e7af429998d43',
|
||||
'info_dict': {
|
||||
'id': '36621',
|
||||
'ext': 'mp4',
|
||||
'title': 'Recorder to Randoseru 01',
|
||||
'duration': 180.19,
|
||||
},
|
||||
u'skip': u'Blocked in the US',
|
||||
'skip': 'Blocked in the US',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -24,13 +26,15 @@ class AnitubeIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)',
|
||||
webpage, u'key')
|
||||
key = self._html_search_regex(
|
||||
r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)', webpage, 'key')
|
||||
|
||||
config_xml = self._download_xml('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
|
||||
key)
|
||||
config_xml = self._download_xml(
|
||||
'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key)
|
||||
|
||||
video_title = config_xml.find('title').text
|
||||
thumbnail = config_xml.find('image').text
|
||||
duration = float(config_xml.find('duration').text)
|
||||
|
||||
formats = []
|
||||
video_url = config_xml.find('file')
|
||||
@@ -49,5 +53,7 @@ class AnitubeIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats
|
||||
}
|
||||
|
@@ -38,37 +38,43 @@ class ARDIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', webpage, 'title')
|
||||
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
|
||||
r'<meta name="dcterms.title" content="(.*?)"/>',
|
||||
r'<h4 class="headline">(.*?)</h4>'],
|
||||
webpage, 'title')
|
||||
description = self._html_search_meta(
|
||||
'dcterms.abstract', webpage, 'description')
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
streams = [
|
||||
mo.groupdict()
|
||||
for mo in re.finditer(
|
||||
r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)', webpage)]
|
||||
|
||||
media_info = self._download_json(
|
||||
'http://www.ardmediathek.de/play/media/%s' % video_id, video_id)
|
||||
# The second element of the _mediaArray contains the standard http urls
|
||||
streams = media_info['_mediaArray'][1]['_mediaStreamArray']
|
||||
if not streams:
|
||||
if '"fsk"' in webpage:
|
||||
raise ExtractorError('This video is only available after 20:00')
|
||||
|
||||
formats = []
|
||||
for s in streams:
|
||||
format = {
|
||||
'quality': int(s['quality']),
|
||||
}
|
||||
if s.get('rtmp_url'):
|
||||
format['protocol'] = 'rtmp'
|
||||
format['url'] = s['rtmp_url']
|
||||
format['playpath'] = s['video_url']
|
||||
else:
|
||||
format['url'] = s['video_url']
|
||||
|
||||
quality_name = self._search_regex(
|
||||
r'[,.]([a-zA-Z0-9_-]+),?\.mp4', format['url'],
|
||||
'quality name', default='NA')
|
||||
format['format_id'] = '%s-%s-%s-%s' % (
|
||||
determine_ext(format['url']), quality_name, s['media_type'],
|
||||
s['quality'])
|
||||
for s in streams:
|
||||
if type(s['_stream']) == list:
|
||||
for index, url in enumerate(s['_stream'][::-1]):
|
||||
quality = s['_quality'] + index
|
||||
formats.append({
|
||||
'quality': quality,
|
||||
'url': url,
|
||||
'format_id': '%s-%s' % (determine_ext(url), quality)
|
||||
})
|
||||
continue
|
||||
|
||||
format = {
|
||||
'quality': s['_quality'],
|
||||
'url': s['_stream'],
|
||||
}
|
||||
|
||||
format['format_id'] = '%s-%s' % (
|
||||
determine_ext(format['url']), format['quality'])
|
||||
|
||||
formats.append(format)
|
||||
|
||||
|
@@ -39,7 +39,10 @@ class ArteTvIE(InfoExtractor):
|
||||
|
||||
formats = [{
|
||||
'forma_id': q.attrib['quality'],
|
||||
'url': q.text,
|
||||
# The playpath starts at 'mp4:', if we don't manually
|
||||
# split the url, rtmpdump will incorrectly parse them
|
||||
'url': q.text.split('mp4:', 1)[0],
|
||||
'play_path': 'mp4:' + q.text.split('mp4:', 1)[1],
|
||||
'ext': 'flv',
|
||||
'quality': 2 if q.attrib['quality'] == 'hd' else 1,
|
||||
} for q in config.findall('./urls/url')]
|
||||
@@ -111,7 +114,7 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
if not formats:
|
||||
# Some videos are only available in the 'Originalversion'
|
||||
# they aren't tagged as being in French or German
|
||||
if all(f['versionCode'] == 'VO' for f in all_formats):
|
||||
if all(f['versionCode'] == 'VO' or f['versionCode'] == 'VA' for f in all_formats):
|
||||
formats = all_formats
|
||||
else:
|
||||
raise ExtractorError(u'The formats list is empty')
|
||||
@@ -121,14 +124,17 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
return ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
|
||||
else:
|
||||
def sort_key(f):
|
||||
versionCode = f.get('versionCode')
|
||||
if versionCode is None:
|
||||
versionCode = ''
|
||||
return (
|
||||
# Sort first by quality
|
||||
int(f.get('height',-1)),
|
||||
int(f.get('bitrate',-1)),
|
||||
int(f.get('height', -1)),
|
||||
int(f.get('bitrate', -1)),
|
||||
# The original version with subtitles has lower relevance
|
||||
re.match(r'VO-ST(F|A)', f.get('versionCode', '')) is None,
|
||||
re.match(r'VO-ST(F|A)', versionCode) is None,
|
||||
# The version with sourds/mal subtitles has also lower relevance
|
||||
re.match(r'VO?(F|A)-STM\1', f.get('versionCode', '')) is None,
|
||||
re.match(r'VO?(F|A)-STM\1', versionCode) is None,
|
||||
# Prefer http downloads over m3u8
|
||||
0 if f['url'].endswith('m3u8') else 1,
|
||||
)
|
||||
@@ -186,9 +192,10 @@ class ArteTVFutureIE(ArteTVPlus7IE):
|
||||
_TEST = {
|
||||
'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081',
|
||||
'info_dict': {
|
||||
'id': '050940-003',
|
||||
'id': '5201',
|
||||
'ext': 'mp4',
|
||||
'title': 'Les champignons au secours de la planète',
|
||||
'upload_date': '20131101',
|
||||
},
|
||||
}
|
||||
|
||||
|
@@ -12,14 +12,14 @@ from ..utils import (
|
||||
|
||||
|
||||
class BandcampIE(InfoExtractor):
|
||||
_VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
|
||||
_VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>.*)'
|
||||
_TESTS = [{
|
||||
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
||||
'file': '1812978515.mp3',
|
||||
'md5': 'c557841d5e50261777a6585648adf439',
|
||||
'info_dict': {
|
||||
"title": "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
|
||||
"duration": 10,
|
||||
"duration": 9.8485,
|
||||
},
|
||||
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
||||
}]
|
||||
@@ -28,36 +28,32 @@ class BandcampIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, title)
|
||||
# We get the link to the free download page
|
||||
m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
|
||||
if m_download is None:
|
||||
if not m_download:
|
||||
m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
|
||||
if m_trackinfo:
|
||||
json_code = m_trackinfo.group(1)
|
||||
data = json.loads(json_code)
|
||||
d = data[0]
|
||||
data = json.loads(json_code)[0]
|
||||
|
||||
duration = int(round(d['duration']))
|
||||
formats = []
|
||||
for format_id, format_url in d['file'].items():
|
||||
ext, _, abr_str = format_id.partition('-')
|
||||
|
||||
for format_id, format_url in data['file'].items():
|
||||
ext, abr_str = format_id.split('-', 1)
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': format_url,
|
||||
'ext': format_id.partition('-')[0],
|
||||
'ext': ext,
|
||||
'vcodec': 'none',
|
||||
'acodec': format_id.partition('-')[0],
|
||||
'abr': int(format_id.partition('-')[2]),
|
||||
'acodec': ext,
|
||||
'abr': int(abr_str),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': compat_str(d['id']),
|
||||
'title': d['title'],
|
||||
'id': compat_str(data['id']),
|
||||
'title': data['title'],
|
||||
'formats': formats,
|
||||
'duration': duration,
|
||||
'duration': float(data['duration']),
|
||||
}
|
||||
else:
|
||||
raise ExtractorError('No free songs found')
|
||||
@@ -67,11 +63,9 @@ class BandcampIE(InfoExtractor):
|
||||
r'var TralbumData = {(.*?)id: (?P<id>\d*?)$',
|
||||
webpage, re.MULTILINE | re.DOTALL).group('id')
|
||||
|
||||
download_webpage = self._download_webpage(download_link, video_id,
|
||||
'Downloading free downloads page')
|
||||
# We get the dictionary of the track from some javascrip code
|
||||
info = re.search(r'items: (.*?),$',
|
||||
download_webpage, re.MULTILINE).group(1)
|
||||
download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page')
|
||||
# We get the dictionary of the track from some javascript code
|
||||
info = re.search(r'items: (.*?),$', download_webpage, re.MULTILINE).group(1)
|
||||
info = json.loads(info)[0]
|
||||
# We pick mp3-320 for now, until format selection can be easily implemented.
|
||||
mp3_info = info['downloads']['mp3-320']
|
||||
@@ -100,7 +94,7 @@ class BandcampIE(InfoExtractor):
|
||||
|
||||
class BandcampAlbumIE(InfoExtractor):
|
||||
IE_NAME = 'Bandcamp:album'
|
||||
_VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)'
|
||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+))'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
||||
@@ -123,13 +117,15 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
'params': {
|
||||
'playlistend': 2
|
||||
},
|
||||
'skip': 'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
|
||||
'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
playlist_id = mobj.group('subdomain')
|
||||
title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, title)
|
||||
display_id = title or playlist_id
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
|
||||
if not tracks_paths:
|
||||
raise ExtractorError('The page doesn\'t contain any tracks')
|
||||
@@ -139,6 +135,8 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
title = self._search_regex(r'album_title : "(.*?)"', webpage, 'title')
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': playlist_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'entries': entries,
|
||||
}
|
||||
|
@@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class BiliBiliIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.bilibili\.tv/video/av(?P<id>[0-9]+)/'
|
||||
_VALID_URL = r'http://www\.bilibili\.(?:tv|com)/video/av(?P<id>[0-9]+)/'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.bilibili.tv/video/av1074402/',
|
||||
@@ -56,7 +56,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
'thumbnailUrl', video_code, 'thumbnail', fatal=False)
|
||||
|
||||
player_params = compat_parse_qs(self._html_search_regex(
|
||||
r'<iframe .*?class="player" src="https://secure.bilibili.tv/secure,([^"]+)"',
|
||||
r'<iframe .*?class="player" src="https://secure\.bilibili\.(?:tv|com)/secure,([^"]+)"',
|
||||
webpage, 'player params'))
|
||||
|
||||
if 'cid' in player_params:
|
||||
|
@@ -1,13 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
remove_start,
|
||||
)
|
||||
from ..utils import remove_start
|
||||
|
||||
|
||||
class BlinkxIE(InfoExtractor):
|
||||
@@ -16,18 +13,21 @@ class BlinkxIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB',
|
||||
'file': '8aQUy7GV.mp4',
|
||||
'md5': '2e9a07364af40163a908edbf10bb2492',
|
||||
'info_dict': {
|
||||
"title": "Police Car Rolls Away",
|
||||
"uploader": "stupidvideos.com",
|
||||
"upload_date": "20131215",
|
||||
"description": "A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!",
|
||||
"duration": 14.886,
|
||||
"thumbnails": [{
|
||||
"width": 100,
|
||||
"height": 76,
|
||||
"url": "http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg",
|
||||
'id': '8aQUy7GV',
|
||||
'ext': 'mp4',
|
||||
'title': 'Police Car Rolls Away',
|
||||
'uploader': 'stupidvideos.com',
|
||||
'upload_date': '20131215',
|
||||
'timestamp': 1387068000,
|
||||
'description': 'A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!',
|
||||
'duration': 14.886,
|
||||
'thumbnails': [{
|
||||
'width': 100,
|
||||
'height': 76,
|
||||
'resolution': '100x76',
|
||||
'url': 'http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg',
|
||||
}],
|
||||
},
|
||||
}
|
||||
@@ -37,13 +37,10 @@ class BlinkxIE(InfoExtractor):
|
||||
video_id = m.group('id')
|
||||
display_id = video_id[:8]
|
||||
|
||||
api_url = (u'https://apib4.blinkx.com/api.php?action=play_video&' +
|
||||
api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' +
|
||||
'video=%s' % video_id)
|
||||
data_json = self._download_webpage(api_url, display_id)
|
||||
data = json.loads(data_json)['api']['results'][0]
|
||||
dt = datetime.datetime.fromtimestamp(data['pubdate_epoch'])
|
||||
pload_date = dt.strftime('%Y%m%d')
|
||||
|
||||
duration = None
|
||||
thumbnails = []
|
||||
formats = []
|
||||
@@ -58,16 +55,13 @@ class BlinkxIE(InfoExtractor):
|
||||
duration = m['d']
|
||||
elif m['type'] == 'youtube':
|
||||
yt_id = m['link']
|
||||
self.to_screen(u'Youtube video detected: %s' % yt_id)
|
||||
self.to_screen('Youtube video detected: %s' % yt_id)
|
||||
return self.url_result(yt_id, 'Youtube', video_id=yt_id)
|
||||
elif m['type'] in ('flv', 'mp4'):
|
||||
vcodec = remove_start(m['vcodec'], 'ff')
|
||||
acodec = remove_start(m['acodec'], 'ff')
|
||||
tbr = (int(m['vbr']) + int(m['abr'])) // 1000
|
||||
format_id = (u'%s-%sk-%s' %
|
||||
(vcodec,
|
||||
tbr,
|
||||
m['w']))
|
||||
format_id = '%s-%sk-%s' % (vcodec, tbr, m['w'])
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': m['link'],
|
||||
@@ -88,7 +82,7 @@ class BlinkxIE(InfoExtractor):
|
||||
'title': data['title'],
|
||||
'formats': formats,
|
||||
'uploader': data['channel_name'],
|
||||
'upload_date': pload_date,
|
||||
'timestamp': data['pubdate_epoch'],
|
||||
'description': data.get('description'),
|
||||
'thumbnails': thumbnails,
|
||||
'duration': duration,
|
||||
|
@@ -1,102 +1,124 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
compat_urllib_request,
|
||||
|
||||
unescapeHTML,
|
||||
parse_iso8601,
|
||||
compat_urlparse,
|
||||
clean_html,
|
||||
compat_str,
|
||||
)
|
||||
|
||||
|
||||
class BlipTVIE(SubtitlesInfoExtractor):
|
||||
"""Information extractor for blip.tv"""
|
||||
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+]+)))'
|
||||
|
||||
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(?P<presumptive_id>.+)$'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
|
||||
'md5': 'c6934ad0b6acf2bd920720ec888eb812',
|
||||
'info_dict': {
|
||||
'id': '5779306',
|
||||
'ext': 'mov',
|
||||
'upload_date': '20111205',
|
||||
'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
|
||||
'uploader': 'Comic Book Resources - CBR TV',
|
||||
'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
|
||||
'md5': 'c6934ad0b6acf2bd920720ec888eb812',
|
||||
'info_dict': {
|
||||
'id': '5779306',
|
||||
'ext': 'mov',
|
||||
'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
|
||||
'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
|
||||
'timestamp': 1323138843,
|
||||
'upload_date': '20111206',
|
||||
'uploader': 'cbr',
|
||||
'uploader_id': '679425',
|
||||
'duration': 81,
|
||||
}
|
||||
},
|
||||
{
|
||||
# https://github.com/rg3/youtube-dl/pull/2274
|
||||
'note': 'Video with subtitles',
|
||||
'url': 'http://blip.tv/play/h6Uag5OEVgI.html',
|
||||
'md5': '309f9d25b820b086ca163ffac8031806',
|
||||
'info_dict': {
|
||||
'id': '6586561',
|
||||
'ext': 'mp4',
|
||||
'title': 'Red vs. Blue Season 11 Episode 1',
|
||||
'description': 'One-Zero-One',
|
||||
'timestamp': 1371261608,
|
||||
'upload_date': '20130615',
|
||||
'uploader': 'redvsblue',
|
||||
'uploader_id': '792887',
|
||||
'duration': 279,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# https://github.com/rg3/youtube-dl/pull/2274
|
||||
'note': 'Video with subtitles',
|
||||
'url': 'http://blip.tv/play/h6Uag5OEVgI.html',
|
||||
'md5': '309f9d25b820b086ca163ffac8031806',
|
||||
'info_dict': {
|
||||
'id': '6586561',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Red vs. Blue',
|
||||
'description': 'One-Zero-One',
|
||||
'upload_date': '20130614',
|
||||
'title': 'Red vs. Blue Season 11 Episode 1',
|
||||
}
|
||||
}]
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
presumptive_id = mobj.group('presumptive_id')
|
||||
lookup_id = mobj.group('lookup_id')
|
||||
|
||||
# See https://github.com/rg3/youtube-dl/issues/857
|
||||
embed_mobj = re.match(r'https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', url)
|
||||
if embed_mobj:
|
||||
info_url = 'http://blip.tv/play/%s.x?p=1' % embed_mobj.group(1)
|
||||
info_page = self._download_webpage(info_url, embed_mobj.group(1))
|
||||
video_id = self._search_regex(
|
||||
r'data-episode-id="([0-9]+)', info_page, 'video_id')
|
||||
return self.url_result('http://blip.tv/a/a-' + video_id, 'BlipTV')
|
||||
|
||||
cchar = '&' if '?' in url else '?'
|
||||
json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
|
||||
request = compat_urllib_request.Request(json_url)
|
||||
request.add_header('User-Agent', 'iTunes/10.6.1')
|
||||
|
||||
json_data = self._download_json(request, video_id=presumptive_id)
|
||||
|
||||
if 'Post' in json_data:
|
||||
data = json_data['Post']
|
||||
if lookup_id:
|
||||
info_page = self._download_webpage(
|
||||
'http://blip.tv/play/%s.x?p=1' % lookup_id, lookup_id, 'Resolving lookup id')
|
||||
video_id = self._search_regex(r'data-episode-id="([0-9]+)', info_page, 'video_id')
|
||||
else:
|
||||
data = json_data
|
||||
video_id = mobj.group('id')
|
||||
|
||||
rss = self._download_xml('http://blip.tv/rss/flash/%s' % video_id, video_id, 'Downloading video RSS')
|
||||
|
||||
def blip(s):
|
||||
return '{http://blip.tv/dtd/blip/1.0}%s' % s
|
||||
|
||||
def media(s):
|
||||
return '{http://search.yahoo.com/mrss/}%s' % s
|
||||
|
||||
def itunes(s):
|
||||
return '{http://www.itunes.com/dtds/podcast-1.0.dtd}%s' % s
|
||||
|
||||
item = rss.find('channel/item')
|
||||
|
||||
video_id = item.find(blip('item_id')).text
|
||||
title = item.find('./title').text
|
||||
description = clean_html(compat_str(item.find(blip('puredescription')).text))
|
||||
timestamp = parse_iso8601(item.find(blip('datestamp')).text)
|
||||
uploader = item.find(blip('user')).text
|
||||
uploader_id = item.find(blip('userid')).text
|
||||
duration = int(item.find(blip('runtime')).text)
|
||||
media_thumbnail = item.find(media('thumbnail'))
|
||||
thumbnail = media_thumbnail.get('url') if media_thumbnail is not None else item.find(itunes('image')).text
|
||||
categories = [category.text for category in item.findall('category')]
|
||||
|
||||
video_id = compat_str(data['item_id'])
|
||||
upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
|
||||
subtitles = {}
|
||||
formats = []
|
||||
if 'additionalMedia' in data:
|
||||
for f in data['additionalMedia']:
|
||||
if f.get('file_type_srt') == 1:
|
||||
LANGS = {
|
||||
'english': 'en',
|
||||
}
|
||||
lang = f['role'].rpartition('-')[-1].strip().lower()
|
||||
langcode = LANGS.get(lang, lang)
|
||||
subtitles[langcode] = f['url']
|
||||
continue
|
||||
if not int(f['media_width']): # filter m3u8
|
||||
continue
|
||||
subtitles = {}
|
||||
|
||||
media_group = item.find(media('group'))
|
||||
for media_content in media_group.findall(media('content')):
|
||||
url = media_content.get('url')
|
||||
role = media_content.get(blip('role'))
|
||||
msg = self._download_webpage(
|
||||
url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url',
|
||||
video_id, 'Resolving URL for %s' % role)
|
||||
real_url = compat_urlparse.parse_qs(msg)['message'][0]
|
||||
|
||||
media_type = media_content.get('type')
|
||||
if media_type == 'text/srt' or url.endswith('.srt'):
|
||||
LANGS = {
|
||||
'english': 'en',
|
||||
}
|
||||
lang = role.rpartition('-')[-1].strip().lower()
|
||||
langcode = LANGS.get(lang, lang)
|
||||
subtitles[langcode] = url
|
||||
elif media_type.startswith('video/'):
|
||||
formats.append({
|
||||
'url': f['url'],
|
||||
'format_id': f['role'],
|
||||
'width': int(f['media_width']),
|
||||
'height': int(f['media_height']),
|
||||
'url': real_url,
|
||||
'format_id': role,
|
||||
'format_note': media_type,
|
||||
'vcodec': media_content.get(blip('vcodec')),
|
||||
'acodec': media_content.get(blip('acodec')),
|
||||
'filesize': media_content.get('filesize'),
|
||||
'width': int(media_content.get('width')),
|
||||
'height': int(media_content.get('height')),
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
'url': data['media']['url'],
|
||||
'width': int(data['media']['width']),
|
||||
'height': int(data['media']['height']),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
# subtitles
|
||||
@@ -107,12 +129,14 @@ class BlipTVIE(SubtitlesInfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'uploader': data['display_name'],
|
||||
'upload_date': upload_date,
|
||||
'title': data['title'],
|
||||
'thumbnail': data['thumbnailUrl'],
|
||||
'description': data['description'],
|
||||
'user_agent': 'iTunes/10.6.1',
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'categories': categories,
|
||||
'formats': formats,
|
||||
'subtitles': video_subtitles,
|
||||
}
|
||||
|
@@ -17,15 +17,13 @@ class BRIE(InfoExtractor):
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.br.de/mediathek/video/anselm-gruen-114.html',
|
||||
'md5': 'c4f83cf0f023ba5875aba0bf46860df2',
|
||||
'url': 'http://www.br.de/mediathek/video/sendungen/heimatsound/heimatsound-festival-2014-trailer-100.html',
|
||||
'md5': '93556dd2bcb2948d9259f8670c516d59',
|
||||
'info_dict': {
|
||||
'id': '2c8d81c5-6fb7-4a74-88d4-e768e5856532',
|
||||
'id': '25e279aa-1ffd-40fd-9955-5325bd48a53a',
|
||||
'ext': 'mp4',
|
||||
'title': 'Feiern und Verzichten',
|
||||
'description': 'Anselm Grün: Feiern und Verzichten',
|
||||
'uploader': 'BR/Birgit Baier',
|
||||
'upload_date': '20140301',
|
||||
'title': 'Am 1. und 2. August in Oberammergau',
|
||||
'description': 'md5:dfd224e5aa6819bc1fcbb7826a932021',
|
||||
}
|
||||
},
|
||||
{
|
||||
|
@@ -15,6 +15,7 @@ from ..utils import (
|
||||
compat_urllib_request,
|
||||
compat_parse_qs,
|
||||
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
unsmuggle_url,
|
||||
unescapeHTML,
|
||||
@@ -29,10 +30,11 @@ class BrightcoveIE(InfoExtractor):
|
||||
{
|
||||
# From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
|
||||
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
|
||||
'file': '2371591881001.mp4',
|
||||
'md5': '5423e113865d26e40624dce2e4b45d95',
|
||||
'note': 'Test Brightcove downloads and detection in GenericIE',
|
||||
'info_dict': {
|
||||
'id': '2371591881001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
|
||||
'uploader': '8TV',
|
||||
'description': 'md5:a950cc4285c43e44d763d036710cd9cd',
|
||||
@@ -41,8 +43,9 @@ class BrightcoveIE(InfoExtractor):
|
||||
{
|
||||
# From http://medianetwork.oracle.com/video/player/1785452137001
|
||||
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001',
|
||||
'file': '1785452137001.flv',
|
||||
'info_dict': {
|
||||
'id': '1785452137001',
|
||||
'ext': 'flv',
|
||||
'title': 'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges',
|
||||
'description': 'John Rose speaks at the JVM Language Summit, August 1, 2012.',
|
||||
'uploader': 'Oracle',
|
||||
@@ -70,7 +73,20 @@ class BrightcoveIE(InfoExtractor):
|
||||
'description': 'md5:363109c02998fee92ec02211bd8000df',
|
||||
'uploader': 'National Ballet of Canada',
|
||||
},
|
||||
}
|
||||
},
|
||||
{
|
||||
# test flv videos served by akamaihd.net
|
||||
# From http://www.redbull.com/en/bike/stories/1331655643987/replay-uci-dh-world-cup-2014-from-fort-william
|
||||
'url': 'http://c.brightcove.com/services/viewer/htmlFederated?%40videoPlayer=ref%3ABC2996102916001&linkBaseURL=http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fvideos%2F1331655630249%2Freplay-uci-fort-william-2014-dh&playerKey=AQ%7E%7E%2CAAAApYJ7UqE%7E%2Cxqr_zXk0I-zzNndy8NlHogrCb5QdyZRf&playerID=1398061561001#__youtubedl_smuggle=%7B%22Referer%22%3A+%22http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fstories%2F1331655643987%2Freplay-uci-dh-world-cup-2014-from-fort-william%22%7D',
|
||||
# The md5 checksum changes on each download
|
||||
'info_dict': {
|
||||
'id': '2996102916001',
|
||||
'ext': 'flv',
|
||||
'title': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
|
||||
'uploader': 'Red Bull TV',
|
||||
'description': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
@classmethod
|
||||
@@ -187,7 +203,7 @@ class BrightcoveIE(InfoExtractor):
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
self.report_extraction(video_id)
|
||||
info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
|
||||
info = self._search_regex(r'var experienceJSON = ({.*});', webpage, 'json')
|
||||
info = json.loads(info)['data']
|
||||
video_info = info['programmedContent']['videoPlayer']['mediaDTO']
|
||||
video_info['_youtubedl_adServerURL'] = info.get('adServerURL')
|
||||
@@ -219,12 +235,26 @@ class BrightcoveIE(InfoExtractor):
|
||||
|
||||
renditions = video_info.get('renditions')
|
||||
if renditions:
|
||||
renditions = sorted(renditions, key=lambda r: r['size'])
|
||||
info['formats'] = [{
|
||||
'url': rend['defaultURL'],
|
||||
'height': rend.get('frameHeight'),
|
||||
'width': rend.get('frameWidth'),
|
||||
} for rend in renditions]
|
||||
formats = []
|
||||
for rend in renditions:
|
||||
url = rend['defaultURL']
|
||||
if rend['remote']:
|
||||
# This type of renditions are served through akamaihd.net,
|
||||
# but they don't use f4m manifests
|
||||
url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB'
|
||||
ext = 'flv'
|
||||
else:
|
||||
ext = determine_ext(url)
|
||||
size = rend.get('size')
|
||||
formats.append({
|
||||
'url': url,
|
||||
'ext': ext,
|
||||
'height': rend.get('frameHeight'),
|
||||
'width': rend.get('frameWidth'),
|
||||
'filesize': size if size != 0 else None,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
info['formats'] = formats
|
||||
elif video_info.get('FLVFullLengthURL') is not None:
|
||||
info.update({
|
||||
'url': video_info['FLVFullLengthURL'],
|
||||
|
@@ -4,17 +4,20 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
class CanalplusIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))'
|
||||
_VALID_URL = r'https?://(?:www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'
|
||||
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
|
||||
IE_NAME = 'canalplus.fr'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470',
|
||||
'md5': '60c29434a416a83c15dae2587d47027d',
|
||||
'md5': '3db39fb48b9685438ecf33a1078023e4',
|
||||
'info_dict': {
|
||||
'id': '922470',
|
||||
'ext': 'flv',
|
||||
@@ -26,10 +29,13 @@ class CanalplusIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = mobj.groupdict().get('id')
|
||||
|
||||
# Beware, some subclasses do not define an id group
|
||||
display_id = url_basename(mobj.group('path'))
|
||||
|
||||
if video_id is None:
|
||||
webpage = self._download_webpage(url, mobj.group('path'))
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(r'<canal:player videoId="(\d+)"', webpage, 'video id')
|
||||
|
||||
info_url = self._VIDEO_INFO_TEMPLATE % video_id
|
||||
@@ -53,6 +59,7 @@ class CanalplusIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': '%s - %s' % (infos.find('TITRAGE/TITRE').text,
|
||||
infos.find('TITRAGE/SOUS_TITRE').text),
|
||||
'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text),
|
||||
|
@@ -1,10 +1,12 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -13,9 +15,10 @@ class CinemassacreIE(InfoExtractor):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
|
||||
'file': '19911.mp4',
|
||||
'md5': '782f8504ca95a0eba8fc9177c373eec7',
|
||||
'md5': 'fde81fbafaee331785f58cd6c0d46190',
|
||||
'info_dict': {
|
||||
'id': '19911',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20121110',
|
||||
'title': '“Angry Video Game Nerd: The Movie” – Trailer',
|
||||
'description': 'md5:fb87405fcb42a331742a0dce2708560b',
|
||||
@@ -23,9 +26,10 @@ class CinemassacreIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
|
||||
'file': '521be8ef82b16.mp4',
|
||||
'md5': 'dec39ee5118f8d9cc067f45f9cbe3a35',
|
||||
'md5': 'd72f10cd39eac4215048f62ab477a511',
|
||||
'info_dict': {
|
||||
'id': '521be8ef82b16',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20131002',
|
||||
'title': 'The Mummy’s Hand (1940)',
|
||||
},
|
||||
@@ -50,29 +54,40 @@ class CinemassacreIE(InfoExtractor):
|
||||
r'<div class="entry-content">(?P<description>.+?)</div>',
|
||||
webpage, 'description', flags=re.DOTALL, fatal=False)
|
||||
|
||||
playerdata = self._download_webpage(playerdata_url, video_id)
|
||||
playerdata = self._download_webpage(playerdata_url, video_id, 'Downloading player webpage')
|
||||
video_thumbnail = self._search_regex(
|
||||
r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False)
|
||||
sd_url = self._search_regex(r'file: \'([^\']+)\', label: \'SD\'', playerdata, 'sd_file')
|
||||
videolist_url = self._search_regex(r'file: \'([^\']+\.smil)\'}', playerdata, 'videolist_url')
|
||||
|
||||
sd_url = self._html_search_regex(r'file: \'([^\']+)\', label: \'SD\'', playerdata, 'sd_file')
|
||||
hd_url = self._html_search_regex(
|
||||
r'file: \'([^\']+)\', label: \'HD\'', playerdata, 'hd_file',
|
||||
default=None)
|
||||
video_thumbnail = self._html_search_regex(r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False)
|
||||
videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML')
|
||||
|
||||
formats = [{
|
||||
'url': sd_url,
|
||||
'ext': 'mp4',
|
||||
'format': 'sd',
|
||||
'format_id': 'sd',
|
||||
'quality': 1,
|
||||
}]
|
||||
if hd_url:
|
||||
formats.append({
|
||||
'url': hd_url,
|
||||
'ext': 'mp4',
|
||||
'format': 'hd',
|
||||
'format_id': 'hd',
|
||||
'quality': 2,
|
||||
})
|
||||
formats = []
|
||||
baseurl = sd_url[:sd_url.rfind('/')+1]
|
||||
for video in videolist.findall('.//video'):
|
||||
src = video.get('src')
|
||||
if not src:
|
||||
continue
|
||||
file_ = src.partition(':')[-1]
|
||||
width = int_or_none(video.get('width'))
|
||||
height = int_or_none(video.get('height'))
|
||||
bitrate = int_or_none(video.get('system-bitrate'))
|
||||
format = {
|
||||
'url': baseurl + file_,
|
||||
'format_id': src.rpartition('.')[0].rpartition('_')[-1],
|
||||
}
|
||||
if width or height:
|
||||
format.update({
|
||||
'tbr': bitrate // 1000 if bitrate else None,
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
else:
|
||||
format.update({
|
||||
'abr': bitrate // 1000 if bitrate else None,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
formats.append(format)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
@@ -1,19 +1,19 @@
|
||||
from __future__ import unicode_literals
|
||||
from .mtv import MTVIE
|
||||
|
||||
|
||||
class CMTIE(MTVIE):
|
||||
IE_NAME = u'cmt.com'
|
||||
IE_NAME = 'cmt.com'
|
||||
_VALID_URL = r'https?://www\.cmt\.com/videos/.+?/(?P<videoid>[^/]+)\.jhtml'
|
||||
_FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
u'url': u'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061',
|
||||
u'md5': u'e6b7ef3c4c45bbfae88061799bbba6c2',
|
||||
u'info_dict': {
|
||||
u'id': u'989124',
|
||||
u'ext': u'mp4',
|
||||
u'title': u'Garth Brooks - "The Call (featuring Trisha Yearwood)"',
|
||||
u'description': u'Blame It All On My Roots',
|
||||
},
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061',
|
||||
'md5': 'e6b7ef3c4c45bbfae88061799bbba6c2',
|
||||
'info_dict': {
|
||||
'id': '989124',
|
||||
'ext': 'mp4',
|
||||
'title': 'Garth Brooks - "The Call (featuring Trisha Yearwood)"',
|
||||
'description': 'Blame It All On My Roots',
|
||||
},
|
||||
]
|
||||
}]
|
||||
|
@@ -79,8 +79,11 @@ class CNNIE(InfoExtractor):
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = sorted([((int(t.attrib['height']),int(t.attrib['width'])), t.text) for t in info.findall('images/image')])
|
||||
thumbs_dict = [{'resolution': res, 'url': t_url} for (res, t_url) in thumbnails]
|
||||
thumbnails = [{
|
||||
'height': int(t.attrib['height']),
|
||||
'width': int(t.attrib['width']),
|
||||
'url': t.text,
|
||||
} for t in info.findall('images/image')]
|
||||
|
||||
metas_el = info.find('metas')
|
||||
upload_date = (
|
||||
@@ -93,8 +96,7 @@ class CNNIE(InfoExtractor):
|
||||
'id': info.attrib['id'],
|
||||
'title': info.find('headline').text,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnails[-1][1],
|
||||
'thumbnails': thumbs_dict,
|
||||
'thumbnails': thumbnails,
|
||||
'description': info.find('description').text,
|
||||
'duration': duration,
|
||||
'upload_date': upload_date,
|
||||
|
@@ -130,7 +130,7 @@ class ComedyCentralShowsIE(InfoExtractor):
|
||||
raise ExtractorError('Invalid redirected URL: ' + url)
|
||||
if mobj.group('episode') == '':
|
||||
raise ExtractorError('Redirected URL is still not specific: ' + url)
|
||||
epTitle = mobj.group('episode').rpartition('/')[-1]
|
||||
epTitle = (mobj.group('episode') or mobj.group('videotitle')).rpartition('/')[-1]
|
||||
|
||||
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
|
||||
if len(mMovieParams) == 0:
|
||||
@@ -188,7 +188,7 @@ class ComedyCentralShowsIE(InfoExtractor):
|
||||
})
|
||||
formats.append({
|
||||
'format_id': 'rtmp-%s' % format,
|
||||
'url': rtmp_video_url,
|
||||
'url': rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm'),
|
||||
'ext': self._video_extensions.get(format, 'mp4'),
|
||||
'height': h,
|
||||
'width': w,
|
||||
|
@@ -1,11 +1,12 @@
|
||||
import base64
|
||||
import hashlib
|
||||
import json
|
||||
import netrc
|
||||
import os
|
||||
import re
|
||||
import socket
|
||||
import sys
|
||||
import netrc
|
||||
import time
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from ..utils import (
|
||||
@@ -92,8 +93,12 @@ class InfoExtractor(object):
|
||||
unique, but available before title. Typically, id is
|
||||
something like "4234987", title "Dancing naked mole rats",
|
||||
and display_id "dancing-naked-mole-rats"
|
||||
thumbnails: A list of dictionaries (with the entries "resolution" and
|
||||
"url") for the varying thumbnails
|
||||
thumbnails: A list of dictionaries, with the following entries:
|
||||
* "url"
|
||||
* "width" (optional, int)
|
||||
* "height" (optional, int)
|
||||
* "resolution" (optional, string "{width}x{height}",
|
||||
deprecated)
|
||||
thumbnail: Full URL to a video thumbnail image.
|
||||
description: One-line video description.
|
||||
uploader: Full name of the video uploader.
|
||||
@@ -113,6 +118,8 @@ class InfoExtractor(object):
|
||||
webpage_url: The url to the video webpage, if given to youtube-dl it
|
||||
should allow to get the same result again. (It will be set
|
||||
by YoutubeDL if it's missing)
|
||||
categories: A list of categories that the video falls in, for example
|
||||
["Sports", "Berlin"]
|
||||
|
||||
Unless mentioned otherwise, the fields should be Unicode strings.
|
||||
|
||||
@@ -242,10 +249,11 @@ class InfoExtractor(object):
|
||||
url = url_or_request.get_full_url()
|
||||
except AttributeError:
|
||||
url = url_or_request
|
||||
if len(url) > 200:
|
||||
h = u'___' + hashlib.md5(url.encode('utf-8')).hexdigest()
|
||||
url = url[:200 - len(h)] + h
|
||||
raw_filename = ('%s_%s.dump' % (video_id, url))
|
||||
basen = '%s_%s' % (video_id, url)
|
||||
if len(basen) > 240:
|
||||
h = u'___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
|
||||
basen = basen[:240 - len(h)] + h
|
||||
raw_filename = basen + '.dump'
|
||||
filename = sanitize_filename(raw_filename, restricted=True)
|
||||
self.to_screen(u'Saving request to ' + filename)
|
||||
with open(filename, 'wb') as outf:
|
||||
@@ -452,6 +460,9 @@ class InfoExtractor(object):
|
||||
if secure: regexes = self._og_regexes('video:secure_url') + regexes
|
||||
return self._html_search_regex(regexes, html, name, **kargs)
|
||||
|
||||
def _og_search_url(self, html, **kargs):
|
||||
return self._og_search_property('url', html, **kargs)
|
||||
|
||||
def _html_search_meta(self, name, html, display_name=None, fatal=False):
|
||||
if display_name is None:
|
||||
display_name = name
|
||||
@@ -548,6 +559,30 @@ class InfoExtractor(object):
|
||||
)
|
||||
formats.sort(key=_formats_key)
|
||||
|
||||
def http_scheme(self):
    """ Either "http:" or "https:", depending on the user's preferences

    Returns "http:" when the downloader's 'prefer_insecure' parameter is
    truthy, and "https:" otherwise (including when the parameter is unset).
    """
    return (
        'http:'
        if self._downloader.params.get('prefer_insecure', False)
        else 'https:')
|
||||
|
||||
def _proto_relative_url(self, url, scheme=None):
|
||||
if url is None:
|
||||
return url
|
||||
if url.startswith('//'):
|
||||
if scheme is None:
|
||||
scheme = self.http_scheme()
|
||||
return scheme + url
|
||||
else:
|
||||
return url
|
||||
|
||||
def _sleep(self, timeout, video_id, msg_template=None):
|
||||
if msg_template is None:
|
||||
msg_template = u'%(video_id)s: Waiting for %(timeout)s seconds'
|
||||
msg = msg_template % {'video_id': video_id, 'timeout': timeout}
|
||||
self.to_screen(msg)
|
||||
time.sleep(timeout)
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
|
@@ -150,7 +150,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'uploader': info['owner_screenname'],
|
||||
'uploader': info['owner.screenname'],
|
||||
'upload_date': video_upload_date,
|
||||
'title': self._og_search_title(webpage),
|
||||
'subtitles': video_subtitles,
|
||||
|
@@ -7,9 +7,9 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class DiscoveryIE(InfoExtractor):
|
||||
_VALID_URL = r'http://dsc\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
|
||||
_VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
|
||||
_TEST = {
|
||||
'url': 'http://dsc.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
|
||||
'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
|
||||
'md5': 'e12614f9ee303a6ccef415cb0793eba2',
|
||||
'info_dict': {
|
||||
'id': '614784',
|
||||
|
@@ -1,39 +1,37 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
)
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
||||
class DreiSatIE(InfoExtractor):
|
||||
IE_NAME = '3sat'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
|
||||
_TEST = {
|
||||
u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983",
|
||||
u'file': u'36983.mp4',
|
||||
u'md5': u'9dcfe344732808dbfcc901537973c922',
|
||||
u'info_dict': {
|
||||
u"title": u"Kaffeeland Schweiz",
|
||||
u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. SCHWEIZWEIT nimmt die Kaffeekultur unter die...",
|
||||
u"uploader": u"3sat",
|
||||
u"upload_date": u"20130622"
|
||||
'url': 'http://www.3sat.de/mediathek/index.php?obj=36983',
|
||||
'md5': '9dcfe344732808dbfcc901537973c922',
|
||||
'info_dict': {
|
||||
'id': '36983',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kaffeeland Schweiz',
|
||||
'description': 'md5:cc4424b18b75ae9948b13929a0814033',
|
||||
'uploader': '3sat',
|
||||
'upload_date': '20130622'
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
|
||||
details_doc = self._download_xml(details_url, video_id, note=u'Downloading video details')
|
||||
details_doc = self._download_xml(details_url, video_id, 'Downloading video details')
|
||||
|
||||
thumbnail_els = details_doc.findall('.//teaserimage')
|
||||
thumbnails = [{
|
||||
'width': te.attrib['key'].partition('x')[0],
|
||||
'height': te.attrib['key'].partition('x')[2],
|
||||
'width': int(te.attrib['key'].partition('x')[0]),
|
||||
'height': int(te.attrib['key'].partition('x')[2]),
|
||||
'url': te.text,
|
||||
} for te in thumbnail_els]
|
||||
|
||||
|
91
youtube_dl/extractor/drtv.py
Normal file
91
youtube_dl/extractor/drtv.py
Normal file
@@ -0,0 +1,91 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from .common import ExtractorError
|
||||
from ..utils import parse_iso8601
|
||||
|
||||
|
||||
class DRTVIE(SubtitlesInfoExtractor):
    """Extractor for programme pages matching _VALID_URL on dr.dk/tv."""

    _VALID_URL = r'http://(?:www\.)?dr\.dk/tv/se/[^/]+/(?P<id>[\da-z-]+)'

    _TEST = {
        'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8',
        'md5': '4a7e1dd65cdb2643500a3f753c942f25',
        'info_dict': {
            'id': 'partiets-mand-7-8',
            'ext': 'mp4',
            'title': 'Partiets mand (7:8)',
            'description': 'md5:a684b90a8f9336cd4aab94b7647d7862',
            'timestamp': 1403047940,
            'upload_date': '20140617',
            'duration': 1299.040,
        },
    }

    # Maps the language names used in the programcard JSON to ISO 639-1
    # language codes.  Danish is 'da' ('dk' is the country code, not the
    # language code).  Unknown names are passed through unchanged.
    _SUBTITLE_LANGS = {
        'Danish': 'da',
    }

    def _real_extract(self, url):
        """Build the info dict for the programme identified by `url`.

        Downloads the programcard JSON for the slug captured by _VALID_URL
        and collects formats, thumbnail, duration and subtitles from its
        'Assets' list.

        Returns None after listing subtitles when the 'listsubtitles'
        downloader parameter is set.  Raises ExtractorError (expected=True)
        when no formats were found and the asset is flagged as restricted
        to Denmark.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        programcard = self._download_json(
            'http://www.dr.dk/mu/programcard/expanded/%s' % video_id, video_id, 'Downloading video JSON')

        data = programcard['Data'][0]

        title = data['Title']
        description = data['Description']
        # The last five characters are cut off before parsing
        # (presumably a suffix parse_iso8601 cannot handle -- TODO confirm).
        timestamp = parse_iso8601(data['CreatedTime'][:-5])

        thumbnail = None
        duration = None

        restricted_to_denmark = False

        formats = []
        subtitles = {}

        for asset in data['Assets']:
            if asset['Kind'] == 'Image':
                thumbnail = asset['Uri']
            elif asset['Kind'] == 'VideoResource':
                duration = asset['DurationInMilliseconds'] / 1000.0
                restricted_to_denmark = asset['RestrictedToDenmark']
                for link in asset['Links']:
                    target = link['Target']
                    uri = link['Uri']
                    is_hds = target == 'HDS'
                    formats.append({
                        # HDS links get extra query parameters appended.
                        'url': uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43' if is_hds else uri,
                        'format_id': target,
                        'ext': link['FileFormat'],
                        'preference': -1 if is_hds else -2,
                    })
                subtitles_list = asset.get('SubtitlesList')
                if isinstance(subtitles_list, list):
                    for subs in subtitles_list:
                        lang = subs['Language']
                        subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = subs['Uri']

        if not formats and restricted_to_denmark:
            raise ExtractorError(
                'Unfortunately, DR is not allowed to show this program outside Denmark.', expected=True)

        self._sort_formats(formats)

        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id, subtitles)
            return

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'duration': duration,
            'formats': formats,
            'subtitles': self.extract_subtitles(video_id, subtitles),
        }
|
54
youtube_dl/extractor/empflix.py
Normal file
54
youtube_dl/extractor/empflix.py
Normal file
@@ -0,0 +1,54 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class EmpflixIE(InfoExtractor):
    """Extractor for video pages matching _VALID_URL on empflix.com."""

    _VALID_URL = r'^https?://www\.empflix\.com/videos/.*?-(?P<id>[0-9]+)\.html'
    _TEST = {
        'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
        'md5': 'b1bc15b6412d33902d6e5952035fcabc',
        'info_dict': {
            'id': '33051',
            'ext': 'mp4',
            'title': 'Amateur Finger Fuck',
            'description': 'Amateur solo finger fucking.',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for `url`.

        Title, description and the config URL are scraped from the video
        page; the formats come from the config XML that URL points to.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        page = self._download_webpage(url, video_id)
        info = {
            'id': video_id,
            'age_limit': self._rta_search(page),
        }

        info['title'] = self._html_search_regex(
            r'name="title" value="(?P<title>[^"]*)"', page, 'title')
        # The description is optional: fatal=False is passed to the search.
        info['description'] = self._html_search_regex(
            r'name="description" value="([^"]*)"', page, 'description', fatal=False)

        config_url = self._html_search_regex(
            r'flashvars\.config = escape\("([^"]+)"',
            page, 'flashvars.config')
        config_doc = self._download_xml(
            config_url, video_id, note='Downloading metadata')

        # One format per <item> element under <quality>.
        formats = []
        for entry in config_doc.findall('./quality/item'):
            formats.append({
                'url': entry.find('videoLink').text,
                'format_id': entry.find('res').text,
            })
        info['formats'] = formats

        return info
|
@@ -37,7 +37,7 @@ class ExtremeTubeIE(InfoExtractor):
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'<h1 [^>]*?title="([^"]+)"[^>]*>\1<', webpage, 'title')
|
||||
r'<h1 [^>]*?title="([^"]+)"[^>]*>', webpage, 'title')
|
||||
uploader = self._html_search_regex(
|
||||
r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, 'uploader',
|
||||
fatal=False)
|
||||
|
63
youtube_dl/extractor/fc2.py
Normal file
63
youtube_dl/extractor/fc2.py
Normal file
@@ -0,0 +1,63 @@
|
||||
#! -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
|
||||
class FC2IE(InfoExtractor):
    """Extractor for content pages matching _VALID_URL on video.fc2.com."""

    _VALID_URL = r'^http://video\.fc2\.com/((?P<lang>[^/]+)/)?content/(?P<id>[^/]+)'
    IE_NAME = 'fc2'
    _TEST = {
        'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
        'md5': 'a6ebe8ebe0396518689d963774a54eb7',
        'info_dict': {
            'id': '20121103kUan1KHs',
            'ext': 'flv',
            'title': 'Boxing again with Puff',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for `url`.

        The media location is read from the query-string-encoded response
        of ginfo.php.  Raises ExtractorError when that response carries an
        'err_code' field.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        webpage = self._download_webpage(url, video_id)
        # Session cookies must be cleared before the info request.
        self._downloader.cookiejar.clear_session_cookies()

        title = self._og_search_title(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        # Request token: MD5 of the video id followed by a fixed suffix.
        token_source = video_id + '_gGddgPfeaf_gzyr'
        mimi = hashlib.md5(token_source.encode('utf-8')).hexdigest()

        # Fully percent-encoded referer, with dots escaped as well.
        refer = url.replace('/content/', '/a/content/')
        quoted_refer = compat_urllib_request.quote(refer, safe='').replace('.', '%2E')

        info_url = (
            "http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&".
            format(video_id, mimi, quoted_refer))

        info_webpage = self._download_webpage(
            info_url, video_id, note='Downloading info page')
        info = compat_urlparse.parse_qs(info_webpage)

        if 'err_code' in info:
            raise ExtractorError('Error code: %s' % info['err_code'][0])

        video_url = info['filepath'][0] + '?mid=' + info['mid'][0]

        # A non-empty title in the info response replaces the OpenGraph title.
        reported_titles = info.get('title')
        if reported_titles:
            title = reported_titles[0]

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'ext': 'flv',
            'thumbnail': thumbnail,
        }
|
@@ -15,6 +15,7 @@ class FirstpostIE(InfoExtractor):
|
||||
'id': '1025403',
|
||||
'ext': 'mp4',
|
||||
'title': 'India to launch indigenous aircraft carrier INS Vikrant today',
|
||||
'description': 'md5:feef3041cb09724e0bdc02843348f5f4',
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,13 +23,16 @@ class FirstpostIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
page = self._download_webpage(url, video_id)
|
||||
title = self._html_search_meta('twitter:title', page, 'title')
|
||||
description = self._html_search_meta('twitter:description', page, 'title')
|
||||
|
||||
data = self._download_xml(
|
||||
'http://www.firstpost.com/getvideoxml-%s.xml' % video_id, video_id,
|
||||
'Downloading video XML')
|
||||
|
||||
item = data.find('./playlist/item')
|
||||
thumbnail = item.find('./image').text
|
||||
title = item.find('./title').text
|
||||
|
||||
formats = [
|
||||
{
|
||||
@@ -42,6 +46,7 @@ class FirstpostIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -48,24 +48,36 @@ class PluzzIE(FranceTVBaseInfoExtractor):
|
||||
|
||||
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
IE_NAME = 'francetvinfo.fr'
|
||||
_VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+)\.html'
|
||||
_VALID_URL = r'https?://www\.francetvinfo\.fr/.*/(?P<title>.+)\.html'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
|
||||
'file': '84981923.mp4',
|
||||
'info_dict': {
|
||||
'id': '84981923',
|
||||
'ext': 'mp4',
|
||||
'title': 'Soir 3',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
|
||||
'info_dict': {
|
||||
'id': 'EV_20019',
|
||||
'ext': 'mp4',
|
||||
'title': 'Débat des candidats à la Commission européenne',
|
||||
'description': 'Débat des candidats à la Commission européenne',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'HLS (reqires ffmpeg)'
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
page_title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, page_title)
|
||||
video_id = self._search_regex(r'id-video=(\d+?)[@"]', webpage, 'video id')
|
||||
video_id = self._search_regex(r'id-video=((?:[^0-9]*?_)?[0-9]+)[@"]', webpage, 'video id')
|
||||
return self._extract_video(video_id)
|
||||
|
||||
|
||||
|
@@ -4,22 +4,32 @@ import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class FunnyOrDieIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?funnyordie\.com/(?P<type>embed|videos)/(?P<id>[0-9a-f]+)(?:$|[?#/])'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version',
|
||||
'file': '0732f586d7.mp4',
|
||||
'md5': 'f647e9e90064b53b6e046e75d0241fbd',
|
||||
'md5': 'bcd81e0c4f26189ee09be362ad6e6ba9',
|
||||
'info_dict': {
|
||||
'description': ('Lyrics changed to match the video. Spoken cameo '
|
||||
'by Obscurus Lupa (from ThatGuyWithTheGlasses.com). Based on a '
|
||||
'concept by Dustin McLean (DustFilms.com). Performed, edited, '
|
||||
'and written by David A. Scott.'),
|
||||
'id': '0732f586d7',
|
||||
'ext': 'mp4',
|
||||
'title': 'Heart-Shaped Box: Literal Video Version',
|
||||
'description': 'md5:ea09a01bc9a1c46d9ab696c01747c338',
|
||||
'thumbnail': 're:^http:.*\.jpg$',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.funnyordie.com/embed/e402820827',
|
||||
'md5': 'ff4d83318f89776ed0250634cfaa8d36',
|
||||
'info_dict': {
|
||||
'id': 'e402820827',
|
||||
'ext': 'mp4',
|
||||
'title': 'Please Use This Song (Jon Lajoie)',
|
||||
'description': 'md5:2ed27d364f5a805a6dba199faaf6681d',
|
||||
'thumbnail': 're:^http:.*\.jpg$',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -27,27 +37,34 @@ class FunnyOrDieIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._search_regex(
|
||||
[r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''],
|
||||
webpage, 'video URL', flags=re.DOTALL)
|
||||
links = re.findall(r'<source src="([^"]+/v)\d+\.([^"]+)" type=\'video', webpage)
|
||||
if not links:
|
||||
raise ExtractorError('No media links available for %s' % video_id)
|
||||
|
||||
if mobj.group('type') == 'embed':
|
||||
post_json = self._search_regex(
|
||||
r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
|
||||
post = json.loads(post_json)
|
||||
title = post['name']
|
||||
description = post.get('description')
|
||||
thumbnail = post.get('picture')
|
||||
else:
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = None
|
||||
links.sort(key=lambda link: 1 if link[1] == 'mp4' else 0)
|
||||
|
||||
bitrates = self._html_search_regex(r'<source src="[^"]+/v,((?:\d+,)+)\.mp4\.csmil', webpage, 'video bitrates')
|
||||
bitrates = [int(b) for b in bitrates.rstrip(',').split(',')]
|
||||
bitrates.sort()
|
||||
|
||||
formats = []
|
||||
|
||||
for bitrate in bitrates:
|
||||
for link in links:
|
||||
formats.append({
|
||||
'url': '%s%d.%s' % (link[0], bitrate, link[1]),
|
||||
'format_id': '%s-%d' % (link[1], bitrate),
|
||||
'vbr': bitrate,
|
||||
})
|
||||
|
||||
post_json = self._search_regex(
|
||||
r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
|
||||
post = json.loads(post_json)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'title': post['name'],
|
||||
'description': post.get('description'),
|
||||
'thumbnail': post.get('picture'),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -15,7 +15,7 @@ class GamekingsIE(InfoExtractor):
|
||||
'id': '20130811',
|
||||
'ext': 'mp4',
|
||||
'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review',
|
||||
'description': 'md5:632e61a9f97d700e83f43d77ddafb6a4',
|
||||
'description': 'md5:36fd701e57e8c15ac8682a2374c99731',
|
||||
}
|
||||
}
|
||||
|
||||
|
90
youtube_dl/extractor/gameone.py
Normal file
90
youtube_dl/extractor/gameone.py
Normal file
@@ -0,0 +1,90 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_with_ns,
|
||||
parse_iso8601
|
||||
)
|
||||
|
||||
NAMESPACE_MAP = {
|
||||
'media': 'http://search.yahoo.com/mrss/',
|
||||
}
|
||||
|
||||
# URL prefix to download the mp4 files directly instead of streaming via rtmp
|
||||
# Credits go to XBox-Maniac
|
||||
# http://board.jdownloader.org/showpost.php?p=185835&postcount=31
|
||||
RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/'
|
||||
|
||||
|
||||
class GameOneIE(InfoExtractor):
    """Extractor for episodes published under gameone.de/tv/<id>."""

    _VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.gameone.de/tv/288',
        'md5': '136656b7fb4c9cb4a8e2d500651c499b',
        'info_dict': {
            'id': '288',
            'ext': 'mp4',
            'title': 'Game One - Folge 288',
            'duration': 1238,
            'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg',
            'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1',
            'age_limit': 16,
            'upload_date': '20140513',
            'timestamp': 1399980122,
        },
    }

    def _real_extract(self, url):
        """Collect metadata and MP4 formats for a single episode.

        The episode page's og:video URL carries an ``mrss=`` parameter
        pointing at an MRSS feed. The feed's media:content element links
        to a second XML document that lists the renditions.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        webpage = self._download_webpage(url, video_id)
        og_video = self._og_search_video_url(webpage, secure=False)
        description = self._html_search_meta('description', webpage)

        # The age rating appears as "age=<n>" inside a meta label; '0' is
        # the default handed to _search_regex.
        age_label = self._html_search_meta('age-de-meta-label', webpage)
        age_limit = int(
            self._search_regex(r'age=(\d+)', age_label, 'age_limit', '0'))

        mrss_url = self._search_regex(r'mrss=([^&]+)', og_video, 'mrss')
        feed = self._download_xml(mrss_url, video_id, 'Downloading mrss')

        title = feed.find('.//item/title').text
        thumbnail = feed.find('.//item/image').get('url')
        timestamp = parse_iso8601(feed.find('.//pubDate').text, delimiter=' ')
        media_content = feed.find(
            xpath_with_ns('.//media:content', NAMESPACE_MAP))
        content_url = media_content.get('url')

        renditions_doc = self._download_xml(
            content_url, video_id, 'Downloading media:content')
        renditions = renditions_doc.findall('.//rendition')
        # Duration is read from the first rendition only.
        duration = int(renditions[0].get('duration'))

        formats = []
        for rendition in renditions:
            # Everything up to the last "/r2" is replaced by RAW_MP4_URL,
            # keeping the "r2" path component itself.
            direct_url = re.sub(
                r'.*/(r2)', RAW_MP4_URL + r'\1',
                rendition.find('./src').text)
            formats.append({
                'url': direct_url,
                'width': int(rendition.get('width')),
                'height': int(rendition.get('height')),
                'tbr': int(rendition.get('bitrate')),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
            'description': description,
            'age_limit': age_limit,
            'timestamp': timestamp,
        }
|
@@ -15,11 +15,12 @@ from ..utils import (
|
||||
class GameSpotIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P<page_id>\d+)/?'
|
||||
_TEST = {
|
||||
"url": "http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/",
|
||||
"file": "gs-2300-6410818.mp4",
|
||||
"md5": "b2a30deaa8654fcccd43713a6b6a4825",
|
||||
"info_dict": {
|
||||
"title": "Arma 3 - Community Guide: SITREP I",
|
||||
'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
|
||||
'md5': 'b2a30deaa8654fcccd43713a6b6a4825',
|
||||
'info_dict': {
|
||||
'id': 'gs-2300-6410818',
|
||||
'ext': 'mp4',
|
||||
'title': 'Arma 3 - Community Guide: SITREP I',
|
||||
'description': 'Check out this video where some of the basics of Arma 3 is explained.',
|
||||
}
|
||||
}
|
||||
|
@@ -260,7 +260,35 @@ class GenericIE(InfoExtractor):
|
||||
'uploader': 'Spi0n',
|
||||
},
|
||||
'add_ie': ['Dailymotion'],
|
||||
}
|
||||
},
|
||||
# YouTube embed
|
||||
{
|
||||
'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
|
||||
'info_dict': {
|
||||
'id': 'FXRb4ykk4S0',
|
||||
'ext': 'mp4',
|
||||
'title': 'The NBL Auction 2014',
|
||||
'uploader': 'BADMINTON England',
|
||||
'uploader_id': 'BADMINTONEvents',
|
||||
'upload_date': '20140603',
|
||||
'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
# MTVServices embed
|
||||
{
|
||||
'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
|
||||
'md5': '35727f82f58c76d996fc188f9755b0d5',
|
||||
'info_dict': {
|
||||
'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
|
||||
'ext': 'mp4',
|
||||
'title': 'Review',
|
||||
'description': 'Mario\'s life in the fast lane has never looked so good.',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def report_download_webpage(self, video_id):
|
||||
@@ -348,17 +376,14 @@ class GenericIE(InfoExtractor):
|
||||
if url.startswith('//'):
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': (
|
||||
'http:'
|
||||
if self._downloader.params.get('prefer_insecure', False)
|
||||
else 'https:') + url,
|
||||
'url': self.http_scheme() + url,
|
||||
}
|
||||
|
||||
parsed_url = compat_urlparse.urlparse(url)
|
||||
if not parsed_url.scheme:
|
||||
default_search = self._downloader.params.get('default_search')
|
||||
if default_search is None:
|
||||
default_search = 'auto_warning'
|
||||
default_search = 'error'
|
||||
|
||||
if default_search in ('auto', 'auto_warning'):
|
||||
if '/' in url:
|
||||
@@ -366,9 +391,19 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result('http://' + url)
|
||||
else:
|
||||
if default_search == 'auto_warning':
|
||||
self._downloader.report_warning(
|
||||
'Falling back to youtube search for %s . Set --default-search to "auto" to suppress this warning.' % url)
|
||||
if re.match(r'^(?:url|URL)$', url):
|
||||
raise ExtractorError(
|
||||
'Invalid URL: %r . Call youtube-dl like this: youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc" ' % url,
|
||||
expected=True)
|
||||
else:
|
||||
self._downloader.report_warning(
|
||||
'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
|
||||
return self.url_result('ytsearch:' + url)
|
||||
elif default_search == 'error':
|
||||
raise ExtractorError(
|
||||
('%r is not a valid URL. '
|
||||
'Set --default-search "ytseach" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
|
||||
) % (url, url), expected=True)
|
||||
else:
|
||||
assert ':' in default_search
|
||||
return self.url_result(default_search + url)
|
||||
@@ -476,8 +511,13 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Look for embedded YouTube player
|
||||
matches = re.findall(r'''(?x)
|
||||
(?:<iframe[^>]+?src=|embedSWF\(\s*)
|
||||
(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/
|
||||
(?:
|
||||
<iframe[^>]+?src=|
|
||||
<embed[^>]+?src=|
|
||||
embedSWF\(?:\s*
|
||||
)
|
||||
(["\'])
|
||||
(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/
|
||||
(?:embed|v)/.+?)
|
||||
\1''', webpage)
|
||||
if matches:
|
||||
@@ -563,7 +603,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Look for embedded NovaMov-based player
|
||||
mobj = re.search(
|
||||
r'''(?x)<iframe[^>]+?src=(["\'])
|
||||
r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
|
||||
(?P<url>http://(?:(?:embed|www)\.)?
|
||||
(?:novamov\.com|
|
||||
nowvideo\.(?:ch|sx|eu|at|ag|co)|
|
||||
@@ -585,6 +625,11 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'VK')
|
||||
|
||||
# Look for embedded ivi player
|
||||
mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'Ivi')
|
||||
|
||||
# Look for embedded Huffington Post player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
|
||||
@@ -636,6 +681,30 @@ class GenericIE(InfoExtractor):
|
||||
if smotri_url:
|
||||
return self.url_result(smotri_url, 'Smotri')
|
||||
|
||||
# Look for embedded soundcloud player
|
||||
mobj = re.search(
|
||||
r'<iframe src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
url = unescapeHTML(mobj.group('url'))
|
||||
return self.url_result(url)
|
||||
|
||||
# Look for embedded vulture.com player
|
||||
mobj = re.search(
|
||||
r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
url = unescapeHTML(mobj.group('url'))
|
||||
return self.url_result(url, ie='Vulture')
|
||||
|
||||
# Look for embedded mtvservices player
|
||||
mobj = re.search(
|
||||
r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
url = unescapeHTML(mobj.group('url'))
|
||||
return self.url_result(url, ie='MTVServicesEmbedded')
|
||||
|
||||
# Start with something easy: JW Player in SWFObject
|
||||
found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||
if not found:
|
||||
@@ -667,7 +736,7 @@ class GenericIE(InfoExtractor):
|
||||
# HTML5 video
|
||||
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage)
|
||||
if not found:
|
||||
found = re.findall(
|
||||
found = re.search(
|
||||
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
|
||||
r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'([^\']+)\'"',
|
||||
webpage)
|
||||
@@ -702,7 +771,7 @@ class GenericIE(InfoExtractor):
|
||||
})
|
||||
|
||||
if len(entries) == 1:
|
||||
return entries[1]
|
||||
return entries[0]
|
||||
else:
|
||||
for num, e in enumerate(entries, start=1):
|
||||
e['title'] = '%s (%d)' % (e['title'], num)
|
||||
|
@@ -52,8 +52,7 @@ class GooglePlusIE(InfoExtractor):
|
||||
|
||||
# Extract title
|
||||
# Get the first line for title
|
||||
video_title = self._html_search_regex(r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]',
|
||||
webpage, 'title', default='NA')
|
||||
video_title = self._og_search_description(webpage).splitlines()[0]
|
||||
|
||||
# Step 2, Simulate clicking the image box to launch video
|
||||
DOMAIN = 'https://plus.google.com/'
|
||||
|
87
youtube_dl/extractor/gorillavid.py
Normal file
87
youtube_dl/extractor/gorillavid.py
Normal file
@@ -0,0 +1,87 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
|
||||
|
||||
class GorillaVidIE(InfoExtractor):
    """Extractor for gorillavid.in and daclips.in video and embed pages."""

    IE_DESC = 'GorillaVid.in and daclips.in'
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?
        (?:daclips\.in|gorillavid\.in)/
        (?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?
    '''

    _TESTS = [{
        'url': 'http://gorillavid.in/06y9juieqpmi',
        'md5': '5ae4a3580620380619678ee4875893ba',
        'info_dict': {
            'id': '06y9juieqpmi',
            'ext': 'flv',
            'title': 'Rebecca Black My Moment Official Music Video Reaction',
            'thumbnail': 're:http://.*\.jpg',
        },
    }, {
        'url': 'http://gorillavid.in/embed-z08zf8le23c6-960x480.html',
        'md5': 'c9e293ca74d46cad638e199c3f3fe604',
        'info_dict': {
            'id': 'z08zf8le23c6',
            'ext': 'mp4',
            'title': 'Say something nice',
            'thumbnail': 're:http://.*\.jpg',
        },
    }, {
        'url': 'http://daclips.in/3rso4kdn6f9m',
        'info_dict': {
            'id': '3rso4kdn6f9m',
            'ext': 'mp4',
            'title': 'Micro Pig piglets ready on 16th July 2009',
            'thumbnail': 're:http://.*\.jpg',
        },
    }]

    def _real_extract(self, url):
        """Return title, thumbnail and a single 'sd' format for ``url``.

        When the landing page is the interstitial form (hidden input
        ``op`` equal to ``download1``), its hidden fields are posted back
        to the same URL to obtain the real player page, which is then
        scraped.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        # Collect name -> value for every hidden <input> on the page.
        fields = dict(re.findall(r'''(?x)<input\s+
            type="hidden"\s+
            name="([^"]+)"\s+
            (?:id="[^"]+"\s+)?
            value="([^"]*)"
            ''', webpage))

        # .get(): a page without an 'op' field has no interstitial form and
        # is scraped directly instead of failing with KeyError.
        if fields.get('op') == 'download1':
            # urllib on Python 3 only accepts bytes as the POST body.
            post = compat_urllib_parse.urlencode(fields).encode('utf-8')

            req = compat_urllib_request.Request(url, post)
            req.add_header('Content-type', 'application/x-www-form-urlencoded')

            webpage = self._download_webpage(req, video_id, 'Downloading video page')

        title = self._search_regex(r'style="z-index: [0-9]+;">([0-9a-zA-Z ]+)(?:-.+)?</span>', webpage, 'title')
        thumbnail = self._search_regex(r'image:\'(http[^\']+)\',', webpage, 'thumbnail')
        # Kept in its own name so the page URL in `url` is not overwritten.
        video_url = self._search_regex(r'file: \'(http[^\']+)\',', webpage, 'file url')

        formats = [{
            'format_id': 'sd',
            'url': video_url,
            'ext': determine_ext(video_url),
            'quality': 1,
        }]

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }
|
73
youtube_dl/extractor/goshgay.py
Normal file
73
youtube_dl/extractor/goshgay.py
Normal file
@@ -0,0 +1,73 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urlparse,
|
||||
str_to_int,
|
||||
ExtractorError,
|
||||
)
|
||||
import json
|
||||
|
||||
|
||||
class GoshgayIE(InfoExtractor):
    """Extractor for goshgay.com video pages (``/video<digits>``)."""

    # The host name's dots are escaped so they match only a literal '.'
    # (unescaped, 'www.goshgay.com' would accept any character there).
    _VALID_URL = r'^(?:https?://)www\.goshgay\.com/video(?P<id>\d+?)($|/)'
    _TEST = {
        'url': 'http://www.goshgay.com/video4116282',
        'md5': '268b9f3c3229105c57859e166dd72b03',
        'info_dict': {
            'id': '4116282',
            'ext': 'flv',
            'title': 'md5:089833a4790b5e103285a07337f245bf',
            'thumbnail': 're:http://.*\.jpg',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Extract the media URL and metadata for one video page.

        The page's ``jwplayer("player").setup({...})`` call provides the
        player size and the address of an XML config document; that
        document's <file> and <image> elements provide the media URL and
        the thumbnail.

        Raises ExtractorError when the config URI is missing, the XML root
        is not <config>, or there is not exactly one <file> element.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        title = self._search_regex(r'class="video-title"><h1>(.+?)<', webpage, 'title')

        player_config = self._search_regex(
            r'(?s)jwplayer\("player"\)\.setup\(({.+?})\)', webpage, 'config settings')
        # The setup argument is parsed as JSON after swapping single quotes
        # for double quotes.
        player_vars = json.loads(player_config.replace("'", '"'))
        width = str_to_int(player_vars.get('width'))
        height = str_to_int(player_vars.get('height'))
        config_uri = player_vars.get('config')

        if config_uri is None:
            raise ExtractorError('Missing config URI')
        node = self._download_xml(config_uri, video_id, 'Downloading player config XML',
                                  errnote='Unable to download XML')
        if node is None:
            raise ExtractorError('Missing config XML')
        if node.tag != 'config':
            raise ExtractorError('Missing config attribute')
        fns = node.findall('file')
        imgs = node.findall('image')
        if len(fns) != 1:
            raise ExtractorError('Missing media URI')
        video_url = fns[0].text
        # The thumbnail is optional; the first <image> element is used.
        if len(imgs) < 1:
            thumbnail = None
        else:
            thumbnail = imgs[0].text

        # Referer is the page URL reduced to scheme, host and path.
        url_comp = compat_urlparse.urlparse(url)
        ref = "%s://%s%s" % (url_comp[0], url_comp[1], url_comp[2])

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'width': width,
            'height': height,
            'thumbnail': thumbnail,
            'http_referer': ref,
            'age_limit': 18,
        }
|
42
youtube_dl/extractor/hentaistigma.py
Normal file
42
youtube_dl/extractor/hentaistigma.py
Normal file
@@ -0,0 +1,42 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class HentaiStigmaIE(InfoExtractor):
    """Extractor for posts on hentai.animestigma.com."""

    _VALID_URL = r'^https?://hentai\.animestigma\.com/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://hentai.animestigma.com/inyouchuu-etsu-bonus/',
        'md5': '4e3d07422a68a4cc363d8f57c8bf0d23',
        'info_dict': {
            'id': 'inyouchuu-etsu-bonus',
            'ext': 'mp4',
            'title': 'Inyouchuu Etsu Bonus',
            'age_limit': 18,
        },
    }

    def _real_extract(self, url):
        """Follow the post's iframe wrapper page to the clip URL.

        The post page supplies the title and an <iframe> whose src ends in
        "mp4"; the media URL is read from the ``clip: { url: "..." }``
        setting on that wrapper page.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        post_page = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<h2 class="posttitle"><a[^>]*>([^<]+)</a>',
            post_page, 'title')

        wrapper_url = self._html_search_regex(
            r'<iframe src="([^"]+mp4)"', post_page, 'wrapper url')
        wrapper_page = self._download_webpage(wrapper_url, video_id)

        clip_url = self._html_search_regex(
            r'clip:\s*{\s*url: "([^"]*)"', wrapper_page, 'video url')

        return {
            'id': video_id,
            'url': clip_url,
            'title': title,
            'age_limit': 18,
        }
|
@@ -1,10 +1,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
|
||||
@@ -13,59 +14,55 @@ from ..utils import (
|
||||
|
||||
|
||||
class HypemIE(InfoExtractor):
|
||||
"""Information Extractor for hypem"""
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
|
||||
_VALID_URL = r'http://(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
|
||||
_TEST = {
|
||||
u'url': u'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
|
||||
u'file': u'1v6ga.mp3',
|
||||
u'md5': u'b9cc91b5af8995e9f0c1cee04c575828',
|
||||
u'info_dict': {
|
||||
u"title": u"Tame"
|
||||
'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
|
||||
'md5': 'b9cc91b5af8995e9f0c1cee04c575828',
|
||||
'info_dict': {
|
||||
'id': '1v6ga',
|
||||
'ext': 'mp3',
|
||||
'title': 'Tame',
|
||||
'uploader': 'BODYWORK',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
track_id = mobj.group(1)
|
||||
|
||||
data = {'ax': 1, 'ts': time.time()}
|
||||
data_encoded = compat_urllib_parse.urlencode(data)
|
||||
complete_url = url + "?" + data_encoded
|
||||
request = compat_urllib_request.Request(complete_url)
|
||||
response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url')
|
||||
response, urlh = self._download_webpage_handle(
|
||||
request, track_id, 'Downloading webpage with the url')
|
||||
cookie = urlh.headers.get('Set-Cookie', '')
|
||||
|
||||
self.report_extraction(track_id)
|
||||
|
||||
html_tracks = self._html_search_regex(r'<script type="application/json" id="displayList-data">(.*?)</script>',
|
||||
response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip()
|
||||
html_tracks = self._html_search_regex(
|
||||
r'(?ms)<script type="application/json" id="displayList-data">\s*(.*?)\s*</script>',
|
||||
response, 'tracks')
|
||||
try:
|
||||
track_list = json.loads(html_tracks)
|
||||
track = track_list[u'tracks'][0]
|
||||
track = track_list['tracks'][0]
|
||||
except ValueError:
|
||||
raise ExtractorError(u'Hypemachine contained invalid JSON.')
|
||||
raise ExtractorError('Hypemachine contained invalid JSON.')
|
||||
|
||||
key = track[u"key"]
|
||||
track_id = track[u"id"]
|
||||
artist = track[u"artist"]
|
||||
title = track[u"song"]
|
||||
key = track['key']
|
||||
track_id = track['id']
|
||||
artist = track['artist']
|
||||
title = track['song']
|
||||
|
||||
serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key))
|
||||
request = compat_urllib_request.Request(serve_url, "" , {'Content-Type': 'application/json'})
|
||||
serve_url = "http://hypem.com/serve/source/%s/%s" % (track_id, key)
|
||||
request = compat_urllib_request.Request(
|
||||
serve_url, '', {'Content-Type': 'application/json'})
|
||||
request.add_header('cookie', cookie)
|
||||
song_data_json = self._download_webpage(request, track_id, u'Downloading metadata')
|
||||
try:
|
||||
song_data = json.loads(song_data_json)
|
||||
except ValueError:
|
||||
raise ExtractorError(u'Hypemachine contained invalid JSON.')
|
||||
final_url = song_data[u"url"]
|
||||
song_data = self._download_json(request, track_id, 'Downloading metadata')
|
||||
final_url = song_data["url"]
|
||||
|
||||
return [{
|
||||
'id': track_id,
|
||||
'url': final_url,
|
||||
'ext': "mp3",
|
||||
'title': title,
|
||||
'artist': artist,
|
||||
}]
|
||||
return {
|
||||
'id': track_id,
|
||||
'url': final_url,
|
||||
'ext': 'mp3',
|
||||
'title': title,
|
||||
'uploader': artist,
|
||||
}
|
||||
|
@@ -5,8 +5,8 @@ import re
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class StatigramIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(www\.)?statigr\.am/p/(?P<id>[^/]+)'
|
||||
class IconosquareIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(www\.)?(?:iconosquare\.com|statigr\.am)/p/(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
'url': 'http://statigr.am/p/522207370455279102_24101272',
|
||||
'md5': '6eb93b882a3ded7c378ee1d6884b1814',
|
||||
@@ -15,6 +15,7 @@ class StatigramIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'uploader_id': 'aguynamedpatrick',
|
||||
'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
|
||||
'description': 'md5:644406a9ec27457ed7aa7a9ebcd4ce3d',
|
||||
},
|
||||
}
|
||||
|
||||
@@ -25,7 +26,7 @@ class StatigramIE(InfoExtractor):
|
||||
html_title = self._html_search_regex(
|
||||
r'<title>(.+?)</title>',
|
||||
webpage, 'title')
|
||||
title = re.sub(r'(?: *\(Videos?\))? \| Statigram$', '', html_title)
|
||||
title = re.sub(r'(?: *\(Videos?\))? \| (?:Iconosquare|Statigram)$', '', html_title)
|
||||
uploader_id = self._html_search_regex(
|
||||
r'@([^ ]+)', title, 'uploader name', fatal=False)
|
||||
|
||||
@@ -33,6 +34,7 @@ class StatigramIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'url': self._og_search_video_url(webpage),
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'uploader_id': uploader_id
|
||||
}
|
@@ -14,7 +14,7 @@ from ..utils import (
|
||||
class IviIE(InfoExtractor):
|
||||
IE_DESC = 'ivi.ru'
|
||||
IE_NAME = 'ivi'
|
||||
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<videoid>\d+)'
|
||||
|
||||
_TESTS = [
|
||||
# Single movie
|
||||
@@ -33,14 +33,14 @@ class IviIE(InfoExtractor):
|
||||
},
|
||||
# Serial's serie
|
||||
{
|
||||
'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
|
||||
'md5': '3e6cc9a848c1d2ebcc6476444967baa9',
|
||||
'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/9549',
|
||||
'md5': '221f56b35e3ed815fde2df71032f4b3e',
|
||||
'info_dict': {
|
||||
'id': '74791',
|
||||
'id': '9549',
|
||||
'ext': 'mp4',
|
||||
'title': 'Дежурный ангел - 1 серия',
|
||||
'duration': 2490,
|
||||
'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
|
||||
'title': 'Двое из ларца - Серия 1',
|
||||
'duration': 2655,
|
||||
'thumbnail': 'http://thumbs.ivi.ru/f15.vcp.digitalaccess.ru/contents/8/4/0068dc0677041f3336b7c2baad8fc0.jpg',
|
||||
},
|
||||
'skip': 'Only works from Russia',
|
||||
}
|
||||
|
35
youtube_dl/extractor/ku6.py
Normal file
35
youtube_dl/extractor/ku6.py
Normal file
@@ -0,0 +1,35 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class Ku6IE(InfoExtractor):
    """Extractor for v.ku6.com ``/show/<id>...html`` pages."""

    _VALID_URL = r'http://v\.ku6\.com/show/(?P<id>[a-zA-Z0-9\-\_]+)(?:\.)*html'
    _TEST = {
        'url': 'http://v.ku6.com/show/JG-8yS14xzBr4bCn1pu0xw...html',
        'md5': '01203549b9efbb45f4b87d55bdea1ed1',
        'info_dict': {
            'id': 'JG-8yS14xzBr4bCn1pu0xw',
            'ext': 'f4v',
            'title': 'techniques test',
        },
    }

    def _real_extract(self, url):
        """Return id, title and media URL for a show page.

        The title is taken from the page's <h1>; the media URL is the
        ``data.f`` entry of the fetchVideo4Player JSON document.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        page = self._download_webpage(url, video_id)
        title = self._search_regex(r'<h1 title=.*>(.*?)</h1>', page, 'title')

        player_info = self._download_json(
            'http://v.ku6.com/fetchVideo4Player/%s.html' % video_id, video_id)

        return {
            'id': video_id,
            'title': title,
            'url': player_info['data']['f'],
        }
|
||||
|
@@ -24,7 +24,7 @@ class LifeNewsIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'МВД разыскивает мужчин, оставивших в IKEA сумку с автоматом',
|
||||
'description': 'Камеры наблюдения гипермаркета зафиксировали троих мужчин, спрятавших оружейный арсенал в камере хранения.',
|
||||
'thumbnail': 'http://lifenews.ru/static/posts/2014/1/126342/.video.jpg',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'upload_date': '20140130',
|
||||
}
|
||||
}
|
||||
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
@@ -6,31 +8,35 @@ from ..utils import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
xpath_with_ns,
|
||||
compat_str,
|
||||
orderedSet,
|
||||
)
|
||||
|
||||
|
||||
class LivestreamIE(InfoExtractor):
|
||||
IE_NAME = u'livestream'
|
||||
IE_NAME = 'livestream'
|
||||
_VALID_URL = r'http://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
|
||||
_TEST = {
|
||||
u'url': u'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
|
||||
u'file': u'4719370.mp4',
|
||||
u'md5': u'0d2186e3187d185a04b3cdd02b828836',
|
||||
u'info_dict': {
|
||||
u'title': u'Live from Webster Hall NYC',
|
||||
u'upload_date': u'20121012',
|
||||
'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
|
||||
'md5': '53274c76ba7754fb0e8d072716f2292b',
|
||||
'info_dict': {
|
||||
'id': '4719370',
|
||||
'ext': 'mp4',
|
||||
'title': 'Live from Webster Hall NYC',
|
||||
'upload_date': '20121012',
|
||||
}
|
||||
}
|
||||
|
||||
def _extract_video_info(self, video_data):
|
||||
video_url = video_data.get('progressive_url_hd') or video_data.get('progressive_url')
|
||||
return {'id': video_data['id'],
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': video_data['caption'],
|
||||
'thumbnail': video_data['thumbnail_url'],
|
||||
'upload_date': video_data['updated_at'].replace('-','')[:8],
|
||||
}
|
||||
return {
|
||||
'id': compat_str(video_data['id']),
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': video_data['caption'],
|
||||
'thumbnail': video_data['thumbnail_url'],
|
||||
'upload_date': video_data['updated_at'].replace('-', '')[:8],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -40,43 +46,43 @@ class LivestreamIE(InfoExtractor):
|
||||
|
||||
if video_id is None:
|
||||
# This is an event page:
|
||||
config_json = self._search_regex(r'window.config = ({.*?});',
|
||||
webpage, u'window config')
|
||||
config_json = self._search_regex(
|
||||
r'window.config = ({.*?});', webpage, 'window config')
|
||||
info = json.loads(config_json)['event']
|
||||
videos = [self._extract_video_info(video_data['data'])
|
||||
for video_data in info['feed']['data'] if video_data['type'] == u'video']
|
||||
for video_data in info['feed']['data'] if video_data['type'] == 'video']
|
||||
return self.playlist_result(videos, info['id'], info['full_name'])
|
||||
else:
|
||||
og_video = self._og_search_video_url(webpage, name=u'player url')
|
||||
og_video = self._og_search_video_url(webpage, 'player url')
|
||||
query_str = compat_urllib_parse_urlparse(og_video).query
|
||||
query = compat_urlparse.parse_qs(query_str)
|
||||
api_url = query['play_url'][0].replace('.smil', '')
|
||||
info = json.loads(self._download_webpage(api_url, video_id,
|
||||
u'Downloading video info'))
|
||||
info = json.loads(self._download_webpage(
|
||||
api_url, video_id, 'Downloading video info'))
|
||||
return self._extract_video_info(info)
|
||||
|
||||
|
||||
# The original version of Livestream uses a different system
|
||||
class LivestreamOriginalIE(InfoExtractor):
|
||||
IE_NAME = u'livestream:original'
|
||||
_VALID_URL = r'https?://www\.livestream\.com/(?P<user>[^/]+)/video\?.*?clipId=(?P<id>.*?)(&|$)'
|
||||
IE_NAME = 'livestream:original'
|
||||
_VALID_URL = r'''(?x)https?://www\.livestream\.com/
|
||||
(?P<user>[^/]+)/(?P<type>video|folder)
|
||||
(?:\?.*?Id=|/)(?P<id>.*?)(&|$)
|
||||
'''
|
||||
_TEST = {
|
||||
u'url': u'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
|
||||
u'info_dict': {
|
||||
u'id': u'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
|
||||
u'ext': u'flv',
|
||||
u'title': u'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital',
|
||||
'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
|
||||
'info_dict': {
|
||||
'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
|
||||
'ext': 'flv',
|
||||
'title': 'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital',
|
||||
},
|
||||
u'params': {
|
||||
'params': {
|
||||
# rtmp
|
||||
u'skip_download': True,
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
user = mobj.group('user')
|
||||
def _extract_video(self, user, video_id):
|
||||
api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
|
||||
|
||||
info = self._download_xml(api_url, video_id)
|
||||
@@ -84,7 +90,7 @@ class LivestreamOriginalIE(InfoExtractor):
|
||||
ns = {'media': 'http://search.yahoo.com/mrss'}
|
||||
thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']
|
||||
# Remove the extension and number from the path (like 1.jpg)
|
||||
path = self._search_regex(r'(user-files/.+)_.*?\.jpg$', thumbnail_url, u'path')
|
||||
path = self._search_regex(r'(user-files/.+)_.*?\.jpg$', thumbnail_url, 'path')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -94,3 +100,44 @@ class LivestreamOriginalIE(InfoExtractor):
|
||||
'ext': 'flv',
|
||||
'thumbnail': thumbnail_url,
|
||||
}
|
||||
|
||||
def _extract_folder(self, url, folder_id):
|
||||
webpage = self._download_webpage(url, folder_id)
|
||||
urls = orderedSet(re.findall(r'<a href="(https?://livestre\.am/.*?)"', webpage))
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': folder_id,
|
||||
'entries': [{
|
||||
'_type': 'url',
|
||||
'url': video_url,
|
||||
} for video_url in urls],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
id = mobj.group('id')
|
||||
user = mobj.group('user')
|
||||
url_type = mobj.group('type')
|
||||
if url_type == 'folder':
|
||||
return self._extract_folder(url, id)
|
||||
else:
|
||||
return self._extract_video(user, id)
|
||||
|
||||
|
||||
# The server doesn't support HEAD request, the generic extractor can't detect
|
||||
# the redirection
|
||||
class LivestreamShortenerIE(InfoExtractor):
|
||||
IE_NAME = 'livestream:shortener'
|
||||
IE_DESC = False # Do not list
|
||||
_VALID_URL = r'https?://livestre\.am/(?P<id>.+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, id)
|
||||
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': self._og_search_url(webpage),
|
||||
}
|
||||
|
@@ -2,7 +2,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import datetime
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
@@ -10,28 +9,48 @@ from .common import InfoExtractor
|
||||
class MailRuIE(InfoExtractor):
|
||||
IE_NAME = 'mailru'
|
||||
IE_DESC = 'Видео@Mail.Ru'
|
||||
_VALID_URL = r'http://(?:www\.)?my\.mail\.ru/video/.*#video=/?(?P<id>[^/]+/[^/]+/[^/]+/\d+)'
|
||||
_VALID_URL = r'http://(?:www\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76',
|
||||
'md5': 'dea205f03120046894db4ebb6159879a',
|
||||
'info_dict': {
|
||||
'id': '46301138',
|
||||
'ext': 'mp4',
|
||||
'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро',
|
||||
'upload_date': '20140224',
|
||||
'uploader': 'sonypicturesrus',
|
||||
'uploader_id': 'sonypicturesrus@mail.ru',
|
||||
'duration': 184,
|
||||
}
|
||||
}
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76',
|
||||
'md5': 'dea205f03120046894db4ebb6159879a',
|
||||
'info_dict': {
|
||||
'id': '46301138',
|
||||
'ext': 'mp4',
|
||||
'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро',
|
||||
'timestamp': 1393232740,
|
||||
'upload_date': '20140224',
|
||||
'uploader': 'sonypicturesrus',
|
||||
'uploader_id': 'sonypicturesrus@mail.ru',
|
||||
'duration': 184,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://my.mail.ru/corp/hitech/video/news_hi-tech_mail_ru/1263.html',
|
||||
'md5': '00a91a58c3402204dcced523777b475f',
|
||||
'info_dict': {
|
||||
'id': '46843144',
|
||||
'ext': 'mp4',
|
||||
'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion',
|
||||
'timestamp': 1397217632,
|
||||
'upload_date': '20140411',
|
||||
'uploader': 'hitech',
|
||||
'uploader_id': 'hitech@corp.mail.ru',
|
||||
'duration': 245,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = mobj.group('idv1')
|
||||
|
||||
if not video_id:
|
||||
video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix')
|
||||
|
||||
video_data = self._download_json(
|
||||
'http://videoapi.my.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON')
|
||||
'http://api.video.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON')
|
||||
|
||||
author = video_data['author']
|
||||
uploader = author['name']
|
||||
@@ -40,10 +59,11 @@ class MailRuIE(InfoExtractor):
|
||||
movie = video_data['movie']
|
||||
content_id = str(movie['contentId'])
|
||||
title = movie['title']
|
||||
if title.endswith('.mp4'):
|
||||
title = title[:-4]
|
||||
thumbnail = movie['poster']
|
||||
duration = movie['duration']
|
||||
|
||||
upload_date = datetime.datetime.fromtimestamp(video_data['timestamp']).strftime('%Y%m%d')
|
||||
view_count = video_data['views_count']
|
||||
|
||||
formats = [
|
||||
@@ -57,7 +77,7 @@ class MailRuIE(InfoExtractor):
|
||||
'id': content_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
'timestamp': video_data['timestamp'],
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'duration': duration,
|
||||
|
@@ -4,9 +4,10 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
compat_urllib_parse,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
@@ -24,6 +25,10 @@ class MixcloudIE(InfoExtractor):
|
||||
'uploader': 'Daniel Holbach',
|
||||
'uploader_id': 'dholbach',
|
||||
'upload_date': '20111115',
|
||||
'timestamp': 1321359578,
|
||||
'thumbnail': 're:https?://.*\.jpg',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -51,10 +56,6 @@ class MixcloudIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, track_id)
|
||||
|
||||
api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name)
|
||||
info = self._download_json(
|
||||
api_url, track_id, 'Downloading cloudcast info')
|
||||
|
||||
preview_url = self._search_regex(
|
||||
r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url')
|
||||
song_url = preview_url.replace('/previews/', '/c/originals/')
|
||||
@@ -65,16 +66,41 @@ class MixcloudIE(InfoExtractor):
|
||||
template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
|
||||
final_song_url = self._get_url(template_url)
|
||||
if final_song_url is None:
|
||||
raise ExtractorError(u'Unable to extract track url')
|
||||
raise ExtractorError('Unable to extract track url')
|
||||
|
||||
PREFIX = (
|
||||
r'<div class="cloudcast-play-button-container"'
|
||||
r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
|
||||
title = self._html_search_regex(
|
||||
PREFIX + r'm-title="([^"]+)"', webpage, 'title')
|
||||
thumbnail = self._proto_relative_url(self._html_search_regex(
|
||||
PREFIX + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail',
|
||||
fatal=False))
|
||||
uploader = self._html_search_regex(
|
||||
PREFIX + r'm-owner-name="([^"]+)"',
|
||||
webpage, 'uploader', fatal=False)
|
||||
uploader_id = self._search_regex(
|
||||
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
|
||||
description = self._og_search_description(webpage)
|
||||
like_count = int_or_none(self._search_regex(
|
||||
r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"',
|
||||
webpage, 'like count', fatal=False))
|
||||
view_count = int_or_none(self._search_regex(
|
||||
r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
|
||||
webpage, 'play count', fatal=False))
|
||||
timestamp = parse_iso8601(self._search_regex(
|
||||
r'<time itemprop="dateCreated" datetime="([^"]+)">',
|
||||
webpage, 'upload date'))
|
||||
|
||||
return {
|
||||
'id': track_id,
|
||||
'title': info['name'],
|
||||
'title': title,
|
||||
'url': final_song_url,
|
||||
'description': info.get('description'),
|
||||
'thumbnail': info['pictures'].get('extra_large'),
|
||||
'uploader': info['user']['name'],
|
||||
'uploader_id': info['user']['username'],
|
||||
'upload_date': unified_strdate(info['created_time']),
|
||||
'view_count': info['play_count'],
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'timestamp': timestamp,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
}
|
||||
|
87
youtube_dl/extractor/motherless.py
Normal file
87
youtube_dl/extractor/motherless.py
Normal file
@@ -0,0 +1,87 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class MotherlessIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?motherless\.com/(?P<id>[A-Z0-9]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://motherless.com/AC3FFE1',
|
||||
'md5': '5527fef81d2e529215dad3c2d744a7d9',
|
||||
'info_dict': {
|
||||
'id': 'AC3FFE1',
|
||||
'ext': 'flv',
|
||||
'title': 'Fucked in the ass while playing PS3',
|
||||
'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
|
||||
'upload_date': '20100913',
|
||||
'uploader_id': 'famouslyfuckedup',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://motherless.com/532291B',
|
||||
'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
|
||||
'info_dict': {
|
||||
'id': '532291B',
|
||||
'ext': 'mp4',
|
||||
'title': 'Amazing girl playing the omegle game, PERFECT!',
|
||||
'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen', 'game', 'hairy'],
|
||||
'upload_date': '20140622',
|
||||
'uploader_id': 'Sulivana7x',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self,url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
|
||||
|
||||
video_url = self._html_search_regex(r'setup\(\{\s+"file".+: "([^"]+)",', webpage, 'video_url')
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
||||
view_count = self._html_search_regex(r'<strong>Views</strong>\s+([^<]+)<', webpage, 'view_count')
|
||||
|
||||
upload_date = self._html_search_regex(r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload_date')
|
||||
if 'Ago' in upload_date:
|
||||
days = int(re.search(r'([0-9]+)', upload_date).group(1))
|
||||
upload_date = (datetime.datetime.now() - datetime.timedelta(days=days)).strftime('%Y%m%d')
|
||||
else:
|
||||
upload_date = unified_strdate(upload_date)
|
||||
|
||||
like_count = self._html_search_regex(r'<strong>Favorited</strong>\s+([^<]+)<', webpage, 'like_count')
|
||||
|
||||
comment_count = webpage.count('class="media-comment-contents"')
|
||||
uploader_id = self._html_search_regex(r'"thumb-member-username">\s+<a href="/m/([^"]+)"', webpage, 'uploader_id')
|
||||
|
||||
categories = self._html_search_meta('keywords', webpage)
|
||||
if categories:
|
||||
categories = [cat.strip() for cat in categories.split(',')]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'upload_date': upload_date,
|
||||
'uploader_id': uploader_id,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'categories': categories,
|
||||
'view_count': int_or_none(view_count.replace(',', '')),
|
||||
'like_count': int_or_none(like_count.replace(',', '')),
|
||||
'comment_count': comment_count,
|
||||
'age_limit': age_limit,
|
||||
'url': video_url,
|
||||
}
|
@@ -28,7 +28,7 @@ class MporaIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
data_json = self._search_regex(
|
||||
r"new FM\.Player\('[^']+',\s*(\{.*?)\);\n", webpage, 'json')
|
||||
r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json')
|
||||
|
||||
data = json.loads(data_json)
|
||||
|
||||
|
@@ -22,6 +22,7 @@ def _media_xml_tag(tag):
|
||||
|
||||
class MTVServicesInfoExtractor(InfoExtractor):
|
||||
_MOBILE_TEMPLATE = None
|
||||
|
||||
@staticmethod
|
||||
def _id_from_uri(uri):
|
||||
return uri.split(':')[-1]
|
||||
@@ -35,6 +36,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
|
||||
return base + m.group('finalid')
|
||||
|
||||
def _get_feed_url(self, uri):
|
||||
return self._FEED_URL
|
||||
|
||||
def _get_thumbnail_url(self, uri, itemdoc):
|
||||
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
|
||||
thumb_node = itemdoc.find(search_path)
|
||||
@@ -80,6 +84,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
})
|
||||
except (KeyError, TypeError):
|
||||
raise ExtractorError('Invalid rendition field.')
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
def _get_video_info(self, itemdoc):
|
||||
@@ -135,10 +140,10 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
|
||||
def _get_videos_info(self, uri):
|
||||
video_id = self._id_from_uri(uri)
|
||||
feed_url = self._get_feed_url(uri)
|
||||
data = compat_urllib_parse.urlencode({'uri': uri})
|
||||
|
||||
idoc = self._download_xml(
|
||||
self._FEED_URL + '?' + data, video_id,
|
||||
feed_url + '?' + data, video_id,
|
||||
'Downloading info', transform_source=fix_xml_ampersands)
|
||||
return [self._get_video_info(item) for item in idoc.findall('.//item')]
|
||||
|
||||
@@ -159,6 +164,37 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
return self._get_videos_info(mgid)
|
||||
|
||||
|
||||
class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'mtvservices:embedded'
|
||||
_VALID_URL = r'https?://media\.mtvnservices\.com/embed/(?P<mgid>.+?)(\?|/|$)'
|
||||
|
||||
_TEST = {
|
||||
# From http://www.thewrap.com/peter-dinklage-sums-up-game-of-thrones-in-45-seconds-video/
|
||||
'url': 'http://media.mtvnservices.com/embed/mgid:uma:video:mtv.com:1043906/cp~vid%3D1043906%26uri%3Dmgid%3Auma%3Avideo%3Amtv.com%3A1043906',
|
||||
'md5': 'cb349b21a7897164cede95bd7bf3fbb9',
|
||||
'info_dict': {
|
||||
'id': '1043906',
|
||||
'ext': 'mp4',
|
||||
'title': 'Peter Dinklage Sums Up \'Game Of Thrones\' In 45 Seconds',
|
||||
'description': '"Sexy sexy sexy, stabby stabby stabby, beautiful language," says Peter Dinklage as he tries summarizing "Game of Thrones" in under a minute.',
|
||||
},
|
||||
}
|
||||
|
||||
def _get_feed_url(self, uri):
|
||||
video_id = self._id_from_uri(uri)
|
||||
site_id = uri.replace(video_id, '')
|
||||
config_url = 'http://media.mtvnservices.com/pmt/e1/players/{0}/config.xml'.format(site_id)
|
||||
config_doc = self._download_xml(config_url, video_id)
|
||||
feed_node = config_doc.find('.//feed')
|
||||
feed_url = feed_node.text.strip().split('?')[0]
|
||||
return feed_url
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
mgid = mobj.group('mgid')
|
||||
return self._get_videos_info(mgid)
|
||||
|
||||
|
||||
class MTVIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'''(?x)^https?://
|
||||
(?:(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$|
|
||||
|
@@ -1,4 +1,6 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -12,12 +14,13 @@ class NaverIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://tvcast.naver.com/v/81652',
|
||||
u'file': u'81652.mp4',
|
||||
u'info_dict': {
|
||||
u'title': u'[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
|
||||
u'description': u'합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
|
||||
u'upload_date': u'20130903',
|
||||
'url': 'http://tvcast.naver.com/v/81652',
|
||||
'info_dict': {
|
||||
'id': '81652',
|
||||
'ext': 'mp4',
|
||||
'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
|
||||
'description': '합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
|
||||
'upload_date': '20130903',
|
||||
},
|
||||
}
|
||||
|
||||
@@ -28,7 +31,7 @@ class NaverIE(InfoExtractor):
|
||||
m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
|
||||
webpage)
|
||||
if m_id is None:
|
||||
raise ExtractorError(u'couldn\'t extract vid and key')
|
||||
raise ExtractorError('couldn\'t extract vid and key')
|
||||
vid = m_id.group(1)
|
||||
key = m_id.group(2)
|
||||
query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key,})
|
||||
@@ -39,22 +42,27 @@ class NaverIE(InfoExtractor):
|
||||
})
|
||||
info = self._download_xml(
|
||||
'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
|
||||
video_id, u'Downloading video info')
|
||||
video_id, 'Downloading video info')
|
||||
urls = self._download_xml(
|
||||
'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
|
||||
video_id, u'Downloading video formats info')
|
||||
video_id, 'Downloading video formats info')
|
||||
|
||||
formats = []
|
||||
for format_el in urls.findall('EncodingOptions/EncodingOption'):
|
||||
domain = format_el.find('Domain').text
|
||||
if domain.startswith('rtmp'):
|
||||
continue
|
||||
formats.append({
|
||||
f = {
|
||||
'url': domain + format_el.find('uri').text,
|
||||
'ext': 'mp4',
|
||||
'width': int(format_el.find('width').text),
|
||||
'height': int(format_el.find('height').text),
|
||||
})
|
||||
}
|
||||
if domain.startswith('rtmp'):
|
||||
f.update({
|
||||
'ext': 'flv',
|
||||
'rtmp_protocol': '1', # rtmpt
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -1,6 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import find_xpath_attr, compat_str
|
||||
@@ -31,30 +32,68 @@ class NBCIE(InfoExtractor):
|
||||
|
||||
|
||||
class NBCNewsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)'
|
||||
_VALID_URL = r'''(?x)https?://www\.nbcnews\.com/
|
||||
((video/.+?/(?P<id>\d+))|
|
||||
(feature/[^/]+/(?P<title>.+)))
|
||||
'''
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.nbcnews.com/video/nbc-news/52753292',
|
||||
'md5': '47abaac93c6eaf9ad37ee6c4463a5179',
|
||||
'info_dict': {
|
||||
'id': '52753292',
|
||||
'ext': 'flv',
|
||||
'title': 'Crew emerges after four-month Mars food study',
|
||||
'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.nbcnews.com/video/nbc-news/52753292',
|
||||
'md5': '47abaac93c6eaf9ad37ee6c4463a5179',
|
||||
'info_dict': {
|
||||
'id': '52753292',
|
||||
'ext': 'flv',
|
||||
'title': 'Crew emerges after four-month Mars food study',
|
||||
'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
|
||||
},
|
||||
},
|
||||
}
|
||||
{
|
||||
'url': 'http://www.nbcnews.com/feature/edward-snowden-interview/how-twitter-reacted-snowden-interview-n117236',
|
||||
'md5': 'b2421750c9f260783721d898f4c42063',
|
||||
'info_dict': {
|
||||
'id': 'I1wpAI_zmhsQ',
|
||||
'ext': 'flv',
|
||||
'title': 'How Twitter Reacted To The Snowden Interview',
|
||||
'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
|
||||
info = all_info.find('video')
|
||||
if video_id is not None:
|
||||
all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
|
||||
info = all_info.find('video')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info.find('headline').text,
|
||||
'ext': 'flv',
|
||||
'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
|
||||
'description': compat_str(info.find('caption').text),
|
||||
'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
|
||||
}
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info.find('headline').text,
|
||||
'ext': 'flv',
|
||||
'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
|
||||
'description': compat_str(info.find('caption').text),
|
||||
'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
|
||||
}
|
||||
else:
|
||||
# "feature" pages use theplatform.com
|
||||
title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, title)
|
||||
bootstrap_json = self._search_regex(
|
||||
r'var bootstrapJson = ({.+})\s*$', webpage, 'bootstrap json',
|
||||
flags=re.MULTILINE)
|
||||
bootstrap = json.loads(bootstrap_json)
|
||||
info = bootstrap['results'][0]['video']
|
||||
playlist_url = info['fallbackPlaylistUrl'] + '?form=MPXNBCNewsAPI'
|
||||
mpxid = info['mpxId']
|
||||
all_videos = self._download_json(playlist_url, title)['videos']
|
||||
# The response contains additional videos
|
||||
info = next(v for v in all_videos if v['mpxId'] == mpxid)
|
||||
|
||||
return {
|
||||
'_type': 'url',
|
||||
# We get the best quality video
|
||||
'url': info['videoAssets'][-1]['publicUrl'],
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
||||
|
@@ -4,7 +4,11 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class NDRIE(InfoExtractor):
|
||||
@@ -45,17 +49,16 @@ class NDRIE(InfoExtractor):
|
||||
|
||||
page = self._download_webpage(url, video_id, 'Downloading page')
|
||||
|
||||
title = self._og_search_title(page)
|
||||
title = self._og_search_title(page).strip()
|
||||
description = self._og_search_description(page)
|
||||
if description:
|
||||
description = description.strip()
|
||||
|
||||
mobj = re.search(
|
||||
r'<div class="duration"><span class="min">(?P<minutes>\d+)</span>:<span class="sec">(?P<seconds>\d+)</span></div>',
|
||||
page)
|
||||
duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
|
||||
duration = int_or_none(self._html_search_regex(r'duration: (\d+),\n', page, 'duration', fatal=False))
|
||||
|
||||
formats = []
|
||||
|
||||
mp3_url = re.search(r'''{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page)
|
||||
mp3_url = re.search(r'''\{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page)
|
||||
if mp3_url:
|
||||
formats.append({
|
||||
'url': mp3_url.group('audio'),
|
||||
@@ -64,13 +67,15 @@ class NDRIE(InfoExtractor):
|
||||
|
||||
thumbnail = None
|
||||
|
||||
video_url = re.search(r'''3: {src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page)
|
||||
video_url = re.search(r'''3: \{src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page)
|
||||
if video_url:
|
||||
thumbnail = self._html_search_regex(r'(?m)title: "NDR PLAYER",\s*poster: "([^"]+)",',
|
||||
page, 'thumbnail', fatal=False)
|
||||
if thumbnail:
|
||||
thumbnail = 'http://www.ndr.de' + thumbnail
|
||||
for format_id in ['lo', 'hi', 'hq']:
|
||||
thumbnails = re.findall(r'''\d+: \{src: "([^"]+)"(?: \|\| '[^']+')?, quality: '([^']+)'}''', page)
|
||||
if thumbnails:
|
||||
quality_key = qualities(['xs', 's', 'm', 'l', 'xl'])
|
||||
largest = max(thumbnails, key=lambda thumb: quality_key(thumb[1]))
|
||||
thumbnail = 'http://www.ndr.de' + largest[0]
|
||||
|
||||
for format_id in 'lo', 'hi', 'hq':
|
||||
formats.append({
|
||||
'url': '%s.%s.mp4' % (video_url.group('video'), format_id),
|
||||
'format_id': format_id,
|
||||
|
@@ -1,22 +1,28 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import month_by_name
|
||||
from ..utils import (
|
||||
month_by_name,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class NDTVIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?ndtv\.com/video/player/[^/]*/[^/]*/(?P<id>[a-z0-9]+)'
|
||||
|
||||
_TEST = {
|
||||
u"url": u"http://www.ndtv.com/video/player/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal/300710",
|
||||
u"file": u"300710.mp4",
|
||||
u"md5": u"39f992dbe5fb531c395d8bbedb1e5e88",
|
||||
u"info_dict": {
|
||||
u"title": u"NDTV exclusive: Don't need character certificate from Rahul Gandhi, says Arvind Kejriwal",
|
||||
u"description": u"In an exclusive interview to NDTV, Aam Aadmi Party's Arvind Kejriwal says it makes no difference to him that Rahul Gandhi said the Congress needs to learn from his party.",
|
||||
u"upload_date": u"20131208",
|
||||
u"duration": 1327,
|
||||
u"thumbnail": u"http://i.ndtvimg.com/video/images/vod/medium/2013-12/big_300710_1386518307.jpg",
|
||||
'url': 'http://www.ndtv.com/video/player/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal/300710',
|
||||
'md5': '39f992dbe5fb531c395d8bbedb1e5e88',
|
||||
'info_dict': {
|
||||
'id': '300710',
|
||||
'ext': 'mp4',
|
||||
'title': "NDTV exclusive: Don't need character certificate from Rahul Gandhi, says Arvind Kejriwal",
|
||||
'description': 'md5:ab2d4b4a6056c5cb4caa6d729deabf02',
|
||||
'upload_date': '20131208',
|
||||
'duration': 1327,
|
||||
'thumbnail': 'http://i.ndtvimg.com/video/images/vod/medium/2013-12/big_300710_1386518307.jpg',
|
||||
},
|
||||
}
|
||||
|
||||
@@ -27,13 +33,12 @@ class NDTVIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
filename = self._search_regex(
|
||||
r"__filename='([^']+)'", webpage, u'video filename')
|
||||
video_url = (u'http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' %
|
||||
r"__filename='([^']+)'", webpage, 'video filename')
|
||||
video_url = ('http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' %
|
||||
filename)
|
||||
|
||||
duration_str = filename = self._search_regex(
|
||||
r"__duration='([^']+)'", webpage, u'duration', fatal=False)
|
||||
duration = None if duration_str is None else int(duration_str)
|
||||
duration = int_or_none(self._search_regex(
|
||||
r"__duration='([^']+)'", webpage, 'duration', fatal=False))
|
||||
|
||||
date_m = re.search(r'''(?x)
|
||||
<p\s+class="vod_dateline">\s*
|
||||
@@ -41,7 +46,7 @@ class NDTVIE(InfoExtractor):
|
||||
(?P<monthname>[A-Za-z]+)\s+(?P<day>[0-9]+),\s*(?P<year>[0-9]+)
|
||||
''', webpage)
|
||||
upload_date = None
|
||||
assert date_m
|
||||
|
||||
if date_m is not None:
|
||||
month = month_by_name(date_m.group('monthname'))
|
||||
if month is not None:
|
||||
@@ -49,14 +54,19 @@ class NDTVIE(InfoExtractor):
|
||||
date_m.group('year'), month, int(date_m.group('day')))
|
||||
|
||||
description = self._og_search_description(webpage)
|
||||
READ_MORE = u' (Read more)'
|
||||
READ_MORE = ' (Read more)'
|
||||
if description.endswith(READ_MORE):
|
||||
description = description[:-len(READ_MORE)]
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
TITLE_SUFFIX = ' - NDTV'
|
||||
if title.endswith(TITLE_SUFFIX):
|
||||
title = title[:-len(TITLE_SUFFIX)]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': self._og_search_title(webpage),
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'duration': duration,
|
||||
|
92
youtube_dl/extractor/newstube.py
Normal file
92
youtube_dl/extractor/newstube.py
Normal file
@@ -0,0 +1,92 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class NewstubeIE(InfoExtractor):
    """Information extractor for newstube.ru media pages.

    The video GUID is read from the page's og:video meta tag and then used
    to query the player web service, whose XML answer supplies the session
    id, the metadata and the list of RTMP streams.
    """

    _VALID_URL = r'https?://(?:www\.)?newstube\.ru/media/(?P<id>.+)'
    _TEST = {
        'url': 'http://www.newstube.ru/media/telekanal-cnn-peremestil-gorod-slavyansk-v-krym',
        'info_dict': {
            'id': '728e0ef2-e187-4012-bac0-5a081fdcb1f6',
            'ext': 'flv',
            'title': 'Телеканал CNN переместил город Славянск в Крым',
            'description': 'md5:419a8c9f03442bc0b0a794d689360335',
            'duration': 31.05,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Build the info dict (metadata plus RTMP formats) for ``url``.

        Raises ExtractorError (expected) when the player service answers
        with an ErrorMessage element.
        """
        page_id = re.match(self._VALID_URL, url).group('id')

        webpage = self._download_webpage(url, page_id, 'Downloading page')

        video_guid = self._html_search_regex(
            r'<meta property="og:video" content="https?://(?:www\.)?newstube\.ru/freshplayer\.swf\?guid=(?P<guid>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
            webpage, 'video GUID')

        player_xml = self._download_xml(
            'http://p.newstube.ru/v2/player.asmx/GetAutoPlayInfo6?state=&url=%s&sessionId=&id=%s&placement=profile&location=n2' % (url, video_guid),
            video_guid, 'Downloading player XML')

        namespace = '{http://app1.newstube.ru/N2SiteWS/player.asmx}'

        def qualify(path):
            # Prefix every path step with the XML namespace of the service.
            return ('/' + namespace).join(path.split('/'))

        def text_of(node, path):
            # Text of the (required) child element found at ``path``.
            return node.find(qualify(path)).text

        failure = player_xml.find(qualify('./ErrorMessage'))
        if failure is not None:
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, failure.text),
                expected=True)

        session_id = text_of(player_xml, './SessionId')
        media = player_xml.find(qualify('./Medias/MediaInfo'))
        title = text_of(media, './Name')
        description = self._og_search_description(webpage)
        thumbnail = text_of(media, './KeyFrame')
        # The service reports the duration in thousandths of a second.
        duration = int(text_of(media, './Duration')) / 1000.0

        formats = []
        for stream in media.findall(qualify('./Streams/StreamInfo')):
            location = stream.find(qualify('./MediaLocation'))
            if location is not None:
                rtmp_server = text_of(location, './Server')
                rtmp_app = text_of(location, './App')
                stream_id = text_of(stream, './Id')
                quality = text_of(stream, './QualityId')
                label = text_of(stream, './Name')
                frame_width = int(text_of(stream, './Width'))
                frame_height = int(text_of(stream, './Height'))

                formats.append({
                    'url': 'rtmp://%s/%s' % (rtmp_server, rtmp_app),
                    'app': rtmp_app,
                    'play_path': '01/%s' % video_guid.upper(),
                    'rtmp_conn': ['S:%s' % session_id, 'S:%s' % stream_id, 'S:n2'],
                    'page_url': url,
                    'ext': 'flv',
                    'format_id': quality,
                    'format_note': label,
                    'width': frame_width,
                    'height': frame_height,
                })

        self._sort_formats(formats)

        return {
            'id': video_guid,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }
|
@@ -73,14 +73,16 @@ class NFBIE(InfoExtractor):
|
||||
title = media.find('title').text
|
||||
description = media.find('description').text
|
||||
# It seems assets always go from lower to better quality, so no need to sort
|
||||
formats = [{
|
||||
'url': x.find('default/streamerURI').text,
|
||||
'app': x.find('default/streamerURI').text.split('/', 3)[3],
|
||||
'play_path': x.find('default/url').text,
|
||||
'rtmp_live': False,
|
||||
'ext': 'mp4',
|
||||
'format_id': x.get('quality'),
|
||||
} for x in media.findall('assets/asset')]
|
||||
for asset in media.findall('assets/asset'):
|
||||
for x in asset:
|
||||
formats.append({
|
||||
'url': x.find('streamerURI').text,
|
||||
'app': x.find('streamerURI').text.split('/', 3)[3],
|
||||
'play_path': x.find('url').text,
|
||||
'rtmp_live': False,
|
||||
'ext': 'mp4',
|
||||
'format_id': '%s-%s' % (x.tag, asset.get('quality')),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -8,10 +8,9 @@ from ..utils import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
compat_str,
|
||||
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
parse_duration,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -30,6 +29,7 @@ class NiconicoIE(InfoExtractor):
|
||||
'uploader_id': '2698420',
|
||||
'upload_date': '20131123',
|
||||
'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
|
||||
'duration': 33,
|
||||
},
|
||||
'params': {
|
||||
'username': 'ydl.niconico@gmail.com',
|
||||
@@ -37,17 +37,20 @@ class NiconicoIE(InfoExtractor):
|
||||
},
|
||||
}
|
||||
|
||||
_VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$'
|
||||
_VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/((?:[a-z]{2})?[0-9]+)'
|
||||
_NETRC_MACHINE = 'niconico'
|
||||
# Determine whether the downloader uses authentication to download video
|
||||
_AUTHENTICATE = False
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
if self._downloader.params.get('username', None) is not None:
|
||||
self._AUTHENTICATE = True
|
||||
|
||||
if self._AUTHENTICATE:
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
# Login is required
|
||||
raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
|
||||
|
||||
# Log in
|
||||
login_form_strs = {
|
||||
@@ -79,44 +82,66 @@ class NiconicoIE(InfoExtractor):
|
||||
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
|
||||
note='Downloading video info page')
|
||||
|
||||
# Get flv info
|
||||
flv_info_webpage = self._download_webpage(
|
||||
'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
|
||||
video_id, 'Downloading flv info')
|
||||
if self._AUTHENTICATE:
|
||||
# Get flv info
|
||||
flv_info_webpage = self._download_webpage(
|
||||
'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
|
||||
video_id, 'Downloading flv info')
|
||||
else:
|
||||
# Get external player info
|
||||
ext_player_info = self._download_webpage(
|
||||
'http://ext.nicovideo.jp/thumb_watch/' + video_id, video_id)
|
||||
thumb_play_key = self._search_regex(
|
||||
r'\'thumbPlayKey\'\s*:\s*\'(.*?)\'', ext_player_info, 'thumbPlayKey')
|
||||
|
||||
# Get flv info
|
||||
flv_info_data = compat_urllib_parse.urlencode({
|
||||
'k': thumb_play_key,
|
||||
'v': video_id
|
||||
})
|
||||
flv_info_request = compat_urllib_request.Request(
|
||||
'http://ext.nicovideo.jp/thumb_watch', flv_info_data,
|
||||
{'Content-Type': 'application/x-www-form-urlencoded'})
|
||||
flv_info_webpage = self._download_webpage(
|
||||
flv_info_request, video_id,
|
||||
note='Downloading flv info', errnote='Unable to download flv info')
|
||||
|
||||
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
|
||||
|
||||
# Start extracting information
|
||||
video_title = video_info.find('.//title').text
|
||||
video_extension = video_info.find('.//movie_type').text
|
||||
video_format = video_extension.upper()
|
||||
video_thumbnail = video_info.find('.//thumbnail_url').text
|
||||
video_description = video_info.find('.//description').text
|
||||
video_uploader_id = video_info.find('.//user_id').text
|
||||
video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0])
|
||||
video_view_count = video_info.find('.//view_counter').text
|
||||
video_webpage_url = video_info.find('.//watch_url').text
|
||||
title = video_info.find('.//title').text
|
||||
extension = video_info.find('.//movie_type').text
|
||||
video_format = extension.upper()
|
||||
thumbnail = video_info.find('.//thumbnail_url').text
|
||||
description = video_info.find('.//description').text
|
||||
upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0])
|
||||
view_count = int_or_none(video_info.find('.//view_counter').text)
|
||||
comment_count = int_or_none(video_info.find('.//comment_num').text)
|
||||
duration = parse_duration(video_info.find('.//length').text)
|
||||
webpage_url = video_info.find('.//watch_url').text
|
||||
|
||||
# uploader
|
||||
video_uploader = video_uploader_id
|
||||
url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
|
||||
try:
|
||||
user_info = self._download_xml(
|
||||
url, video_id, note='Downloading user information')
|
||||
video_uploader = user_info.find('.//nickname').text
|
||||
except ExtractorError as err:
|
||||
self._downloader.report_warning('Unable to download user info webpage: %s' % compat_str(err))
|
||||
if video_info.find('.//ch_id') is not None:
|
||||
uploader_id = video_info.find('.//ch_id').text
|
||||
uploader = video_info.find('.//ch_name').text
|
||||
elif video_info.find('.//user_id') is not None:
|
||||
uploader_id = video_info.find('.//user_id').text
|
||||
uploader = video_info.find('.//user_nickname').text
|
||||
else:
|
||||
uploader_id = uploader = None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_real_url,
|
||||
'title': video_title,
|
||||
'ext': video_extension,
|
||||
'title': title,
|
||||
'ext': extension,
|
||||
'format': video_format,
|
||||
'thumbnail': video_thumbnail,
|
||||
'description': video_description,
|
||||
'uploader': video_uploader,
|
||||
'upload_date': video_upload_date,
|
||||
'uploader_id': video_uploader_id,
|
||||
'view_count': video_view_count,
|
||||
'webpage_url': video_webpage_url,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
'uploader_id': uploader_id,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
'duration': duration,
|
||||
'webpage_url': webpage_url,
|
||||
}
|
||||
|
@@ -47,7 +47,7 @@ class NineGagIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
post_view = json.loads(self._html_search_regex(
|
||||
r'var postView = new app\.PostView\({\s*post:\s*({.+?}),', webpage, 'post view'))
|
||||
r'var postView = new app\.PostView\({\s*post:\s*({.+?}),\s*posts:\s*prefetchedCurrentPost', webpage, 'post view'))
|
||||
|
||||
youtube_id = post_view['videoExternalId']
|
||||
title = post_view['title']
|
||||
|
@@ -26,7 +26,8 @@ class NocoIE(InfoExtractor):
|
||||
'uploader': 'Nolife',
|
||||
'uploader_id': 'NOL',
|
||||
'duration': 2851.2,
|
||||
}
|
||||
},
|
||||
'skip': 'Requires noco account',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -34,7 +35,7 @@ class NocoIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
|
||||
medias = self._download_json(
|
||||
'http://api.noco.tv/1.0/video/medias/%s' % video_id, video_id, 'Downloading video JSON')
|
||||
'https://api.noco.tv/1.0/video/medias/%s' % video_id, video_id, 'Downloading video JSON')
|
||||
|
||||
formats = []
|
||||
|
||||
@@ -42,7 +43,7 @@ class NocoIE(InfoExtractor):
|
||||
format_id = fmt['quality_key']
|
||||
|
||||
file = self._download_json(
|
||||
'http://api.noco.tv/1.0/video/file/%s/fr/%s' % (format_id.lower(), video_id),
|
||||
'https://api.noco.tv/1.0/video/file/%s/fr/%s' % (format_id.lower(), video_id),
|
||||
video_id, 'Downloading %s video JSON' % format_id)
|
||||
|
||||
file_url = file['file']
|
||||
@@ -70,7 +71,7 @@ class NocoIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
show = self._download_json(
|
||||
'http://api.noco.tv/1.0/shows/show/%s' % video_id, video_id, 'Downloading show JSON')[0]
|
||||
'https://api.noco.tv/1.0/shows/show/%s' % video_id, video_id, 'Downloading show JSON')[0]
|
||||
|
||||
upload_date = unified_strdate(show['indexed'])
|
||||
uploader = show['partner_name']
|
||||
|
@@ -4,9 +4,7 @@ import re
|
||||
|
||||
from .brightcove import BrightcoveIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class NownessIE(InfoExtractor):
|
||||
@@ -14,9 +12,10 @@ class NownessIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation',
|
||||
'file': '2520295746001.mp4',
|
||||
'md5': '0ece2f70a7bd252c7b00f3070182d418',
|
||||
'md5': '068bc0202558c2e391924cb8cc470676',
|
||||
'info_dict': {
|
||||
'id': '2520295746001',
|
||||
'ext': 'mp4',
|
||||
'description': 'Candor: The Art of Gesticulation',
|
||||
'uploader': 'Nowness',
|
||||
'title': 'Candor: The Art of Gesticulation',
|
||||
|
62
youtube_dl/extractor/npo.py
Normal file
62
youtube_dl/extractor/npo.py
Normal file
@@ -0,0 +1,62 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class NPOIE(InfoExtractor):
    """Information extractor for episode pages on npo.nl.

    Metadata comes from a JSONP endpoint, a player token is scraped from a
    script file, and the token is then exchanged for the stream location.
    """

    IE_NAME = 'npo.nl'
    _VALID_URL = r'https?://www\.npo\.nl/[^/]+/[^/]+/(?P<id>[^/?]+)'

    _TEST = {
        'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719',
        'md5': '4b3f9c429157ec4775f2c9cb7b911016',
        'info_dict': {
            'id': 'VPWON_1220719',
            'ext': 'mp4',
            'title': 'Nieuwsuur',
            'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.',
            'upload_date': '20140622',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the episode addressed by ``url``.

        The title and the stream URL are required. Description, thumbnail
        and upload date are filled in only when the metadata provides them.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        metadata = self._download_json(
            'http://e.omroep.nl/metadata/aflevering/%s' % video_id,
            video_id,
            # We have to remove the javascript callback
            transform_source=lambda j: re.sub(r'parseMetadata\((.*?)\);\n//epc', r'\1', j)
        )
        token_page = self._download_webpage(
            'http://ida.omroep.nl/npoplayer/i.js',
            video_id,
            note='Downloading token'
        )
        # The dot is escaped so that only the literal "npoplayer.token"
        # assignment is matched.
        token = self._search_regex(
            r'npoplayer\.token = "(.+?)"', token_page, 'token')
        streams_info = self._download_json(
            'http://ida.omroep.nl/odi/?prid=%s&puboptions=h264_std&adaptive=yes&token=%s' % (video_id, token),
            video_id
        )

        stream_info = self._download_json(
            streams_info['streams'][0] + '&type=json',
            video_id,
            'Downloading stream info'
        )

        # Optional metadata: a missing field must not abort an extraction
        # that already has a title and a stream URL.
        images = metadata.get('images') or []
        thumbnail = images[-1].get('url') if images else None
        broadcast_date = metadata.get('gidsdatum')

        return {
            'id': video_id,
            'title': metadata['titel'],
            'ext': 'mp4',
            'url': stream_info['url'],
            'description': metadata.get('info'),
            'thumbnail': thumbnail,
            'upload_date': unified_strdate(broadcast_date) if broadcast_date else None,
        }
|
@@ -4,7 +4,11 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class NRKIE(InfoExtractor):
|
||||
@@ -13,7 +17,7 @@ class NRKIE(InfoExtractor):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.nrk.no/video/dompap_og_andre_fugler_i_piip_show/D0FA54B5C8B6CE59/emne/piipshow/',
|
||||
'md5': '12618eef328c9a35c1b47d5583d9c30d',
|
||||
'md5': 'a6eac35052f3b242bb6bb7f43aed5886',
|
||||
'info_dict': {
|
||||
'id': '150533',
|
||||
'ext': 'flv',
|
||||
@@ -23,7 +27,7 @@ class NRKIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nrk.no/lyd/lyd_av_oppleser_for_blinde/AEFDDD5473BA0198/',
|
||||
'md5': '390b2ce15c0d6aa376ef5059ac9f865e',
|
||||
'md5': '3471f2a51718195164e88f46bf427668',
|
||||
'info_dict': {
|
||||
'id': '154915',
|
||||
'ext': 'flv',
|
||||
@@ -64,4 +68,77 @@ class NRKIE(InfoExtractor):
|
||||
'title': data['title'],
|
||||
'description': data['description'],
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class NRKTVIE(InfoExtractor):
    """Information extractor for tv.nrk.no / tv.nrksuper.no programme pages.

    Metadata and media locations are read from meta tags and ``data-*``
    attributes of the programme page itself.
    """

    _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})'

    _TESTS = [
        {
            'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
            'md5': '7b96112fbae1faf09a6f9ae1aff6cb84',
            'info_dict': {
                'id': 'MUHH48000314',
                'ext': 'flv',
                'title': '20 spørsmål',
                'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
                'upload_date': '20140523',
                'duration': 1741.52,
            }
        },
        {
            'url': 'http://tv.nrk.no/program/mdfp15000514',
            'md5': 'af01795a31f1cf7265c8657534d8077b',
            'info_dict': {
                'id': 'mdfp15000514',
                'ext': 'flv',
                'title': 'Kunnskapskanalen: Grunnlovsjubiléet - Stor ståhei for ingenting',
                'description': 'md5:654c12511f035aed1e42bdf5db3b206a',
                'upload_date': '20140524',
                'duration': 4605.0,
            }
        },
    ]

    def _real_extract(self, url):
        """Return the info dict for the programme page at ``url``."""
        video_id = re.match(self._VALID_URL, url).group('id')

        webpage = self._download_webpage(url, video_id)

        title = self._html_search_meta('title', webpage, 'title')
        description = self._html_search_meta(
            'description', webpage, 'description')
        thumbnail = self._html_search_regex(
            r'data-posterimage="([^"]+)"', webpage, 'thumbnail', fatal=False)
        rights_from = self._html_search_meta(
            'rightsfrom', webpage, 'upload date', fatal=False)
        upload_date = unified_strdate(rights_from)
        raw_duration = self._html_search_regex(
            r'data-duration="([^"]+)"', webpage, 'duration', fatal=False)
        duration = float_or_none(raw_duration)

        formats = []

        # Manifest advertised through the data-media attribute.
        hds_match = re.search(r'data-media="([^"]+)"', webpage)
        if hds_match is not None:
            hds_format = {
                'url': hds_match.group(1) + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124',
                'format_id': 'f4m',
                'ext': 'flv',
            }
            formats.append(hds_format)

        # Playlist advertised through the data-hls-media attribute.
        hls_match = re.search(r'data-hls-media="([^"]+)"', webpage)
        if hls_match is not None:
            hls_format = {
                'url': hls_match.group(1),
                'format_id': 'm3u8',
            }
            formats.append(hls_format)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'duration': duration,
            'formats': formats,
        }
|
||||
|
@@ -5,7 +5,6 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unescapeHTML
|
||||
)
|
||||
|
||||
@@ -24,9 +23,9 @@ class NTVIE(InfoExtractor):
|
||||
'duration': 136,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ntv.ru/video/novosti/750370/',
|
||||
@@ -38,9 +37,9 @@ class NTVIE(InfoExtractor):
|
||||
'duration': 172,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
|
||||
@@ -52,9 +51,9 @@ class NTVIE(InfoExtractor):
|
||||
'duration': 1496,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ntv.ru/kino/Koma_film',
|
||||
@@ -66,9 +65,9 @@ class NTVIE(InfoExtractor):
|
||||
'duration': 5592,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/',
|
||||
@@ -80,33 +79,25 @@ class NTVIE(InfoExtractor):
|
||||
'duration': 2590,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
_VIDEO_ID_REGEXES = [
|
||||
r'<meta property="og:url" content="http://www\.ntv\.ru/video/(\d+)',
|
||||
r'<video embed=[^>]+><id>(\d+)</id>',
|
||||
r'<video restriction[^>]+><key>(\d+)</key>'
|
||||
r'<video restriction[^>]+><key>(\d+)</key>',
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
page = self._download_webpage(url, video_id, 'Downloading page')
|
||||
page = self._download_webpage(url, video_id)
|
||||
|
||||
for pattern in self._VIDEO_ID_REGEXES:
|
||||
mobj = re.search(pattern, page)
|
||||
if mobj:
|
||||
break
|
||||
|
||||
if not mobj:
|
||||
raise ExtractorError('No media links available for %s' % video_id)
|
||||
|
||||
video_id = mobj.group(1)
|
||||
video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, page, 'video id')
|
||||
|
||||
player = self._download_xml('http://www.ntv.ru/vi%s/' % video_id, video_id, 'Downloading video XML')
|
||||
title = unescapeHTML(player.find('./data/title').text)
|
||||
@@ -124,7 +115,7 @@ class NTVIE(InfoExtractor):
|
||||
'7': 'video2',
|
||||
}
|
||||
|
||||
app = apps[puid22] if puid22 in apps else apps['4']
|
||||
app = apps.get(puid22, apps['4'])
|
||||
|
||||
formats = []
|
||||
for format_id in ['', 'hi', 'webm']:
|
||||
|
69
youtube_dl/extractor/nuvid.py
Normal file
69
youtube_dl/extractor/nuvid.py
Normal file
@@ -0,0 +1,69 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
compat_urllib_request,
|
||||
)
|
||||
|
||||
|
||||
class NuvidIE(InfoExtractor):
    """Information extractor for nuvid.com video pages.

    The mobile site is used throughout: the play page is requested once per
    download-speed cookie value to collect the available formats, then the
    video page is scraped for the metadata.
    """

    _VALID_URL = r'^https?://(?:www|m)\.nuvid\.com/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://m.nuvid.com/video/1310741/',
        'md5': 'eab207b7ac4fccfb4e23c86201f11277',
        'info_dict': {
            'id': '1310741',
            'ext': 'mp4',
            'title': 'Horny babes show their awesome bodeis and',
            'duration': 129,
            'upload_date': '20140508',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video addressed by ``url``.

        Only the title is mandatory; thumbnail, duration and upload date
        are looked up non-fatally and may be absent from the result.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        formats = []

        # The dwnld_speed cookie selects which file the play page links to.
        for dwnld_speed, format_id in [(0, '3gp'), (5, 'mp4')]:
            request = compat_urllib_request.Request(
                'http://m.nuvid.com/play/%s' % video_id)
            request.add_header('Cookie', 'skip_download_page=1; dwnld_speed=%d; adv_show=1' % dwnld_speed)
            webpage = self._download_webpage(
                request, video_id, 'Downloading %s page' % format_id)
            video_url = self._html_search_regex(
                r'<a href="([^"]+)"\s*>Continue to watch video', webpage, '%s video URL' % format_id, fatal=False)
            if not video_url:
                continue
            formats.append({
                'url': video_url,
                'format_id': format_id,
            })

        webpage = self._download_webpage(
            'http://m.nuvid.com/video/%s' % video_id, video_id, 'Downloading video page')
        title = self._html_search_regex(
            r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>', webpage, 'title').strip()
        thumbnail_path = self._html_search_regex(
            r'href="(/thumbs/[^"]+)"[^>]*data-link_type="thumbs"',
            webpage, 'thumbnail URL', fatal=False)
        # The lookup above is non-fatal: only build an absolute URL when a
        # path was actually found, instead of emitting '...comNone'.
        thumbnail = (
            'http://m.nuvid.com%s' % thumbnail_path
            if thumbnail_path else None)
        duration = parse_duration(self._html_search_regex(
            r'Length:\s*<span>(\d{2}:\d{2})</span>', webpage, 'duration', fatal=False))
        upload_date = unified_strdate(self._html_search_regex(
            r'Added:\s*<span>(\d{4}-\d{2}-\d{2})</span>', webpage, 'upload date', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'upload_date': upload_date,
            'age_limit': 18,
            'formats': formats,
        }
|
77
youtube_dl/extractor/nytimes.py
Normal file
77
youtube_dl/extractor/nytimes.py
Normal file
@@ -0,0 +1,77 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601
|
||||
|
||||
|
||||
class NYTimesIE(InfoExtractor):
    """Information extractor for nytimes.com video pages.

    Everything is taken from the site's video JSON API, addressed by the
    numeric id found in the page URL.
    """

    _VALID_URL = r'https?://(?:www\.)?nytimes\.com/video/(?:[^/]+/)+(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
        'md5': '18a525a510f942ada2720db5f31644c0',
        'info_dict': {
            'id': '100000002847155',
            'ext': 'mov',
            'title': 'Verbatim: What Is a Photocopier?',
            'description': 'md5:93603dada88ddbda9395632fdc5da260',
            'timestamp': 1398631707,
            'upload_date': '20140427',
            'uploader': 'Brett Weiner',
            'duration': 419,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video addressed by ``url``."""
        video_id = re.match(self._VALID_URL, url).group('id')

        api_data = self._download_json(
            'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id,
            video_id, 'Downloading video JSON')

        title = api_data['headline']
        description = api_data['summary']
        # The API reports the duration in thousandths of a second.
        duration = api_data['duration'] / 1000.0

        uploader = api_data['byline']
        # The last eight characters of the date are cut off before parsing.
        timestamp = parse_iso8601(api_data['publication_date'][:-8])

        def get_file_size(file_size):
            # The size arrives either as a plain integer or wrapped in a
            # dict under 'value'; anything else counts as 0.
            if isinstance(file_size, int):
                return file_size
            if isinstance(file_size, dict):
                return int(file_size.get('value', 0))
            return 0

        formats = []
        for rendition in api_data['renditions']:
            formats.append({
                'url': rendition['url'],
                'format_id': rendition['type'],
                'vcodec': rendition['video_codec'],
                'width': rendition['width'],
                'height': rendition['height'],
                'filesize': get_file_size(rendition['fileSize']),
            })
        self._sort_formats(formats)

        thumbnails = []
        for picture in api_data['images']:
            thumbnails.append({
                'url': 'http://www.nytimes.com/%s' % picture['url'],
                'resolution': '%dx%d' % (picture['width'], picture['height']),
            })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'uploader': uploader,
            'duration': duration,
            'formats': formats,
            'thumbnails': thumbnails,
        }
|
@@ -1,10 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import compat_urllib_parse
|
||||
|
||||
|
||||
class PhotobucketIE(InfoExtractor):
|
||||
@@ -14,6 +14,7 @@ class PhotobucketIE(InfoExtractor):
|
||||
'file': 'zpsc0c3b9fa.mp4',
|
||||
'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99',
|
||||
'info_dict': {
|
||||
'timestamp': 1367669341,
|
||||
'upload_date': '20130504',
|
||||
'uploader': 'rachaneronas',
|
||||
'title': 'Tired of Link Building? Try BacklinkMyDomain.com!',
|
||||
@@ -32,11 +33,12 @@ class PhotobucketIE(InfoExtractor):
|
||||
info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);',
|
||||
webpage, 'info json')
|
||||
info = json.loads(info_json)
|
||||
url = compat_urllib_parse.unquote(self._html_search_regex(r'file=(.+\.mp4)', info['linkcodes']['html'], 'url'))
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': info['downloadUrl'],
|
||||
'url': url,
|
||||
'uploader': info['username'],
|
||||
'upload_date': datetime.date.fromtimestamp(info['creationDate']).strftime('%Y%m%d'),
|
||||
'timestamp': info['creationDate'],
|
||||
'title': info['title'],
|
||||
'ext': video_extension,
|
||||
'thumbnail': info['thumbUrl'],
|
||||
|
@@ -45,7 +45,7 @@ class PornHubIE(InfoExtractor):
|
||||
|
||||
video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
|
||||
video_uploader = self._html_search_regex(
|
||||
r'(?s)<div class="video-info-row">\s*From: .+?<(?:a href="/users/|<span class="username)[^>]+>(.+?)<',
|
||||
r'(?s)From: .+?<(?:a href="/users/|<span class="username)[^>]+>(.+?)<',
|
||||
webpage, 'uploader', fatal=False)
|
||||
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
|
||||
if thumbnail:
|
||||
|
@@ -8,8 +8,6 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
unified_strdate,
|
||||
clean_html,
|
||||
RegexNotFoundError,
|
||||
)
|
||||
|
||||
|
||||
@@ -160,19 +158,19 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
_CLIPID_REGEXES = [
|
||||
r'"clip_id"\s*:\s+"(\d+)"',
|
||||
r'clipid: "(\d+)"',
|
||||
r'clipId=(\d+)',
|
||||
r'clip[iI]d=(\d+)',
|
||||
]
|
||||
_TITLE_REGEXES = [
|
||||
r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
|
||||
r'<header class="clearfix">\s*<h3>(.+?)</h3>',
|
||||
r'<!-- start video -->\s*<h1>(.+?)</h1>',
|
||||
r'<div class="ep-femvideos-pi4-video-txt">\s*<h2>(.+?)</h2>',
|
||||
r'<h1 class="att-name">\s*(.+?)</h1>',
|
||||
]
|
||||
_DESCRIPTION_REGEXES = [
|
||||
r'<p itemprop="description">\s*(.+?)</p>',
|
||||
r'<div class="videoDecription">\s*<p><strong>Beschreibung</strong>: (.+?)</p>',
|
||||
r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>',
|
||||
r'<p>(.+?)</p>\s*<div class="ep-femvideos-pi4-video-footer">',
|
||||
r'<p class="att-description">\s*(.+?)\s*</p>',
|
||||
]
|
||||
_UPLOAD_DATE_REGEXES = [
|
||||
r'<meta property="og:published_time" content="(.+?)">',
|
||||
@@ -188,16 +186,7 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
|
||||
page = self._download_webpage(url, video_id, 'Downloading page')
|
||||
|
||||
def extract(patterns, name, page, fatal=False):
|
||||
for pattern in patterns:
|
||||
mobj = re.search(pattern, page)
|
||||
if mobj:
|
||||
return clean_html(mobj.group(1))
|
||||
if fatal:
|
||||
raise RegexNotFoundError(u'Unable to extract %s' % name)
|
||||
return None
|
||||
|
||||
clip_id = extract(self._CLIPID_REGEXES, 'clip id', page, fatal=True)
|
||||
clip_id = self._html_search_regex(self._CLIPID_REGEXES, page, 'clip id')
|
||||
|
||||
access_token = 'testclient'
|
||||
client_name = 'kolibri-1.2.5'
|
||||
@@ -246,13 +235,12 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
|
||||
urls = self._download_json(url_api_url, clip_id, 'Downloading urls JSON')
|
||||
|
||||
title = extract(self._TITLE_REGEXES, 'title', page, fatal=True)
|
||||
description = extract(self._DESCRIPTION_REGEXES, 'description', page)
|
||||
title = self._html_search_regex(self._TITLE_REGEXES, page, 'title')
|
||||
description = self._html_search_regex(self._DESCRIPTION_REGEXES, page, 'description', fatal=False)
|
||||
thumbnail = self._og_search_thumbnail(page)
|
||||
|
||||
upload_date = extract(self._UPLOAD_DATE_REGEXES, 'upload date', page)
|
||||
if upload_date:
|
||||
upload_date = unified_strdate(upload_date)
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
self._UPLOAD_DATE_REGEXES, page, 'upload date', fatal=False))
|
||||
|
||||
formats = []
|
||||
|
||||
|
122
youtube_dl/extractor/rai.py
Normal file
122
youtube_dl/extractor/rai.py
Normal file
@@ -0,0 +1,122 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
|
||||
class RaiIE(SubtitlesInfoExtractor):
|
||||
_VALID_URL = r'(?P<url>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
|
||||
'md5': 'c064c0b2d09c278fb293116ef5d0a32d',
|
||||
'info_dict': {
|
||||
'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
|
||||
'ext': 'mp4',
|
||||
'title': 'Report del 07/04/2014',
|
||||
'description': 'md5:f27c544694cacb46a078db84ec35d2d9',
|
||||
'upload_date': '20140407',
|
||||
'duration': 6160,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html',
|
||||
'md5': '8bb9c151924ce241b74dd52ef29ceafa',
|
||||
'info_dict': {
|
||||
'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9',
|
||||
'ext': 'mp4',
|
||||
'title': 'TG PRIMO TEMPO',
|
||||
'description': '',
|
||||
'upload_date': '20140612',
|
||||
'duration': 1758,
|
||||
},
|
||||
'skip': 'Error 404',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html',
|
||||
'md5': '35cf7c229f22eeef43e48b5cf923bef0',
|
||||
'info_dict': {
|
||||
'id': '7aafdea9-0e5d-49d5-88a6-7e65da67ae13',
|
||||
'ext': 'mp4',
|
||||
'title': 'State of the Net, Antonella La Carpia: regole virali',
|
||||
'description': 'md5:b0ba04a324126903e3da7763272ae63c',
|
||||
'upload_date': '20140613',
|
||||
},
|
||||
'skip': 'Error 404',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-b4a49761-e0cc-4b14-8736-2729f6f73132-tg2.html',
|
||||
'md5': '35694f062977fe6619943f08ed935730',
|
||||
'info_dict': {
|
||||
'id': 'b4a49761-e0cc-4b14-8736-2729f6f73132',
|
||||
'ext': 'mp4',
|
||||
'title': 'Alluvione in Sardegna e dissesto idrogeologico',
|
||||
'description': 'Edizione delle ore 20:30 ',
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
media = self._download_json('%s?json' % mobj.group('url'), video_id, 'Downloading video JSON')
|
||||
|
||||
title = media.get('name')
|
||||
description = media.get('desc')
|
||||
thumbnail = media.get('image_300') or media.get('image_medium') or media.get('image')
|
||||
duration = parse_duration(media.get('length'))
|
||||
uploader = media.get('author')
|
||||
upload_date = unified_strdate(media.get('date'))
|
||||
|
||||
formats = []
|
||||
|
||||
for format_id in ['wmv', 'm3u8', 'mediaUri', 'h264']:
|
||||
media_url = media.get(format_id)
|
||||
if not media_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': media_url,
|
||||
'format_id': format_id,
|
||||
'ext': 'mp4',
|
||||
})
|
||||
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
page = self._download_webpage(url, video_id)
|
||||
self._list_available_subtitles(video_id, page)
|
||||
return
|
||||
|
||||
subtitles = {}
|
||||
if self._have_to_download_any_subtitles:
|
||||
page = self._download_webpage(url, video_id)
|
||||
subtitles = self.extract_subtitles(video_id, page)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _get_available_subtitles(self, video_id, webpage):
|
||||
subtitles = {}
|
||||
m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage)
|
||||
if m:
|
||||
captions = m.group('captions')
|
||||
STL_EXT = '.stl'
|
||||
SRT_EXT = '.srt'
|
||||
if captions.endswith(STL_EXT):
|
||||
captions = captions[:-len(STL_EXT)] + SRT_EXT
|
||||
subtitles['it'] = 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions)
|
||||
return subtitles
|
@@ -12,7 +12,12 @@ from ..utils import (
|
||||
|
||||
class RUTVIE(InfoExtractor):
|
||||
IE_DESC = 'RUTV.RU'
|
||||
_VALID_URL = r'https?://player\.(?:rutv\.ru|vgtrk\.com)/(?:flash2v/container\.swf\?id=|iframe/(?P<type>swf|video|live)/id/)(?P<id>\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://player\.(?:rutv\.ru|vgtrk\.com)/
|
||||
(?P<path>flash2v/container\.swf\?id=
|
||||
|iframe/(?P<type>swf|video|live)/id/
|
||||
|index/iframe/cast_id/)
|
||||
(?P<id>\d+)'''
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -90,7 +95,7 @@ class RUTVIE(InfoExtractor):
|
||||
@classmethod
|
||||
def _extract_url(cls, webpage):
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.rutv\.ru/iframe/(?:swf|video|live)/id/.+?)\1', webpage)
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.rutv\.ru/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
||||
@@ -103,10 +108,16 @@ class RUTVIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_type = mobj.group('type')
|
||||
video_path = mobj.group('path')
|
||||
|
||||
if not video_type or video_type == 'swf':
|
||||
if video_path.startswith('flash2v'):
|
||||
video_type = 'video'
|
||||
elif video_path.startswith('iframe'):
|
||||
video_type = mobj.group('type')
|
||||
if video_type == 'swf':
|
||||
video_type = 'video'
|
||||
elif video_path.startswith('index/iframe/cast_id'):
|
||||
video_type = 'live'
|
||||
|
||||
json_data = self._download_json(
|
||||
'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if video_type == 'live' else '', video_id),
|
||||
|
@@ -11,17 +11,13 @@ class SciVeeIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.scivee.tv/node/62352',
|
||||
#'md5': 'b16699b74c9e6a120f6772a44960304f',
|
||||
'md5': 'b16699b74c9e6a120f6772a44960304f',
|
||||
'info_dict': {
|
||||
'id': '62352',
|
||||
'ext': 'mp4',
|
||||
'title': 'Adam Arkin at the 2014 DOE JGI Genomics of Energy & Environment Meeting',
|
||||
'description': 'md5:81f1710638e11a481358fab1b11059d7',
|
||||
},
|
||||
'params': {
|
||||
# Range HTTP header is ignored
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
44
youtube_dl/extractor/slutload.py
Normal file
44
youtube_dl/extractor/slutload.py
Normal file
@@ -0,0 +1,44 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class SlutloadIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$'
|
||||
_TEST = {
|
||||
'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
|
||||
'md5': '0cf531ae8006b530bd9df947a6a0df77',
|
||||
'info_dict': {
|
||||
'id': 'TD73btpBqSxc',
|
||||
'ext': 'mp4',
|
||||
"title": "virginie baisee en cam",
|
||||
"age_limit": 18,
|
||||
'thumbnail': 're:https?://.*?\.jpg'
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>',
|
||||
webpage, 'title').strip()
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'(?s)<div id="vidPlayer"\s+data-url="([^"]+)"',
|
||||
webpage, 'video URL')
|
||||
thumbnail = self._html_search_regex(
|
||||
r'(?s)<div id="vidPlayer"\s+.*?previewer-file="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': video_title,
|
||||
'thumbnail': thumbnail,
|
||||
'age_limit': 18
|
||||
}
|
@@ -1,7 +1,6 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
import itertools
|
||||
|
||||
@@ -12,6 +11,7 @@ from ..utils import (
|
||||
compat_urllib_parse,
|
||||
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
@@ -25,7 +25,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
of the stream token and uid
|
||||
"""
|
||||
|
||||
_VALID_URL = r'''^(?:https?://)?
|
||||
_VALID_URL = r'''(?x)^(?:https?://)?
|
||||
(?:(?:(?:www\.|m\.)?soundcloud\.com/
|
||||
(?P<uploader>[\w\d-]+)/
|
||||
(?!sets/)(?P<title>[\w\d-]+)/?
|
||||
@@ -44,7 +44,8 @@ class SoundcloudIE(InfoExtractor):
|
||||
"upload_date": "20121011",
|
||||
"description": "No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",
|
||||
"uploader": "E.T. ExTerrestrial Music",
|
||||
"title": "Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
|
||||
"title": "Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1",
|
||||
"duration": 143,
|
||||
}
|
||||
},
|
||||
# not streamable song
|
||||
@@ -57,6 +58,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
|
||||
'uploader': 'The Royal Concept',
|
||||
'upload_date': '20120521',
|
||||
'duration': 227,
|
||||
},
|
||||
'params': {
|
||||
# rtmp
|
||||
@@ -74,6 +76,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
'uploader': 'jaimeMF',
|
||||
'description': 'test chars: \"\'/\\ä↭',
|
||||
'upload_date': '20131209',
|
||||
'duration': 9,
|
||||
},
|
||||
},
|
||||
# downloadable song
|
||||
@@ -87,6 +90,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
'description': 'Vocals',
|
||||
'uploader': 'Sim Gretina',
|
||||
'upload_date': '20130815',
|
||||
#'duration': 42,
|
||||
},
|
||||
},
|
||||
]
|
||||
@@ -94,10 +98,6 @@ class SoundcloudIE(InfoExtractor):
|
||||
_CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
|
||||
_IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None
|
||||
|
||||
def report_resolve(self, video_id):
|
||||
"""Report information extraction."""
|
||||
self.to_screen('%s: Resolving id' % video_id)
|
||||
@@ -123,6 +123,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
'title': info['title'],
|
||||
'description': info['description'],
|
||||
'thumbnail': thumbnail,
|
||||
'duration': int_or_none(info.get('duration'), 1000),
|
||||
}
|
||||
formats = []
|
||||
if info.get('downloadable', False):
|
||||
@@ -141,11 +142,10 @@ class SoundcloudIE(InfoExtractor):
|
||||
# We have to retrieve the url
|
||||
streams_url = ('http://api.soundcloud.com/i1/tracks/{0}/streams?'
|
||||
'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token))
|
||||
stream_json = self._download_webpage(
|
||||
format_dict = self._download_json(
|
||||
streams_url,
|
||||
track_id, 'Downloading track url')
|
||||
|
||||
format_dict = json.loads(stream_json)
|
||||
for key, stream_url in format_dict.items():
|
||||
if key.startswith('http'):
|
||||
formats.append({
|
||||
@@ -198,7 +198,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
full_title = track_id
|
||||
elif mobj.group('player'):
|
||||
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
return self.url_result(query['url'][0], ie='Soundcloud')
|
||||
return self.url_result(query['url'][0])
|
||||
else:
|
||||
# extract uploader (which is in the url)
|
||||
uploader = mobj.group('uploader')
|
||||
@@ -213,11 +213,11 @@ class SoundcloudIE(InfoExtractor):
|
||||
|
||||
url = 'http://soundcloud.com/%s' % resolve_title
|
||||
info_json_url = self._resolv_url(url)
|
||||
info_json = self._download_webpage(info_json_url, full_title, 'Downloading info JSON')
|
||||
info = self._download_json(info_json_url, full_title, 'Downloading info JSON')
|
||||
|
||||
info = json.loads(info_json)
|
||||
return self._extract_info_dict(info, full_title, secret_token=token)
|
||||
|
||||
|
||||
class SoundcloudSetIE(SoundcloudIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
|
||||
IE_NAME = 'soundcloud:set'
|
||||
@@ -232,16 +232,15 @@ class SoundcloudSetIE(SoundcloudIE):
|
||||
# extract uploader (which is in the url)
|
||||
uploader = mobj.group(1)
|
||||
# extract simple title (uploader + slug of song title)
|
||||
slug_title = mobj.group(2)
|
||||
slug_title = mobj.group(2)
|
||||
full_title = '%s/sets/%s' % (uploader, slug_title)
|
||||
|
||||
self.report_resolve(full_title)
|
||||
|
||||
url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
|
||||
resolv_url = self._resolv_url(url)
|
||||
info_json = self._download_webpage(resolv_url, full_title)
|
||||
info = self._download_json(resolv_url, full_title)
|
||||
|
||||
info = json.loads(info_json)
|
||||
if 'errors' in info:
|
||||
for err in info['errors']:
|
||||
self._downloader.report_error('unable to download video webpage: %s' % compat_str(err['error_message']))
|
||||
@@ -256,7 +255,7 @@ class SoundcloudSetIE(SoundcloudIE):
|
||||
|
||||
|
||||
class SoundcloudUserIE(SoundcloudIE):
|
||||
_VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$'
|
||||
_VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
|
||||
IE_NAME = 'soundcloud:user'
|
||||
|
||||
# it's in tests/test_playlists.py
|
||||
@@ -265,29 +264,65 @@ class SoundcloudUserIE(SoundcloudIE):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
uploader = mobj.group('user')
|
||||
resource = mobj.group('rsrc')
|
||||
if resource is None:
|
||||
resource = 'tracks'
|
||||
elif resource == 'likes':
|
||||
resource = 'favorites'
|
||||
|
||||
url = 'http://soundcloud.com/%s/' % uploader
|
||||
resolv_url = self._resolv_url(url)
|
||||
user_json = self._download_webpage(resolv_url, uploader,
|
||||
'Downloading user info')
|
||||
user = json.loads(user_json)
|
||||
user = self._download_json(
|
||||
resolv_url, uploader, 'Downloading user info')
|
||||
base_url = 'http://api.soundcloud.com/users/%s/%s.json?' % (uploader, resource)
|
||||
|
||||
tracks = []
|
||||
entries = []
|
||||
for i in itertools.count():
|
||||
data = compat_urllib_parse.urlencode({'offset': i*50,
|
||||
'client_id': self._CLIENT_ID,
|
||||
})
|
||||
tracks_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % user['id'] + data
|
||||
response = self._download_webpage(tracks_url, uploader,
|
||||
'Downloading tracks page %s' % (i+1))
|
||||
new_tracks = json.loads(response)
|
||||
tracks.extend(self._extract_info_dict(track, quiet=True) for track in new_tracks)
|
||||
if len(new_tracks) < 50:
|
||||
data = compat_urllib_parse.urlencode({
|
||||
'offset': i * 50,
|
||||
'limit': 50,
|
||||
'client_id': self._CLIENT_ID,
|
||||
})
|
||||
new_entries = self._download_json(
|
||||
base_url + data, uploader, 'Downloading track page %s' % (i + 1))
|
||||
if len(new_entries) == 0:
|
||||
self.to_screen('%s: End page received' % uploader)
|
||||
break
|
||||
entries.extend(self._extract_info_dict(e, quiet=True) for e in new_entries)
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': compat_str(user['id']),
|
||||
'title': user['username'],
|
||||
'entries': tracks,
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
|
||||
class SoundcloudPlaylistIE(SoundcloudIE):
|
||||
_VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)'
|
||||
IE_NAME = 'soundcloud:playlist'
|
||||
|
||||
# it's in tests/test_playlists.py
|
||||
_TESTS = []
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
playlist_id = mobj.group('id')
|
||||
base_url = '%s//api.soundcloud.com/playlists/%s.json?' % (self.http_scheme(), playlist_id)
|
||||
|
||||
data = compat_urllib_parse.urlencode({
|
||||
'client_id': self._CLIENT_ID,
|
||||
})
|
||||
data = self._download_json(
|
||||
base_url + data, playlist_id, 'Downloading playlist')
|
||||
|
||||
entries = [
|
||||
self._extract_info_dict(t, quiet=True) for t in data['tracks']]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': playlist_id,
|
||||
'title': data.get('title'),
|
||||
'description': data.get('description'),
|
||||
'entries': entries,
|
||||
}
|
||||
|
40
youtube_dl/extractor/soundgasm.py
Normal file
40
youtube_dl/extractor/soundgasm.py
Normal file
@@ -0,0 +1,40 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class SoundgasmIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)'
|
||||
_TEST = {
|
||||
'url': 'http://soundgasm.net/u/ytdl/Piano-sample',
|
||||
'md5': '010082a2c802c5275bb00030743e75ad',
|
||||
'info_dict': {
|
||||
'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9',
|
||||
'ext': 'm4a',
|
||||
'title': 'ytdl_Piano-sample',
|
||||
'description': 'Royalty Free Sample Music'
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('title')
|
||||
audio_title = mobj.group('user') + '_' + mobj.group('title')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
audio_url = self._html_search_regex(
|
||||
r'(?s)m4a\:\s"([^"]+)"', webpage, 'audio URL')
|
||||
audio_id = re.split('\/|\.', audio_url)[-2]
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<li>Description:\s(.*?)<\/li>', webpage, 'description',
|
||||
fatal=False)
|
||||
|
||||
return {
|
||||
'id': audio_id,
|
||||
'display_id': display_id,
|
||||
'url': audio_url,
|
||||
'title': audio_title,
|
||||
'description': description
|
||||
}
|
@@ -1,3 +1,4 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@@ -9,18 +10,33 @@ class SpiegelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
|
||||
'file': '1259285.mp4',
|
||||
'md5': '2c2754212136f35fb4b19767d242f66e',
|
||||
'info_dict': {
|
||||
'id': '1259285',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv',
|
||||
'description': 'md5:8029d8310232196eb235d27575a8b9f4',
|
||||
'duration': 49,
|
||||
},
|
||||
},
|
||||
{
|
||||
}, {
|
||||
'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
|
||||
'file': '1309159.mp4',
|
||||
'md5': 'f2cdf638d7aa47654e251e1aee360af1',
|
||||
'info_dict': {
|
||||
'id': '1309159',
|
||||
'ext': 'mp4',
|
||||
'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers',
|
||||
'description': 'md5:c2322b65e58f385a820c10fa03b2d088',
|
||||
'duration': 983,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.spiegel.de/video/johann-westhauser-videobotschaft-des-hoehlenforschers-video-1502367.html',
|
||||
'md5': '54f58ba0e752e3c07bc2a26222dd0acf',
|
||||
'info_dict': {
|
||||
'id': '1502367',
|
||||
'ext': 'mp4',
|
||||
'title': 'Videobotschaft: Höhlenforscher Westhauser dankt seinen Rettern',
|
||||
'description': 'md5:c6f1ec11413ebd1088b6813943e5fc91',
|
||||
'duration': 42,
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -30,18 +46,20 @@ class SpiegelIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
title = self._html_search_regex(
|
||||
r'<div class="module-title">(.*?)</div>', webpage, 'title')
|
||||
description = self._html_search_meta('description', webpage, 'description')
|
||||
|
||||
xml_url = 'http://video2.spiegel.de/flash/' + video_id + '.xml'
|
||||
idoc = self._download_xml(
|
||||
xml_url, video_id,
|
||||
note='Downloading XML', errnote='Failed to download XML')
|
||||
base_url = self._search_regex(
|
||||
r'var\s+server\s*=\s*"([^"]+)\"', webpage, 'server URL')
|
||||
|
||||
xml_url = base_url + video_id + '.xml'
|
||||
idoc = self._download_xml(xml_url, video_id)
|
||||
|
||||
formats = [
|
||||
{
|
||||
'format_id': n.tag.rpartition('type')[2],
|
||||
'url': 'http://video2.spiegel.de/flash/' + n.find('./filename').text,
|
||||
'url': base_url + n.find('./filename').text,
|
||||
'width': int(n.find('./width').text),
|
||||
'height': int(n.find('./height').text),
|
||||
'abr': int(n.find('./audiobitrate').text),
|
||||
@@ -59,7 +77,8 @@ class SpiegelIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
|
81
youtube_dl/extractor/spiegeltv.py
Normal file
81
youtube_dl/extractor/spiegeltv.py
Normal file
@@ -0,0 +1,81 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class SpiegeltvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?spiegel\.tv/filme/(?P<id>[\-a-z0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.spiegel.tv/filme/flug-mh370/',
|
||||
'info_dict': {
|
||||
'id': 'flug-mh370',
|
||||
'ext': 'm4v',
|
||||
'title': 'Flug MH370',
|
||||
'description': 'Das Rätsel um die Boeing 777 der Malaysia-Airlines',
|
||||
'thumbnail': 're:http://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._html_search_regex(r'<h1.*?>(.*?)</h1>', webpage, 'title')
|
||||
|
||||
apihost = 'http://spiegeltv-ivms2-restapi.s3.amazonaws.com'
|
||||
version_json = self._download_json(
|
||||
'%s/version.json' % apihost, video_id,
|
||||
note='Downloading version information')
|
||||
version_name = version_json['version_name']
|
||||
|
||||
slug_json = self._download_json(
|
||||
'%s/%s/restapi/slugs/%s.json' % (apihost, version_name, video_id),
|
||||
video_id,
|
||||
note='Downloading object information')
|
||||
oid = slug_json['object_id']
|
||||
|
||||
media_json = self._download_json(
|
||||
'%s/%s/restapi/media/%s.json' % (apihost, version_name, oid),
|
||||
video_id, note='Downloading media information')
|
||||
uuid = media_json['uuid']
|
||||
is_wide = media_json['is_wide']
|
||||
|
||||
server_json = self._download_json(
|
||||
'http://www.spiegel.tv/streaming_servers/', video_id,
|
||||
note='Downloading server information')
|
||||
server = server_json[0]['endpoint']
|
||||
|
||||
thumbnails = []
|
||||
for image in media_json['images']:
|
||||
thumbnails.append({
|
||||
'url': image['url'],
|
||||
'width': image['width'],
|
||||
'height': image['height'],
|
||||
})
|
||||
|
||||
description = media_json['subtitle']
|
||||
duration = media_json['duration_in_ms'] / 1000.
|
||||
|
||||
if is_wide:
|
||||
format = '16x9'
|
||||
else:
|
||||
format = '4x3'
|
||||
|
||||
url = server + 'mp4:' + uuid + '_spiegeltv_0500_' + format + '.m4v'
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': url,
|
||||
'ext': 'm4v',
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'thumbnails': thumbnails
|
||||
}
|
@@ -53,7 +53,7 @@ class SteamIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20140329',
|
||||
'title': 'FRONTIERS - Final Greenlight Trailer',
|
||||
'description': "The final trailer for the Steam Greenlight launch. Hooray, progress! Here's the official Greenlight page: http://steamcommunity.com/sharedfiles/filedetails/?id=242472205",
|
||||
'description': 'md5:6df4fe8dd494ae811869672b0767e025',
|
||||
'uploader': 'AAD Productions',
|
||||
'uploader_id': 'AtomicAgeDogGames',
|
||||
}
|
||||
|
@@ -5,13 +5,16 @@ import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
compat_str,
|
||||
)
|
||||
|
||||
|
||||
class StreamCZIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<videoid>.+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti',
|
||||
'md5': '6d3ca61a8d0633c9c542b92fcb936b0c',
|
||||
'info_dict': {
|
||||
@@ -22,7 +25,18 @@ class StreamCZIE(InfoExtractor):
|
||||
'thumbnail': 'http://im.stream.cz/episode/52961d7e19d423f8f06f0100',
|
||||
'duration': 256,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.stream.cz/blanik/10002447-tri-roky-pro-mazanka',
|
||||
'md5': '246272e753e26bbace7fcd9deca0650c',
|
||||
'info_dict': {
|
||||
'id': '10002447',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kancelář Blaník: Tři roky pro Mazánka',
|
||||
'description': 'md5:9177695a8b756a0a8ab160de4043b392',
|
||||
'thumbnail': 'http://im.stream.cz/episode/537f838c50c11f8d21320000',
|
||||
'duration': 368,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -57,7 +71,7 @@ class StreamCZIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': str(jsonData['id']),
|
||||
'id': compat_str(jsonData['episode_id']),
|
||||
'title': self._og_search_title(webpage),
|
||||
'thumbnail': jsonData['episode_image_original_url'].replace('//', 'http://'),
|
||||
'formats': formats,
|
||||
|
104
youtube_dl/extractor/swrmediathek.py
Normal file
104
youtube_dl/extractor/swrmediathek.py
Normal file
@@ -0,0 +1,104 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_duration
|
||||
|
||||
|
||||
class SWRMediathekIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/player\.htm\?show=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://swrmediathek.de/player.htm?show=849790d0-dab8-11e3-a953-0026b975f2e6',
|
||||
'md5': '8c5f6f0172753368547ca8413a7768ac',
|
||||
'info_dict': {
|
||||
'id': '849790d0-dab8-11e3-a953-0026b975f2e6',
|
||||
'ext': 'mp4',
|
||||
'title': 'SWR odysso',
|
||||
'description': 'md5:2012e31baad36162e97ce9eb3f157b8a',
|
||||
'thumbnail': 're:^http:.*\.jpg$',
|
||||
'duration': 2602,
|
||||
'upload_date': '20140515',
|
||||
'uploader': 'SWR Fernsehen',
|
||||
'uploader_id': '990030',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://swrmediathek.de/player.htm?show=0e1a8510-ddf2-11e3-9be3-0026b975f2e6',
|
||||
'md5': 'b10ab854f912eecc5a6b55cd6fc1f545',
|
||||
'info_dict': {
|
||||
'id': '0e1a8510-ddf2-11e3-9be3-0026b975f2e6',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nachtcafé - Alltagsdroge Alkohol - zwischen Sektempfang und Komasaufen',
|
||||
'description': 'md5:e0a3adc17e47db2c23aab9ebc36dbee2',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'duration': 5305,
|
||||
'upload_date': '20140516',
|
||||
'uploader': 'SWR Fernsehen',
|
||||
'uploader_id': '990030',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://swrmediathek.de/player.htm?show=bba23e10-cb93-11e3-bf7f-0026b975f2e6',
|
||||
'md5': '4382e4ef2c9d7ce6852535fa867a0dd3',
|
||||
'info_dict': {
|
||||
'id': 'bba23e10-cb93-11e3-bf7f-0026b975f2e6',
|
||||
'ext': 'mp3',
|
||||
'title': 'Saša Stanišic: Vor dem Fest',
|
||||
'description': 'md5:5b792387dc3fbb171eb709060654e8c9',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'duration': 3366,
|
||||
'upload_date': '20140520',
|
||||
'uploader': 'SWR 2',
|
||||
'uploader_id': '284670',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video = self._download_json(
|
||||
'http://swrmediathek.de/AjaxEntry?ekey=%s' % video_id, video_id, 'Downloading video JSON')
|
||||
|
||||
attr = video['attr']
|
||||
media_type = attr['entry_etype']
|
||||
|
||||
formats = []
|
||||
for entry in video['sub']:
|
||||
if entry['name'] != 'entry_media':
|
||||
continue
|
||||
|
||||
entry_attr = entry['attr']
|
||||
codec = entry_attr['val0']
|
||||
quality = int(entry_attr['val1'])
|
||||
|
||||
fmt = {
|
||||
'url': entry_attr['val2'],
|
||||
'quality': quality,
|
||||
}
|
||||
|
||||
if media_type == 'Video':
|
||||
fmt.update({
|
||||
'format_note': ['144p', '288p', '544p'][quality-1],
|
||||
'vcodec': codec,
|
||||
})
|
||||
elif media_type == 'Audio':
|
||||
fmt.update({
|
||||
'acodec': codec,
|
||||
})
|
||||
formats.append(fmt)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': attr['entry_title'],
|
||||
'description': attr['entry_descl'],
|
||||
'thumbnail': attr['entry_image_16_9'],
|
||||
'duration': parse_duration(attr['entry_durat']),
|
||||
'upload_date': attr['entry_pdatet'][:-4],
|
||||
'uploader': attr['channel_title'],
|
||||
'uploader_id': attr['channel_idkey'],
|
||||
'formats': formats,
|
||||
}
|
79
youtube_dl/extractor/tagesschau.py
Normal file
79
youtube_dl/extractor/tagesschau.py
Normal file
@@ -0,0 +1,79 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class TagesschauIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/video/video(?P<id>-?[0-9]+)\.html'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.tagesschau.de/multimedia/video/video1399128.html',
|
||||
'md5': 'bcdeac2194fb296d599ce7929dfa4009',
|
||||
'info_dict': {
|
||||
'id': '1399128',
|
||||
'ext': 'mp4',
|
||||
'title': 'Harald Range, Generalbundesanwalt, zu den Ermittlungen',
|
||||
'description': 'md5:69da3c61275b426426d711bde96463ab',
|
||||
'thumbnail': 're:^http:.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.tagesschau.de/multimedia/video/video-5964.html',
|
||||
'md5': '66652566900963a3f962333579eeffcf',
|
||||
'info_dict': {
|
||||
'id': '5964',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nahost-Konflikt: Israel bombadiert Ziele im Gazastreifen und Westjordanland',
|
||||
'description': 'md5:07bfc78c48eec3145ed4805299a1900a',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
_FORMATS = {
|
||||
's': {'width': 256, 'height': 144, 'quality': 1},
|
||||
'm': {'width': 512, 'height': 288, 'quality': 2},
|
||||
'l': {'width': 960, 'height': 544, 'quality': 3},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
if video_id.startswith('-'):
|
||||
display_id = video_id.strip('-')
|
||||
else:
|
||||
display_id = video_id
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
playerpage = self._download_webpage(
|
||||
'http://www.tagesschau.de/multimedia/video/video%s~player_autoplay-true.html' % video_id,
|
||||
display_id, 'Downloading player page')
|
||||
|
||||
medias = re.findall(
|
||||
r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"',
|
||||
playerpage)
|
||||
|
||||
formats = []
|
||||
for url, ext, res in medias:
|
||||
f = {
|
||||
'format_id': res + '_' + ext,
|
||||
'url': url,
|
||||
'ext': ext,
|
||||
}
|
||||
f.update(self._FORMATS.get(res, {}))
|
||||
formats.append(f)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)[-1]
|
||||
|
||||
return {
|
||||
'id': display_id,
|
||||
'title': self._og_search_title(webpage).strip(),
|
||||
'thumbnail': 'http://www.tagesschau.de' + thumbnail,
|
||||
'formats': formats,
|
||||
'description': self._og_search_description(webpage).strip(),
|
||||
}
|
124
youtube_dl/extractor/teachertube.py
Normal file
124
youtube_dl/extractor/teachertube.py
Normal file
@@ -0,0 +1,124 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
qualities,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
class TeacherTubeIE(InfoExtractor):
    """Extractor for individual teachertube.com video and audio pages.

    Handles the legacy ``viewVideo.php`` / ``music.php`` URLs as well as the
    ``/video/<slug>-<id>`` and ``/audio/<id>`` forms matched by ``_VALID_URL``.
    """

    IE_NAME = 'teachertube'
    IE_DESC = 'teachertube.com videos'

    _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=|video/(?:[\da-z-]+-)?|audio/)(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997',
        'md5': 'f9434ef992fd65936d72999951ee254c',
        'info_dict': {
            'id': '339997',
            'ext': 'mp4',
            'title': 'Measures of dispersion from a frequency table',
            'description': 'Measures of dispersion from a frequency table',
            'thumbnail': r're:http://.*\.jpg',
        },
    }, {
        'url': 'http://www.teachertube.com/viewVideo.php?video_id=340064',
        'md5': '0d625ec6bc9bf50f70170942ad580676',
        'info_dict': {
            'id': '340064',
            'ext': 'mp4',
            'title': 'How to Make Paper Dolls _ Paper Art Projects',
            'description': 'Learn how to make paper dolls in this simple',
            'thumbnail': r're:http://.*\.jpg',
        },
    }, {
        'url': 'http://www.teachertube.com/music.php?music_id=8805',
        'md5': '01e8352006c65757caf7b961f6050e21',
        'info_dict': {
            'id': '8805',
            'ext': 'mp3',
            'title': 'PER ASPERA AD ASTRA',
            'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNI?KE ?KOLE P',
        },
    }, {
        'url': 'http://www.teachertube.com/video/intro-video-schleicher-297790',
        'md5': '9c79fbb2dd7154823996fc28d4a26998',
        'info_dict': {
            'id': '297790',
            'ext': 'mp4',
            'title': 'Intro Video - Schleicher',
            'description': 'Intro Video - Why to flip, how flipping will',
        },
    }]

    def _real_extract(self, url):
        """Download the media page at ``url`` and build its info dict.

        Returns a dict with ``id``, ``title``, ``thumbnail``, ``formats``
        and ``description``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        title = self._html_search_meta('title', webpage, 'title')
        TITLE_SUFFIX = ' - TeacherTube'
        # The description from the same helper is None-checked below, so
        # guard the title the same way instead of calling a method on None.
        if title and title.endswith(TITLE_SUFFIX):
            title = title[:-len(TITLE_SUFFIX)].strip()

        description = self._html_search_meta('description', webpage, 'description')
        if description:
            description = description.strip()

        # Preference by file extension: mp3 lowest, mp4 highest.
        quality = qualities(['mp3', 'flv', 'mp4'])

        # Media URLs can appear in three different places in the page.
        media_urls = re.findall(r'data-contenturl="([^"]+)"', webpage)
        media_urls.extend(re.findall(r'var\s+filePath\s*=\s*"([^"]+)"', webpage))
        media_urls.extend(re.findall(r'\'file\'\s*:\s*["\']([^"\']+)["\'],', webpage))

        # set() drops URLs found by more than one of the patterns above.
        formats = [
            {
                'url': media_url,
                'quality': quality(determine_ext(media_url))
            } for media_url in set(media_urls)
        ]

        self._sort_formats(formats)

        # The thumbnail is optional (the audio tests above list none), so a
        # page without an 'image' entry must not abort the whole extraction.
        thumbnail = self._html_search_regex(
            r'\'image\'\s*:\s*["\']([^"\']+)["\']',
            webpage, 'thumbnail', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
            'description': description,
        }
|
||||
|
||||
|
||||
class TeacherTubeUserIE(InfoExtractor):
    """Playlist extractor for teachertube.com user profiles and collections.

    Collects the video/audio page links from the listing page and from its
    paginated ``ajax-user/user-videos`` follow-up pages, and hands each link
    to the ``TeacherTube`` extractor.
    """

    IE_NAME = 'teachertube:user:collection'
    IE_DESC = 'teachertube.com user and collection videos'

    _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(user/profile|collection)/(?P<user>[0-9a-zA-Z]+)/?'

    # Captures the link that follows each "sidebar_thumb_time" entry.
    _MEDIA_RE = r'(?s)"sidebar_thumb_time">[0-9:]+</div>.+?<a href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)">'

    def _real_extract(self, url):
        """Return a playlist result (id = the matched user/collection id)."""
        mobj = re.match(self._VALID_URL, url)
        user_id = mobj.group('user')

        media_page_urls = []
        webpage = self._download_webpage(url, user_id)
        media_page_urls.extend(re.findall(self._MEDIA_RE, webpage))

        # Escape the id before embedding it in a pattern. The first and last
        # captured page numbers are dropped -- presumably they duplicate the
        # already-downloaded first page and a "next"-style link; TODO confirm.
        pages = re.findall(
            r'/ajax-user/user-videos/%s\?page=([0-9]+)' % re.escape(user_id),
            webpage)[1:-1]
        for p in pages:
            more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p)
            webpage = self._download_webpage(
                more, user_id, 'Downloading page %s/%s' % (p, len(pages) + 1))
            media_page_urls.extend(re.findall(self._MEDIA_RE, webpage))

        entries = [
            self.url_result(media_page_url, 'TeacherTube')
            for media_page_url in media_page_urls
        ]

        return self.playlist_result(entries, user_id)
|
33
youtube_dl/extractor/teachingchannel.py
Normal file
33
youtube_dl/extractor/teachingchannel.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
|
||||
|
||||
class TeachingChannelIE(InfoExtractor):
    """Extractor for teachingchannel.org video pages.

    The page only embeds an Ooyala player, so extraction consists of finding
    the embed code and delegating to ``OoyalaIE``.
    """

    _VALID_URL = r'https?://www\.teachingchannel\.org/videos/(?P<title>.+)'

    _TEST = {
        'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution',
        'info_dict': {
            'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
            'ext': 'mp4',
            'title': 'A History of Teaming',
            'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Find the Ooyala embed code on the page and return its URL result."""
        mobj = re.match(self._VALID_URL, url)
        # The URL slug doubles as the identifier shown while downloading.
        title = mobj.group('title')
        webpage = self._download_webpage(url, title)
        ooyala_code = self._search_regex(
            r'data-embed-code=\'(.+?)\'', webpage, 'ooyala code')

        return OoyalaIE._build_url_result(ooyala_code)
|
@@ -27,7 +27,7 @@ class TEDIE(SubtitlesInfoExtractor):
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
|
||||
'md5': '4ea1dada91e4174b53dac2bb8ace429d',
|
||||
'md5': 'fc94ac279feebbce69f21c0c6ee82810',
|
||||
'info_dict': {
|
||||
'id': '102',
|
||||
'ext': 'mp4',
|
||||
|
84
youtube_dl/extractor/tenplay.py
Normal file
84
youtube_dl/extractor/tenplay.py
Normal file
@@ -0,0 +1,84 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class TenPlayIE(InfoExtractor):
    """Extractor for ten.com.au / tenplay.com.au pages.

    Reads the Brightcove video id and API token from the page, then queries
    the Brightcove ``find_video_by_id`` endpoint for metadata and renditions.
    """

    _VALID_URL = r'https?://(?:www\.)?ten(play)?\.com\.au/.+'
    _TEST = {
        'url': 'http://tenplay.com.au/ten-insider/extra/season-2013/tenplay-tv-your-way',
        #'md5': 'd68703d9f73dc8fccf3320ab34202590',
        'info_dict': {
            'id': '2695695426001',
            'ext': 'flv',
            'title': 'TENplay: TV your way',
            'description': 'Welcome to a new TV experience. Enjoy a taste of the TENplay benefits.',
            'timestamp': 1380150606.889,
            'upload_date': '20130925',
            'uploader': 'TENplay',
        },
        'params': {
            'skip_download': True,  # Requires rtmpdump
        }
    }

    # Fields requested from the API via the video_fields query parameter.
    _video_fields = [
        "id", "name", "shortDescription", "longDescription", "creationDate",
        "publishedDate", "lastModifiedDate", "customFields", "videoStillURL",
        "thumbnailURL", "referenceId", "length", "playsTotal",
        "playsTrailingWeek", "renditions", "captioning", "startDate", "endDate"]

    def _real_extract(self, url):
        """Build the info dict for the video embedded in the page at ``url``."""
        # No id is known before the page is fetched, so the URL itself is
        # passed as the identifier for this download.
        webpage = self._download_webpage(url, url)
        video_id = self._html_search_regex(
            r'videoID: "(\d+?)"', webpage, 'video_id')
        api_token = self._html_search_regex(
            r'apiToken: "([a-zA-Z0-9-_\.]+?)"', webpage, 'api_token')
        # Only used as the identifier for the API request below; the
        # returned title comes from the API response.
        title = self._html_search_regex(
            r'<meta property="og:title" content="\s*(.*?)\s*"\s*/?\s*>',
            webpage, 'title')

        video_info = self._download_json(
            'https://api.brightcove.com/services/library?command=find_video_by_id&video_id=%s&token=%s&video_fields=%s' % (
                video_id, api_token, ','.join(self._video_fields)),
            title)

        formats = []
        for rendition in video_info['renditions']:
            media_url = rendition['remoteUrl'] or rendition['url']
            protocol = 'rtmp' if media_url.startswith('rtmp') else 'http'
            container = rendition['videoContainer'].lower()
            vcodec = rendition['videoCodec'].lower()
            ext = 'flv' if protocol == 'rtmp' else container

            if protocol == 'rtmp':
                media_url = media_url.replace('&mp4:', '')

            formats.append({
                # Label the format with its real protocol; it was previously
                # hard-coded to 'rtmp' even for HTTP renditions.
                'format_id': '_'.join([protocol, container, vcodec]),
                'width': rendition['frameWidth'],
                'height': rendition['frameHeight'],
                'tbr': rendition['encodingRate'] / 1024,
                'filesize': rendition['size'],
                'protocol': protocol,
                'ext': ext,
                'vcodec': vcodec,
                'container': container,
                'url': media_url,
            })

        # NOTE(review): customFields is presumably null for videos without
        # custom fields -- treat that like an empty mapping; confirm.
        custom_fields = video_info['customFields'] or {}
        if 'production_company_distributor' in custom_fields:
            uploader = custom_fields['production_company_distributor']
        else:
            uploader = 'TENplay'

        return {
            'id': video_id,
            'display_id': video_info['referenceId'],
            'title': video_info['name'],
            'description': video_info['shortDescription'] or video_info['longDescription'],
            'formats': formats,
            'thumbnails': [{
                'url': video_info['videoStillURL']
            }, {
                'url': video_info['thumbnailURL']
            }],
            'thumbnail': video_info['videoStillURL'],
            # length and creationDate are divided by 1000 before being
            # returned as duration and timestamp (presumably ms -> s).
            'duration': video_info['length'] / 1000,
            'timestamp': float(video_info['creationDate']) / 1000,
            'uploader': uploader,
            'view_count': video_info['playsTotal']
        }
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user