Compare commits
207 Commits
2014.04.21
...
2014.06.02
Author | SHA1 | Date | |
---|---|---|---|
b7e8b6e37a | |||
ceb7a17f34 | |||
1a2f2e1e66 | |||
6803016858 | |||
9b7c4fd981 | |||
dc31942f42 | |||
1f6b8f3115 | |||
9168308579 | |||
7e8fdb1aae | |||
386ba39cac | |||
236d0cd07c | |||
ed86f38a11 | |||
6db80ad2db | |||
6ebb46c106 | |||
0f97c9a06f | |||
77fb72646f | |||
aae74e3832 | |||
894e730911 | |||
63961d87a6 | |||
87fe568c28 | |||
46531b374d | |||
9e8753911c | |||
5c6b1e578c | |||
8f0c8fb452 | |||
b702ecebf0 | |||
950dc95e97 | |||
d9dd3584e1 | |||
15a9f36849 | |||
d0087d4ff2 | |||
cc5ada6f4c | |||
dfb2e1a325 | |||
65bab327b4 | |||
9eeb7abc6b | |||
c70df21099 | |||
418424e5f5 | |||
8477466125 | |||
865dbd4a26 | |||
b1e6f55912 | |||
4d78f3b770 | |||
7f739999e9 | |||
0f8a01d4f3 | |||
e2bf499b14 | |||
7cf4547ab6 | |||
eec4d8ef96 | |||
1c783bca88 | |||
ac73651f66 | |||
e5ceb3bfda | |||
c2ef29234c | |||
1a1826c1af | |||
c7c6d43fe1 | |||
2902d44f99 | |||
d6e4ba287b | |||
f50ee8d1c3 | |||
0e67ab0d8e | |||
77541837e5 | |||
e3a6576f35 | |||
89bb8e97ee | |||
375696b1b1 | |||
4ea5c7b70d | |||
8dfa187b8a | |||
c1ed1f7055 | |||
1514f74967 | |||
2e8323e3f7 | |||
69f8364042 | |||
79981f039b | |||
91994c2c81 | |||
76e92371ac | |||
08af0205f9 | |||
a725fb1f43 | |||
05ee2b6dad | |||
b74feacac5 | |||
426b52fc5d | |||
5c30b26846 | |||
f07b74fc18 | |||
a5a45015ba | |||
beee53de06 | |||
8712f2bea7 | |||
ea102818c9 | |||
0a871f6880 | |||
481efc84a8 | |||
01ed5c9be3 | |||
ad3bc6acd5 | |||
5afa7f8bee | |||
ec8deefc27 | |||
a2d5a4ee64 | |||
dffcc2ea0c | |||
1800eeefed | |||
d7e7dedbde | |||
d19bb9c0aa | |||
3ef79a974a | |||
bc6800fbed | |||
65314dccf8 | |||
feb7221209 | |||
56a94d8cbb | |||
24e6ec8ac8 | |||
87724af7a8 | |||
b65c3e77e8 | |||
5301304bf2 | |||
948bcc60df | |||
25dfe0eb10 | |||
8e71456a81 | |||
ccdd34ed78 | |||
26d886354f | |||
a172b258ac | |||
7b93c2c204 | |||
57c7411f46 | |||
d0a122348e | |||
e4cbb5f382 | |||
c1bce22f23 | |||
e3abbbe301 | |||
55b36e3710 | |||
877bea9ce1 | |||
33c7ff861e | |||
749fe60c1e | |||
63b31b059c | |||
1476b497eb | |||
e399853d0c | |||
fdb205b19e | |||
fbe8053120 | |||
ea783d01e1 | |||
b7d73595dc | |||
e97e53eeed | |||
342f630dbf | |||
69c8fb9e5d | |||
5f0f8013ac | |||
b5368acee8 | |||
f71959fcf5 | |||
5c9f3b8b16 | |||
bebd6f9308 | |||
84a2806c16 | |||
d0111a7409 | |||
aab8874c55 | |||
fcf5b01746 | |||
4de9e9a6db | |||
0067d6c4be | |||
2099125333 | |||
b48f147d5a | |||
4f3e943080 | |||
7558830fa3 | |||
867274e997 | |||
6515778305 | |||
3b1dfc0f2f | |||
d664de44b7 | |||
bbe99d26ec | |||
50fc59968e | |||
b8b01bb92a | |||
eb45133451 | |||
10c0e2d818 | |||
669f0e7cda | |||
32fd27ec98 | |||
0c13f378de | |||
0049594efb | |||
113c7d3eb0 | |||
549371fc99 | |||
957f27e5bb | |||
1f8c19767b | |||
a383a98af6 | |||
acd69589a5 | |||
b30b8698ea | |||
f1f25be6db | |||
deab8c1960 | |||
c57f775710 | |||
e75cafe9fb | |||
33ab8453c4 | |||
ebd3c7b370 | |||
29645a1d44 | |||
22d99a801a | |||
57b8d84cd9 | |||
65e4ad5bfe | |||
98b7d476d9 | |||
201e3c99b9 | |||
8a7a4a9796 | |||
df297c8794 | |||
3f53a75f02 | |||
7c360e3a04 | |||
d2176c8011 | |||
aa92f06308 | |||
e00c9cf599 | |||
ba60a3ebe0 | |||
efb7e11988 | |||
a55c8b7aac | |||
a980bc4324 | |||
4b10aadffc | |||
5bec574859 | |||
d11271dd29 | |||
1d9d26d09b | |||
c0292e8ab7 | |||
f44e5d8b43 | |||
6ea74538e3 | |||
24b8924b46 | |||
86a3c67112 | |||
8be874370d | |||
aec74dd95a | |||
6890574256 | |||
d03745c684 | |||
28746fbd59 | |||
0321213c11 | |||
3f0aae4244 | |||
48099643cc | |||
621f33c9d0 | |||
f07a9f6f43 | |||
e51880fd32 | |||
88ce273da4 | |||
b9ba5dfa28 | |||
4086f11929 | |||
478c2c6193 | |||
d2d6481afb |
@ -3,6 +3,7 @@ python:
|
||||
- "2.6"
|
||||
- "2.7"
|
||||
- "3.3"
|
||||
- "3.4"
|
||||
script: nosetests test --verbose
|
||||
notifications:
|
||||
email:
|
||||
|
14
CHANGELOG
14
CHANGELOG
@ -1,14 +0,0 @@
|
||||
2013.01.02 Codename: GIULIA
|
||||
|
||||
* Add support for ComedyCentral clips <nto>
|
||||
* Corrected Vimeo description fetching <Nick Daniels>
|
||||
* Added the --no-post-overwrites argument <Barbu Paul - Gheorghe>
|
||||
* --verbose offers more environment info
|
||||
* New info_dict field: uploader_id
|
||||
* New updates system, with signature checking
|
||||
* New IEs: NBA, JustinTV, FunnyOrDie, TweetReel, Steam, Ustream
|
||||
* Fixed IEs: BlipTv
|
||||
* Fixed for Python 3 IEs: Xvideo, Youku, XNXX, Dailymotion, Vimeo, InfoQ
|
||||
* Simplified IEs and test code
|
||||
* Various (Python 3 and other) fixes
|
||||
* Revamped and expanded tests
|
8
Makefile
8
Makefile
@ -1,7 +1,7 @@
|
||||
all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion
|
||||
|
||||
clean:
|
||||
rm -rf youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz
|
||||
|
||||
cleanall: clean
|
||||
rm -f youtube-dl youtube-dl.exe
|
||||
@ -55,7 +55,9 @@ README.txt: README.md
|
||||
pandoc -f markdown -t plain README.md -o README.txt
|
||||
|
||||
youtube-dl.1: README.md
|
||||
pandoc -s -f markdown -t man README.md -o youtube-dl.1
|
||||
python devscripts/prepare_manpage.py >youtube-dl.1.temp.md
|
||||
pandoc -s -f markdown -t man youtube-dl.1.temp.md -o youtube-dl.1
|
||||
rm -f youtube-dl.1.temp.md
|
||||
|
||||
youtube-dl.bash-completion: youtube_dl/*.py youtube_dl/*/*.py devscripts/bash-completion.in
|
||||
python devscripts/bash-completion.py
|
||||
@ -75,6 +77,6 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
|
||||
--exclude 'docs/_build' \
|
||||
-- \
|
||||
bin devscripts test youtube_dl docs \
|
||||
CHANGELOG LICENSE README.md README.txt \
|
||||
LICENSE README.md README.txt \
|
||||
Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \
|
||||
youtube-dl
|
||||
|
21
README.md
21
README.md
@ -1,11 +1,24 @@
|
||||
% YOUTUBE-DL(1)
|
||||
|
||||
# NAME
|
||||
youtube-dl - download videos from youtube.com or other video platforms
|
||||
|
||||
# SYNOPSIS
|
||||
**youtube-dl** [OPTIONS] URL [URL...]
|
||||
|
||||
# INSTALLATION
|
||||
|
||||
To install it right away for all UNIX users (Linux, OS X, etc.), type:
|
||||
|
||||
sudo curl https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl
|
||||
sudo chmod a+x /usr/local/bin/youtube-dl
|
||||
|
||||
If you do not have curl, you can alternatively use a recent wget:
|
||||
|
||||
sudo wget https://yt-dl.org/downloads/2014.05.13/youtube-dl -O /usr/local/bin/youtube-dl
|
||||
sudo chmod a+x /usr/local/bin/youtube-dl
|
||||
|
||||
Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29).
|
||||
|
||||
Alternatively, refer to the developer instructions below for how to check out and work with the git repository. For further options, including PGP signatures, see https://rg3.github.io/youtube-dl/download.html .
|
||||
|
||||
# DESCRIPTION
|
||||
**youtube-dl** is a small command-line program to download videos from
|
||||
YouTube.com and a few more sites. It requires the Python interpreter, version
|
||||
@ -458,7 +471,7 @@ If your report is shorter than two lines, it is almost certainly missing some of
|
||||
|
||||
For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
|
||||
|
||||
Site support requests must contain an example URL. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
|
||||
Site support requests **must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
|
||||
|
||||
### Are you using the latest version?
|
||||
|
||||
|
@ -15,7 +15,7 @@ header = oldreadme[:oldreadme.index('# OPTIONS')]
|
||||
footer = oldreadme[oldreadme.index('# CONFIGURATION'):]
|
||||
|
||||
options = helptext[helptext.index(' General Options:') + 19:]
|
||||
options = re.sub(r'^ (\w.+)$', r'## \1', options, flags=re.M)
|
||||
options = re.sub(r'(?m)^ (\w.+)$', r'## \1', options)
|
||||
options = '# OPTIONS\n' + options + '\n'
|
||||
|
||||
with io.open(README_FILE, 'w', encoding='utf-8') as f:
|
||||
|
20
devscripts/prepare_manpage.py
Normal file
20
devscripts/prepare_manpage.py
Normal file
@ -0,0 +1,20 @@
|
||||
|
||||
import io
|
||||
import os.path
|
||||
import sys
|
||||
import re
|
||||
|
||||
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
README_FILE = os.path.join(ROOT_DIR, 'README.md')
|
||||
|
||||
with io.open(README_FILE, encoding='utf-8') as f:
|
||||
readme = f.read()
|
||||
|
||||
PREFIX = '%YOUTUBE-DL(1)\n\n# NAME\n'
|
||||
readme = re.sub(r'(?s)# INSTALLATION.*?(?=# DESCRIPTION)', '', readme)
|
||||
readme = PREFIX + readme
|
||||
|
||||
if sys.version_info < (3, 0):
|
||||
print(readme.encode('utf-8'))
|
||||
else:
|
||||
print(readme)
|
@ -45,9 +45,9 @@ fi
|
||||
/bin/echo -e "\n### Changing version in version.py..."
|
||||
sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
|
||||
|
||||
/bin/echo -e "\n### Committing CHANGELOG README.md and youtube_dl/version.py..."
|
||||
/bin/echo -e "\n### Committing README.md and youtube_dl/version.py..."
|
||||
make README.md
|
||||
git add CHANGELOG README.md youtube_dl/version.py
|
||||
git add README.md youtube_dl/version.py
|
||||
git commit -m "release $version"
|
||||
|
||||
/bin/echo -e "\n### Now tagging, signing and pushing..."
|
||||
|
@ -85,7 +85,7 @@ def gettestcases(include_onlymatching=False):
|
||||
else:
|
||||
tests = getattr(ie, '_TESTS', [])
|
||||
for t in tests:
|
||||
if not include_onlymatching and getattr(t, 'only_matching', False):
|
||||
if not include_onlymatching and t.get('only_matching', False):
|
||||
continue
|
||||
t['name'] = type(ie).__name__[:-len('IE')]
|
||||
yield t
|
||||
@ -107,7 +107,7 @@ def expect_info_dict(self, expected_dict, got_dict):
|
||||
elif isinstance(expected, type):
|
||||
got = got_dict.get(info_field)
|
||||
self.assertTrue(isinstance(got, expected),
|
||||
u'Expected type %r, but got value %r of type %r' % (expected, got, type(got)))
|
||||
u'Expected type %r for field %s, but got value %r of type %r' % (expected, info_field, got, type(got)))
|
||||
else:
|
||||
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
||||
got = 'md5:' + md5(got_dict.get(info_field))
|
||||
@ -134,3 +134,17 @@ def expect_info_dict(self, expected_dict, got_dict):
|
||||
missing_keys,
|
||||
'Missing keys in test definition: %s' % (
|
||||
', '.join(sorted(missing_keys))))
|
||||
|
||||
|
||||
def assertRegexpMatches(self, text, regexp, msg=None):
|
||||
if hasattr(self, 'assertRegexpMatches'):
|
||||
return self.assertRegexpMatches(text, regexp, msg)
|
||||
else:
|
||||
m = re.match(regexp, text)
|
||||
if not m:
|
||||
note = 'Regexp didn\'t match: %r not found in %r' % (regexp, text)
|
||||
if msg is None:
|
||||
msg = note
|
||||
else:
|
||||
msg = note + ', ' + msg
|
||||
self.assertTrue(m, msg)
|
||||
|
@ -8,7 +8,7 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import FakeYDL
|
||||
from test.helper import FakeYDL, assertRegexpMatches
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.extractor import YoutubeIE
|
||||
|
||||
@ -67,7 +67,7 @@ class TestFormatSelection(unittest.TestCase):
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['ext'], 'mp4')
|
||||
|
||||
# No prefer_free_formats => prefer mp4 and flv for greater compatibilty
|
||||
# No prefer_free_formats => prefer mp4 and flv for greater compatibility
|
||||
ydl = YDL()
|
||||
ydl.params['prefer_free_formats'] = False
|
||||
formats = [
|
||||
@ -274,6 +274,12 @@ class TestFormatSelection(unittest.TestCase):
|
||||
# Replace missing fields with 'NA'
|
||||
self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4')
|
||||
|
||||
def test_format_note(self):
|
||||
ydl = YoutubeDL()
|
||||
self.assertEqual(ydl._format_note({}), '')
|
||||
assertRegexpMatches(self, ydl._format_note({
|
||||
'vbr': 10,
|
||||
}), '^\s*10k$')
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -10,6 +10,7 @@ import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import (
|
||||
assertRegexpMatches,
|
||||
expect_info_dict,
|
||||
FakeYDL,
|
||||
)
|
||||
@ -22,9 +23,11 @@ from youtube_dl.extractor import (
|
||||
VimeoUserIE,
|
||||
VimeoAlbumIE,
|
||||
VimeoGroupsIE,
|
||||
VineUserIE,
|
||||
UstreamChannelIE,
|
||||
SoundcloudSetIE,
|
||||
SoundcloudUserIE,
|
||||
SoundcloudPlaylistIE,
|
||||
LivestreamIE,
|
||||
NHLVideocenterIE,
|
||||
BambuserChannelIE,
|
||||
@ -100,6 +103,13 @@ class TestPlaylists(unittest.TestCase):
|
||||
self.assertEqual(result['title'], 'Rolex Awards for Enterprise')
|
||||
self.assertTrue(len(result['entries']) > 72)
|
||||
|
||||
def test_vine_user(self):
|
||||
dl = FakeYDL()
|
||||
ie = VineUserIE(dl)
|
||||
result = ie.extract('https://vine.co/Visa')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertTrue(len(result['entries']) >= 50)
|
||||
|
||||
def test_ustream_channel(self):
|
||||
dl = FakeYDL()
|
||||
ie = UstreamChannelIE(dl)
|
||||
@ -124,6 +134,17 @@ class TestPlaylists(unittest.TestCase):
|
||||
self.assertEqual(result['id'], '9615865')
|
||||
self.assertTrue(len(result['entries']) >= 12)
|
||||
|
||||
def test_soundcloud_playlist(self):
|
||||
dl = FakeYDL()
|
||||
ie = SoundcloudPlaylistIE(dl)
|
||||
result = ie.extract('http://api.soundcloud.com/playlists/4110309')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], '4110309')
|
||||
self.assertEqual(result['title'], 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]')
|
||||
assertRegexpMatches(
|
||||
self, result['description'], r'TILT Brass - Bowery Poetry Club')
|
||||
self.assertEqual(len(result['entries']), 6)
|
||||
|
||||
def test_livestream_event(self):
|
||||
dl = FakeYDL()
|
||||
ie = LivestreamIE(dl)
|
||||
@ -188,20 +209,20 @@ class TestPlaylists(unittest.TestCase):
|
||||
def test_ivi_compilation(self):
|
||||
dl = FakeYDL()
|
||||
ie = IviCompilationIE(dl)
|
||||
result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel')
|
||||
result = ie.extract('http://www.ivi.ru/watch/dvoe_iz_lartsa')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], 'dezhurnyi_angel')
|
||||
self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012)')
|
||||
self.assertTrue(len(result['entries']) >= 23)
|
||||
self.assertEqual(result['id'], 'dvoe_iz_lartsa')
|
||||
self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008)')
|
||||
self.assertTrue(len(result['entries']) >= 24)
|
||||
|
||||
def test_ivi_compilation_season(self):
|
||||
dl = FakeYDL()
|
||||
ie = IviCompilationIE(dl)
|
||||
result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel/season2')
|
||||
result = ie.extract('http://www.ivi.ru/watch/dvoe_iz_lartsa/season1')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], 'dezhurnyi_angel/season2')
|
||||
self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012) 2 сезон')
|
||||
self.assertTrue(len(result['entries']) >= 7)
|
||||
self.assertEqual(result['id'], 'dvoe_iz_lartsa/season1')
|
||||
self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008) 1 сезон')
|
||||
self.assertTrue(len(result['entries']) >= 12)
|
||||
|
||||
def test_imdb_list(self):
|
||||
dl = FakeYDL()
|
||||
|
@ -181,7 +181,7 @@ class TestTedSubtitles(BaseTestSubtitles):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(len(subtitles.keys()), 28)
|
||||
self.assertTrue(len(subtitles.keys()) >= 28)
|
||||
|
||||
def test_list_subtitles(self):
|
||||
self.DL.expect_warning(u'Automatic Captions not supported by this server')
|
||||
|
@ -31,6 +31,7 @@ from .utils import (
|
||||
ContentTooShortError,
|
||||
date_from_str,
|
||||
DateRange,
|
||||
DEFAULT_OUTTMPL,
|
||||
determine_ext,
|
||||
DownloadError,
|
||||
encodeFilename,
|
||||
@ -440,7 +441,8 @@ class YoutubeDL(object):
|
||||
if v is not None)
|
||||
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
|
||||
|
||||
tmpl = os.path.expanduser(self.params['outtmpl'])
|
||||
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
||||
tmpl = os.path.expanduser(outtmpl)
|
||||
filename = tmpl % template_dict
|
||||
return filename
|
||||
except ValueError as err:
|
||||
@ -1025,10 +1027,11 @@ class YoutubeDL(object):
|
||||
|
||||
def download(self, url_list):
|
||||
"""Download a given list of URLs."""
|
||||
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
||||
if (len(url_list) > 1 and
|
||||
'%' not in self.params['outtmpl']
|
||||
'%' not in outtmpl
|
||||
and self.params.get('max_downloads') != 1):
|
||||
raise SameFileError(self.params['outtmpl'])
|
||||
raise SameFileError(outtmpl)
|
||||
|
||||
for url in url_list:
|
||||
try:
|
||||
@ -1139,57 +1142,57 @@ class YoutubeDL(object):
|
||||
res = default
|
||||
return res
|
||||
|
||||
def list_formats(self, info_dict):
|
||||
def format_note(fdict):
|
||||
res = ''
|
||||
if fdict.get('ext') in ['f4f', 'f4m']:
|
||||
res += '(unsupported) '
|
||||
if fdict.get('format_note') is not None:
|
||||
res += fdict['format_note'] + ' '
|
||||
if fdict.get('tbr') is not None:
|
||||
res += '%4dk ' % fdict['tbr']
|
||||
if fdict.get('container') is not None:
|
||||
if res:
|
||||
res += ', '
|
||||
res += '%s container' % fdict['container']
|
||||
if (fdict.get('vcodec') is not None and
|
||||
fdict.get('vcodec') != 'none'):
|
||||
if res:
|
||||
res += ', '
|
||||
res += fdict['vcodec']
|
||||
if fdict.get('vbr') is not None:
|
||||
res += '@'
|
||||
elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
|
||||
res += 'video@'
|
||||
def _format_note(self, fdict):
|
||||
res = ''
|
||||
if fdict.get('ext') in ['f4f', 'f4m']:
|
||||
res += '(unsupported) '
|
||||
if fdict.get('format_note') is not None:
|
||||
res += fdict['format_note'] + ' '
|
||||
if fdict.get('tbr') is not None:
|
||||
res += '%4dk ' % fdict['tbr']
|
||||
if fdict.get('container') is not None:
|
||||
if res:
|
||||
res += ', '
|
||||
res += '%s container' % fdict['container']
|
||||
if (fdict.get('vcodec') is not None and
|
||||
fdict.get('vcodec') != 'none'):
|
||||
if res:
|
||||
res += ', '
|
||||
res += fdict['vcodec']
|
||||
if fdict.get('vbr') is not None:
|
||||
res += '%4dk' % fdict['vbr']
|
||||
if fdict.get('acodec') is not None:
|
||||
if res:
|
||||
res += ', '
|
||||
if fdict['acodec'] == 'none':
|
||||
res += 'video only'
|
||||
else:
|
||||
res += '%-5s' % fdict['acodec']
|
||||
elif fdict.get('abr') is not None:
|
||||
if res:
|
||||
res += ', '
|
||||
res += 'audio'
|
||||
if fdict.get('abr') is not None:
|
||||
res += '@%3dk' % fdict['abr']
|
||||
if fdict.get('asr') is not None:
|
||||
res += ' (%5dHz)' % fdict['asr']
|
||||
if fdict.get('filesize') is not None:
|
||||
if res:
|
||||
res += ', '
|
||||
res += format_bytes(fdict['filesize'])
|
||||
return res
|
||||
res += '@'
|
||||
elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
|
||||
res += 'video@'
|
||||
if fdict.get('vbr') is not None:
|
||||
res += '%4dk' % fdict['vbr']
|
||||
if fdict.get('acodec') is not None:
|
||||
if res:
|
||||
res += ', '
|
||||
if fdict['acodec'] == 'none':
|
||||
res += 'video only'
|
||||
else:
|
||||
res += '%-5s' % fdict['acodec']
|
||||
elif fdict.get('abr') is not None:
|
||||
if res:
|
||||
res += ', '
|
||||
res += 'audio'
|
||||
if fdict.get('abr') is not None:
|
||||
res += '@%3dk' % fdict['abr']
|
||||
if fdict.get('asr') is not None:
|
||||
res += ' (%5dHz)' % fdict['asr']
|
||||
if fdict.get('filesize') is not None:
|
||||
if res:
|
||||
res += ', '
|
||||
res += format_bytes(fdict['filesize'])
|
||||
return res
|
||||
|
||||
def list_formats(self, info_dict):
|
||||
def line(format, idlen=20):
|
||||
return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
|
||||
format['format_id'],
|
||||
format['ext'],
|
||||
self.format_resolution(format),
|
||||
format_note(format),
|
||||
self._format_note(format),
|
||||
))
|
||||
|
||||
formats = info_dict.get('formats', [info_dict])
|
||||
@ -1197,8 +1200,8 @@ class YoutubeDL(object):
|
||||
max(len(f['format_id']) for f in formats))
|
||||
formats_s = [line(f, idlen) for f in formats]
|
||||
if len(formats) > 1:
|
||||
formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
|
||||
formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
|
||||
formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
|
||||
formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
|
||||
|
||||
header_line = line({
|
||||
'format_id': 'format code', 'ext': 'extension',
|
||||
|
@ -53,6 +53,10 @@ __authors__ = (
|
||||
'Mattias Harrysson',
|
||||
'phaer',
|
||||
'Sainyam Kapoor',
|
||||
'Nicolas Évrard',
|
||||
'Jason Normore',
|
||||
'Hoje Lee',
|
||||
'Adam Thalhammer',
|
||||
)
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
@ -72,6 +76,7 @@ from .utils import (
|
||||
compat_getpass,
|
||||
compat_print,
|
||||
DateRange,
|
||||
DEFAULT_OUTTMPL,
|
||||
decodeOption,
|
||||
get_term_width,
|
||||
DownloadError,
|
||||
@ -676,7 +681,7 @@ def _real_main(argv=None):
|
||||
if not opts.audioquality.isdigit():
|
||||
parser.error(u'invalid audio quality specified')
|
||||
if opts.recodevideo is not None:
|
||||
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg']:
|
||||
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']:
|
||||
parser.error(u'invalid video recode format specified')
|
||||
if opts.date is not None:
|
||||
date = DateRange.day(opts.date)
|
||||
@ -705,7 +710,7 @@ def _real_main(argv=None):
|
||||
or (opts.usetitle and u'%(title)s-%(id)s.%(ext)s')
|
||||
or (opts.useid and u'%(id)s.%(ext)s')
|
||||
or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
|
||||
or u'%(title)s-%(id)s.%(ext)s')
|
||||
or DEFAULT_OUTTMPL)
|
||||
if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
|
||||
parser.error(u'Cannot download a video and extract audio into the same'
|
||||
u' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
|
||||
|
@ -14,6 +14,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class HttpFD(FileDownloader):
|
||||
_TEST_FILE_SIZE = 10241
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
url = info_dict['url']
|
||||
tmpfilename = self.temp_name(filename)
|
||||
@ -28,8 +30,10 @@ class HttpFD(FileDownloader):
|
||||
basic_request = compat_urllib_request.Request(url, None, headers)
|
||||
request = compat_urllib_request.Request(url, None, headers)
|
||||
|
||||
if self.params.get('test', False):
|
||||
request.add_header('Range', 'bytes=0-10240')
|
||||
is_test = self.params.get('test', False)
|
||||
|
||||
if is_test:
|
||||
request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1))
|
||||
|
||||
# Establish possible resume length
|
||||
if os.path.isfile(encodeFilename(tmpfilename)):
|
||||
@ -100,6 +104,15 @@ class HttpFD(FileDownloader):
|
||||
return False
|
||||
|
||||
data_len = data.info().get('Content-length', None)
|
||||
|
||||
# Range HTTP header may be ignored/unsupported by a webserver
|
||||
# (e.g. extractor/scivee.py, extractor/bambuser.py).
|
||||
# However, for a test we still would like to download just a piece of a file.
|
||||
# To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
|
||||
# block size when downloading a file.
|
||||
if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
|
||||
data_len = self._TEST_FILE_SIZE
|
||||
|
||||
if data_len is not None:
|
||||
data_len = int(data_len) + resume_len
|
||||
min_data_len = self.params.get("min_filesize", None)
|
||||
@ -118,7 +131,7 @@ class HttpFD(FileDownloader):
|
||||
while True:
|
||||
# Download and write
|
||||
before = time.time()
|
||||
data_block = data.read(block_size)
|
||||
data_block = data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
|
||||
after = time.time()
|
||||
if len(data_block) == 0:
|
||||
break
|
||||
@ -162,6 +175,9 @@ class HttpFD(FileDownloader):
|
||||
'speed': speed,
|
||||
})
|
||||
|
||||
if is_test and byte_counter == data_len:
|
||||
break
|
||||
|
||||
# Apply rate limit
|
||||
self.slow_down(start, byte_counter - resume_len)
|
||||
|
||||
|
@ -10,6 +10,7 @@ from .common import FileDownloader
|
||||
from ..utils import (
|
||||
encodeFilename,
|
||||
format_bytes,
|
||||
compat_str,
|
||||
)
|
||||
|
||||
|
||||
@ -127,7 +128,10 @@ class RtmpFD(FileDownloader):
|
||||
basic_args += ['--flashVer', flash_version]
|
||||
if live:
|
||||
basic_args += ['--live']
|
||||
if conn:
|
||||
if isinstance(conn, list):
|
||||
for entry in conn:
|
||||
basic_args += ['--conn', entry]
|
||||
elif isinstance(conn, compat_str):
|
||||
basic_args += ['--conn', conn]
|
||||
args = basic_args + [[], ['--resume', '--skip', '1']][not live and self.params.get('continuedl', False)]
|
||||
|
||||
|
@ -20,6 +20,7 @@ from .auengine import AUEngineIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||
from .bbccouk import BBCCoUkIE
|
||||
from .bilibili import BiliBiliIE
|
||||
from .blinkx import BlinkxIE
|
||||
from .bliptv import BlipTVIE, BlipTVUserIE
|
||||
from .bloomberg import BloombergIE
|
||||
@ -40,6 +41,7 @@ from .cinemassacre import CinemassacreIE
|
||||
from .clipfish import ClipfishIE
|
||||
from .cliphunter import CliphunterIE
|
||||
from .clipsyndicate import ClipsyndicateIE
|
||||
from .clubic import ClubicIE
|
||||
from .cmt import CMTIE
|
||||
from .cnet import CNETIE
|
||||
from .cnn import (
|
||||
@ -70,6 +72,7 @@ from .ehow import EHowIE
|
||||
from .eighttracks import EightTracksIE
|
||||
from .eitb import EitbIE
|
||||
from .elpais import ElPaisIE
|
||||
from .empflix import EmpflixIE
|
||||
from .engadget import EngadgetIE
|
||||
from .escapist import EscapistIE
|
||||
from .everyonesmixtape import EveryonesMixtapeIE
|
||||
@ -77,6 +80,7 @@ from .exfm import ExfmIE
|
||||
from .extremetube import ExtremeTubeIE
|
||||
from .facebook import FacebookIE
|
||||
from .faz import FazIE
|
||||
from .fc2 import FC2IE
|
||||
from .firstpost import FirstpostIE
|
||||
from .firsttv import FirstTVIE
|
||||
from .fivemin import FiveMinIE
|
||||
@ -107,10 +111,12 @@ from .googleplus import GooglePlusIE
|
||||
from .googlesearch import GoogleSearchIE
|
||||
from .hark import HarkIE
|
||||
from .helsinki import HelsinkiIE
|
||||
from .hentaistigma import HentaiStigmaIE
|
||||
from .hotnewhiphop import HotNewHipHopIE
|
||||
from .howcast import HowcastIE
|
||||
from .huffpost import HuffPostIE
|
||||
from .hypem import HypemIE
|
||||
from .iconosquare import IconosquareIE
|
||||
from .ign import IGNIE, OneUPIE
|
||||
from .imdb import (
|
||||
ImdbIE,
|
||||
@ -158,6 +164,7 @@ from .mofosex import MofosexIE
|
||||
from .mooshare import MooshareIE
|
||||
from .morningstar import MorningstarIE
|
||||
from .motorsport import MotorsportIE
|
||||
from .moviezine import MoviezineIE
|
||||
from .movshare import MovShareIE
|
||||
from .mtv import (
|
||||
MTVIE,
|
||||
@ -177,6 +184,7 @@ from .nbc import (
|
||||
from .ndr import NDRIE
|
||||
from .ndtv import NDTVIE
|
||||
from .newgrounds import NewgroundsIE
|
||||
from .newstube import NewstubeIE
|
||||
from .nfb import NFBIE
|
||||
from .nhl import NHLIE, NHLVideocenterIE
|
||||
from .niconico import NiconicoIE
|
||||
@ -186,7 +194,13 @@ from .normalboots import NormalbootsIE
|
||||
from .novamov import NovaMovIE
|
||||
from .nowness import NownessIE
|
||||
from .nowvideo import NowVideoIE
|
||||
from .nrk import (
|
||||
NRKIE,
|
||||
NRKTVIE,
|
||||
)
|
||||
from .ntv import NTVIE
|
||||
from .nytimes import NYTimesIE
|
||||
from .nuvid import NuvidIE
|
||||
from .oe1 import OE1IE
|
||||
from .ooyala import OoyalaIE
|
||||
from .orf import ORFIE
|
||||
@ -207,6 +221,7 @@ from .ringtv import RingTVIE
|
||||
from .ro220 import Ro220IE
|
||||
from .rottentomatoes import RottenTomatoesIE
|
||||
from .roxwel import RoxwelIE
|
||||
from .rtbf import RTBFIE
|
||||
from .rtlnow import RTLnowIE
|
||||
from .rts import RTSIE
|
||||
from .rtve import RTVEALaCartaIE
|
||||
@ -218,9 +233,11 @@ from .rutube import (
|
||||
)
|
||||
from .rutv import RUTVIE
|
||||
from .savefrom import SaveFromIE
|
||||
from .scivee import SciVeeIE
|
||||
from .servingsys import ServingSysIE
|
||||
from .sina import SinaIE
|
||||
from .slideshare import SlideshareIE
|
||||
from .slutload import SlutloadIE
|
||||
from .smotri import (
|
||||
SmotriIE,
|
||||
SmotriCommunityIE,
|
||||
@ -228,7 +245,12 @@ from .smotri import (
|
||||
SmotriBroadcastIE,
|
||||
)
|
||||
from .sohu import SohuIE
|
||||
from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
|
||||
from .soundcloud import (
|
||||
SoundcloudIE,
|
||||
SoundcloudSetIE,
|
||||
SoundcloudUserIE,
|
||||
SoundcloudPlaylistIE
|
||||
)
|
||||
from .southparkstudios import (
|
||||
SouthParkStudiosIE,
|
||||
SouthparkDeIE,
|
||||
@ -238,10 +260,10 @@ from .spankwire import SpankwireIE
|
||||
from .spiegel import SpiegelIE
|
||||
from .spike import SpikeIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .statigram import StatigramIE
|
||||
from .steam import SteamIE
|
||||
from .streamcloud import StreamcloudIE
|
||||
from .streamcz import StreamCZIE
|
||||
from .swrmediathek import SWRMediathekIE
|
||||
from .syfy import SyfyIE
|
||||
from .sztvhu import SztvHuIE
|
||||
from .teamcoco import TeamcocoIE
|
||||
@ -282,6 +304,7 @@ from .videodetective import VideoDetectiveIE
|
||||
from .videolecturesnet import VideoLecturesNetIE
|
||||
from .videofyme import VideofyMeIE
|
||||
from .videopremium import VideoPremiumIE
|
||||
from .videott import VideoTtIE
|
||||
from .videoweed import VideoWeedIE
|
||||
from .vimeo import (
|
||||
VimeoIE,
|
||||
@ -290,15 +313,21 @@ from .vimeo import (
|
||||
VimeoAlbumIE,
|
||||
VimeoGroupsIE,
|
||||
VimeoReviewIE,
|
||||
VimeoWatchLaterIE,
|
||||
)
|
||||
from .vine import (
|
||||
VineIE,
|
||||
VineUserIE,
|
||||
)
|
||||
from .vine import VineIE
|
||||
from .viki import VikiIE
|
||||
from .vk import VKIE
|
||||
from .vube import VubeIE
|
||||
from .vuclip import VuClipIE
|
||||
from .washingtonpost import WashingtonPostIE
|
||||
from .wat import WatIE
|
||||
from .wdr import (
|
||||
WDRIE,
|
||||
WDRMobileIE,
|
||||
WDRMausIE,
|
||||
)
|
||||
from .weibo import WeiboIE
|
||||
|
@ -1,7 +1,6 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@ -16,6 +15,7 @@ class AftonbladetIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna',
|
||||
'description': 'Jupiters måne mest aktiv av alla himlakroppar',
|
||||
'timestamp': 1394142732,
|
||||
'upload_date': '20140306',
|
||||
},
|
||||
}
|
||||
@ -27,17 +27,17 @@ class AftonbladetIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# find internal video meta data
|
||||
META_URL = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json'
|
||||
meta_url = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json'
|
||||
internal_meta_id = self._html_search_regex(
|
||||
r'data-aptomaId="([\w\d]+)"', webpage, 'internal_meta_id')
|
||||
internal_meta_url = META_URL % internal_meta_id
|
||||
internal_meta_url = meta_url % internal_meta_id
|
||||
internal_meta_json = self._download_json(
|
||||
internal_meta_url, video_id, 'Downloading video meta data')
|
||||
|
||||
# find internal video formats
|
||||
FORMATS_URL = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s'
|
||||
format_url = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s'
|
||||
internal_video_id = internal_meta_json['videoId']
|
||||
internal_formats_url = FORMATS_URL % internal_video_id
|
||||
internal_formats_url = format_url % internal_video_id
|
||||
internal_formats_json = self._download_json(
|
||||
internal_formats_url, video_id, 'Downloading video formats')
|
||||
|
||||
@ -54,16 +54,13 @@ class AftonbladetIE(InfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
timestamp = datetime.datetime.fromtimestamp(internal_meta_json['timePublished'])
|
||||
upload_date = timestamp.strftime('%Y%m%d')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': internal_meta_json['title'],
|
||||
'formats': formats,
|
||||
'thumbnail': internal_meta_json['imageUrl'],
|
||||
'description': internal_meta_json['shortPreamble'],
|
||||
'upload_date': upload_date,
|
||||
'timestamp': internal_meta_json['timePublished'],
|
||||
'duration': internal_meta_json['duration'],
|
||||
'view_count': internal_meta_json['views'],
|
||||
}
|
||||
|
@ -38,7 +38,9 @@ class ARDIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', webpage, 'title')
|
||||
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
|
||||
r'<h4 class="headline">(.*?)</h4>'],
|
||||
webpage, 'title')
|
||||
description = self._html_search_meta(
|
||||
'dcterms.abstract', webpage, 'description')
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
@ -74,7 +74,8 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
return self._extract_from_webpage(webpage, video_id, lang)
|
||||
|
||||
def _extract_from_webpage(self, webpage, video_id, lang):
|
||||
json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url')
|
||||
json_url = self._html_search_regex(
|
||||
r'arte_vp_url="(.*?)"', webpage, 'json vp url')
|
||||
return self._extract_from_json_url(json_url, video_id, lang)
|
||||
|
||||
def _extract_from_json_url(self, json_url, video_id, lang):
|
||||
@ -120,14 +121,17 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
return ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
|
||||
else:
|
||||
def sort_key(f):
|
||||
versionCode = f.get('versionCode')
|
||||
if versionCode is None:
|
||||
versionCode = ''
|
||||
return (
|
||||
# Sort first by quality
|
||||
int(f.get('height',-1)),
|
||||
int(f.get('bitrate',-1)),
|
||||
int(f.get('height', -1)),
|
||||
int(f.get('bitrate', -1)),
|
||||
# The original version with subtitles has lower relevance
|
||||
re.match(r'VO-ST(F|A)', f.get('versionCode', '')) is None,
|
||||
re.match(r'VO-ST(F|A)', versionCode) is None,
|
||||
# The version with sourds/mal subtitles has also lower relevance
|
||||
re.match(r'VO?(F|A)-STM\1', f.get('versionCode', '')) is None,
|
||||
re.match(r'VO?(F|A)-STM\1', versionCode) is None,
|
||||
# Prefer http downloads over m3u8
|
||||
0 if f['url'].endswith('m3u8') else 1,
|
||||
)
|
||||
|
@ -12,14 +12,14 @@ from ..utils import (
|
||||
|
||||
|
||||
class BandcampIE(InfoExtractor):
|
||||
_VALID_URL = r'http://.*?\.bandcamp\.com/track/(?P<title>.*)'
|
||||
_VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>.*)'
|
||||
_TESTS = [{
|
||||
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
||||
'file': '1812978515.mp3',
|
||||
'md5': 'c557841d5e50261777a6585648adf439',
|
||||
'info_dict': {
|
||||
"title": "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
|
||||
"duration": 10,
|
||||
"duration": 9.8485,
|
||||
},
|
||||
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
||||
}]
|
||||
@ -28,36 +28,32 @@ class BandcampIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, title)
|
||||
# We get the link to the free download page
|
||||
m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
|
||||
if m_download is None:
|
||||
if not m_download:
|
||||
m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
|
||||
if m_trackinfo:
|
||||
json_code = m_trackinfo.group(1)
|
||||
data = json.loads(json_code)
|
||||
d = data[0]
|
||||
data = json.loads(json_code)[0]
|
||||
|
||||
duration = int(round(d['duration']))
|
||||
formats = []
|
||||
for format_id, format_url in d['file'].items():
|
||||
ext, _, abr_str = format_id.partition('-')
|
||||
|
||||
for format_id, format_url in data['file'].items():
|
||||
ext, abr_str = format_id.split('-', 1)
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': format_url,
|
||||
'ext': format_id.partition('-')[0],
|
||||
'ext': ext,
|
||||
'vcodec': 'none',
|
||||
'acodec': format_id.partition('-')[0],
|
||||
'abr': int(format_id.partition('-')[2]),
|
||||
'acodec': ext,
|
||||
'abr': int(abr_str),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': compat_str(d['id']),
|
||||
'title': d['title'],
|
||||
'id': compat_str(data['id']),
|
||||
'title': data['title'],
|
||||
'formats': formats,
|
||||
'duration': duration,
|
||||
'duration': float(data['duration']),
|
||||
}
|
||||
else:
|
||||
raise ExtractorError('No free songs found')
|
||||
@ -67,11 +63,9 @@ class BandcampIE(InfoExtractor):
|
||||
r'var TralbumData = {(.*?)id: (?P<id>\d*?)$',
|
||||
webpage, re.MULTILINE | re.DOTALL).group('id')
|
||||
|
||||
download_webpage = self._download_webpage(download_link, video_id,
|
||||
'Downloading free downloads page')
|
||||
# We get the dictionary of the track from some javascrip code
|
||||
info = re.search(r'items: (.*?),$',
|
||||
download_webpage, re.MULTILINE).group(1)
|
||||
download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page')
|
||||
# We get the dictionary of the track from some javascript code
|
||||
info = re.search(r'items: (.*?),$', download_webpage, re.MULTILINE).group(1)
|
||||
info = json.loads(info)[0]
|
||||
# We pick mp3-320 for now, until format selection can be easily implemented.
|
||||
mp3_info = info['downloads']['mp3-320']
|
||||
@ -100,7 +94,7 @@ class BandcampIE(InfoExtractor):
|
||||
|
||||
class BandcampAlbumIE(InfoExtractor):
|
||||
IE_NAME = 'Bandcamp:album'
|
||||
_VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)'
|
||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+))'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
||||
@ -123,13 +117,15 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
'params': {
|
||||
'playlistend': 2
|
||||
},
|
||||
'skip': 'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
|
||||
'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
playlist_id = mobj.group('subdomain')
|
||||
title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, title)
|
||||
display_id = title or playlist_id
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
|
||||
if not tracks_paths:
|
||||
raise ExtractorError('The page doesn\'t contain any tracks')
|
||||
@ -139,6 +135,8 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
title = self._search_regex(r'album_title : "(.*?)"', webpage, 'title')
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': playlist_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'entries': entries,
|
||||
}
|
||||
|
106
youtube_dl/extractor/bilibili.py
Normal file
106
youtube_dl/extractor/bilibili.py
Normal file
@ -0,0 +1,106 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_parse_qs,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class BiliBiliIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.bilibili\.tv/video/av(?P<id>[0-9]+)/'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.bilibili.tv/video/av1074402/',
|
||||
'md5': '2c301e4dab317596e837c3e7633e7d86',
|
||||
'info_dict': {
|
||||
'id': '1074402',
|
||||
'ext': 'flv',
|
||||
'title': '【金坷垃】金泡沫',
|
||||
'duration': 308,
|
||||
'upload_date': '20140420',
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_code = self._search_regex(
|
||||
r'(?s)<div itemprop="video".*?>(.*?)</div>', webpage, 'video code')
|
||||
|
||||
title = self._html_search_meta(
|
||||
'media:title', video_code, 'title', fatal=True)
|
||||
duration_str = self._html_search_meta(
|
||||
'duration', video_code, 'duration')
|
||||
if duration_str is None:
|
||||
duration = None
|
||||
else:
|
||||
duration_mobj = re.match(
|
||||
r'^T(?:(?P<hours>[0-9]+)H)?(?P<minutes>[0-9]+)M(?P<seconds>[0-9]+)S$',
|
||||
duration_str)
|
||||
duration = (
|
||||
int_or_none(duration_mobj.group('hours'), default=0) * 3600 +
|
||||
int(duration_mobj.group('minutes')) * 60 +
|
||||
int(duration_mobj.group('seconds')))
|
||||
upload_date = unified_strdate(self._html_search_meta(
|
||||
'uploadDate', video_code, fatal=False))
|
||||
thumbnail = self._html_search_meta(
|
||||
'thumbnailUrl', video_code, 'thumbnail', fatal=False)
|
||||
|
||||
player_params = compat_parse_qs(self._html_search_regex(
|
||||
r'<iframe .*?class="player" src="https://secure.bilibili.tv/secure,([^"]+)"',
|
||||
webpage, 'player params'))
|
||||
|
||||
if 'cid' in player_params:
|
||||
cid = player_params['cid'][0]
|
||||
|
||||
lq_doc = self._download_xml(
|
||||
'http://interface.bilibili.cn/v_cdn_play?cid=%s' % cid,
|
||||
video_id,
|
||||
note='Downloading LQ video info'
|
||||
)
|
||||
lq_durl = lq_doc.find('.//durl')
|
||||
formats = [{
|
||||
'format_id': 'lq',
|
||||
'quality': 1,
|
||||
'url': lq_durl.find('./url').text,
|
||||
'filesize': int_or_none(
|
||||
lq_durl.find('./size'), get_attr='text'),
|
||||
}]
|
||||
|
||||
hq_doc = self._download_xml(
|
||||
'http://interface.bilibili.cn/playurl?cid=%s' % cid,
|
||||
video_id,
|
||||
note='Downloading HQ video info',
|
||||
fatal=False,
|
||||
)
|
||||
if hq_doc is not False:
|
||||
hq_durl = hq_doc.find('.//durl')
|
||||
formats.append({
|
||||
'format_id': 'hq',
|
||||
'quality': 2,
|
||||
'ext': 'flv',
|
||||
'url': hq_durl.find('./url').text,
|
||||
'filesize': int_or_none(
|
||||
hq_durl.find('./size'), get_attr='text'),
|
||||
})
|
||||
else:
|
||||
raise ExtractorError('Unsupported player parameters: %r' % (player_params,))
|
||||
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'duration': duration,
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
@ -1,6 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import json
|
||||
import re
|
||||
|
||||
@ -19,15 +18,16 @@ class BlinkxIE(InfoExtractor):
|
||||
'file': '8aQUy7GV.mp4',
|
||||
'md5': '2e9a07364af40163a908edbf10bb2492',
|
||||
'info_dict': {
|
||||
"title": "Police Car Rolls Away",
|
||||
"uploader": "stupidvideos.com",
|
||||
"upload_date": "20131215",
|
||||
"description": "A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!",
|
||||
"duration": 14.886,
|
||||
"thumbnails": [{
|
||||
"width": 100,
|
||||
"height": 76,
|
||||
"url": "http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg",
|
||||
'title': 'Police Car Rolls Away',
|
||||
'uploader': 'stupidvideos.com',
|
||||
'upload_date': '20131215',
|
||||
'timestamp': 1387068000,
|
||||
'description': 'A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!',
|
||||
'duration': 14.886,
|
||||
'thumbnails': [{
|
||||
'width': 100,
|
||||
'height': 76,
|
||||
'url': 'http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg',
|
||||
}],
|
||||
},
|
||||
}
|
||||
@ -41,9 +41,6 @@ class BlinkxIE(InfoExtractor):
|
||||
'video=%s' % video_id)
|
||||
data_json = self._download_webpage(api_url, display_id)
|
||||
data = json.loads(data_json)['api']['results'][0]
|
||||
dt = datetime.datetime.fromtimestamp(data['pubdate_epoch'])
|
||||
pload_date = dt.strftime('%Y%m%d')
|
||||
|
||||
duration = None
|
||||
thumbnails = []
|
||||
formats = []
|
||||
@ -64,10 +61,7 @@ class BlinkxIE(InfoExtractor):
|
||||
vcodec = remove_start(m['vcodec'], 'ff')
|
||||
acodec = remove_start(m['acodec'], 'ff')
|
||||
tbr = (int(m['vbr']) + int(m['abr'])) // 1000
|
||||
format_id = (u'%s-%sk-%s' %
|
||||
(vcodec,
|
||||
tbr,
|
||||
m['w']))
|
||||
format_id = u'%s-%sk-%s' % (vcodec, tbr, m['w'])
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': m['link'],
|
||||
@ -88,7 +82,7 @@ class BlinkxIE(InfoExtractor):
|
||||
'title': data['title'],
|
||||
'formats': formats,
|
||||
'uploader': data['channel_name'],
|
||||
'upload_date': pload_date,
|
||||
'timestamp': data['pubdate_epoch'],
|
||||
'description': data.get('description'),
|
||||
'thumbnails': thumbnails,
|
||||
'duration': duration,
|
||||
|
@ -1,102 +1,124 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
compat_urllib_request,
|
||||
|
||||
unescapeHTML,
|
||||
parse_iso8601,
|
||||
compat_urlparse,
|
||||
clean_html,
|
||||
compat_str,
|
||||
)
|
||||
|
||||
|
||||
class BlipTVIE(SubtitlesInfoExtractor):
|
||||
"""Information extractor for blip.tv"""
|
||||
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z]+)))'
|
||||
|
||||
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(?P<presumptive_id>.+)$'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
|
||||
'md5': 'c6934ad0b6acf2bd920720ec888eb812',
|
||||
'info_dict': {
|
||||
'id': '5779306',
|
||||
'ext': 'mov',
|
||||
'upload_date': '20111205',
|
||||
'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
|
||||
'uploader': 'Comic Book Resources - CBR TV',
|
||||
'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
|
||||
'md5': 'c6934ad0b6acf2bd920720ec888eb812',
|
||||
'info_dict': {
|
||||
'id': '5779306',
|
||||
'ext': 'mov',
|
||||
'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
|
||||
'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
|
||||
'timestamp': 1323138843,
|
||||
'upload_date': '20111206',
|
||||
'uploader': 'cbr',
|
||||
'uploader_id': '679425',
|
||||
'duration': 81,
|
||||
}
|
||||
},
|
||||
{
|
||||
# https://github.com/rg3/youtube-dl/pull/2274
|
||||
'note': 'Video with subtitles',
|
||||
'url': 'http://blip.tv/play/h6Uag5OEVgI.html',
|
||||
'md5': '309f9d25b820b086ca163ffac8031806',
|
||||
'info_dict': {
|
||||
'id': '6586561',
|
||||
'ext': 'mp4',
|
||||
'title': 'Red vs. Blue Season 11 Episode 1',
|
||||
'description': 'One-Zero-One',
|
||||
'timestamp': 1371261608,
|
||||
'upload_date': '20130615',
|
||||
'uploader': 'redvsblue',
|
||||
'uploader_id': '792887',
|
||||
'duration': 279,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# https://github.com/rg3/youtube-dl/pull/2274
|
||||
'note': 'Video with subtitles',
|
||||
'url': 'http://blip.tv/play/h6Uag5OEVgI.html',
|
||||
'md5': '309f9d25b820b086ca163ffac8031806',
|
||||
'info_dict': {
|
||||
'id': '6586561',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Red vs. Blue',
|
||||
'description': 'One-Zero-One',
|
||||
'upload_date': '20130614',
|
||||
'title': 'Red vs. Blue Season 11 Episode 1',
|
||||
}
|
||||
}]
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
presumptive_id = mobj.group('presumptive_id')
|
||||
lookup_id = mobj.group('lookup_id')
|
||||
|
||||
# See https://github.com/rg3/youtube-dl/issues/857
|
||||
embed_mobj = re.match(r'https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', url)
|
||||
if embed_mobj:
|
||||
info_url = 'http://blip.tv/play/%s.x?p=1' % embed_mobj.group(1)
|
||||
info_page = self._download_webpage(info_url, embed_mobj.group(1))
|
||||
video_id = self._search_regex(
|
||||
r'data-episode-id="([0-9]+)', info_page, 'video_id')
|
||||
return self.url_result('http://blip.tv/a/a-' + video_id, 'BlipTV')
|
||||
|
||||
cchar = '&' if '?' in url else '?'
|
||||
json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
|
||||
request = compat_urllib_request.Request(json_url)
|
||||
request.add_header('User-Agent', 'iTunes/10.6.1')
|
||||
|
||||
json_data = self._download_json(request, video_id=presumptive_id)
|
||||
|
||||
if 'Post' in json_data:
|
||||
data = json_data['Post']
|
||||
if lookup_id:
|
||||
info_page = self._download_webpage(
|
||||
'http://blip.tv/play/%s.x?p=1' % lookup_id, lookup_id, 'Resolving lookup id')
|
||||
video_id = self._search_regex(r'data-episode-id="([0-9]+)', info_page, 'video_id')
|
||||
else:
|
||||
data = json_data
|
||||
video_id = mobj.group('id')
|
||||
|
||||
rss = self._download_xml('http://blip.tv/rss/flash/%s' % video_id, video_id, 'Downloading video RSS')
|
||||
|
||||
def blip(s):
|
||||
return '{http://blip.tv/dtd/blip/1.0}%s' % s
|
||||
|
||||
def media(s):
|
||||
return '{http://search.yahoo.com/mrss/}%s' % s
|
||||
|
||||
def itunes(s):
|
||||
return '{http://www.itunes.com/dtds/podcast-1.0.dtd}%s' % s
|
||||
|
||||
item = rss.find('channel/item')
|
||||
|
||||
video_id = item.find(blip('item_id')).text
|
||||
title = item.find('./title').text
|
||||
description = clean_html(compat_str(item.find(blip('puredescription')).text))
|
||||
timestamp = parse_iso8601(item.find(blip('datestamp')).text)
|
||||
uploader = item.find(blip('user')).text
|
||||
uploader_id = item.find(blip('userid')).text
|
||||
duration = int(item.find(blip('runtime')).text)
|
||||
media_thumbnail = item.find(media('thumbnail'))
|
||||
thumbnail = media_thumbnail.get('url') if media_thumbnail is not None else item.find(itunes('image')).text
|
||||
categories = [category.text for category in item.findall('category')]
|
||||
|
||||
video_id = compat_str(data['item_id'])
|
||||
upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
|
||||
subtitles = {}
|
||||
formats = []
|
||||
if 'additionalMedia' in data:
|
||||
for f in data['additionalMedia']:
|
||||
if f.get('file_type_srt') == 1:
|
||||
LANGS = {
|
||||
'english': 'en',
|
||||
}
|
||||
lang = f['role'].rpartition('-')[-1].strip().lower()
|
||||
langcode = LANGS.get(lang, lang)
|
||||
subtitles[langcode] = f['url']
|
||||
continue
|
||||
if not int(f['media_width']): # filter m3u8
|
||||
continue
|
||||
subtitles = {}
|
||||
|
||||
media_group = item.find(media('group'))
|
||||
for media_content in media_group.findall(media('content')):
|
||||
url = media_content.get('url')
|
||||
role = media_content.get(blip('role'))
|
||||
msg = self._download_webpage(
|
||||
url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url',
|
||||
video_id, 'Resolving URL for %s' % role)
|
||||
real_url = compat_urlparse.parse_qs(msg)['message'][0]
|
||||
|
||||
media_type = media_content.get('type')
|
||||
if media_type == 'text/srt' or url.endswith('.srt'):
|
||||
LANGS = {
|
||||
'english': 'en',
|
||||
}
|
||||
lang = role.rpartition('-')[-1].strip().lower()
|
||||
langcode = LANGS.get(lang, lang)
|
||||
subtitles[langcode] = url
|
||||
elif media_type.startswith('video/'):
|
||||
formats.append({
|
||||
'url': f['url'],
|
||||
'format_id': f['role'],
|
||||
'width': int(f['media_width']),
|
||||
'height': int(f['media_height']),
|
||||
'url': real_url,
|
||||
'format_id': role,
|
||||
'format_note': media_type,
|
||||
'vcodec': media_content.get(blip('vcodec')),
|
||||
'acodec': media_content.get(blip('acodec')),
|
||||
'filesize': media_content.get('filesize'),
|
||||
'width': int(media_content.get('width')),
|
||||
'height': int(media_content.get('height')),
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
'url': data['media']['url'],
|
||||
'width': int(data['media']['width']),
|
||||
'height': int(data['media']['height']),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
# subtitles
|
||||
@ -107,12 +129,14 @@ class BlipTVIE(SubtitlesInfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'uploader': data['display_name'],
|
||||
'upload_date': upload_date,
|
||||
'title': data['title'],
|
||||
'thumbnail': data['thumbnailUrl'],
|
||||
'description': data['description'],
|
||||
'user_agent': 'iTunes/10.6.1',
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'categories': categories,
|
||||
'formats': formats,
|
||||
'subtitles': video_subtitles,
|
||||
}
|
||||
|
@ -4,17 +4,20 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
class CanalplusIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>\d+))'
|
||||
_VALID_URL = r'https?://(?:www\.canalplus\.fr/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'
|
||||
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
|
||||
IE_NAME = 'canalplus.fr'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470',
|
||||
'md5': '60c29434a416a83c15dae2587d47027d',
|
||||
'md5': '3db39fb48b9685438ecf33a1078023e4',
|
||||
'info_dict': {
|
||||
'id': '922470',
|
||||
'ext': 'flv',
|
||||
@ -26,10 +29,13 @@ class CanalplusIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = mobj.groupdict().get('id')
|
||||
|
||||
# Beware, some subclasses do not define an id group
|
||||
display_id = url_basename(mobj.group('path'))
|
||||
|
||||
if video_id is None:
|
||||
webpage = self._download_webpage(url, mobj.group('path'))
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(r'<canal:player videoId="(\d+)"', webpage, 'video id')
|
||||
|
||||
info_url = self._VIDEO_INFO_TEMPLATE % video_id
|
||||
@ -53,6 +59,7 @@ class CanalplusIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': '%s - %s' % (infos.find('TITRAGE/TITRE').text,
|
||||
infos.find('TITRAGE/SOUS_TITRE').text),
|
||||
'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text),
|
||||
|
@ -1,10 +1,12 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -13,9 +15,10 @@ class CinemassacreIE(InfoExtractor):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
|
||||
'file': '19911.mp4',
|
||||
'md5': '782f8504ca95a0eba8fc9177c373eec7',
|
||||
'md5': 'fde81fbafaee331785f58cd6c0d46190',
|
||||
'info_dict': {
|
||||
'id': '19911',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20121110',
|
||||
'title': '“Angry Video Game Nerd: The Movie” – Trailer',
|
||||
'description': 'md5:fb87405fcb42a331742a0dce2708560b',
|
||||
@ -23,9 +26,10 @@ class CinemassacreIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
|
||||
'file': '521be8ef82b16.mp4',
|
||||
'md5': 'dec39ee5118f8d9cc067f45f9cbe3a35',
|
||||
'md5': 'd72f10cd39eac4215048f62ab477a511',
|
||||
'info_dict': {
|
||||
'id': '521be8ef82b16',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20131002',
|
||||
'title': 'The Mummy’s Hand (1940)',
|
||||
},
|
||||
@ -50,29 +54,40 @@ class CinemassacreIE(InfoExtractor):
|
||||
r'<div class="entry-content">(?P<description>.+?)</div>',
|
||||
webpage, 'description', flags=re.DOTALL, fatal=False)
|
||||
|
||||
playerdata = self._download_webpage(playerdata_url, video_id)
|
||||
playerdata = self._download_webpage(playerdata_url, video_id, 'Downloading player webpage')
|
||||
video_thumbnail = self._search_regex(
|
||||
r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False)
|
||||
sd_url = self._search_regex(r'file: \'([^\']+)\', label: \'SD\'', playerdata, 'sd_file')
|
||||
videolist_url = self._search_regex(r'file: \'([^\']+\.smil)\'}', playerdata, 'videolist_url')
|
||||
|
||||
sd_url = self._html_search_regex(r'file: \'([^\']+)\', label: \'SD\'', playerdata, 'sd_file')
|
||||
hd_url = self._html_search_regex(
|
||||
r'file: \'([^\']+)\', label: \'HD\'', playerdata, 'hd_file',
|
||||
default=None)
|
||||
video_thumbnail = self._html_search_regex(r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False)
|
||||
videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML')
|
||||
|
||||
formats = [{
|
||||
'url': sd_url,
|
||||
'ext': 'mp4',
|
||||
'format': 'sd',
|
||||
'format_id': 'sd',
|
||||
'quality': 1,
|
||||
}]
|
||||
if hd_url:
|
||||
formats.append({
|
||||
'url': hd_url,
|
||||
'ext': 'mp4',
|
||||
'format': 'hd',
|
||||
'format_id': 'hd',
|
||||
'quality': 2,
|
||||
})
|
||||
formats = []
|
||||
baseurl = sd_url[:sd_url.rfind('/')+1]
|
||||
for video in videolist.findall('.//video'):
|
||||
src = video.get('src')
|
||||
if not src:
|
||||
continue
|
||||
file_ = src.partition(':')[-1]
|
||||
width = int_or_none(video.get('width'))
|
||||
height = int_or_none(video.get('height'))
|
||||
bitrate = int_or_none(video.get('system-bitrate'))
|
||||
format = {
|
||||
'url': baseurl + file_,
|
||||
'format_id': src.rpartition('.')[0].rpartition('_')[-1],
|
||||
}
|
||||
if width or height:
|
||||
format.update({
|
||||
'tbr': bitrate // 1000 if bitrate else None,
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
else:
|
||||
format.update({
|
||||
'abr': bitrate // 1000 if bitrate else None,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
formats.append(format)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
58
youtube_dl/extractor/clubic.py
Normal file
58
youtube_dl/extractor/clubic.py
Normal file
@ -0,0 +1,58 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class ClubicIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?clubic\.com/video/[^/]+/video.*-(?P<id>[0-9]+)\.html'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.clubic.com/video/clubic-week/video-clubic-week-2-0-le-fbi-se-lance-dans-la-photo-d-identite-448474.html',
|
||||
'md5': '1592b694ba586036efac1776b0b43cd3',
|
||||
'info_dict': {
|
||||
'id': '448474',
|
||||
'ext': 'mp4',
|
||||
'title': 'Clubic Week 2.0 : le FBI se lance dans la photo d\u0092identité',
|
||||
'description': 're:Gueule de bois chez Nokia. Le constructeur a indiqué cette.*',
|
||||
'thumbnail': 're:^http://img\.clubic\.com/.*\.jpg$',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id
|
||||
player_page = self._download_webpage(player_url, video_id)
|
||||
|
||||
config_json = self._search_regex(
|
||||
r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page,
|
||||
'configuration')
|
||||
config = json.loads(config_json)
|
||||
|
||||
video_info = config['videoInfo']
|
||||
sources = config['sources']
|
||||
quality_order = qualities(['sd', 'hq'])
|
||||
|
||||
formats = [{
|
||||
'format_id': src['streamQuality'],
|
||||
'url': src['src'],
|
||||
'quality': quality_order(src['streamQuality']),
|
||||
} for src in sources]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_info['title'],
|
||||
'formats': formats,
|
||||
'description': clean_html(video_info.get('description')),
|
||||
'thumbnail': config.get('poster'),
|
||||
}
|
@ -33,7 +33,7 @@ class CNETIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
data_json = self._html_search_regex(
|
||||
r"<div class=\"cnetVideoPlayer\" data-cnet-video-options='([^']+)'",
|
||||
r"<div class=\"cnetVideoPlayer\"\s+.*?data-cnet-video-options='([^']+)'",
|
||||
webpage, 'data json')
|
||||
data = json.loads(data_json)
|
||||
vdata = data['video']
|
||||
|
@ -188,7 +188,7 @@ class ComedyCentralShowsIE(InfoExtractor):
|
||||
})
|
||||
formats.append({
|
||||
'format_id': 'rtmp-%s' % format,
|
||||
'url': rtmp_video_url,
|
||||
'url': rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm'),
|
||||
'ext': self._video_extensions.get(format, 'mp4'),
|
||||
'height': h,
|
||||
'width': w,
|
||||
|
@ -113,6 +113,8 @@ class InfoExtractor(object):
|
||||
webpage_url: The url to the video webpage, if given to youtube-dl it
|
||||
should allow to get the same result again. (It will be set
|
||||
by YoutubeDL if it's missing)
|
||||
categories: A list of categories that the video falls in, for example
|
||||
["Sports", "Berlin"]
|
||||
|
||||
Unless mentioned otherwise, the fields should be Unicode strings.
|
||||
|
||||
@ -242,10 +244,11 @@ class InfoExtractor(object):
|
||||
url = url_or_request.get_full_url()
|
||||
except AttributeError:
|
||||
url = url_or_request
|
||||
if len(url) > 200:
|
||||
h = u'___' + hashlib.md5(url.encode('utf-8')).hexdigest()
|
||||
url = url[:200 - len(h)] + h
|
||||
raw_filename = ('%s_%s.dump' % (video_id, url))
|
||||
basen = '%s_%s' % (video_id, url)
|
||||
if len(basen) > 240:
|
||||
h = u'___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
|
||||
basen = basen[:240 - len(h)] + h
|
||||
raw_filename = basen + '.dump'
|
||||
filename = sanitize_filename(raw_filename, restricted=True)
|
||||
self.to_screen(u'Saving request to ' + filename)
|
||||
with open(filename, 'wb') as outf:
|
||||
@ -279,9 +282,12 @@ class InfoExtractor(object):
|
||||
|
||||
def _download_xml(self, url_or_request, video_id,
|
||||
note=u'Downloading XML', errnote=u'Unable to download XML',
|
||||
transform_source=None):
|
||||
transform_source=None, fatal=True):
|
||||
"""Return the xml as an xml.etree.ElementTree.Element"""
|
||||
xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
|
||||
xml_string = self._download_webpage(
|
||||
url_or_request, video_id, note, errnote, fatal=fatal)
|
||||
if xml_string is False:
|
||||
return xml_string
|
||||
if transform_source:
|
||||
xml_string = transform_source(xml_string)
|
||||
return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
|
||||
@ -545,6 +551,23 @@ class InfoExtractor(object):
|
||||
)
|
||||
formats.sort(key=_formats_key)
|
||||
|
||||
def http_scheme(self):
|
||||
""" Either "https:" or "https:", depending on the user's preferences """
|
||||
return (
|
||||
'http:'
|
||||
if self._downloader.params.get('prefer_insecure', False)
|
||||
else 'https:')
|
||||
|
||||
def _proto_relative_url(self, url, scheme=None):
|
||||
if url is None:
|
||||
return url
|
||||
if url.startswith('//'):
|
||||
if scheme is None:
|
||||
scheme = self.http_scheme()
|
||||
return scheme + url
|
||||
else:
|
||||
return url
|
||||
|
||||
|
||||
class SearchInfoExtractor(InfoExtractor):
|
||||
"""
|
||||
@ -588,3 +611,4 @@ class SearchInfoExtractor(InfoExtractor):
|
||||
@property
|
||||
def SEARCH_KEY(self):
|
||||
return self._SEARCH_KEY
|
||||
|
||||
|
@ -11,8 +11,8 @@ from ..utils import (
|
||||
orderedSet,
|
||||
str_to_int,
|
||||
int_or_none,
|
||||
|
||||
ExtractorError,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
class DailymotionBaseInfoExtractor(InfoExtractor):
|
||||
@ -188,7 +188,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||
webpage = self._download_webpage(request,
|
||||
id, u'Downloading page %s' % pagenum)
|
||||
|
||||
video_ids.extend(re.findall(r'data-id="(.+?)"', webpage))
|
||||
video_ids.extend(re.findall(r'data-xid="(.+?)"', webpage))
|
||||
|
||||
if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
|
||||
break
|
||||
@ -217,9 +217,9 @@ class DailymotionUserIE(DailymotionPlaylistIE):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
user = mobj.group('user')
|
||||
webpage = self._download_webpage(url, user)
|
||||
full_user = self._html_search_regex(
|
||||
r'<a class="label" href="/%s".*?>(.*?)</' % re.escape(user),
|
||||
webpage, u'user', flags=re.DOTALL)
|
||||
full_user = unescapeHTML(self._html_search_regex(
|
||||
r'<a class="nav-image" title="([^"]+)" href="/%s">' % re.escape(user),
|
||||
webpage, u'user', flags=re.DOTALL))
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
|
54
youtube_dl/extractor/empflix.py
Normal file
54
youtube_dl/extractor/empflix.py
Normal file
@ -0,0 +1,54 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class EmpflixIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://www\.empflix\.com/videos/.*?-(?P<id>[0-9]+)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
|
||||
'md5': 'b1bc15b6412d33902d6e5952035fcabc',
|
||||
'info_dict': {
|
||||
'id': '33051',
|
||||
'ext': 'mp4',
|
||||
'title': 'Amateur Finger Fuck',
|
||||
'description': 'Amateur solo finger fucking.',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'name="title" value="(?P<title>[^"]*)"', webpage, 'title')
|
||||
video_description = self._html_search_regex(
|
||||
r'name="description" value="([^"]*)"', webpage, 'description', fatal=False)
|
||||
|
||||
cfg_url = self._html_search_regex(
|
||||
r'flashvars\.config = escape\("([^"]+)"',
|
||||
webpage, 'flashvars.config')
|
||||
|
||||
cfg_xml = self._download_xml(
|
||||
cfg_url, video_id, note='Downloading metadata')
|
||||
|
||||
formats = [
|
||||
{
|
||||
'url': item.find('videoLink').text,
|
||||
'format_id': item.find('res').text,
|
||||
} for item in cfg_xml.findall('./quality/item')
|
||||
]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'formats': formats,
|
||||
'age_limit': age_limit,
|
||||
}
|
@ -76,9 +76,8 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
check_form = {
|
||||
'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'),
|
||||
'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, 'nh'),
|
||||
'h': self._search_regex(r'name="h" value="(\w*?)"', login_results, 'h'),
|
||||
'name_action_selected': 'dont_save',
|
||||
'submit[Continue]': self._search_regex(r'<button[^>]+value="(.*?)"[^>]+name="submit\[Continue\]"', login_results, 'continue'),
|
||||
}
|
||||
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
|
||||
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
|
60
youtube_dl/extractor/fc2.py
Normal file
60
youtube_dl/extractor/fc2.py
Normal file
@ -0,0 +1,60 @@
|
||||
#! -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
|
||||
class FC2IE(InfoExtractor):
|
||||
_VALID_URL = r'^http://video\.fc2\.com/((?P<lang>[^/]+)/)?content/(?P<id>[^/]+)'
|
||||
IE_NAME = 'fc2'
|
||||
_TEST = {
|
||||
'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
|
||||
'md5': 'a6ebe8ebe0396518689d963774a54eb7',
|
||||
'info_dict': {
|
||||
'id': '20121103kUan1KHs',
|
||||
'ext': 'flv',
|
||||
'title': 'Boxing again with Puff',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
self._downloader.cookiejar.clear_session_cookies() # must clear
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
refer = url.replace('/content/', '/a/content/')
|
||||
|
||||
mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest()
|
||||
|
||||
info_url = (
|
||||
"http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&".
|
||||
format(video_id, mimi, compat_urllib_request.quote(refer, safe='').replace('.','%2E')))
|
||||
|
||||
info_webpage = self._download_webpage(
|
||||
info_url, video_id, note='Downloading info page')
|
||||
info = compat_urlparse.parse_qs(info_webpage)
|
||||
|
||||
if 'err_code' in info:
|
||||
raise ExtractorError('Error code: %s' % info['err_code'][0])
|
||||
|
||||
video_url = info['filepath'][0] + '?mid=' + info['mid'][0]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info['title'][0],
|
||||
'url': video_url,
|
||||
'ext': 'flv',
|
||||
'thumbnail': thumbnail,
|
||||
}
|
@ -6,6 +6,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
compat_urllib_parse,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
@ -58,9 +59,17 @@ class FiveMinIE(InfoExtractor):
|
||||
'isPlayerSeed': 'true',
|
||||
'url': embed_url,
|
||||
})
|
||||
info = self._download_json(
|
||||
response = self._download_json(
|
||||
'https://syn.5min.com/handlers/SenseHandler.ashx?' + query,
|
||||
video_id)['binding'][0]
|
||||
video_id)
|
||||
if not response['success']:
|
||||
err_msg = response['errorMessage']
|
||||
if err_msg == 'ErrorVideoUserNotGeo':
|
||||
msg = 'Video not available from your location'
|
||||
else:
|
||||
msg = 'Aol said: %s' % err_msg
|
||||
raise ExtractorError(msg, expected=True, video_id=video_id)
|
||||
info = response['binding'][0]
|
||||
|
||||
second_id = compat_str(int(video_id[:-2]) + 1)
|
||||
formats = []
|
||||
|
@ -48,24 +48,36 @@ class PluzzIE(FranceTVBaseInfoExtractor):
|
||||
|
||||
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
|
||||
IE_NAME = 'francetvinfo.fr'
|
||||
_VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+)\.html'
|
||||
_VALID_URL = r'https?://www\.francetvinfo\.fr/.*/(?P<title>.+)\.html'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
|
||||
'file': '84981923.mp4',
|
||||
'info_dict': {
|
||||
'id': '84981923',
|
||||
'ext': 'mp4',
|
||||
'title': 'Soir 3',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
|
||||
'info_dict': {
|
||||
'id': 'EV_20019',
|
||||
'ext': 'mp4',
|
||||
'title': 'Débat des candidats à la Commission européenne',
|
||||
'description': 'Débat des candidats à la Commission européenne',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'HLS (reqires ffmpeg)'
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
page_title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, page_title)
|
||||
video_id = self._search_regex(r'id-video=(\d+?)[@"]', webpage, 'video id')
|
||||
video_id = self._search_regex(r'id-video=((?:[^0-9]*?_)?[0-9]+)[@"]', webpage, 'video id')
|
||||
return self._extract_video(video_id)
|
||||
|
||||
|
||||
|
@ -4,22 +4,32 @@ import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class FunnyOrDieIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?funnyordie\.com/(?P<type>embed|videos)/(?P<id>[0-9a-f]+)(?:$|[?#/])'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version',
|
||||
'file': '0732f586d7.mp4',
|
||||
'md5': 'f647e9e90064b53b6e046e75d0241fbd',
|
||||
'md5': 'bcd81e0c4f26189ee09be362ad6e6ba9',
|
||||
'info_dict': {
|
||||
'description': ('Lyrics changed to match the video. Spoken cameo '
|
||||
'by Obscurus Lupa (from ThatGuyWithTheGlasses.com). Based on a '
|
||||
'concept by Dustin McLean (DustFilms.com). Performed, edited, '
|
||||
'and written by David A. Scott.'),
|
||||
'id': '0732f586d7',
|
||||
'ext': 'mp4',
|
||||
'title': 'Heart-Shaped Box: Literal Video Version',
|
||||
'description': 'md5:ea09a01bc9a1c46d9ab696c01747c338',
|
||||
'thumbnail': 're:^http:.*\.jpg$',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.funnyordie.com/embed/e402820827',
|
||||
'md5': 'ff4d83318f89776ed0250634cfaa8d36',
|
||||
'info_dict': {
|
||||
'id': 'e402820827',
|
||||
'ext': 'mp4',
|
||||
'title': 'Please Use This Song (Jon Lajoie)',
|
||||
'description': 'md5:2ed27d364f5a805a6dba199faaf6681d',
|
||||
'thumbnail': 're:^http:.*\.jpg$',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@ -27,27 +37,34 @@ class FunnyOrDieIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._search_regex(
|
||||
[r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''],
|
||||
webpage, 'video URL', flags=re.DOTALL)
|
||||
links = re.findall(r'<source src="([^"]+/v)\d+\.([^"]+)" type=\'video', webpage)
|
||||
if not links:
|
||||
raise ExtractorError('No media links available for %s' % video_id)
|
||||
|
||||
if mobj.group('type') == 'embed':
|
||||
post_json = self._search_regex(
|
||||
r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
|
||||
post = json.loads(post_json)
|
||||
title = post['name']
|
||||
description = post.get('description')
|
||||
thumbnail = post.get('picture')
|
||||
else:
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = None
|
||||
links.sort(key=lambda link: 1 if link[1] == 'mp4' else 0)
|
||||
|
||||
bitrates = self._html_search_regex(r'<source src="[^"]+/v,((?:\d+,)+)\.mp4\.csmil', webpage, 'video bitrates')
|
||||
bitrates = [int(b) for b in bitrates.rstrip(',').split(',')]
|
||||
bitrates.sort()
|
||||
|
||||
formats = []
|
||||
|
||||
for bitrate in bitrates:
|
||||
for link in links:
|
||||
formats.append({
|
||||
'url': '%s%d.%s' % (link[0], bitrate, link[1]),
|
||||
'format_id': '%s-%d' % (link[1], bitrate),
|
||||
'vbr': bitrate,
|
||||
})
|
||||
|
||||
post_json = self._search_regex(
|
||||
r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
|
||||
post = json.loads(post_json)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'title': post['name'],
|
||||
'description': post.get('description'),
|
||||
'thumbnail': post.get('picture'),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -15,7 +15,7 @@ class GamekingsIE(InfoExtractor):
|
||||
'id': '20130811',
|
||||
'ext': 'mp4',
|
||||
'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review',
|
||||
'description': 'md5:632e61a9f97d700e83f43d77ddafb6a4',
|
||||
'description': 'md5:36fd701e57e8c15ac8682a2374c99731',
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -15,11 +15,12 @@ from ..utils import (
|
||||
class GameSpotIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P<page_id>\d+)/?'
|
||||
_TEST = {
|
||||
"url": "http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/",
|
||||
"file": "gs-2300-6410818.mp4",
|
||||
"md5": "b2a30deaa8654fcccd43713a6b6a4825",
|
||||
"info_dict": {
|
||||
"title": "Arma 3 - Community Guide: SITREP I",
|
||||
'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
|
||||
'md5': 'b2a30deaa8654fcccd43713a6b6a4825',
|
||||
'info_dict': {
|
||||
'id': 'gs-2300-6410818',
|
||||
'ext': 'mp4',
|
||||
'title': 'Arma 3 - Community Guide: SITREP I',
|
||||
'description': 'Check out this video where some of the basics of Arma 3 is explained.',
|
||||
}
|
||||
}
|
||||
|
@ -248,6 +248,18 @@ class GenericIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Honda’s New Asimo Robot Is More Human Than Ever',
|
||||
}
|
||||
},
|
||||
# Dailymotion embed
|
||||
{
|
||||
'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
|
||||
'md5': '441aeeb82eb72c422c7f14ec533999cd',
|
||||
'info_dict': {
|
||||
'id': 'k2mm4bCdJ6CQ2i7c8o2',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
|
||||
'uploader': 'Spi0n',
|
||||
},
|
||||
'add_ie': ['Dailymotion'],
|
||||
}
|
||||
]
|
||||
|
||||
@ -333,6 +345,12 @@ class GenericIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
if url.startswith('//'):
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': self.http_scheme() + url,
|
||||
}
|
||||
|
||||
parsed_url = compat_urlparse.urlparse(url)
|
||||
if not parsed_url.scheme:
|
||||
default_search = self._downloader.params.get('default_search')
|
||||
@ -345,8 +363,13 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result('http://' + url)
|
||||
else:
|
||||
if default_search == 'auto_warning':
|
||||
self._downloader.report_warning(
|
||||
'Falling back to youtube search for %s . Set --default-search to "auto" to suppress this warning.' % url)
|
||||
if re.match(r'^(?:url|URL)$', url):
|
||||
raise ExtractorError(
|
||||
'Invalid URL: %r . Call youtube-dl like this: youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc" ' % url,
|
||||
expected=True)
|
||||
else:
|
||||
self._downloader.report_warning(
|
||||
'Falling back to youtube search for %s . Set --default-search to "auto" to suppress this warning.' % url)
|
||||
return self.url_result('ytsearch:' + url)
|
||||
else:
|
||||
assert ':' in default_search
|
||||
@ -469,7 +492,7 @@ class GenericIE(InfoExtractor):
|
||||
matches = re.findall(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
|
||||
if matches:
|
||||
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Dailymotion')
|
||||
urlrs = [self.url_result(unescapeHTML(tuppl[1]))
|
||||
for tuppl in matches]
|
||||
return self.playlist_result(
|
||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||
@ -531,7 +554,7 @@ class GenericIE(InfoExtractor):
|
||||
return OoyalaIE._build_url_result(mobj.group('ec'))
|
||||
|
||||
# Look for Aparat videos
|
||||
mobj = re.search(r'<iframe src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
|
||||
mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group(1), 'Aparat')
|
||||
|
||||
@ -542,7 +565,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Look for embedded NovaMov-based player
|
||||
mobj = re.search(
|
||||
r'''(?x)<iframe[^>]+?src=(["\'])
|
||||
r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
|
||||
(?P<url>http://(?:(?:embed|www)\.)?
|
||||
(?:novamov\.com|
|
||||
nowvideo\.(?:ch|sx|eu|at|ag|co)|
|
||||
@ -615,65 +638,86 @@ class GenericIE(InfoExtractor):
|
||||
if smotri_url:
|
||||
return self.url_result(smotri_url, 'Smotri')
|
||||
|
||||
# Start with something easy: JW Player in SWFObject
|
||||
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||
if mobj is None:
|
||||
# Look for gorilla-vid style embedding
|
||||
mobj = re.search(r'(?s)(?:jw_plugins|JWPlayerOptions).*?file\s*:\s*["\'](.*?)["\']', webpage)
|
||||
if mobj is None:
|
||||
# Broaden the search a little bit
|
||||
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
|
||||
if mobj is None:
|
||||
# Broaden the search a little bit: JWPlayer JS loader
|
||||
mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
|
||||
# Look for embeded soundcloud player
|
||||
mobj = re.search(
|
||||
r'<iframe src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
url = unescapeHTML(mobj.group('url'))
|
||||
return self.url_result(url)
|
||||
|
||||
if mobj is None:
|
||||
# Start with something easy: JW Player in SWFObject
|
||||
found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||
if not found:
|
||||
# Look for gorilla-vid style embedding
|
||||
found = re.findall(r'''(?sx)
|
||||
(?:
|
||||
jw_plugins|
|
||||
JWPlayerOptions|
|
||||
jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
|
||||
)
|
||||
.*?file\s*:\s*["\'](.*?)["\']''', webpage)
|
||||
if not found:
|
||||
# Broaden the search a little bit
|
||||
found = re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
|
||||
if not found:
|
||||
# Broaden the findall a little bit: JWPlayer JS loader
|
||||
found = re.findall(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
|
||||
if not found:
|
||||
# Try to find twitter cards info
|
||||
mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
|
||||
if mobj is None:
|
||||
found = re.findall(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
|
||||
if not found:
|
||||
# We look for Open Graph info:
|
||||
# We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
|
||||
m_video_type = re.search(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
|
||||
m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
|
||||
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
|
||||
if m_video_type is not None:
|
||||
mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
|
||||
if mobj is None:
|
||||
found = re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
|
||||
if not found:
|
||||
# HTML5 video
|
||||
mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL)
|
||||
if mobj is None:
|
||||
mobj = re.search(
|
||||
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage)
|
||||
if not found:
|
||||
found = re.search(
|
||||
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
|
||||
r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'([^\']+)\'"',
|
||||
webpage)
|
||||
if mobj:
|
||||
new_url = mobj.group(1)
|
||||
if found:
|
||||
new_url = found.group(1)
|
||||
self.report_following_redirect(new_url)
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': new_url,
|
||||
}
|
||||
if mobj is None:
|
||||
if not found:
|
||||
raise ExtractorError('Unsupported URL: %s' % url)
|
||||
|
||||
# It's possible that one of the regexes
|
||||
# matched, but returned an empty group:
|
||||
if mobj.group(1) is None:
|
||||
raise ExtractorError('Did not find a valid video URL at %s' % url)
|
||||
entries = []
|
||||
for video_url in found:
|
||||
video_url = compat_urlparse.urljoin(url, video_url)
|
||||
video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
|
||||
|
||||
video_url = mobj.group(1)
|
||||
video_url = compat_urlparse.urljoin(url, video_url)
|
||||
video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
|
||||
# Sometimes, jwplayer extraction will result in a YouTube URL
|
||||
if YoutubeIE.suitable(video_url):
|
||||
entries.append(self.url_result(video_url, 'Youtube'))
|
||||
continue
|
||||
|
||||
# Sometimes, jwplayer extraction will result in a YouTube URL
|
||||
if YoutubeIE.suitable(video_url):
|
||||
return self.url_result(video_url, 'Youtube')
|
||||
# here's a fun little line of code for you:
|
||||
video_id = os.path.splitext(video_id)[0]
|
||||
|
||||
# here's a fun little line of code for you:
|
||||
video_id = os.path.splitext(video_id)[0]
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'uploader': video_uploader,
|
||||
'title': video_title,
|
||||
})
|
||||
|
||||
if len(entries) == 1:
|
||||
return entries[0]
|
||||
else:
|
||||
for num, e in enumerate(entries, start=1):
|
||||
e['title'] = '%s (%d)' % (e['title'], num)
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'uploader': video_uploader,
|
||||
'title': video_title,
|
||||
}
|
||||
|
42
youtube_dl/extractor/hentaistigma.py
Normal file
42
youtube_dl/extractor/hentaistigma.py
Normal file
@ -0,0 +1,42 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class HentaiStigmaIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://hentai\.animestigma\.com/(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
'url': 'http://hentai.animestigma.com/inyouchuu-etsu-bonus/',
|
||||
'md5': '4e3d07422a68a4cc363d8f57c8bf0d23',
|
||||
'info_dict': {
|
||||
'id': 'inyouchuu-etsu-bonus',
|
||||
'ext': 'mp4',
|
||||
"title": "Inyouchuu Etsu Bonus",
|
||||
"age_limit": 18,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h2 class="posttitle"><a[^>]*>([^<]+)</a>',
|
||||
webpage, 'title')
|
||||
wrap_url = self._html_search_regex(
|
||||
r'<iframe src="([^"]+mp4)"', webpage, 'wrapper url')
|
||||
wrap_webpage = self._download_webpage(wrap_url, video_id)
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'clip:\s*{\s*url: "([^"]*)"', wrap_webpage, 'video url')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'age_limit': 18,
|
||||
}
|
@ -5,8 +5,8 @@ import re
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class StatigramIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(www\.)?statigr\.am/p/(?P<id>[^/]+)'
|
||||
class IconosquareIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(www\.)?(?:iconosquare\.com|statigr\.am)/p/(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
'url': 'http://statigr.am/p/522207370455279102_24101272',
|
||||
'md5': '6eb93b882a3ded7c378ee1d6884b1814',
|
||||
@ -15,6 +15,7 @@ class StatigramIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'uploader_id': 'aguynamedpatrick',
|
||||
'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
|
||||
'description': 'md5:644406a9ec27457ed7aa7a9ebcd4ce3d',
|
||||
},
|
||||
}
|
||||
|
||||
@ -25,7 +26,7 @@ class StatigramIE(InfoExtractor):
|
||||
html_title = self._html_search_regex(
|
||||
r'<title>(.+?)</title>',
|
||||
webpage, 'title')
|
||||
title = re.sub(r'(?: *\(Videos?\))? \| Statigram$', '', html_title)
|
||||
title = re.sub(r'(?: *\(Videos?\))? \| (?:Iconosquare|Statigram)$', '', html_title)
|
||||
uploader_id = self._html_search_regex(
|
||||
r'@([^ ]+)', title, 'uploader name', fatal=False)
|
||||
|
||||
@ -33,6 +34,7 @@ class StatigramIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'url': self._og_search_video_url(webpage),
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'uploader_id': uploader_id
|
||||
}
|
@ -33,14 +33,14 @@ class IviIE(InfoExtractor):
|
||||
},
|
||||
# Serial's serie
|
||||
{
|
||||
'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
|
||||
'md5': '3e6cc9a848c1d2ebcc6476444967baa9',
|
||||
'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/9549',
|
||||
'md5': '221f56b35e3ed815fde2df71032f4b3e',
|
||||
'info_dict': {
|
||||
'id': '74791',
|
||||
'id': '9549',
|
||||
'ext': 'mp4',
|
||||
'title': 'Дежурный ангел - 1 серия',
|
||||
'duration': 2490,
|
||||
'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
|
||||
'title': 'Двое из ларца - Серия 1',
|
||||
'duration': 2655,
|
||||
'thumbnail': 'http://thumbs.ivi.ru/f15.vcp.digitalaccess.ru/contents/8/4/0068dc0677041f3336b7c2baad8fc0.jpg',
|
||||
},
|
||||
'skip': 'Only works from Russia',
|
||||
}
|
||||
|
@ -14,7 +14,7 @@ class JukeboxIE(InfoExtractor):
|
||||
_VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.jukebox.es/kosheen/videoclip,pride,r303r.html',
|
||||
'md5': '5dc6477e74b1e37042ac5acedd8413e5',
|
||||
'md5': '1574e9b4d6438446d5b7dbcdf2786276',
|
||||
'info_dict': {
|
||||
'id': 'r303r',
|
||||
'ext': 'flv',
|
||||
|
@ -2,7 +2,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import datetime
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
@ -10,28 +9,48 @@ from .common import InfoExtractor
|
||||
class MailRuIE(InfoExtractor):
|
||||
IE_NAME = 'mailru'
|
||||
IE_DESC = 'Видео@Mail.Ru'
|
||||
_VALID_URL = r'http://(?:www\.)?my\.mail\.ru/video/.*#video=/?(?P<id>[^/]+/[^/]+/[^/]+/\d+)'
|
||||
_VALID_URL = r'http://(?:www\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76',
|
||||
'md5': 'dea205f03120046894db4ebb6159879a',
|
||||
'info_dict': {
|
||||
'id': '46301138',
|
||||
'ext': 'mp4',
|
||||
'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро',
|
||||
'upload_date': '20140224',
|
||||
'uploader': 'sonypicturesrus',
|
||||
'uploader_id': 'sonypicturesrus@mail.ru',
|
||||
'duration': 184,
|
||||
}
|
||||
}
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76',
|
||||
'md5': 'dea205f03120046894db4ebb6159879a',
|
||||
'info_dict': {
|
||||
'id': '46301138',
|
||||
'ext': 'mp4',
|
||||
'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро',
|
||||
'timestamp': 1393232740,
|
||||
'upload_date': '20140224',
|
||||
'uploader': 'sonypicturesrus',
|
||||
'uploader_id': 'sonypicturesrus@mail.ru',
|
||||
'duration': 184,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://my.mail.ru/corp/hitech/video/news_hi-tech_mail_ru/1263.html',
|
||||
'md5': '00a91a58c3402204dcced523777b475f',
|
||||
'info_dict': {
|
||||
'id': '46843144',
|
||||
'ext': 'mp4',
|
||||
'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion',
|
||||
'timestamp': 1397217632,
|
||||
'upload_date': '20140411',
|
||||
'uploader': 'hitech',
|
||||
'uploader_id': 'hitech@corp.mail.ru',
|
||||
'duration': 245,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = mobj.group('idv1')
|
||||
|
||||
if not video_id:
|
||||
video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix')
|
||||
|
||||
video_data = self._download_json(
|
||||
'http://videoapi.my.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON')
|
||||
'http://api.video.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON')
|
||||
|
||||
author = video_data['author']
|
||||
uploader = author['name']
|
||||
@ -40,10 +59,11 @@ class MailRuIE(InfoExtractor):
|
||||
movie = video_data['movie']
|
||||
content_id = str(movie['contentId'])
|
||||
title = movie['title']
|
||||
if title.endswith('.mp4'):
|
||||
title = title[:-4]
|
||||
thumbnail = movie['poster']
|
||||
duration = movie['duration']
|
||||
|
||||
upload_date = datetime.datetime.fromtimestamp(video_data['timestamp']).strftime('%Y%m%d')
|
||||
view_count = video_data['views_count']
|
||||
|
||||
formats = [
|
||||
@ -57,7 +77,7 @@ class MailRuIE(InfoExtractor):
|
||||
'id': content_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
'timestamp': video_data['timestamp'],
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'duration': duration,
|
||||
|
@ -3,9 +3,6 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class MDRIE(InfoExtractor):
|
||||
|
@ -4,9 +4,10 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
compat_urllib_parse,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
@ -24,6 +25,10 @@ class MixcloudIE(InfoExtractor):
|
||||
'uploader': 'Daniel Holbach',
|
||||
'uploader_id': 'dholbach',
|
||||
'upload_date': '20111115',
|
||||
'timestamp': 1321359578,
|
||||
'thumbnail': 're:https?://.*\.jpg',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
}
|
||||
|
||||
@ -51,10 +56,6 @@ class MixcloudIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, track_id)
|
||||
|
||||
api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name)
|
||||
info = self._download_json(
|
||||
api_url, track_id, 'Downloading cloudcast info')
|
||||
|
||||
preview_url = self._search_regex(
|
||||
r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url')
|
||||
song_url = preview_url.replace('/previews/', '/c/originals/')
|
||||
@ -65,16 +66,41 @@ class MixcloudIE(InfoExtractor):
|
||||
template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
|
||||
final_song_url = self._get_url(template_url)
|
||||
if final_song_url is None:
|
||||
raise ExtractorError(u'Unable to extract track url')
|
||||
raise ExtractorError('Unable to extract track url')
|
||||
|
||||
PREFIX = (
|
||||
r'<div class="cloudcast-play-button-container"'
|
||||
r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
|
||||
title = self._html_search_regex(
|
||||
PREFIX + r'm-title="([^"]+)"', webpage, 'title')
|
||||
thumbnail = self._proto_relative_url(self._html_search_regex(
|
||||
PREFIX + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail',
|
||||
fatal=False))
|
||||
uploader = self._html_search_regex(
|
||||
PREFIX + r'm-owner-name="([^"]+)"',
|
||||
webpage, 'uploader', fatal=False)
|
||||
uploader_id = self._search_regex(
|
||||
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
|
||||
description = self._og_search_description(webpage)
|
||||
like_count = int_or_none(self._search_regex(
|
||||
r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"',
|
||||
webpage, 'like count', fatal=False))
|
||||
view_count = int_or_none(self._search_regex(
|
||||
r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
|
||||
webpage, 'play count', fatal=False))
|
||||
timestamp = parse_iso8601(self._search_regex(
|
||||
r'<time itemprop="dateCreated" datetime="([^"]+)">',
|
||||
webpage, 'upload date'))
|
||||
|
||||
return {
|
||||
'id': track_id,
|
||||
'title': info['name'],
|
||||
'title': title,
|
||||
'url': final_song_url,
|
||||
'description': info.get('description'),
|
||||
'thumbnail': info['pictures'].get('extra_large'),
|
||||
'uploader': info['user']['name'],
|
||||
'uploader_id': info['user']['username'],
|
||||
'upload_date': unified_strdate(info['created_time']),
|
||||
'view_count': info['play_count'],
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'timestamp': timestamp,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
}
|
||||
|
45
youtube_dl/extractor/moviezine.py
Normal file
45
youtube_dl/extractor/moviezine.py
Normal file
@ -0,0 +1,45 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class MoviezineIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.moviezine\.se/video/(?P<id>[^?#]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.moviezine.se/video/205866',
|
||||
'info_dict': {
|
||||
'id': '205866',
|
||||
'ext': 'mp4',
|
||||
'title': 'Oculus - Trailer 1',
|
||||
'description': 'md5:40cc6790fc81d931850ca9249b40e8a4',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
jsplayer = self._download_webpage('http://www.moviezine.se/api/player.js?video=%s' % video_id, video_id, 'Downloading js api player')
|
||||
|
||||
formats =[{
|
||||
'format_id': 'sd',
|
||||
'url': self._html_search_regex(r'file: "(.+?)",', jsplayer, 'file'),
|
||||
'quality': 0,
|
||||
'ext': 'mp4',
|
||||
}]
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._search_regex(r'title: "(.+?)",', jsplayer, 'title'),
|
||||
'thumbnail': self._search_regex(r'image: "(.+?)",', jsplayer, 'image'),
|
||||
'formats': formats,
|
||||
'description': self._og_search_description(webpage),
|
||||
}
|
@ -1,6 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import find_xpath_attr, compat_str
|
||||
@ -31,30 +32,68 @@ class NBCIE(InfoExtractor):
|
||||
|
||||
|
||||
class NBCNewsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)'
|
||||
_VALID_URL = r'''(?x)https?://www\.nbcnews\.com/
|
||||
((video/.+?/(?P<id>\d+))|
|
||||
(feature/[^/]+/(?P<title>.+)))
|
||||
'''
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.nbcnews.com/video/nbc-news/52753292',
|
||||
'md5': '47abaac93c6eaf9ad37ee6c4463a5179',
|
||||
'info_dict': {
|
||||
'id': '52753292',
|
||||
'ext': 'flv',
|
||||
'title': 'Crew emerges after four-month Mars food study',
|
||||
'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.nbcnews.com/video/nbc-news/52753292',
|
||||
'md5': '47abaac93c6eaf9ad37ee6c4463a5179',
|
||||
'info_dict': {
|
||||
'id': '52753292',
|
||||
'ext': 'flv',
|
||||
'title': 'Crew emerges after four-month Mars food study',
|
||||
'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
|
||||
},
|
||||
},
|
||||
}
|
||||
{
|
||||
'url': 'http://www.nbcnews.com/feature/edward-snowden-interview/how-twitter-reacted-snowden-interview-n117236',
|
||||
'md5': 'b2421750c9f260783721d898f4c42063',
|
||||
'info_dict': {
|
||||
'id': 'I1wpAI_zmhsQ',
|
||||
'ext': 'flv',
|
||||
'title': 'How Twitter Reacted To The Snowden Interview',
|
||||
'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
|
||||
info = all_info.find('video')
|
||||
if video_id is not None:
|
||||
all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
|
||||
info = all_info.find('video')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info.find('headline').text,
|
||||
'ext': 'flv',
|
||||
'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
|
||||
'description': compat_str(info.find('caption').text),
|
||||
'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
|
||||
}
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info.find('headline').text,
|
||||
'ext': 'flv',
|
||||
'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
|
||||
'description': compat_str(info.find('caption').text),
|
||||
'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
|
||||
}
|
||||
else:
|
||||
# "feature" pages use theplatform.com
|
||||
title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, title)
|
||||
bootstrap_json = self._search_regex(
|
||||
r'var bootstrapJson = ({.+})\s*$', webpage, 'bootstrap json',
|
||||
flags=re.MULTILINE)
|
||||
bootstrap = json.loads(bootstrap_json)
|
||||
info = bootstrap['results'][0]['video']
|
||||
playlist_url = info['fallbackPlaylistUrl'] + '?form=MPXNBCNewsAPI'
|
||||
mpxid = info['mpxId']
|
||||
all_videos = self._download_json(playlist_url, title)['videos']
|
||||
# The response contains additional videos
|
||||
info = next(v for v in all_videos if v['mpxId'] == mpxid)
|
||||
|
||||
return {
|
||||
'_type': 'url',
|
||||
# We get the best quality video
|
||||
'url': info['videoAssets'][-1]['publicUrl'],
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
||||
|
@ -4,7 +4,11 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class NDRIE(InfoExtractor):
|
||||
@ -45,17 +49,16 @@ class NDRIE(InfoExtractor):
|
||||
|
||||
page = self._download_webpage(url, video_id, 'Downloading page')
|
||||
|
||||
title = self._og_search_title(page)
|
||||
title = self._og_search_title(page).strip()
|
||||
description = self._og_search_description(page)
|
||||
if description:
|
||||
description = description.strip()
|
||||
|
||||
mobj = re.search(
|
||||
r'<div class="duration"><span class="min">(?P<minutes>\d+)</span>:<span class="sec">(?P<seconds>\d+)</span></div>',
|
||||
page)
|
||||
duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
|
||||
duration = int_or_none(self._html_search_regex(r'duration: (\d+),\n', page, 'duration', fatal=False))
|
||||
|
||||
formats = []
|
||||
|
||||
mp3_url = re.search(r'''{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page)
|
||||
mp3_url = re.search(r'''\{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page)
|
||||
if mp3_url:
|
||||
formats.append({
|
||||
'url': mp3_url.group('audio'),
|
||||
@ -64,13 +67,15 @@ class NDRIE(InfoExtractor):
|
||||
|
||||
thumbnail = None
|
||||
|
||||
video_url = re.search(r'''3: {src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page)
|
||||
video_url = re.search(r'''3: \{src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page)
|
||||
if video_url:
|
||||
thumbnail = self._html_search_regex(r'(?m)title: "NDR PLAYER",\s*poster: "([^"]+)",',
|
||||
page, 'thumbnail', fatal=False)
|
||||
if thumbnail:
|
||||
thumbnail = 'http://www.ndr.de' + thumbnail
|
||||
for format_id in ['lo', 'hi', 'hq']:
|
||||
thumbnails = re.findall(r'''\d+: \{src: "([^"]+)"(?: \|\| '[^']+')?, quality: '([^']+)'}''', page)
|
||||
if thumbnails:
|
||||
quality_key = qualities(['xs', 's', 'm', 'l', 'xl'])
|
||||
largest = max(thumbnails, key=lambda thumb: quality_key(thumb[1]))
|
||||
thumbnail = 'http://www.ndr.de' + largest[0]
|
||||
|
||||
for format_id in 'lo', 'hi', 'hq':
|
||||
formats.append({
|
||||
'url': '%s.%s.mp4' % (video_url.group('video'), format_id),
|
||||
'format_id': format_id,
|
||||
|
87
youtube_dl/extractor/newstube.py
Normal file
87
youtube_dl/extractor/newstube.py
Normal file
@ -0,0 +1,87 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class NewstubeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?newstube\.ru/media/(?P<id>.+)'
|
||||
_TEST = {
|
||||
'url': 'http://newstube.ru/media/na-korable-progress-prodolzhaetsya-testirovanie-sistemy-kurs',
|
||||
'info_dict': {
|
||||
'id': 'd156a237-a6e9-4111-a682-039995f721f1',
|
||||
'ext': 'flv',
|
||||
'title': 'На корабле «Прогресс» продолжается тестирование системы «Курс»',
|
||||
'description': 'md5:d0cbe7b4a6f600552617e48548d5dc77',
|
||||
'duration': 20.04,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
page = self._download_webpage(url, video_id, 'Downloading page')
|
||||
|
||||
video_guid = self._html_search_regex(
|
||||
r'<meta property="og:video" content="https?://(?:www\.)?newstube\.ru/freshplayer\.swf\?guid=(?P<guid>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
|
||||
page, 'video GUID')
|
||||
|
||||
player = self._download_xml(
|
||||
'http://p.newstube.ru/v2/player.asmx/GetAutoPlayInfo6?state=&url=%s&sessionId=&id=%s&placement=profile&location=n2' % (url, video_guid),
|
||||
video_guid, 'Downloading player XML')
|
||||
|
||||
def ns(s):
|
||||
return s.replace('/', '/%(ns)s') % {'ns': '{http://app1.newstube.ru/N2SiteWS/player.asmx}'}
|
||||
|
||||
session_id = player.find(ns('./SessionId')).text
|
||||
media_info = player.find(ns('./Medias/MediaInfo'))
|
||||
title = media_info.find(ns('./Name')).text
|
||||
description = self._og_search_description(page)
|
||||
thumbnail = media_info.find(ns('./KeyFrame')).text
|
||||
duration = int(media_info.find(ns('./Duration')).text) / 1000.0
|
||||
|
||||
formats = []
|
||||
|
||||
for stream_info in media_info.findall(ns('./Streams/StreamInfo')):
|
||||
media_location = stream_info.find(ns('./MediaLocation'))
|
||||
if media_location is None:
|
||||
continue
|
||||
|
||||
server = media_location.find(ns('./Server')).text
|
||||
app = media_location.find(ns('./App')).text
|
||||
media_id = stream_info.find(ns('./Id')).text
|
||||
quality_id = stream_info.find(ns('./QualityId')).text
|
||||
name = stream_info.find(ns('./Name')).text
|
||||
width = int(stream_info.find(ns('./Width')).text)
|
||||
height = int(stream_info.find(ns('./Height')).text)
|
||||
|
||||
formats.append({
|
||||
'url': 'rtmp://%s/%s' % (server, app),
|
||||
'app': app,
|
||||
'play_path': '01/%s' % video_guid.upper(),
|
||||
'rtmp_conn': ['S:%s' % session_id, 'S:%s' % media_id, 'S:n2'],
|
||||
'page_url': url,
|
||||
'ext': 'flv',
|
||||
'format_id': quality_id,
|
||||
'format_note': name,
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_guid,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
@ -73,14 +73,16 @@ class NFBIE(InfoExtractor):
|
||||
title = media.find('title').text
|
||||
description = media.find('description').text
|
||||
# It seems assets always go from lower to better quality, so no need to sort
|
||||
formats = [{
|
||||
'url': x.find('default/streamerURI').text,
|
||||
'app': x.find('default/streamerURI').text.split('/', 3)[3],
|
||||
'play_path': x.find('default/url').text,
|
||||
'rtmp_live': False,
|
||||
'ext': 'mp4',
|
||||
'format_id': x.get('quality'),
|
||||
} for x in media.findall('assets/asset')]
|
||||
for asset in media.findall('assets/asset'):
|
||||
for x in asset:
|
||||
formats.append({
|
||||
'url': x.find('streamerURI').text,
|
||||
'app': x.find('streamerURI').text.split('/', 3)[3],
|
||||
'play_path': x.find('url').text,
|
||||
'rtmp_live': False,
|
||||
'ext': 'mp4',
|
||||
'format_id': '%s-%s' % (x.tag, asset.get('quality')),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@ -47,7 +47,7 @@ class NineGagIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
post_view = json.loads(self._html_search_regex(
|
||||
r'var postView = new app\.PostView\({ post: ({.+?}),', webpage, 'post view'))
|
||||
r'var postView = new app\.PostView\({\s*post:\s*({.+?}),', webpage, 'post view'))
|
||||
|
||||
youtube_id = post_view['videoExternalId']
|
||||
title = post_view['title']
|
||||
|
@ -26,7 +26,8 @@ class NocoIE(InfoExtractor):
|
||||
'uploader': 'Nolife',
|
||||
'uploader_id': 'NOL',
|
||||
'duration': 2851.2,
|
||||
}
|
||||
},
|
||||
'skip': 'Requires noco account',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -4,9 +4,7 @@ import re
|
||||
|
||||
from .brightcove import BrightcoveIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class NownessIE(InfoExtractor):
|
||||
@ -14,9 +12,10 @@ class NownessIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation',
|
||||
'file': '2520295746001.mp4',
|
||||
'md5': '0ece2f70a7bd252c7b00f3070182d418',
|
||||
'md5': '068bc0202558c2e391924cb8cc470676',
|
||||
'info_dict': {
|
||||
'id': '2520295746001',
|
||||
'ext': 'mp4',
|
||||
'description': 'Candor: The Art of Gesticulation',
|
||||
'uploader': 'Nowness',
|
||||
'title': 'Candor: The Art of Gesticulation',
|
||||
|
145
youtube_dl/extractor/nrk.py
Normal file
145
youtube_dl/extractor/nrk.py
Normal file
@ -0,0 +1,145 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class NRKIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?nrk\.no/(?:video|lyd)/[^/]+/(?P<id>[\dA-F]{16})'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.nrk.no/video/dompap_og_andre_fugler_i_piip_show/D0FA54B5C8B6CE59/emne/piipshow/',
|
||||
'md5': 'a6eac35052f3b242bb6bb7f43aed5886',
|
||||
'info_dict': {
|
||||
'id': '150533',
|
||||
'ext': 'flv',
|
||||
'title': 'Dompap og andre fugler i Piip-Show',
|
||||
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f'
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nrk.no/lyd/lyd_av_oppleser_for_blinde/AEFDDD5473BA0198/',
|
||||
'md5': '3471f2a51718195164e88f46bf427668',
|
||||
'info_dict': {
|
||||
'id': '154915',
|
||||
'ext': 'flv',
|
||||
'title': 'Slik høres internett ut når du er blind',
|
||||
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
page = self._download_webpage(url, video_id)
|
||||
|
||||
video_id = self._html_search_regex(r'<div class="nrk-video" data-nrk-id="(\d+)">', page, 'video id')
|
||||
|
||||
data = self._download_json(
|
||||
'http://v7.psapi.nrk.no/mediaelement/%s' % video_id, video_id, 'Downloading media JSON')
|
||||
|
||||
if data['usageRights']['isGeoBlocked']:
|
||||
raise ExtractorError('NRK har ikke rettig-heter til å vise dette programmet utenfor Norge', expected=True)
|
||||
|
||||
video_url = data['mediaUrl'] + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124'
|
||||
|
||||
images = data.get('images')
|
||||
if images:
|
||||
thumbnails = images['webImages']
|
||||
thumbnails.sort(key=lambda image: image['pixelWidth'])
|
||||
thumbnail = thumbnails[-1]['imageUrl']
|
||||
else:
|
||||
thumbnail = None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'flv',
|
||||
'title': data['title'],
|
||||
'description': data['description'],
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
|
||||
class NRKTVIE(InfoExtractor):
|
||||
_VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-z]{4}\d{8})'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/muhh48000314/23-05-2014',
|
||||
'md5': '7b96112fbae1faf09a6f9ae1aff6cb84',
|
||||
'info_dict': {
|
||||
'id': 'muhh48000314',
|
||||
'ext': 'flv',
|
||||
'title': '20 spørsmål',
|
||||
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
||||
'upload_date': '20140523',
|
||||
'duration': 1741.52,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://tv.nrk.no/program/mdfp15000514',
|
||||
'md5': '383650ece2b25ecec996ad7b5bb2a384',
|
||||
'info_dict': {
|
||||
'id': 'mdfp15000514',
|
||||
'ext': 'flv',
|
||||
'title': 'Kunnskapskanalen: Grunnlovsjubiléet - Stor ståhei for ingenting',
|
||||
'description': 'md5:654c12511f035aed1e42bdf5db3b206a',
|
||||
'upload_date': '20140524',
|
||||
'duration': 4605.0,
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
page = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_meta('title', page, 'title')
|
||||
description = self._html_search_meta('description', page, 'description')
|
||||
thumbnail = self._html_search_regex(r'data-posterimage="([^"]+)"', page, 'thumbnail', fatal=False)
|
||||
upload_date = unified_strdate(self._html_search_meta('rightsfrom', page, 'upload date', fatal=False))
|
||||
duration = self._html_search_regex(r'data-duration="([^"]+)"', page, 'duration', fatal=False)
|
||||
if duration:
|
||||
duration = float(duration)
|
||||
|
||||
formats = []
|
||||
|
||||
f4m_url = re.search(r'data-media="([^"]+)"', page)
|
||||
if f4m_url:
|
||||
formats.append({
|
||||
'url': f4m_url.group(1) + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124',
|
||||
'format_id': 'f4m',
|
||||
'ext': 'flv',
|
||||
})
|
||||
|
||||
m3u8_url = re.search(r'data-hls-media="([^"]+)"', page)
|
||||
if m3u8_url:
|
||||
formats.append({
|
||||
'url': m3u8_url.group(1),
|
||||
'format_id': 'm3u8',
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
@ -24,9 +24,9 @@ class NTVIE(InfoExtractor):
|
||||
'duration': 136,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ntv.ru/video/novosti/750370/',
|
||||
@ -38,9 +38,9 @@ class NTVIE(InfoExtractor):
|
||||
'duration': 172,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
|
||||
@ -52,9 +52,9 @@ class NTVIE(InfoExtractor):
|
||||
'duration': 1496,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ntv.ru/kino/Koma_film',
|
||||
@ -66,9 +66,9 @@ class NTVIE(InfoExtractor):
|
||||
'duration': 5592,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/',
|
||||
@ -80,33 +80,25 @@ class NTVIE(InfoExtractor):
|
||||
'duration': 2590,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
_VIDEO_ID_REGEXES = [
|
||||
r'<meta property="og:url" content="http://www\.ntv\.ru/video/(\d+)',
|
||||
r'<video embed=[^>]+><id>(\d+)</id>',
|
||||
r'<video restriction[^>]+><key>(\d+)</key>'
|
||||
r'<video restriction[^>]+><key>(\d+)</key>',
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
page = self._download_webpage(url, video_id, 'Downloading page')
|
||||
page = self._download_webpage(url, video_id)
|
||||
|
||||
for pattern in self._VIDEO_ID_REGEXES:
|
||||
mobj = re.search(pattern, page)
|
||||
if mobj:
|
||||
break
|
||||
|
||||
if not mobj:
|
||||
raise ExtractorError('No media links available for %s' % video_id)
|
||||
|
||||
video_id = mobj.group(1)
|
||||
video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, page, 'video id')
|
||||
|
||||
player = self._download_xml('http://www.ntv.ru/vi%s/' % video_id, video_id, 'Downloading video XML')
|
||||
title = unescapeHTML(player.find('./data/title').text)
|
||||
@ -124,7 +116,7 @@ class NTVIE(InfoExtractor):
|
||||
'7': 'video2',
|
||||
}
|
||||
|
||||
app = apps[puid22] if puid22 in apps else apps['4']
|
||||
app = apps.get(puid22, apps['4'])
|
||||
|
||||
formats = []
|
||||
for format_id in ['', 'hi', 'webm']:
|
||||
|
48
youtube_dl/extractor/nuvid.py
Normal file
48
youtube_dl/extractor/nuvid.py
Normal file
@ -0,0 +1,48 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class NuvidIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www|m)\.nuvid\.com/video/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://m.nuvid.com/video/1310741/',
|
||||
'md5': 'eab207b7ac4fccfb4e23c86201f11277',
|
||||
'info_dict': {
|
||||
'id': '1310741',
|
||||
'ext': 'mp4',
|
||||
"title": "Horny babes show their awesome bodeis and",
|
||||
"age_limit": 18,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
murl = url.replace('://www.', '://m.')
|
||||
webpage = self._download_webpage(murl, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>',
|
||||
webpage, 'title').strip()
|
||||
|
||||
url_end = self._html_search_regex(
|
||||
r'href="(/[^"]+)"[^>]*data-link_type="mp4"',
|
||||
webpage, 'video_url')
|
||||
video_url = 'http://m.nuvid.com' + url_end
|
||||
|
||||
thumbnail = self._html_search_regex(
|
||||
r'href="(/thumbs/[^"]+)"[^>]*data-link_type="thumbs"',
|
||||
webpage, 'thumbnail URL', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'age_limit': 18,
|
||||
}
|
77
youtube_dl/extractor/nytimes.py
Normal file
77
youtube_dl/extractor/nytimes.py
Normal file
@ -0,0 +1,77 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601
|
||||
|
||||
|
||||
class NYTimesIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nytimes\.com/video/(?:[^/]+/)+(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
|
||||
'md5': '18a525a510f942ada2720db5f31644c0',
|
||||
'info_dict': {
|
||||
'id': '100000002847155',
|
||||
'ext': 'mov',
|
||||
'title': 'Verbatim: What Is a Photocopier?',
|
||||
'description': 'md5:93603dada88ddbda9395632fdc5da260',
|
||||
'timestamp': 1398631707,
|
||||
'upload_date': '20140427',
|
||||
'uploader': 'Brett Weiner',
|
||||
'duration': 419,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video_data = self._download_json(
|
||||
'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id, video_id, 'Downloading video JSON')
|
||||
|
||||
title = video_data['headline']
|
||||
description = video_data['summary']
|
||||
duration = video_data['duration'] / 1000.0
|
||||
|
||||
uploader = video_data['byline']
|
||||
timestamp = parse_iso8601(video_data['publication_date'][:-8])
|
||||
|
||||
def get_file_size(file_size):
|
||||
if isinstance(file_size, int):
|
||||
return file_size
|
||||
elif isinstance(file_size, dict):
|
||||
return int(file_size.get('value', 0))
|
||||
else:
|
||||
return 0
|
||||
|
||||
formats = [
|
||||
{
|
||||
'url': video['url'],
|
||||
'format_id': video['type'],
|
||||
'vcodec': video['video_codec'],
|
||||
'width': video['width'],
|
||||
'height': video['height'],
|
||||
'filesize': get_file_size(video['fileSize']),
|
||||
} for video in video_data['renditions']
|
||||
]
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = [
|
||||
{
|
||||
'url': 'http://www.nytimes.com/%s' % image['url'],
|
||||
'resolution': '%dx%d' % (image['width'], image['height']),
|
||||
} for image in video_data['images']
|
||||
]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'uploader': uploader,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
}
|
@ -1,10 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import compat_urllib_parse
|
||||
|
||||
|
||||
class PhotobucketIE(InfoExtractor):
|
||||
@ -14,6 +14,7 @@ class PhotobucketIE(InfoExtractor):
|
||||
'file': 'zpsc0c3b9fa.mp4',
|
||||
'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99',
|
||||
'info_dict': {
|
||||
'timestamp': 1367669341,
|
||||
'upload_date': '20130504',
|
||||
'uploader': 'rachaneronas',
|
||||
'title': 'Tired of Link Building? Try BacklinkMyDomain.com!',
|
||||
@ -32,11 +33,12 @@ class PhotobucketIE(InfoExtractor):
|
||||
info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);',
|
||||
webpage, 'info json')
|
||||
info = json.loads(info_json)
|
||||
url = compat_urllib_parse.unquote(self._html_search_regex(r'file=(.+\.mp4)', info['linkcodes']['html'], 'url'))
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': info['downloadUrl'],
|
||||
'url': url,
|
||||
'uploader': info['username'],
|
||||
'upload_date': datetime.date.fromtimestamp(info['creationDate']).strftime('%Y%m%d'),
|
||||
'timestamp': info['creationDate'],
|
||||
'title': info['title'],
|
||||
'ext': video_extension,
|
||||
'thumbnail': info['thumbUrl'],
|
||||
|
@ -45,7 +45,7 @@ class PornHubIE(InfoExtractor):
|
||||
|
||||
video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
|
||||
video_uploader = self._html_search_regex(
|
||||
r'(?s)<div class="video-info-row">\s*From: .+?<(?:a href="/users/|<span class="username)[^>]+>(.+?)<',
|
||||
r'(?s)From: .+?<(?:a href="/users/|<span class="username)[^>]+>(.+?)<',
|
||||
webpage, 'uploader', fatal=False)
|
||||
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
|
||||
if thumbnail:
|
||||
|
@ -8,8 +8,6 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
unified_strdate,
|
||||
clean_html,
|
||||
RegexNotFoundError,
|
||||
)
|
||||
|
||||
|
||||
@ -188,16 +186,7 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
|
||||
page = self._download_webpage(url, video_id, 'Downloading page')
|
||||
|
||||
def extract(patterns, name, page, fatal=False):
|
||||
for pattern in patterns:
|
||||
mobj = re.search(pattern, page)
|
||||
if mobj:
|
||||
return clean_html(mobj.group(1))
|
||||
if fatal:
|
||||
raise RegexNotFoundError(u'Unable to extract %s' % name)
|
||||
return None
|
||||
|
||||
clip_id = extract(self._CLIPID_REGEXES, 'clip id', page, fatal=True)
|
||||
clip_id = self._html_search_regex(self._CLIPID_REGEXES, page, 'clip id')
|
||||
|
||||
access_token = 'testclient'
|
||||
client_name = 'kolibri-1.2.5'
|
||||
@ -246,13 +235,12 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
|
||||
urls = self._download_json(url_api_url, clip_id, 'Downloading urls JSON')
|
||||
|
||||
title = extract(self._TITLE_REGEXES, 'title', page, fatal=True)
|
||||
description = extract(self._DESCRIPTION_REGEXES, 'description', page)
|
||||
title = self._html_search_regex(self._TITLE_REGEXES, page, 'title')
|
||||
description = self._html_search_regex(self._DESCRIPTION_REGEXES, page, 'description', fatal=False)
|
||||
thumbnail = self._og_search_thumbnail(page)
|
||||
|
||||
upload_date = extract(self._UPLOAD_DATE_REGEXES, 'upload date', page)
|
||||
if upload_date:
|
||||
upload_date = unified_strdate(upload_date)
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
self._UPLOAD_DATE_REGEXES, page, 'upload date', fatal=False))
|
||||
|
||||
formats = []
|
||||
|
||||
|
49
youtube_dl/extractor/rtbf.py
Normal file
49
youtube_dl/extractor/rtbf.py
Normal file
@ -0,0 +1,49 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class RTBFIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www.rtbf.be/video/[^\?]+\?id=(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
|
||||
'md5': '799f334ddf2c0a582ba80c44655be570',
|
||||
'info_dict': {
|
||||
'id': '1921274',
|
||||
'ext': 'mp4',
|
||||
'title': 'Les Diables au coeur (épisode 2)',
|
||||
'description': 'Football - Diables Rouges',
|
||||
'duration': 3099,
|
||||
'timestamp': 1398456336,
|
||||
'upload_date': '20140425',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
page = self._download_webpage('https://www.rtbf.be/video/embed?id=%s' % video_id, video_id)
|
||||
|
||||
data = json.loads(self._html_search_regex(
|
||||
r'<div class="js-player-embed" data-video="([^"]+)"', page, 'data video'))['data']
|
||||
|
||||
video_url = data.get('downloadUrl') or data.get('url')
|
||||
|
||||
if data['provider'].lower() == 'youtube':
|
||||
return self.url_result(video_url, 'Youtube')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': data['title'],
|
||||
'description': data.get('description') or data.get('subtitle'),
|
||||
'thumbnail': data['thumbnail']['large'],
|
||||
'duration': data.get('duration') or data.get('realDuration'),
|
||||
'timestamp': data['created'],
|
||||
'view_count': data['viewCount'],
|
||||
}
|
@ -47,7 +47,7 @@ class RutubeIE(InfoExtractor):
|
||||
author = video.get('author') or {}
|
||||
|
||||
options = self._download_json(
|
||||
'http://rutube.ru/api/play/options/%s/?format=json' %video_id,
|
||||
'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
|
||||
video_id, 'Downloading options JSON')
|
||||
|
||||
m3u8_url = options['video_balancer'].get('m3u8')
|
||||
|
@ -12,7 +12,12 @@ from ..utils import (
|
||||
|
||||
class RUTVIE(InfoExtractor):
|
||||
IE_DESC = 'RUTV.RU'
|
||||
_VALID_URL = r'https?://player\.(?:rutv\.ru|vgtrk\.com)/(?:flash2v/container\.swf\?id=|iframe/(?P<type>swf|video|live)/id/)(?P<id>\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://player\.(?:rutv\.ru|vgtrk\.com)/
|
||||
(?P<path>flash2v/container\.swf\?id=
|
||||
|iframe/(?P<type>swf|video|live)/id/
|
||||
|index/iframe/cast_id/)
|
||||
(?P<id>\d+)'''
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@ -90,7 +95,7 @@ class RUTVIE(InfoExtractor):
|
||||
@classmethod
|
||||
def _extract_url(cls, webpage):
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.rutv\.ru/iframe/(?:swf|video|live)/id/.+?)\1', webpage)
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.rutv\.ru/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
||||
@ -103,10 +108,16 @@ class RUTVIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_type = mobj.group('type')
|
||||
video_path = mobj.group('path')
|
||||
|
||||
if not video_type or video_type == 'swf':
|
||||
if video_path.startswith('flash2v'):
|
||||
video_type = 'video'
|
||||
elif video_path.startswith('iframe'):
|
||||
video_type = mobj.group('type')
|
||||
if video_type == 'swf':
|
||||
video_type = 'video'
|
||||
elif video_path.startswith('index/iframe/cast_id'):
|
||||
video_type = 'live'
|
||||
|
||||
json_data = self._download_json(
|
||||
'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if video_type == 'live' else '', video_id),
|
||||
|
56
youtube_dl/extractor/scivee.py
Normal file
56
youtube_dl/extractor/scivee.py
Normal file
@ -0,0 +1,56 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class SciVeeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?scivee\.tv/node/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.scivee.tv/node/62352',
|
||||
'md5': 'b16699b74c9e6a120f6772a44960304f',
|
||||
'info_dict': {
|
||||
'id': '62352',
|
||||
'ext': 'mp4',
|
||||
'title': 'Adam Arkin at the 2014 DOE JGI Genomics of Energy & Environment Meeting',
|
||||
'description': 'md5:81f1710638e11a481358fab1b11059d7',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
# annotations XML is malformed
|
||||
annotations = self._download_webpage(
|
||||
'http://www.scivee.tv/assets/annotations/%s' % video_id, video_id, 'Downloading annotations')
|
||||
|
||||
title = self._html_search_regex(r'<title>([^<]+)</title>', annotations, 'title')
|
||||
description = self._html_search_regex(r'<abstract>([^<]+)</abstract>', annotations, 'abstract', fatal=False)
|
||||
filesize = int_or_none(self._html_search_regex(
|
||||
r'<filesize>([^<]+)</filesize>', annotations, 'filesize', fatal=False))
|
||||
|
||||
formats = [
|
||||
{
|
||||
'url': 'http://www.scivee.tv/assets/audio/%s' % video_id,
|
||||
'ext': 'mp3',
|
||||
'format_id': 'audio',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.scivee.tv/assets/video/%s' % video_id,
|
||||
'ext': 'mp4',
|
||||
'format_id': 'video',
|
||||
'filesize': filesize,
|
||||
},
|
||||
]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': 'http://www.scivee.tv/assets/videothumb/%s' % video_id,
|
||||
'formats': formats,
|
||||
}
|
47
youtube_dl/extractor/slutload.py
Normal file
47
youtube_dl/extractor/slutload.py
Normal file
@ -0,0 +1,47 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class SlutloadIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$'
|
||||
_TEST = {
|
||||
'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
|
||||
'md5': '0cf531ae8006b530bd9df947a6a0df77',
|
||||
'info_dict': {
|
||||
'id': 'TD73btpBqSxc',
|
||||
'ext': 'mp4',
|
||||
"title": "virginie baisee en cam",
|
||||
"age_limit": 18,
|
||||
'thumbnail': 're:https?://.*?\.jpg'
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>',
|
||||
webpage, 'title').strip()
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'(?s)<div id="vidPlayer"\s+data-url="([^"]+)"',
|
||||
webpage, 'video URL')
|
||||
thumbnail = self._html_search_regex(
|
||||
r'(?s)<div id="vidPlayer"\s+.*?previewer-file="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': video_title,
|
||||
'thumbnail': thumbnail,
|
||||
'age_limit': 18
|
||||
}
|
@ -25,7 +25,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
of the stream token and uid
|
||||
"""
|
||||
|
||||
_VALID_URL = r'''^(?:https?://)?
|
||||
_VALID_URL = r'''(?x)^(?:https?://)?
|
||||
(?:(?:(?:www\.|m\.)?soundcloud\.com/
|
||||
(?P<uploader>[\w\d-]+)/
|
||||
(?!sets/)(?P<title>[\w\d-]+)/?
|
||||
@ -94,10 +94,6 @@ class SoundcloudIE(InfoExtractor):
|
||||
_CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
|
||||
_IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None
|
||||
|
||||
def report_resolve(self, video_id):
|
||||
"""Report information extraction."""
|
||||
self.to_screen('%s: Resolving id' % video_id)
|
||||
@ -141,11 +137,10 @@ class SoundcloudIE(InfoExtractor):
|
||||
# We have to retrieve the url
|
||||
streams_url = ('http://api.soundcloud.com/i1/tracks/{0}/streams?'
|
||||
'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token))
|
||||
stream_json = self._download_webpage(
|
||||
format_dict = self._download_json(
|
||||
streams_url,
|
||||
track_id, 'Downloading track url')
|
||||
|
||||
format_dict = json.loads(stream_json)
|
||||
for key, stream_url in format_dict.items():
|
||||
if key.startswith('http'):
|
||||
formats.append({
|
||||
@ -198,7 +193,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
full_title = track_id
|
||||
elif mobj.group('player'):
|
||||
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
return self.url_result(query['url'][0], ie='Soundcloud')
|
||||
return self.url_result(query['url'][0])
|
||||
else:
|
||||
# extract uploader (which is in the url)
|
||||
uploader = mobj.group('uploader')
|
||||
@ -213,11 +208,11 @@ class SoundcloudIE(InfoExtractor):
|
||||
|
||||
url = 'http://soundcloud.com/%s' % resolve_title
|
||||
info_json_url = self._resolv_url(url)
|
||||
info_json = self._download_webpage(info_json_url, full_title, 'Downloading info JSON')
|
||||
info = self._download_json(info_json_url, full_title, 'Downloading info JSON')
|
||||
|
||||
info = json.loads(info_json)
|
||||
return self._extract_info_dict(info, full_title, secret_token=token)
|
||||
|
||||
|
||||
class SoundcloudSetIE(SoundcloudIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
|
||||
IE_NAME = 'soundcloud:set'
|
||||
@ -232,16 +227,15 @@ class SoundcloudSetIE(SoundcloudIE):
|
||||
# extract uploader (which is in the url)
|
||||
uploader = mobj.group(1)
|
||||
# extract simple title (uploader + slug of song title)
|
||||
slug_title = mobj.group(2)
|
||||
slug_title = mobj.group(2)
|
||||
full_title = '%s/sets/%s' % (uploader, slug_title)
|
||||
|
||||
self.report_resolve(full_title)
|
||||
|
||||
url = 'http://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
|
||||
resolv_url = self._resolv_url(url)
|
||||
info_json = self._download_webpage(resolv_url, full_title)
|
||||
info = self._download_json(resolv_url, full_title)
|
||||
|
||||
info = json.loads(info_json)
|
||||
if 'errors' in info:
|
||||
for err in info['errors']:
|
||||
self._downloader.report_error('unable to download video webpage: %s' % compat_str(err['error_message']))
|
||||
@ -268,26 +262,55 @@ class SoundcloudUserIE(SoundcloudIE):
|
||||
|
||||
url = 'http://soundcloud.com/%s/' % uploader
|
||||
resolv_url = self._resolv_url(url)
|
||||
user_json = self._download_webpage(resolv_url, uploader,
|
||||
'Downloading user info')
|
||||
user = json.loads(user_json)
|
||||
user = self._download_json(
|
||||
resolv_url, uploader, 'Downloading user info')
|
||||
base_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % uploader
|
||||
|
||||
tracks = []
|
||||
entries = []
|
||||
for i in itertools.count():
|
||||
data = compat_urllib_parse.urlencode({'offset': i*50,
|
||||
'client_id': self._CLIENT_ID,
|
||||
})
|
||||
tracks_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % user['id'] + data
|
||||
response = self._download_webpage(tracks_url, uploader,
|
||||
'Downloading tracks page %s' % (i+1))
|
||||
new_tracks = json.loads(response)
|
||||
tracks.extend(self._extract_info_dict(track, quiet=True) for track in new_tracks)
|
||||
if len(new_tracks) < 50:
|
||||
data = compat_urllib_parse.urlencode({
|
||||
'offset': i * 50,
|
||||
'client_id': self._CLIENT_ID,
|
||||
})
|
||||
new_entries = self._download_json(
|
||||
base_url + data, uploader, 'Downloading track page %s' % (i + 1))
|
||||
entries.extend(self._extract_info_dict(e, quiet=True) for e in new_entries)
|
||||
if len(new_entries) < 50:
|
||||
break
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': compat_str(user['id']),
|
||||
'title': user['username'],
|
||||
'entries': tracks,
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
|
||||
class SoundcloudPlaylistIE(SoundcloudIE):
|
||||
_VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)'
|
||||
IE_NAME = 'soundcloud:playlist'
|
||||
|
||||
# it's in tests/test_playlists.py
|
||||
_TESTS = []
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
playlist_id = mobj.group('id')
|
||||
base_url = '%s//api.soundcloud.com/playlists/%s.json?' % (self.http_scheme(), playlist_id)
|
||||
|
||||
data = compat_urllib_parse.urlencode({
|
||||
'client_id': self._CLIENT_ID,
|
||||
})
|
||||
data = self._download_json(
|
||||
base_url + data, playlist_id, 'Downloading playlist')
|
||||
|
||||
entries = [
|
||||
self._extract_info_dict(t, quiet=True) for t in data['tracks']]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': playlist_id,
|
||||
'title': data.get('title'),
|
||||
'description': data.get('description'),
|
||||
'entries': entries,
|
||||
}
|
||||
|
@ -5,13 +5,16 @@ import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
compat_str,
|
||||
)
|
||||
|
||||
|
||||
class StreamCZIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<videoid>.+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti',
|
||||
'md5': '6d3ca61a8d0633c9c542b92fcb936b0c',
|
||||
'info_dict': {
|
||||
@ -22,7 +25,18 @@ class StreamCZIE(InfoExtractor):
|
||||
'thumbnail': 'http://im.stream.cz/episode/52961d7e19d423f8f06f0100',
|
||||
'duration': 256,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.stream.cz/blanik/10002447-tri-roky-pro-mazanka',
|
||||
'md5': '246272e753e26bbace7fcd9deca0650c',
|
||||
'info_dict': {
|
||||
'id': '10002447',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kancelář Blaník: Tři roky pro Mazánka',
|
||||
'description': 'md5:9177695a8b756a0a8ab160de4043b392',
|
||||
'thumbnail': 'http://im.stream.cz/episode/537f838c50c11f8d21320000',
|
||||
'duration': 368,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@ -57,7 +71,7 @@ class StreamCZIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': str(jsonData['id']),
|
||||
'id': compat_str(jsonData['episode_id']),
|
||||
'title': self._og_search_title(webpage),
|
||||
'thumbnail': jsonData['episode_image_original_url'].replace('//', 'http://'),
|
||||
'formats': formats,
|
||||
|
104
youtube_dl/extractor/swrmediathek.py
Normal file
104
youtube_dl/extractor/swrmediathek.py
Normal file
@ -0,0 +1,104 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_duration
|
||||
|
||||
|
||||
class SWRMediathekIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/player\.htm\?show=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://swrmediathek.de/player.htm?show=849790d0-dab8-11e3-a953-0026b975f2e6',
|
||||
'md5': '8c5f6f0172753368547ca8413a7768ac',
|
||||
'info_dict': {
|
||||
'id': '849790d0-dab8-11e3-a953-0026b975f2e6',
|
||||
'ext': 'mp4',
|
||||
'title': 'SWR odysso',
|
||||
'description': 'md5:2012e31baad36162e97ce9eb3f157b8a',
|
||||
'thumbnail': 're:^http:.*\.jpg$',
|
||||
'duration': 2602,
|
||||
'upload_date': '20140515',
|
||||
'uploader': 'SWR Fernsehen',
|
||||
'uploader_id': '990030',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://swrmediathek.de/player.htm?show=0e1a8510-ddf2-11e3-9be3-0026b975f2e6',
|
||||
'md5': 'b10ab854f912eecc5a6b55cd6fc1f545',
|
||||
'info_dict': {
|
||||
'id': '0e1a8510-ddf2-11e3-9be3-0026b975f2e6',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nachtcafé - Alltagsdroge Alkohol - zwischen Sektempfang und Komasaufen',
|
||||
'description': 'md5:e0a3adc17e47db2c23aab9ebc36dbee2',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'duration': 5305,
|
||||
'upload_date': '20140516',
|
||||
'uploader': 'SWR Fernsehen',
|
||||
'uploader_id': '990030',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://swrmediathek.de/player.htm?show=bba23e10-cb93-11e3-bf7f-0026b975f2e6',
|
||||
'md5': '4382e4ef2c9d7ce6852535fa867a0dd3',
|
||||
'info_dict': {
|
||||
'id': 'bba23e10-cb93-11e3-bf7f-0026b975f2e6',
|
||||
'ext': 'mp3',
|
||||
'title': 'Saša Stanišic: Vor dem Fest',
|
||||
'description': 'md5:5b792387dc3fbb171eb709060654e8c9',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
'duration': 3366,
|
||||
'upload_date': '20140520',
|
||||
'uploader': 'SWR 2',
|
||||
'uploader_id': '284670',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video = self._download_json(
|
||||
'http://swrmediathek.de/AjaxEntry?ekey=%s' % video_id, video_id, 'Downloading video JSON')
|
||||
|
||||
attr = video['attr']
|
||||
media_type = attr['entry_etype']
|
||||
|
||||
formats = []
|
||||
for entry in video['sub']:
|
||||
if entry['name'] != 'entry_media':
|
||||
continue
|
||||
|
||||
entry_attr = entry['attr']
|
||||
codec = entry_attr['val0']
|
||||
quality = int(entry_attr['val1'])
|
||||
|
||||
fmt = {
|
||||
'url': entry_attr['val2'],
|
||||
'quality': quality,
|
||||
}
|
||||
|
||||
if media_type == 'Video':
|
||||
fmt.update({
|
||||
'format_note': ['144p', '288p', '544p'][quality-1],
|
||||
'vcodec': codec,
|
||||
})
|
||||
elif media_type == 'Audio':
|
||||
fmt.update({
|
||||
'acodec': codec,
|
||||
})
|
||||
formats.append(fmt)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': attr['entry_title'],
|
||||
'description': attr['entry_descl'],
|
||||
'thumbnail': attr['entry_image_16_9'],
|
||||
'duration': parse_duration(attr['entry_durat']),
|
||||
'upload_date': attr['entry_pdatet'][:-4],
|
||||
'uploader': attr['channel_title'],
|
||||
'uploader_id': attr['channel_idkey'],
|
||||
'formats': formats,
|
||||
}
|
@ -6,9 +6,9 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class SyfyIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.syfy\.com/videos/.+?vid:(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://www\.syfy\.com/(?:videos/.+?vid:(?P<id>[0-9]+)|(?!videos)(?P<video_name>[^/]+)(?:$|[?#]))'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.syfy.com/videos/Robot%20Combat%20League/Behind%20the%20Scenes/vid:2631458',
|
||||
'md5': 'e07de1d52c7278adbb9b9b1c93a66849',
|
||||
'info_dict': {
|
||||
@ -18,10 +18,30 @@ class SyfyIE(InfoExtractor):
|
||||
'description': 'Listen to what insights George Lucas give his daughter Amanda.',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.syfy.com/wilwheaton',
|
||||
'md5': '94dfa54ee3ccb63295b276da08c415f6',
|
||||
'info_dict': {
|
||||
'id': '4yoffOOXC767',
|
||||
'ext': 'flv',
|
||||
'title': 'The Wil Wheaton Project - Premiering May 27th at 10/9c.',
|
||||
'description': 'The Wil Wheaton Project premieres May 27th at 10/9c. Don\'t miss it.',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'skip': 'Blocked outside the US',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_name = mobj.group('video_name')
|
||||
if video_name:
|
||||
generic_webpage = self._download_webpage(url, video_name)
|
||||
video_id = self._search_regex(
|
||||
r'<iframe.*?class="video_iframe_page"\s+src="/_utils/video/thP_video_controller.php.*?_vid([0-9]+)">',
|
||||
generic_webpage, 'video ID')
|
||||
url = 'http://www.syfy.com/videos/%s/%s/vid:%s' % (
|
||||
video_name, video_name, video_id)
|
||||
else:
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
return self.url_result(self._og_search_video_url(webpage))
|
||||
|
@ -51,16 +51,13 @@ class TEDIE(SubtitlesInfoExtractor):
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best',
|
||||
'md5': '49144e345a899b8cb34d315f3b9cfeeb',
|
||||
'info_dict': {
|
||||
'id': '1972',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Be passionate. Be courageous. Be your best.',
|
||||
'uploader': 'Gabby Giffords and Mark Kelly',
|
||||
'description': 'md5:d89e1d8ebafdac8e55df4c219ecdbfe9',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
'description': 'md5:5174aed4d0f16021b704120360f72b92',
|
||||
},
|
||||
}]
|
||||
|
||||
@ -97,7 +94,7 @@ class TEDIE(SubtitlesInfoExtractor):
|
||||
playlist_info = info['playlist']
|
||||
|
||||
playlist_entries = [
|
||||
self.url_result(u'http://www.ted.com/talks/' + talk['slug'], self.ie_key())
|
||||
self.url_result('http://www.ted.com/talks/' + talk['slug'], self.ie_key())
|
||||
for talk in info['talks']
|
||||
]
|
||||
return self.playlist_result(
|
||||
@ -163,7 +160,7 @@ class TEDIE(SubtitlesInfoExtractor):
|
||||
sub_lang_list[l] = url
|
||||
return sub_lang_list
|
||||
else:
|
||||
self._downloader.report_warning(u'video doesn\'t have subtitles')
|
||||
self._downloader.report_warning('video doesn\'t have subtitles')
|
||||
return {}
|
||||
|
||||
def _watch_info(self, url, name):
|
||||
@ -178,7 +175,10 @@ class TEDIE(SubtitlesInfoExtractor):
|
||||
title = self._html_search_regex(
|
||||
r"(?s)<h1(?:\s+class='[^']+')?>(.+?)</h1>", webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<h4 class="[^"]+" id="h3--about-this-talk">.*?</h4>(.*?)</div>',
|
||||
[
|
||||
r'(?s)<h4 class="[^"]+" id="h3--about-this-talk">.*?</h4>(.*?)</div>',
|
||||
r'(?s)<p><strong>About this talk:</strong>\s+(.*?)</p>',
|
||||
],
|
||||
webpage, 'description', fatal=False)
|
||||
|
||||
return {
|
||||
|
@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
@ -18,17 +20,17 @@ class ThePlatformIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
# from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
|
||||
u'url': u'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
|
||||
u'info_dict': {
|
||||
u'id': u'e9I_cZgTgIPd',
|
||||
u'ext': u'flv',
|
||||
u'title': u'Blackberry\'s big, bold Z30',
|
||||
u'description': u'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.',
|
||||
u'duration': 247,
|
||||
'url': 'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
|
||||
'info_dict': {
|
||||
'id': 'e9I_cZgTgIPd',
|
||||
'ext': 'flv',
|
||||
'title': 'Blackberry\'s big, bold Z30',
|
||||
'description': 'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.',
|
||||
'duration': 247,
|
||||
},
|
||||
u'params': {
|
||||
'params': {
|
||||
# rtmp download
|
||||
u'skip_download': True,
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
@ -39,7 +41,7 @@ class ThePlatformIE(InfoExtractor):
|
||||
error_msg = next(
|
||||
n.attrib['abstract']
|
||||
for n in meta.findall(_x('.//smil:ref'))
|
||||
if n.attrib.get('title') == u'Geographic Restriction')
|
||||
if n.attrib.get('title') == 'Geographic Restriction')
|
||||
except StopIteration:
|
||||
pass
|
||||
else:
|
||||
@ -52,7 +54,7 @@ class ThePlatformIE(InfoExtractor):
|
||||
head = meta.find(_x('smil:head'))
|
||||
body = meta.find(_x('smil:body'))
|
||||
|
||||
f4m_node = body.find(_x('smil:seq/smil:video'))
|
||||
f4m_node = body.find(_x('smil:seq//smil:video'))
|
||||
if f4m_node is not None:
|
||||
f4m_url = f4m_node.attrib['src']
|
||||
if 'manifest.f4m?' not in f4m_url:
|
||||
@ -101,8 +103,7 @@ class ThePlatformIE(InfoExtractor):
|
||||
config_url = url+ '&form=json'
|
||||
config_url = config_url.replace('swf/', 'config/')
|
||||
config_url = config_url.replace('onsite/', 'onsite/config/')
|
||||
config_json = self._download_webpage(config_url, video_id, u'Downloading config')
|
||||
config = json.loads(config_json)
|
||||
config = self._download_json(config_url, video_id, 'Downloading config')
|
||||
smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
|
||||
else:
|
||||
smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
|
||||
|
@ -11,29 +11,36 @@ from ..utils import (
|
||||
|
||||
|
||||
class UstreamIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed)/(?P<videoID>\d+)'
|
||||
_VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<videoID>\d+)'
|
||||
IE_NAME = 'ustream'
|
||||
_TEST = {
|
||||
'url': 'http://www.ustream.tv/recorded/20274954',
|
||||
'file': '20274954.flv',
|
||||
'md5': '088f151799e8f572f84eb62f17d73e5c',
|
||||
'info_dict': {
|
||||
"uploader": "Young Americans for Liberty",
|
||||
"title": "Young Americans for Liberty February 7, 2012 2:28 AM",
|
||||
'id': '20274954',
|
||||
'ext': 'flv',
|
||||
'uploader': 'Young Americans for Liberty',
|
||||
'title': 'Young Americans for Liberty February 7, 2012 2:28 AM',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
video_id = m.group('videoID')
|
||||
|
||||
# some sites use this embed format (see: http://github.com/rg3/youtube-dl/issues/2990)
|
||||
if m.group('type') == 'embed/recorded':
|
||||
video_id = m.group('videoID')
|
||||
desktop_url = 'http://www.ustream.tv/recorded/' + video_id
|
||||
return self.url_result(desktop_url, 'Ustream')
|
||||
if m.group('type') == 'embed':
|
||||
video_id = m.group('videoID')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
desktop_video_id = self._html_search_regex(r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id')
|
||||
desktop_video_id = self._html_search_regex(
|
||||
r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id')
|
||||
desktop_url = 'http://www.ustream.tv/recorded/' + desktop_video_id
|
||||
return self.url_result(desktop_url, 'Ustream')
|
||||
|
||||
video_id = m.group('videoID')
|
||||
|
||||
video_url = 'http://tcdn.ustream.tv/video/%s' % video_id
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
|
@ -16,7 +16,7 @@ class VevoIE(InfoExtractor):
|
||||
(currently used by MTVIE)
|
||||
"""
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?|
|
||||
(?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?|
|
||||
https?://cache\.vevo\.com/m/html/embed\.html\?video=|
|
||||
https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
|
||||
vevo:)
|
||||
@ -134,7 +134,13 @@ class VevoIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
|
||||
json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id
|
||||
video_info = self._download_json(json_url, video_id)['video']
|
||||
response = self._download_json(json_url, video_id)
|
||||
video_info = response['video']
|
||||
|
||||
if not video_info:
|
||||
if 'statusMessage' in response:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusMessage']), expected=True)
|
||||
raise ExtractorError('Unable to extract videos')
|
||||
|
||||
formats = self._formats_from_json(video_info)
|
||||
|
||||
|
58
youtube_dl/extractor/videott.py
Normal file
58
youtube_dl/extractor/videott.py
Normal file
@ -0,0 +1,58 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
||||
class VideoTtIE(InfoExtractor):
|
||||
ID_NAME = 'video.tt'
|
||||
IE_DESC = 'video.tt - Your True Tube'
|
||||
_VALID_URL = r'http://(?:www\.)?video\.tt/(?:video/|watch_video\.php\?v=)(?P<id>[\da-zA-Z]{9})'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.video.tt/watch_video.php?v=amd5YujV8',
|
||||
'md5': 'b13aa9e2f267effb5d1094443dff65ba',
|
||||
'info_dict': {
|
||||
'id': 'amd5YujV8',
|
||||
'ext': 'flv',
|
||||
'title': 'Motivational video Change your mind in just 2.50 mins',
|
||||
'description': '',
|
||||
'upload_date': '20130827',
|
||||
'uploader': 'joseph313',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
settings = self._download_json(
|
||||
'http://www.video.tt/player_control/settings.php?v=%s' % video_id, video_id,
|
||||
'Downloading video JSON')['settings']
|
||||
|
||||
video = settings['video_details']['video']
|
||||
|
||||
formats = [
|
||||
{
|
||||
'url': base64.b64decode(res['u']).decode('utf-8'),
|
||||
'ext': 'flv',
|
||||
'format_id': res['l'],
|
||||
} for res in settings['res'] if res['u']
|
||||
]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video['title'],
|
||||
'description': video['description'],
|
||||
'thumbnail': settings['config']['thumbnail'],
|
||||
'upload_date': unified_strdate(video['added']),
|
||||
'uploader': video['owner'],
|
||||
'view_count': int(video['view_count']),
|
||||
'comment_count': int(video['comment_count']),
|
||||
'like_count': int(video['liked']),
|
||||
'dislike_count': int(video['disliked']),
|
||||
'formats': formats,
|
||||
}
|
@ -17,10 +17,39 @@ from ..utils import (
|
||||
RegexNotFoundError,
|
||||
std_headers,
|
||||
unsmuggle_url,
|
||||
urlencode_postdata,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class VimeoIE(SubtitlesInfoExtractor):
|
||||
class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
_NETRC_MACHINE = 'vimeo'
|
||||
_LOGIN_REQUIRED = False
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
if self._LOGIN_REQUIRED:
|
||||
raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
|
||||
return
|
||||
self.report_login()
|
||||
login_url = 'https://vimeo.com/log_in'
|
||||
webpage = self._download_webpage(login_url, None, False)
|
||||
token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
|
||||
data = urlencode_postdata({
|
||||
'email': username,
|
||||
'password': password,
|
||||
'action': 'login',
|
||||
'service': 'vimeo',
|
||||
'token': token,
|
||||
})
|
||||
login_request = compat_urllib_request.Request(login_url, data)
|
||||
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
login_request.add_header('Cookie', 'xsrft=%s' % token)
|
||||
self._download_webpage(login_request, None, False, 'Wrong login info')
|
||||
|
||||
|
||||
class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
"""Information extractor for vimeo.com."""
|
||||
|
||||
# _VALID_URL matches Vimeo URLs
|
||||
@ -33,7 +62,6 @@ class VimeoIE(SubtitlesInfoExtractor):
|
||||
(?:videos?/)?
|
||||
(?P<id>[0-9]+)
|
||||
/?(?:[?&].*)?(?:[#].*)?$'''
|
||||
_NETRC_MACHINE = 'vimeo'
|
||||
IE_NAME = 'vimeo'
|
||||
_TESTS = [
|
||||
{
|
||||
@ -47,40 +75,47 @@ class VimeoIE(SubtitlesInfoExtractor):
|
||||
"uploader_id": "user7108434",
|
||||
"uploader": "Filippo Valsorda",
|
||||
"title": "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
|
||||
"duration": 10,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876',
|
||||
'file': '68093876.mp4',
|
||||
'md5': '3b5ca6aa22b60dfeeadf50b72e44ed82',
|
||||
'note': 'Vimeo Pro video (#1197)',
|
||||
'info_dict': {
|
||||
'id': '68093876',
|
||||
'ext': 'mp4',
|
||||
'uploader_id': 'openstreetmapus',
|
||||
'uploader': 'OpenStreetMap US',
|
||||
'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
|
||||
'duration': 1595,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://player.vimeo.com/video/54469442',
|
||||
'file': '54469442.mp4',
|
||||
'md5': '619b811a4417aa4abe78dc653becf511',
|
||||
'note': 'Videos that embed the url in the player page',
|
||||
'info_dict': {
|
||||
'id': '54469442',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software',
|
||||
'uploader': 'The BLN & Business of Software',
|
||||
'uploader_id': 'theblnbusinessofsoftware',
|
||||
'duration': 3610,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://vimeo.com/68375962',
|
||||
'file': '68375962.mp4',
|
||||
'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7',
|
||||
'note': 'Video protected with password',
|
||||
'info_dict': {
|
||||
'id': '68375962',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl password protected test video',
|
||||
'upload_date': '20130614',
|
||||
'uploader_id': 'user18948128',
|
||||
'uploader': 'Jaime Marquínez Ferrándiz',
|
||||
'duration': 10,
|
||||
},
|
||||
'params': {
|
||||
'videopassword': 'youtube-dl',
|
||||
@ -98,6 +133,7 @@ class VimeoIE(SubtitlesInfoExtractor):
|
||||
'upload_date': '20131015',
|
||||
'uploader_id': 'staff',
|
||||
'uploader': 'Vimeo Staff',
|
||||
'duration': 62,
|
||||
}
|
||||
},
|
||||
]
|
||||
@ -111,38 +147,21 @@ class VimeoIE(SubtitlesInfoExtractor):
|
||||
else:
|
||||
return super(VimeoIE, cls).suitable(url)
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
self.report_login()
|
||||
login_url = 'https://vimeo.com/log_in'
|
||||
webpage = self._download_webpage(login_url, None, False)
|
||||
token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
|
||||
data = compat_urllib_parse.urlencode({'email': username,
|
||||
'password': password,
|
||||
'action': 'login',
|
||||
'service': 'vimeo',
|
||||
'token': token,
|
||||
})
|
||||
login_request = compat_urllib_request.Request(login_url, data)
|
||||
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
login_request.add_header('Cookie', 'xsrft=%s' % token)
|
||||
self._download_webpage(login_request, None, False, 'Wrong login info')
|
||||
|
||||
def _verify_video_password(self, url, video_id, webpage):
|
||||
password = self._downloader.params.get('videopassword', None)
|
||||
if password is None:
|
||||
raise ExtractorError('This video is protected by a password, use the --video-password option')
|
||||
token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
|
||||
data = compat_urllib_parse.urlencode({'password': password,
|
||||
'token': token})
|
||||
data = compat_urllib_parse.urlencode({
|
||||
'password': password,
|
||||
'token': token,
|
||||
})
|
||||
# I didn't manage to use the password with https
|
||||
if url.startswith('https'):
|
||||
pass_url = url.replace('https','http')
|
||||
pass_url = url.replace('https', 'http')
|
||||
else:
|
||||
pass_url = url
|
||||
password_request = compat_urllib_request.Request(pass_url+'/password', data)
|
||||
password_request = compat_urllib_request.Request(pass_url + '/password', data)
|
||||
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
password_request.add_header('Cookie', 'xsrft=%s' % token)
|
||||
self._download_webpage(password_request, video_id,
|
||||
@ -249,8 +268,9 @@ class VimeoIE(SubtitlesInfoExtractor):
|
||||
# Extract video description
|
||||
video_description = None
|
||||
try:
|
||||
video_description = get_element_by_attribute("itemprop", "description", webpage)
|
||||
if video_description: video_description = clean_html(video_description)
|
||||
video_description = get_element_by_attribute("class", "description_wrapper", webpage)
|
||||
if video_description:
|
||||
video_description = clean_html(video_description)
|
||||
except AssertionError as err:
|
||||
# On some pages like (http://player.vimeo.com/video/54469442) the
|
||||
# html tags are not closed, python 2.6 cannot handle it
|
||||
@ -259,6 +279,9 @@ class VimeoIE(SubtitlesInfoExtractor):
|
||||
else:
|
||||
raise
|
||||
|
||||
# Extract video duration
|
||||
video_duration = int_or_none(config["video"].get("duration"))
|
||||
|
||||
# Extract upload date
|
||||
video_upload_date = None
|
||||
mobj = re.search(r'<meta itemprop="dateCreated" content="(\d{4})-(\d{2})-(\d{2})T', webpage)
|
||||
@ -296,7 +319,7 @@ class VimeoIE(SubtitlesInfoExtractor):
|
||||
file_info = {}
|
||||
if video_url is None:
|
||||
video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
|
||||
%(video_id, sig, timestamp, quality, codec_name.upper())
|
||||
% (video_id, sig, timestamp, quality, codec_name.upper())
|
||||
|
||||
files[key].append({
|
||||
'ext': codec_extension,
|
||||
@ -330,6 +353,7 @@ class VimeoIE(SubtitlesInfoExtractor):
|
||||
'title': video_title,
|
||||
'thumbnail': video_thumbnail,
|
||||
'description': video_description,
|
||||
'duration': video_duration,
|
||||
'formats': formats,
|
||||
'webpage_url': url,
|
||||
'view_count': view_count,
|
||||
@ -355,7 +379,7 @@ class VimeoChannelIE(InfoExtractor):
|
||||
video_ids = []
|
||||
for pagenum in itertools.count(1):
|
||||
webpage = self._download_webpage(
|
||||
self._page_url(base_url, pagenum) ,list_id,
|
||||
self._page_url(base_url, pagenum), list_id,
|
||||
'Downloading page %s' % pagenum)
|
||||
video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
|
||||
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
|
||||
@ -371,7 +395,7 @@ class VimeoChannelIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
channel_id = mobj.group('id')
|
||||
channel_id = mobj.group('id')
|
||||
return self._extract_videos(channel_id, 'http://vimeo.com/channels/%s' % channel_id)
|
||||
|
||||
|
||||
@ -438,3 +462,25 @@ class VimeoReviewIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
player_url = 'https://player.vimeo.com/player/' + video_id
|
||||
return self.url_result(player_url, 'Vimeo', video_id)
|
||||
|
||||
|
||||
class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
|
||||
IE_NAME = 'vimeo:watchlater'
|
||||
IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)'
|
||||
_VALID_URL = r'https?://vimeo\.com/home/watchlater|:vimeowatchlater'
|
||||
_LOGIN_REQUIRED = True
|
||||
_TITLE_RE = r'href="/home/watchlater".*?>(.*?)<'
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _page_url(self, base_url, pagenum):
|
||||
url = '%s/page:%d/' % (base_url, pagenum)
|
||||
request = compat_urllib_request.Request(url)
|
||||
# Set the header to get a partial html page with the ids,
|
||||
# the normal page doesn't contain them.
|
||||
request.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||
return request
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self._extract_videos('watchlater', 'https://vimeo.com/home/watchlater')
|
||||
|
@ -2,6 +2,7 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
@ -57,4 +58,34 @@ class VineIE(InfoExtractor):
|
||||
'comment_count': data['comments']['count'],
|
||||
'repost_count': data['reposts']['count'],
|
||||
'formats': formats,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class VineUserIE(InfoExtractor):
|
||||
IE_NAME = 'vine:user'
|
||||
_VALID_URL = r'(?:https?://)?vine\.co/(?P<user>[^/]+)/?(\?.*)?$'
|
||||
_VINE_BASE_URL = "https://vine.co/"
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
user = mobj.group('user')
|
||||
|
||||
profile_url = "%sapi/users/profiles/vanity/%s" % (
|
||||
self._VINE_BASE_URL, user)
|
||||
profile_data = self._download_json(
|
||||
profile_url, user, note='Downloading user profile data')
|
||||
|
||||
user_id = profile_data['data']['userId']
|
||||
timeline_data = []
|
||||
for pagenum in itertools.count(1):
|
||||
timeline_url = "%sapi/timelines/users/%s?page=%s" % (
|
||||
self._VINE_BASE_URL, user_id, pagenum)
|
||||
timeline_page = self._download_json(
|
||||
timeline_url, user, note='Downloading page %d' % pagenum)
|
||||
timeline_data.extend(timeline_page['data']['records'])
|
||||
if timeline_page['data']['nextPage'] is None:
|
||||
break
|
||||
|
||||
entries = [
|
||||
self.url_result(e['permalinkUrl'], 'Vine') for e in timeline_data]
|
||||
return self.playlist_result(entries, user)
|
||||
|
@ -37,7 +37,7 @@ class VKIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '163339118',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Elvira Dzhonik',
|
||||
'uploader': 'Elya Iskhakova',
|
||||
'title': 'Dream Theater - Hollow Years Live at Budokan 720*',
|
||||
'duration': 558,
|
||||
}
|
||||
@ -108,7 +108,7 @@ class VKIE(InfoExtractor):
|
||||
|
||||
m_yt = re.search(r'src="(http://www.youtube.com/.*?)"', info_page)
|
||||
if m_yt is not None:
|
||||
self.to_screen(u'Youtube video detected')
|
||||
self.to_screen('Youtube video detected')
|
||||
return self.url_result(m_yt.group(1), 'Youtube')
|
||||
data_json = self._search_regex(r'var vars = ({.*?});', info_page, 'vars')
|
||||
data = json.loads(data_json)
|
||||
|
@ -1,47 +1,69 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import datetime
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class VubeIE(InfoExtractor):
|
||||
IE_NAME = 'vube'
|
||||
IE_DESC = 'Vube.com'
|
||||
_VALID_URL = r'http://vube\.com/[^/]+/(?P<id>[\da-zA-Z]{10})'
|
||||
_VALID_URL = r'http://vube\.com/(?:[^/]+/)+(?P<id>[\da-zA-Z]{10})\b'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon',
|
||||
'md5': 'db7aba89d4603dadd627e9d1973946fe',
|
||||
'info_dict': {
|
||||
'id': 'YL2qNPkqon',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chiara Grispo - Price Tag by Jessie J',
|
||||
'description': 'md5:8ea652a1f36818352428cb5134933313',
|
||||
'thumbnail': 'http://frame.thestaticvube.com/snap/228x128/102e7e63057-5ebc-4f5c-4065-6ce4ebde131f.jpg',
|
||||
'uploader': 'Chiara.Grispo',
|
||||
'uploader_id': '1u3hX0znhP',
|
||||
'upload_date': '20140103',
|
||||
'duration': 170.56
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon',
|
||||
'md5': 'db7aba89d4603dadd627e9d1973946fe',
|
||||
'info_dict': {
|
||||
'id': 'YL2qNPkqon',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chiara Grispo - Price Tag by Jessie J',
|
||||
'description': 'md5:8ea652a1f36818352428cb5134933313',
|
||||
'thumbnail': 'http://frame.thestaticvube.com/snap/228x128/102e7e63057-5ebc-4f5c-4065-6ce4ebde131f.jpg',
|
||||
'uploader': 'Chiara.Grispo',
|
||||
'uploader_id': '1u3hX0znhP',
|
||||
'timestamp': 1388743358,
|
||||
'upload_date': '20140103',
|
||||
'duration': 170.56
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://vube.com/SerainaMusic/my-7-year-old-sister-and-i-singing-alive-by-krewella/UeBhTudbfS?t=s&n=1',
|
||||
'md5': '5d4a52492d76f72712117ce6b0d98d08',
|
||||
'info_dict': {
|
||||
'id': 'UeBhTudbfS',
|
||||
'ext': 'mp4',
|
||||
'title': 'My 7 year old Sister and I singing "Alive" by Krewella',
|
||||
'description': 'md5:40bcacb97796339f1690642c21d56f4a',
|
||||
'thumbnail': 'http://frame.thestaticvube.com/snap/228x128/102265d5a9f-0f17-4f6b-5753-adf08484ee1e.jpg',
|
||||
'uploader': 'Seraina',
|
||||
'uploader_id': 'XU9VE2BQ2q',
|
||||
'timestamp': 1396492438,
|
||||
'upload_date': '20140403',
|
||||
'duration': 240.107
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video = self._download_json('http://vube.com/api/v2/video/%s' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
video = self._download_json(
|
||||
'http://vube.com/api/v2/video/%s' % video_id, video_id, 'Downloading video JSON')
|
||||
|
||||
public_id = video['public_id']
|
||||
|
||||
formats = [{'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (fmt['media_resolution_id'], public_id),
|
||||
'height': int(fmt['height']),
|
||||
'abr': int(fmt['audio_bitrate']),
|
||||
'vbr': int(fmt['video_bitrate']),
|
||||
'format_id': fmt['media_resolution_id']
|
||||
} for fmt in video['mtm'] if fmt['transcoding_status'] == 'processed']
|
||||
formats = [
|
||||
{
|
||||
'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (fmt['media_resolution_id'], public_id),
|
||||
'height': int(fmt['height']),
|
||||
'abr': int(fmt['audio_bitrate']),
|
||||
'vbr': int(fmt['video_bitrate']),
|
||||
'format_id': fmt['media_resolution_id']
|
||||
} for fmt in video['mtm'] if fmt['transcoding_status'] == 'processed'
|
||||
]
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
@ -52,16 +74,16 @@ class VubeIE(InfoExtractor):
|
||||
thumbnail = 'http:' + thumbnail
|
||||
uploader = video['user_alias']
|
||||
uploader_id = video['user_url_id']
|
||||
upload_date = datetime.datetime.fromtimestamp(int(video['upload_time'])).strftime('%Y%m%d')
|
||||
timestamp = int(video['upload_time'])
|
||||
duration = video['duration']
|
||||
view_count = video['raw_view_count']
|
||||
like_count = video['total_likes']
|
||||
dislike_count= video['total_hates']
|
||||
view_count = video.get('raw_view_count')
|
||||
like_count = video.get('total_likes')
|
||||
dislike_count= video.get('total_hates')
|
||||
|
||||
comment = self._download_json('http://vube.com/api/video/%s/comment' % video_id,
|
||||
video_id, 'Downloading video comment JSON')
|
||||
comment = self._download_json(
|
||||
'http://vube.com/api/video/%s/comment' % video_id, video_id, 'Downloading video comment JSON')
|
||||
|
||||
comment_count = comment['total']
|
||||
comment_count = int_or_none(comment.get('total'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@ -71,7 +93,7 @@ class VubeIE(InfoExtractor):
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'upload_date': upload_date,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
|
66
youtube_dl/extractor/vuclip.py
Normal file
66
youtube_dl/extractor/vuclip.py
Normal file
@ -0,0 +1,66 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse_urlparse,
|
||||
parse_duration,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class VuClipIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:m\.)?vuclip\.com/w\?.*?cid=(?P<id>[0-9]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://m.vuclip.com/w?cid=843902317&fid=63532&z=1007&nvar&frm=index.html&bu=4757321434',
|
||||
'md5': '92ac9d1ccefec4f0bb474661ab144fcf',
|
||||
'info_dict': {
|
||||
'id': '843902317',
|
||||
'ext': '3gp',
|
||||
'title': 'Movie Trailer: Noah',
|
||||
'duration': 139,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
ad_m = re.search(
|
||||
r'''value="No.*?" onClick="location.href='([^"']+)'"''', webpage)
|
||||
if ad_m:
|
||||
urlr = compat_urllib_parse_urlparse(url)
|
||||
adfree_url = urlr.scheme + '://' + urlr.netloc + ad_m.group(1)
|
||||
webpage = self._download_webpage(
|
||||
adfree_url, video_id, note='Download post-ad page')
|
||||
|
||||
links_code = self._search_regex(
|
||||
r'(?s)<div class="social align_c".*?>(.*?)<hr\s*/?>', webpage,
|
||||
'links')
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.*?)-\s*Vuclip</title>', webpage, 'title').strip()
|
||||
|
||||
quality_order = qualities(['Reg', 'Hi'])
|
||||
formats = []
|
||||
for url, q in re.findall(
|
||||
r'<a href="(?P<url>[^"]+)".*?>(?P<q>[^<]+)</a>', links_code):
|
||||
format_id = compat_urllib_parse_urlparse(url).scheme + '-' + q
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': url,
|
||||
'quality': quality_order(q),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'\(([0-9:]+)\)</span></h1>', webpage, 'duration', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
}
|
@ -115,6 +115,34 @@ class WDRIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class WDRMobileIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://mobile-ondemand\.wdr\.de/
|
||||
.*?/fsk(?P<age_limit>[0-9]+)
|
||||
/[0-9]+/[0-9]+/
|
||||
(?P<id>[0-9]+)_(?P<title>[0-9]+)'''
|
||||
IE_NAME = 'wdr:mobile'
|
||||
_TEST = {
|
||||
'url': 'http://mobile-ondemand.wdr.de/CMS2010/mdb/ondemand/weltweit/fsk0/42/421735/421735_4283021.mp4',
|
||||
'info_dict': {
|
||||
'title': '4283021',
|
||||
'id': '421735',
|
||||
'age_limit': 0,
|
||||
},
|
||||
'_skip': 'Will be depublicized shortly'
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
return {
|
||||
'id': mobj.group('id'),
|
||||
'title': mobj.group('title'),
|
||||
'age_limit': int(mobj.group('age_limit')),
|
||||
'url': url,
|
||||
'user_agent': 'mobile',
|
||||
}
|
||||
|
||||
|
||||
class WDRMausIE(InfoExtractor):
|
||||
_VALID_URL = 'http://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)(?:/index\.php5|(?<!index)\.php5|/(?:$|[?#]))'
|
||||
IE_DESC = 'Sendung mit der Maus'
|
||||
|
@ -210,23 +210,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},
|
||||
|
||||
# Dash webm
|
||||
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
||||
'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
||||
'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
||||
'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
||||
'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
||||
'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
||||
'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH webm', 'preference': -40},
|
||||
'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH webm', 'preference': -40},
|
||||
'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
|
||||
'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
|
||||
'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40},
|
||||
'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH webm', 'preference': -40},
|
||||
'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH webm', 'preference': -40},
|
||||
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
|
||||
'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
|
||||
'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
|
||||
'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
|
||||
'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
|
||||
'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
|
||||
'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
|
||||
# Dash webm audio
|
||||
'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 48, 'preference': -50},
|
||||
'172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 256, 'preference': -50},
|
||||
'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50},
|
||||
'172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
|
||||
|
||||
# RTMP (unnamed)
|
||||
'_rtmp': {'protocol': 'rtmp'},
|
||||
@ -242,7 +242,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
u"uploader": u"Philipp Hagemeister",
|
||||
u"uploader_id": u"phihag",
|
||||
u"upload_date": u"20121002",
|
||||
u"description": u"test chars: \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
|
||||
u"description": u"test chars: \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .",
|
||||
u"categories": [u'Science & Technology'],
|
||||
}
|
||||
},
|
||||
{
|
||||
@ -252,7 +253,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
u"info_dict": {
|
||||
u"upload_date": u"20120506",
|
||||
u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
|
||||
u"description": u"md5:5b292926389560516e384ac437c0ec07",
|
||||
u"description": u"md5:fea86fda2d5a5784273df5c7cc994d9f",
|
||||
u"uploader": u"Icona Pop",
|
||||
u"uploader_id": u"IconaPop"
|
||||
}
|
||||
@ -304,7 +305,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
u'id': u'IB3lcPjvWLA',
|
||||
u'ext': u'm4a',
|
||||
u'title': u'Afrojack - The Spark ft. Spree Wilson',
|
||||
u'description': u'md5:3199ed45ee8836572865580804d7ac0f',
|
||||
u'description': u'md5:9717375db5a9a3992be4668bbf3bc0a8',
|
||||
u'uploader': u'AfrojackVEVO',
|
||||
u'uploader_id': u'AfrojackVEVO',
|
||||
u'upload_date': u'20131011',
|
||||
@ -1082,9 +1083,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
break
|
||||
if 'token' not in video_info:
|
||||
if 'reason' in video_info:
|
||||
raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
|
||||
raise ExtractorError(
|
||||
u'YouTube said: %s' % video_info['reason'][0],
|
||||
expected=True, video_id=video_id)
|
||||
else:
|
||||
raise ExtractorError(u'"token" parameter not in video info for unknown reason')
|
||||
raise ExtractorError(
|
||||
u'"token" parameter not in video info for unknown reason',
|
||||
video_id=video_id)
|
||||
|
||||
if 'view_count' in video_info:
|
||||
view_count = int(video_info['view_count'][0])
|
||||
@ -1113,7 +1118,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
|
||||
# title
|
||||
if 'title' in video_info:
|
||||
video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
|
||||
video_title = video_info['title'][0]
|
||||
else:
|
||||
self._downloader.report_warning(u'Unable to extract video title')
|
||||
video_title = u'_'
|
||||
@ -1132,11 +1137,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
|
||||
# upload date
|
||||
upload_date = None
|
||||
mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
|
||||
mobj = re.search(r'(?s)id="eow-date.*?>(.*?)</span>', video_webpage)
|
||||
if mobj is None:
|
||||
mobj = re.search(
|
||||
r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded) on (.*?)</strong>',
|
||||
video_webpage)
|
||||
if mobj is not None:
|
||||
upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
|
||||
upload_date = unified_strdate(upload_date)
|
||||
|
||||
m_cat_container = get_element_by_id("eow-category", video_webpage)
|
||||
if m_cat_container:
|
||||
category = self._html_search_regex(
|
||||
r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
|
||||
default=None)
|
||||
video_categories = None if category is None else [category]
|
||||
else:
|
||||
video_categories = None
|
||||
|
||||
# description
|
||||
video_description = get_element_by_id("eow-description", video_webpage)
|
||||
if video_description:
|
||||
@ -1343,6 +1361,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
'title': video_title,
|
||||
'thumbnail': video_thumbnail,
|
||||
'description': video_description,
|
||||
'categories': video_categories,
|
||||
'subtitles': video_subtitles,
|
||||
'duration': video_duration,
|
||||
'age_limit': 18 if age_gate else 0,
|
||||
@ -1432,6 +1451,13 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
page = self._download_webpage(url, playlist_id)
|
||||
more_widget_html = content_html = page
|
||||
|
||||
# Check if the playlist exists or is private
|
||||
if re.search(r'<div class="yt-alert-message">[^<]*?(The|This) playlist (does not exist|is private)[^<]*?</div>', page) is not None:
|
||||
raise ExtractorError(
|
||||
u'The playlist doesn\'t exist or is private, use --username or '
|
||||
'--netrc to access it.',
|
||||
expected=True)
|
||||
|
||||
# Extract the video ids from the playlist pages
|
||||
ids = []
|
||||
|
||||
@ -1749,9 +1775,12 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
||||
feed_entries.extend(
|
||||
self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||
for video_id in ids)
|
||||
if info['paging'] is None:
|
||||
mobj = re.search(
|
||||
r'data-uix-load-more-href="/?[^"]+paging=(?P<paging>\d+)',
|
||||
feed_html)
|
||||
if mobj is None:
|
||||
break
|
||||
paging = info['paging']
|
||||
paging = mobj.group('paging')
|
||||
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
|
||||
|
||||
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
|
||||
|
@ -9,6 +9,7 @@ from .common import AudioConversionError, PostProcessor
|
||||
from ..utils import (
|
||||
check_executable,
|
||||
compat_subprocess_get_DEVNULL,
|
||||
encodeArgument,
|
||||
encodeFilename,
|
||||
PostProcessingError,
|
||||
prepend_extension,
|
||||
@ -48,7 +49,7 @@ class FFmpegPostProcessor(PostProcessor):
|
||||
for path in input_paths:
|
||||
files_cmd.extend(['-i', encodeFilename(path, True)])
|
||||
cmd = ([self._get_executable(), '-y'] + files_cmd
|
||||
+ opts +
|
||||
+ [encodeArgument(o) for o in opts] +
|
||||
[encodeFilename(self._ffmpeg_filename_argument(out_path), True)])
|
||||
|
||||
if self._downloader.params.get('verbose', False):
|
||||
|
@ -6,6 +6,7 @@ from .common import PostProcessor
|
||||
from ..utils import (
|
||||
check_executable,
|
||||
hyphenate_date,
|
||||
subprocess_check_output
|
||||
)
|
||||
|
||||
|
||||
@ -57,7 +58,7 @@ class XAttrMetadataPP(PostProcessor):
|
||||
elif user_has_xattr:
|
||||
cmd = ['xattr', '-w', key, value, path]
|
||||
|
||||
subprocess.check_output(cmd)
|
||||
subprocess_check_output(cmd)
|
||||
|
||||
else:
|
||||
# On Unix, and can't find pyxattr, setfattr, or xattr.
|
||||
|
@ -540,6 +540,16 @@ def encodeFilename(s, for_subprocess=False):
|
||||
encoding = 'utf-8'
|
||||
return s.encode(encoding, 'ignore')
|
||||
|
||||
|
||||
def encodeArgument(s):
|
||||
if not isinstance(s, compat_str):
|
||||
# Legacy code that uses byte strings
|
||||
# Uncomment the following line after fixing all post processors
|
||||
#assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
|
||||
s = s.decode('ascii')
|
||||
return encodeFilename(s, True)
|
||||
|
||||
|
||||
def decodeOption(optval):
|
||||
if optval is None:
|
||||
return optval
|
||||
@ -594,13 +604,15 @@ def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
|
||||
|
||||
class ExtractorError(Exception):
|
||||
"""Error during info extraction."""
|
||||
def __init__(self, msg, tb=None, expected=False, cause=None):
|
||||
def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
|
||||
""" tb, if given, is the original traceback (so that it can be printed out).
|
||||
If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
|
||||
"""
|
||||
|
||||
if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
|
||||
expected = True
|
||||
if video_id is not None:
|
||||
msg = video_id + ': ' + msg
|
||||
if not expected:
|
||||
msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
|
||||
super(ExtractorError, self).__init__(msg)
|
||||
@ -608,6 +620,7 @@ class ExtractorError(Exception):
|
||||
self.traceback = tb
|
||||
self.exc_info = sys.exc_info() # preserve original exception
|
||||
self.cause = cause
|
||||
self.video_id = video_id
|
||||
|
||||
def format_traceback(self):
|
||||
if self.traceback is None:
|
||||
@ -923,7 +936,11 @@ def _windows_write_string(s, out):
|
||||
2: -12,
|
||||
}
|
||||
|
||||
fileno = out.fileno()
|
||||
try:
|
||||
fileno = out.fileno()
|
||||
except AttributeError:
|
||||
# If the output stream doesn't have a fileno, it's virtual
|
||||
return False
|
||||
if fileno not in WIN_OUTPUT_IDS:
|
||||
return False
|
||||
|
||||
@ -1245,7 +1262,10 @@ class HEADRequest(compat_urllib_request.Request):
|
||||
return "HEAD"
|
||||
|
||||
|
||||
def int_or_none(v, scale=1, default=None):
|
||||
def int_or_none(v, scale=1, default=None, get_attr=None):
|
||||
if get_attr:
|
||||
if v is not None:
|
||||
v = getattr(v, get_attr, None)
|
||||
return default if v is None else (int(v) // scale)
|
||||
|
||||
|
||||
@ -1406,3 +1426,28 @@ US_RATINGS = {
|
||||
|
||||
def strip_jsonp(code):
|
||||
return re.sub(r'(?s)^[a-zA-Z_]+\s*\(\s*(.*)\);\s*?\s*$', r'\1', code)
|
||||
|
||||
|
||||
def qualities(quality_ids):
|
||||
""" Get a numeric quality value out of a list of possible values """
|
||||
def q(qid):
|
||||
try:
|
||||
return quality_ids.index(qid)
|
||||
except ValueError:
|
||||
return -1
|
||||
return q
|
||||
|
||||
|
||||
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
|
||||
|
||||
try:
|
||||
subprocess_check_output = subprocess.check_output
|
||||
except AttributeError:
|
||||
def subprocess_check_output(*args, **kwargs):
|
||||
assert 'input' not in kwargs
|
||||
p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs)
|
||||
output, _ = p.communicate()
|
||||
ret = p.poll()
|
||||
if ret:
|
||||
raise subprocess.CalledProcessError(ret, p.args, output=output)
|
||||
return output
|
||||
|
@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2014.04.21.1'
|
||||
__version__ = '2014.06.02'
|
||||
|
Reference in New Issue
Block a user