Compare commits
190 Commits
2014.02.27
...
2014.03.21
Author | SHA1 | Date | |
---|---|---|---|
747373d4ae | |||
18d367c0a5 | |||
a1a530b067 | |||
cb9722cb3f | |||
773c0b4bb8 | |||
23c322a531 | |||
7e8c0af004 | |||
d2983ccb25 | |||
f24e9833dc | |||
bc2bdf5709 | |||
627a209f74 | |||
1a4895453a | |||
aab74fa106 | |||
2bd9efd4c2 | |||
39a743fb9b | |||
4966a0b22d | |||
fc26023120 | |||
8d7c0cca13 | |||
f66ede4328 | |||
cc88b90ec8 | |||
b6c5fa9a0b | |||
dff10eaa77 | |||
4e6f9aeca1 | |||
e68301af21 | |||
17286a96f2 | |||
0892363e6d | |||
f102372b5f | |||
ecbe1ad207 | |||
9d840c43b5 | |||
6f50f63382 | |||
ff14fc4964 | |||
e125c21531 | |||
93d020dd65 | |||
a7515ec265 | |||
b6c1ceccc2 | |||
4056ad8f36 | |||
6563837ee1 | |||
fd5e6f7ef2 | |||
15fd51b37c | |||
f1cef7a9ff | |||
8264223511 | |||
bc6d597828 | |||
aba77bbfc2 | |||
955c451456 | |||
e5de3f6c89 | |||
2a1db721d4 | |||
1e0eb60f1a | |||
87a29e6f25 | |||
c3d36f134f | |||
84769e708c | |||
9d2ecdbc71 | |||
9b69af5342 | |||
c21215b421 | |||
cddcfd90b4 | |||
f36aacba0f | |||
355271fb61 | |||
2a5b502364 | |||
98ff9d82d4 | |||
b1ff87224c | |||
b461641fb9 | |||
b047de6f6e | |||
34ca5d9ba0 | |||
60cc4dc4b4 | |||
db95dc13a1 | |||
777ac90791 | |||
04f9bebbcb | |||
4ea3137e41 | |||
a0792b738e | |||
19a41fc613 | |||
3ee52157fb | |||
c4d197ee2d | |||
a33932cfe3 | |||
bcf89ce62c | |||
e3899d0e00 | |||
dcb00da49c | |||
aa51d20d19 | |||
ae7ed92057 | |||
e45b31d9bd | |||
5a25f39653 | |||
963d7ec412 | |||
e712d94adf | |||
6a72423955 | |||
4126826b10 | |||
b773ead7fd | |||
855e2750bc | |||
805ef3c60b | |||
fbc2dcb40b | |||
5375d7ad84 | |||
90f3476180 | |||
ee95c09333 | |||
75d06db9fc | |||
439a1fffcb | |||
9d9d70c462 | |||
b4a186b7be | |||
bdebf51c8f | |||
264b86f9b4 | |||
9e55e37a2e | |||
1471956573 | |||
27865b2169 | |||
6d07ce0162 | |||
edb7fc5435 | |||
31f77343f2 | |||
63ad031583 | |||
957688cee6 | |||
806d6c2e8c | |||
0ef68e04d9 | |||
a496524db2 | |||
935c7360cc | |||
340b046876 | |||
cc1db7f9b7 | |||
a4ff6c4762 | |||
1060425cbb | |||
e9c092f125 | |||
22ff5d2105 | |||
136db7881b | |||
dae313e725 | |||
b74fa8cd2c | |||
94eae04c94 | |||
16ff7ebc77 | |||
c361c505b0 | |||
d37c07c575 | |||
9d6105c9f0 | |||
8dec03ecba | |||
826547870b | |||
52d6a9a61d | |||
ad242b5fbc | |||
3524175625 | |||
7b9965ea93 | |||
0a5bce566f | |||
8012bd2424 | |||
f55a1f0a88 | |||
bacac173a9 | |||
ca1fee34f2 | |||
6dadaa9930 | |||
553f6e4633 | |||
652bee05f0 | |||
d63516e9cd | |||
e477dcf649 | |||
9d3f7781f3 | |||
c7095dada3 | |||
607dbbad76 | |||
17b75c0de1 | |||
ab24f4f3be | |||
e1a52d9e10 | |||
d0ff838433 | |||
b37b94501c | |||
cb3bb2cfef | |||
e2cc7983e9 | |||
c9ae7b9565 | |||
86fb4347f7 | |||
2fcec131f5 | |||
9f62eaf4ef | |||
f92259c026 | |||
0afef30b23 | |||
dcdfd1c711 | |||
2acc1f8f50 | |||
2c39b0c695 | |||
e77c5b4f63 | |||
409a16cb72 | |||
94d5e90b4f | |||
2d73b45805 | |||
271a2dbfa2 | |||
bf4adcac66 | |||
fb8b8fdd62 | |||
5a0b26252e | |||
7d78f0cc48 | |||
f00fc78674 | |||
392017874c | |||
c3cb92d1ab | |||
aa5590fa07 | |||
8cfb5bbf92 | |||
69bb54ebf9 | |||
ca97a56e4b | |||
fc26f3b4c2 | |||
f604c93c64 | |||
dc3727b65c | |||
aba3231de1 | |||
9193bab91d | |||
fbcf3e416d | |||
c0e5d85631 | |||
ca7fa3dcb3 | |||
4ccfba28d9 | |||
abb82f1ddc | |||
546582ec3e | |||
4534485586 | |||
50a138d95c | |||
1b86cc41cf | |||
83cebb8b7a | |||
9e68f9fdf1 | |||
60daf7f0bb |
17
README.md
17
README.md
@ -36,6 +36,9 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
an empty string (--proxy "") for direct
|
||||
connection
|
||||
--no-check-certificate Suppress HTTPS certificate validation.
|
||||
--prefer-insecure Use an unencrypted connection to retrieve
|
||||
information about the video. (Currently
|
||||
supported only for YouTube)
|
||||
--cache-dir DIR Location in the filesystem where youtube-dl
|
||||
can store some downloaded information
|
||||
permanently. By default $XDG_CACHE_HOME
|
||||
@ -124,8 +127,12 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
video id, %(playlist)s for the playlist the
|
||||
video is in, %(playlist_index)s for the
|
||||
position in the playlist and %% for a
|
||||
literal percent. Use - to output to stdout.
|
||||
Can also be used to download to a different
|
||||
literal percent. %(height)s and %(width)s
|
||||
for the width and height of the video
|
||||
format. %(resolution)s for a textual
|
||||
description of the resolution of the video
|
||||
format. Use - to output to stdout. Can also
|
||||
be used to download to a different
|
||||
directory, for example with -o '/my/downloa
|
||||
ds/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
|
||||
--autonumber-size NUMBER Specifies the number of digits in
|
||||
@ -187,9 +194,9 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
preference using slashes: "-f 22/17/18".
|
||||
"-f mp4" and "-f flv" are also supported.
|
||||
You can also use the special names "best",
|
||||
"bestaudio", "worst", and "worstaudio". By
|
||||
default, youtube-dl will pick the best
|
||||
quality.
|
||||
"bestvideo", "bestaudio", "worst",
|
||||
"worstvideo" and "worstaudio". By default,
|
||||
youtube-dl will pick the best quality.
|
||||
--all-formats download all available video formats
|
||||
--prefer-free-formats prefer free video formats unless a specific
|
||||
one is requested
|
||||
|
@ -70,7 +70,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
|
||||
git checkout HEAD -- youtube-dl youtube-dl.exe
|
||||
|
||||
/bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..."
|
||||
for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done
|
||||
for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
|
||||
scp -r "build/$version" ytdl@yt-dl.org:html/tmp/
|
||||
ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/"
|
||||
ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"
|
||||
@ -97,7 +97,7 @@ rm -rf build
|
||||
|
||||
make pypi-files
|
||||
echo "Uploading to PyPi ..."
|
||||
python setup.py sdist upload
|
||||
python setup.py sdist bdist_wheel upload
|
||||
make clean
|
||||
|
||||
/bin/echo -e "\n### DONE!"
|
||||
|
@ -71,7 +71,7 @@ class FakeYDL(YoutubeDL):
|
||||
old_report_warning(message)
|
||||
self.report_warning = types.MethodType(report_warning, self)
|
||||
|
||||
def get_testcases():
|
||||
def gettestcases():
|
||||
for ie in youtube_dl.extractor.gen_extractors():
|
||||
t = getattr(ie, '_TEST', None)
|
||||
if t:
|
||||
|
44
test/test_InfoExtractor.py
Normal file
44
test/test_InfoExtractor.py
Normal file
@ -0,0 +1,44 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import FakeYDL
|
||||
from youtube_dl.extractor.common import InfoExtractor
|
||||
from youtube_dl.extractor import YoutubeIE, get_info_extractor
|
||||
|
||||
|
||||
class TestIE(InfoExtractor):
|
||||
pass
|
||||
|
||||
|
||||
class TestInfoExtractor(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.ie = TestIE(FakeYDL())
|
||||
|
||||
def test_ie_key(self):
|
||||
self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE)
|
||||
|
||||
def test_html_search_regex(self):
|
||||
html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>'
|
||||
search = lambda re, *args: self.ie._html_search_regex(re, html, *args)
|
||||
self.assertEqual(search(r'<p id="foo">(.+?)</p>', 'foo'), 'Watch this video')
|
||||
|
||||
def test_opengraph(self):
|
||||
ie = self.ie
|
||||
html = '''
|
||||
<meta name="og:title" content='Foo'/>
|
||||
<meta content="Some video's description " name="og:description"/>
|
||||
<meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&key2=val2'/>
|
||||
'''
|
||||
self.assertEqual(ie._og_search_title(html), 'Foo')
|
||||
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
|
||||
self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@ -182,6 +182,24 @@ class TestFormatSelection(unittest.TestCase):
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'vid-high')
|
||||
|
||||
def test_format_selection_video(self):
|
||||
formats = [
|
||||
{'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none'},
|
||||
{'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none'},
|
||||
{'format_id': 'vid', 'ext': 'mp4', 'preference': 3},
|
||||
]
|
||||
info_dict = {'formats': formats, 'extractor': 'test'}
|
||||
|
||||
ydl = YDL({'format': 'bestvideo'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'dash-video-high')
|
||||
|
||||
ydl = YDL({'format': 'worstvideo'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'dash-video-low')
|
||||
|
||||
def test_youtube_format_selection(self):
|
||||
order = [
|
||||
'38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '36', '17', '13',
|
||||
|
@ -9,7 +9,7 @@ import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
from test.helper import get_testcases
|
||||
from test.helper import gettestcases
|
||||
|
||||
from youtube_dl.extractor import (
|
||||
FacebookIE,
|
||||
@ -71,6 +71,10 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
def test_youtube_truncated(self):
|
||||
self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url'])
|
||||
|
||||
def test_youtube_search_matching(self):
|
||||
self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
|
||||
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
|
||||
|
||||
def test_justin_tv_channelid_matching(self):
|
||||
self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
|
||||
self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
|
||||
@ -101,7 +105,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
|
||||
def test_no_duplicates(self):
|
||||
ies = gen_extractors()
|
||||
for tc in get_testcases():
|
||||
for tc in gettestcases():
|
||||
url = tc['url']
|
||||
for ie in ies:
|
||||
if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
|
||||
@ -120,6 +124,8 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
|
||||
def test_vimeo_matching(self):
|
||||
self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel'])
|
||||
self.assertMatch('http://vimeo.com/channels/31259', ['vimeo:channel'])
|
||||
self.assertMatch('http://vimeo.com/channels/31259/53576664', ['vimeo'])
|
||||
self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user'])
|
||||
self.assertMatch('http://vimeo.com/user7108434/videos', ['vimeo:user'])
|
||||
self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review'])
|
||||
@ -135,6 +141,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
def test_pbs(self):
|
||||
# https://github.com/rg3/youtube-dl/issues/2350
|
||||
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['PBS'])
|
||||
self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -8,7 +8,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import (
|
||||
get_params,
|
||||
get_testcases,
|
||||
gettestcases,
|
||||
try_rm,
|
||||
md5,
|
||||
report_warning
|
||||
@ -51,7 +51,7 @@ def _file_md5(fn):
|
||||
with open(fn, 'rb') as f:
|
||||
return hashlib.md5(f.read()).hexdigest()
|
||||
|
||||
defs = get_testcases()
|
||||
defs = gettestcases()
|
||||
|
||||
|
||||
class TestDownload(unittest.TestCase):
|
||||
@ -144,6 +144,10 @@ def generator(test_case):
|
||||
self.assertTrue(
|
||||
isinstance(got, compat_str) and match_rex.match(got),
|
||||
u'field %s (value: %r) should match %r' % (info_field, got, match_str))
|
||||
elif isinstance(expected, type):
|
||||
got = info_dict.get(info_field)
|
||||
self.assertTrue(isinstance(got, expected),
|
||||
u'Expected type %r, but got value %r of type %r' % (expected, got, type(got)))
|
||||
else:
|
||||
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
||||
got = 'md5:' + md5(info_dict.get(info_field))
|
||||
@ -152,19 +156,19 @@ def generator(test_case):
|
||||
self.assertEqual(expected, got,
|
||||
u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
|
||||
|
||||
# If checkable fields are missing from the test case, print the info_dict
|
||||
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
|
||||
for key, value in info_dict.items()
|
||||
if value and key in ('title', 'description', 'uploader', 'upload_date', 'uploader_id', 'location'))
|
||||
if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()):
|
||||
sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')
|
||||
|
||||
# Check for the presence of mandatory fields
|
||||
for key in ('id', 'url', 'title', 'ext'):
|
||||
self.assertTrue(key in info_dict.keys() and info_dict[key])
|
||||
# Check for mandatory fields that are automatically set by YoutubeDL
|
||||
for key in ['webpage_url', 'extractor', 'extractor_key']:
|
||||
self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
|
||||
|
||||
# If checkable fields are missing from the test case, print the info_dict
|
||||
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
|
||||
for key, value in info_dict.items()
|
||||
if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
|
||||
if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()):
|
||||
sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')
|
||||
finally:
|
||||
try_rm_tcs_files()
|
||||
|
||||
|
@ -36,6 +36,7 @@ from youtube_dl.extractor import (
|
||||
RutubeChannelIE,
|
||||
GoogleSearchIE,
|
||||
GenericIE,
|
||||
TEDIE,
|
||||
)
|
||||
|
||||
|
||||
@ -98,7 +99,7 @@ class TestPlaylists(unittest.TestCase):
|
||||
result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], '5124905')
|
||||
self.assertTrue(len(result['entries']) >= 11)
|
||||
self.assertTrue(len(result['entries']) >= 6)
|
||||
|
||||
def test_soundcloud_set(self):
|
||||
dl = FakeYDL()
|
||||
@ -248,16 +249,25 @@ class TestPlaylists(unittest.TestCase):
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], 'python language')
|
||||
self.assertEqual(result['title'], 'python language')
|
||||
self.assertTrue(len(result['entries']) == 15)
|
||||
self.assertEqual(len(result['entries']), 15)
|
||||
|
||||
def test_generic_rss_feed(self):
|
||||
dl = FakeYDL()
|
||||
ie = GenericIE(dl)
|
||||
result = ie.extract('http://www.escapistmagazine.com/rss/videos/list/1.xml')
|
||||
result = ie.extract('http://phihag.de/2014/youtube-dl/rss.xml')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], 'http://www.escapistmagazine.com/rss/videos/list/1.xml')
|
||||
self.assertEqual(result['id'], 'http://phihag.de/2014/youtube-dl/rss.xml')
|
||||
self.assertEqual(result['title'], 'Zero Punctuation')
|
||||
self.assertTrue(len(result['entries']) > 10)
|
||||
|
||||
def test_ted_playlist(self):
|
||||
dl = FakeYDL()
|
||||
ie = TEDIE(dl)
|
||||
result = ie.extract('http://www.ted.com/playlists/who_are_the_hackers')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], '10')
|
||||
self.assertEqual(result['title'], 'Who are the hackers?')
|
||||
self.assertTrue(len(result['entries']) >= 6)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -33,6 +33,7 @@ from youtube_dl.utils import (
|
||||
unified_strdate,
|
||||
unsmuggle_url,
|
||||
url_basename,
|
||||
urlencode_postdata,
|
||||
xpath_with_ns,
|
||||
)
|
||||
|
||||
@ -261,5 +262,9 @@ class TestUtil(unittest.TestCase):
|
||||
bam''')
|
||||
self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam'])
|
||||
|
||||
def test_urlencode_postdata(self):
|
||||
data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
|
||||
self.assertTrue(isinstance(data, bytes))
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -16,6 +16,7 @@ from youtube_dl.extractor import (
|
||||
YoutubeChannelIE,
|
||||
YoutubeShowIE,
|
||||
YoutubeTopListIE,
|
||||
YoutubeSearchURLIE,
|
||||
)
|
||||
|
||||
|
||||
@ -133,5 +134,14 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
entries = result['entries']
|
||||
self.assertTrue(len(entries) >= 5)
|
||||
|
||||
def test_youtube_search_url(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubeSearchURLIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video')
|
||||
entries = result['entries']
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['title'], 'youtube-dl test video')
|
||||
self.assertTrue(len(entries) >= 5)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -1,4 +0,0 @@
|
||||
# Legacy file for backwards compatibility, use youtube_dl.extractor instead!
|
||||
|
||||
from .extractor.common import InfoExtractor, SearchInfoExtractor
|
||||
from .extractor import gen_extractors, get_info_extractor
|
@ -4,6 +4,7 @@
|
||||
from __future__ import absolute_import, unicode_literals
|
||||
|
||||
import collections
|
||||
import datetime
|
||||
import errno
|
||||
import io
|
||||
import json
|
||||
@ -147,6 +148,8 @@ class YoutubeDL(object):
|
||||
again.
|
||||
cookiefile: File name where cookies should be read from and dumped to.
|
||||
nocheckcertificate:Do not verify SSL certificates
|
||||
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
|
||||
At the moment, this is only supported by YouTube.
|
||||
proxy: URL of the proxy server to use
|
||||
socket_timeout: Time to wait for unresponsive hosts, in seconds
|
||||
bidi_workaround: Work around buggy terminals without bidirectional text
|
||||
@ -370,12 +373,15 @@ class YoutubeDL(object):
|
||||
Print the message to stderr, it will be prefixed with 'WARNING:'
|
||||
If stderr is a tty file the 'WARNING:' will be colored
|
||||
'''
|
||||
if self._err_file.isatty() and os.name != 'nt':
|
||||
_msg_header = '\033[0;33mWARNING:\033[0m'
|
||||
if self.params.get('logger') is not None:
|
||||
self.params['logger'].warning(message)
|
||||
else:
|
||||
_msg_header = 'WARNING:'
|
||||
warning_message = '%s %s' % (_msg_header, message)
|
||||
self.to_stderr(warning_message)
|
||||
if self._err_file.isatty() and os.name != 'nt':
|
||||
_msg_header = '\033[0;33mWARNING:\033[0m'
|
||||
else:
|
||||
_msg_header = 'WARNING:'
|
||||
warning_message = '%s %s' % (_msg_header, message)
|
||||
self.to_stderr(warning_message)
|
||||
|
||||
def report_error(self, message, tb=None):
|
||||
'''
|
||||
@ -409,6 +415,13 @@ class YoutubeDL(object):
|
||||
template_dict['autonumber'] = autonumber_templ % self._num_downloads
|
||||
if template_dict.get('playlist_index') is not None:
|
||||
template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
|
||||
if template_dict.get('resolution') is None:
|
||||
if template_dict.get('width') and template_dict.get('height'):
|
||||
template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
|
||||
elif template_dict.get('height'):
|
||||
template_dict['resolution'] = '%sp' % template_dict['height']
|
||||
elif template_dict.get('width'):
|
||||
template_dict['resolution'] = '?x%d' % template_dict['width']
|
||||
|
||||
sanitize = lambda k, v: sanitize_filename(
|
||||
compat_str(v),
|
||||
@ -522,7 +535,7 @@ class YoutubeDL(object):
|
||||
else:
|
||||
raise
|
||||
else:
|
||||
self.report_error('no suitable InfoExtractor: %s' % url)
|
||||
self.report_error('no suitable InfoExtractor for URL %s' % url)
|
||||
|
||||
def process_ie_result(self, ie_result, download=True, extra_info={}):
|
||||
"""
|
||||
@ -656,6 +669,18 @@ class YoutubeDL(object):
|
||||
if f.get('vcodec') == 'none']
|
||||
if audio_formats:
|
||||
return audio_formats[0]
|
||||
elif format_spec == 'bestvideo':
|
||||
video_formats = [
|
||||
f for f in available_formats
|
||||
if f.get('acodec') == 'none']
|
||||
if video_formats:
|
||||
return video_formats[-1]
|
||||
elif format_spec == 'worstvideo':
|
||||
video_formats = [
|
||||
f for f in available_formats
|
||||
if f.get('acodec') == 'none']
|
||||
if video_formats:
|
||||
return video_formats[0]
|
||||
else:
|
||||
extensions = ['mp4', 'flv', 'webm', '3gp']
|
||||
if format_spec in extensions:
|
||||
@ -675,6 +700,14 @@ class YoutubeDL(object):
|
||||
info_dict['playlist'] = None
|
||||
info_dict['playlist_index'] = None
|
||||
|
||||
if 'display_id' not in info_dict and 'id' in info_dict:
|
||||
info_dict['display_id'] = info_dict['id']
|
||||
|
||||
if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
|
||||
upload_date = datetime.datetime.utcfromtimestamp(
|
||||
info_dict['timestamp'])
|
||||
info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
|
||||
|
||||
# This extractors handle format selection themselves
|
||||
if info_dict['extractor'] in ['Youku']:
|
||||
if download:
|
||||
@ -688,8 +721,11 @@ class YoutubeDL(object):
|
||||
else:
|
||||
formats = info_dict['formats']
|
||||
|
||||
if not formats:
|
||||
raise ExtractorError('No video formats found!')
|
||||
|
||||
# We check that all the formats have the format and format_id fields
|
||||
for (i, format) in enumerate(formats):
|
||||
for i, format in enumerate(formats):
|
||||
if format.get('format_id') is None:
|
||||
format['format_id'] = compat_str(i)
|
||||
if format.get('format') is None:
|
||||
@ -908,7 +944,7 @@ class YoutubeDL(object):
|
||||
self.to_screen('[%s] %s: Downloading thumbnail ...' %
|
||||
(info_dict['extractor'], info_dict['id']))
|
||||
try:
|
||||
uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
|
||||
uf = self.urlopen(info_dict['thumbnail'])
|
||||
with open(thumb_filename, 'wb') as thumbf:
|
||||
shutil.copyfileobj(uf, thumbf)
|
||||
self.to_screen('[%s] %s: Writing thumbnail to: %s' %
|
||||
@ -1154,7 +1190,7 @@ class YoutubeDL(object):
|
||||
|
||||
def urlopen(self, req):
|
||||
""" Start an HTTP download """
|
||||
return self._opener.open(req)
|
||||
return self._opener.open(req, timeout=self._socket_timeout)
|
||||
|
||||
def print_debug_header(self):
|
||||
if not self.params.get('verbose'):
|
||||
@ -1185,7 +1221,7 @@ class YoutubeDL(object):
|
||||
|
||||
def _setup_opener(self):
|
||||
timeout_val = self.params.get('socket_timeout')
|
||||
timeout = 600 if timeout_val is None else float(timeout_val)
|
||||
self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
|
||||
|
||||
opts_cookiefile = self.params.get('cookiefile')
|
||||
opts_proxy = self.params.get('proxy')
|
||||
@ -1223,7 +1259,3 @@ class YoutubeDL(object):
|
||||
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
|
||||
opener.addheaders = []
|
||||
self._opener = opener
|
||||
|
||||
# TODO remove this global modification
|
||||
compat_urllib_request.install_opener(opener)
|
||||
socket.setdefaulttimeout(timeout)
|
||||
|
@ -48,12 +48,14 @@ __authors__ = (
|
||||
'Niklas Laxström',
|
||||
'David Triendl',
|
||||
'Anthony Weems',
|
||||
'David Wagner',
|
||||
'Juan C. Olivares',
|
||||
'Mattias Harrysson',
|
||||
)
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
|
||||
import codecs
|
||||
import getpass
|
||||
import io
|
||||
import locale
|
||||
import optparse
|
||||
@ -65,6 +67,7 @@ import sys
|
||||
|
||||
|
||||
from .utils import (
|
||||
compat_getpass,
|
||||
compat_print,
|
||||
DateRange,
|
||||
decodeOption,
|
||||
@ -234,6 +237,9 @@ def parseOpts(overrideArguments=None):
|
||||
'--proxy', dest='proxy', default=None, metavar='URL',
|
||||
help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
|
||||
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
|
||||
general.add_option(
|
||||
'--prefer-insecure', action='store_true', dest='prefer_insecure',
|
||||
help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
|
||||
general.add_option(
|
||||
'--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
|
||||
help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
|
||||
@ -254,7 +260,6 @@ def parseOpts(overrideArguments=None):
|
||||
action='store_true',
|
||||
help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
|
||||
|
||||
|
||||
selection.add_option(
|
||||
'--playlist-start',
|
||||
dest='playliststart', metavar='NUMBER', default=1, type=int,
|
||||
@ -313,7 +318,7 @@ def parseOpts(overrideArguments=None):
|
||||
|
||||
video_format.add_option('-f', '--format',
|
||||
action='store', dest='format', metavar='FORMAT', default=None,
|
||||
help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported. You can also use the special names "best", "bestaudio", "worst", and "worstaudio". By default, youtube-dl will pick the best quality.')
|
||||
help='video format code, specify the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality.')
|
||||
video_format.add_option('--all-formats',
|
||||
action='store_const', dest='format', help='download all available video formats', const='all')
|
||||
video_format.add_option('--prefer-free-formats',
|
||||
@ -428,6 +433,8 @@ def parseOpts(overrideArguments=None):
|
||||
'%(extractor)s for the provider (youtube, metacafe, etc), '
|
||||
'%(id)s for the video id, %(playlist)s for the playlist the video is in, '
|
||||
'%(playlist_index)s for the position in the playlist and %% for a literal percent. '
|
||||
'%(height)s and %(width)s for the width and height of the video format. '
|
||||
'%(resolution)s for a textual description of the resolution of the video format. '
|
||||
'Use - to output to stdout. Can also be used to download to a different directory, '
|
||||
'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
|
||||
filesystem.add_option('--autonumber-size',
|
||||
@ -606,7 +613,7 @@ def _real_main(argv=None):
|
||||
if opts.usetitle and opts.useid:
|
||||
parser.error(u'using title conflicts with using video ID')
|
||||
if opts.username is not None and opts.password is None:
|
||||
opts.password = getpass.getpass(u'Type account password and press return:')
|
||||
opts.password = compat_getpass(u'Type account password and press [Return]: ')
|
||||
if opts.ratelimit is not None:
|
||||
numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
|
||||
if numeric_limit is None:
|
||||
@ -751,6 +758,7 @@ def _real_main(argv=None):
|
||||
'download_archive': download_archive_fn,
|
||||
'cookiefile': opts.cookiefile,
|
||||
'nocheckcertificate': opts.no_check_certificate,
|
||||
'prefer_insecure': opts.prefer_insecure,
|
||||
'proxy': opts.proxy,
|
||||
'socket_timeout': opts.socket_timeout,
|
||||
'bidi_workaround': opts.bidi_workaround,
|
||||
|
@ -49,7 +49,7 @@ class HttpFD(FileDownloader):
|
||||
while count <= retries:
|
||||
# Establish connection
|
||||
try:
|
||||
data = compat_urllib_request.urlopen(request)
|
||||
data = self.ydl.urlopen(request)
|
||||
break
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
if (err.code < 500 or err.code >= 600) and err.code != 416:
|
||||
@ -59,7 +59,7 @@ class HttpFD(FileDownloader):
|
||||
# Unable to resume (requested range not satisfiable)
|
||||
try:
|
||||
# Open the connection again without the range header
|
||||
data = compat_urllib_request.urlopen(basic_request)
|
||||
data = self.ydl.urlopen(basic_request)
|
||||
content_length = data.info()['Content-Length']
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
if err.code < 500 or err.code >= 600:
|
||||
@ -85,6 +85,7 @@ class HttpFD(FileDownloader):
|
||||
else:
|
||||
# The length does not match, we start the download over
|
||||
self.report_unable_to_resume()
|
||||
resume_len = 0
|
||||
open_mode = 'wb'
|
||||
break
|
||||
# Retry
|
||||
|
@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
@ -22,7 +24,7 @@ class RtmpFD(FileDownloader):
|
||||
proc_stderr_closed = False
|
||||
while not proc_stderr_closed:
|
||||
# read line from stderr
|
||||
line = u''
|
||||
line = ''
|
||||
while True:
|
||||
char = proc.stderr.read(1)
|
||||
if not char:
|
||||
@ -46,7 +48,7 @@ class RtmpFD(FileDownloader):
|
||||
data_len = None
|
||||
if percent > 0:
|
||||
data_len = int(downloaded_data_len * 100 / percent)
|
||||
data_len_str = u'~' + format_bytes(data_len)
|
||||
data_len_str = '~' + format_bytes(data_len)
|
||||
self.report_progress(percent, data_len_str, speed, eta)
|
||||
cursor_in_new_line = False
|
||||
self._hook_progress({
|
||||
@ -76,12 +78,12 @@ class RtmpFD(FileDownloader):
|
||||
})
|
||||
elif self.params.get('verbose', False):
|
||||
if not cursor_in_new_line:
|
||||
self.to_screen(u'')
|
||||
self.to_screen('')
|
||||
cursor_in_new_line = True
|
||||
self.to_screen(u'[rtmpdump] '+line)
|
||||
self.to_screen('[rtmpdump] '+line)
|
||||
proc.wait()
|
||||
if not cursor_in_new_line:
|
||||
self.to_screen(u'')
|
||||
self.to_screen('')
|
||||
return proc.returncode
|
||||
|
||||
url = info_dict['url']
|
||||
@ -102,7 +104,7 @@ class RtmpFD(FileDownloader):
|
||||
try:
|
||||
subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
||||
except (OSError, IOError):
|
||||
self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
|
||||
self.report_error('RTMP download detected but "rtmpdump" could not be run')
|
||||
return False
|
||||
|
||||
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
||||
@ -127,7 +129,7 @@ class RtmpFD(FileDownloader):
|
||||
basic_args += ['--live']
|
||||
if conn:
|
||||
basic_args += ['--conn', conn]
|
||||
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
|
||||
args = basic_args + [[], ['--resume', '--skip', '1']][not live and self.params.get('continuedl', False)]
|
||||
|
||||
if sys.platform == 'win32' and sys.version_info < (3, 0):
|
||||
# Windows subprocess module does not actually support Unicode
|
||||
@ -150,26 +152,35 @@ class RtmpFD(FileDownloader):
|
||||
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
|
||||
except ImportError:
|
||||
shell_quote = repr
|
||||
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args))
|
||||
self.to_screen('[debug] rtmpdump command line: ' + shell_quote(str_args))
|
||||
|
||||
RD_SUCCESS = 0
|
||||
RD_FAILED = 1
|
||||
RD_INCOMPLETE = 2
|
||||
RD_NO_CONNECT = 3
|
||||
|
||||
retval = run_rtmpdump(args)
|
||||
|
||||
while (retval == 2 or retval == 1) and not test:
|
||||
if retval == RD_NO_CONNECT:
|
||||
self.report_error('[rtmpdump] Could not connect to RTMP server.')
|
||||
return False
|
||||
|
||||
while (retval == RD_INCOMPLETE or retval == RD_FAILED) and not test and not live:
|
||||
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'[rtmpdump] %s bytes' % prevsize)
|
||||
self.to_screen('[rtmpdump] %s bytes' % prevsize)
|
||||
time.sleep(5.0) # This seems to be needed
|
||||
retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
|
||||
retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == RD_FAILED])
|
||||
cursize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
if prevsize == cursize and retval == 1:
|
||||
if prevsize == cursize and retval == RD_FAILED:
|
||||
break
|
||||
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
|
||||
if prevsize == cursize and retval == 2 and cursize > 1024:
|
||||
self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
|
||||
retval = 0
|
||||
if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024:
|
||||
self.to_screen('[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
|
||||
retval = RD_SUCCESS
|
||||
break
|
||||
if retval == 0 or (test and retval == 2):
|
||||
if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'[rtmpdump] %s bytes' % fsize)
|
||||
self.to_screen('[rtmpdump] %s bytes' % fsize)
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
@ -179,6 +190,6 @@ class RtmpFD(FileDownloader):
|
||||
})
|
||||
return True
|
||||
else:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'rtmpdump exited with code %d' % retval)
|
||||
self.to_stderr('\n')
|
||||
self.report_error('rtmpdump exited with code %d' % retval)
|
||||
return False
|
||||
|
@ -1,5 +1,6 @@
|
||||
from .academicearth import AcademicEarthCourseIE
|
||||
from .addanime import AddAnimeIE
|
||||
from .aftonbladet import AftonbladetIE
|
||||
from .anitube import AnitubeIE
|
||||
from .aparat import AparatIE
|
||||
from .appletrailers import AppleTrailersIE
|
||||
@ -9,6 +10,7 @@ from .arte import (
|
||||
ArteTvIE,
|
||||
ArteTVPlus7IE,
|
||||
ArteTVCreativeIE,
|
||||
ArteTVConcertIE,
|
||||
ArteTVFutureIE,
|
||||
ArteTVDDCIE,
|
||||
)
|
||||
@ -23,9 +25,11 @@ from .br import BRIE
|
||||
from .breakcom import BreakIE
|
||||
from .brightcove import BrightcoveIE
|
||||
from .c56 import C56IE
|
||||
from .canal13cl import Canal13clIE
|
||||
from .canalplus import CanalplusIE
|
||||
from .canalc2 import Canalc2IE
|
||||
from .cbs import CBSIE
|
||||
from .ceskatelevize import CeskaTelevizeIE
|
||||
from .channel9 import Channel9IE
|
||||
from .chilloutzone import ChilloutzoneIE
|
||||
from .cinemassacre import CinemassacreIE
|
||||
@ -50,7 +54,6 @@ from .dailymotion import (
|
||||
DailymotionUserIE,
|
||||
)
|
||||
from .daum import DaumIE
|
||||
from .depositfiles import DepositFilesIE
|
||||
from .dotsub import DotsubIE
|
||||
from .dreisat import DreiSatIE
|
||||
from .defense import DefenseGouvFrIE
|
||||
@ -89,6 +92,7 @@ from .funnyordie import FunnyOrDieIE
|
||||
from .gamekings import GamekingsIE
|
||||
from .gamespot import GameSpotIE
|
||||
from .gametrailers import GametrailersIE
|
||||
from .gdcvault import GDCVaultIE
|
||||
from .generic import GenericIE
|
||||
from .googleplus import GooglePlusIE
|
||||
from .googlesearch import GoogleSearchIE
|
||||
@ -133,6 +137,7 @@ from .lynda import (
|
||||
)
|
||||
from .m6 import M6IE
|
||||
from .macgamestore import MacGameStoreIE
|
||||
from .mailru import MailRuIE
|
||||
from .malemotion import MalemotionIE
|
||||
from .mdr import MDRIE
|
||||
from .metacafe import MetacafeIE
|
||||
@ -171,6 +176,7 @@ from .ooyala import OoyalaIE
|
||||
from .orf import ORFIE
|
||||
from .pbs import PBSIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .playvid import PlayvidIE
|
||||
from .podomatic import PodomaticIE
|
||||
from .pornhd import PornHdIE
|
||||
from .pornhub import PornHubIE
|
||||
@ -191,6 +197,7 @@ from .rutube import (
|
||||
RutubeMovieIE,
|
||||
RutubePersonIE,
|
||||
)
|
||||
from .rutv import RUTVIE
|
||||
from .savefrom import SaveFromIE
|
||||
from .servingsys import ServingSysIE
|
||||
from .sina import SinaIE
|
||||
@ -235,7 +242,12 @@ from .tube8 import Tube8IE
|
||||
from .tudou import TudouIE
|
||||
from .tumblr import TumblrIE
|
||||
from .tutv import TutvIE
|
||||
from .tvigle import TvigleIE
|
||||
from .tvp import TvpIE
|
||||
from .udemy import (
|
||||
UdemyIE,
|
||||
UdemyCourseIE
|
||||
)
|
||||
from .unistra import UnistraIE
|
||||
from .ustream import UstreamIE, UstreamChannelIE
|
||||
from .vbox7 import Vbox7IE
|
||||
@ -262,6 +274,7 @@ from .viki import VikiIE
|
||||
from .vk import VKIE
|
||||
from .vube import VubeIE
|
||||
from .wat import WatIE
|
||||
from .wdr import WDRIE
|
||||
from .weibo import WeiboIE
|
||||
from .wimp import WimpIE
|
||||
from .wistia import WistiaIE
|
||||
@ -280,19 +293,20 @@ from .youku import YoukuIE
|
||||
from .youporn import YouPornIE
|
||||
from .youtube import (
|
||||
YoutubeIE,
|
||||
YoutubePlaylistIE,
|
||||
YoutubeSearchIE,
|
||||
YoutubeSearchDateIE,
|
||||
YoutubeUserIE,
|
||||
YoutubeChannelIE,
|
||||
YoutubeShowIE,
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeRecommendedIE,
|
||||
YoutubeTruncatedURLIE,
|
||||
YoutubeWatchLaterIE,
|
||||
YoutubeFavouritesIE,
|
||||
YoutubeHistoryIE,
|
||||
YoutubePlaylistIE,
|
||||
YoutubeRecommendedIE,
|
||||
YoutubeSearchDateIE,
|
||||
YoutubeSearchIE,
|
||||
YoutubeSearchURLIE,
|
||||
YoutubeShowIE,
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeTopListIE,
|
||||
YoutubeTruncatedURLIE,
|
||||
YoutubeUserIE,
|
||||
YoutubeWatchLaterIE,
|
||||
)
|
||||
from .zdf import ZDFIE
|
||||
|
||||
|
69
youtube_dl/extractor/aftonbladet.py
Normal file
69
youtube_dl/extractor/aftonbladet.py
Normal file
@ -0,0 +1,69 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class AftonbladetIE(InfoExtractor):
|
||||
_VALID_URL = r'^http://tv\.aftonbladet\.se/webbtv.+?(?P<video_id>article[0-9]+)\.ab(?:$|[?#])'
|
||||
_TEST = {
|
||||
'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab',
|
||||
'info_dict': {
|
||||
'id': 'article36015',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna',
|
||||
'description': 'Jupiters måne mest aktiv av alla himlakroppar',
|
||||
'upload_date': '20140306',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.search(self._VALID_URL, url)
|
||||
|
||||
video_id = mobj.group('video_id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# find internal video meta data
|
||||
META_URL = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json'
|
||||
internal_meta_id = self._html_search_regex(
|
||||
r'data-aptomaId="([\w\d]+)"', webpage, 'internal_meta_id')
|
||||
internal_meta_url = META_URL % internal_meta_id
|
||||
internal_meta_json = self._download_json(
|
||||
internal_meta_url, video_id, 'Downloading video meta data')
|
||||
|
||||
# find internal video formats
|
||||
FORMATS_URL = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s'
|
||||
internal_video_id = internal_meta_json['videoId']
|
||||
internal_formats_url = FORMATS_URL % internal_video_id
|
||||
internal_formats_json = self._download_json(
|
||||
internal_formats_url, video_id, 'Downloading video formats')
|
||||
|
||||
formats = []
|
||||
for fmt in internal_formats_json['formats']['http']['pseudostreaming']['mp4']:
|
||||
p = fmt['paths'][0]
|
||||
formats.append({
|
||||
'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']),
|
||||
'ext': 'mp4',
|
||||
'width': fmt['width'],
|
||||
'height': fmt['height'],
|
||||
'tbr': fmt['bitrate'],
|
||||
'protocol': 'http',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
timestamp = datetime.datetime.fromtimestamp(internal_meta_json['timePublished'])
|
||||
upload_date = timestamp.strftime('%Y%m%d')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': internal_meta_json['title'],
|
||||
'formats': formats,
|
||||
'thumbnail': internal_meta_json['imageUrl'],
|
||||
'description': internal_meta_json['shortPreamble'],
|
||||
'upload_date': upload_date,
|
||||
'duration': internal_meta_json['duration'],
|
||||
'view_count': internal_meta_json['views'],
|
||||
}
|
@ -72,18 +72,22 @@ class ArteTvIE(InfoExtractor):
|
||||
return self._extract_liveweb(url, name, lang)
|
||||
|
||||
if re.search(self._LIVE_URL, url) is not None:
|
||||
raise ExtractorError(u'Arte live streams are not yet supported, sorry')
|
||||
raise ExtractorError('Arte live streams are not yet supported, sorry')
|
||||
# self.extractLiveStream(url)
|
||||
# return
|
||||
|
||||
raise ExtractorError('No video found')
|
||||
|
||||
def _extract_video(self, url, video_id, lang):
|
||||
"""Extract from videos.arte.tv"""
|
||||
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
|
||||
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
|
||||
ref_xml_doc = self._download_xml(ref_xml_url, video_id, note=u'Downloading metadata')
|
||||
ref_xml_doc = self._download_xml(
|
||||
ref_xml_url, video_id, note='Downloading metadata')
|
||||
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
|
||||
config_xml_url = config_node.attrib['ref']
|
||||
config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
|
||||
config_xml = self._download_webpage(
|
||||
config_xml_url, video_id, note='Downloading configuration')
|
||||
|
||||
video_urls = list(re.finditer(r'<url quality="(?P<quality>.*?)">(?P<url>.*?)</url>', config_xml))
|
||||
def _key(m):
|
||||
@ -127,7 +131,7 @@ class ArteTvIE(InfoExtractor):
|
||||
|
||||
class ArteTVPlus7IE(InfoExtractor):
|
||||
IE_NAME = 'arte.tv:+7'
|
||||
_VALID_URL = r'https?://www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'
|
||||
|
||||
@classmethod
|
||||
def _extract_url_info(cls, url):
|
||||
@ -198,6 +202,8 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
re.match(r'VO-ST(F|A)', f.get('versionCode', '')) is None,
|
||||
# The version with sourds/mal subtitles has also lower relevance
|
||||
re.match(r'VO?(F|A)-STM\1', f.get('versionCode', '')) is None,
|
||||
# Prefer http downloads over m3u8
|
||||
0 if f['url'].endswith('m3u8') else 1,
|
||||
)
|
||||
formats = sorted(formats, key=sort_key)
|
||||
def _format(format_info):
|
||||
@ -238,8 +244,9 @@ class ArteTVCreativeIE(ArteTVPlus7IE):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
|
||||
'file': '050489-002.mp4',
|
||||
'info_dict': {
|
||||
'id': '050489-002',
|
||||
'ext': 'mp4',
|
||||
'title': 'Agentur Amateur / Agence Amateur #2 : Corporate Design',
|
||||
},
|
||||
}
|
||||
@ -251,8 +258,9 @@ class ArteTVFutureIE(ArteTVPlus7IE):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081',
|
||||
'file': '050940-003.mp4',
|
||||
'info_dict': {
|
||||
'id': '050940-003',
|
||||
'ext': 'mp4',
|
||||
'title': 'Les champignons au secours de la planète',
|
||||
},
|
||||
}
|
||||
@ -266,7 +274,7 @@ class ArteTVFutureIE(ArteTVPlus7IE):
|
||||
|
||||
class ArteTVDDCIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:ddc'
|
||||
_VALID_URL = r'http?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
|
||||
_VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>.+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, lang = self._extract_url_info(url)
|
||||
@ -280,3 +288,19 @@ class ArteTVDDCIE(ArteTVPlus7IE):
|
||||
javascriptPlayerGenerator = self._download_webpage(script_url, video_id, 'Download javascript player generator')
|
||||
json_url = self._search_regex(r"json_url=(.*)&rendering_place.*", javascriptPlayerGenerator, 'json url')
|
||||
return self._extract_from_json_url(json_url, video_id, lang)
|
||||
|
||||
|
||||
class ArteTVConcertIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:concert'
|
||||
_VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>de|fr)/(?P<id>.+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde',
|
||||
'md5': '9ea035b7bd69696b67aa2ccaaa218161',
|
||||
'info_dict': {
|
||||
'id': '186',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Notwist im Pariser Konzertclub "Divan du Monde"',
|
||||
'upload_date': '20140128',
|
||||
},
|
||||
}
|
||||
|
@ -9,21 +9,35 @@ from ..utils import ExtractorError
|
||||
|
||||
class BRIE(InfoExtractor):
|
||||
IE_DESC = "Bayerischer Rundfunk Mediathek"
|
||||
_VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?P<id>[a-z0-9\-]+)\.html$"
|
||||
_VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?:[a-z0-9\-/]+/)?(?P<id>[a-z0-9\-]+)\.html$"
|
||||
_BASE_URL = "http://www.br.de"
|
||||
|
||||
_TEST = {
|
||||
"url": "http://www.br.de/mediathek/video/anselm-gruen-114.html",
|
||||
"md5": "c4f83cf0f023ba5875aba0bf46860df2",
|
||||
"info_dict": {
|
||||
"id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532",
|
||||
"ext": "mp4",
|
||||
"title": "Feiern und Verzichten",
|
||||
"description": "Anselm Grün: Feiern und Verzichten",
|
||||
"uploader": "BR/Birgit Baier",
|
||||
"upload_date": "20140301"
|
||||
_TESTS = [
|
||||
{
|
||||
"url": "http://www.br.de/mediathek/video/anselm-gruen-114.html",
|
||||
"md5": "c4f83cf0f023ba5875aba0bf46860df2",
|
||||
"info_dict": {
|
||||
"id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532",
|
||||
"ext": "mp4",
|
||||
"title": "Feiern und Verzichten",
|
||||
"description": "Anselm Grün: Feiern und Verzichten",
|
||||
"uploader": "BR/Birgit Baier",
|
||||
"upload_date": "20140301"
|
||||
}
|
||||
},
|
||||
{
|
||||
"url": "http://www.br.de/mediathek/video/sendungen/unter-unserem-himmel/unter-unserem-himmel-alpen-ueber-den-pass-100.html",
|
||||
"md5": "ab451b09d861dbed7d7cc9ab0be19ebe",
|
||||
"info_dict": {
|
||||
"id": "2c060e69-3a27-4e13-b0f0-668fac17d812",
|
||||
"ext": "mp4",
|
||||
"title": "Über den Pass",
|
||||
"description": "Die Eroberung der Alpen: Über den Pass",
|
||||
"uploader": None,
|
||||
"upload_date": None
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@ -33,16 +47,21 @@ class BRIE(InfoExtractor):
|
||||
r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL")
|
||||
xml = self._download_xml(self._BASE_URL + xml_url, None)
|
||||
|
||||
videos = [{
|
||||
"id": xml_video.get("externalId"),
|
||||
"title": xml_video.find("title").text,
|
||||
"formats": self._extract_formats(xml_video.find("assets")),
|
||||
"thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")),
|
||||
"description": " ".join(xml_video.find("shareTitle").text.splitlines()),
|
||||
"uploader": xml_video.find("author").text,
|
||||
"upload_date": "".join(reversed(xml_video.find("broadcastDate").text.split("."))),
|
||||
"webpage_url": xml_video.find("permalink").text,
|
||||
} for xml_video in xml.findall("video")]
|
||||
videos = []
|
||||
for xml_video in xml.findall("video"):
|
||||
video = {
|
||||
"id": xml_video.get("externalId"),
|
||||
"title": xml_video.find("title").text,
|
||||
"formats": self._extract_formats(xml_video.find("assets")),
|
||||
"thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")),
|
||||
"description": " ".join(xml_video.find("shareTitle").text.splitlines()),
|
||||
"webpage_url": xml_video.find("permalink").text
|
||||
}
|
||||
if xml_video.find("author").text:
|
||||
video["uploader"] = xml_video.find("author").text
|
||||
if xml_video.find("broadcastDate").text:
|
||||
video["upload_date"] = "".join(reversed(xml_video.find("broadcastDate").text.split(".")))
|
||||
videos.append(video)
|
||||
|
||||
if len(videos) > 1:
|
||||
self._downloader.report_warning(
|
||||
|
48
youtube_dl/extractor/canal13cl.py
Normal file
48
youtube_dl/extractor/canal13cl.py
Normal file
@ -0,0 +1,48 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class Canal13clIE(InfoExtractor):
|
||||
_VALID_URL = r'^http://(?:www\.)?13\.cl/(?:[^/?#]+/)*(?P<id>[^/?#]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.13.cl/t13/nacional/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
|
||||
'md5': '4cb1fa38adcad8fea88487a078831755',
|
||||
'info_dict': {
|
||||
'id': '1403022125',
|
||||
'display_id': 'el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
|
||||
'ext': 'mp4',
|
||||
'title': 'El "círculo de hierro" de Michelle Bachelet en su regreso a La Moneda',
|
||||
'description': '(Foto: Agencia Uno) En nueve días más, Michelle Bachelet va a asumir por segunda vez como presidenta de la República. Entre aquellos que la acompañarán hay caras que se repiten y otras que se consolidan en su entorno de colaboradores más cercanos.',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
title = self._html_search_meta(
|
||||
'twitter:title', webpage, 'title', fatal=True)
|
||||
description = self._html_search_meta(
|
||||
'twitter:description', webpage, 'description')
|
||||
url = self._html_search_regex(
|
||||
r'articuloVideo = \"(.*?)\"', webpage, 'url')
|
||||
real_id = self._search_regex(
|
||||
r'[^0-9]([0-9]{7,})[^0-9]', url, 'id', default=display_id)
|
||||
thumbnail = self._html_search_regex(
|
||||
r'articuloImagen = \"(.*?)\"', webpage, 'thumbnail')
|
||||
|
||||
return {
|
||||
'id': real_id,
|
||||
'display_id': display_id,
|
||||
'url': url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'ext': 'mp4',
|
||||
'thumbnail': thumbnail,
|
||||
}
|
126
youtube_dl/extractor/ceskatelevize.py
Normal file
126
youtube_dl/extractor/ceskatelevize.py
Normal file
@ -0,0 +1,126 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_urlparse,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class CeskaTelevizeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/213512120230004-spanelska-chripka',
|
||||
'info_dict': {
|
||||
'id': '213512120230004',
|
||||
'ext': 'flv',
|
||||
'title': 'První republika: Španělská chřipka',
|
||||
'duration': 3107.4,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires rtmpdump
|
||||
},
|
||||
'skip': 'Works only from Czech Republic.',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ceskatelevize.cz/ivysilani/1030584952-tsatsiki-maminka-a-policajt',
|
||||
'info_dict': {
|
||||
'id': '20138143440',
|
||||
'ext': 'flv',
|
||||
'title': 'Tsatsiki, maminka a policajt',
|
||||
'duration': 6754.1,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires rtmpdump
|
||||
},
|
||||
'skip': 'Works only from Czech Republic.',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina',
|
||||
'info_dict': {
|
||||
'id': '14716',
|
||||
'ext': 'flv',
|
||||
'title': 'První republika: Zpěvačka z Dupárny Bobina',
|
||||
'duration': 90,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires rtmpdump
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
url = url.replace('/porady/', '/ivysilani/').replace('/video/', '')
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
|
||||
if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
|
||||
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
|
||||
|
||||
typ = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type')
|
||||
episode_id = self._html_search_regex(r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id')
|
||||
|
||||
data = {
|
||||
'playlist[0][type]': typ,
|
||||
'playlist[0][id]': episode_id,
|
||||
'requestUrl': compat_urllib_parse_urlparse(url).path,
|
||||
'requestSource': 'iVysilani',
|
||||
}
|
||||
|
||||
req = compat_urllib_request.Request('http://www.ceskatelevize.cz/ivysilani/ajax/get-playlist-url',
|
||||
data=compat_urllib_parse.urlencode(data))
|
||||
|
||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
req.add_header('x-addr', '127.0.0.1')
|
||||
req.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||
req.add_header('Referer', url)
|
||||
|
||||
playlistpage = self._download_json(req, video_id)
|
||||
|
||||
req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlistpage['url']))
|
||||
req.add_header('Referer', url)
|
||||
|
||||
playlist = self._download_xml(req, video_id)
|
||||
|
||||
formats = []
|
||||
for i in playlist.find('smilRoot/body'):
|
||||
if 'AD' not in i.attrib['id']:
|
||||
base_url = i.attrib['base']
|
||||
parsedurl = compat_urllib_parse_urlparse(base_url)
|
||||
duration = i.attrib['duration']
|
||||
|
||||
for video in i.findall('video'):
|
||||
if video.attrib['label'] != 'AD':
|
||||
format_id = video.attrib['label']
|
||||
play_path = video.attrib['src']
|
||||
vbr = int(video.attrib['system-bitrate'])
|
||||
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': base_url,
|
||||
'vbr': vbr,
|
||||
'play_path': play_path,
|
||||
'app': parsedurl.path[1:] + '?' + parsedurl.query,
|
||||
'rtmp_live': True,
|
||||
'ext': 'flv',
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': episode_id,
|
||||
'title': self._html_search_regex(r'<title>(.+?) — iVysílání — Česká televize</title>', webpage, 'title'),
|
||||
'duration': float(duration),
|
||||
'formats': formats,
|
||||
}
|
@ -17,8 +17,9 @@ class CollegeHumorIE(InfoExtractor):
|
||||
'id': '6902724',
|
||||
'ext': 'mp4',
|
||||
'title': 'Comic-Con Cosplay Catastrophe',
|
||||
'description': 'Fans get creative this year',
|
||||
'description': "Fans get creative this year at San Diego. Too creative. And yes, that's really Joss Whedon.",
|
||||
'age_limit': 13,
|
||||
'duration': 187,
|
||||
},
|
||||
},
|
||||
{
|
||||
@ -28,22 +29,22 @@ class CollegeHumorIE(InfoExtractor):
|
||||
'id': '3505939',
|
||||
'ext': 'mp4',
|
||||
'title': 'Font Conference',
|
||||
'description': 'This video wasn\'t long enough,',
|
||||
'description': "This video wasn't long enough, so we made it double-spaced.",
|
||||
'age_limit': 10,
|
||||
'duration': 179,
|
||||
},
|
||||
},
|
||||
# embedded youtube video
|
||||
{
|
||||
'url': 'http://www.collegehumor.com/embed/6950457',
|
||||
'url': 'http://www.collegehumor.com/embed/6950306',
|
||||
'info_dict': {
|
||||
'id': 'W5gMp3ZjYg4',
|
||||
'id': 'Z-bao9fg6Yc',
|
||||
'ext': 'mp4',
|
||||
'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
|
||||
'uploader': 'Funnyplox TV',
|
||||
'uploader_id': 'funnyploxtv',
|
||||
'description': 'md5:7ded37421526d54afdf005e25bc2b7a3',
|
||||
'upload_date': '20140128',
|
||||
'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!',
|
||||
'uploader': 'Mark Dice',
|
||||
'uploader_id': 'MarkDice',
|
||||
'description': 'md5:62c3dab9351fac7bb44b53b69511d87f',
|
||||
'upload_date': '20140127',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -87,6 +88,7 @@ class CollegeHumorIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = int_or_none(vdata.get('duration'), 1000)
|
||||
like_count = int_or_none(vdata.get('likes'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@ -96,4 +98,5 @@ class CollegeHumorIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'age_limit': age_limit,
|
||||
'duration': duration,
|
||||
'like_count': like_count,
|
||||
}
|
||||
|
@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?comedycentral\.com/
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?(comedycentral|cc)\.com/
|
||||
(video-clips|episodes|cc-studios|video-collections)
|
||||
/(?P<title>.*)'''
|
||||
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
|
||||
|
@ -88,12 +88,18 @@ class InfoExtractor(object):
|
||||
|
||||
The following fields are optional:
|
||||
|
||||
display_id An alternative identifier for the video, not necessarily
|
||||
unique, but available before title. Typically, id is
|
||||
something like "4234987", title "Dancing naked mole rats",
|
||||
and display_id "dancing-naked-mole-rats"
|
||||
thumbnails: A list of dictionaries (with the entries "resolution" and
|
||||
"url") for the varying thumbnails
|
||||
thumbnail: Full URL to a video thumbnail image.
|
||||
description: One-line video description.
|
||||
uploader: Full name of the video uploader.
|
||||
timestamp: UNIX timestamp of the moment the video became available.
|
||||
upload_date: Video upload date (YYYYMMDD).
|
||||
If not explicitly set, calculated from timestamp.
|
||||
uploader_id: Nickname or id of the video uploader.
|
||||
location: Physical location of the video.
|
||||
subtitles: The subtitle file contents as a dictionary in the format
|
||||
@ -114,9 +120,6 @@ class InfoExtractor(object):
|
||||
_real_extract() methods and define a _VALID_URL regexp.
|
||||
Probably, they should also be added to the list of extractors.
|
||||
|
||||
_real_extract() must return a *list* of information dictionaries as
|
||||
described above.
|
||||
|
||||
Finally, the _WORKING attribute should be set to False for broken IEs
|
||||
in order to warn the users and skip the tests.
|
||||
"""
|
||||
@ -432,14 +435,14 @@ class InfoExtractor(object):
|
||||
if secure: regexes = self._og_regexes('video:secure_url') + regexes
|
||||
return self._html_search_regex(regexes, html, name, **kargs)
|
||||
|
||||
def _html_search_meta(self, name, html, display_name=None):
|
||||
def _html_search_meta(self, name, html, display_name=None, fatal=False):
|
||||
if display_name is None:
|
||||
display_name = name
|
||||
return self._html_search_regex(
|
||||
r'''(?ix)<meta
|
||||
(?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
|
||||
[^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
|
||||
html, display_name, fatal=False)
|
||||
html, display_name, fatal=fatal)
|
||||
|
||||
def _dc_search_uploader(self, html):
|
||||
return self._html_search_meta('dc.creator', html, 'uploader')
|
||||
|
@ -12,6 +12,7 @@ from ..utils import (
|
||||
get_element_by_id,
|
||||
orderedSet,
|
||||
str_to_int,
|
||||
int_or_none,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
@ -124,7 +125,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
if video_url is not None:
|
||||
m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
|
||||
if m_size is not None:
|
||||
width, height = m_size.group(1), m_size.group(2)
|
||||
width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))
|
||||
else:
|
||||
width, height = None, None
|
||||
formats.append({
|
||||
|
@ -1,60 +0,0 @@
|
||||
import re
|
||||
import os
|
||||
import socket
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_http_client,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class DepositFilesIE(InfoExtractor):
|
||||
"""Information extractor for depositfiles.com"""
|
||||
|
||||
_VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
file_id = url.split('/')[-1]
|
||||
# Rebuild url in english locale
|
||||
url = 'http://depositfiles.com/en/files/' + file_id
|
||||
|
||||
# Retrieve file webpage with 'Free download' button pressed
|
||||
free_download_indication = {'gateway_result' : '1'}
|
||||
request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(free_download_indication))
|
||||
try:
|
||||
self.report_download_webpage(file_id)
|
||||
webpage = compat_urllib_request.urlopen(request).read()
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
raise ExtractorError(u'Unable to retrieve file webpage: %s' % compat_str(err))
|
||||
|
||||
# Search for the real file URL
|
||||
mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
|
||||
if (mobj is None) or (mobj.group(1) is None):
|
||||
# Try to figure out reason of the error.
|
||||
mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
|
||||
if (mobj is not None) and (mobj.group(1) is not None):
|
||||
restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
|
||||
raise ExtractorError(u'%s' % restriction_message)
|
||||
else:
|
||||
raise ExtractorError(u'Unable to extract download URL from: %s' % url)
|
||||
|
||||
file_url = mobj.group(1)
|
||||
file_extension = os.path.splitext(file_url)[1][1:]
|
||||
|
||||
# Search for file title
|
||||
file_title = self._search_regex(r'<b title="(.*?)">', webpage, u'title')
|
||||
|
||||
return [{
|
||||
'id': file_id.decode('utf-8'),
|
||||
'url': file_url.decode('utf-8'),
|
||||
'uploader': None,
|
||||
'upload_date': None,
|
||||
'title': file_title,
|
||||
'ext': file_extension.decode('utf-8'),
|
||||
}]
|
@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
import socket
|
||||
@ -9,16 +11,15 @@ from ..utils import (
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
urlencode_postdata,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class FacebookIE(InfoExtractor):
|
||||
"""Information Extractor for Facebook"""
|
||||
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://)?(?:\w+\.)?facebook\.com/
|
||||
https?://(?:\w+\.)?facebook\.com/
|
||||
(?:[^#?]*\#!/)?
|
||||
(?:video/video\.php|photo\.php|video/embed)\?(?:.*?)
|
||||
(?:v|video_id)=(?P<id>[0-9]+)
|
||||
@ -26,21 +27,18 @@ class FacebookIE(InfoExtractor):
|
||||
_LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
|
||||
_CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
|
||||
_NETRC_MACHINE = 'facebook'
|
||||
IE_NAME = u'facebook'
|
||||
IE_NAME = 'facebook'
|
||||
_TEST = {
|
||||
u'url': u'https://www.facebook.com/photo.php?v=120708114770723',
|
||||
u'file': u'120708114770723.mp4',
|
||||
u'md5': u'48975a41ccc4b7a581abd68651c1a5a8',
|
||||
u'info_dict': {
|
||||
u"duration": 279,
|
||||
u"title": u"PEOPLE ARE AWESOME 2013"
|
||||
'url': 'https://www.facebook.com/photo.php?v=120708114770723',
|
||||
'md5': '48975a41ccc4b7a581abd68651c1a5a8',
|
||||
'info_dict': {
|
||||
'id': '120708114770723',
|
||||
'ext': 'mp4',
|
||||
'duration': 279,
|
||||
'title': 'PEOPLE ARE AWESOME 2013',
|
||||
}
|
||||
}
|
||||
|
||||
def report_login(self):
|
||||
"""Report attempt to log in."""
|
||||
self.to_screen(u'Logging in')
|
||||
|
||||
def _login(self):
|
||||
(useremail, password) = self._get_login_info()
|
||||
if useremail is None:
|
||||
@ -48,11 +46,13 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
|
||||
login_page_req.add_header('Cookie', 'locale=en_US')
|
||||
self.report_login()
|
||||
login_page = self._download_webpage(login_page_req, None, note=False,
|
||||
errnote=u'Unable to download login page')
|
||||
lsd = self._search_regex(r'"lsd":"(\w*?)"', login_page, u'lsd')
|
||||
lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, u'lgnrnd')
|
||||
login_page = self._download_webpage(login_page_req, None,
|
||||
note='Downloading login page',
|
||||
errnote='Unable to download login page')
|
||||
lsd = self._search_regex(
|
||||
r'<input type="hidden" name="lsd" value="([^"]*)"',
|
||||
login_page, 'lsd')
|
||||
lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, 'lgnrnd')
|
||||
|
||||
login_form = {
|
||||
'email': useremail,
|
||||
@ -65,27 +65,29 @@ class FacebookIE(InfoExtractor):
|
||||
'timezone': '-60',
|
||||
'trynum': '1',
|
||||
}
|
||||
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
|
||||
request = compat_urllib_request.Request(self._LOGIN_URL, urlencode_postdata(login_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
try:
|
||||
login_results = compat_urllib_request.urlopen(request).read()
|
||||
login_results = self._download_webpage(request, None,
|
||||
note='Logging in', errnote='unable to fetch login page')
|
||||
if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
|
||||
self._downloader.report_warning(u'unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
|
||||
self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
|
||||
return
|
||||
|
||||
check_form = {
|
||||
'fb_dtsg': self._search_regex(r'"fb_dtsg":"(.*?)"', login_results, u'fb_dtsg'),
|
||||
'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, u'nh'),
|
||||
'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'),
|
||||
'nh': self._search_regex(r'name="nh" value="(\w*?)"', login_results, 'nh'),
|
||||
'name_action_selected': 'dont_save',
|
||||
'submit[Continue]': self._search_regex(r'<input value="(.*?)" name="submit\[Continue\]"', login_results, u'continue'),
|
||||
'submit[Continue]': self._search_regex(r'<button[^>]+value="(.*?)"[^>]+name="submit\[Continue\]"', login_results, 'continue'),
|
||||
}
|
||||
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, compat_urllib_parse.urlencode(check_form))
|
||||
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
|
||||
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
check_response = compat_urllib_request.urlopen(check_req).read()
|
||||
check_response = self._download_webpage(check_req, None,
|
||||
note='Confirming login')
|
||||
if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
|
||||
self._downloader.report_warning(u'Unable to confirm login, you have to login in your brower and authorize the login.')
|
||||
self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.')
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
|
||||
self._downloader.report_warning('unable to log in: %s' % compat_str(err))
|
||||
return
|
||||
|
||||
def _real_initialize(self):
|
||||
@ -93,8 +95,6 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
|
||||
@ -107,10 +107,10 @@ class FacebookIE(InfoExtractor):
|
||||
m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
|
||||
if m_msg is not None:
|
||||
raise ExtractorError(
|
||||
u'The video is not available, Facebook said: "%s"' % m_msg.group(1),
|
||||
'The video is not available, Facebook said: "%s"' % m_msg.group(1),
|
||||
expected=True)
|
||||
else:
|
||||
raise ExtractorError(u'Cannot parse data')
|
||||
raise ExtractorError('Cannot parse data')
|
||||
data = dict(json.loads(m.group(1)))
|
||||
params_raw = compat_urllib_parse.unquote(data['params'])
|
||||
params = json.loads(params_raw)
|
||||
@ -119,19 +119,15 @@ class FacebookIE(InfoExtractor):
|
||||
if not video_url:
|
||||
video_url = video_data['sd_src']
|
||||
if not video_url:
|
||||
raise ExtractorError(u'Cannot find video URL')
|
||||
video_duration = int(video_data['video_duration'])
|
||||
thumbnail = video_data['thumbnail_src']
|
||||
raise ExtractorError('Cannot find video URL')
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, u'title')
|
||||
r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title')
|
||||
|
||||
info = {
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'duration': video_duration,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': int(video_data['video_duration']),
|
||||
'thumbnail': video_data['thumbnail_src'],
|
||||
}
|
||||
return [info]
|
||||
|
@ -8,8 +8,8 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
str_to_int,
|
||||
parse_duration,
|
||||
clean_html,
|
||||
)
|
||||
from youtube_dl.utils import clean_html
|
||||
|
||||
|
||||
class FourTubeIE(InfoExtractor):
|
||||
|
@ -1,12 +1,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class FunnyOrDieIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$'
|
||||
_VALID_URL = r'https?://(?:www\.)?funnyordie\.com/(?P<type>embed|videos)/(?P<id>[0-9a-f]+)(?:$|[?#/])'
|
||||
_TEST = {
|
||||
'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version',
|
||||
'file': '0732f586d7.mp4',
|
||||
@ -30,10 +31,23 @@ class FunnyOrDieIE(InfoExtractor):
|
||||
[r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''],
|
||||
webpage, 'video URL', flags=re.DOTALL)
|
||||
|
||||
if mobj.group('type') == 'embed':
|
||||
post_json = self._search_regex(
|
||||
r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
|
||||
post = json.loads(post_json)
|
||||
title = post['name']
|
||||
description = post.get('description')
|
||||
thumbnail = post.get('picture')
|
||||
else:
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@ -6,13 +8,14 @@ from .common import InfoExtractor
|
||||
class GamekingsIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'
|
||||
_TEST = {
|
||||
u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/",
|
||||
u'file': u'20130811.mp4',
|
||||
'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
|
||||
# MD5 is flaky, seems to change regularly
|
||||
#u'md5': u'2f32b1f7b80fdc5cb616efb4f387f8a3',
|
||||
# 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3',
|
||||
u'info_dict': {
|
||||
u"title": u"Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review",
|
||||
u"description": u"Melle en Steven hebben voor de review een week in de rechtbank doorbracht met Phoenix Wright: Ace Attorney - Dual Destinies.",
|
||||
'id': '20130811',
|
||||
'ext': 'mp4',
|
||||
'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review',
|
||||
'description': 'md5:632e61a9f97d700e83f43d77ddafb6a4',
|
||||
}
|
||||
}
|
||||
|
||||
|
134
youtube_dl/extractor/gdcvault.py
Normal file
134
youtube_dl/extractor/gdcvault.py
Normal file
@ -0,0 +1,134 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
|
||||
class GDCVaultIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple',
|
||||
'md5': '7ce8388f544c88b7ac11c7ab1b593704',
|
||||
'info_dict': {
|
||||
'id': '1019721',
|
||||
'ext': 'mp4',
|
||||
'title': 'Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)'
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.gdcvault.com/play/1015683/Embracing-the-Dark-Art-of',
|
||||
'info_dict': {
|
||||
'id': '1015683',
|
||||
'ext': 'flv',
|
||||
'title': 'Embracing the Dark Art of Mathematical Modeling in AI'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # Requires rtmpdump
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _parse_mp4(self, xml_description):
|
||||
video_formats = []
|
||||
mp4_video = xml_description.find('./metadata/mp4video')
|
||||
if mp4_video is None:
|
||||
return None
|
||||
|
||||
mobj = re.match(r'(?P<root>https?://.*?/).*', mp4_video.text)
|
||||
video_root = mobj.group('root')
|
||||
formats = xml_description.findall('./metadata/MBRVideos/MBRVideo')
|
||||
for format in formats:
|
||||
mobj = re.match(r'mp4\:(?P<path>.*)', format.find('streamName').text)
|
||||
url = video_root + mobj.group('path')
|
||||
vbr = format.find('bitrate').text
|
||||
video_formats.append({
|
||||
'url': url,
|
||||
'vbr': int(vbr),
|
||||
})
|
||||
return video_formats
|
||||
|
||||
def _parse_flv(self, xml_description):
|
||||
video_formats = []
|
||||
akami_url = xml_description.find('./metadata/akamaiHost').text
|
||||
slide_video_path = xml_description.find('./metadata/slideVideo').text
|
||||
video_formats.append({
|
||||
'url': 'rtmp://' + akami_url + '/' + slide_video_path,
|
||||
'format_note': 'slide deck video',
|
||||
'quality': -2,
|
||||
'preference': -2,
|
||||
'format_id': 'slides',
|
||||
})
|
||||
speaker_video_path = xml_description.find('./metadata/speakerVideo').text
|
||||
video_formats.append({
|
||||
'url': 'rtmp://' + akami_url + '/' + speaker_video_path,
|
||||
'format_note': 'speaker video',
|
||||
'quality': -1,
|
||||
'preference': -1,
|
||||
'format_id': 'speaker',
|
||||
})
|
||||
return video_formats
|
||||
|
||||
def _login(self, webpage_url, video_id):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None or password is None:
|
||||
self.report_warning('It looks like ' + webpage_url + ' requires a login. Try specifying a username and password and try again.')
|
||||
return None
|
||||
|
||||
mobj = re.match(r'(?P<root_url>https?://.*?/).*', webpage_url)
|
||||
login_url = mobj.group('root_url') + 'api/login.php'
|
||||
logout_url = mobj.group('root_url') + 'logout'
|
||||
|
||||
login_form = {
|
||||
'email': username,
|
||||
'password': password,
|
||||
}
|
||||
|
||||
request = compat_urllib_request.Request(login_url, compat_urllib_parse.urlencode(login_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
self._download_webpage(request, video_id, 'Logging in')
|
||||
start_page = self._download_webpage(webpage_url, video_id, 'Getting authenticated video page')
|
||||
self._download_webpage(logout_url, video_id, 'Logging out')
|
||||
|
||||
return start_page
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
video_id = mobj.group('id')
|
||||
webpage_url = 'http://www.gdcvault.com/play/' + video_id
|
||||
start_page = self._download_webpage(webpage_url, video_id)
|
||||
|
||||
xml_root = self._html_search_regex(r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page, 'xml root', None, False)
|
||||
|
||||
if xml_root is None:
|
||||
# Probably need to authenticate
|
||||
start_page = self._login(webpage_url, video_id)
|
||||
if start_page is None:
|
||||
self.report_warning('Could not login.')
|
||||
else:
|
||||
# Grab the url from the authenticated page
|
||||
xml_root = self._html_search_regex(r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page, 'xml root')
|
||||
|
||||
xml_name = self._html_search_regex(r'<iframe src=".*?\?xml=(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename', None, False)
|
||||
if xml_name is None:
|
||||
# Fallback to the older format
|
||||
xml_name = self._html_search_regex(r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename')
|
||||
|
||||
xml_decription_url = xml_root + 'xml/' + xml_name
|
||||
xml_description = self._download_xml(xml_decription_url, video_id)
|
||||
|
||||
video_title = xml_description.find('./metadata/title').text
|
||||
video_formats = self._parse_mp4(xml_description)
|
||||
if video_formats is None:
|
||||
video_formats = self._parse_flv(xml_description)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'formats': video_formats,
|
||||
}
|
@ -4,7 +4,6 @@ from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
@ -17,6 +16,7 @@ from ..utils import (
|
||||
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
parse_xml,
|
||||
smuggle_url,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
@ -24,6 +24,7 @@ from ..utils import (
|
||||
)
|
||||
from .brightcove import BrightcoveIE
|
||||
from .ooyala import OoyalaIE
|
||||
from .rutv import RUTVIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@ -116,6 +117,60 @@ class GenericIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': False,
|
||||
}
|
||||
},
|
||||
# embed.ly video
|
||||
{
|
||||
'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
|
||||
'info_dict': {
|
||||
'id': '9ODmcdjQcHQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
|
||||
'upload_date': '20140225',
|
||||
'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
|
||||
'uploader': 'Tested',
|
||||
'uploader_id': 'testedcom',
|
||||
},
|
||||
# No need to test YoutubeIE here
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# funnyordie embed
|
||||
{
|
||||
'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
|
||||
'md5': '7cf780be104d40fea7bae52eed4a470e',
|
||||
'info_dict': {
|
||||
'id': '18e820ec3f',
|
||||
'ext': 'mp4',
|
||||
'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
|
||||
'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
|
||||
},
|
||||
},
|
||||
# RUTV embed
|
||||
{
|
||||
'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
|
||||
'info_dict': {
|
||||
'id': '776940',
|
||||
'ext': 'mp4',
|
||||
'title': 'Охотское море стало целиком российским',
|
||||
'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# Embedded TED video
|
||||
{
|
||||
'url': 'http://en.support.wordpress.com/videos/ted-talks/',
|
||||
'md5': 'deeeabcc1085eb2ba205474e7235a3d5',
|
||||
'info_dict': {
|
||||
'id': '981',
|
||||
'ext': 'mp4',
|
||||
'title': 'My web playroom',
|
||||
'uploader': 'Ze Frank',
|
||||
'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
@ -142,9 +197,14 @@ class GenericIE(InfoExtractor):
|
||||
newurl = newurl.replace(' ', '%20')
|
||||
newheaders = dict((k,v) for k,v in req.headers.items()
|
||||
if k.lower() not in ("content-length", "content-type"))
|
||||
try:
|
||||
# This function was deprecated in python 3.3 and removed in 3.4
|
||||
origin_req_host = req.get_origin_req_host()
|
||||
except AttributeError:
|
||||
origin_req_host = req.origin_req_host
|
||||
return HEADRequest(newurl,
|
||||
headers=newheaders,
|
||||
origin_req_host=req.get_origin_req_host(),
|
||||
origin_req_host=origin_req_host,
|
||||
unverifiable=True)
|
||||
else:
|
||||
raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp)
|
||||
@ -211,7 +271,7 @@ class GenericIE(InfoExtractor):
|
||||
else:
|
||||
assert ':' in default_search
|
||||
return self.url_result(default_search + url)
|
||||
video_id = os.path.splitext(url.split('/')[-1])[0]
|
||||
video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
|
||||
|
||||
self.to_screen('%s: Requesting header' % video_id)
|
||||
|
||||
@ -257,7 +317,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Is it an RSS feed?
|
||||
try:
|
||||
doc = xml.etree.ElementTree.fromstring(webpage.encode('utf-8'))
|
||||
doc = parse_xml(webpage)
|
||||
if doc.tag == 'rss':
|
||||
return self._extract_rss(url, video_id, doc)
|
||||
except compat_xml_parse_error:
|
||||
@ -296,9 +356,9 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Look for embedded (iframe) Vimeo player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src="((?:https?:)?//player\.vimeo\.com/video/.+?)"', webpage)
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
|
||||
if mobj:
|
||||
player_url = unescapeHTML(mobj.group(1))
|
||||
player_url = unescapeHTML(mobj.group('url'))
|
||||
surl = smuggle_url(player_url, {'Referer': url})
|
||||
return self.url_result(surl, 'Vimeo')
|
||||
|
||||
@ -396,12 +456,38 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'Facebook')
|
||||
|
||||
# Look for embedded VK player
|
||||
mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'VK')
|
||||
|
||||
# Look for embedded Huffington Post player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'HuffPost')
|
||||
|
||||
# Look for embed.ly
|
||||
mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'))
|
||||
mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
|
||||
|
||||
# Look for funnyordie embed
|
||||
matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
|
||||
if matches:
|
||||
urlrs = [self.url_result(unescapeHTML(eurl), 'FunnyOrDie')
|
||||
for eurl in matches]
|
||||
return self.playlist_result(
|
||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||
|
||||
# Look for embedded RUTV player
|
||||
rutv_url = RUTVIE._extract_url(webpage)
|
||||
if rutv_url:
|
||||
return self.url_result(rutv_url, 'RUTV')
|
||||
|
||||
# Start with something easy: JW Player in SWFObject
|
||||
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||
if mobj is None:
|
||||
@ -413,6 +499,13 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is None:
|
||||
# Broaden the search a little bit: JWPlayer JS loader
|
||||
mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
|
||||
|
||||
# Look for embedded TED player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'TED')
|
||||
|
||||
if mobj is None:
|
||||
# Try to find twitter cards info
|
||||
mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
|
||||
|
@ -46,6 +46,6 @@ class GoogleSearchIE(SearchInfoExtractor):
|
||||
'url': mobj.group(1)
|
||||
})
|
||||
|
||||
if (len(entries) >= n) or not re.search(r'class="pn" id="pnnext"', webpage):
|
||||
if (len(entries) >= n) or not re.search(r'id="pnnext"', webpage):
|
||||
res['entries'] = entries[:n]
|
||||
return res
|
||||
|
@ -6,7 +6,10 @@ from random import random
|
||||
from math import floor
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import compat_urllib_request
|
||||
from ..utils import (
|
||||
compat_urllib_request,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class IPrimaIE(InfoExtractor):
|
||||
@ -36,6 +39,7 @@ class IPrimaIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True, # requires rtmpdump
|
||||
},
|
||||
'skip': 'Do not have permission to access this page',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -44,6 +48,10 @@ class IPrimaIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if re.search(r'Nemáte oprávnění přistupovat na tuto stránku\.\s*</div>', webpage):
|
||||
raise ExtractorError(
|
||||
'%s said: You do not have permission to access this page' % self.IE_NAME, expected=True)
|
||||
|
||||
player_url = (
|
||||
'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' %
|
||||
(floor(random()*1073741824), floor(random()*1073741824))
|
||||
|
@ -1,56 +1,61 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
RegexNotFoundError,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class JukeboxIE(InfoExtractor):
|
||||
_VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+)\.html'
|
||||
_IFRAME = r'<iframe .*src="(?P<iframe>[^"]*)".*>'
|
||||
_VIDEO_URL = r'"config":{"file":"(?P<video_url>http:[^"]+[.](?P<video_ext>[^.?]+)[?]mdtk=[0-9]+)"'
|
||||
_TITLE = r'<h1 class="inline">(?P<title>[^<]+)</h1>.*<span id="infos_article_artist">(?P<artist>[^<]+)</span>'
|
||||
_IS_YOUTUBE = r'config":{"file":"(?P<youtube_url>http:[\\][/][\\][/]www[.]youtube[.]com[\\][/]watch[?]v=[^"]+)"'
|
||||
_TEST = {
|
||||
'url': 'http://www.jukebox.es/kosheen/videoclip,pride,r303r.html',
|
||||
'md5': '5dc6477e74b1e37042ac5acedd8413e5',
|
||||
'info_dict': {
|
||||
'id': 'r303r',
|
||||
'ext': 'flv',
|
||||
'title': 'Kosheen-En Vivo Pride',
|
||||
'uploader': 'Kosheen',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('video_id')
|
||||
|
||||
html = self._download_webpage(url, video_id)
|
||||
|
||||
mobj = re.search(self._IFRAME, html)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Cannot extract iframe url')
|
||||
iframe_url = unescapeHTML(mobj.group('iframe'))
|
||||
iframe_url = unescapeHTML(self._search_regex(r'<iframe .*src="([^"]*)"', html, 'iframe url'))
|
||||
|
||||
iframe_html = self._download_webpage(iframe_url, video_id, 'Downloading iframe')
|
||||
mobj = re.search(r'class="jkb_waiting"', iframe_html)
|
||||
if mobj is not None:
|
||||
raise ExtractorError(u'Video is not available(in your country?)!')
|
||||
if re.search(r'class="jkb_waiting"', iframe_html) is not None:
|
||||
raise ExtractorError('Video is not available(in your country?)!')
|
||||
|
||||
self.report_extraction(video_id)
|
||||
|
||||
mobj = re.search(self._VIDEO_URL, iframe_html)
|
||||
if mobj is None:
|
||||
mobj = re.search(self._IS_YOUTUBE, iframe_html)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Cannot extract video url')
|
||||
youtube_url = unescapeHTML(mobj.group('youtube_url')).replace('\/','/')
|
||||
self.to_screen(u'Youtube video detected')
|
||||
return self.url_result(youtube_url,ie='Youtube')
|
||||
video_url = unescapeHTML(mobj.group('video_url')).replace('\/','/')
|
||||
video_ext = unescapeHTML(mobj.group('video_ext'))
|
||||
try:
|
||||
video_url = self._search_regex(r'"config":{"file":"(?P<video_url>http:[^"]+\?mdtk=[0-9]+)"',
|
||||
iframe_html, 'video url')
|
||||
video_url = unescapeHTML(video_url).replace('\/', '/')
|
||||
except RegexNotFoundError:
|
||||
youtube_url = self._search_regex(
|
||||
r'config":{"file":"(http:\\/\\/www\.youtube\.com\\/watch\?v=[^"]+)"',
|
||||
iframe_html, 'youtube url')
|
||||
youtube_url = unescapeHTML(youtube_url).replace('\/', '/')
|
||||
self.to_screen('Youtube video detected')
|
||||
return self.url_result(youtube_url, ie='Youtube')
|
||||
|
||||
mobj = re.search(self._TITLE, html)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Cannot extract title')
|
||||
title = unescapeHTML(mobj.group('title'))
|
||||
artist = unescapeHTML(mobj.group('artist'))
|
||||
title = self._html_search_regex(r'<h1 class="inline">([^<]+)</h1>',
|
||||
html, 'title')
|
||||
artist = self._html_search_regex(r'<span id="infos_article_artist">([^<]+)</span>',
|
||||
html, 'artist')
|
||||
|
||||
return [{'id': video_id,
|
||||
'url': video_url,
|
||||
'title': artist + '-' + title,
|
||||
'ext': video_ext
|
||||
}]
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': artist + '-' + title,
|
||||
'uploader': artist,
|
||||
}
|
||||
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class KontrTubeIE(InfoExtractor):
|
||||
@ -32,27 +33,26 @@ class KontrTubeIE(InfoExtractor):
|
||||
|
||||
video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL')
|
||||
thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False)
|
||||
title = self._html_search_regex(r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage,
|
||||
'video title')
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.+?) - Труба зовёт - Интересный видеохостинг</title>', webpage, 'video title')
|
||||
description = self._html_search_meta('description', webpage, 'video description')
|
||||
|
||||
mobj = re.search(r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
|
||||
webpage)
|
||||
mobj = re.search(
|
||||
r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>', webpage)
|
||||
duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
|
||||
|
||||
view_count = self._html_search_regex(r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage,
|
||||
'view count', fatal=False)
|
||||
view_count = int(view_count) if view_count is not None else None
|
||||
view_count = self._html_search_regex(
|
||||
r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage, 'view count', fatal=False)
|
||||
|
||||
comment_count = None
|
||||
comment_str = self._html_search_regex(r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count',
|
||||
fatal=False)
|
||||
comment_str = self._html_search_regex(
|
||||
r'Комментарии: <span>([^<]+)</span>', webpage, 'comment count', fatal=False)
|
||||
if comment_str.startswith('комментариев нет'):
|
||||
comment_count = 0
|
||||
else:
|
||||
mobj = re.search(r'\d+ из (?P<total>\d+) комментариев', comment_str)
|
||||
if mobj:
|
||||
comment_count = int(mobj.group('total'))
|
||||
comment_count = mobj.group('total')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@ -61,6 +61,6 @@ class KontrTubeIE(InfoExtractor):
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
'view_count': int_or_none(view_count),
|
||||
'comment_count': int_or_none(comment_count),
|
||||
}
|
@ -6,7 +6,8 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_strdate
|
||||
unified_strdate,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
@ -34,11 +35,9 @@ class LifeNewsIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage('http://lifenews.ru/news/%s' % video_id, video_id, 'Downloading page')
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'<video.*?src="([^"]+)".*?></video>', webpage, 'video URL')
|
||||
|
||||
thumbnail = self._html_search_regex(
|
||||
r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail')
|
||||
videos = re.findall(r'<video.*?poster="(?P<poster>[^"]+)".*?src="(?P<video>[^"]+)".*?></video>', webpage)
|
||||
if not videos:
|
||||
raise ExtractorError('No media links available for %s' % video_id)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
TITLE_SUFFIX = ' - Первый по срочным новостям — LIFE | NEWS'
|
||||
@ -57,13 +56,19 @@ class LifeNewsIE(InfoExtractor):
|
||||
if upload_date is not None:
|
||||
upload_date = unified_strdate(upload_date)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'thumbnail': thumbnail,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'view_count': int_or_none(view_count),
|
||||
'comment_count': int_or_none(comment_count),
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
def make_entry(video_id, media, video_number=None):
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': media[1],
|
||||
'thumbnail': media[0],
|
||||
'title': title if video_number is None else '%s-video%s' % (title, video_number),
|
||||
'description': description,
|
||||
'view_count': int_or_none(view_count),
|
||||
'comment_count': int_or_none(comment_count),
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
|
||||
if len(videos) == 1:
|
||||
return make_entry(video_id, videos[0])
|
||||
else:
|
||||
return [make_entry(video_id, media, video_number+1) for video_number, media in enumerate(videos)]
|
@ -8,7 +8,9 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
ExtractorError
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
compat_str,
|
||||
)
|
||||
|
||||
|
||||
@ -19,16 +21,17 @@ class LyndaIE(SubtitlesInfoExtractor):
|
||||
_LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
|
||||
_NETRC_MACHINE = 'lynda'
|
||||
|
||||
_SUCCESSFUL_LOGIN_REGEX = r'<a href="https://www.lynda.com/home/userAccount/ChangeContactInfo.aspx" data-qa="eyebrow_account_menu">My account'
|
||||
_SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true'
|
||||
_TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
|
||||
|
||||
ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
|
||||
'file': '114408.mp4',
|
||||
'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
|
||||
'info_dict': {
|
||||
'id': '114408',
|
||||
'ext': 'mp4',
|
||||
'title': 'Using the exercise files',
|
||||
'duration': 68
|
||||
}
|
||||
@ -41,27 +44,44 @@ class LyndaIE(SubtitlesInfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group(1)
|
||||
|
||||
page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, video_id,
|
||||
'Downloading video JSON')
|
||||
video_json = json.loads(page)
|
||||
|
||||
if 'Status' in video_json:
|
||||
raise ExtractorError('lynda returned error: %s' % video_json['Message'], expected=True)
|
||||
|
||||
if video_json['HasAccess'] is False:
|
||||
raise ExtractorError('Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True)
|
||||
raise ExtractorError(
|
||||
'Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True)
|
||||
|
||||
video_id = video_json['ID']
|
||||
video_id = compat_str(video_json['ID'])
|
||||
duration = video_json['DurationInSeconds']
|
||||
title = video_json['Title']
|
||||
|
||||
formats = [{'url': fmt['Url'],
|
||||
formats = []
|
||||
|
||||
fmts = video_json.get('Formats')
|
||||
if fmts:
|
||||
formats.extend([
|
||||
{
|
||||
'url': fmt['Url'],
|
||||
'ext': fmt['Extension'],
|
||||
'width': fmt['Width'],
|
||||
'height': fmt['Height'],
|
||||
'filesize': fmt['FileSize'],
|
||||
'format_id': str(fmt['Resolution'])
|
||||
} for fmt in video_json['Formats']]
|
||||
} for fmt in fmts])
|
||||
|
||||
prioritized_streams = video_json.get('PrioritizedStreams')
|
||||
if prioritized_streams:
|
||||
formats.extend([
|
||||
{
|
||||
'url': video_url,
|
||||
'width': int_or_none(format_id),
|
||||
'format_id': format_id,
|
||||
} for format_id, video_url in prioritized_streams['0'].items()
|
||||
])
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
@ -91,7 +111,7 @@ class LyndaIE(SubtitlesInfoExtractor):
|
||||
'stayPut': 'false'
|
||||
}
|
||||
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
|
||||
login_page = self._download_webpage(request, None, note='Logging in as %s' % username)
|
||||
login_page = self._download_webpage(request, None, 'Logging in as %s' % username)
|
||||
|
||||
# Not (yet) logged in
|
||||
m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page)
|
||||
@ -116,7 +136,7 @@ class LyndaIE(SubtitlesInfoExtractor):
|
||||
'stayPut': 'false',
|
||||
}
|
||||
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form))
|
||||
login_page = self._download_webpage(request, None, note='Confirming log in and log out from another device')
|
||||
login_page = self._download_webpage(request, None, 'Confirming log in and log out from another device')
|
||||
|
||||
if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
|
||||
raise ExtractorError('Unable to log in')
|
||||
@ -150,7 +170,7 @@ class LyndaIE(SubtitlesInfoExtractor):
|
||||
|
||||
def _get_available_subtitles(self, video_id, webpage):
|
||||
url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
|
||||
sub = self._download_webpage(url, None, note=False)
|
||||
sub = self._download_webpage(url, None, False)
|
||||
sub_json = json.loads(sub)
|
||||
return {'en': url} if len(sub_json) > 0 else {}
|
||||
|
||||
@ -179,6 +199,9 @@ class LyndaCourseIE(InfoExtractor):
|
||||
videos = []
|
||||
(username, _) = self._get_login_info()
|
||||
|
||||
# Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided
|
||||
# by single video API anymore
|
||||
|
||||
for chapter in course_json['Chapters']:
|
||||
for video in chapter['Videos']:
|
||||
if username is None and video['HasAccess'] is False:
|
||||
|
66
youtube_dl/extractor/mailru.py
Normal file
66
youtube_dl/extractor/mailru.py
Normal file
@ -0,0 +1,66 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import datetime
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class MailRuIE(InfoExtractor):
|
||||
IE_NAME = 'mailru'
|
||||
IE_DESC = 'Видео@Mail.Ru'
|
||||
_VALID_URL = r'http://(?:www\.)?my\.mail\.ru/video/.*#video=/?(?P<id>[^/]+/[^/]+/[^/]+/\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76',
|
||||
'md5': 'dea205f03120046894db4ebb6159879a',
|
||||
'info_dict': {
|
||||
'id': '46301138',
|
||||
'ext': 'mp4',
|
||||
'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро',
|
||||
'upload_date': '20140224',
|
||||
'uploader': 'sonypicturesrus',
|
||||
'uploader_id': 'sonypicturesrus@mail.ru',
|
||||
'duration': 184,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video_data = self._download_json(
|
||||
'http://videoapi.my.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON')
|
||||
|
||||
author = video_data['author']
|
||||
uploader = author['name']
|
||||
uploader_id = author['id']
|
||||
|
||||
movie = video_data['movie']
|
||||
content_id = str(movie['contentId'])
|
||||
title = movie['title']
|
||||
thumbnail = movie['poster']
|
||||
duration = movie['duration']
|
||||
|
||||
upload_date = datetime.datetime.fromtimestamp(video_data['timestamp']).strftime('%Y%m%d')
|
||||
view_count = video_data['views_count']
|
||||
|
||||
formats = [
|
||||
{
|
||||
'url': video['url'],
|
||||
'format_id': video['name'],
|
||||
} for video in video_data['videos']
|
||||
]
|
||||
|
||||
return {
|
||||
'id': content_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
}
|
@ -5,6 +5,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
compat_urllib_parse,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
@ -15,8 +16,9 @@ class MixcloudIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
|
||||
'file': 'dholbach-cryptkeeper.mp3',
|
||||
'info_dict': {
|
||||
'id': 'dholbach-cryptkeeper',
|
||||
'ext': 'mp3',
|
||||
'title': 'Cryptkeeper',
|
||||
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
|
||||
'uploader': 'Daniel Holbach',
|
||||
@ -45,7 +47,7 @@ class MixcloudIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
uploader = mobj.group(1)
|
||||
cloudcast_name = mobj.group(2)
|
||||
track_id = '-'.join((uploader, cloudcast_name))
|
||||
track_id = compat_urllib_parse.unquote('-'.join((uploader, cloudcast_name)))
|
||||
|
||||
webpage = self._download_webpage(url, track_id)
|
||||
|
||||
|
@ -5,9 +5,12 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
HEADRequest,
|
||||
unescapeHTML,
|
||||
url_basename,
|
||||
RegexNotFoundError,
|
||||
)
|
||||
@ -18,6 +21,7 @@ def _media_xml_tag(tag):
|
||||
|
||||
|
||||
class MTVServicesInfoExtractor(InfoExtractor):
|
||||
_MOBILE_TEMPLATE = None
|
||||
@staticmethod
|
||||
def _id_from_uri(uri):
|
||||
return uri.split(':')[-1]
|
||||
@ -39,9 +43,29 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
else:
|
||||
return thumb_node.attrib['url']
|
||||
|
||||
def _extract_video_formats(self, mdoc):
|
||||
if re.match(r'.*/error_country_block\.swf$', mdoc.find('.//src').text) is not None:
|
||||
raise ExtractorError('This video is not available from your country.', expected=True)
|
||||
def _extract_mobile_video_formats(self, mtvn_id):
|
||||
webpage_url = self._MOBILE_TEMPLATE % mtvn_id
|
||||
req = compat_urllib_request.Request(webpage_url)
|
||||
# Otherwise we get a webpage that would execute some javascript
|
||||
req.add_header('Youtubedl-user-agent', 'curl/7')
|
||||
webpage = self._download_webpage(req, mtvn_id,
|
||||
'Downloading mobile page')
|
||||
metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url'))
|
||||
req = HEADRequest(metrics_url)
|
||||
response = self._request_webpage(req, mtvn_id, 'Resolving url')
|
||||
url = response.geturl()
|
||||
# Transform the url to get the best quality:
|
||||
url = re.sub(r'.+pxE=mp4', 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4', url, 1)
|
||||
return [{'url': url,'ext': 'mp4'}]
|
||||
|
||||
def _extract_video_formats(self, mdoc, mtvn_id):
|
||||
if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None:
|
||||
if mtvn_id is not None and self._MOBILE_TEMPLATE is not None:
|
||||
self.to_screen('The normal version is not available from your '
|
||||
'country, trying with the mobile version')
|
||||
return self._extract_mobile_video_formats(mtvn_id)
|
||||
raise ExtractorError('This video is not available from your country.',
|
||||
expected=True)
|
||||
|
||||
formats = []
|
||||
for rendition in mdoc.findall('.//rendition'):
|
||||
@ -94,9 +118,16 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
raise ExtractorError('Could not find video title')
|
||||
title = title.strip()
|
||||
|
||||
# This a short id that's used in the webpage urls
|
||||
mtvn_id = None
|
||||
mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category',
|
||||
'scheme', 'urn:mtvn:id')
|
||||
if mtvn_id_node is not None:
|
||||
mtvn_id = mtvn_id_node.text
|
||||
|
||||
return {
|
||||
'title': title,
|
||||
'formats': self._extract_video_formats(mediagen_doc),
|
||||
'formats': self._extract_video_formats(mediagen_doc, mtvn_id),
|
||||
'id': video_id,
|
||||
'thumbnail': self._get_thumbnail_url(uri, itemdoc),
|
||||
'description': description,
|
||||
|
@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import binascii
|
||||
import base64
|
||||
import hashlib
|
||||
@ -14,18 +16,16 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
|
||||
class MyVideoIE(InfoExtractor):
|
||||
"""Information Extractor for myvideo.de."""
|
||||
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/([0-9]+)/([^?/]+).*'
|
||||
IE_NAME = u'myvideo'
|
||||
_VALID_URL = r'http://(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/(?P<id>[0-9]+)/[^?/]+.*'
|
||||
IE_NAME = 'myvideo'
|
||||
_TEST = {
|
||||
u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
|
||||
u'file': u'8229274.flv',
|
||||
u'md5': u'2d2753e8130479ba2cb7e0a37002053e',
|
||||
u'info_dict': {
|
||||
u"title": u"bowling-fail-or-win"
|
||||
'url': 'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
|
||||
'md5': '2d2753e8130479ba2cb7e0a37002053e',
|
||||
'info_dict': {
|
||||
'id': '8229274',
|
||||
'ext': 'flv',
|
||||
'title': 'bowling-fail-or-win',
|
||||
}
|
||||
}
|
||||
|
||||
@ -53,10 +53,7 @@ class MyVideoIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self,url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'invalid URL: %s' % url)
|
||||
|
||||
video_id = mobj.group(1)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
GK = (
|
||||
b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
|
||||
@ -74,37 +71,33 @@ class MyVideoIE(InfoExtractor):
|
||||
video_url = mobj.group(1) + '.flv'
|
||||
|
||||
video_title = self._html_search_regex('<title>([^<]+)</title>',
|
||||
webpage, u'title')
|
||||
webpage, 'title')
|
||||
|
||||
video_ext = self._search_regex('[.](.+?)$', video_url, u'extension')
|
||||
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'uploader': None,
|
||||
'upload_date': None,
|
||||
'title': video_title,
|
||||
'ext': video_ext,
|
||||
}]
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': video_title,
|
||||
}
|
||||
|
||||
mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage)
|
||||
if mobj is not None:
|
||||
request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '')
|
||||
response = self._download_webpage(request, video_id,
|
||||
u'Downloading video info')
|
||||
'Downloading video info')
|
||||
info = json.loads(base64.b64decode(response).decode('utf-8'))
|
||||
return {'id': video_id,
|
||||
'title': info['title'],
|
||||
'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
|
||||
'play_path': info['filename'],
|
||||
'ext': 'flv',
|
||||
'thumbnail': info['thumbnail'][0]['url'],
|
||||
}
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info['title'],
|
||||
'url': info['streaming_url'].replace('rtmpe', 'rtmpt'),
|
||||
'play_path': info['filename'],
|
||||
'ext': 'flv',
|
||||
'thumbnail': info['thumbnail'][0]['url'],
|
||||
}
|
||||
|
||||
# try encxml
|
||||
mobj = re.search('var flashvars={(.+?)}', webpage)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Unable to extract video')
|
||||
raise ExtractorError('Unable to extract video')
|
||||
|
||||
params = {}
|
||||
encxml = ''
|
||||
@ -118,7 +111,7 @@ class MyVideoIE(InfoExtractor):
|
||||
params['domain'] = 'www.myvideo.de'
|
||||
xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
|
||||
if 'flash_playertype=MTV' in xmldata_url:
|
||||
self._downloader.report_warning(u'avoiding MTV player')
|
||||
self._downloader.report_warning('avoiding MTV player')
|
||||
xmldata_url = (
|
||||
'http://www.myvideo.de/dynamic/get_player_video_xml.php'
|
||||
'?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
|
||||
@ -144,7 +137,7 @@ class MyVideoIE(InfoExtractor):
|
||||
video_url = compat_urllib_parse.unquote(mobj.group(1))
|
||||
if 'myvideo2flash' in video_url:
|
||||
self.report_warning(
|
||||
u'Rewriting URL to use unencrypted rtmp:// ...',
|
||||
'Rewriting URL to use unencrypted rtmp:// ...',
|
||||
video_id)
|
||||
video_url = video_url.replace('rtmpe://', 'rtmp://')
|
||||
|
||||
@ -152,39 +145,31 @@ class MyVideoIE(InfoExtractor):
|
||||
# extract non rtmp videos
|
||||
mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'unable to extract url')
|
||||
raise ExtractorError('unable to extract url')
|
||||
video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))
|
||||
|
||||
video_file = self._search_regex('source=\'(.*?)\'', dec_data, u'video file')
|
||||
video_file = self._search_regex('source=\'(.*?)\'', dec_data, 'video file')
|
||||
video_file = compat_urllib_parse.unquote(video_file)
|
||||
|
||||
if not video_file.endswith('f4m'):
|
||||
ppath, prefix = video_file.split('.')
|
||||
video_playpath = '%s:%s' % (prefix, ppath)
|
||||
video_hls_playlist = ''
|
||||
else:
|
||||
video_playpath = ''
|
||||
video_hls_playlist = (
|
||||
video_file
|
||||
).replace('.f4m', '.m3u8')
|
||||
|
||||
video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, u'swfobj')
|
||||
video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj')
|
||||
video_swfobj = compat_urllib_parse.unquote(video_swfobj)
|
||||
|
||||
video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
|
||||
webpage, u'title')
|
||||
webpage, 'title')
|
||||
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'tc_url': video_url,
|
||||
'uploader': None,
|
||||
'upload_date': None,
|
||||
'title': video_title,
|
||||
'ext': u'flv',
|
||||
'play_path': video_playpath,
|
||||
'video_file': video_file,
|
||||
'video_hls_playlist': video_hls_playlist,
|
||||
'player_url': video_swfobj,
|
||||
}]
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'tc_url': video_url,
|
||||
'title': video_title,
|
||||
'ext': 'flv',
|
||||
'play_path': video_playpath,
|
||||
'player_url': video_swfobj,
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@ -12,10 +11,13 @@ class NineGagIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
"url": "http://9gag.tv/v/1912",
|
||||
"file": "1912.mp4",
|
||||
"info_dict": {
|
||||
"id": "1912",
|
||||
"ext": "mp4",
|
||||
"description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
|
||||
"title": "\"People Are Awesome 2013\" Is Absolutely Awesome"
|
||||
"title": "\"People Are Awesome 2013\" Is Absolutely Awesome",
|
||||
"view_count": int,
|
||||
"thumbnail": "re:^https?://",
|
||||
},
|
||||
'add_ie': ['Youtube']
|
||||
}
|
||||
@ -25,21 +27,27 @@ class NineGagIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
data_json = self._html_search_regex(r'''(?x)
|
||||
<div\s*id="tv-video"\s*data-video-source="youtube"\s*
|
||||
data-video-meta="([^"]+)"''', webpage, 'video metadata')
|
||||
|
||||
data = json.loads(data_json)
|
||||
youtube_id = self._html_search_regex(
|
||||
r'(?s)id="jsid-video-post-container".*?data-external-id="([^"]+)"',
|
||||
webpage, 'video ID')
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div class="video-caption">.*?<p>(.*?)</p>', webpage,
|
||||
'description', fatal=False)
|
||||
view_count_str = self._html_search_regex(
|
||||
r'<p><b>([0-9][0-9,]*)</b> views</p>', webpage, 'view count',
|
||||
fatal=False)
|
||||
view_count = (
|
||||
None if view_count_str is None
|
||||
else int(view_count_str.replace(',', '')))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': data['youtubeVideoId'],
|
||||
'url': youtube_id,
|
||||
'ie_key': 'Youtube',
|
||||
'id': video_id,
|
||||
'title': data['title'],
|
||||
'description': data['description'],
|
||||
'view_count': int(data['view_count']),
|
||||
'like_count': int(data['statistic']['like']),
|
||||
'dislike_count': int(data['statistic']['dislike']),
|
||||
'thumbnail': data['thumbnail_url'],
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': description,
|
||||
'view_count': view_count,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
||||
|
@ -8,6 +8,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
unified_strdate,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
@ -35,7 +36,15 @@ class ORFIE(InfoExtractor):
|
||||
data_json = self._search_regex(
|
||||
r'initializeAdworx\((.+?)\);\n', webpage, 'video info')
|
||||
all_data = json.loads(data_json)
|
||||
sdata = all_data[0]['values']['segments']
|
||||
|
||||
def get_segments(all_data):
|
||||
for data in all_data:
|
||||
if data['name'] == 'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM':
|
||||
return data['values']['segments']
|
||||
|
||||
sdata = get_segments(all_data)
|
||||
if not sdata:
|
||||
raise ExtractorError('Unable to extract segments')
|
||||
|
||||
def quality_to_int(s):
|
||||
m = re.search('([0-9]+)', s)
|
||||
|
@ -3,6 +3,9 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
US_RATINGS,
|
||||
)
|
||||
|
||||
|
||||
class PBSIE(InfoExtractor):
|
||||
@ -13,7 +16,7 @@ class PBSIE(InfoExtractor):
|
||||
# Article with embedded player
|
||||
(?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
|
||||
# Player
|
||||
video\.pbs\.org/partnerplayer/(?P<player_id>[^/]+)/
|
||||
video\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/
|
||||
)
|
||||
'''
|
||||
|
||||
@ -57,6 +60,11 @@ class PBSIE(InfoExtractor):
|
||||
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
|
||||
info = self._download_json(info_url, display_id)
|
||||
|
||||
rating_str = info.get('rating')
|
||||
if rating_str is not None:
|
||||
rating_str = rating_str.rpartition('-')[2]
|
||||
age_limit = US_RATINGS.get(rating_str)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info['title'],
|
||||
@ -65,4 +73,5 @@ class PBSIE(InfoExtractor):
|
||||
'description': info['program'].get('description'),
|
||||
'thumbnail': info.get('image_url'),
|
||||
'duration': info.get('duration'),
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
|
@ -1,76 +1,43 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
class PhotobucketIE(InfoExtractor):
|
||||
"""Information extractor for photobucket.com."""
|
||||
|
||||
# TODO: the original _VALID_URL was:
|
||||
# r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
|
||||
# Check if it's necessary to keep the old extracion process
|
||||
_VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
|
||||
IE_NAME = u'photobucket'
|
||||
_VALID_URL = r'http://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
|
||||
_TEST = {
|
||||
u'url': u'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
|
||||
u'file': u'zpsc0c3b9fa.mp4',
|
||||
u'md5': u'7dabfb92b0a31f6c16cebc0f8e60ff99',
|
||||
u'info_dict': {
|
||||
u"upload_date": u"20130504",
|
||||
u"uploader": u"rachaneronas",
|
||||
u"title": u"Tired of Link Building? Try BacklinkMyDomain.com!"
|
||||
'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
|
||||
'file': 'zpsc0c3b9fa.mp4',
|
||||
'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99',
|
||||
'info_dict': {
|
||||
'upload_date': '20130504',
|
||||
'uploader': 'rachaneronas',
|
||||
'title': 'Tired of Link Building? Try BacklinkMyDomain.com!',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
# Extract id from URL
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video_extension = mobj.group('ext')
|
||||
|
||||
# Retrieve video webpage to extract further information
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# Extract URL, uploader, and title from webpage
|
||||
self.report_extraction(video_id)
|
||||
# We try first by looking the javascript code:
|
||||
mobj = re.search(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (?P<json>.*?)\);', webpage)
|
||||
if mobj is not None:
|
||||
info = json.loads(mobj.group('json'))
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': info[u'downloadUrl'],
|
||||
'uploader': info[u'username'],
|
||||
'upload_date': datetime.date.fromtimestamp(info[u'creationDate']).strftime('%Y%m%d'),
|
||||
'title': info[u'title'],
|
||||
'ext': video_extension,
|
||||
'thumbnail': info[u'thumbUrl'],
|
||||
}]
|
||||
|
||||
# We try looking in other parts of the webpage
|
||||
video_url = self._search_regex(r'<link rel="video_src" href=".*\?file=([^"]+)" />',
|
||||
webpage, u'video URL')
|
||||
|
||||
mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Unable to extract title')
|
||||
video_title = mobj.group(1).decode('utf-8')
|
||||
video_uploader = mobj.group(2).decode('utf-8')
|
||||
|
||||
return [{
|
||||
'id': video_id.decode('utf-8'),
|
||||
'url': video_url.decode('utf-8'),
|
||||
'uploader': video_uploader,
|
||||
'upload_date': None,
|
||||
'title': video_title,
|
||||
'ext': video_extension.decode('utf-8'),
|
||||
}]
|
||||
info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);',
|
||||
webpage, 'info json')
|
||||
info = json.loads(info_json)
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': info['downloadUrl'],
|
||||
'uploader': info['username'],
|
||||
'upload_date': datetime.date.fromtimestamp(info['creationDate']).strftime('%Y%m%d'),
|
||||
'title': info['title'],
|
||||
'ext': video_extension,
|
||||
'thumbnail': info['thumbUrl'],
|
||||
}
|
||||
|
80
youtube_dl/extractor/playvid.py
Normal file
80
youtube_dl/extractor/playvid.py
Normal file
@ -0,0 +1,80 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
)
|
||||
|
||||
|
||||
class PlayvidIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)'
|
||||
_TEST = {
|
||||
'url': 'http://www.playvid.com/watch/agbDDi7WZTV',
|
||||
'md5': '44930f8afa616efdf9482daf4fe53e1e',
|
||||
'info_dict': {
|
||||
'id': 'agbDDi7WZTV',
|
||||
'ext': 'mp4',
|
||||
'title': 'Michelle Lewin in Miami Beach',
|
||||
'duration': 240,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_title = None
|
||||
duration = None
|
||||
video_thumbnail = None
|
||||
formats = []
|
||||
|
||||
# most of the information is stored in the flashvars
|
||||
flashvars = self._html_search_regex(
|
||||
r'flashvars="(.+?)"', webpage, 'flashvars')
|
||||
|
||||
infos = compat_urllib_parse.unquote(flashvars).split(r'&')
|
||||
for info in infos:
|
||||
videovars_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$', info)
|
||||
if videovars_match:
|
||||
key = videovars_match.group(1)
|
||||
val = videovars_match.group(2)
|
||||
|
||||
if key == 'title':
|
||||
video_title = compat_urllib_parse.unquote_plus(val)
|
||||
if key == 'duration':
|
||||
try:
|
||||
duration = int(val)
|
||||
except ValueError:
|
||||
pass
|
||||
if key == 'big_thumb':
|
||||
video_thumbnail = val
|
||||
|
||||
videourl_match = re.match(
|
||||
r'^video_urls\]\[(?P<resolution>[0-9]+)p', key)
|
||||
if videourl_match:
|
||||
height = int(videourl_match.group('resolution'))
|
||||
formats.append({
|
||||
'height': height,
|
||||
'url': val,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
# Extract title - should be in the flashvars; if not, look elsewhere
|
||||
if video_title is None:
|
||||
video_title = self._html_search_regex(
|
||||
r'<title>(.*?)</title', webpage, 'title')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': video_title,
|
||||
'thumbnail': video_thumbnail,
|
||||
'duration': duration,
|
||||
'description': None,
|
||||
'age_limit': 18
|
||||
}
|
@ -44,7 +44,7 @@ class PornHubIE(InfoExtractor):
|
||||
|
||||
video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
|
||||
if webpage.find('"encrypted":true') != -1:
|
||||
password = self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password').replace('+', ' ')
|
||||
password = compat_urllib_parse.unquote_plus(self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
|
||||
video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
|
||||
|
||||
formats = []
|
||||
|
@ -51,14 +51,14 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
'skip': 'Seems to be broken',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.prosiebenmaxx.de/yep/one-piece/video/148-folge-48-gold-rogers-heimat-ganze-folge',
|
||||
'url': 'http://www.prosiebenmaxx.de/tv/experience/video/144-countdown-fuer-die-autowerkstatt-ganze-folge',
|
||||
'info_dict': {
|
||||
'id': '2437108',
|
||||
'id': '2429369',
|
||||
'ext': 'mp4',
|
||||
'title': 'Folge 48: Gold Rogers Heimat',
|
||||
'description': 'Ruffy erreicht die Insel, auf der der berühmte Gold Roger lebte und hingerichtet wurde.',
|
||||
'upload_date': '20140226',
|
||||
'duration': 1401.48,
|
||||
'title': 'Countdown für die Autowerkstatt',
|
||||
'description': 'md5:809fc051a457b5d8666013bc40698817',
|
||||
'upload_date': '20140223',
|
||||
'duration': 2595.04,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
|
183
youtube_dl/extractor/rutv.py
Normal file
183
youtube_dl/extractor/rutv.py
Normal file
@ -0,0 +1,183 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none
|
||||
)
|
||||
|
||||
|
||||
class RUTVIE(InfoExtractor):
|
||||
IE_DESC = 'RUTV.RU'
|
||||
_VALID_URL = r'https?://player\.(?:rutv\.ru|vgtrk\.com)/(?:flash2v/container\.swf\?id=|iframe/(?P<type>swf|video|live)/id/)(?P<id>\d+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://player.rutv.ru/flash2v/container.swf?id=774471&sid=kultura&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972347/video_id/978186/brand_id/31724',
|
||||
'info_dict': {
|
||||
'id': '774471',
|
||||
'ext': 'mp4',
|
||||
'title': 'Монологи на все времена',
|
||||
'description': 'md5:18d8b5e6a41fb1faa53819471852d5d5',
|
||||
'duration': 2906,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://player.vgtrk.com/flash2v/container.swf?id=774016&sid=russiatv&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972098/video_id/977760/brand_id/57638',
|
||||
'info_dict': {
|
||||
'id': '774016',
|
||||
'ext': 'mp4',
|
||||
'title': 'Чужой в семье Сталина',
|
||||
'description': '',
|
||||
'duration': 2539,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://player.rutv.ru/iframe/swf/id/766888/sid/hitech/?acc_video_id=4000',
|
||||
'info_dict': {
|
||||
'id': '766888',
|
||||
'ext': 'mp4',
|
||||
'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"',
|
||||
'description': 'md5:65ddd47f9830c4f42ed6475f8730c995',
|
||||
'duration': 279,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://player.rutv.ru/iframe/video/id/771852/start_zoom/true/showZoomBtn/false/sid/russiatv/?acc_video_id=episode_id/970443/video_id/975648/brand_id/5169',
|
||||
'info_dict': {
|
||||
'id': '771852',
|
||||
'ext': 'mp4',
|
||||
'title': 'Прямой эфир. Жертвы загадочной болезни: смерть от старости в 17 лет',
|
||||
'description': 'md5:b81c8c55247a4bd996b43ce17395b2d8',
|
||||
'duration': 3096,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://player.rutv.ru/iframe/live/id/51499/showZoomBtn/false/isPlay/true/sid/sochi2014',
|
||||
'info_dict': {
|
||||
'id': '51499',
|
||||
'ext': 'flv',
|
||||
'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
|
||||
'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Translation has finished',
|
||||
},
|
||||
]
|
||||
|
||||
@classmethod
|
||||
def _extract_url(cls, webpage):
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.rutv\.ru/iframe/(?:swf|video|live)/id/.+?)\1', webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
||||
mobj = re.search(
|
||||
r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>http://player\.(?:rutv\.ru|vgtrk\.com)/flash2v/container\.swf\?id=.+?\2)',
|
||||
webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_type = mobj.group('type')
|
||||
|
||||
if not video_type or video_type == 'swf':
|
||||
video_type = 'video'
|
||||
|
||||
json_data = self._download_json(
|
||||
'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if video_type == 'live' else '', video_id),
|
||||
video_id, 'Downloading JSON')
|
||||
|
||||
if json_data['errors']:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, json_data['errors']), expected=True)
|
||||
|
||||
playlist = json_data['data']['playlist']
|
||||
medialist = playlist['medialist']
|
||||
media = medialist[0]
|
||||
|
||||
if media['errors']:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, media['errors']), expected=True)
|
||||
|
||||
view_count = playlist.get('count_views')
|
||||
priority_transport = playlist['priority_transport']
|
||||
|
||||
thumbnail = media['picture']
|
||||
width = int_or_none(media['width'])
|
||||
height = int_or_none(media['height'])
|
||||
description = media['anons']
|
||||
title = media['title']
|
||||
duration = int_or_none(media.get('duration'))
|
||||
|
||||
formats = []
|
||||
|
||||
for transport, links in media['sources'].items():
|
||||
for quality, url in links.items():
|
||||
if transport == 'rtmp':
|
||||
mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', url)
|
||||
if not mobj:
|
||||
continue
|
||||
fmt = {
|
||||
'url': mobj.group('url'),
|
||||
'play_path': mobj.group('playpath'),
|
||||
'app': mobj.group('app'),
|
||||
'page_url': 'http://player.rutv.ru',
|
||||
'player_url': 'http://player.rutv.ru/flash2v/osmf.swf?i=22',
|
||||
'rtmp_live': True,
|
||||
'ext': 'flv',
|
||||
'vbr': int(quality),
|
||||
}
|
||||
elif transport == 'm3u8':
|
||||
fmt = {
|
||||
'url': url,
|
||||
'ext': 'mp4',
|
||||
}
|
||||
else:
|
||||
fmt = {
|
||||
'url': url
|
||||
}
|
||||
fmt.update({
|
||||
'width': width,
|
||||
'height': height,
|
||||
'format_id': '%s-%s' % (transport, quality),
|
||||
'preference': -1 if priority_transport == transport else -2,
|
||||
})
|
||||
formats.append(fmt)
|
||||
|
||||
if not formats:
|
||||
raise ExtractorError('No media links available for %s' % video_id)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'view_count': view_count,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
@ -54,6 +54,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
'id': '47127627',
|
||||
'ext': 'mp3',
|
||||
'title': 'Goldrushed',
|
||||
'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
|
||||
'uploader': 'The Royal Concept',
|
||||
'upload_date': '20120521',
|
||||
},
|
||||
@ -217,7 +218,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
return self._extract_info_dict(info, full_title, secret_token=token)
|
||||
|
||||
class SoundcloudSetIE(SoundcloudIE):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
|
||||
_VALID_URL = r'https?://(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
|
||||
IE_NAME = 'soundcloud:set'
|
||||
# it's in tests/test_playlists.py
|
||||
_TESTS = []
|
||||
|
@ -1,10 +1,15 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .mtv import MTVServicesInfoExtractor
|
||||
|
||||
|
||||
class SpikeIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'https?://www\.spike\.com/(video-clips|episodes)/.+'
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(www\.spike\.com/(video-clips|episodes)/.+|
|
||||
m\.spike\.com/videos/video.rbml\?id=(?P<mobile_id>[^&]+))
|
||||
'''
|
||||
_TEST = {
|
||||
'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle',
|
||||
'md5': '1a9265f32b0c375793d6c4ce45255256',
|
||||
@ -17,3 +22,11 @@ class SpikeIE(MTVServicesInfoExtractor):
|
||||
}
|
||||
|
||||
_FEED_URL = 'http://www.spike.com/feeds/mrss/'
|
||||
_MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.search(self._VALID_URL, url)
|
||||
mobile_id = mobj.group('mobile_id')
|
||||
if mobile_id is not None:
|
||||
url = 'http://www.spike.com/video-clips/%s' % mobile_id
|
||||
return super(SpikeIE, self)._real_extract(url)
|
||||
|
@ -6,115 +6,120 @@ import re
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
RegexNotFoundError,
|
||||
compat_str,
|
||||
)
|
||||
|
||||
|
||||
class TEDIE(SubtitlesInfoExtractor):
|
||||
_VALID_URL=r'''http://www\.ted\.com/
|
||||
(
|
||||
((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist
|
||||
|
|
||||
((?P<type_talk>talks)) # We have a simple talk
|
||||
)
|
||||
(/lang/(.*?))? # The url may contain the language
|
||||
/(?P<name>\w+) # Here goes the name and then ".html"
|
||||
'''
|
||||
_VALID_URL = r'''(?x)
|
||||
(?P<proto>https?://)
|
||||
(?P<type>www|embed)(?P<urlmain>\.ted\.com/
|
||||
(
|
||||
(?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
|
||||
|
|
||||
((?P<type_talk>talks)) # We have a simple talk
|
||||
)
|
||||
(/lang/(.*?))? # The url may contain the language
|
||||
/(?P<name>\w+) # Here goes the name and then ".html"
|
||||
.*)$
|
||||
'''
|
||||
_TEST = {
|
||||
'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
|
||||
'file': '102.mp4',
|
||||
'md5': '4ea1dada91e4174b53dac2bb8ace429d',
|
||||
'info_dict': {
|
||||
"description": "md5:c6fa72e6eedbd938c9caf6b2702f5922",
|
||||
"title": "Dan Dennett: The illusion of consciousness"
|
||||
'id': '102',
|
||||
'ext': 'mp4',
|
||||
'title': 'The illusion of consciousness',
|
||||
'description': ('Philosopher Dan Dennett makes a compelling '
|
||||
'argument that not only don\'t we understand our own '
|
||||
'consciousness, but that half the time our brains are '
|
||||
'actively fooling us.'),
|
||||
'uploader': 'Dan Dennett',
|
||||
}
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
"""Receives a URL and returns True if suitable for this IE."""
|
||||
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
|
||||
_FORMATS_PREFERENCE = {
|
||||
'low': 1,
|
||||
'medium': 2,
|
||||
'high': 3,
|
||||
}
|
||||
|
||||
def _extract_info(self, webpage):
|
||||
info_json = self._search_regex(r'q\("\w+.init",({.+})\)</script>',
|
||||
webpage, 'info json')
|
||||
return json.loads(info_json)
|
||||
|
||||
def _real_extract(self, url):
|
||||
m=re.match(self._VALID_URL, url, re.VERBOSE)
|
||||
m = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||
if m.group('type') == 'embed':
|
||||
desktop_url = m.group('proto') + 'www' + m.group('urlmain')
|
||||
return self.url_result(desktop_url, 'TED')
|
||||
name = m.group('name')
|
||||
if m.group('type_talk'):
|
||||
return self._talk_info(url)
|
||||
else :
|
||||
playlist_id=m.group('playlist_id')
|
||||
name=m.group('name')
|
||||
self.to_screen(u'Getting info of playlist %s: "%s"' % (playlist_id,name))
|
||||
return [self._playlist_videos_info(url,name,playlist_id)]
|
||||
return self._talk_info(url, name)
|
||||
else:
|
||||
return self._playlist_videos_info(url, name)
|
||||
|
||||
|
||||
def _playlist_videos_info(self, url, name, playlist_id):
|
||||
def _playlist_videos_info(self, url, name):
|
||||
'''Returns the videos of the playlist'''
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, playlist_id, 'Downloading playlist webpage')
|
||||
matches = re.finditer(
|
||||
r'<p\s+class="talk-title[^"]*"><a\s+href="(?P<talk_url>/talks/[^"]+\.html)">[^<]*</a></p>',
|
||||
webpage)
|
||||
|
||||
playlist_title = self._html_search_regex(r'div class="headline">\s*?<h1>\s*?<span>(.*?)</span>',
|
||||
webpage, 'playlist title')
|
||||
webpage = self._download_webpage(url, name,
|
||||
'Downloading playlist webpage')
|
||||
info = self._extract_info(webpage)
|
||||
playlist_info = info['playlist']
|
||||
|
||||
playlist_entries = [
|
||||
self.url_result(u'http://www.ted.com' + m.group('talk_url'), 'TED')
|
||||
for m in matches
|
||||
self.url_result(u'http://www.ted.com/talks/' + talk['slug'], self.ie_key())
|
||||
for talk in info['talks']
|
||||
]
|
||||
return self.playlist_result(
|
||||
playlist_entries, playlist_id=playlist_id, playlist_title=playlist_title)
|
||||
playlist_entries,
|
||||
playlist_id=compat_str(playlist_info['id']),
|
||||
playlist_title=playlist_info['title'])
|
||||
|
||||
def _talk_info(self, url, video_id=0):
|
||||
"""Return the video for the talk in the url"""
|
||||
m = re.match(self._VALID_URL, url,re.VERBOSE)
|
||||
video_name = m.group('name')
|
||||
webpage = self._download_webpage(url, video_id, 'Downloading \"%s\" page' % video_name)
|
||||
def _talk_info(self, url, video_name):
|
||||
webpage = self._download_webpage(url, video_name)
|
||||
self.report_extraction(video_name)
|
||||
# If the url includes the language we get the title translated
|
||||
title = self._html_search_regex(r'<span .*?id="altHeadline".+?>(?P<title>.*)</span>',
|
||||
webpage, 'title')
|
||||
json_data = self._search_regex(r'<script.*?>var talkDetails = ({.*?})</script>',
|
||||
webpage, 'json data')
|
||||
info = json.loads(json_data)
|
||||
desc = self._html_search_regex(r'<div class="talk-intro">.*?<p.*?>(.*?)</p>',
|
||||
webpage, 'description', flags = re.DOTALL)
|
||||
|
||||
thumbnail = self._search_regex(r'</span>[\s.]*</div>[\s.]*<img src="(.*?)"',
|
||||
webpage, 'thumbnail')
|
||||
|
||||
talk_info = self._extract_info(webpage)['talks'][0]
|
||||
|
||||
formats = [{
|
||||
'ext': 'mp4',
|
||||
'url': stream['file'],
|
||||
'format': stream['id']
|
||||
} for stream in info['htmlStreams']]
|
||||
|
||||
video_id = info['id']
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
'format': format_id,
|
||||
'preference': self._FORMATS_PREFERENCE.get(format_id, -1),
|
||||
} for (format_id, format_url) in talk_info['nativeDownloads'].items()]
|
||||
self._sort_formats(formats)
|
||||
|
||||
video_id = compat_str(talk_info['id'])
|
||||
# subtitles
|
||||
video_subtitles = self.extract_subtitles(video_id, webpage)
|
||||
video_subtitles = self.extract_subtitles(video_id, talk_info)
|
||||
if self._downloader.params.get('listsubtitles', False):
|
||||
self._list_available_subtitles(video_id, webpage)
|
||||
self._list_available_subtitles(video_id, talk_info)
|
||||
return
|
||||
|
||||
thumbnail = talk_info['thumb']
|
||||
if not thumbnail.startswith('http'):
|
||||
thumbnail = 'http://' + thumbnail
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': talk_info['title'],
|
||||
'uploader': talk_info['speaker'],
|
||||
'thumbnail': thumbnail,
|
||||
'description': desc,
|
||||
'description': self._og_search_description(webpage),
|
||||
'subtitles': video_subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
def _get_available_subtitles(self, video_id, webpage):
|
||||
try:
|
||||
options = self._search_regex(r'(?:<select name="subtitles_language_select" id="subtitles_language_select">)(.*?)(?:</select>)', webpage, 'subtitles_language_select', flags=re.DOTALL)
|
||||
languages = re.findall(r'(?:<option value=")(\S+)"', options)
|
||||
if languages:
|
||||
sub_lang_list = {}
|
||||
for l in languages:
|
||||
url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
|
||||
sub_lang_list[l] = url
|
||||
return sub_lang_list
|
||||
except RegexNotFoundError:
|
||||
def _get_available_subtitles(self, video_id, talk_info):
|
||||
languages = [lang['languageCode'] for lang in talk_info.get('languages', [])]
|
||||
if languages:
|
||||
sub_lang_list = {}
|
||||
for l in languages:
|
||||
url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
|
||||
sub_lang_list[l] = url
|
||||
return sub_lang_list
|
||||
else:
|
||||
self._downloader.report_warning(u'video doesn\'t have subtitles')
|
||||
return {}
|
||||
return {}
|
||||
|
@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from youtube_dl.utils import ExtractorError
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class TinyPicIE(InfoExtractor):
|
||||
|
84
youtube_dl/extractor/tvigle.py
Normal file
84
youtube_dl/extractor/tvigle.py
Normal file
@ -0,0 +1,84 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class TvigleIE(InfoExtractor):
|
||||
IE_NAME = 'tvigle'
|
||||
IE_DESC = 'Интернет-телевидение Tvigle.ru'
|
||||
_VALID_URL = r'http://(?:www\.)?tvigle\.ru/category/.+?[\?&]v(?:ideo)?=(?P<id>\d+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.tvigle.ru/category/cinema/1608/?video=503081',
|
||||
'md5': '09afba4616666249f087efc6dcf83cb3',
|
||||
'info_dict': {
|
||||
'id': '503081',
|
||||
'ext': 'flv',
|
||||
'title': 'Брат 2 ',
|
||||
'description': 'md5:f5a42970f50648cee3d7ad740f3ae769',
|
||||
'upload_date': '20110919',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.tvigle.ru/category/men/vysotskiy_vospominaniya02/?flt=196&v=676433',
|
||||
'md5': 'e7efe5350dd5011d0de6550b53c3ba7b',
|
||||
'info_dict': {
|
||||
'id': '676433',
|
||||
'ext': 'flv',
|
||||
'title': 'Ведущий телепрограммы «60 минут» (США) о Владимире Высоцком',
|
||||
'description': 'md5:027f7dc872948f14c96d19b4178428a4',
|
||||
'upload_date': '20121218',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video_data = self._download_xml(
|
||||
'http://www.tvigle.ru/xml/single.php?obj=%s' % video_id, video_id, 'Downloading video XML')
|
||||
|
||||
video = video_data.find('./video')
|
||||
|
||||
title = video.get('name')
|
||||
description = video.get('anons')
|
||||
if description:
|
||||
description = clean_html(description)
|
||||
thumbnail = video_data.get('img')
|
||||
upload_date = unified_strdate(video.get('date'))
|
||||
like_count = int_or_none(video.get('vtp'))
|
||||
|
||||
formats = []
|
||||
for num, (format_id, format_note) in enumerate([['low_file', 'SQ'], ['file', 'HQ'], ['hd', 'HD 720']]):
|
||||
video_url = video.get(format_id)
|
||||
if not video_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'format_note': format_note,
|
||||
'quality': num,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
'like_count': like_count,
|
||||
'age_limit': 18,
|
||||
'formats': formats,
|
||||
}
|
164
youtube_dl/extractor/udemy.py
Normal file
164
youtube_dl/extractor/udemy.py
Normal file
@ -0,0 +1,164 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class UdemyIE(InfoExtractor):
|
||||
IE_NAME = 'udemy'
|
||||
_VALID_URL = r'https?://www\.udemy\.com/(?:[^#]+#/lecture/|lecture/view/?\?lectureId=)(?P<id>\d+)'
|
||||
_LOGIN_URL = 'https://www.udemy.com/join/login-submit/'
|
||||
_NETRC_MACHINE = 'udemy'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.udemy.com/java-tutorial/#/lecture/172757',
|
||||
'md5': '98eda5b657e752cf945d8445e261b5c5',
|
||||
'info_dict': {
|
||||
'id': '160614',
|
||||
'ext': 'mp4',
|
||||
'title': 'Introduction and Installation',
|
||||
'description': 'md5:c0d51f6f21ef4ec65f091055a5eef876',
|
||||
'duration': 579.29,
|
||||
},
|
||||
'skip': 'Requires udemy account credentials',
|
||||
}]
|
||||
|
||||
def _handle_error(self, response):
|
||||
if not isinstance(response, dict):
|
||||
return
|
||||
error = response.get('error')
|
||||
if error:
|
||||
error_str = 'Udemy returned error #%s: %s' % (error.get('code'), error.get('message'))
|
||||
error_data = error.get('data')
|
||||
if error_data:
|
||||
error_str += ' - %s' % error_data.get('formErrors')
|
||||
raise ExtractorError(error_str, expected=True)
|
||||
|
||||
def _download_json(self, url, video_id, note='Downloading JSON metadata'):
|
||||
response = super(UdemyIE, self)._download_json(url, video_id, note)
|
||||
self._handle_error(response)
|
||||
return response
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
raise ExtractorError(
|
||||
'Udemy account is required, use --username and --password options to provide account credentials.',
|
||||
expected=True)
|
||||
|
||||
login_popup = self._download_webpage(
|
||||
'https://www.udemy.com/join/login-popup?displayType=ajax&showSkipButton=1', None,
|
||||
'Downloading login popup')
|
||||
|
||||
if login_popup == '<div class="run-command close-popup redirect" data-url="https://www.udemy.com/"></div>':
|
||||
return
|
||||
|
||||
csrf = self._html_search_regex(r'<input type="hidden" name="csrf" value="(.+?)"', login_popup, 'csrf token')
|
||||
|
||||
login_form = {
|
||||
'email': username,
|
||||
'password': password,
|
||||
'csrf': csrf,
|
||||
'displayType': 'json',
|
||||
'isSubmitted': '1',
|
||||
}
|
||||
request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
|
||||
response = self._download_json(request, None, 'Logging in as %s' % username)
|
||||
|
||||
if 'returnUrl' not in response:
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
lecture_id = mobj.group('id')
|
||||
|
||||
lecture = self._download_json(
|
||||
'https://www.udemy.com/api-1.1/lectures/%s' % lecture_id, lecture_id, 'Downloading lecture JSON')
|
||||
|
||||
if lecture['assetType'] != 'Video':
|
||||
raise ExtractorError('Lecture %s is not a video' % lecture_id, expected=True)
|
||||
|
||||
asset = lecture['asset']
|
||||
|
||||
stream_url = asset['streamUrl']
|
||||
mobj = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', stream_url)
|
||||
if mobj:
|
||||
return self.url_result(mobj.group(1), 'Youtube')
|
||||
|
||||
video_id = asset['id']
|
||||
thumbnail = asset['thumbnailUrl']
|
||||
duration = asset['data']['duration']
|
||||
|
||||
download_url = asset['downloadUrl']
|
||||
|
||||
formats = [
|
||||
{
|
||||
'url': download_url['Video480p'][0],
|
||||
'format_id': '360p',
|
||||
},
|
||||
{
|
||||
'url': download_url['Video'][0],
|
||||
'format_id': '720p',
|
||||
},
|
||||
]
|
||||
|
||||
title = lecture['title']
|
||||
description = lecture['description']
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats
|
||||
}
|
||||
|
||||
|
||||
class UdemyCourseIE(UdemyIE):
|
||||
IE_NAME = 'udemy:course'
|
||||
_VALID_URL = r'https?://www\.udemy\.com/(?P<coursepath>[\da-z-]+)'
|
||||
_SUCCESSFULLY_ENROLLED = '>You have enrolled in this course!<'
|
||||
_ALREADY_ENROLLED = '>You are already taking this course.<'
|
||||
_TESTS = []
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if UdemyIE.suitable(url) else super(UdemyCourseIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
course_path = mobj.group('coursepath')
|
||||
|
||||
response = self._download_json(
|
||||
'https://www.udemy.com/api-1.1/courses/%s' % course_path, course_path, 'Downloading course JSON')
|
||||
|
||||
course_id = int(response['id'])
|
||||
course_title = response['title']
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'https://www.udemy.com/course/subscribe/?courseId=%s' % course_id, course_id, 'Enrolling in the course')
|
||||
|
||||
if self._SUCCESSFULLY_ENROLLED in webpage:
|
||||
self.to_screen('%s: Successfully enrolled in' % course_id)
|
||||
elif self._ALREADY_ENROLLED in webpage:
|
||||
self.to_screen('%s: Already enrolled in' % course_id)
|
||||
|
||||
response = self._download_json('https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id,
|
||||
course_id, 'Downloading course curriculum')
|
||||
|
||||
entries = [
|
||||
self.url_result('https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']), 'Udemy')
|
||||
for asset in response if asset.get('assetType') == 'Video'
|
||||
]
|
||||
|
||||
return self.playlist_result(entries, course_id, course_title)
|
@ -4,14 +4,11 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none
|
||||
)
|
||||
from ..utils import ExtractorError
|
||||
from .rutv import RUTVIE
|
||||
|
||||
|
||||
class VestiIE(InfoExtractor):
|
||||
IE_NAME = 'vesti'
|
||||
IE_DESC = 'Вести.Ru'
|
||||
_VALID_URL = r'http://(?:.+?\.)?vesti\.ru/(?P<id>.+)'
|
||||
|
||||
@ -30,6 +27,20 @@ class VestiIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.vesti.ru/doc.html?id=1349233',
|
||||
'info_dict': {
|
||||
'id': '773865',
|
||||
'ext': 'mp4',
|
||||
'title': 'Участники митинга штурмуют Донецкую областную администрацию',
|
||||
'description': 'md5:1a160e98b3195379b4c849f2f4958009',
|
||||
'duration': 210,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.vesti.ru/only_video.html?vid=576180',
|
||||
'info_dict': {
|
||||
@ -44,6 +55,20 @@ class VestiIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://hitech.vesti.ru/news/view/id/4000',
|
||||
'info_dict': {
|
||||
'id': '766888',
|
||||
'ext': 'mp4',
|
||||
'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"',
|
||||
'description': 'md5:65ddd47f9830c4f42ed6475f8730c995',
|
||||
'duration': 279,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403',
|
||||
'info_dict': {
|
||||
@ -57,7 +82,7 @@ class VestiIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Blocked outside Russia'
|
||||
'skip': 'Blocked outside Russia',
|
||||
},
|
||||
{
|
||||
'url': 'http://sochi2014.vesti.ru/live/play/live_id/301',
|
||||
@ -72,7 +97,7 @@ class VestiIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Translation has finished'
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -81,90 +106,16 @@ class VestiIE(InfoExtractor):
|
||||
|
||||
page = self._download_webpage(url, video_id, 'Downloading page')
|
||||
|
||||
mobj = re.search(r'<meta property="og:video" content=".+?\.swf\?v?id=(?P<id>\d+).*?" />', page)
|
||||
mobj = re.search(
|
||||
r'<meta[^>]+?property="og:video"[^>]+?content="http://www\.vesti\.ru/i/flvplayer_videoHost\.swf\?vid=(?P<id>\d+)',
|
||||
page)
|
||||
if mobj:
|
||||
video_type = 'video'
|
||||
video_id = mobj.group('id')
|
||||
else:
|
||||
mobj = re.search(
|
||||
r'<iframe.+?src="http://player\.rutv\.ru/iframe/(?P<type>[^/]+)/id/(?P<id>\d+)[^"]*".*?></iframe>', page)
|
||||
page = self._download_webpage('http://www.vesti.ru/only_video.html?vid=%s' % video_id, video_id,
|
||||
'Downloading video page')
|
||||
|
||||
if not mobj:
|
||||
raise ExtractorError('No media found')
|
||||
rutv_url = RUTVIE._extract_url(page)
|
||||
if rutv_url:
|
||||
return self.url_result(rutv_url, 'RUTV')
|
||||
|
||||
video_type = mobj.group('type')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
json_data = self._download_json(
|
||||
'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if video_type == 'live' else '', video_id),
|
||||
video_id, 'Downloading JSON')
|
||||
|
||||
if json_data['errors']:
|
||||
raise ExtractorError('vesti returned error: %s' % json_data['errors'], expected=True)
|
||||
|
||||
playlist = json_data['data']['playlist']
|
||||
medialist = playlist['medialist']
|
||||
media = medialist[0]
|
||||
|
||||
if media['errors']:
|
||||
raise ExtractorError('vesti returned error: %s' % media['errors'], expected=True)
|
||||
|
||||
view_count = playlist.get('count_views')
|
||||
priority_transport = playlist['priority_transport']
|
||||
|
||||
thumbnail = media['picture']
|
||||
width = media['width']
|
||||
height = media['height']
|
||||
description = media['anons']
|
||||
title = media['title']
|
||||
duration = int_or_none(media.get('duration'))
|
||||
|
||||
formats = []
|
||||
|
||||
for transport, links in media['sources'].items():
|
||||
for quality, url in links.items():
|
||||
if transport == 'rtmp':
|
||||
mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', url)
|
||||
if not mobj:
|
||||
continue
|
||||
fmt = {
|
||||
'url': mobj.group('url'),
|
||||
'play_path': mobj.group('playpath'),
|
||||
'app': mobj.group('app'),
|
||||
'page_url': 'http://player.rutv.ru',
|
||||
'player_url': 'http://player.rutv.ru/flash2v/osmf.swf?i=22',
|
||||
'rtmp_live': True,
|
||||
'ext': 'flv',
|
||||
'vbr': int(quality),
|
||||
}
|
||||
elif transport == 'm3u8':
|
||||
fmt = {
|
||||
'url': url,
|
||||
'ext': 'mp4',
|
||||
}
|
||||
else:
|
||||
fmt = {
|
||||
'url': url
|
||||
}
|
||||
fmt.update({
|
||||
'width': width,
|
||||
'height': height,
|
||||
'format_id': '%s-%s' % (transport, quality),
|
||||
'preference': -1 if priority_transport == transport else -2,
|
||||
})
|
||||
formats.append(fmt)
|
||||
|
||||
if not formats:
|
||||
raise ExtractorError('No media links available for %s' % video_id)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'view_count': view_count,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
raise ExtractorError('No video found', expected=True)
|
@ -2,7 +2,6 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
import datetime
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@ -22,6 +21,7 @@ class VevoIE(InfoExtractor):
|
||||
https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
|
||||
vevo:)
|
||||
(?P<id>[^&?#]+)'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
||||
"md5": "06bea460acb744eab74a9d7dcb4bfd61",
|
||||
@ -34,6 +34,8 @@ class VevoIE(InfoExtractor):
|
||||
"duration": 230.12,
|
||||
"width": 1920,
|
||||
"height": 1080,
|
||||
# timestamp and upload_date are often incorrect; seem to change randomly
|
||||
'timestamp': int,
|
||||
}
|
||||
}, {
|
||||
'note': 'v3 SMIL format',
|
||||
@ -47,6 +49,7 @@ class VevoIE(InfoExtractor):
|
||||
'title': 'I Wish I Could Break Your Heart',
|
||||
'duration': 226.101,
|
||||
'age_limit': 0,
|
||||
'timestamp': int,
|
||||
}
|
||||
}, {
|
||||
'note': 'Age-limited video',
|
||||
@ -57,7 +60,8 @@ class VevoIE(InfoExtractor):
|
||||
'age_limit': 18,
|
||||
'title': 'Tunnel Vision (Explicit)',
|
||||
'uploader': 'Justin Timberlake',
|
||||
'upload_date': '20130704',
|
||||
'upload_date': 're:2013070[34]',
|
||||
'timestamp': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'true',
|
||||
@ -169,13 +173,13 @@ class VevoIE(InfoExtractor):
|
||||
|
||||
timestamp_ms = int(self._search_regex(
|
||||
r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date'))
|
||||
upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_info['title'],
|
||||
'formats': formats,
|
||||
'thumbnail': video_info['imageUrl'],
|
||||
'upload_date': upload_date.strftime('%Y%m%d'),
|
||||
'timestamp': timestamp_ms // 1000,
|
||||
'uploader': video_info['mainArtists'][0]['artistName'],
|
||||
'duration': video_info['duration'],
|
||||
'age_limit': age_limit,
|
||||
|
@ -29,6 +29,7 @@ class VideoBamIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'pqLvq',
|
||||
'ext': 'mp4',
|
||||
'title': '_',
|
||||
}
|
||||
},
|
||||
]
|
||||
@ -61,7 +62,7 @@ class VideoBamIE(InfoExtractor):
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._og_search_title(page, default='VideoBam', fatal=False)
|
||||
title = self._og_search_title(page, default='_', fatal=False)
|
||||
description = self._og_search_description(page, default=None)
|
||||
thumbnail = self._og_search_thumbnail(page)
|
||||
uploader = self._html_search_regex(r'Upload by ([^<]+)</a>', page, 'uploader', fatal=False, default=None)
|
||||
|
@ -1,22 +1,23 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .internetvideoarchive import InternetVideoArchiveIE
|
||||
from ..utils import (
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import compat_urlparse
|
||||
|
||||
|
||||
class VideoDetectiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.videodetective.com/movies/kick-ass-2/194487',
|
||||
u'file': u'194487.mp4',
|
||||
u'info_dict': {
|
||||
u'title': u'KICK-ASS 2',
|
||||
u'description': u'md5:65ba37ad619165afac7d432eaded6013',
|
||||
u'duration': 135,
|
||||
'url': 'http://www.videodetective.com/movies/kick-ass-2/194487',
|
||||
'info_dict': {
|
||||
'id': '194487',
|
||||
'ext': 'mp4',
|
||||
'title': 'KICK-ASS 2',
|
||||
'description': 'md5:65ba37ad619165afac7d432eaded6013',
|
||||
'duration': 135,
|
||||
},
|
||||
}
|
||||
|
||||
@ -26,5 +27,4 @@ class VideoDetectiveIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
og_video = self._og_search_video_url(webpage)
|
||||
query = compat_urlparse.urlparse(og_video).query
|
||||
return self.url_result(InternetVideoArchiveIE._build_url(query),
|
||||
ie=InternetVideoArchiveIE.ie_key())
|
||||
return self.url_result(InternetVideoArchiveIE._build_url(query), ie=InternetVideoArchiveIE.ie_key())
|
||||
|
@ -1,29 +1,33 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
US_RATINGS,
|
||||
)
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
|
||||
|
||||
class VikiIE(SubtitlesInfoExtractor):
|
||||
IE_NAME = u'viki'
|
||||
IE_NAME = 'viki'
|
||||
|
||||
_VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
|
||||
_TEST = {
|
||||
u'url': u'http://www.viki.com/videos/1023585v-heirs-episode-14',
|
||||
u'file': u'1023585v.mp4',
|
||||
u'md5': u'a21454021c2646f5433514177e2caa5f',
|
||||
u'info_dict': {
|
||||
u'title': u'Heirs Episode 14',
|
||||
u'uploader': u'SBS',
|
||||
u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
|
||||
u'upload_date': u'20131121',
|
||||
u'age_limit': 13,
|
||||
'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
|
||||
'md5': 'a21454021c2646f5433514177e2caa5f',
|
||||
'info_dict': {
|
||||
'id': '1023585v',
|
||||
'ext': 'mp4',
|
||||
'title': 'Heirs Episode 14',
|
||||
'uploader': 'SBS',
|
||||
'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e',
|
||||
'upload_date': '20131121',
|
||||
'age_limit': 13,
|
||||
},
|
||||
u'skip': u'Blocked in the US',
|
||||
'skip': 'Blocked in the US',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -44,28 +48,21 @@ class VikiIE(SubtitlesInfoExtractor):
|
||||
|
||||
rating_str = self._html_search_regex(
|
||||
r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
|
||||
u'rating information', default='').strip()
|
||||
RATINGS = {
|
||||
'G': 0,
|
||||
'PG': 10,
|
||||
'PG-13': 13,
|
||||
'R': 16,
|
||||
'NC': 18,
|
||||
}
|
||||
age_limit = RATINGS.get(rating_str)
|
||||
'rating information', default='').strip()
|
||||
age_limit = US_RATINGS.get(rating_str)
|
||||
|
||||
info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
|
||||
info_webpage = self._download_webpage(
|
||||
info_url, video_id, note=u'Downloading info page')
|
||||
info_url, video_id, note='Downloading info page')
|
||||
if re.match(r'\s*<div\s+class="video-error', info_webpage):
|
||||
raise ExtractorError(
|
||||
u'Video %s is blocked from your location.' % video_id,
|
||||
'Video %s is blocked from your location.' % video_id,
|
||||
expected=True)
|
||||
video_url = self._html_search_regex(
|
||||
r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL')
|
||||
r'<source[^>]+src="([^"]+)"', info_webpage, 'video URL')
|
||||
|
||||
upload_date_str = self._html_search_regex(
|
||||
r'"created_at":"([^"]+)"', info_webpage, u'upload date')
|
||||
r'"created_at":"([^"]+)"', info_webpage, 'upload date')
|
||||
upload_date = (
|
||||
unified_strdate(upload_date_str)
|
||||
if upload_date_str is not None
|
||||
|
@ -8,6 +8,7 @@ import itertools
|
||||
from .common import InfoExtractor
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from ..utils import (
|
||||
compat_HTTPError,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
clean_html,
|
||||
@ -101,6 +102,15 @@ class VimeoIE(SubtitlesInfoExtractor):
|
||||
},
|
||||
]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
if VimeoChannelIE.suitable(url):
|
||||
# Otherwise channel urls like http://vimeo.com/channels/31259 would
|
||||
# match
|
||||
return False
|
||||
else:
|
||||
return super(VimeoIE, cls).suitable(url)
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
@ -172,7 +182,18 @@ class VimeoIE(SubtitlesInfoExtractor):
|
||||
|
||||
# Retrieve video webpage to extract further information
|
||||
request = compat_urllib_request.Request(url, None, headers)
|
||||
webpage = self._download_webpage(request, video_id)
|
||||
try:
|
||||
webpage = self._download_webpage(request, video_id)
|
||||
except ExtractorError as ee:
|
||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
|
||||
errmsg = ee.cause.read()
|
||||
if b'Because of its privacy settings, this video cannot be played here' in errmsg:
|
||||
raise ExtractorError(
|
||||
'Cannot download embed-only video without embedding '
|
||||
'URL. Please call youtube-dl with the URL of the page '
|
||||
'that embeds this video.',
|
||||
expected=True)
|
||||
raise
|
||||
|
||||
# Now we begin extracting as much information as we can from what we
|
||||
# retrieved. First we extract the information common to all extractors,
|
||||
@ -221,7 +242,9 @@ class VimeoIE(SubtitlesInfoExtractor):
|
||||
# Extract video thumbnail
|
||||
video_thumbnail = config["video"].get("thumbnail")
|
||||
if video_thumbnail is None:
|
||||
_, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in config["video"]["thumbs"].items())[-1]
|
||||
video_thumbs = config["video"].get("thumbs")
|
||||
if video_thumbs and isinstance(video_thumbs, dict):
|
||||
_, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in video_thumbs.items())[-1]
|
||||
|
||||
# Extract video description
|
||||
video_description = None
|
||||
@ -318,7 +341,7 @@ class VimeoIE(SubtitlesInfoExtractor):
|
||||
|
||||
class VimeoChannelIE(InfoExtractor):
|
||||
IE_NAME = 'vimeo:channel'
|
||||
_VALID_URL = r'(?:https?://)?vimeo\.com/channels/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'(?:https?://)?vimeo\.com/channels/(?P<id>[^/]+)/?(\?.*)?$'
|
||||
_MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
|
||||
_TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
|
||||
|
||||
|
@ -16,7 +16,7 @@ from ..utils import (
|
||||
|
||||
class VKIE(InfoExtractor):
|
||||
IE_NAME = 'vk.com'
|
||||
_VALID_URL = r'https?://vk\.com/(?:videos.*?\?.*?z=)?video(?P<id>.*?)(?:\?|%2F|$)'
|
||||
_VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>\d+).*?\bid=(?P<id>\d+)|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))'
|
||||
_NETRC_MACHINE = 'vk'
|
||||
|
||||
_TESTS = [
|
||||
@ -42,6 +42,18 @@ class VKIE(InfoExtractor):
|
||||
'duration': 558,
|
||||
}
|
||||
},
|
||||
{
|
||||
'note': 'Embedded video',
|
||||
'url': 'http://vk.com/video_ext.php?oid=32194266&id=162925554&hash=7d8c2e0d5e05aeaa&hd=1',
|
||||
'md5': 'c7ce8f1f87bec05b3de07fdeafe21a0a',
|
||||
'info_dict': {
|
||||
'id': '162925554',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Vladimir Gavrin',
|
||||
'title': 'Lin Dan',
|
||||
'duration': 101,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://vk.com/video-8871596_164049491',
|
||||
'md5': 'a590bcaf3d543576c9bd162812387666',
|
||||
@ -54,7 +66,7 @@ class VKIE(InfoExtractor):
|
||||
'duration': 8352,
|
||||
},
|
||||
'skip': 'Requires vk account credentials',
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _login(self):
|
||||
@ -82,7 +94,10 @@ class VKIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = mobj.group('videoid')
|
||||
|
||||
if not video_id:
|
||||
video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))
|
||||
|
||||
info_url = 'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id
|
||||
info_page = self._download_webpage(info_url, video_id)
|
||||
|
@ -13,7 +13,7 @@ class VubeIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon',
|
||||
'md5': 'f81dcf6d0448e3291f54380181695821',
|
||||
'md5': 'db7aba89d4603dadd627e9d1973946fe',
|
||||
'info_dict': {
|
||||
'id': 'YL2qNPkqon',
|
||||
'ext': 'mp4',
|
||||
@ -77,4 +77,4 @@ class VubeIE(InfoExtractor):
|
||||
'like_count': like_count,
|
||||
'dislike_count': dislike_count,
|
||||
'comment_count': comment_count,
|
||||
}
|
||||
}
|
||||
|
114
youtube_dl/extractor/wdr.py
Normal file
114
youtube_dl/extractor/wdr.py
Normal file
@ -0,0 +1,114 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
compat_urlparse,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
class WDRIE(InfoExtractor):
|
||||
_PLAYER_REGEX = '-(?:video|audio)player(?:_size-[LMS])?'
|
||||
_VALID_URL = r'(?P<url>https?://www\d?\.(?:wdr\d?|funkhauseuropa)\.de/)(?P<id>.+?)(?P<player>%s)?\.html' % _PLAYER_REGEX
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www1.wdr.de/mediathek/video/sendungen/servicezeit/videoservicezeit560-videoplayer_size-L.html',
|
||||
'info_dict': {
|
||||
'id': 'mdb-362427',
|
||||
'ext': 'flv',
|
||||
'title': 'Servicezeit',
|
||||
'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb',
|
||||
'upload_date': '20140310',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www1.wdr.de/themen/av/videomargaspiegelisttot101-videoplayer.html',
|
||||
'info_dict': {
|
||||
'id': 'mdb-363194',
|
||||
'ext': 'flv',
|
||||
'title': 'Marga Spiegel ist tot',
|
||||
'description': 'md5:2309992a6716c347891c045be50992e4',
|
||||
'upload_date': '20140311',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www1.wdr.de/themen/kultur/audioerlebtegeschichtenmargaspiegel100-audioplayer.html',
|
||||
'md5': '83e9e8fefad36f357278759870805898',
|
||||
'info_dict': {
|
||||
'id': 'mdb-194332',
|
||||
'ext': 'mp3',
|
||||
'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)',
|
||||
'description': 'md5:2309992a6716c347891c045be50992e4',
|
||||
'upload_date': '20091129',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.funkhauseuropa.de/av/audiogrenzenlosleckerbaklava101-audioplayer.html',
|
||||
'md5': 'cfff440d4ee64114083ac44676df5d15',
|
||||
'info_dict': {
|
||||
'id': 'mdb-363068',
|
||||
'ext': 'mp3',
|
||||
'title': 'Grenzenlos lecker - Baklava',
|
||||
'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
|
||||
'upload_date': '20140311',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
page_url = mobj.group('url')
|
||||
page_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
|
||||
if mobj.group('player') is None:
|
||||
entries = [
|
||||
self.url_result(page_url + href, 'WDR')
|
||||
for href in re.findall(r'<a href="/?(.+?%s\.html)" rel="nofollow"' % self._PLAYER_REGEX, webpage)
|
||||
]
|
||||
return self.playlist_result(entries, page_id)
|
||||
|
||||
flashvars = compat_urlparse.parse_qs(
|
||||
self._html_search_regex(r'<param name="flashvars" value="([^"]+)"', webpage, 'flashvars'))
|
||||
|
||||
page_id = flashvars['trackerClipId'][0]
|
||||
video_url = flashvars['dslSrc'][0]
|
||||
title = flashvars['trackerClipTitle'][0]
|
||||
thumbnail = flashvars['startPicture'][0] if 'startPicture' in flashvars else None
|
||||
|
||||
if 'trackerClipAirTime' in flashvars:
|
||||
upload_date = flashvars['trackerClipAirTime'][0]
|
||||
else:
|
||||
upload_date = self._html_search_meta('DC.Date', webpage, 'upload date')
|
||||
|
||||
if upload_date:
|
||||
upload_date = unified_strdate(upload_date)
|
||||
|
||||
if video_url.endswith('.f4m'):
|
||||
video_url += '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18'
|
||||
ext = 'flv'
|
||||
else:
|
||||
ext = determine_ext(video_url)
|
||||
|
||||
description = self._html_search_meta('Description', webpage, 'description')
|
||||
|
||||
return {
|
||||
'id': page_id,
|
||||
'url': video_url,
|
||||
'ext': ext,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
}
|
@ -1,55 +1,49 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class XNXXIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:video|www)\.xnxx\.com/video([0-9]+)/(.*)'
|
||||
VIDEO_URL_RE = r'flv_url=(.*?)&'
|
||||
VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
|
||||
VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&'
|
||||
_VALID_URL = r'^https?://(?:video|www)\.xnxx\.com/video(?P<id>[0-9]+)/(.*)'
|
||||
_TEST = {
|
||||
u'url': u'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_',
|
||||
u'file': u'1135332.flv',
|
||||
u'md5': u'0831677e2b4761795f68d417e0b7b445',
|
||||
u'info_dict': {
|
||||
u"title": u"lida \u00bb Naked Funny Actress (5)",
|
||||
u"age_limit": 18,
|
||||
'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_',
|
||||
'md5': '0831677e2b4761795f68d417e0b7b445',
|
||||
'info_dict': {
|
||||
'id': '1135332',
|
||||
'ext': 'flv',
|
||||
'title': 'lida » Naked Funny Actress (5)',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
video_id = mobj.group(1)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
# Get webpage content
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._search_regex(self.VIDEO_URL_RE,
|
||||
webpage, u'video URL')
|
||||
video_url = self._search_regex(r'flv_url=(.*?)&',
|
||||
webpage, 'video URL')
|
||||
video_url = compat_urllib_parse.unquote(video_url)
|
||||
|
||||
video_title = self._html_search_regex(self.VIDEO_TITLE_RE,
|
||||
webpage, u'title')
|
||||
video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XNXX.COM',
|
||||
webpage, 'title')
|
||||
|
||||
video_thumbnail = self._search_regex(self.VIDEO_THUMB_RE,
|
||||
webpage, u'thumbnail', fatal=False)
|
||||
video_thumbnail = self._search_regex(r'url_bigthumb=(.*?)&',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
|
||||
return [{
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'uploader': None,
|
||||
'upload_date': None,
|
||||
'title': video_title,
|
||||
'ext': 'flv',
|
||||
'thumbnail': video_thumbnail,
|
||||
'description': None,
|
||||
'age_limit': 18,
|
||||
}]
|
||||
}
|
||||
|
@ -7,19 +7,24 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class XTubeIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<videoid>[^/?&]+))'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<url>xtube\.com/watch\.php\?v=(?P<videoid>[^/?&]+))'
|
||||
_TEST = {
|
||||
'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_',
|
||||
'file': 'kVTUy_G222_.mp4',
|
||||
'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab',
|
||||
'info_dict': {
|
||||
"title": "strange erotica",
|
||||
"description": "surreal gay themed erotica...almost an ET kind of thing",
|
||||
"uploader": "greenshowers",
|
||||
"age_limit": 18,
|
||||
'id': 'kVTUy_G222_',
|
||||
'ext': 'mp4',
|
||||
'title': 'strange erotica',
|
||||
'description': 'surreal gay themed erotica...almost an ET kind of thing',
|
||||
'uploader': 'greenshowers',
|
||||
'duration': 450,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
@ -32,10 +37,23 @@ class XTubeIE(InfoExtractor):
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
video_title = self._html_search_regex(r'<div class="p_5px[^>]*>([^<]+)', webpage, 'title')
|
||||
video_uploader = self._html_search_regex(r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, 'uploader', fatal=False)
|
||||
video_description = self._html_search_regex(r'<p class="video_description">([^<]+)', webpage, 'description', fatal=False)
|
||||
video_url= self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, 'video_url').replace('\\/', '/')
|
||||
video_title = self._html_search_regex(r'<p class="title">([^<]+)', webpage, 'title')
|
||||
video_uploader = self._html_search_regex(
|
||||
r'so_s\.addVariable\("owner_u", "([^"]+)', webpage, 'uploader', fatal=False)
|
||||
video_description = self._html_search_regex(
|
||||
r'<p class="fieldsDesc">([^<]+)', webpage, 'description', fatal=False)
|
||||
video_url = self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, 'video_url').replace('\\/', '/')
|
||||
duration = parse_duration(self._html_search_regex(
|
||||
r'<span class="bold">Runtime:</span> ([^<]+)</p>', webpage, 'duration', fatal=False))
|
||||
view_count = self._html_search_regex(
|
||||
r'<span class="bold">Views:</span> ([\d,\.]+)</p>', webpage, 'view count', fatal=False)
|
||||
if view_count:
|
||||
view_count = str_to_int(view_count)
|
||||
comment_count = self._html_search_regex(
|
||||
r'<div id="commentBar">([\d,\.]+) Comments</div>', webpage, 'comment count', fatal=False)
|
||||
if comment_count:
|
||||
comment_count = str_to_int(comment_count)
|
||||
|
||||
path = compat_urllib_parse_urlparse(video_url).path
|
||||
extension = os.path.splitext(path)[1][1:]
|
||||
format = path.split('/')[5].split('_')[:2]
|
||||
@ -48,6 +66,9 @@ class XTubeIE(InfoExtractor):
|
||||
'title': video_title,
|
||||
'uploader': video_uploader,
|
||||
'description': video_description,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
'url': video_url,
|
||||
'ext': extension,
|
||||
'format': format,
|
||||
|
@ -1,3 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
@ -17,24 +20,25 @@ from ..aes import (
|
||||
|
||||
|
||||
class YouPornIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
|
||||
_VALID_URL = r'^(?P<proto>https?://)(?:www\.)?(?P<url>youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+))'
|
||||
_TEST = {
|
||||
u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
|
||||
u'file': u'505835.mp4',
|
||||
u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89',
|
||||
u'info_dict': {
|
||||
u"upload_date": u"20101221",
|
||||
u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?",
|
||||
u"uploader": u"Ask Dan And Jennifer",
|
||||
u"title": u"Sex Ed: Is It Safe To Masturbate Daily?",
|
||||
u"age_limit": 18,
|
||||
'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
|
||||
'md5': '71ec5fcfddacf80f495efa8b6a8d9a89',
|
||||
'info_dict': {
|
||||
'id': '505835',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20101221',
|
||||
'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?',
|
||||
'uploader': 'Ask Dan And Jennifer',
|
||||
'title': 'Sex Ed: Is It Safe To Masturbate Daily?',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('videoid')
|
||||
url = 'http://www.' + mobj.group('url')
|
||||
url = mobj.group('proto') + 'www.' + mobj.group('url')
|
||||
|
||||
req = compat_urllib_request.Request(url)
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
@ -42,7 +46,7 @@ class YouPornIE(InfoExtractor):
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
||||
# Get JSON parameters
|
||||
json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, u'JSON parameters')
|
||||
json_params = self._search_regex(r'var currentVideo = new Video\((.*)\);', webpage, 'JSON parameters')
|
||||
try:
|
||||
params = json.loads(json_params)
|
||||
except:
|
||||
@ -61,7 +65,7 @@ class YouPornIE(InfoExtractor):
|
||||
# Get all of the links from the page
|
||||
DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
|
||||
download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
|
||||
webpage, u'download list').strip()
|
||||
webpage, 'download list').strip()
|
||||
LINK_RE = r'<a href="([^"]+)">'
|
||||
links = re.findall(LINK_RE, download_list_html)
|
||||
|
||||
@ -86,7 +90,7 @@ class YouPornIE(InfoExtractor):
|
||||
resolution = format_parts[0]
|
||||
height = int(resolution[:-len('p')])
|
||||
bitrate = int(format_parts[1][:-len('k')])
|
||||
format = u'-'.join(format_parts) + u'-' + dn
|
||||
format = '-'.join(format_parts) + '-' + dn
|
||||
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
|
@ -194,14 +194,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
'151': {'ext': 'mp4', 'height': 72, 'resolution': '72p', 'format_note': 'HLS', 'preference': -10},
|
||||
|
||||
# DASH mp4 video
|
||||
'133': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'DASH video', 'preference': -40},
|
||||
'134': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'DASH video', 'preference': -40},
|
||||
'135': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'DASH video', 'preference': -40},
|
||||
'136': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'DASH video', 'preference': -40},
|
||||
'137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'preference': -40},
|
||||
'138': {'ext': 'mp4', 'height': 2160, 'resolution': '2160p', 'format_note': 'DASH video', 'preference': -40},
|
||||
'160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'preference': -40},
|
||||
'264': {'ext': 'mp4', 'height': 1440, 'resolution': '1440p', 'format_note': 'DASH video', 'preference': -40},
|
||||
'133': {'ext': 'mp4', 'height': 240, 'resolution': '240p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'134': {'ext': 'mp4', 'height': 360, 'resolution': '360p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'135': {'ext': 'mp4', 'height': 480, 'resolution': '480p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'136': {'ext': 'mp4', 'height': 720, 'resolution': '720p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'137': {'ext': 'mp4', 'height': 1080, 'resolution': '1080p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'138': {'ext': 'mp4', 'height': 2160, 'resolution': '2160p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'160': {'ext': 'mp4', 'height': 192, 'resolution': '192p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'264': {'ext': 'mp4', 'height': 1440, 'resolution': '1440p', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
|
||||
# Dash mp4 audio
|
||||
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
|
||||
@ -209,12 +209,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},
|
||||
|
||||
# Dash webm
|
||||
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
||||
'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
||||
'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
||||
'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
||||
'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
||||
'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
||||
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
||||
'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
||||
'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
||||
'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
||||
'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
||||
'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40},
|
||||
'242': {'ext': 'webm', 'height': 240, 'resolution': '240p', 'format_note': 'DASH webm', 'preference': -40},
|
||||
'243': {'ext': 'webm', 'height': 360, 'resolution': '360p', 'format_note': 'DASH webm', 'preference': -40},
|
||||
'244': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40},
|
||||
@ -1130,14 +1130,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
|
||||
|
||||
def _real_extract(self, url):
|
||||
proto = (
|
||||
u'http' if self._downloader.params.get('prefer_insecure', False)
|
||||
else u'https')
|
||||
|
||||
# Extract original video URL from URL with redirection, like age verification, using next_url parameter
|
||||
mobj = re.search(self._NEXT_URL_RE, url)
|
||||
if mobj:
|
||||
url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
|
||||
url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
|
||||
video_id = self.extract_id(url)
|
||||
|
||||
# Get video webpage
|
||||
url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
|
||||
url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
|
||||
video_webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# Attempt to extract SWF player URL
|
||||
@ -1162,7 +1166,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
'asv': 3,
|
||||
'sts':'1588',
|
||||
})
|
||||
video_info_url = 'https://www.youtube.com/get_video_info?' + data
|
||||
video_info_url = proto + '://www.youtube.com/get_video_info?' + data
|
||||
video_info_webpage = self._download_webpage(video_info_url, video_id,
|
||||
note=False,
|
||||
errnote='unable to download video info webpage')
|
||||
@ -1170,7 +1174,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
else:
|
||||
age_gate = False
|
||||
for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
|
||||
video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
|
||||
video_info_url = (proto + '://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
|
||||
% (video_id, el_type))
|
||||
video_info_webpage = self._download_webpage(video_info_url, video_id,
|
||||
note=False,
|
||||
@ -1285,10 +1289,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
|
||||
# Decide which formats to download
|
||||
try:
|
||||
mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
|
||||
mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage)
|
||||
if not mobj:
|
||||
raise ValueError('Could not find vevo ID')
|
||||
ytplayer_config = json.loads(mobj.group(1))
|
||||
json_code = uppercase_escape(mobj.group(1))
|
||||
ytplayer_config = json.loads(json_code)
|
||||
args = ytplayer_config['args']
|
||||
# Easy way to know if the 's' value is in url_encoded_fmt_stream_map
|
||||
# this signatures are encrypted
|
||||
@ -1444,7 +1449,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
'duration': video_duration,
|
||||
'age_limit': 18 if age_gate else 0,
|
||||
'annotations': video_annotations,
|
||||
'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
|
||||
'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'dislike_count': dislike_count,
|
||||
@ -1645,7 +1650,7 @@ class YoutubeChannelIE(InfoExtractor):
|
||||
|
||||
class YoutubeUserIE(InfoExtractor):
|
||||
IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
|
||||
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
|
||||
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
|
||||
_TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
|
||||
_GDATA_PAGE_SIZE = 50
|
||||
_GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
|
||||
@ -1744,12 +1749,50 @@ class YoutubeSearchIE(SearchInfoExtractor):
|
||||
for video_id in video_ids]
|
||||
return self.playlist_result(videos, query)
|
||||
|
||||
|
||||
class YoutubeSearchDateIE(YoutubeSearchIE):
|
||||
IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
|
||||
_API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
|
||||
_SEARCH_KEY = 'ytsearchdate'
|
||||
IE_DESC = u'YouTube.com searches, newest videos first'
|
||||
|
||||
|
||||
class YoutubeSearchURLIE(InfoExtractor):
|
||||
IE_DESC = u'YouTube.com search URLs'
|
||||
IE_NAME = u'youtube:search_url'
|
||||
_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
query = compat_urllib_parse.unquote_plus(mobj.group('query'))
|
||||
|
||||
webpage = self._download_webpage(url, query)
|
||||
result_code = self._search_regex(
|
||||
r'(?s)<ol id="search-results"(.*?)</ol>', webpage, u'result HTML')
|
||||
|
||||
part_codes = re.findall(
|
||||
r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
|
||||
entries = []
|
||||
for part_code in part_codes:
|
||||
part_title = self._html_search_regex(
|
||||
r'(?s)title="([^"]+)"', part_code, 'item title', fatal=False)
|
||||
part_url_snippet = self._html_search_regex(
|
||||
r'(?s)href="([^"]+)"', part_code, 'item URL')
|
||||
part_url = compat_urlparse.urljoin(
|
||||
'https://www.youtube.com/', part_url_snippet)
|
||||
entries.append({
|
||||
'_type': 'url',
|
||||
'url': part_url,
|
||||
'title': part_title,
|
||||
})
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': entries,
|
||||
'title': query,
|
||||
}
|
||||
|
||||
|
||||
class YoutubeShowIE(InfoExtractor):
|
||||
IE_DESC = u'YouTube.com (multi-season) shows'
|
||||
_VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
|
||||
|
@ -6,6 +6,7 @@ import ctypes
|
||||
import datetime
|
||||
import email.utils
|
||||
import errno
|
||||
import getpass
|
||||
import gzip
|
||||
import itertools
|
||||
import io
|
||||
@ -22,6 +23,7 @@ import struct
|
||||
import subprocess
|
||||
import sys
|
||||
import traceback
|
||||
import xml.etree.ElementTree
|
||||
import zlib
|
||||
|
||||
try:
|
||||
@ -777,6 +779,7 @@ def unified_strdate(date_str):
|
||||
'%Y/%m/%d %H:%M:%S',
|
||||
'%Y-%m-%d %H:%M:%S',
|
||||
'%d.%m.%Y %H:%M',
|
||||
'%d.%m.%Y %H.%M',
|
||||
'%Y-%m-%dT%H:%M:%SZ',
|
||||
'%Y-%m-%dT%H:%M:%S.%fZ',
|
||||
'%Y-%m-%dT%H:%M:%S.%f0Z',
|
||||
@ -1263,3 +1266,35 @@ def read_batch_urls(batch_fd):
|
||||
|
||||
with contextlib.closing(batch_fd) as fd:
|
||||
return [url for url in map(fixup, fd) if url]
|
||||
|
||||
|
||||
def urlencode_postdata(*args, **kargs):
|
||||
return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
|
||||
|
||||
|
||||
def parse_xml(s):
|
||||
class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
|
||||
def doctype(self, name, pubid, system):
|
||||
pass # Ignore doctypes
|
||||
|
||||
parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
|
||||
kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
|
||||
return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
|
||||
|
||||
|
||||
if sys.version_info < (3, 0) and sys.platform == 'win32':
|
||||
def compat_getpass(prompt, *args, **kwargs):
|
||||
if isinstance(prompt, compat_str):
|
||||
prompt = prompt.encode(preferredencoding())
|
||||
return getpass.getpass(prompt, *args, **kwargs)
|
||||
else:
|
||||
compat_getpass = getpass.getpass
|
||||
|
||||
|
||||
US_RATINGS = {
|
||||
'G': 0,
|
||||
'PG': 10,
|
||||
'PG-13': 13,
|
||||
'R': 16,
|
||||
'NC': 18,
|
||||
}
|
||||
|
@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2014.02.27.1'
|
||||
__version__ = '2014.03.21.1'
|
||||
|
Reference in New Issue
Block a user