Compare commits
63 Commits
2014.09.22
...
2014.09.28
Author | SHA1 | Date | |
---|---|---|---|
|
22dd3fad86 | ||
|
d6e6a42256 | ||
|
76e7d1e74b | ||
|
38c4d41b74 | ||
|
f0b8e3607d | ||
|
51ee08c4bb | ||
|
c841789772 | ||
|
c121a75b36 | ||
|
5a8b77551d | ||
|
0217aee154 | ||
|
b14f3a4c1d | ||
|
92f7963f6e | ||
|
88fbe4c2cc | ||
|
394599f422 | ||
|
ed9266db90 | ||
|
f4b1c7adb8 | ||
|
c95eeb7b80 | ||
|
5e43e3803c | ||
|
a89435a7a8 | ||
|
a0a90b3ba1 | ||
|
c664182323 | ||
|
6be1cd4ddb | ||
|
ee0d90707a | ||
|
f776d8f608 | ||
|
b3ac3a51ac | ||
|
0b75c2a88b | ||
|
7b7518124e | ||
|
68b0973046 | ||
|
3a203b8bfa | ||
|
70752ccefd | ||
|
0155549d6c | ||
|
b66745288e | ||
|
2a1325fdde | ||
|
2f9e8776df | ||
|
497339fa0e | ||
|
8e6f8051f0 | ||
|
11b3ce8509 | ||
|
6a5af6acb9 | ||
|
9a0d98bb40 | ||
|
fbd3162e49 | ||
|
54e9a4af95 | ||
|
8a32b82e46 | ||
|
fec02bcc90 | ||
|
c6e90caaa6 | ||
|
4bbf157794 | ||
|
6b08cdf626 | ||
|
b686fc18da | ||
|
0b97f3a936 | ||
|
eb73f2649f | ||
|
f0b5d6af74 | ||
|
2f771f6c99 | ||
|
3b2f933b01 | ||
|
cc746841e7 | ||
|
ac7553d031 | ||
|
cdc628a498 | ||
|
69ea8ca42c | ||
|
4bc3a23ec5 | ||
|
bd5650ac64 | ||
|
86916dae4b | ||
|
f7d159cf95 | ||
|
632e5684ce | ||
|
746c67d72f | ||
|
5aa38e75b2 |
@@ -139,7 +139,9 @@ def generator(test_case):
|
|||||||
|
|
||||||
if is_playlist:
|
if is_playlist:
|
||||||
self.assertEqual(res_dict['_type'], 'playlist')
|
self.assertEqual(res_dict['_type'], 'playlist')
|
||||||
|
self.assertTrue('entries' in res_dict)
|
||||||
expect_info_dict(self, test_case.get('info_dict', {}), res_dict)
|
expect_info_dict(self, test_case.get('info_dict', {}), res_dict)
|
||||||
|
|
||||||
if 'playlist_mincount' in test_case:
|
if 'playlist_mincount' in test_case:
|
||||||
assertGreaterEqual(
|
assertGreaterEqual(
|
||||||
self,
|
self,
|
||||||
@@ -188,7 +190,7 @@ def generator(test_case):
|
|||||||
expect_info_dict(self, tc.get('info_dict', {}), info_dict)
|
expect_info_dict(self, tc.get('info_dict', {}), info_dict)
|
||||||
finally:
|
finally:
|
||||||
try_rm_tcs_files()
|
try_rm_tcs_files()
|
||||||
if is_playlist and res_dict is not None:
|
if is_playlist and res_dict is not None and res_dict.get('entries'):
|
||||||
# Remove all other files that may have been extracted if the
|
# Remove all other files that may have been extracted if the
|
||||||
# extractor returns full results even with extract_flat
|
# extractor returns full results even with extract_flat
|
||||||
res_tcs = [{'info_dict': e} for e in res_dict['entries']]
|
res_tcs = [{'info_dict': e} for e in res_dict['entries']]
|
||||||
|
@@ -10,7 +10,6 @@ from test.helper import FakeYDL
|
|||||||
|
|
||||||
|
|
||||||
from youtube_dl.extractor import (
|
from youtube_dl.extractor import (
|
||||||
YoutubeUserIE,
|
|
||||||
YoutubePlaylistIE,
|
YoutubePlaylistIE,
|
||||||
YoutubeIE,
|
YoutubeIE,
|
||||||
YoutubeChannelIE,
|
YoutubeChannelIE,
|
||||||
@@ -43,28 +42,6 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
self.assertEqual(len(entries), 25)
|
self.assertEqual(len(entries), 25)
|
||||||
self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
||||||
|
|
||||||
def test_youtube_channel(self):
|
|
||||||
dl = FakeYDL()
|
|
||||||
ie = YoutubeChannelIE(dl)
|
|
||||||
#test paginated channel
|
|
||||||
result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')
|
|
||||||
self.assertTrue(len(result['entries']) > 90)
|
|
||||||
#test autogenerated channel
|
|
||||||
result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
|
|
||||||
self.assertTrue(len(result['entries']) >= 18)
|
|
||||||
|
|
||||||
def test_youtube_user(self):
|
|
||||||
dl = FakeYDL()
|
|
||||||
ie = YoutubeUserIE(dl)
|
|
||||||
result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
|
|
||||||
self.assertTrue(len(result['entries']) >= 320)
|
|
||||||
|
|
||||||
def test_youtube_show(self):
|
|
||||||
dl = FakeYDL()
|
|
||||||
ie = YoutubeShowIE(dl)
|
|
||||||
result = ie.extract('http://www.youtube.com/show/airdisasters')
|
|
||||||
self.assertTrue(len(result) >= 3)
|
|
||||||
|
|
||||||
def test_youtube_mix(self):
|
def test_youtube_mix(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
@@ -83,21 +60,5 @@ class TestYoutubeLists(unittest.TestCase):
|
|||||||
entries = result['entries']
|
entries = result['entries']
|
||||||
self.assertEqual(len(entries), 100)
|
self.assertEqual(len(entries), 100)
|
||||||
|
|
||||||
def test_youtube_toplist(self):
|
|
||||||
dl = FakeYDL()
|
|
||||||
ie = YoutubeTopListIE(dl)
|
|
||||||
result = ie.extract('yttoplist:music:Trending')
|
|
||||||
entries = result['entries']
|
|
||||||
self.assertTrue(len(entries) >= 5)
|
|
||||||
|
|
||||||
def test_youtube_search_url(self):
|
|
||||||
dl = FakeYDL()
|
|
||||||
ie = YoutubeSearchURLIE(dl)
|
|
||||||
result = ie.extract('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video')
|
|
||||||
entries = result['entries']
|
|
||||||
self.assertIsPlaylist(result)
|
|
||||||
self.assertEqual(result['title'], 'youtube-dl test video')
|
|
||||||
self.assertTrue(len(entries) >= 5)
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@@ -1250,12 +1250,13 @@ class YoutubeDL(object):
|
|||||||
# urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
|
# urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
|
||||||
# To work around aforementioned issue we will replace request's original URL with
|
# To work around aforementioned issue we will replace request's original URL with
|
||||||
# percent-encoded one
|
# percent-encoded one
|
||||||
url = req if isinstance(req, compat_str) else req.get_full_url()
|
req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
|
||||||
|
url = req if req_is_string else req.get_full_url()
|
||||||
url_escaped = escape_url(url)
|
url_escaped = escape_url(url)
|
||||||
|
|
||||||
# Substitute URL if any change after escaping
|
# Substitute URL if any change after escaping
|
||||||
if url != url_escaped:
|
if url != url_escaped:
|
||||||
if isinstance(req, compat_str):
|
if req_is_string:
|
||||||
req = url_escaped
|
req = url_escaped
|
||||||
else:
|
else:
|
||||||
req = compat_urllib_request.Request(
|
req = compat_urllib_request.Request(
|
||||||
|
@@ -78,6 +78,7 @@ __authors__ = (
|
|||||||
'Hari Padmanaban',
|
'Hari Padmanaban',
|
||||||
'Carlos Ramos',
|
'Carlos Ramos',
|
||||||
'5moufl',
|
'5moufl',
|
||||||
|
'lenaten',
|
||||||
)
|
)
|
||||||
|
|
||||||
__license__ = 'Public Domain'
|
__license__ = 'Public Domain'
|
||||||
|
@@ -2,6 +2,7 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
from .common import FileDownloader
|
from .common import FileDownloader
|
||||||
from .hls import HlsFD
|
from .hls import HlsFD
|
||||||
|
from .hls import NativeHlsFD
|
||||||
from .http import HttpFD
|
from .http import HttpFD
|
||||||
from .mplayer import MplayerFD
|
from .mplayer import MplayerFD
|
||||||
from .rtmp import RtmpFD
|
from .rtmp import RtmpFD
|
||||||
@@ -19,6 +20,8 @@ def get_suitable_downloader(info_dict):
|
|||||||
|
|
||||||
if url.startswith('rtmp'):
|
if url.startswith('rtmp'):
|
||||||
return RtmpFD
|
return RtmpFD
|
||||||
|
if protocol == 'm3u8_native':
|
||||||
|
return NativeHlsFD
|
||||||
if (protocol == 'm3u8') or (protocol is None and determine_ext(url) == 'm3u8'):
|
if (protocol == 'm3u8') or (protocol is None and determine_ext(url) == 'm3u8'):
|
||||||
return HlsFD
|
return HlsFD
|
||||||
if url.startswith('mms') or url.startswith('rtsp'):
|
if url.startswith('mms') or url.startswith('rtsp'):
|
||||||
|
@@ -42,6 +42,7 @@ class FileDownloader(object):
|
|||||||
Subclasses of this one must re-define the real_download method.
|
Subclasses of this one must re-define the real_download method.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
_TEST_FILE_SIZE = 10241
|
||||||
params = None
|
params = None
|
||||||
|
|
||||||
def __init__(self, ydl, params):
|
def __init__(self, ydl, params):
|
||||||
|
@@ -1,8 +1,13 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
from .common import FileDownloader
|
from .common import FileDownloader
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
compat_urlparse,
|
||||||
|
compat_urllib_request,
|
||||||
check_executable,
|
check_executable,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
)
|
)
|
||||||
@@ -43,3 +48,57 @@ class HlsFD(FileDownloader):
|
|||||||
self.to_stderr(u"\n")
|
self.to_stderr(u"\n")
|
||||||
self.report_error(u'%s exited with code %d' % (program, retval))
|
self.report_error(u'%s exited with code %d' % (program, retval))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
class NativeHlsFD(FileDownloader):
|
||||||
|
""" A more limited implementation that does not require ffmpeg """
|
||||||
|
|
||||||
|
def real_download(self, filename, info_dict):
|
||||||
|
url = info_dict['url']
|
||||||
|
self.report_destination(filename)
|
||||||
|
tmpfilename = self.temp_name(filename)
|
||||||
|
|
||||||
|
self.to_screen(
|
||||||
|
'[hlsnative] %s: Downloading m3u8 manifest' % info_dict['id'])
|
||||||
|
data = self.ydl.urlopen(url).read()
|
||||||
|
s = data.decode('utf-8', 'ignore')
|
||||||
|
segment_urls = []
|
||||||
|
for line in s.splitlines():
|
||||||
|
line = line.strip()
|
||||||
|
if line and not line.startswith('#'):
|
||||||
|
segment_url = (
|
||||||
|
line
|
||||||
|
if re.match(r'^https?://', line)
|
||||||
|
else compat_urlparse.urljoin(url, line))
|
||||||
|
segment_urls.append(segment_url)
|
||||||
|
|
||||||
|
is_test = self.params.get('test', False)
|
||||||
|
remaining_bytes = self._TEST_FILE_SIZE if is_test else None
|
||||||
|
byte_counter = 0
|
||||||
|
with open(tmpfilename, 'wb') as outf:
|
||||||
|
for i, segurl in enumerate(segment_urls):
|
||||||
|
self.to_screen(
|
||||||
|
'[hlsnative] %s: Downloading segment %d / %d' %
|
||||||
|
(info_dict['id'], i + 1, len(segment_urls)))
|
||||||
|
seg_req = compat_urllib_request.Request(segurl)
|
||||||
|
if remaining_bytes is not None:
|
||||||
|
seg_req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1))
|
||||||
|
|
||||||
|
segment = self.ydl.urlopen(seg_req).read()
|
||||||
|
if remaining_bytes is not None:
|
||||||
|
segment = segment[:remaining_bytes]
|
||||||
|
remaining_bytes -= len(segment)
|
||||||
|
outf.write(segment)
|
||||||
|
byte_counter += len(segment)
|
||||||
|
if remaining_bytes is not None and remaining_bytes <= 0:
|
||||||
|
break
|
||||||
|
|
||||||
|
self._hook_progress({
|
||||||
|
'downloaded_bytes': byte_counter,
|
||||||
|
'total_bytes': byte_counter,
|
||||||
|
'filename': filename,
|
||||||
|
'status': 'finished',
|
||||||
|
})
|
||||||
|
self.try_rename(tmpfilename, filename)
|
||||||
|
return True
|
||||||
|
|
||||||
|
@@ -14,8 +14,6 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class HttpFD(FileDownloader):
|
class HttpFD(FileDownloader):
|
||||||
_TEST_FILE_SIZE = 10241
|
|
||||||
|
|
||||||
def real_download(self, filename, info_dict):
|
def real_download(self, filename, info_dict):
|
||||||
url = info_dict['url']
|
url = info_dict['url']
|
||||||
tmpfilename = self.temp_name(filename)
|
tmpfilename = self.temp_name(filename)
|
||||||
|
@@ -135,12 +135,14 @@ from .gametrailers import GametrailersIE
|
|||||||
from .gdcvault import GDCVaultIE
|
from .gdcvault import GDCVaultIE
|
||||||
from .generic import GenericIE
|
from .generic import GenericIE
|
||||||
from .godtube import GodTubeIE
|
from .godtube import GodTubeIE
|
||||||
|
from .golem import GolemIE
|
||||||
from .googleplus import GooglePlusIE
|
from .googleplus import GooglePlusIE
|
||||||
from .googlesearch import GoogleSearchIE
|
from .googlesearch import GoogleSearchIE
|
||||||
from .gorillavid import GorillaVidIE
|
from .gorillavid import GorillaVidIE
|
||||||
from .goshgay import GoshgayIE
|
from .goshgay import GoshgayIE
|
||||||
from .grooveshark import GroovesharkIE
|
from .grooveshark import GroovesharkIE
|
||||||
from .hark import HarkIE
|
from .hark import HarkIE
|
||||||
|
from .heise import HeiseIE
|
||||||
from .helsinki import HelsinkiIE
|
from .helsinki import HelsinkiIE
|
||||||
from .hentaistigma import HentaiStigmaIE
|
from .hentaistigma import HentaiStigmaIE
|
||||||
from .hornbunny import HornBunnyIE
|
from .hornbunny import HornBunnyIE
|
||||||
@@ -240,6 +242,7 @@ from .ndtv import NDTVIE
|
|||||||
from .newgrounds import NewgroundsIE
|
from .newgrounds import NewgroundsIE
|
||||||
from .newstube import NewstubeIE
|
from .newstube import NewstubeIE
|
||||||
from .nfb import NFBIE
|
from .nfb import NFBIE
|
||||||
|
from .nfl import NFLIE
|
||||||
from .nhl import NHLIE, NHLVideocenterIE
|
from .nhl import NHLIE, NHLVideocenterIE
|
||||||
from .niconico import NiconicoIE
|
from .niconico import NiconicoIE
|
||||||
from .ninegag import NineGagIE
|
from .ninegag import NineGagIE
|
||||||
@@ -260,6 +263,7 @@ from .nrk import (
|
|||||||
from .ntv import NTVIE
|
from .ntv import NTVIE
|
||||||
from .nytimes import NYTimesIE
|
from .nytimes import NYTimesIE
|
||||||
from .nuvid import NuvidIE
|
from .nuvid import NuvidIE
|
||||||
|
from .oktoberfesttv import OktoberfestTVIE
|
||||||
from .ooyala import OoyalaIE
|
from .ooyala import OoyalaIE
|
||||||
from .orf import (
|
from .orf import (
|
||||||
ORFTVthekIE,
|
ORFTVthekIE,
|
||||||
@@ -270,6 +274,7 @@ from .parliamentliveuk import ParliamentLiveUKIE
|
|||||||
from .patreon import PatreonIE
|
from .patreon import PatreonIE
|
||||||
from .pbs import PBSIE
|
from .pbs import PBSIE
|
||||||
from .photobucket import PhotobucketIE
|
from .photobucket import PhotobucketIE
|
||||||
|
from .played import PlayedIE
|
||||||
from .playfm import PlayFMIE
|
from .playfm import PlayFMIE
|
||||||
from .playvid import PlayvidIE
|
from .playvid import PlayvidIE
|
||||||
from .podomatic import PodomaticIE
|
from .podomatic import PodomaticIE
|
||||||
@@ -338,6 +343,7 @@ from .spankwire import SpankwireIE
|
|||||||
from .spiegel import SpiegelIE, SpiegelArticleIE
|
from .spiegel import SpiegelIE, SpiegelArticleIE
|
||||||
from .spiegeltv import SpiegeltvIE
|
from .spiegeltv import SpiegeltvIE
|
||||||
from .spike import SpikeIE
|
from .spike import SpikeIE
|
||||||
|
from .sport5 import Sport5IE
|
||||||
from .sportdeutschland import SportDeutschlandIE
|
from .sportdeutschland import SportDeutschlandIE
|
||||||
from .stanfordoc import StanfordOpenClassroomIE
|
from .stanfordoc import StanfordOpenClassroomIE
|
||||||
from .steam import SteamIE
|
from .steam import SteamIE
|
||||||
@@ -406,11 +412,12 @@ from .videoweed import VideoWeedIE
|
|||||||
from .vidme import VidmeIE
|
from .vidme import VidmeIE
|
||||||
from .vimeo import (
|
from .vimeo import (
|
||||||
VimeoIE,
|
VimeoIE,
|
||||||
VimeoChannelIE,
|
|
||||||
VimeoUserIE,
|
|
||||||
VimeoAlbumIE,
|
VimeoAlbumIE,
|
||||||
|
VimeoChannelIE,
|
||||||
VimeoGroupsIE,
|
VimeoGroupsIE,
|
||||||
|
VimeoLikesIE,
|
||||||
VimeoReviewIE,
|
VimeoReviewIE,
|
||||||
|
VimeoUserIE,
|
||||||
VimeoWatchLaterIE,
|
VimeoWatchLaterIE,
|
||||||
)
|
)
|
||||||
from .vimple import VimpleIE
|
from .vimple import VimpleIE
|
||||||
@@ -449,6 +456,7 @@ from .yahoo import (
|
|||||||
YahooNewsIE,
|
YahooNewsIE,
|
||||||
YahooSearchIE,
|
YahooSearchIE,
|
||||||
)
|
)
|
||||||
|
from .ynet import YnetIE
|
||||||
from .youjizz import YouJizzIE
|
from .youjizz import YouJizzIE
|
||||||
from .youku import YoukuIE
|
from .youku import YoukuIE
|
||||||
from .youporn import YouPornIE
|
from .youporn import YouPornIE
|
||||||
|
@@ -22,8 +22,7 @@ class ABCIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
urls_info_json = self._search_regex(
|
urls_info_json = self._search_regex(
|
||||||
|
@@ -35,7 +35,7 @@ class AnySexIE(InfoExtractor):
|
|||||||
|
|
||||||
title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
|
title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
|
||||||
description = self._html_search_regex(
|
description = self._html_search_regex(
|
||||||
r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False)
|
r'<div class="description"[^>]*>([^<]+)</div>', webpage, 'description', fatal=False)
|
||||||
thumbnail = self._html_search_regex(
|
thumbnail = self._html_search_regex(
|
||||||
r'preview_url\s*:\s*\'(.*?)\'', webpage, 'thumbnail', fatal=False)
|
r'preview_url\s*:\s*\'(.*?)\'', webpage, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
@@ -43,7 +43,7 @@ class AnySexIE(InfoExtractor):
|
|||||||
r'<a href="http://anysex\.com/categories/[^"]+" title="[^"]*">([^<]+)</a>', webpage)
|
r'<a href="http://anysex\.com/categories/[^"]+" title="[^"]*">([^<]+)</a>', webpage)
|
||||||
|
|
||||||
duration = parse_duration(self._search_regex(
|
duration = parse_duration(self._search_regex(
|
||||||
r'<b>Duration:</b> (\d+:\d+)', webpage, 'duration', fatal=False))
|
r'<b>Duration:</b> (?:<q itemprop="duration">)?(\d+:\d+)', webpage, 'duration', fatal=False))
|
||||||
view_count = int_or_none(self._html_search_regex(
|
view_count = int_or_none(self._html_search_regex(
|
||||||
r'<b>Views:</b> (\d+)', webpage, 'view count', fatal=False))
|
r'<b>Views:</b> (\d+)', webpage, 'view count', fatal=False))
|
||||||
|
|
||||||
|
@@ -8,8 +8,6 @@ from ..utils import (
|
|||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
qualities,
|
qualities,
|
||||||
compat_urllib_parse_urlparse,
|
|
||||||
compat_urllib_parse,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
@@ -1,6 +1,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
|
import datetime
|
||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import netrc
|
import netrc
|
||||||
@@ -15,11 +16,13 @@ from ..utils import (
|
|||||||
compat_http_client,
|
compat_http_client,
|
||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
|
compat_urlparse,
|
||||||
compat_str,
|
compat_str,
|
||||||
|
|
||||||
clean_html,
|
clean_html,
|
||||||
compiled_regex_type,
|
compiled_regex_type,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
RegexNotFoundError,
|
RegexNotFoundError,
|
||||||
sanitize_filename,
|
sanitize_filename,
|
||||||
@@ -163,6 +166,14 @@ class InfoExtractor(object):
|
|||||||
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
|
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
|
||||||
return cls._VALID_URL_RE.match(url) is not None
|
return cls._VALID_URL_RE.match(url) is not None
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _match_id(cls, url):
|
||||||
|
if '_VALID_URL_RE' not in cls.__dict__:
|
||||||
|
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
|
||||||
|
m = cls._VALID_URL_RE.match(url)
|
||||||
|
assert m
|
||||||
|
return m.group('id')
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def working(cls):
|
def working(cls):
|
||||||
"""Getter method for _WORKING."""
|
"""Getter method for _WORKING."""
|
||||||
@@ -640,7 +651,9 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None):
|
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
||||||
|
entry_protocol='m3u8', preference=None):
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'format_id': 'm3u8-meta',
|
'format_id': 'm3u8-meta',
|
||||||
'url': m3u8_url,
|
'url': m3u8_url,
|
||||||
@@ -651,6 +664,11 @@ class InfoExtractor(object):
|
|||||||
'format_note': 'Quality selection URL',
|
'format_note': 'Quality selection URL',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
format_url = lambda u: (
|
||||||
|
u
|
||||||
|
if re.match(r'^https?://', u)
|
||||||
|
else compat_urlparse.urljoin(m3u8_url, u))
|
||||||
|
|
||||||
m3u8_doc = self._download_webpage(m3u8_url, video_id)
|
m3u8_doc = self._download_webpage(m3u8_url, video_id)
|
||||||
last_info = None
|
last_info = None
|
||||||
kv_rex = re.compile(
|
kv_rex = re.compile(
|
||||||
@@ -667,15 +685,17 @@ class InfoExtractor(object):
|
|||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
if last_info is None:
|
if last_info is None:
|
||||||
formats.append({'url': line})
|
formats.append({'url': format_url(line)})
|
||||||
continue
|
continue
|
||||||
tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
|
tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
|
||||||
|
|
||||||
f = {
|
f = {
|
||||||
'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)),
|
'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)),
|
||||||
'url': line.strip(),
|
'url': format_url(line.strip()),
|
||||||
'tbr': tbr,
|
'tbr': tbr,
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
|
'protocol': entry_protocol,
|
||||||
|
'preference': preference,
|
||||||
}
|
}
|
||||||
codecs = last_info.get('CODECS')
|
codecs = last_info.get('CODECS')
|
||||||
if codecs:
|
if codecs:
|
||||||
@@ -695,6 +715,34 @@ class InfoExtractor(object):
|
|||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
|
def _live_title(self, name):
|
||||||
|
""" Generate the title for a live video """
|
||||||
|
now = datetime.datetime.now()
|
||||||
|
now_str = now.strftime("%Y-%m-%d %H:%M")
|
||||||
|
return name + ' ' + now_str
|
||||||
|
|
||||||
|
def _int(self, v, name, fatal=False, **kwargs):
|
||||||
|
res = int_or_none(v, **kwargs)
|
||||||
|
if 'get_attr' in kwargs:
|
||||||
|
print(getattr(v, kwargs['get_attr']))
|
||||||
|
if res is None:
|
||||||
|
msg = 'Failed to extract %s: Could not parse value %r' % (name, v)
|
||||||
|
if fatal:
|
||||||
|
raise ExtractorError(msg)
|
||||||
|
else:
|
||||||
|
self._downloader.report_warning(msg)
|
||||||
|
return res
|
||||||
|
|
||||||
|
def _float(self, v, name, fatal=False, **kwargs):
|
||||||
|
res = float_or_none(v, **kwargs)
|
||||||
|
if res is None:
|
||||||
|
msg = 'Failed to extract %s: Could not parse value %r' % (name, v)
|
||||||
|
if fatal:
|
||||||
|
raise ExtractorError(msg)
|
||||||
|
else:
|
||||||
|
self._downloader.report_warning(msg)
|
||||||
|
return res
|
||||||
|
|
||||||
|
|
||||||
class SearchInfoExtractor(InfoExtractor):
|
class SearchInfoExtractor(InfoExtractor):
|
||||||
"""
|
"""
|
||||||
|
@@ -9,7 +9,7 @@ import xml.etree.ElementTree
|
|||||||
|
|
||||||
from hashlib import sha1
|
from hashlib import sha1
|
||||||
from math import pow, sqrt, floor
|
from math import pow, sqrt, floor
|
||||||
from .common import InfoExtractor
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
@@ -26,7 +26,7 @@ from ..aes import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class CrunchyrollIE(InfoExtractor):
|
class CrunchyrollIE(SubtitlesInfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||||
@@ -271,6 +271,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
else:
|
else:
|
||||||
subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle)
|
subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle)
|
||||||
|
|
||||||
|
if self._downloader.params.get('listsubtitles', False):
|
||||||
|
self._list_available_subtitles(video_id, subtitles)
|
||||||
|
return
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
|
@@ -1,4 +1,6 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@@ -7,20 +9,20 @@ from ..utils import ExtractorError
|
|||||||
|
|
||||||
|
|
||||||
class EitbIE(InfoExtractor):
|
class EitbIE(InfoExtractor):
|
||||||
IE_NAME = u'eitb.tv'
|
IE_NAME = 'eitb.tv'
|
||||||
_VALID_URL = r'https?://www\.eitb\.tv/(eu/bideoa|es/video)/[^/]+/(?P<playlist_id>\d+)/(?P<chapter_id>\d+)'
|
_VALID_URL = r'https?://www\.eitb\.tv/(eu/bideoa|es/video)/[^/]+/(?P<playlist_id>\d+)/(?P<chapter_id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'add_ie': ['Brightcove'],
|
'add_ie': ['Brightcove'],
|
||||||
u'url': u'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/2677100210001/2743577154001/lasa-y-zabala-30-anos/',
|
'url': 'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/2677100210001/2743577154001/lasa-y-zabala-30-anos/',
|
||||||
u'md5': u'edf4436247185adee3ea18ce64c47998',
|
'md5': 'edf4436247185adee3ea18ce64c47998',
|
||||||
u'info_dict': {
|
'info_dict': {
|
||||||
u'id': u'2743577154001',
|
'id': '2743577154001',
|
||||||
u'ext': u'mp4',
|
'ext': 'mp4',
|
||||||
u'title': u'60 minutos (Lasa y Zabala, 30 años)',
|
'title': '60 minutos (Lasa y Zabala, 30 años)',
|
||||||
# All videos from eitb has this description in the brightcove info
|
# All videos from eitb has this description in the brightcove info
|
||||||
u'description': u'.',
|
'description': '.',
|
||||||
u'uploader': u'Euskal Telebista',
|
'uploader': 'Euskal Telebista',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -30,7 +32,7 @@ class EitbIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, chapter_id)
|
webpage = self._download_webpage(url, chapter_id)
|
||||||
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
|
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
|
||||||
if bc_url is None:
|
if bc_url is None:
|
||||||
raise ExtractorError(u'Could not extract the Brightcove url')
|
raise ExtractorError('Could not extract the Brightcove url')
|
||||||
# The BrightcoveExperience object doesn't contain the video id, we set
|
# The BrightcoveExperience object doesn't contain the video id, we set
|
||||||
# it manually
|
# it manually
|
||||||
bc_url += '&%40videoPlayer={0}'.format(chapter_id)
|
bc_url += '&%40videoPlayer={0}'.format(chapter_id)
|
||||||
|
@@ -7,6 +7,7 @@ from ..utils import (
|
|||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
|
str_to_int,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -20,6 +21,7 @@ class ExtremeTubeIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Music Video 14 british euro brit european cumshots swallow',
|
'title': 'Music Video 14 british euro brit european cumshots swallow',
|
||||||
'uploader': 'unknown',
|
'uploader': 'unknown',
|
||||||
|
'view_count': int,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
@@ -39,8 +41,12 @@ class ExtremeTubeIE(InfoExtractor):
|
|||||||
video_title = self._html_search_regex(
|
video_title = self._html_search_regex(
|
||||||
r'<h1 [^>]*?title="([^"]+)"[^>]*>', webpage, 'title')
|
r'<h1 [^>]*?title="([^"]+)"[^>]*>', webpage, 'title')
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, 'uploader',
|
r'Uploaded by:\s*</strong>\s*(.+?)\s*</div>',
|
||||||
fatal=False)
|
webpage, 'uploader', fatal=False)
|
||||||
|
view_count = str_to_int(self._html_search_regex(
|
||||||
|
r'Views:\s*</strong>\s*<span>([\d,\.]+)</span>',
|
||||||
|
webpage, 'view count', fatal=False))
|
||||||
|
|
||||||
video_url = compat_urllib_parse.unquote(self._html_search_regex(
|
video_url = compat_urllib_parse.unquote(self._html_search_regex(
|
||||||
r'video_url=(.+?)&', webpage, 'video_url'))
|
r'video_url=(.+?)&', webpage, 'video_url'))
|
||||||
path = compat_urllib_parse_urlparse(video_url).path
|
path = compat_urllib_parse_urlparse(video_url).path
|
||||||
@@ -51,6 +57,7 @@ class ExtremeTubeIE(InfoExtractor):
|
|||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
|
'view_count': view_count,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'format': format,
|
'format': format,
|
||||||
'format_id': format,
|
'format_id': format,
|
||||||
|
@@ -10,13 +10,13 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class FlickrIE(InfoExtractor):
|
class FlickrIE(InfoExtractor):
|
||||||
"""Information Extractor for Flickr videos"""
|
_VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
|
||||||
_VALID_URL = r'(?:https?://)?(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',
|
'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',
|
||||||
'file': '5645318632.mp4',
|
|
||||||
'md5': '6fdc01adbc89d72fc9c4f15b4a4ba87b',
|
'md5': '6fdc01adbc89d72fc9c4f15b4a4ba87b',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
'id': '5645318632',
|
||||||
|
'ext': 'mp4',
|
||||||
"description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.",
|
"description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.",
|
||||||
"uploader_id": "forestwander-nature-pictures",
|
"uploader_id": "forestwander-nature-pictures",
|
||||||
"title": "Dark Hollow Waterfalls"
|
"title": "Dark Hollow Waterfalls"
|
||||||
@@ -49,12 +49,12 @@ class FlickrIE(InfoExtractor):
|
|||||||
raise ExtractorError('Unable to extract video url')
|
raise ExtractorError('Unable to extract video url')
|
||||||
video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
|
video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
|
||||||
|
|
||||||
return [{
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': self._og_search_title(webpage),
|
'title': self._og_search_title(webpage),
|
||||||
'description': self._og_search_description(webpage),
|
'description': self._og_search_description(webpage),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'uploader_id': video_uploader_id,
|
'uploader_id': video_uploader_id,
|
||||||
}]
|
}
|
||||||
|
@@ -382,14 +382,21 @@ class GenericIE(InfoExtractor):
|
|||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# Wistia embed
|
||||||
|
{
|
||||||
|
'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
|
||||||
|
'md5': '8788b683c777a5cf25621eaf286d0c23',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1cfaf6b7ea',
|
||||||
|
'ext': 'mov',
|
||||||
|
'title': 'md5:51364a8d3d009997ba99656004b5e20d',
|
||||||
|
'duration': 643.0,
|
||||||
|
'filesize': 182808282,
|
||||||
|
'uploader': 'education-portal.com',
|
||||||
|
},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def report_download_webpage(self, video_id):
|
|
||||||
"""Report webpage download."""
|
|
||||||
if not self._downloader.params.get('test', False):
|
|
||||||
self._downloader.report_warning('Falling back on generic information extractor.')
|
|
||||||
super(GenericIE, self).report_download_webpage(video_id)
|
|
||||||
|
|
||||||
def report_following_redirect(self, new_url):
|
def report_following_redirect(self, new_url):
|
||||||
"""Report information extraction."""
|
"""Report information extraction."""
|
||||||
self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
|
self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
|
||||||
@@ -489,6 +496,7 @@ class GenericIE(InfoExtractor):
|
|||||||
|
|
||||||
url, smuggled_data = unsmuggle_url(url)
|
url, smuggled_data = unsmuggle_url(url)
|
||||||
force_videoid = None
|
force_videoid = None
|
||||||
|
is_intentional = smuggled_data and smuggled_data.get('to_generic')
|
||||||
if smuggled_data and 'force_videoid' in smuggled_data:
|
if smuggled_data and 'force_videoid' in smuggled_data:
|
||||||
force_videoid = smuggled_data['force_videoid']
|
force_videoid = smuggled_data['force_videoid']
|
||||||
video_id = force_videoid
|
video_id = force_videoid
|
||||||
@@ -531,6 +539,9 @@ class GenericIE(InfoExtractor):
|
|||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if not self._downloader.params.get('test', False) and not is_intentional:
|
||||||
|
self._downloader.report_warning('Falling back on generic information extractor.')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
@@ -584,7 +595,9 @@ class GenericIE(InfoExtractor):
|
|||||||
|
|
||||||
# Helper method
|
# Helper method
|
||||||
def _playlist_from_matches(matches, getter, ie=None):
|
def _playlist_from_matches(matches, getter, ie=None):
|
||||||
urlrs = orderedSet(self.url_result(getter(m), ie) for m in matches)
|
urlrs = orderedSet(
|
||||||
|
self.url_result(self._proto_relative_url(getter(m)), ie)
|
||||||
|
for m in matches)
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||||
|
|
||||||
@@ -629,11 +642,11 @@ class GenericIE(InfoExtractor):
|
|||||||
)
|
)
|
||||||
(["\'])
|
(["\'])
|
||||||
(?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
|
(?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
|
||||||
(?:embed|v)/.+?)
|
(?:embed|v|p)/.+?)
|
||||||
\1''', webpage)
|
\1''', webpage)
|
||||||
if matches:
|
if matches:
|
||||||
return _playlist_from_matches(
|
return _playlist_from_matches(
|
||||||
matches, lambda m: unescapeHTML(m[1]), ie='Youtube')
|
matches, lambda m: unescapeHTML(m[1]))
|
||||||
|
|
||||||
# Look for embedded Dailymotion player
|
# Look for embedded Dailymotion player
|
||||||
matches = re.findall(
|
matches = re.findall(
|
||||||
@@ -654,6 +667,16 @@ class GenericIE(InfoExtractor):
|
|||||||
'title': video_title,
|
'title': video_title,
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
}
|
}
|
||||||
|
match = re.search(r'(?:id=["\']wistia_|data-wistiaid=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
|
||||||
|
if match:
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
|
||||||
|
'ie_key': 'Wistia',
|
||||||
|
'uploader': video_uploader,
|
||||||
|
'title': video_title,
|
||||||
|
'id': match.group('id')
|
||||||
|
}
|
||||||
|
|
||||||
# Look for embedded blip.tv player
|
# Look for embedded blip.tv player
|
||||||
mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
|
mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
|
||||||
|
71
youtube_dl/extractor/golem.py
Normal file
71
youtube_dl/extractor/golem.py
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urlparse,
|
||||||
|
determine_ext,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class GolemIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'^https?://video\.golem\.de/.+?/(?P<id>.+?)/'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html',
|
||||||
|
'md5': 'c1a2c0a3c863319651c7c992c5ee29bf',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '14095',
|
||||||
|
'format_id': 'high',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'iPhone 6 und 6 Plus - Test',
|
||||||
|
'duration': 300.44,
|
||||||
|
'filesize': 65309548,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
_PREFIX = 'http://video.golem.de'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
config = self._download_xml(
|
||||||
|
'https://video.golem.de/xml/{0}.xml'.format(video_id), video_id)
|
||||||
|
|
||||||
|
info = {
|
||||||
|
'id': video_id,
|
||||||
|
'title': config.findtext('./title', 'golem'),
|
||||||
|
'duration': self._float(config.findtext('./playtime'), 'duration'),
|
||||||
|
}
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for e in config.findall('./*[url]'):
|
||||||
|
url = e.findtext('./url')
|
||||||
|
if not url:
|
||||||
|
self._downloader.report_warning(
|
||||||
|
"{0}: url: empty, skipping".format(e.tag))
|
||||||
|
continue
|
||||||
|
|
||||||
|
formats.append({
|
||||||
|
'format_id': e.tag,
|
||||||
|
'url': compat_urlparse.urljoin(self._PREFIX, url),
|
||||||
|
'height': self._int(e.get('height'), 'height'),
|
||||||
|
'width': self._int(e.get('width'), 'width'),
|
||||||
|
'filesize': self._int(e.findtext('filesize'), 'filesize'),
|
||||||
|
'ext': determine_ext(e.findtext('./filename')),
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
info['formats'] = formats
|
||||||
|
|
||||||
|
thumbnails = []
|
||||||
|
for e in config.findall('.//teaser[url]'):
|
||||||
|
url = e.findtext('./url')
|
||||||
|
if not url:
|
||||||
|
continue
|
||||||
|
thumbnails.append({
|
||||||
|
'url': compat_urlparse.urljoin(self._PREFIX, url),
|
||||||
|
'width': self._int(e.get('width'), 'thumbnail width'),
|
||||||
|
'height': self._int(e.get('height'), 'thumbnail height'),
|
||||||
|
})
|
||||||
|
info['thumbnails'] = thumbnails
|
||||||
|
|
||||||
|
return info
|
81
youtube_dl/extractor/heise.py
Normal file
81
youtube_dl/extractor/heise.py
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
get_meta_content,
|
||||||
|
parse_iso8601,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class HeiseIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://(?:www\.)?heise\.de/video/artikel/
|
||||||
|
.+?(?P<id>[0-9]+)\.html(?:$|[?#])
|
||||||
|
'''
|
||||||
|
_TEST = {
|
||||||
|
'url': (
|
||||||
|
'http://www.heise.de/video/artikel/Podcast-c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2404147.html'
|
||||||
|
),
|
||||||
|
'md5': 'ffed432483e922e88545ad9f2f15d30e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2404147',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': (
|
||||||
|
"Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone"
|
||||||
|
),
|
||||||
|
'format_id': 'mp4_720',
|
||||||
|
'timestamp': 1411812600,
|
||||||
|
'upload_date': '20140927',
|
||||||
|
'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
json_url = self._search_regex(
|
||||||
|
r'json_url:\s*"([^"]+)"', webpage, 'json URL')
|
||||||
|
config = self._download_json(json_url, video_id)
|
||||||
|
|
||||||
|
info = {
|
||||||
|
'id': video_id,
|
||||||
|
'thumbnail': config.get('poster'),
|
||||||
|
'timestamp': parse_iso8601(get_meta_content('date', webpage)),
|
||||||
|
'description': self._og_search_description(webpage),
|
||||||
|
}
|
||||||
|
|
||||||
|
title = get_meta_content('fulltitle', webpage)
|
||||||
|
if title:
|
||||||
|
info['title'] = title
|
||||||
|
elif config.get('title'):
|
||||||
|
info['title'] = config['title']
|
||||||
|
else:
|
||||||
|
info['title'] = self._og_search_title(webpage)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for t, rs in config['formats'].items():
|
||||||
|
if not rs or not hasattr(rs, 'items'):
|
||||||
|
self._downloader.report_warning(
|
||||||
|
'formats: {0}: no resolutions'.format(t))
|
||||||
|
continue
|
||||||
|
|
||||||
|
for height_str, obj in rs.items():
|
||||||
|
format_id = '{0}_{1}'.format(t, height_str)
|
||||||
|
|
||||||
|
if not obj or not obj.get('url'):
|
||||||
|
self._downloader.report_warning(
|
||||||
|
'formats: {0}: no url'.format(format_id))
|
||||||
|
continue
|
||||||
|
|
||||||
|
formats.append({
|
||||||
|
'url': obj['url'],
|
||||||
|
'format_id': format_id,
|
||||||
|
'height': self._int(height_str, 'height'),
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
info['formats'] = formats
|
||||||
|
|
||||||
|
return info
|
@@ -1,7 +1,6 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import datetime
|
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@@ -23,6 +22,7 @@ class MuenchenTVIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 're:^münchen.tv-Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
'title': 're:^münchen.tv-Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$'
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@@ -33,9 +33,7 @@ class MuenchenTVIE(InfoExtractor):
|
|||||||
display_id = 'live'
|
display_id = 'live'
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
now = datetime.datetime.now()
|
title = self._live_title(self._og_search_title(webpage))
|
||||||
now_str = now.strftime("%Y-%m-%d %H:%M")
|
|
||||||
title = self._og_search_title(webpage) + ' ' + now_str
|
|
||||||
|
|
||||||
data_js = self._search_regex(
|
data_js = self._search_regex(
|
||||||
r'(?s)\nplaylist:\s*(\[.*?}\]),related:',
|
r'(?s)\nplaylist:\s*(\[.*?}\]),related:',
|
||||||
@@ -73,5 +71,6 @@ class MuenchenTVIE(InfoExtractor):
|
|||||||
'title': title,
|
'title': title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
103
youtube_dl/extractor/nfl.py
Normal file
103
youtube_dl/extractor/nfl.py
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
remove_end,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class NFLIE(InfoExtractor):
|
||||||
|
IE_NAME = 'nfl.com'
|
||||||
|
_VALID_URL = r'(?x)https?://(?:www\.)?nfl\.com/(?:videos/(?:.+)/|.*?\#video=)(?P<id>\d..[0-9]+)'
|
||||||
|
_PLAYER_CONFIG_URL = 'http://www.nfl.com/static/content/static/config/video/config.json'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
|
||||||
|
# 'md5': '5eb8c40a727dda106d510e5d6ffa79e5', # md5 checksum fluctuates
|
||||||
|
'info_dict': {
|
||||||
|
'id': '0ap3000000398478',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Week 3: Washington Redskins vs. Philadelphia Eagles highlights',
|
||||||
|
'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478',
|
||||||
|
'upload_date': '20140921',
|
||||||
|
'timestamp': 1411337580,
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
config = self._download_json(self._PLAYER_CONFIG_URL, video_id,
|
||||||
|
note='Downloading player config')
|
||||||
|
url_template = 'http://nfl.com{contentURLTemplate:s}'.format(**config)
|
||||||
|
video_data = self._download_json(url_template.format(id=video_id), video_id)
|
||||||
|
|
||||||
|
cdns = config.get('cdns')
|
||||||
|
if not cdns:
|
||||||
|
raise ExtractorError('Failed to get CDN data', expected=True)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
streams = video_data.get('cdnData', {}).get('bitrateInfo', [])
|
||||||
|
for name, cdn in cdns.items():
|
||||||
|
# LimeLight streams don't seem to work
|
||||||
|
if cdn.get('name') == 'LIMELIGHT':
|
||||||
|
continue
|
||||||
|
|
||||||
|
protocol = cdn.get('protocol')
|
||||||
|
host = remove_end(cdn.get('host', ''), '/')
|
||||||
|
if not (protocol and host):
|
||||||
|
continue
|
||||||
|
|
||||||
|
path_prefix = cdn.get('pathprefix', '')
|
||||||
|
if path_prefix and not path_prefix.endswith('/'):
|
||||||
|
path_prefix = '%s/' % path_prefix
|
||||||
|
|
||||||
|
get_url = lambda p: '{protocol:s}://{host:s}/{prefix:s}{path:}'.format(
|
||||||
|
protocol=protocol,
|
||||||
|
host=host,
|
||||||
|
prefix=path_prefix,
|
||||||
|
path=p,
|
||||||
|
)
|
||||||
|
|
||||||
|
if protocol == 'rtmp':
|
||||||
|
preference = -2
|
||||||
|
elif 'prog' in name.lower():
|
||||||
|
preference = -1
|
||||||
|
else:
|
||||||
|
preference = 0
|
||||||
|
|
||||||
|
for stream in streams:
|
||||||
|
path = stream.get('path')
|
||||||
|
if not path:
|
||||||
|
continue
|
||||||
|
|
||||||
|
formats.append({
|
||||||
|
'url': get_url(path),
|
||||||
|
'vbr': int_or_none(stream.get('rate', 0), 1000),
|
||||||
|
'preference': preference,
|
||||||
|
'format_note': name,
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
thumbnail = None
|
||||||
|
for q in ('xl', 'l', 'm', 's', 'xs'):
|
||||||
|
thumbnail = video_data.get('imagePaths', {}).get(q)
|
||||||
|
if thumbnail:
|
||||||
|
break
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': video_data.get('storyHeadline'),
|
||||||
|
'formats': formats,
|
||||||
|
'description': video_data.get('caption'),
|
||||||
|
'duration': video_data.get('duration'),
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'timestamp': int_or_none(video_data.get('posted'), 1000),
|
||||||
|
}
|
@@ -62,7 +62,7 @@ class NocoIE(InfoExtractor):
|
|||||||
|
|
||||||
def _call_api(self, path, video_id, note):
|
def _call_api(self, path, video_id, note):
|
||||||
ts = compat_str(int(time.time() * 1000))
|
ts = compat_str(int(time.time() * 1000))
|
||||||
tk = hashlib.md5(hashlib.md5(ts).hexdigest() + '#8S?uCraTedap6a').hexdigest()
|
tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest()
|
||||||
url = self._API_URL_TEMPLATE % (path, ts, tk)
|
url = self._API_URL_TEMPLATE % (path, ts, tk)
|
||||||
|
|
||||||
resp = self._download_json(url, video_id, note)
|
resp = self._download_json(url, video_id, note)
|
||||||
|
47
youtube_dl/extractor/oktoberfesttv.py
Normal file
47
youtube_dl/extractor/oktoberfesttv.py
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class OktoberfestTVIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://www\.oktoberfest-tv\.de/[^/]+/[^/]+/video/(?P<id>[^/?#]+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.oktoberfest-tv.de/de/kameras/video/hb-zelt',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'hb-zelt',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 're:^Live-Kamera: Hofbräuzelt [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
'is_live': True,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
title = self._live_title(self._html_search_regex(
|
||||||
|
r'<h1><strong>.*?</strong>(.*?)</h1>', webpage, 'title'))
|
||||||
|
|
||||||
|
clip = self._search_regex(
|
||||||
|
r"clip:\s*\{\s*url:\s*'([^']+)'", webpage, 'clip')
|
||||||
|
ncurl = self._search_regex(
|
||||||
|
r"netConnectionUrl:\s*'([^']+)'", webpage, 'rtmp base')
|
||||||
|
video_url = ncurl + clip
|
||||||
|
thumbnail = self._search_regex(
|
||||||
|
r"canvas:\s*\{\s*backgroundImage:\s*'url\(([^)]+)\)'", webpage,
|
||||||
|
'thumbnail', fatal=False)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'url': video_url,
|
||||||
|
'ext': 'mp4',
|
||||||
|
'is_live': True,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
}
|
55
youtube_dl/extractor/played.py
Normal file
55
youtube_dl/extractor/played.py
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import os.path
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urllib_request,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PlayedIE(InfoExtractor):
|
||||||
|
IE_NAME = 'played.to'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?played\.to/(?P<id>[a-zA-Z0-9_-]+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://played.to/j2f2sfiiukgt',
|
||||||
|
'md5': 'c2bd75a368e82980e7257bf500c00637',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'j2f2sfiiukgt',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'youtube-dl_test_video.mp4',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
orig_webpage = self._download_webpage(url, video_id)
|
||||||
|
fields = re.findall(
|
||||||
|
r'type="hidden" name="([^"]+)"\s+value="([^"]+)">', orig_webpage)
|
||||||
|
data = dict(fields)
|
||||||
|
|
||||||
|
self._sleep(2, video_id)
|
||||||
|
|
||||||
|
post = compat_urllib_parse.urlencode(data)
|
||||||
|
headers = {
|
||||||
|
b'Content-Type': b'application/x-www-form-urlencoded',
|
||||||
|
}
|
||||||
|
req = compat_urllib_request.Request(url, post, headers)
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
req, video_id, note='Downloading video page ...')
|
||||||
|
|
||||||
|
title = os.path.splitext(data['fname'])[0]
|
||||||
|
|
||||||
|
video_url = self._search_regex(
|
||||||
|
r'file: "?(.+?)",', webpage, 'video URL')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'url': video_url,
|
||||||
|
}
|
92
youtube_dl/extractor/sport5.py
Normal file
92
youtube_dl/extractor/sport5.py
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class Sport5IE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http://(?:www|vod)?\.sport5\.co\.il/.*\b(?:Vi|docID)=(?P<id>\d+)'
|
||||||
|
_TESTS = [
|
||||||
|
{
|
||||||
|
'url': 'http://vod.sport5.co.il/?Vc=147&Vi=176331&Page=1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 's5-Y59xx1-GUh2',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'ולנסיה-קורדובה 0:3',
|
||||||
|
'description': 'אלקאסר, גאייה ופגולי סידרו לקבוצה של נונו ניצחון על קורדובה ואת המקום הראשון בליגה',
|
||||||
|
'duration': 228,
|
||||||
|
'categories': list,
|
||||||
|
},
|
||||||
|
'skip': 'Blocked outside of Israel',
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.sport5.co.il/articles.aspx?FolderID=3075&docID=176372&lang=HE',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 's5-SiXxx1-hKh2',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'GOALS_CELTIC_270914.mp4',
|
||||||
|
'description': '',
|
||||||
|
'duration': 87,
|
||||||
|
'categories': list,
|
||||||
|
},
|
||||||
|
'skip': 'Blocked outside of Israel',
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
media_id = mobj.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, media_id)
|
||||||
|
|
||||||
|
video_id = self._html_search_regex('clipId=([\w-]+)', webpage, 'video id')
|
||||||
|
|
||||||
|
metadata = self._download_xml(
|
||||||
|
'http://sport5-metadata-rr-d.nsacdn.com/vod/vod/%s/HDS/metadata.xml' % video_id,
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
error = metadata.find('./Error')
|
||||||
|
if error is not None:
|
||||||
|
raise ExtractorError(
|
||||||
|
'%s returned error: %s - %s' % (
|
||||||
|
self.IE_NAME,
|
||||||
|
error.find('./Name').text,
|
||||||
|
error.find('./Description').text),
|
||||||
|
expected=True)
|
||||||
|
|
||||||
|
title = metadata.find('./Title').text
|
||||||
|
description = metadata.find('./Description').text
|
||||||
|
duration = int(metadata.find('./Duration').text)
|
||||||
|
|
||||||
|
posters_el = metadata.find('./PosterLinks')
|
||||||
|
thumbnails = [{
|
||||||
|
'url': thumbnail.text,
|
||||||
|
'width': int(thumbnail.get('width')),
|
||||||
|
'height': int(thumbnail.get('height')),
|
||||||
|
} for thumbnail in posters_el.findall('./PosterIMG')] if posters_el is not None else []
|
||||||
|
|
||||||
|
categories_el = metadata.find('./Categories')
|
||||||
|
categories = [
|
||||||
|
cat.get('name') for cat in categories_el.findall('./Category')
|
||||||
|
] if categories_el is not None else []
|
||||||
|
|
||||||
|
formats = [{
|
||||||
|
'url': fmt.text,
|
||||||
|
'ext': 'mp4',
|
||||||
|
'vbr': int(fmt.get('bitrate')),
|
||||||
|
'width': int(fmt.get('width')),
|
||||||
|
'height': int(fmt.get('height')),
|
||||||
|
} for fmt in metadata.findall('./PlaybackLinks/FileURL')]
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'duration': duration,
|
||||||
|
'categories': categories,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@@ -19,7 +19,7 @@ class Vbox7IE(InfoExtractor):
|
|||||||
'md5': '99f65c0c9ef9b682b97313e052734c3f',
|
'md5': '99f65c0c9ef9b682b97313e052734c3f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '249bb972c2',
|
'id': '249bb972c2',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Смях! Чудо - чист за секунди - Скрита камера',
|
'title': 'Смях! Чудо - чист за секунди - Скрита камера',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@@ -50,7 +50,6 @@ class Vbox7IE(InfoExtractor):
|
|||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': final_url,
|
'url': final_url,
|
||||||
'ext': 'flv',
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': thumbnail_url,
|
'thumbnail': thumbnail_url,
|
||||||
}
|
}
|
||||||
|
@@ -5,7 +5,7 @@ import xml.etree.ElementTree
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_HTTPError,
|
compat_urllib_request,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -24,7 +24,7 @@ class VevoIE(InfoExtractor):
|
|||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
|
||||||
"md5": "06bea460acb744eab74a9d7dcb4bfd61",
|
"md5": "95ee28ee45e70130e3ab02b0f579ae23",
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'GB1101300280',
|
'id': 'GB1101300280',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -40,7 +40,7 @@ class VevoIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'note': 'v3 SMIL format',
|
'note': 'v3 SMIL format',
|
||||||
'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
|
'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
|
||||||
'md5': '893ec0e0d4426a1d96c01de8f2bdff58',
|
'md5': 'f6ab09b034f8c22969020b042e5ac7fc',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'USUV71302923',
|
'id': 'USUV71302923',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@@ -69,6 +69,21 @@ class VevoIE(InfoExtractor):
|
|||||||
}]
|
}]
|
||||||
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
|
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
req = compat_urllib_request.Request(
|
||||||
|
'http://www.vevo.com/auth', data=b'')
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
req, None,
|
||||||
|
note='Retrieving oauth token',
|
||||||
|
errnote='Unable to retrieve oauth token',
|
||||||
|
fatal=False)
|
||||||
|
if webpage is False:
|
||||||
|
self._oauth_token = None
|
||||||
|
else:
|
||||||
|
self._oauth_token = self._search_regex(
|
||||||
|
r'access_token":\s*"([^"]+)"',
|
||||||
|
webpage, 'access token', fatal=False)
|
||||||
|
|
||||||
def _formats_from_json(self, video_info):
|
def _formats_from_json(self, video_info):
|
||||||
last_version = {'version': -1}
|
last_version = {'version': -1}
|
||||||
for version in video_info['videoVersions']:
|
for version in video_info['videoVersions']:
|
||||||
@@ -129,6 +144,26 @@ class VevoIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
|
def _download_api_formats(self, video_id):
|
||||||
|
if not self._oauth_token:
|
||||||
|
self._downloader.report_warning(
|
||||||
|
'No oauth token available, skipping API HLS download')
|
||||||
|
return []
|
||||||
|
|
||||||
|
api_url = 'https://apiv2.vevo.com/video/%s/streams/hls?token=%s' % (
|
||||||
|
video_id, self._oauth_token)
|
||||||
|
api_data = self._download_json(
|
||||||
|
api_url, video_id,
|
||||||
|
note='Downloading HLS formats',
|
||||||
|
errnote='Failed to download HLS format list', fatal=False)
|
||||||
|
if api_data is None:
|
||||||
|
return []
|
||||||
|
|
||||||
|
m3u8_url = api_data[0]['url']
|
||||||
|
return self._extract_m3u8_formats(
|
||||||
|
m3u8_url, video_id, entry_protocol='m3u8_native', ext='mp4',
|
||||||
|
preference=0)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
@@ -152,30 +187,8 @@ class VevoIE(InfoExtractor):
|
|||||||
else:
|
else:
|
||||||
age_limit = None
|
age_limit = None
|
||||||
|
|
||||||
# Download SMIL
|
# Download via HLS API
|
||||||
smil_blocks = sorted((
|
formats.extend(self._download_api_formats(video_id))
|
||||||
f for f in video_info['videoVersions']
|
|
||||||
if f['sourceType'] == 13),
|
|
||||||
key=lambda f: f['version'])
|
|
||||||
|
|
||||||
smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
|
|
||||||
self._SMIL_BASE_URL, video_id, video_id.lower())
|
|
||||||
if smil_blocks:
|
|
||||||
smil_url_m = self._search_regex(
|
|
||||||
r'url="([^"]+)"', smil_blocks[-1]['data'], 'SMIL URL',
|
|
||||||
fatal=False)
|
|
||||||
if smil_url_m is not None:
|
|
||||||
smil_url = smil_url_m
|
|
||||||
|
|
||||||
try:
|
|
||||||
smil_xml = self._download_webpage(smil_url, video_id,
|
|
||||||
'Downloading SMIL info')
|
|
||||||
formats.extend(self._formats_from_smil(smil_xml))
|
|
||||||
except ExtractorError as ee:
|
|
||||||
if not isinstance(ee.cause, compat_HTTPError):
|
|
||||||
raise
|
|
||||||
self._downloader.report_warning(
|
|
||||||
'Cannot download SMIL information, falling back to JSON ..')
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
timestamp_ms = int(self._search_regex(
|
timestamp_ms = int(self._search_regex(
|
||||||
|
@@ -15,6 +15,7 @@ from ..utils import (
|
|||||||
get_element_by_attribute,
|
get_element_by_attribute,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
RegexNotFoundError,
|
RegexNotFoundError,
|
||||||
|
smuggle_url,
|
||||||
std_headers,
|
std_headers,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
@@ -529,3 +530,35 @@ class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
return self._extract_videos('watchlater', 'https://vimeo.com/home/watchlater')
|
return self._extract_videos('watchlater', 'https://vimeo.com/home/watchlater')
|
||||||
|
|
||||||
|
|
||||||
|
class VimeoLikesIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes(?:$|[?#])'
|
||||||
|
IE_NAME = 'vimeo:likes'
|
||||||
|
IE_DESC = 'Vimeo user likes'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://vimeo.com/user20132939/likes',
|
||||||
|
'playlist_mincount': 4,
|
||||||
|
'add_ies': ['Generic'],
|
||||||
|
"info_dict": {
|
||||||
|
"description": "Videos Philipp Hagemeister likes on Vimeo.",
|
||||||
|
"title": "Vimeo / Philipp Hagemeister's likes",
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'extract_flat': False,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
user_id = self._match_id(url)
|
||||||
|
rss_url = '%s//vimeo.com/user%s/likes/rss' % (
|
||||||
|
self.http_scheme(), user_id)
|
||||||
|
surl = smuggle_url(rss_url, {
|
||||||
|
'force_videoid': '%s_likes' % user_id,
|
||||||
|
'to_generic': True,
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'url',
|
||||||
|
'url': surl,
|
||||||
|
}
|
||||||
|
@@ -6,6 +6,7 @@ from .common import InfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
compat_str,
|
compat_str,
|
||||||
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -16,6 +17,24 @@ class VubeIE(InfoExtractor):
|
|||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
|
'url': 'http://vube.com/trending/William+Wei/Y8NUZ69Tf7?t=s',
|
||||||
|
'md5': 'e7aabe1f8f1aa826b9e4735e1f9cee42',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'Y8NUZ69Tf7',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Best Drummer Ever [HD]',
|
||||||
|
'description': 'md5:2d63c4b277b85c2277761c2cf7337d71',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
|
'uploader': 'William',
|
||||||
|
'timestamp': 1406876915,
|
||||||
|
'upload_date': '20140801',
|
||||||
|
'duration': 258.051,
|
||||||
|
'like_count': int,
|
||||||
|
'dislike_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'categories': ['amazing', 'hd', 'best drummer ever', 'william wei', 'bucket drumming', 'street drummer', 'epic street drumming'],
|
||||||
|
},
|
||||||
|
}, {
|
||||||
'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon',
|
'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon',
|
||||||
'md5': 'db7aba89d4603dadd627e9d1973946fe',
|
'md5': 'db7aba89d4603dadd627e9d1973946fe',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@@ -32,7 +51,8 @@ class VubeIE(InfoExtractor):
|
|||||||
'dislike_count': int,
|
'dislike_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'categories': ['pop', 'music', 'cover', 'singing', 'jessie j', 'price tag', 'chiara grispo'],
|
'categories': ['pop', 'music', 'cover', 'singing', 'jessie j', 'price tag', 'chiara grispo'],
|
||||||
}
|
},
|
||||||
|
'skip': 'Removed due to DMCA',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://vube.com/SerainaMusic/my-7-year-old-sister-and-i-singing-alive-by-krewella/UeBhTudbfS?t=s&n=1',
|
'url': 'http://vube.com/SerainaMusic/my-7-year-old-sister-and-i-singing-alive-by-krewella/UeBhTudbfS?t=s&n=1',
|
||||||
@@ -51,7 +71,8 @@ class VubeIE(InfoExtractor):
|
|||||||
'dislike_count': int,
|
'dislike_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'categories': ['seraina', 'jessica', 'krewella', 'alive'],
|
'categories': ['seraina', 'jessica', 'krewella', 'alive'],
|
||||||
}
|
},
|
||||||
|
'skip': 'Removed due to DMCA',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://vube.com/vote/Siren+Gene/0nmsMY5vEq?n=2&t=s',
|
'url': 'http://vube.com/vote/Siren+Gene/0nmsMY5vEq?n=2&t=s',
|
||||||
'md5': '0584fc13b50f887127d9d1007589d27f',
|
'md5': '0584fc13b50f887127d9d1007589d27f',
|
||||||
@@ -69,7 +90,8 @@ class VubeIE(InfoExtractor):
|
|||||||
'dislike_count': int,
|
'dislike_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'categories': ['let it go', 'cover', 'idina menzel', 'frozen', 'singing', 'disney', 'siren gene'],
|
'categories': ['let it go', 'cover', 'idina menzel', 'frozen', 'singing', 'disney', 'siren gene'],
|
||||||
}
|
},
|
||||||
|
'skip': 'Removed due to DMCA',
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -102,6 +124,11 @@ class VubeIE(InfoExtractor):
|
|||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
if not formats and video.get('vst') == 'dmca':
|
||||||
|
raise ExtractorError(
|
||||||
|
'This video has been removed in response to a complaint received under the US Digital Millennium Copyright Act.',
|
||||||
|
expected=True)
|
||||||
|
|
||||||
title = video['title']
|
title = video['title']
|
||||||
description = video.get('description')
|
description = video.get('description')
|
||||||
thumbnail = self._proto_relative_url(video.get('thumbnail_src'), scheme='http:')
|
thumbnail = self._proto_relative_url(video.get('thumbnail_src'), scheme='http:')
|
||||||
|
@@ -5,7 +5,10 @@ import re
|
|||||||
import hashlib
|
import hashlib
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import unified_strdate
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class WatIE(InfoExtractor):
|
class WatIE(InfoExtractor):
|
||||||
@@ -37,6 +40,7 @@ class WatIE(InfoExtractor):
|
|||||||
'upload_date': '20140816',
|
'upload_date': '20140816',
|
||||||
'duration': 2910,
|
'duration': 2910,
|
||||||
},
|
},
|
||||||
|
'skip': "Ce contenu n'est pas disponible pour l'instant.",
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -57,6 +61,11 @@ class WatIE(InfoExtractor):
|
|||||||
|
|
||||||
video_info = self.download_video_info(real_id)
|
video_info = self.download_video_info(real_id)
|
||||||
|
|
||||||
|
error_desc = video_info.get('error_desc')
|
||||||
|
if error_desc:
|
||||||
|
raise ExtractorError(
|
||||||
|
'%s returned error: %s' % (self.IE_NAME, error_desc), expected=True)
|
||||||
|
|
||||||
geo_list = video_info.get('geoList')
|
geo_list = video_info.get('geoList')
|
||||||
country = geo_list[0] if geo_list else ''
|
country = geo_list[0] if geo_list else ''
|
||||||
|
|
||||||
|
@@ -1,13 +1,14 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import ExtractorError, compat_urllib_request
|
||||||
|
|
||||||
|
|
||||||
class WistiaIE(InfoExtractor):
|
class WistiaIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)'
|
_VALID_URL = r'https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)'
|
||||||
|
_API_URL = 'http://fast.wistia.com/embed/medias/{0:}.json'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
|
'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
|
||||||
@@ -24,11 +25,13 @@ class WistiaIE(InfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
request = compat_urllib_request.Request(self._API_URL.format(video_id))
|
||||||
data_json = self._html_search_regex(
|
request.add_header('Referer', url) # Some videos require this.
|
||||||
r'Wistia\.iframeInit\((.*?), {}\);', webpage, 'video data')
|
data_json = self._download_json(request, video_id)
|
||||||
|
if data_json.get('error'):
|
||||||
data = json.loads(data_json)
|
raise ExtractorError('Error while getting the playlist',
|
||||||
|
expected=True)
|
||||||
|
data = data_json['media']
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
|
54
youtube_dl/extractor/ynet.py
Normal file
54
youtube_dl/extractor/ynet.py
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import compat_urllib_parse
|
||||||
|
|
||||||
|
|
||||||
|
class YnetIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http://(?:.+?\.)?ynet\.co\.il/(?:.+?/)?0,7340,(?P<id>L(?:-[0-9]+)+),00\.html'
|
||||||
|
_TESTS = [
|
||||||
|
{
|
||||||
|
'url': 'http://hot.ynet.co.il/home/0,7340,L-11659-99244,00.html',
|
||||||
|
'md5': '002b44ee2f33d50363a1c153bed524cf',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'L-11659-99244',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'איש לא יודע מאיפה באנו',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://hot.ynet.co.il/home/0,7340,L-8859-84418,00.html',
|
||||||
|
'md5': '6455046ae1b48cf7e2b7cae285e53a16',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'L-8859-84418',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': "צפו: הנשיקה הלוהטת של תורגי' ויוליה פלוטקין",
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
content = compat_urllib_parse.unquote_plus(self._og_search_video_url(webpage))
|
||||||
|
config = json.loads(self._search_regex(r'config=({.+?})$', content, 'video config'))
|
||||||
|
f4m_url = config['clip']['url']
|
||||||
|
title = self._og_search_title(webpage)
|
||||||
|
m = re.search(r'ynet - HOT -- (["\']+)(?P<title>.+?)\1', title)
|
||||||
|
if m:
|
||||||
|
title = m.group('title')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': self._extract_f4m_formats(f4m_url, video_id),
|
||||||
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
|
}
|
@@ -1,6 +1,7 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
|
||||||
import json
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import math
|
import math
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
@@ -13,18 +14,25 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class YoukuIE(InfoExtractor):
|
class YoukuIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:(?:http://)?(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|youku:)(?P<ID>[A-Za-z0-9]+)(?:\.html|/v\.swf|)'
|
_VALID_URL = r'''(?x)
|
||||||
_TEST = {
|
(?:
|
||||||
u"url": u"http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html",
|
http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
|
||||||
u"file": u"XNDgyMDQ2NTQw_part00.flv",
|
youku:)
|
||||||
u"md5": u"ffe3f2e435663dc2d1eea34faeff5b5b",
|
(?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
|
||||||
u"params": {u"test": False},
|
'''
|
||||||
u"info_dict": {
|
_TEST = {
|
||||||
u"title": u"youtube-dl test video \"'/\\ä↭𝕐"
|
'url': 'http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html',
|
||||||
|
'md5': 'ffe3f2e435663dc2d1eea34faeff5b5b',
|
||||||
|
'params': {
|
||||||
|
'test': False
|
||||||
|
},
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'XNDgyMDQ2NTQw_part00',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'youtube-dl test video "\'/\\ä↭𝕐'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def _gen_sid(self):
|
def _gen_sid(self):
|
||||||
nowTime = int(time.time() * 1000)
|
nowTime = int(time.time() * 1000)
|
||||||
random1 = random.randint(1000,1998)
|
random1 = random.randint(1000,1998)
|
||||||
@@ -55,49 +63,42 @@ class YoukuIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
video_id = mobj.group('id')
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
|
||||||
video_id = mobj.group('ID')
|
|
||||||
|
|
||||||
info_url = 'http://v.youku.com/player/getPlayList/VideoIDS/' + video_id
|
info_url = 'http://v.youku.com/player/getPlayList/VideoIDS/' + video_id
|
||||||
|
|
||||||
jsondata = self._download_webpage(info_url, video_id)
|
config = self._download_json(info_url, video_id)
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
error_code = config['data'][0].get('error_code')
|
||||||
try:
|
if error_code:
|
||||||
config = json.loads(jsondata)
|
# -8 means blocked outside China.
|
||||||
error_code = config['data'][0].get('error_code')
|
error = config['data'][0].get('error') # Chinese and English, separated by newline.
|
||||||
if error_code:
|
raise ExtractorError(error or 'Server reported error %i' % error_code,
|
||||||
# -8 means blocked outside China.
|
expected=True)
|
||||||
error = config['data'][0].get('error') # Chinese and English, separated by newline.
|
|
||||||
raise ExtractorError(error or u'Server reported error %i' % error_code,
|
|
||||||
expected=True)
|
|
||||||
|
|
||||||
video_title = config['data'][0]['title']
|
video_title = config['data'][0]['title']
|
||||||
seed = config['data'][0]['seed']
|
seed = config['data'][0]['seed']
|
||||||
|
|
||||||
format = self._downloader.params.get('format', None)
|
format = self._downloader.params.get('format', None)
|
||||||
supported_format = list(config['data'][0]['streamfileids'].keys())
|
supported_format = list(config['data'][0]['streamfileids'].keys())
|
||||||
|
|
||||||
if format is None or format == 'best':
|
# TODO proper format selection
|
||||||
if 'hd2' in supported_format:
|
if format is None or format == 'best':
|
||||||
format = 'hd2'
|
if 'hd2' in supported_format:
|
||||||
else:
|
format = 'hd2'
|
||||||
format = 'flv'
|
|
||||||
ext = u'flv'
|
|
||||||
elif format == 'worst':
|
|
||||||
format = 'mp4'
|
|
||||||
ext = u'mp4'
|
|
||||||
else:
|
else:
|
||||||
format = 'flv'
|
format = 'flv'
|
||||||
ext = u'flv'
|
ext = 'flv'
|
||||||
|
elif format == 'worst':
|
||||||
|
format = 'mp4'
|
||||||
|
ext = 'mp4'
|
||||||
|
else:
|
||||||
|
format = 'flv'
|
||||||
|
ext = 'flv'
|
||||||
|
|
||||||
|
fileid = config['data'][0]['streamfileids'][format]
|
||||||
fileid = config['data'][0]['streamfileids'][format]
|
keys = [s['k'] for s in config['data'][0]['segs'][format]]
|
||||||
keys = [s['k'] for s in config['data'][0]['segs'][format]]
|
# segs is usually a dictionary, but an empty *list* if an error occured.
|
||||||
# segs is usually a dictionary, but an empty *list* if an error occured.
|
|
||||||
except (UnicodeDecodeError, ValueError, KeyError):
|
|
||||||
raise ExtractorError(u'Unable to extract info section')
|
|
||||||
|
|
||||||
files_info=[]
|
files_info=[]
|
||||||
sid = self._gen_sid()
|
sid = self._gen_sid()
|
||||||
@@ -106,9 +107,8 @@ class YoukuIE(InfoExtractor):
|
|||||||
#column 8,9 of fileid represent the segment number
|
#column 8,9 of fileid represent the segment number
|
||||||
#fileid[7:9] should be changed
|
#fileid[7:9] should be changed
|
||||||
for index, key in enumerate(keys):
|
for index, key in enumerate(keys):
|
||||||
|
|
||||||
temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:])
|
temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:])
|
||||||
download_url = 'http://f.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key)
|
download_url = 'http://k.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key)
|
||||||
|
|
||||||
info = {
|
info = {
|
||||||
'id': '%s_part%02d' % (video_id, index),
|
'id': '%s_part%02d' % (video_id, index),
|
||||||
|
@@ -46,7 +46,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
def _set_language(self):
|
def _set_language(self):
|
||||||
return bool(self._download_webpage(
|
return bool(self._download_webpage(
|
||||||
self._LANG_URL, None,
|
self._LANG_URL, None,
|
||||||
note=u'Setting language', errnote='unable to set language',
|
note='Setting language', errnote='unable to set language',
|
||||||
fatal=False))
|
fatal=False))
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
@@ -61,13 +61,13 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
# No authentication to be performed
|
# No authentication to be performed
|
||||||
if username is None:
|
if username is None:
|
||||||
if self._LOGIN_REQUIRED:
|
if self._LOGIN_REQUIRED:
|
||||||
raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
|
raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
login_page = self._download_webpage(
|
login_page = self._download_webpage(
|
||||||
self._LOGIN_URL, None,
|
self._LOGIN_URL, None,
|
||||||
note=u'Downloading login page',
|
note='Downloading login page',
|
||||||
errnote=u'unable to fetch login page', fatal=False)
|
errnote='unable to fetch login page', fatal=False)
|
||||||
if login_page is False:
|
if login_page is False:
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -105,12 +105,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
|
req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
|
||||||
login_results = self._download_webpage(
|
login_results = self._download_webpage(
|
||||||
req, None,
|
req, None,
|
||||||
note=u'Logging in', errnote=u'unable to log in', fatal=False)
|
note='Logging in', errnote='unable to log in', fatal=False)
|
||||||
if login_results is False:
|
if login_results is False:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
|
if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
|
||||||
raise ExtractorError(u'Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
|
raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
|
||||||
|
|
||||||
# Two-Factor
|
# Two-Factor
|
||||||
# TODO add SMS and phone call support - these require making a request and then prompting the user
|
# TODO add SMS and phone call support - these require making a request and then prompting the user
|
||||||
@@ -119,19 +119,19 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
tfa_code = self._get_tfa_info()
|
tfa_code = self._get_tfa_info()
|
||||||
|
|
||||||
if tfa_code is None:
|
if tfa_code is None:
|
||||||
self._downloader.report_warning(u'Two-factor authentication required. Provide it with --twofactor <code>')
|
self._downloader.report_warning('Two-factor authentication required. Provide it with --twofactor <code>')
|
||||||
self._downloader.report_warning(u'(Note that only TOTP (Google Authenticator App) codes work at this time.)')
|
self._downloader.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Unlike the first login form, secTok and timeStmp are both required for the TFA form
|
# Unlike the first login form, secTok and timeStmp are both required for the TFA form
|
||||||
|
|
||||||
match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
|
match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
|
||||||
if match is None:
|
if match is None:
|
||||||
self._downloader.report_warning(u'Failed to get secTok - did the page structure change?')
|
self._downloader.report_warning('Failed to get secTok - did the page structure change?')
|
||||||
secTok = match.group(1)
|
secTok = match.group(1)
|
||||||
match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
|
match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
|
||||||
if match is None:
|
if match is None:
|
||||||
self._downloader.report_warning(u'Failed to get timeStmp - did the page structure change?')
|
self._downloader.report_warning('Failed to get timeStmp - did the page structure change?')
|
||||||
timeStmp = match.group(1)
|
timeStmp = match.group(1)
|
||||||
|
|
||||||
tfa_form_strs = {
|
tfa_form_strs = {
|
||||||
@@ -155,23 +155,23 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
|
tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
|
||||||
tfa_results = self._download_webpage(
|
tfa_results = self._download_webpage(
|
||||||
tfa_req, None,
|
tfa_req, None,
|
||||||
note=u'Submitting TFA code', errnote=u'unable to submit tfa', fatal=False)
|
note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
|
||||||
|
|
||||||
if tfa_results is False:
|
if tfa_results is False:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None:
|
if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None:
|
||||||
self._downloader.report_warning(u'Two-factor code expired. Please try again, or use a one-use backup code instead.')
|
self._downloader.report_warning('Two-factor code expired. Please try again, or use a one-use backup code instead.')
|
||||||
return False
|
return False
|
||||||
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
|
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
|
||||||
self._downloader.report_warning(u'unable to log in - did the page structure change?')
|
self._downloader.report_warning('unable to log in - did the page structure change?')
|
||||||
return False
|
return False
|
||||||
if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
|
if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
|
||||||
self._downloader.report_warning(u'Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
|
self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
|
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
|
||||||
self._downloader.report_warning(u'unable to log in: bad username or password')
|
self._downloader.report_warning('unable to log in: bad username or password')
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@@ -185,7 +185,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
|
|
||||||
self._download_webpage(
|
self._download_webpage(
|
||||||
req, None,
|
req, None,
|
||||||
note=u'Confirming age', errnote=u'Unable to confirm age')
|
note='Confirming age', errnote='Unable to confirm age')
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
@@ -211,7 +211,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
|
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
|
||||||
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
||||||
(?: # the various things that can precede the ID:
|
(?: # the various things that can precede the ID:
|
||||||
(?:(?:v|embed|e)/) # v/ or embed/ or e/
|
(?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
|
||||||
|(?: # or the v= param in all its forms
|
|(?: # or the v= param in all its forms
|
||||||
(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
|
(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
|
||||||
(?:\?|\#!?) # the params delimiter ? or # or #!
|
(?:\?|\#!?) # the params delimiter ? or # or #!
|
||||||
@@ -307,69 +307,74 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
IE_NAME = 'youtube'
|
IE_NAME = 'youtube'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc",
|
'url': 'http://www.youtube.com/watch?v=BaW_jenozKc',
|
||||||
u"file": u"BaW_jenozKc.mp4",
|
'info_dict': {
|
||||||
u"info_dict": {
|
'id': 'BaW_jenozKc',
|
||||||
u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
|
'ext': 'mp4',
|
||||||
u"uploader": u"Philipp Hagemeister",
|
'title': 'youtube-dl test video "\'/\\ä↭𝕐',
|
||||||
u"uploader_id": u"phihag",
|
'uploader': 'Philipp Hagemeister',
|
||||||
u"upload_date": u"20121002",
|
'uploader_id': 'phihag',
|
||||||
u"description": u"test chars: \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .",
|
'upload_date': '20121002',
|
||||||
u"categories": [u'Science & Technology'],
|
'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
|
||||||
|
'categories': ['Science & Technology'],
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'dislike_count': int,
|
'dislike_count': int,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
|
'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
|
||||||
u"file": u"UxxajLWwzqY.mp4",
|
'note': 'Test generic use_cipher_signature video (#897)',
|
||||||
u"note": u"Test generic use_cipher_signature video (#897)",
|
'info_dict': {
|
||||||
u"info_dict": {
|
'id': 'UxxajLWwzqY',
|
||||||
u"upload_date": u"20120506",
|
'ext': 'mp4',
|
||||||
u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
|
'upload_date': '20120506',
|
||||||
u"description": u"md5:fea86fda2d5a5784273df5c7cc994d9f",
|
'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
|
||||||
u"uploader": u"Icona Pop",
|
'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f',
|
||||||
u"uploader_id": u"IconaPop"
|
'uploader': 'Icona Pop',
|
||||||
|
'uploader_id': 'IconaPop',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ",
|
'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
|
||||||
u"file": u"07FYdnEawAQ.mp4",
|
'note': 'Test VEVO video with age protection (#956)',
|
||||||
u"note": u"Test VEVO video with age protection (#956)",
|
'info_dict': {
|
||||||
u"info_dict": {
|
'id': '07FYdnEawAQ',
|
||||||
u"upload_date": u"20130703",
|
'ext': 'mp4',
|
||||||
u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
|
'upload_date': '20130703',
|
||||||
u"description": u"md5:64249768eec3bc4276236606ea996373",
|
'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
|
||||||
u"uploader": u"justintimberlakeVEVO",
|
'description': 'md5:64249768eec3bc4276236606ea996373',
|
||||||
u"uploader_id": u"justintimberlakeVEVO"
|
'uploader': 'justintimberlakeVEVO',
|
||||||
|
'uploader_id': 'justintimberlakeVEVO',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
u"url": u"//www.YouTube.com/watch?v=yZIXLfi8CZQ",
|
'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
|
||||||
u"file": u"yZIXLfi8CZQ.mp4",
|
'note': 'Embed-only video (#1746)',
|
||||||
u"note": u"Embed-only video (#1746)",
|
'info_dict': {
|
||||||
u"info_dict": {
|
'id': 'yZIXLfi8CZQ',
|
||||||
u"upload_date": u"20120608",
|
'ext': 'mp4',
|
||||||
u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012",
|
'upload_date': '20120608',
|
||||||
u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7",
|
'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
|
||||||
u"uploader": u"SET India",
|
'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
|
||||||
u"uploader_id": u"setindia"
|
'uploader': 'SET India',
|
||||||
|
'uploader_id': 'setindia'
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
u"url": u"http://www.youtube.com/watch?v=a9LDPn-MO4I",
|
'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
|
||||||
u"file": u"a9LDPn-MO4I.m4a",
|
'note': '256k DASH audio (format 141) via DASH manifest',
|
||||||
u"note": u"256k DASH audio (format 141) via DASH manifest",
|
'info_dict': {
|
||||||
u"info_dict": {
|
'id': 'a9LDPn-MO4I',
|
||||||
u"upload_date": "20121002",
|
'ext': 'm4a',
|
||||||
u"uploader_id": "8KVIDEO",
|
'upload_date': '20121002',
|
||||||
u"description": '',
|
'uploader_id': '8KVIDEO',
|
||||||
u"uploader": "8KVIDEO",
|
'description': '',
|
||||||
u"title": "UHDTV TEST 8K VIDEO.mp4"
|
'uploader': '8KVIDEO',
|
||||||
|
'title': 'UHDTV TEST 8K VIDEO.mp4'
|
||||||
},
|
},
|
||||||
u"params": {
|
'params': {
|
||||||
u"youtube_include_dash_manifest": True,
|
'youtube_include_dash_manifest': True,
|
||||||
u"format": "141",
|
'format': '141',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# DASH manifest with encrypted signature
|
# DASH manifest with encrypted signature
|
||||||
@@ -384,7 +389,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
'uploader_id': 'AfrojackVEVO',
|
'uploader_id': 'AfrojackVEVO',
|
||||||
'upload_date': '20131011',
|
'upload_date': '20131011',
|
||||||
},
|
},
|
||||||
u"params": {
|
'params': {
|
||||||
'youtube_include_dash_manifest': True,
|
'youtube_include_dash_manifest': True,
|
||||||
'format': '141',
|
'format': '141',
|
||||||
},
|
},
|
||||||
@@ -397,19 +402,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
def report_video_info_webpage_download(self, video_id):
|
def report_video_info_webpage_download(self, video_id):
|
||||||
"""Report attempt to download video info webpage."""
|
"""Report attempt to download video info webpage."""
|
||||||
self.to_screen(u'%s: Downloading video info webpage' % video_id)
|
self.to_screen('%s: Downloading video info webpage' % video_id)
|
||||||
|
|
||||||
def report_information_extraction(self, video_id):
|
def report_information_extraction(self, video_id):
|
||||||
"""Report attempt to extract video information."""
|
"""Report attempt to extract video information."""
|
||||||
self.to_screen(u'%s: Extracting video information' % video_id)
|
self.to_screen('%s: Extracting video information' % video_id)
|
||||||
|
|
||||||
def report_unavailable_format(self, video_id, format):
|
def report_unavailable_format(self, video_id, format):
|
||||||
"""Report extracted video URL."""
|
"""Report extracted video URL."""
|
||||||
self.to_screen(u'%s: Format %s not available' % (video_id, format))
|
self.to_screen('%s: Format %s not available' % (video_id, format))
|
||||||
|
|
||||||
def report_rtmp_download(self):
|
def report_rtmp_download(self):
|
||||||
"""Indicate the download will use the RTMP protocol."""
|
"""Indicate the download will use the RTMP protocol."""
|
||||||
self.to_screen(u'RTMP download detected')
|
self.to_screen('RTMP download detected')
|
||||||
|
|
||||||
def _signature_cache_id(self, example_sig):
|
def _signature_cache_id(self, example_sig):
|
||||||
""" Return a string representation of a signature """
|
""" Return a string representation of a signature """
|
||||||
@@ -429,21 +434,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
player_type, player_id, self._signature_cache_id(example_sig))
|
player_type, player_id, self._signature_cache_id(example_sig))
|
||||||
assert os.path.basename(func_id) == func_id
|
assert os.path.basename(func_id) == func_id
|
||||||
|
|
||||||
cache_spec = self._downloader.cache.load(u'youtube-sigfuncs', func_id)
|
cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
|
||||||
if cache_spec is not None:
|
if cache_spec is not None:
|
||||||
return lambda s: ''.join(s[i] for i in cache_spec)
|
return lambda s: ''.join(s[i] for i in cache_spec)
|
||||||
|
|
||||||
if player_type == 'js':
|
if player_type == 'js':
|
||||||
code = self._download_webpage(
|
code = self._download_webpage(
|
||||||
player_url, video_id,
|
player_url, video_id,
|
||||||
note=u'Downloading %s player %s' % (player_type, player_id),
|
note='Downloading %s player %s' % (player_type, player_id),
|
||||||
errnote=u'Download of %s failed' % player_url)
|
errnote='Download of %s failed' % player_url)
|
||||||
res = self._parse_sig_js(code)
|
res = self._parse_sig_js(code)
|
||||||
elif player_type == 'swf':
|
elif player_type == 'swf':
|
||||||
urlh = self._request_webpage(
|
urlh = self._request_webpage(
|
||||||
player_url, video_id,
|
player_url, video_id,
|
||||||
note=u'Downloading %s player %s' % (player_type, player_id),
|
note='Downloading %s player %s' % (player_type, player_id),
|
||||||
errnote=u'Download of %s failed' % player_url)
|
errnote='Download of %s failed' % player_url)
|
||||||
code = urlh.read()
|
code = urlh.read()
|
||||||
res = self._parse_sig_swf(code)
|
res = self._parse_sig_swf(code)
|
||||||
else:
|
else:
|
||||||
@@ -454,15 +459,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
cache_res = res(test_string)
|
cache_res = res(test_string)
|
||||||
cache_spec = [ord(c) for c in cache_res]
|
cache_spec = [ord(c) for c in cache_res]
|
||||||
|
|
||||||
self._downloader.cache.store(u'youtube-sigfuncs', func_id, cache_spec)
|
self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def _print_sig_code(self, func, example_sig):
|
def _print_sig_code(self, func, example_sig):
|
||||||
def gen_sig_code(idxs):
|
def gen_sig_code(idxs):
|
||||||
def _genslice(start, end, step):
|
def _genslice(start, end, step):
|
||||||
starts = '' if start == 0 else str(start)
|
starts = '' if start == 0 else str(start)
|
||||||
ends = (u':%d' % (end+step)) if end + step >= 0 else ':'
|
ends = (':%d' % (end+step)) if end + step >= 0 else ':'
|
||||||
steps = '' if step == 1 else (u':%d' % step)
|
steps = '' if step == 1 else (':%d' % step)
|
||||||
return 's[%s%s%s]' % (starts, ends, steps)
|
return 's[%s%s%s]' % (starts, ends, steps)
|
||||||
|
|
||||||
step = None
|
step = None
|
||||||
@@ -492,9 +497,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
expr_code = ' + '.join(gen_sig_code(cache_spec))
|
expr_code = ' + '.join(gen_sig_code(cache_spec))
|
||||||
signature_id_tuple = '(%s)' % (
|
signature_id_tuple = '(%s)' % (
|
||||||
', '.join(compat_str(len(p)) for p in example_sig.split('.')))
|
', '.join(compat_str(len(p)) for p in example_sig.split('.')))
|
||||||
code = (u'if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
|
code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
|
||||||
' return %s\n') % (signature_id_tuple, expr_code)
|
' return %s\n') % (signature_id_tuple, expr_code)
|
||||||
self.to_screen(u'Extracted signature function:\n' + code)
|
self.to_screen('Extracted signature function:\n' + code)
|
||||||
|
|
||||||
def _parse_sig_js(self, jscode):
|
def _parse_sig_js(self, jscode):
|
||||||
funcname = self._search_regex(
|
funcname = self._search_regex(
|
||||||
@@ -516,9 +521,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
"""Turn the encrypted s field into a working signature"""
|
"""Turn the encrypted s field into a working signature"""
|
||||||
|
|
||||||
if player_url is None:
|
if player_url is None:
|
||||||
raise ExtractorError(u'Cannot decrypt signature without player_url')
|
raise ExtractorError('Cannot decrypt signature without player_url')
|
||||||
|
|
||||||
if player_url.startswith(u'//'):
|
if player_url.startswith('//'):
|
||||||
player_url = 'https:' + player_url
|
player_url = 'https:' + player_url
|
||||||
try:
|
try:
|
||||||
player_id = (player_url, self._signature_cache_id(s))
|
player_id = (player_url, self._signature_cache_id(s))
|
||||||
@@ -542,7 +547,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
|
'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
|
||||||
video_id, note=False)
|
video_id, note=False)
|
||||||
except ExtractorError as err:
|
except ExtractorError as err:
|
||||||
self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
|
self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
|
||||||
return {}
|
return {}
|
||||||
lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
|
lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
|
||||||
|
|
||||||
@@ -560,7 +565,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
url = 'https://www.youtube.com/api/timedtext?' + params
|
url = 'https://www.youtube.com/api/timedtext?' + params
|
||||||
sub_lang_list[lang] = url
|
sub_lang_list[lang] = url
|
||||||
if not sub_lang_list:
|
if not sub_lang_list:
|
||||||
self._downloader.report_warning(u'video doesn\'t have subtitles')
|
self._downloader.report_warning('video doesn\'t have subtitles')
|
||||||
return {}
|
return {}
|
||||||
return sub_lang_list
|
return sub_lang_list
|
||||||
|
|
||||||
@@ -568,7 +573,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
"""We need the webpage for getting the captions url, pass it as an
|
"""We need the webpage for getting the captions url, pass it as an
|
||||||
argument to speed up the process."""
|
argument to speed up the process."""
|
||||||
sub_format = self._downloader.params.get('subtitlesformat', 'srt')
|
sub_format = self._downloader.params.get('subtitlesformat', 'srt')
|
||||||
self.to_screen(u'%s: Looking for automatic captions' % video_id)
|
self.to_screen('%s: Looking for automatic captions' % video_id)
|
||||||
mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
|
mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
|
||||||
err_msg = 'Couldn\'t find automatic captions for %s' % video_id
|
err_msg = 'Couldn\'t find automatic captions for %s' % video_id
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
@@ -589,7 +594,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
caption_list = self._download_xml(list_url, video_id)
|
caption_list = self._download_xml(list_url, video_id)
|
||||||
original_lang_node = caption_list.find('track')
|
original_lang_node = caption_list.find('track')
|
||||||
if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
|
if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
|
||||||
self._downloader.report_warning(u'Video doesn\'t have automatic captions')
|
self._downloader.report_warning('Video doesn\'t have automatic captions')
|
||||||
return {}
|
return {}
|
||||||
original_lang = original_lang_node.attrib['lang_code']
|
original_lang = original_lang_node.attrib['lang_code']
|
||||||
|
|
||||||
@@ -615,7 +620,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
def extract_id(cls, url):
|
def extract_id(cls, url):
|
||||||
mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
|
mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
raise ExtractorError('Invalid URL: %s' % url)
|
||||||
video_id = mobj.group(2)
|
video_id = mobj.group(2)
|
||||||
return video_id
|
return video_id
|
||||||
|
|
||||||
@@ -635,7 +640,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
def _extract_annotations(self, video_id):
|
def _extract_annotations(self, video_id):
|
||||||
url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
|
url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
|
||||||
return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
|
return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
proto = (
|
proto = (
|
||||||
@@ -705,14 +710,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
# Check for "rental" videos
|
# Check for "rental" videos
|
||||||
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
|
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
|
||||||
raise ExtractorError(u'"rental" videos not supported')
|
raise ExtractorError('"rental" videos not supported')
|
||||||
|
|
||||||
# Start extracting information
|
# Start extracting information
|
||||||
self.report_information_extraction(video_id)
|
self.report_information_extraction(video_id)
|
||||||
|
|
||||||
# uploader
|
# uploader
|
||||||
if 'author' not in video_info:
|
if 'author' not in video_info:
|
||||||
raise ExtractorError(u'Unable to extract uploader name')
|
raise ExtractorError('Unable to extract uploader name')
|
||||||
video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
|
video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
|
||||||
|
|
||||||
# uploader_id
|
# uploader_id
|
||||||
@@ -721,13 +726,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
video_uploader_id = mobj.group(1)
|
video_uploader_id = mobj.group(1)
|
||||||
else:
|
else:
|
||||||
self._downloader.report_warning(u'unable to extract uploader nickname')
|
self._downloader.report_warning('unable to extract uploader nickname')
|
||||||
|
|
||||||
# title
|
# title
|
||||||
if 'title' in video_info:
|
if 'title' in video_info:
|
||||||
video_title = video_info['title'][0]
|
video_title = video_info['title'][0]
|
||||||
else:
|
else:
|
||||||
self._downloader.report_warning(u'Unable to extract video title')
|
self._downloader.report_warning('Unable to extract video title')
|
||||||
video_title = '_'
|
video_title = '_'
|
||||||
|
|
||||||
# thumbnail image
|
# thumbnail image
|
||||||
@@ -737,7 +742,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
if m_thumb is not None:
|
if m_thumb is not None:
|
||||||
video_thumbnail = m_thumb.group(1)
|
video_thumbnail = m_thumb.group(1)
|
||||||
elif 'thumbnail_url' not in video_info:
|
elif 'thumbnail_url' not in video_info:
|
||||||
self._downloader.report_warning(u'unable to extract video thumbnail')
|
self._downloader.report_warning('unable to extract video thumbnail')
|
||||||
video_thumbnail = None
|
video_thumbnail = None
|
||||||
else: # don't panic if we can't find it
|
else: # don't panic if we can't find it
|
||||||
video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
|
video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
|
||||||
@@ -791,8 +796,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
if count is not None:
|
if count is not None:
|
||||||
return int(count.replace(',', ''))
|
return int(count.replace(',', ''))
|
||||||
return None
|
return None
|
||||||
like_count = _extract_count(u'like')
|
like_count = _extract_count('like')
|
||||||
dislike_count = _extract_count(u'dislike')
|
dislike_count = _extract_count('dislike')
|
||||||
|
|
||||||
# subtitles
|
# subtitles
|
||||||
video_subtitles = self.extract_subtitles(video_id, video_webpage)
|
video_subtitles = self.extract_subtitles(video_id, video_webpage)
|
||||||
@@ -802,7 +807,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
return
|
return
|
||||||
|
|
||||||
if 'length_seconds' not in video_info:
|
if 'length_seconds' not in video_info:
|
||||||
self._downloader.report_warning(u'unable to extract video duration')
|
self._downloader.report_warning('unable to extract video duration')
|
||||||
video_duration = None
|
video_duration = None
|
||||||
else:
|
else:
|
||||||
video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))
|
video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))
|
||||||
@@ -823,11 +828,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
# Easy way to know if the 's' value is in url_encoded_fmt_stream_map
|
# Easy way to know if the 's' value is in url_encoded_fmt_stream_map
|
||||||
# this signatures are encrypted
|
# this signatures are encrypted
|
||||||
if 'url_encoded_fmt_stream_map' not in args:
|
if 'url_encoded_fmt_stream_map' not in args:
|
||||||
raise ValueError(u'No stream_map present') # caught below
|
raise ValueError('No stream_map present') # caught below
|
||||||
re_signature = re.compile(r'[&,]s=')
|
re_signature = re.compile(r'[&,]s=')
|
||||||
m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
|
m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
|
||||||
if m_s is not None:
|
if m_s is not None:
|
||||||
self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
|
self.to_screen('%s: Encrypted signatures detected.' % video_id)
|
||||||
video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
|
video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
|
||||||
m_s = re_signature.search(args.get('adaptive_fmts', ''))
|
m_s = re_signature.search(args.get('adaptive_fmts', ''))
|
||||||
if m_s is not None:
|
if m_s is not None:
|
||||||
@@ -905,7 +910,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
player_desc = 'html5 player %s' % player_version
|
player_desc = 'html5 player %s' % player_version
|
||||||
|
|
||||||
parts_sizes = self._signature_cache_id(encrypted_sig)
|
parts_sizes = self._signature_cache_id(encrypted_sig)
|
||||||
self.to_screen(u'{%s} signature length %s, %s' %
|
self.to_screen('{%s} signature length %s, %s' %
|
||||||
(format_id, parts_sizes, player_desc))
|
(format_id, parts_sizes, player_desc))
|
||||||
|
|
||||||
signature = self._decrypt_signature(
|
signature = self._decrypt_signature(
|
||||||
@@ -920,7 +925,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
url_map = self._extract_from_m3u8(manifest_url, video_id)
|
url_map = self._extract_from_m3u8(manifest_url, video_id)
|
||||||
formats = _map_to_format_list(url_map)
|
formats = _map_to_format_list(url_map)
|
||||||
else:
|
else:
|
||||||
raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
|
raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
|
||||||
|
|
||||||
# Look for the DASH manifest
|
# Look for the DASH manifest
|
||||||
if (self._downloader.params.get('youtube_include_dash_manifest', False)):
|
if (self._downloader.params.get('youtube_include_dash_manifest', False)):
|
||||||
@@ -941,9 +946,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
|
dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
|
||||||
dash_doc = self._download_xml(
|
dash_doc = self._download_xml(
|
||||||
dash_manifest_url, video_id,
|
dash_manifest_url, video_id,
|
||||||
note=u'Downloading DASH manifest',
|
note='Downloading DASH manifest',
|
||||||
errnote=u'Could not download DASH manifest')
|
errnote='Could not download DASH manifest')
|
||||||
for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
|
for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
|
||||||
url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
|
url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
|
||||||
if url_el is None:
|
if url_el is None:
|
||||||
continue
|
continue
|
||||||
@@ -969,7 +974,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
existing_format.update(f)
|
existing_format.update(f)
|
||||||
|
|
||||||
except (ExtractorError, KeyError) as e:
|
except (ExtractorError, KeyError) as e:
|
||||||
self.report_warning(u'Skipping DASH manifest: %s' % e, video_id)
|
self.report_warning('Skipping DASH manifest: %s' % e, video_id)
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
@@ -1000,7 +1005,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
(?:\w+\.)?
|
(?:\w+\.)?
|
||||||
youtube\.com/
|
youtube\.com/
|
||||||
(?:
|
(?:
|
||||||
(?:course|view_play_list|my_playlists|artist|playlist|watch)
|
(?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
|
||||||
\? (?:.*?&)*? (?:p|a|list)=
|
\? (?:.*?&)*? (?:p|a|list)=
|
||||||
| p/
|
| p/
|
||||||
)
|
)
|
||||||
@@ -1056,6 +1061,20 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
'title': 'YDL_safe_search',
|
'title': 'YDL_safe_search',
|
||||||
},
|
},
|
||||||
'playlist_count': 2,
|
'playlist_count': 2,
|
||||||
|
}, {
|
||||||
|
'note': 'embedded',
|
||||||
|
'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
|
||||||
|
'playlist_count': 4,
|
||||||
|
'info_dict': {
|
||||||
|
'title': 'JODA15',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'note': 'Embedded SWF player',
|
||||||
|
'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
|
||||||
|
'playlist_count': 4,
|
||||||
|
'info_dict': {
|
||||||
|
'title': 'JODA7',
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
@@ -1090,7 +1109,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
# Extract playlist id
|
# Extract playlist id
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
raise ExtractorError('Invalid URL: %s' % url)
|
||||||
playlist_id = mobj.group(1) or mobj.group(2)
|
playlist_id = mobj.group(1) or mobj.group(2)
|
||||||
|
|
||||||
# Check if it's a video-specific URL
|
# Check if it's a video-specific URL
|
||||||
@@ -1098,16 +1117,16 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
if 'v' in query_dict:
|
if 'v' in query_dict:
|
||||||
video_id = query_dict['v'][0]
|
video_id = query_dict['v'][0]
|
||||||
if self._downloader.params.get('noplaylist'):
|
if self._downloader.params.get('noplaylist'):
|
||||||
self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
|
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
|
||||||
return self.url_result(video_id, 'Youtube', video_id=video_id)
|
return self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||||
else:
|
else:
|
||||||
self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
||||||
|
|
||||||
if playlist_id.startswith('RD'):
|
if playlist_id.startswith('RD'):
|
||||||
# Mixes require a custom extraction process
|
# Mixes require a custom extraction process
|
||||||
return self._extract_mix(playlist_id)
|
return self._extract_mix(playlist_id)
|
||||||
if playlist_id.startswith('TL'):
|
if playlist_id.startswith('TL'):
|
||||||
raise ExtractorError(u'For downloading YouTube.com top lists, use '
|
raise ExtractorError('For downloading YouTube.com top lists, use '
|
||||||
'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
|
'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
|
||||||
|
|
||||||
url = self._TEMPLATE_URL % playlist_id
|
url = self._TEMPLATE_URL % playlist_id
|
||||||
@@ -1152,19 +1171,28 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
class YoutubeTopListIE(YoutubePlaylistIE):
|
class YoutubeTopListIE(YoutubePlaylistIE):
|
||||||
IE_NAME = 'youtube:toplist'
|
IE_NAME = 'youtube:toplist'
|
||||||
IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"'
|
IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"'
|
||||||
' (Example: "yttoplist:music:Top Tracks")')
|
' (Example: "yttoplist:music:Top Tracks")')
|
||||||
_VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
|
_VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
|
||||||
_TESTS = []
|
_TESTS = [{
|
||||||
|
'url': 'yttoplist:music:Trending',
|
||||||
|
'playlist_mincount': 5,
|
||||||
|
'skip': 'Only works for logged-in users',
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
channel = mobj.group('chann')
|
channel = mobj.group('chann')
|
||||||
title = mobj.group('title')
|
title = mobj.group('title')
|
||||||
query = compat_urllib_parse.urlencode({'title': title})
|
query = compat_urllib_parse.urlencode({'title': title})
|
||||||
playlist_re = 'href="([^"]+?%s.*?)"' % re.escape(query)
|
channel_page = self._download_webpage(
|
||||||
channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title)
|
'https://www.youtube.com/%s' % channel, title)
|
||||||
link = self._html_search_regex(playlist_re, channel_page, 'list')
|
link = self._html_search_regex(
|
||||||
|
r'''(?x)
|
||||||
|
<a\s+href="([^"]+)".*?>\s*
|
||||||
|
<span\s+class="branded-page-module-title-text">\s*
|
||||||
|
<span[^>]*>.*?%s.*?</span>''' % re.escape(query),
|
||||||
|
channel_page, 'list')
|
||||||
url = compat_urlparse.urljoin('https://www.youtube.com/', link)
|
url = compat_urlparse.urljoin('https://www.youtube.com/', link)
|
||||||
|
|
||||||
video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
|
video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
|
||||||
@@ -1190,6 +1218,11 @@ class YoutubeChannelIE(InfoExtractor):
|
|||||||
_MORE_PAGES_INDICATOR = 'yt-uix-load-more'
|
_MORE_PAGES_INDICATOR = 'yt-uix-load-more'
|
||||||
_MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
|
_MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
|
||||||
IE_NAME = 'youtube:channel'
|
IE_NAME = 'youtube:channel'
|
||||||
|
_TESTS = [{
|
||||||
|
'note': 'paginated channel',
|
||||||
|
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
|
||||||
|
'playlist_mincount': 91,
|
||||||
|
}]
|
||||||
|
|
||||||
def extract_videos_from_page(self, page):
|
def extract_videos_from_page(self, page):
|
||||||
ids_in_page = []
|
ids_in_page = []
|
||||||
@@ -1202,7 +1235,7 @@ class YoutubeChannelIE(InfoExtractor):
|
|||||||
# Extract channel id
|
# Extract channel id
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
raise ExtractorError('Invalid URL: %s' % url)
|
||||||
|
|
||||||
# Download channel page
|
# Download channel page
|
||||||
channel_id = mobj.group(1)
|
channel_id = mobj.group(1)
|
||||||
@@ -1224,7 +1257,7 @@ class YoutubeChannelIE(InfoExtractor):
|
|||||||
for pagenum in itertools.count(1):
|
for pagenum in itertools.count(1):
|
||||||
url = self._MORE_PAGES_URL % (pagenum, channel_id)
|
url = self._MORE_PAGES_URL % (pagenum, channel_id)
|
||||||
page = self._download_json(
|
page = self._download_json(
|
||||||
url, channel_id, note=u'Downloading page #%s' % pagenum,
|
url, channel_id, note='Downloading page #%s' % pagenum,
|
||||||
transform_source=uppercase_escape)
|
transform_source=uppercase_escape)
|
||||||
|
|
||||||
ids_in_page = self.extract_videos_from_page(page['content_html'])
|
ids_in_page = self.extract_videos_from_page(page['content_html'])
|
||||||
@@ -1233,7 +1266,7 @@ class YoutubeChannelIE(InfoExtractor):
|
|||||||
if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
|
if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
|
||||||
break
|
break
|
||||||
|
|
||||||
self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
|
self._downloader.to_screen('[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
|
||||||
|
|
||||||
url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
|
url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||||
for video_id in video_ids]
|
for video_id in video_ids]
|
||||||
@@ -1248,6 +1281,17 @@ class YoutubeUserIE(InfoExtractor):
|
|||||||
_GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
|
_GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
|
||||||
IE_NAME = 'youtube:user'
|
IE_NAME = 'youtube:user'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.youtube.com/user/TheLinuxFoundation',
|
||||||
|
'playlist_mincount': 320,
|
||||||
|
'info_dict': {
|
||||||
|
'title': 'TheLinuxFoundation',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'ytuser:phihag',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
# Don't return True if the url can be extracted with other youtube
|
# Don't return True if the url can be extracted with other youtube
|
||||||
@@ -1260,7 +1304,7 @@ class YoutubeUserIE(InfoExtractor):
|
|||||||
# Extract username
|
# Extract username
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
raise ExtractorError('Invalid URL: %s' % url)
|
||||||
|
|
||||||
username = mobj.group(1)
|
username = mobj.group(1)
|
||||||
|
|
||||||
@@ -1281,7 +1325,7 @@ class YoutubeUserIE(InfoExtractor):
|
|||||||
try:
|
try:
|
||||||
response = json.loads(page)
|
response = json.loads(page)
|
||||||
except ValueError as err:
|
except ValueError as err:
|
||||||
raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
|
raise ExtractorError('Invalid JSON in API response: ' + compat_str(err))
|
||||||
if 'entry' not in response['feed']:
|
if 'entry' not in response['feed']:
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -1322,9 +1366,9 @@ class YoutubeSearchIE(SearchInfoExtractor):
|
|||||||
compat_urllib_parse.quote_plus(query.encode('utf-8')),
|
compat_urllib_parse.quote_plus(query.encode('utf-8')),
|
||||||
(PAGE_SIZE * pagenum) + 1)
|
(PAGE_SIZE * pagenum) + 1)
|
||||||
data_json = self._download_webpage(
|
data_json = self._download_webpage(
|
||||||
result_url, video_id=u'query "%s"' % query,
|
result_url, video_id='query "%s"' % query,
|
||||||
note=u'Downloading page %s' % (pagenum + 1),
|
note='Downloading page %s' % (pagenum + 1),
|
||||||
errnote=u'Unable to download API page')
|
errnote='Unable to download API page')
|
||||||
data = json.loads(data_json)
|
data = json.loads(data_json)
|
||||||
api_response = data['data']
|
api_response = data['data']
|
||||||
|
|
||||||
@@ -1356,6 +1400,13 @@ class YoutubeSearchURLIE(InfoExtractor):
|
|||||||
IE_DESC = 'YouTube.com search URLs'
|
IE_DESC = 'YouTube.com search URLs'
|
||||||
IE_NAME = 'youtube:search_url'
|
IE_NAME = 'youtube:search_url'
|
||||||
_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
|
_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
|
||||||
|
'playlist_mincount': 5,
|
||||||
|
'info_dict': {
|
||||||
|
'title': 'youtube-dl test video',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
@@ -1390,17 +1441,38 @@ class YoutubeSearchURLIE(InfoExtractor):
|
|||||||
|
|
||||||
class YoutubeShowIE(InfoExtractor):
|
class YoutubeShowIE(InfoExtractor):
|
||||||
IE_DESC = 'YouTube.com (multi-season) shows'
|
IE_DESC = 'YouTube.com (multi-season) shows'
|
||||||
_VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
|
_VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
|
||||||
IE_NAME = 'youtube:show'
|
IE_NAME = 'youtube:show'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.youtube.com/show/airdisasters',
|
||||||
|
'playlist_mincount': 3,
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'airdisasters',
|
||||||
|
'title': 'Air Disasters',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
show_name = mobj.group(1)
|
playlist_id = mobj.group('id')
|
||||||
webpage = self._download_webpage(url, show_name, 'Downloading show webpage')
|
webpage = self._download_webpage(
|
||||||
|
url, playlist_id, 'Downloading show webpage')
|
||||||
# There's one playlist for each season of the show
|
# There's one playlist for each season of the show
|
||||||
m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
|
m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
|
||||||
self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
|
self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons)))
|
||||||
return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
|
entries = [
|
||||||
|
self.url_result(
|
||||||
|
'https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
|
||||||
|
for season in m_seasons
|
||||||
|
]
|
||||||
|
title = self._og_search_title(webpage, fatal=False)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'id': playlist_id,
|
||||||
|
'title': title,
|
||||||
|
'entries': entries,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
||||||
|
@@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2014.09.22.1'
|
__version__ = '2014.09.28.1'
|
||||||
|
Reference in New Issue
Block a user