Compare commits
46 Commits
2014.09.18
...
2014.09.24
Author | SHA1 | Date | |
---|---|---|---|
|
0b97f3a936 | ||
|
eb73f2649f | ||
|
f0b5d6af74 | ||
|
2f771f6c99 | ||
|
3b2f933b01 | ||
|
cc746841e7 | ||
|
ac7553d031 | ||
|
cdc628a498 | ||
|
69ea8ca42c | ||
|
4bc3a23ec5 | ||
|
bd5650ac64 | ||
|
86916dae4b | ||
|
f7d159cf95 | ||
|
632e5684ce | ||
|
094d42fe44 | ||
|
63cddb6477 | ||
|
273dea4248 | ||
|
f90d95edeb | ||
|
45c85d7ba1 | ||
|
d0df92928b | ||
|
df8f53f752 | ||
|
e35cb78c40 | ||
|
3ef7d11acd | ||
|
224ce0d872 | ||
|
dd41e8c82b | ||
|
b509a4b176 | ||
|
b28c8403b2 | ||
|
7bd4b4229a | ||
|
72e450c555 | ||
|
522c55b7f2 | ||
|
58e7071a2c | ||
|
516812df41 | ||
|
752297631f | ||
|
34e14a9beb | ||
|
ffb5b05db1 | ||
|
3e8fcd9fa1 | ||
|
532f5bff70 | ||
|
f566d9f1d5 | ||
|
7267bd536f | ||
|
589d3d7c7a | ||
|
46f74bcf5c | ||
|
37bfe8ace4 | ||
|
0529eef5a4 | ||
|
a8aa99442f | ||
|
94b539d155 | ||
|
b8874d4d4e |
@@ -10,7 +10,6 @@ from test.helper import FakeYDL
|
||||
|
||||
|
||||
from youtube_dl.extractor import (
|
||||
YoutubeUserIE,
|
||||
YoutubePlaylistIE,
|
||||
YoutubeIE,
|
||||
YoutubeChannelIE,
|
||||
@@ -43,28 +42,6 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
self.assertEqual(len(entries), 25)
|
||||
self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
|
||||
|
||||
def test_youtube_channel(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubeChannelIE(dl)
|
||||
#test paginated channel
|
||||
result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')
|
||||
self.assertTrue(len(result['entries']) > 90)
|
||||
#test autogenerated channel
|
||||
result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
|
||||
self.assertTrue(len(result['entries']) >= 18)
|
||||
|
||||
def test_youtube_user(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubeUserIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
|
||||
self.assertTrue(len(result['entries']) >= 320)
|
||||
|
||||
def test_youtube_show(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubeShowIE(dl)
|
||||
result = ie.extract('http://www.youtube.com/show/airdisasters')
|
||||
self.assertTrue(len(result) >= 3)
|
||||
|
||||
def test_youtube_mix(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
@@ -83,21 +60,5 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
entries = result['entries']
|
||||
self.assertEqual(len(entries), 100)
|
||||
|
||||
def test_youtube_toplist(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubeTopListIE(dl)
|
||||
result = ie.extract('yttoplist:music:Trending')
|
||||
entries = result['entries']
|
||||
self.assertTrue(len(entries) >= 5)
|
||||
|
||||
def test_youtube_search_url(self):
|
||||
dl = FakeYDL()
|
||||
ie = YoutubeSearchURLIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video')
|
||||
entries = result['entries']
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['title'], 'youtube-dl test video')
|
||||
self.assertTrue(len(entries) >= 5)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -2,6 +2,7 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import FileDownloader
|
||||
from .hls import HlsFD
|
||||
from .hls import NativeHlsFD
|
||||
from .http import HttpFD
|
||||
from .mplayer import MplayerFD
|
||||
from .rtmp import RtmpFD
|
||||
@@ -19,6 +20,8 @@ def get_suitable_downloader(info_dict):
|
||||
|
||||
if url.startswith('rtmp'):
|
||||
return RtmpFD
|
||||
if protocol == 'm3u8_native':
|
||||
return NativeHlsFD
|
||||
if (protocol == 'm3u8') or (protocol is None and determine_ext(url) == 'm3u8'):
|
||||
return HlsFD
|
||||
if url.startswith('mms') or url.startswith('rtsp'):
|
||||
|
@@ -16,6 +16,7 @@ from ..utils import (
|
||||
format_bytes,
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
@@ -251,6 +252,8 @@ class F4mFD(FileDownloader):
|
||||
# We only download the first fragment
|
||||
fragments_list = fragments_list[:1]
|
||||
total_frags = len(fragments_list)
|
||||
# For some akamai manifests we'll need to add a query to the fragment url
|
||||
akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
|
||||
|
||||
tmpfilename = self.temp_name(filename)
|
||||
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
|
||||
@@ -290,6 +293,8 @@ class F4mFD(FileDownloader):
|
||||
for (seg_i, frag_i) in fragments_list:
|
||||
name = 'Seg%d-Frag%d' % (seg_i, frag_i)
|
||||
url = base_url + name
|
||||
if akamai_pv:
|
||||
url += '?' + akamai_pv.strip(';')
|
||||
frag_filename = '%s-%s' % (tmpfilename, name)
|
||||
success = http_dl.download(frag_filename, {'url': url})
|
||||
if not success:
|
||||
|
@@ -1,8 +1,12 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..utils import (
|
||||
compat_urlparse,
|
||||
check_executable,
|
||||
encodeFilename,
|
||||
)
|
||||
@@ -43,3 +47,46 @@ class HlsFD(FileDownloader):
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'%s exited with code %d' % (program, retval))
|
||||
return False
|
||||
|
||||
|
||||
class NativeHlsFD(FileDownloader):
    """ A more limited implementation that does not require ffmpeg

    The m3u8 manifest is fetched, every non-empty line that does not start
    with '#' is taken as a segment URL, and the segments are downloaded one
    after another and concatenated into the output file.
    """

    def real_download(self, filename, info_dict):
        """Download the HLS stream described by info_dict into filename.

        info_dict must contain 'url' (the m3u8 manifest URL) and 'id' (only
        used in the progress messages).  The data is written to the name
        returned by self.temp_name(filename) and renamed to filename once
        every segment has been written.  Returns True when finished.
        """
        url = info_dict['url']
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)

        self.to_screen(
            '[hlsnative] %s: Downloading m3u8 manifest' % info_dict['id'])
        data = self.ydl.urlopen(url).read()
        s = data.decode('utf-8', 'ignore')
        segment_urls = []
        for line in s.splitlines():
            line = line.strip()
            # Lines starting with '#' are skipped; anything else is treated
            # as a segment URL, resolved against the manifest URL when it is
            # not already an absolute http(s) URL.
            if line and not line.startswith('#'):
                segment_url = (
                    line
                    if re.match(r'^https?://', line)
                    else compat_urlparse.urljoin(url, line))
                segment_urls.append(segment_url)

        total_segments = len(segment_urls)
        byte_counter = 0
        with open(tmpfilename, 'wb') as outf:
            for i, segurl in enumerate(segment_urls):
                # Announce the segment *before* fetching it, so the message
                # describes the download in progress rather than one that
                # has already completed.
                self.to_screen(
                    '[hlsnative] %s: Downloading segment %d / %d' %
                    (info_dict['id'], i + 1, total_segments))
                segment = self.ydl.urlopen(segurl).read()
                outf.write(segment)
                byte_counter += len(segment)

        # The total size is only known once everything has been downloaded,
        # hence downloaded_bytes == total_bytes in the final progress hook.
        self._hook_progress({
            'downloaded_bytes': byte_counter,
            'total_bytes': byte_counter,
            'filename': filename,
            'status': 'finished',
        })
        self.try_rename(tmpfilename, filename)
        return True
|
||||
|
||||
|
@@ -6,7 +6,6 @@ from .aftonbladet import AftonbladetIE
|
||||
from .anitube import AnitubeIE
|
||||
from .anysex import AnySexIE
|
||||
from .aol import AolIE
|
||||
from .allmyvideos import AllmyvideosIE
|
||||
from .allocine import AllocineIE
|
||||
from .aparat import AparatIE
|
||||
from .appletrailers import AppleTrailersIE
|
||||
@@ -200,6 +199,7 @@ from .malemotion import MalemotionIE
|
||||
from .mdr import MDRIE
|
||||
from .metacafe import MetacafeIE
|
||||
from .metacritic import MetacriticIE
|
||||
from .mgoon import MgoonIE
|
||||
from .ministrygrid import MinistryGridIE
|
||||
from .mit import TechTVMITIE, MITIE, OCWMITIE
|
||||
from .mitele import MiTeleIE
|
||||
@@ -209,6 +209,7 @@ from .mpora import MporaIE
|
||||
from .moevideo import MoeVideoIE
|
||||
from .mofosex import MofosexIE
|
||||
from .mojvideo import MojvideoIE
|
||||
from .moniker import MonikerIE
|
||||
from .mooshare import MooshareIE
|
||||
from .morningstar import MorningstarIE
|
||||
from .motherless import MotherlessIE
|
||||
@@ -221,6 +222,7 @@ from .mtv import (
|
||||
MTVServicesEmbeddedIE,
|
||||
MTVIggyIE,
|
||||
)
|
||||
from .muenchentv import MuenchenTVIE
|
||||
from .musicplayon import MusicPlayOnIE
|
||||
from .musicvault import MusicVaultIE
|
||||
from .muzu import MuzuTVIE
|
||||
@@ -238,6 +240,7 @@ from .ndtv import NDTVIE
|
||||
from .newgrounds import NewgroundsIE
|
||||
from .newstube import NewstubeIE
|
||||
from .nfb import NFBIE
|
||||
from .nfl import NFLIE
|
||||
from .nhl import NHLIE, NHLVideocenterIE
|
||||
from .niconico import NiconicoIE
|
||||
from .ninegag import NineGagIE
|
||||
@@ -247,7 +250,10 @@ from .nosvideo import NosVideoIE
|
||||
from .novamov import NovaMovIE
|
||||
from .nowness import NownessIE
|
||||
from .nowvideo import NowVideoIE
|
||||
from .npo import NPOIE
|
||||
from .npo import (
|
||||
NPOIE,
|
||||
TegenlichtVproIE,
|
||||
)
|
||||
from .nrk import (
|
||||
NRKIE,
|
||||
NRKTVIE,
|
||||
@@ -360,6 +366,7 @@ from .thisav import ThisAVIE
|
||||
from .tinypic import TinyPicIE
|
||||
from .tlc import TlcIE, TlcDeIE
|
||||
from .tnaflix import TNAFlixIE
|
||||
from .thvideo import THVideoIE
|
||||
from .toutv import TouTvIE
|
||||
from .toypics import ToypicsUserIE, ToypicsIE
|
||||
from .traileraddict import TrailerAddictIE
|
||||
@@ -446,6 +453,7 @@ from .yahoo import (
|
||||
from .youjizz import YouJizzIE
|
||||
from .youku import YoukuIE
|
||||
from .youporn import YouPornIE
|
||||
from .yourupload import YourUploadIE
|
||||
from .youtube import (
|
||||
YoutubeIE,
|
||||
YoutubeChannelIE,
|
||||
|
@@ -15,6 +15,7 @@ from ..utils import (
|
||||
compat_http_client,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
compat_str,
|
||||
|
||||
clean_html,
|
||||
@@ -130,6 +131,8 @@ class InfoExtractor(object):
|
||||
by YoutubeDL if it's missing)
|
||||
categories: A list of categories that the video falls in, for example
|
||||
["Sports", "Berlin"]
|
||||
is_live: True, False, or None (=unknown). Whether this video is a
|
||||
live stream that goes on instead of a fixed-length video.
|
||||
|
||||
Unless mentioned otherwise, the fields should be Unicode strings.
|
||||
|
||||
@@ -638,7 +641,9 @@ class InfoExtractor(object):
|
||||
|
||||
return formats
|
||||
|
||||
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None):
|
||||
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
||||
entry_protocol='m3u8', preference=None):
|
||||
|
||||
formats = [{
|
||||
'format_id': 'm3u8-meta',
|
||||
'url': m3u8_url,
|
||||
@@ -649,6 +654,11 @@ class InfoExtractor(object):
|
||||
'format_note': 'Quality selection URL',
|
||||
}]
|
||||
|
||||
format_url = lambda u: (
|
||||
u
|
||||
if re.match(r'^https?://', u)
|
||||
else compat_urlparse.urljoin(m3u8_url, u))
|
||||
|
||||
m3u8_doc = self._download_webpage(m3u8_url, video_id)
|
||||
last_info = None
|
||||
kv_rex = re.compile(
|
||||
@@ -665,15 +675,17 @@ class InfoExtractor(object):
|
||||
continue
|
||||
else:
|
||||
if last_info is None:
|
||||
formats.append({'url': line})
|
||||
formats.append({'url': format_url(line)})
|
||||
continue
|
||||
tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
|
||||
|
||||
f = {
|
||||
'format_id': 'm3u8-%d' % (tbr if tbr else len(formats)),
|
||||
'url': line.strip(),
|
||||
'url': format_url(line.strip()),
|
||||
'tbr': tbr,
|
||||
'ext': ext,
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
}
|
||||
codecs = last_info.get('CODECS')
|
||||
if codecs:
|
||||
|
@@ -7,7 +7,7 @@ class DivxStageIE(NovaMovIE):
|
||||
IE_NAME = 'divxstage'
|
||||
IE_DESC = 'DivxStage'
|
||||
|
||||
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'divxstage\.(?:eu|net|ch|co|at|ag)'}
|
||||
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'divxstage\.(?:eu|net|ch|co|at|ag|to)'}
|
||||
|
||||
_HOST = 'www.divxstage.eu'
|
||||
|
||||
@@ -24,4 +24,4 @@ class DivxStageIE(NovaMovIE):
|
||||
'title': 'youtubedl test video',
|
||||
'description': 'This is a test video for youtubedl.',
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -5,24 +5,29 @@ import os.path
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import compat_urllib_parse_unquote
|
||||
from ..utils import compat_urllib_parse_unquote, url_basename
|
||||
|
||||
|
||||
class DropboxIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dropbox[.]com/s/(?P<id>[a-zA-Z0-9]{15})/(?P<title>[^?#]*)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?dropbox[.]com/sh?/(?P<id>[a-zA-Z0-9]{15})/.*'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
|
||||
'info_dict': {
|
||||
'id': 'nelirfsxnmcfbfh',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl test video \'ä"BaW_jenozKc'
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
fn = compat_urllib_parse_unquote(mobj.group('title'))
|
||||
fn = compat_urllib_parse_unquote(url_basename(url))
|
||||
title = os.path.splitext(fn)[0]
|
||||
video_url = (
|
||||
re.sub(r'[?&]dl=0', '', url) +
|
||||
|
@@ -10,13 +10,13 @@ from ..utils import (
|
||||
|
||||
|
||||
class FlickrIE(InfoExtractor):
|
||||
"""Information Extractor for Flickr videos"""
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
|
||||
_VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
|
||||
_TEST = {
|
||||
'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',
|
||||
'file': '5645318632.mp4',
|
||||
'md5': '6fdc01adbc89d72fc9c4f15b4a4ba87b',
|
||||
'info_dict': {
|
||||
'id': '5645318632',
|
||||
'ext': 'mp4',
|
||||
"description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.",
|
||||
"uploader_id": "forestwander-nature-pictures",
|
||||
"title": "Dark Hollow Waterfalls"
|
||||
@@ -49,12 +49,12 @@ class FlickrIE(InfoExtractor):
|
||||
raise ExtractorError('Unable to extract video url')
|
||||
video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
|
||||
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': self._og_search_title(webpage),
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'uploader_id': video_uploader_id,
|
||||
}]
|
||||
}
|
||||
|
@@ -4,16 +4,21 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class FranceInterIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]{6})'
|
||||
_VALID_URL = r'http://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.franceinter.fr/player/reecouter?play=793962',
|
||||
'file': '793962.mp3',
|
||||
'md5': '4764932e466e6f6c79c317d2e74f6884',
|
||||
"info_dict": {
|
||||
"title": "L’Histoire dans les jeux vidéo",
|
||||
'id': '793962',
|
||||
'ext': 'mp3',
|
||||
'title': 'L’Histoire dans les jeux vidéo',
|
||||
'description': 'md5:7e93ddb4451e7530022792240a3049c7',
|
||||
'timestamp': 1387369800,
|
||||
'upload_date': '20131218',
|
||||
},
|
||||
}
|
||||
|
||||
@@ -22,17 +27,26 @@ class FranceInterIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._html_search_regex(
|
||||
r'<span class="roll_overflow">(.*?)</span></h1>', webpage, 'title')
|
||||
|
||||
path = self._search_regex(
|
||||
r'&urlAOD=(.*?)&startTime', webpage, 'video url')
|
||||
r'<a id="player".+?href="([^"]+)"', webpage, 'video url')
|
||||
video_url = 'http://www.franceinter.fr/' + path
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<span class="title">(.+?)</span>', webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'<span class="description">(.*?)</span>',
|
||||
webpage, 'description', fatal=False)
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'data-date="(\d+)"', webpage, 'upload date', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'formats': [{
|
||||
'url': video_url,
|
||||
'vcodec': 'none',
|
||||
}],
|
||||
'title': title,
|
||||
}
|
||||
|
@@ -584,7 +584,9 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Helper method
|
||||
def _playlist_from_matches(matches, getter, ie=None):
|
||||
urlrs = orderedSet(self.url_result(getter(m), ie) for m in matches)
|
||||
urlrs = orderedSet(
|
||||
self.url_result(self._proto_relative_url(getter(m)), ie)
|
||||
for m in matches)
|
||||
return self.playlist_result(
|
||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||
|
||||
@@ -633,7 +635,7 @@ class GenericIE(InfoExtractor):
|
||||
\1''', webpage)
|
||||
if matches:
|
||||
return _playlist_from_matches(
|
||||
matches, lambda m: unescapeHTML(m[1]), ie='Youtube')
|
||||
matches, lambda m: unescapeHTML(m[1]))
|
||||
|
||||
# Look for embedded Dailymotion player
|
||||
matches = re.findall(
|
||||
|
87
youtube_dl/extractor/mgoon.py
Normal file
87
youtube_dl/extractor/mgoon.py
Normal file
@@ -0,0 +1,87 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
qualities,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class MgoonIE(InfoExtractor):
    """Information extractor for mgoon.com videos.

    Accepted URL shapes (see _TESTS): channel pages
    (m.mgoon.com/ch/<channel>/v/<id>), play pages
    (www.mgoon.com/play/view/<id>) and video.mgoon.com/<id>.  All metadata
    and the list of media files come from the JSON endpoint in _API_URL.
    """

    # The mobile prefix is an optional *non-capturing* group, so 'id' is the
    # only group in the pattern and a stray ':' before 'm.' is not accepted.
    _VALID_URL = r'''(?x)https?://(?:www\.)?
    (?:(?:m\.)?mgoon\.com/(?:ch/(?:.+)/v|play/view)|
        video\.mgoon\.com)/(?P<id>[0-9]+)'''
    _API_URL = 'http://mpos.mgoon.com/player/video?id={0:}'
    _TESTS = [
        {
            'url': 'http://m.mgoon.com/ch/hi6618/v/5582148',
            'md5': 'dd46bb66ab35cf6d51cc812fd82da79d',
            'info_dict': {
                'id': '5582148',
                'uploader_id': 'hi6618',
                'duration': 240.419,
                'upload_date': '20131220',
                'ext': 'mp4',
                'title': 'md5:543aa4c27a4931d371c3f433e8cebebc',
                # Raw string: same value, but no invalid '\.' escape.
                'thumbnail': r're:^https?://.*\.jpg$',
            }
        },
        {
            'url': 'http://www.mgoon.com/play/view/5582148',
            'only_matching': True,
        },
        {
            'url': 'http://video.mgoon.com/5582148',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return the info dict for the mgoon video addressed by url.

        Raises ExtractorError (expected=True) when the API response does not
        report the error code 'NONE'.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        data = self._download_json(self._API_URL.format(video_id), video_id)

        # Look the error block up once; a response without 'errorInfo' is
        # reported as an ExtractorError instead of failing with a KeyError
        # while the message is being built.
        error_info = data.get('errorInfo', {})
        if error_info.get('code') != 'NONE':
            raise ExtractorError('%s encountered an error: %s' % (
                self.IE_NAME, error_info.get('message')), expected=True)

        v_info = data['videoInfo']
        title = v_info.get('v_title')
        thumbnail = v_info.get('v_thumbnail')
        duration = v_info.get('v_duration')
        upload_date = unified_strdate(v_info.get('v_reg_date'))
        uploader_id = data.get('userInfo', {}).get('u_alias')
        if duration:
            # presumably milliseconds -> seconds (the test expects 240.419)
            duration /= 1000.0

        age_limit = None
        if data.get('accessInfo', {}).get('code') == 'VIDEO_STATUS_ADULT':
            age_limit = 18

        formats = []
        # Labels listed from lowest to highest quality.
        get_quality = qualities(['360p', '480p', '720p', '1080p'])
        for fmt in data['videoFiles']:
            formats.append({
                'format_id': fmt['label'],
                'quality': get_quality(fmt['label']),
                'url': fmt['url'],
                'ext': fmt['format'],
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'duration': duration,
            'upload_date': upload_date,
            'uploader_id': uploader_id,
            'age_limit': age_limit,
        }
|
@@ -11,11 +11,11 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class AllmyvideosIE(InfoExtractor):
|
||||
IE_NAME = 'allmyvideos.net'
|
||||
_VALID_URL = r'https?://allmyvideos\.net/(?P<id>[a-zA-Z0-9_-]+)'
|
||||
class MonikerIE(InfoExtractor):
|
||||
IE_DESC = 'allmyvideos.net and vidspot.net'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:allmyvideos|vidspot)\.net/(?P<id>[a-zA-Z0-9_-]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://allmyvideos.net/jih3nce3x6wn',
|
||||
'md5': '710883dee1bfc370ecf9fa6a89307c88',
|
||||
'info_dict': {
|
||||
@@ -23,7 +23,18 @@ class AllmyvideosIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl test video',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://vidspot.net/l2ngsmhs8ci5',
|
||||
'md5': '710883dee1bfc370ecf9fa6a89307c88',
|
||||
'info_dict': {
|
||||
'id': 'l2ngsmhs8ci5',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl test video',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.vidspot.net/l2ngsmhs8ci5',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
77
youtube_dl/extractor/muenchentv.py
Normal file
77
youtube_dl/extractor/muenchentv.py
Normal file
@@ -0,0 +1,77 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
)
|
||||
|
||||
|
||||
class MuenchenTVIE(InfoExtractor):
    """Information extractor for the muenchen.tv live stream page."""

    _VALID_URL = r'https?://(?:www\.)?muenchen\.tv/livestream'
    IE_DESC = 'münchen.tv'
    _TEST = {
        'url': 'http://www.muenchen.tv/livestream/',
        'info_dict': {
            'id': '5334',
            'display_id': 'live',
            'ext': 'mp4',
            'title': 're:^münchen.tv-Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the live stream found on the page at url.

        The first entry of the player's JavaScript playlist configuration
        supplies the media id, the thumbnail and the list of sources.
        """
        display_id = 'live'
        webpage = self._download_webpage(url, display_id)

        # The page title is suffixed with the current local date and time
        # (to the minute), which is the shape the test's title regex expects.
        now = datetime.datetime.now()
        now_str = now.strftime("%Y-%m-%d %H:%M")
        title = self._og_search_title(webpage) + ' ' + now_str

        # The playlist is a JavaScript literal, so it is passed through
        # js_to_json before being parsed as JSON.
        data_js = self._search_regex(
            r'(?s)\nplaylist:\s*(\[.*?}\]),related:',
            webpage, 'playlist configuration')
        data_json = js_to_json(data_js)
        data = json.loads(data_json)[0]

        video_id = data['mediaid']
        thumbnail = data.get('image')

        formats = []
        for format_num, s in enumerate(data['sources']):
            ext = determine_ext(s['file'], None)
            label_str = s.get('label')
            if label_str is None:
                # No label given: fall back to the source's position.
                label_str = '_%d' % format_num

            if ext is None:
                format_id = label_str
            else:
                format_id = '%s-%s' % (ext, label_str)

            formats.append({
                'url': s['file'],
                'tbr': int_or_none(s.get('label')),
                'ext': 'mp4',
                'format_id': format_id,
                # Sources whose file name contains '.smil' are ranked last.
                'preference': -100 if '.smil' in s['file'] else 0,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            # Previously extracted but never returned.
            'thumbnail': thumbnail,
            'is_live': True,
        }
|
||||
|
@@ -16,9 +16,9 @@ class NBCIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
|
||||
'md5': '54d0fbc33e0b853a65d7b4de5c06d64e',
|
||||
# md5 checksum is not stable
|
||||
'info_dict': {
|
||||
'id': 'u1RInQZRN7QJ',
|
||||
'id': 'bTmnLCvIbaaH',
|
||||
'ext': 'flv',
|
||||
'title': 'I Am a Firefighter',
|
||||
'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.',
|
||||
|
103
youtube_dl/extractor/nfl.py
Normal file
103
youtube_dl/extractor/nfl.py
Normal file
@@ -0,0 +1,103 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class NFLIE(InfoExtractor):
    """Information extractor for videos on nfl.com."""

    IE_NAME = 'nfl.com'
    _VALID_URL = r'(?x)https?://(?:www\.)?nfl\.com/(?:videos/(?:.+)/|.*?\#video=)(?P<id>\d..[0-9]+)'
    _PLAYER_CONFIG_URL = 'http://www.nfl.com/static/content/static/config/video/config.json'
    _TEST = {
        'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
        # 'md5': '5eb8c40a727dda106d510e5d6ffa79e5',  # md5 checksum fluctuates
        'info_dict': {
            'id': '0ap3000000398478',
            'ext': 'mp4',
            'title': 'Week 3: Washington Redskins vs. Philadelphia Eagles highlights',
            'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478',
            'upload_date': '20140921',
            'timestamp': 1411337580,
            'thumbnail': 're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the nfl.com video addressed by url.

        The player config supplies the per-video JSON URL template and the
        list of CDNs; one format is produced for every usable CDN / bitrate
        stream combination.  Raises ExtractorError when the config contains
        no CDN data.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        config = self._download_json(
            self._PLAYER_CONFIG_URL, video_id,
            note='Downloading player config')
        url_template = 'http://nfl.com{contentURLTemplate:s}'.format(**config)
        video_data = self._download_json(
            url_template.format(id=video_id), video_id)

        cdns = config.get('cdns')
        if not cdns:
            raise ExtractorError('Failed to get CDN data', expected=True)

        formats = []
        streams = video_data.get('cdnData', {}).get('bitrateInfo', [])
        for name, cdn in cdns.items():
            # LimeLight streams don't seem to work
            if cdn.get('name') == 'LIMELIGHT':
                continue

            protocol = cdn.get('protocol')
            host = remove_end(cdn.get('host', ''), '/')
            if not protocol or not host:
                continue

            # Make sure a non-empty prefix ends with exactly one separator.
            path_prefix = cdn.get('pathprefix', '')
            if path_prefix and not path_prefix.endswith('/'):
                path_prefix = '%s/' % path_prefix

            def build_url(stream_path):
                # Only ever called within the current loop iteration, so the
                # closed-over protocol/host/path_prefix belong to this CDN.
                return '{protocol:s}://{host:s}/{prefix:s}{path:}'.format(
                    protocol=protocol,
                    host=host,
                    prefix=path_prefix,
                    path=stream_path,
                )

            # rtmp ranks lowest, CDNs with 'prog' in their name next.
            preference = (
                -2 if protocol == 'rtmp'
                else -1 if 'prog' in name.lower()
                else 0)

            for stream in streams:
                stream_path = stream.get('path')
                if stream_path:
                    formats.append({
                        'url': build_url(stream_path),
                        'vbr': int_or_none(stream.get('rate', 0), 1000),
                        'preference': preference,
                        'format_note': name,
                    })

        self._sort_formats(formats)

        # Take the first available image, trying the sizes from 'xl' to 'xs'.
        thumbnail = None
        for size in ('xl', 'l', 'm', 's', 'xs'):
            thumbnail = video_data.get('imagePaths', {}).get(size)
            if thumbnail:
                break

        return {
            'id': video_id,
            'title': video_data.get('storyHeadline'),
            'formats': formats,
            'description': video_data.get('caption'),
            'duration': video_data.get('duration'),
            'thumbnail': thumbnail,
            'timestamp': int_or_none(video_data.get('posted'), 1000),
        }
|
@@ -2,6 +2,8 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -17,6 +19,7 @@ from ..utils import (
|
||||
class NocoIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
|
||||
_LOGIN_URL = 'http://noco.tv/do.php'
|
||||
_API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s'
|
||||
_NETRC_MACHINE = 'noco'
|
||||
|
||||
_TEST = {
|
||||
@@ -55,33 +58,52 @@ class NocoIE(InfoExtractor):
|
||||
login = self._download_json(request, None, 'Logging in as %s' % username)
|
||||
|
||||
if 'erreur' in login:
|
||||
raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
|
||||
raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
|
||||
|
||||
def _call_api(self, path, video_id, note):
    """Query the noco API endpoint `path` and return the decoded JSON.

    Each request carries a millisecond timestamp (ts) and a token (tk)
    derived from that timestamp.  When the response is a dict with a truthy
    'error' entry, the error is raised through self._raise_error.
    """
    timestamp = compat_str(int(time.time() * 1000))
    # Token: md5 of (md5 hex digest of the timestamp + fixed suffix).
    timestamp_digest = hashlib.md5(timestamp.encode('ascii')).hexdigest()
    token_source = timestamp_digest + '#8S?uCraTedap6a'
    token = hashlib.md5(token_source.encode('ascii')).hexdigest()

    api_url = self._API_URL_TEMPLATE % (path, timestamp, token)
    response = self._download_json(api_url, video_id, note)

    # List responses cannot carry an error entry, hence the dict check.
    if isinstance(response, dict) and response.get('error'):
        self._raise_error(response['error'], response['description'])

    return response
|
||||
|
||||
def _raise_error(self, error, description):
    """Raise an expected ExtractorError built from an API error pair."""
    message = '%s returned error: %s - %s' % (
        self.IE_NAME, error, description)
    raise ExtractorError(message, expected=True)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
medias = self._download_json(
|
||||
'https://api.noco.tv/1.0/video/medias/%s' % video_id, video_id, 'Downloading video JSON')
|
||||
medias = self._call_api(
|
||||
'shows/%s/medias' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
qualities = self._call_api(
|
||||
'qualities',
|
||||
video_id, 'Downloading qualities JSON')
|
||||
|
||||
formats = []
|
||||
|
||||
for fmt in medias['fr']['video_list']['default']['quality_list']:
|
||||
format_id = fmt['quality_key']
|
||||
for format_id, fmt in medias['fr']['video_list']['none']['quality_list'].items():
|
||||
|
||||
file = self._download_json(
|
||||
'https://api.noco.tv/1.0/video/file/%s/fr/%s' % (format_id.lower(), video_id),
|
||||
video = self._call_api(
|
||||
'shows/%s/video/%s/fr' % (video_id, format_id.lower()),
|
||||
video_id, 'Downloading %s video JSON' % format_id)
|
||||
|
||||
file_url = file['file']
|
||||
file_url = video['file']
|
||||
if not file_url:
|
||||
continue
|
||||
|
||||
if file_url == 'forbidden':
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s - %s' % (
|
||||
self.IE_NAME, file['popmessage']['title'], file['popmessage']['message']),
|
||||
expected=True)
|
||||
if file_url in ['forbidden', 'not found']:
|
||||
popmessage = video['popmessage']
|
||||
self._raise_error(popmessage['title'], popmessage['message'])
|
||||
|
||||
formats.append({
|
||||
'url': file_url,
|
||||
@@ -91,20 +113,31 @@ class NocoIE(InfoExtractor):
|
||||
'abr': fmt['audiobitrate'],
|
||||
'vbr': fmt['videobitrate'],
|
||||
'filesize': fmt['filesize'],
|
||||
'format_note': fmt['quality_name'],
|
||||
'preference': fmt['priority'],
|
||||
'format_note': qualities[format_id]['quality_name'],
|
||||
'preference': qualities[format_id]['priority'],
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
show = self._download_json(
|
||||
'https://api.noco.tv/1.0/shows/show/%s' % video_id, video_id, 'Downloading show JSON')[0]
|
||||
show = self._call_api(
|
||||
'shows/by_id/%s' % video_id,
|
||||
video_id, 'Downloading show JSON')[0]
|
||||
|
||||
upload_date = unified_strdate(show['indexed'])
|
||||
upload_date = unified_strdate(show['online_date_start_utc'])
|
||||
uploader = show['partner_name']
|
||||
uploader_id = show['partner_key']
|
||||
duration = show['duration_ms'] / 1000.0
|
||||
thumbnail = show['screenshot']
|
||||
|
||||
thumbnails = []
|
||||
for thumbnail_key, thumbnail_url in show.items():
|
||||
m = re.search(r'^screenshot_(?P<width>\d+)x(?P<height>\d+)$', thumbnail_key)
|
||||
if not m:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': thumbnail_url,
|
||||
'width': int(m.group('width')),
|
||||
'height': int(m.group('height')),
|
||||
})
|
||||
|
||||
episode = show.get('show_TT') or show.get('show_OT')
|
||||
family = show.get('family_TT') or show.get('family_OT')
|
||||
@@ -124,7 +157,7 @@ class NocoIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'thumbnails': thumbnails,
|
||||
'upload_date': upload_date,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
|
@@ -7,6 +7,7 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
parse_duration,
|
||||
qualities,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
@@ -55,7 +56,9 @@ class NPOIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
return self._get_info(video_id)
|
||||
|
||||
def _get_info(self, video_id):
|
||||
metadata = self._download_json(
|
||||
'http://e.omroep.nl/metadata/aflevering/%s' % video_id,
|
||||
video_id,
|
||||
@@ -106,3 +109,30 @@ class NPOIE(InfoExtractor):
|
||||
'duration': parse_duration(metadata.get('tijdsduur')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class TegenlichtVproIE(NPOIE):
|
||||
IE_NAME = 'tegenlicht.vpro.nl'
|
||||
_VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://tegenlicht.vpro.nl/afleveringen/2012-2013/de-toekomst-komt-uit-afrika.html',
|
||||
'md5': 'f8065e4e5a7824068ed3c7e783178f2c',
|
||||
'info_dict': {
|
||||
'id': 'VPWON_1169289',
|
||||
'ext': 'm4v',
|
||||
'title': 'Tegenlicht',
|
||||
'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1',
|
||||
'upload_date': '20130225',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
name = url_basename(url)
|
||||
webpage = self._download_webpage(url, name)
|
||||
urn = self._html_search_meta('mediaurn', webpage)
|
||||
info_page = self._download_json(
|
||||
'http://rs.vpro.nl/v2/api/media/%s.json' % urn, name)
|
||||
return self._get_info(info_page['mid'])
|
||||
|
@@ -10,6 +10,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
@@ -29,6 +30,7 @@ class PlayFMIE(InfoExtractor):
|
||||
'duration': 5627.428,
|
||||
'upload_date': '20140712',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
}
|
||||
@@ -51,7 +53,8 @@ class PlayFMIE(InfoExtractor):
|
||||
|
||||
recording = rec_doc.find('./recording')
|
||||
title = recording.find('./title').text
|
||||
view_count = int_or_none(recording.find('./stats/playcount').text)
|
||||
view_count = str_to_int(recording.find('./stats/playcount').text)
|
||||
comment_count = str_to_int(recording.find('./stats/comments').text)
|
||||
duration = float_or_none(recording.find('./duration').text, scale=1000)
|
||||
thumbnail = recording.find('./image').text
|
||||
|
||||
@@ -75,6 +78,7 @@ class PlayFMIE(InfoExtractor):
|
||||
'title': title,
|
||||
'upload_date': upload_date,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
|
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
class SBSIE(InfoExtractor):
|
||||
IE_DESC = 'sbs.com.au'
|
||||
_VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/ondemand/video/single/(?P<id>[0-9]+)/'
|
||||
_VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/ondemand/video/(?:single/)?(?P<id>[0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# Original URL is handled by the generic IE which finds the iframe:
|
||||
@@ -21,12 +21,16 @@ class SBSIE(InfoExtractor):
|
||||
'md5': '3150cf278965eeabb5b4cea1c963fe0a',
|
||||
'info_dict': {
|
||||
'id': '320403011771',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dingo Conservation',
|
||||
'description': 'Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. This family run a dingo conservation park to prevent their extinction',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
'add_ies': ['generic'],
|
||||
},
|
||||
{
|
||||
'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -338,20 +338,17 @@ class SoundcloudUserIE(SoundcloudIE):
|
||||
|
||||
|
||||
class SoundcloudPlaylistIE(SoundcloudIE):
|
||||
_VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))$'
|
||||
_VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
|
||||
IE_NAME = 'soundcloud:playlist'
|
||||
_TESTS = [
|
||||
|
||||
{
|
||||
'url': 'http://api.soundcloud.com/playlists/4110309',
|
||||
'info_dict': {
|
||||
'id': '4110309',
|
||||
'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
|
||||
'description': 're:.*?TILT Brass - Bowery Poetry Club',
|
||||
},
|
||||
'playlist_count': 6,
|
||||
}
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'http://api.soundcloud.com/playlists/4110309',
|
||||
'info_dict': {
|
||||
'id': '4110309',
|
||||
'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
|
||||
'description': 're:.*?TILT Brass - Bowery Poetry Club',
|
||||
},
|
||||
'playlist_count': 6,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@@ -5,6 +5,7 @@ import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
ExtractorError,
|
||||
xpath_with_ns,
|
||||
)
|
||||
@@ -55,36 +56,44 @@ class ThePlatformIE(InfoExtractor):
|
||||
body = meta.find(_x('smil:body'))
|
||||
|
||||
f4m_node = body.find(_x('smil:seq//smil:video'))
|
||||
if f4m_node is not None:
|
||||
if f4m_node is not None and '.f4m' in f4m_node.attrib['src']:
|
||||
f4m_url = f4m_node.attrib['src']
|
||||
if 'manifest.f4m?' not in f4m_url:
|
||||
f4m_url += '?'
|
||||
# the parameters are from syfy.com, other sites may use others,
|
||||
# they also work for nbc.com
|
||||
f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3'
|
||||
formats = [{
|
||||
'ext': 'flv',
|
||||
'url': f4m_url,
|
||||
}]
|
||||
formats = self._extract_f4m_formats(f4m_url, video_id)
|
||||
else:
|
||||
base_url = head.find(_x('smil:meta')).attrib['base']
|
||||
switch = body.find(_x('smil:switch'))
|
||||
formats = []
|
||||
for f in switch.findall(_x('smil:video')):
|
||||
attr = f.attrib
|
||||
width = int(attr['width'])
|
||||
height = int(attr['height'])
|
||||
vbr = int(attr['system-bitrate']) // 1000
|
||||
format_id = '%dx%d_%dk' % (width, height, vbr)
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': base_url,
|
||||
'play_path': 'mp4:' + attr['src'],
|
||||
'ext': 'flv',
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vbr': vbr,
|
||||
})
|
||||
switch = body.find(_x('smil:switch'))
|
||||
if switch is not None:
|
||||
base_url = head.find(_x('smil:meta')).attrib['base']
|
||||
for f in switch.findall(_x('smil:video')):
|
||||
attr = f.attrib
|
||||
width = int(attr['width'])
|
||||
height = int(attr['height'])
|
||||
vbr = int(attr['system-bitrate']) // 1000
|
||||
format_id = '%dx%d_%dk' % (width, height, vbr)
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': base_url,
|
||||
'play_path': 'mp4:' + attr['src'],
|
||||
'ext': 'flv',
|
||||
'width': width,
|
||||
'height': height,
|
||||
'vbr': vbr,
|
||||
})
|
||||
else:
|
||||
switch = body.find(_x('smil:seq//smil:switch'))
|
||||
for f in switch.findall(_x('smil:video')):
|
||||
attr = f.attrib
|
||||
vbr = int(attr['system-bitrate']) // 1000
|
||||
formats.append({
|
||||
'format_id': compat_str(vbr),
|
||||
'url': attr['src'],
|
||||
'vbr': vbr,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
59
youtube_dl/extractor/thvideo.py
Normal file
59
youtube_dl/extractor/thvideo.py
Normal file
@@ -0,0 +1,59 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate
|
||||
)
|
||||
|
||||
|
||||
class THVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?thvideo\.tv/(?:v/th|mobile\.php\?cid=)(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://thvideo.tv/v/th1987/',
|
||||
'md5': 'fa107b1f73817e325e9433505a70db50',
|
||||
'info_dict': {
|
||||
'id': '1987',
|
||||
'ext': 'mp4',
|
||||
'title': '【动画】秘封活动记录 ~ The Sealed Esoteric History.分镜稿预览',
|
||||
'display_id': 'th1987',
|
||||
'thumbnail': 'http://thvideo.tv/uploadfile/2014/0722/20140722013459856.jpg',
|
||||
'description': '社团京都幻想剧团的第一个东方二次同人动画作品「秘封活动记录 ~ The Sealed Esoteric History.」 本视频是该动画第一期的分镜草稿...',
|
||||
'upload_date': '20140722'
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
# extract download link from mobile player page
|
||||
webpage_player = self._download_webpage(
|
||||
'http://thvideo.tv/mobile.php?cid=%s-0' % (video_id),
|
||||
video_id, note='Downloading video source page')
|
||||
video_url = self._html_search_regex(
|
||||
r'<source src="(.*?)" type', webpage_player, 'video url')
|
||||
|
||||
# extract video info from main page
|
||||
webpage = self._download_webpage(
|
||||
'http://thvideo.tv/v/th%s' % (video_id), video_id)
|
||||
title = self._og_search_title(webpage)
|
||||
display_id = 'th%s' % video_id
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'span itemprop="datePublished" content="(.*?)">', webpage,
|
||||
'upload date', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'ext': 'mp4',
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'display_id': display_id,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
'upload_date': upload_date
|
||||
}
|
@@ -14,27 +14,35 @@ from ..aes import aes_decrypt_text
|
||||
|
||||
|
||||
class Tube8IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/){2}(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.tube8.com/teen/kasia-music-video/229795/',
|
||||
'md5': '44bf12b98313827dd52d35b8706a4ea0',
|
||||
'info_dict': {
|
||||
'id': '229795',
|
||||
'ext': 'mp4',
|
||||
'description': 'hot teen Kasia grinding',
|
||||
'uploader': 'unknown',
|
||||
'title': 'Kasia music video',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
_VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.tube8.com/teen/kasia-music-video/229795/',
|
||||
'md5': '44bf12b98313827dd52d35b8706a4ea0',
|
||||
'info_dict': {
|
||||
'id': '229795',
|
||||
'display_id': 'kasia-music-video',
|
||||
'ext': 'mp4',
|
||||
'description': 'hot teen Kasia grinding',
|
||||
'uploader': 'unknown',
|
||||
'title': 'Kasia music video',
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
req = compat_urllib_request.Request(url)
|
||||
req.add_header('Cookie', 'age_verified=1')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
webpage = self._download_webpage(req, display_id)
|
||||
|
||||
flashvars = json.loads(self._html_search_regex(
|
||||
r'var flashvars\s*=\s*({.+?})', webpage, 'flashvars'))
|
||||
@@ -70,6 +78,7 @@ class Tube8IE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
|
@@ -6,6 +6,7 @@ import xml.etree.ElementTree
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_HTTPError,
|
||||
compat_urllib_request,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
@@ -69,6 +70,21 @@ class VevoIE(InfoExtractor):
|
||||
}]
|
||||
_SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/'
|
||||
|
||||
def _real_initialize(self):
|
||||
req = compat_urllib_request.Request(
|
||||
'http://www.vevo.com/auth', data=b'')
|
||||
webpage = self._download_webpage(
|
||||
req, None,
|
||||
note='Retrieving oauth token',
|
||||
errnote='Unable to retrieve oauth token',
|
||||
fatal=False)
|
||||
if webpage is False:
|
||||
self._oauth_token = None
|
||||
else:
|
||||
self._oauth_token = self._search_regex(
|
||||
r'access_token":\s*"([^"]+)"',
|
||||
webpage, 'access token', fatal=False)
|
||||
|
||||
def _formats_from_json(self, video_info):
|
||||
last_version = {'version': -1}
|
||||
for version in video_info['videoVersions']:
|
||||
@@ -129,6 +145,26 @@ class VevoIE(InfoExtractor):
|
||||
})
|
||||
return formats
|
||||
|
||||
def _download_api_formats(self, video_id):
|
||||
if not self._oauth_token:
|
||||
self._downloader.report_warning(
|
||||
'No oauth token available, skipping API HLS download')
|
||||
return []
|
||||
|
||||
api_url = 'https://apiv2.vevo.com/video/%s/streams/hls?token=%s' % (
|
||||
video_id, self._oauth_token)
|
||||
api_data = self._download_json(
|
||||
api_url, video_id,
|
||||
note='Downloading HLS formats',
|
||||
errnote='Failed to download HLS format list', fatal=False)
|
||||
if api_data is None:
|
||||
return []
|
||||
|
||||
m3u8_url = api_data[0]['url']
|
||||
return self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, entry_protocol='m3u8_native', ext='mp4',
|
||||
preference=0)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
@@ -152,30 +188,8 @@ class VevoIE(InfoExtractor):
|
||||
else:
|
||||
age_limit = None
|
||||
|
||||
# Download SMIL
|
||||
smil_blocks = sorted((
|
||||
f for f in video_info['videoVersions']
|
||||
if f['sourceType'] == 13),
|
||||
key=lambda f: f['version'])
|
||||
|
||||
smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
|
||||
self._SMIL_BASE_URL, video_id, video_id.lower())
|
||||
if smil_blocks:
|
||||
smil_url_m = self._search_regex(
|
||||
r'url="([^"]+)"', smil_blocks[-1]['data'], 'SMIL URL',
|
||||
fatal=False)
|
||||
if smil_url_m is not None:
|
||||
smil_url = smil_url_m
|
||||
|
||||
try:
|
||||
smil_xml = self._download_webpage(smil_url, video_id,
|
||||
'Downloading SMIL info')
|
||||
formats.extend(self._formats_from_smil(smil_xml))
|
||||
except ExtractorError as ee:
|
||||
if not isinstance(ee.cause, compat_HTTPError):
|
||||
raise
|
||||
self._downloader.report_warning(
|
||||
'Cannot download SMIL information, falling back to JSON ..')
|
||||
# Download via HLS API
|
||||
formats.extend(self._download_api_formats(video_id))
|
||||
|
||||
self._sort_formats(formats)
|
||||
timestamp_ms = int(self._search_regex(
|
||||
|
@@ -5,7 +5,10 @@ import re
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class WatIE(InfoExtractor):
|
||||
@@ -57,6 +60,11 @@ class WatIE(InfoExtractor):
|
||||
|
||||
video_info = self.download_video_info(real_id)
|
||||
|
||||
error_desc = video_info.get('error_desc')
|
||||
if error_desc:
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, error_desc), expected=True)
|
||||
|
||||
geo_list = video_info.get('geoList')
|
||||
country = geo_list[0] if geo_list else ''
|
||||
|
||||
|
58
youtube_dl/extractor/yourupload.py
Normal file
58
youtube_dl/extractor/yourupload.py
Normal file
@@ -0,0 +1,58 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class YourUploadIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?
|
||||
(?:yourupload\.com/watch|
|
||||
embed\.yourupload\.com|
|
||||
embed\.yucache\.net
|
||||
)/(?P<id>[A-Za-z0-9]+)
|
||||
'''
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://yourupload.com/watch/14i14h',
|
||||
'md5': 'bf5c2f95c4c917536e80936af7bc51e1',
|
||||
'info_dict': {
|
||||
'id': '14i14h',
|
||||
'ext': 'mp4',
|
||||
'title': 'BigBuckBunny_320x180.mp4',
|
||||
'thumbnail': 're:^https?://.*\.jpe?g',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://embed.yourupload.com/14i14h',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://embed.yucache.net/14i14h?client_file_id=803349',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
url = 'http://embed.yucache.net/{0:}'.format(video_id)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
url = self._og_search_video_url(webpage)
|
||||
|
||||
formats = [{
|
||||
'format_id': 'sd',
|
||||
'url': url,
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
@@ -46,7 +46,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
def _set_language(self):
|
||||
return bool(self._download_webpage(
|
||||
self._LANG_URL, None,
|
||||
note=u'Setting language', errnote='unable to set language',
|
||||
note='Setting language', errnote='unable to set language',
|
||||
fatal=False))
|
||||
|
||||
def _login(self):
|
||||
@@ -61,13 +61,13 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
# No authentication to be performed
|
||||
if username is None:
|
||||
if self._LOGIN_REQUIRED:
|
||||
raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
|
||||
raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
|
||||
return True
|
||||
|
||||
login_page = self._download_webpage(
|
||||
self._LOGIN_URL, None,
|
||||
note=u'Downloading login page',
|
||||
errnote=u'unable to fetch login page', fatal=False)
|
||||
note='Downloading login page',
|
||||
errnote='unable to fetch login page', fatal=False)
|
||||
if login_page is False:
|
||||
return
|
||||
|
||||
@@ -105,12 +105,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
req = compat_urllib_request.Request(self._LOGIN_URL, login_data)
|
||||
login_results = self._download_webpage(
|
||||
req, None,
|
||||
note=u'Logging in', errnote=u'unable to log in', fatal=False)
|
||||
note='Logging in', errnote='unable to log in', fatal=False)
|
||||
if login_results is False:
|
||||
return False
|
||||
|
||||
if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
|
||||
raise ExtractorError(u'Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
|
||||
raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
|
||||
|
||||
# Two-Factor
|
||||
# TODO add SMS and phone call support - these require making a request and then prompting the user
|
||||
@@ -119,19 +119,19 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
tfa_code = self._get_tfa_info()
|
||||
|
||||
if tfa_code is None:
|
||||
self._downloader.report_warning(u'Two-factor authentication required. Provide it with --twofactor <code>')
|
||||
self._downloader.report_warning(u'(Note that only TOTP (Google Authenticator App) codes work at this time.)')
|
||||
self._downloader.report_warning('Two-factor authentication required. Provide it with --twofactor <code>')
|
||||
self._downloader.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)')
|
||||
return False
|
||||
|
||||
# Unlike the first login form, secTok and timeStmp are both required for the TFA form
|
||||
|
||||
match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
|
||||
if match is None:
|
||||
self._downloader.report_warning(u'Failed to get secTok - did the page structure change?')
|
||||
self._downloader.report_warning('Failed to get secTok - did the page structure change?')
|
||||
secTok = match.group(1)
|
||||
match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U)
|
||||
if match is None:
|
||||
self._downloader.report_warning(u'Failed to get timeStmp - did the page structure change?')
|
||||
self._downloader.report_warning('Failed to get timeStmp - did the page structure change?')
|
||||
timeStmp = match.group(1)
|
||||
|
||||
tfa_form_strs = {
|
||||
@@ -155,23 +155,23 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data)
|
||||
tfa_results = self._download_webpage(
|
||||
tfa_req, None,
|
||||
note=u'Submitting TFA code', errnote=u'unable to submit tfa', fatal=False)
|
||||
note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
|
||||
|
||||
if tfa_results is False:
|
||||
return False
|
||||
|
||||
if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None:
|
||||
self._downloader.report_warning(u'Two-factor code expired. Please try again, or use a one-use backup code instead.')
|
||||
self._downloader.report_warning('Two-factor code expired. Please try again, or use a one-use backup code instead.')
|
||||
return False
|
||||
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None:
|
||||
self._downloader.report_warning(u'unable to log in - did the page structure change?')
|
||||
self._downloader.report_warning('unable to log in - did the page structure change?')
|
||||
return False
|
||||
if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
|
||||
self._downloader.report_warning(u'Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
|
||||
self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
|
||||
return False
|
||||
|
||||
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
|
||||
self._downloader.report_warning(u'unable to log in: bad username or password')
|
||||
self._downloader.report_warning('unable to log in: bad username or password')
|
||||
return False
|
||||
return True
|
||||
|
||||
@@ -185,7 +185,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
self._download_webpage(
|
||||
req, None,
|
||||
note=u'Confirming age', errnote=u'Unable to confirm age')
|
||||
note='Confirming age', errnote='Unable to confirm age')
|
||||
return True
|
||||
|
||||
def _real_initialize(self):
|
||||
@@ -211,7 +211,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
|
||||
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
||||
(?: # the various things that can precede the ID:
|
||||
(?:(?:v|embed|e)/) # v/ or embed/ or e/
|
||||
(?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
|
||||
|(?: # or the v= param in all its forms
|
||||
(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
|
||||
(?:\?|\#!?) # the params delimiter ? or # or #!
|
||||
@@ -307,69 +307,74 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
IE_NAME = 'youtube'
|
||||
_TESTS = [
|
||||
{
|
||||
u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc",
|
||||
u"file": u"BaW_jenozKc.mp4",
|
||||
u"info_dict": {
|
||||
u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
|
||||
u"uploader": u"Philipp Hagemeister",
|
||||
u"uploader_id": u"phihag",
|
||||
u"upload_date": u"20121002",
|
||||
u"description": u"test chars: \"'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .",
|
||||
u"categories": [u'Science & Technology'],
|
||||
'url': 'http://www.youtube.com/watch?v=BaW_jenozKc',
|
||||
'info_dict': {
|
||||
'id': 'BaW_jenozKc',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl test video "\'/\\ä↭𝕐',
|
||||
'uploader': 'Philipp Hagemeister',
|
||||
'uploader_id': 'phihag',
|
||||
'upload_date': '20121002',
|
||||
'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
|
||||
'categories': ['Science & Technology'],
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
}
|
||||
},
|
||||
{
|
||||
u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
|
||||
u"file": u"UxxajLWwzqY.mp4",
|
||||
u"note": u"Test generic use_cipher_signature video (#897)",
|
||||
u"info_dict": {
|
||||
u"upload_date": u"20120506",
|
||||
u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
|
||||
u"description": u"md5:fea86fda2d5a5784273df5c7cc994d9f",
|
||||
u"uploader": u"Icona Pop",
|
||||
u"uploader_id": u"IconaPop"
|
||||
'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY',
|
||||
'note': 'Test generic use_cipher_signature video (#897)',
|
||||
'info_dict': {
|
||||
'id': 'UxxajLWwzqY',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20120506',
|
||||
'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
|
||||
'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f',
|
||||
'uploader': 'Icona Pop',
|
||||
'uploader_id': 'IconaPop',
|
||||
}
|
||||
},
|
||||
{
|
||||
u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ",
|
||||
u"file": u"07FYdnEawAQ.mp4",
|
||||
u"note": u"Test VEVO video with age protection (#956)",
|
||||
u"info_dict": {
|
||||
u"upload_date": u"20130703",
|
||||
u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
|
||||
u"description": u"md5:64249768eec3bc4276236606ea996373",
|
||||
u"uploader": u"justintimberlakeVEVO",
|
||||
u"uploader_id": u"justintimberlakeVEVO"
|
||||
'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
|
||||
'note': 'Test VEVO video with age protection (#956)',
|
||||
'info_dict': {
|
||||
'id': '07FYdnEawAQ',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20130703',
|
||||
'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
|
||||
'description': 'md5:64249768eec3bc4276236606ea996373',
|
||||
'uploader': 'justintimberlakeVEVO',
|
||||
'uploader_id': 'justintimberlakeVEVO',
|
||||
}
|
||||
},
|
||||
{
|
||||
u"url": u"//www.YouTube.com/watch?v=yZIXLfi8CZQ",
|
||||
u"file": u"yZIXLfi8CZQ.mp4",
|
||||
u"note": u"Embed-only video (#1746)",
|
||||
u"info_dict": {
|
||||
u"upload_date": u"20120608",
|
||||
u"title": u"Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012",
|
||||
u"description": u"md5:09b78bd971f1e3e289601dfba15ca4f7",
|
||||
u"uploader": u"SET India",
|
||||
u"uploader_id": u"setindia"
|
||||
'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
|
||||
'note': 'Embed-only video (#1746)',
|
||||
'info_dict': {
|
||||
'id': 'yZIXLfi8CZQ',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20120608',
|
||||
'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
|
||||
'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
|
||||
'uploader': 'SET India',
|
||||
'uploader_id': 'setindia'
|
||||
}
|
||||
},
|
||||
{
|
||||
u"url": u"http://www.youtube.com/watch?v=a9LDPn-MO4I",
|
||||
u"file": u"a9LDPn-MO4I.m4a",
|
||||
u"note": u"256k DASH audio (format 141) via DASH manifest",
|
||||
u"info_dict": {
|
||||
u"upload_date": "20121002",
|
||||
u"uploader_id": "8KVIDEO",
|
||||
u"description": '',
|
||||
u"uploader": "8KVIDEO",
|
||||
u"title": "UHDTV TEST 8K VIDEO.mp4"
|
||||
'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I',
|
||||
'note': '256k DASH audio (format 141) via DASH manifest',
|
||||
'info_dict': {
|
||||
'id': 'a9LDPn-MO4I',
|
||||
'ext': 'm4a',
|
||||
'upload_date': '20121002',
|
||||
'uploader_id': '8KVIDEO',
|
||||
'description': '',
|
||||
'uploader': '8KVIDEO',
|
||||
'title': 'UHDTV TEST 8K VIDEO.mp4'
|
||||
},
|
||||
u"params": {
|
||||
u"youtube_include_dash_manifest": True,
|
||||
u"format": "141",
|
||||
'params': {
|
||||
'youtube_include_dash_manifest': True,
|
||||
'format': '141',
|
||||
},
|
||||
},
|
||||
# DASH manifest with encrypted signature
|
||||
@@ -384,7 +389,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
'uploader_id': 'AfrojackVEVO',
|
||||
'upload_date': '20131011',
|
||||
},
|
||||
u"params": {
|
||||
'params': {
|
||||
'youtube_include_dash_manifest': True,
|
||||
'format': '141',
|
||||
},
|
||||
@@ -397,19 +402,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
|
||||
def report_video_info_webpage_download(self, video_id):
|
||||
"""Report attempt to download video info webpage."""
|
||||
self.to_screen(u'%s: Downloading video info webpage' % video_id)
|
||||
self.to_screen('%s: Downloading video info webpage' % video_id)
|
||||
|
||||
def report_information_extraction(self, video_id):
|
||||
"""Report attempt to extract video information."""
|
||||
self.to_screen(u'%s: Extracting video information' % video_id)
|
||||
self.to_screen('%s: Extracting video information' % video_id)
|
||||
|
||||
def report_unavailable_format(self, video_id, format):
|
||||
"""Report extracted video URL."""
|
||||
self.to_screen(u'%s: Format %s not available' % (video_id, format))
|
||||
self.to_screen('%s: Format %s not available' % (video_id, format))
|
||||
|
||||
def report_rtmp_download(self):
|
||||
"""Indicate the download will use the RTMP protocol."""
|
||||
self.to_screen(u'RTMP download detected')
|
||||
self.to_screen('RTMP download detected')
|
||||
|
||||
def _signature_cache_id(self, example_sig):
|
||||
""" Return a string representation of a signature """
|
||||
@@ -429,21 +434,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
player_type, player_id, self._signature_cache_id(example_sig))
|
||||
assert os.path.basename(func_id) == func_id
|
||||
|
||||
cache_spec = self._downloader.cache.load(u'youtube-sigfuncs', func_id)
|
||||
cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
|
||||
if cache_spec is not None:
|
||||
return lambda s: ''.join(s[i] for i in cache_spec)
|
||||
|
||||
if player_type == 'js':
|
||||
code = self._download_webpage(
|
||||
player_url, video_id,
|
||||
note=u'Downloading %s player %s' % (player_type, player_id),
|
||||
errnote=u'Download of %s failed' % player_url)
|
||||
note='Downloading %s player %s' % (player_type, player_id),
|
||||
errnote='Download of %s failed' % player_url)
|
||||
res = self._parse_sig_js(code)
|
||||
elif player_type == 'swf':
|
||||
urlh = self._request_webpage(
|
||||
player_url, video_id,
|
||||
note=u'Downloading %s player %s' % (player_type, player_id),
|
||||
errnote=u'Download of %s failed' % player_url)
|
||||
note='Downloading %s player %s' % (player_type, player_id),
|
||||
errnote='Download of %s failed' % player_url)
|
||||
code = urlh.read()
|
||||
res = self._parse_sig_swf(code)
|
||||
else:
|
||||
@@ -454,15 +459,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
cache_res = res(test_string)
|
||||
cache_spec = [ord(c) for c in cache_res]
|
||||
|
||||
self._downloader.cache.store(u'youtube-sigfuncs', func_id, cache_spec)
|
||||
self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
|
||||
return res
|
||||
|
||||
def _print_sig_code(self, func, example_sig):
|
||||
def gen_sig_code(idxs):
|
||||
def _genslice(start, end, step):
|
||||
starts = '' if start == 0 else str(start)
|
||||
ends = (u':%d' % (end+step)) if end + step >= 0 else ':'
|
||||
steps = '' if step == 1 else (u':%d' % step)
|
||||
ends = (':%d' % (end+step)) if end + step >= 0 else ':'
|
||||
steps = '' if step == 1 else (':%d' % step)
|
||||
return 's[%s%s%s]' % (starts, ends, steps)
|
||||
|
||||
step = None
|
||||
@@ -492,9 +497,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
expr_code = ' + '.join(gen_sig_code(cache_spec))
|
||||
signature_id_tuple = '(%s)' % (
|
||||
', '.join(compat_str(len(p)) for p in example_sig.split('.')))
|
||||
code = (u'if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
|
||||
code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
|
||||
' return %s\n') % (signature_id_tuple, expr_code)
|
||||
self.to_screen(u'Extracted signature function:\n' + code)
|
||||
self.to_screen('Extracted signature function:\n' + code)
|
||||
|
||||
def _parse_sig_js(self, jscode):
|
||||
funcname = self._search_regex(
|
||||
@@ -516,9 +521,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
"""Turn the encrypted s field into a working signature"""
|
||||
|
||||
if player_url is None:
|
||||
raise ExtractorError(u'Cannot decrypt signature without player_url')
|
||||
raise ExtractorError('Cannot decrypt signature without player_url')
|
||||
|
||||
if player_url.startswith(u'//'):
|
||||
if player_url.startswith('//'):
|
||||
player_url = 'https:' + player_url
|
||||
try:
|
||||
player_id = (player_url, self._signature_cache_id(s))
|
||||
@@ -542,7 +547,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
|
||||
video_id, note=False)
|
||||
except ExtractorError as err:
|
||||
self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
|
||||
self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
|
||||
return {}
|
||||
lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
|
||||
|
||||
@@ -560,7 +565,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
url = 'https://www.youtube.com/api/timedtext?' + params
|
||||
sub_lang_list[lang] = url
|
||||
if not sub_lang_list:
|
||||
self._downloader.report_warning(u'video doesn\'t have subtitles')
|
||||
self._downloader.report_warning('video doesn\'t have subtitles')
|
||||
return {}
|
||||
return sub_lang_list
|
||||
|
||||
@@ -568,7 +573,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
"""We need the webpage for getting the captions url, pass it as an
|
||||
argument to speed up the process."""
|
||||
sub_format = self._downloader.params.get('subtitlesformat', 'srt')
|
||||
self.to_screen(u'%s: Looking for automatic captions' % video_id)
|
||||
self.to_screen('%s: Looking for automatic captions' % video_id)
|
||||
mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
|
||||
err_msg = 'Couldn\'t find automatic captions for %s' % video_id
|
||||
if mobj is None:
|
||||
@@ -589,7 +594,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
caption_list = self._download_xml(list_url, video_id)
|
||||
original_lang_node = caption_list.find('track')
|
||||
if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
|
||||
self._downloader.report_warning(u'Video doesn\'t have automatic captions')
|
||||
self._downloader.report_warning('Video doesn\'t have automatic captions')
|
||||
return {}
|
||||
original_lang = original_lang_node.attrib['lang_code']
|
||||
|
||||
@@ -615,7 +620,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
def extract_id(cls, url):
|
||||
mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
raise ExtractorError('Invalid URL: %s' % url)
|
||||
video_id = mobj.group(2)
|
||||
return video_id
|
||||
|
||||
@@ -635,7 +640,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
|
||||
def _extract_annotations(self, video_id):
|
||||
url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
|
||||
return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.')
|
||||
return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
|
||||
|
||||
def _real_extract(self, url):
|
||||
proto = (
|
||||
@@ -705,14 +710,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
|
||||
# Check for "rental" videos
|
||||
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
|
||||
raise ExtractorError(u'"rental" videos not supported')
|
||||
raise ExtractorError('"rental" videos not supported')
|
||||
|
||||
# Start extracting information
|
||||
self.report_information_extraction(video_id)
|
||||
|
||||
# uploader
|
||||
if 'author' not in video_info:
|
||||
raise ExtractorError(u'Unable to extract uploader name')
|
||||
raise ExtractorError('Unable to extract uploader name')
|
||||
video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
|
||||
|
||||
# uploader_id
|
||||
@@ -721,13 +726,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
if mobj is not None:
|
||||
video_uploader_id = mobj.group(1)
|
||||
else:
|
||||
self._downloader.report_warning(u'unable to extract uploader nickname')
|
||||
self._downloader.report_warning('unable to extract uploader nickname')
|
||||
|
||||
# title
|
||||
if 'title' in video_info:
|
||||
video_title = video_info['title'][0]
|
||||
else:
|
||||
self._downloader.report_warning(u'Unable to extract video title')
|
||||
self._downloader.report_warning('Unable to extract video title')
|
||||
video_title = '_'
|
||||
|
||||
# thumbnail image
|
||||
@@ -737,7 +742,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
if m_thumb is not None:
|
||||
video_thumbnail = m_thumb.group(1)
|
||||
elif 'thumbnail_url' not in video_info:
|
||||
self._downloader.report_warning(u'unable to extract video thumbnail')
|
||||
self._downloader.report_warning('unable to extract video thumbnail')
|
||||
video_thumbnail = None
|
||||
else: # don't panic if we can't find it
|
||||
video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
|
||||
@@ -791,8 +796,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
if count is not None:
|
||||
return int(count.replace(',', ''))
|
||||
return None
|
||||
like_count = _extract_count(u'like')
|
||||
dislike_count = _extract_count(u'dislike')
|
||||
like_count = _extract_count('like')
|
||||
dislike_count = _extract_count('dislike')
|
||||
|
||||
# subtitles
|
||||
video_subtitles = self.extract_subtitles(video_id, video_webpage)
|
||||
@@ -802,7 +807,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
return
|
||||
|
||||
if 'length_seconds' not in video_info:
|
||||
self._downloader.report_warning(u'unable to extract video duration')
|
||||
self._downloader.report_warning('unable to extract video duration')
|
||||
video_duration = None
|
||||
else:
|
||||
video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]))
|
||||
@@ -823,11 +828,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
# Easy way to know if the 's' value is in url_encoded_fmt_stream_map
|
||||
# this signatures are encrypted
|
||||
if 'url_encoded_fmt_stream_map' not in args:
|
||||
raise ValueError(u'No stream_map present') # caught below
|
||||
raise ValueError('No stream_map present') # caught below
|
||||
re_signature = re.compile(r'[&,]s=')
|
||||
m_s = re_signature.search(args['url_encoded_fmt_stream_map'])
|
||||
if m_s is not None:
|
||||
self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
|
||||
self.to_screen('%s: Encrypted signatures detected.' % video_id)
|
||||
video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
|
||||
m_s = re_signature.search(args.get('adaptive_fmts', ''))
|
||||
if m_s is not None:
|
||||
@@ -905,7 +910,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
player_desc = 'html5 player %s' % player_version
|
||||
|
||||
parts_sizes = self._signature_cache_id(encrypted_sig)
|
||||
self.to_screen(u'{%s} signature length %s, %s' %
|
||||
self.to_screen('{%s} signature length %s, %s' %
|
||||
(format_id, parts_sizes, player_desc))
|
||||
|
||||
signature = self._decrypt_signature(
|
||||
@@ -920,7 +925,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
url_map = self._extract_from_m3u8(manifest_url, video_id)
|
||||
formats = _map_to_format_list(url_map)
|
||||
else:
|
||||
raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
|
||||
raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
|
||||
|
||||
# Look for the DASH manifest
|
||||
if (self._downloader.params.get('youtube_include_dash_manifest', False)):
|
||||
@@ -941,9 +946,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
|
||||
dash_doc = self._download_xml(
|
||||
dash_manifest_url, video_id,
|
||||
note=u'Downloading DASH manifest',
|
||||
errnote=u'Could not download DASH manifest')
|
||||
for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
|
||||
note='Downloading DASH manifest',
|
||||
errnote='Could not download DASH manifest')
|
||||
for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
|
||||
url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
|
||||
if url_el is None:
|
||||
continue
|
||||
@@ -969,7 +974,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
existing_format.update(f)
|
||||
|
||||
except (ExtractorError, KeyError) as e:
|
||||
self.report_warning(u'Skipping DASH manifest: %s' % e, video_id)
|
||||
self.report_warning('Skipping DASH manifest: %s' % e, video_id)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
@@ -1000,7 +1005,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
(?:\w+\.)?
|
||||
youtube\.com/
|
||||
(?:
|
||||
(?:course|view_play_list|my_playlists|artist|playlist|watch)
|
||||
(?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
|
||||
\? (?:.*?&)*? (?:p|a|list)=
|
||||
| p/
|
||||
)
|
||||
@@ -1056,6 +1061,13 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
'title': 'YDL_safe_search',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
'note': 'embedded',
|
||||
'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
|
||||
'playlist_count': 4,
|
||||
'info_dict': {
|
||||
'title': 'JODA15',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
@@ -1090,7 +1102,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
# Extract playlist id
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
raise ExtractorError('Invalid URL: %s' % url)
|
||||
playlist_id = mobj.group(1) or mobj.group(2)
|
||||
|
||||
# Check if it's a video-specific URL
|
||||
@@ -1098,16 +1110,16 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
if 'v' in query_dict:
|
||||
video_id = query_dict['v'][0]
|
||||
if self._downloader.params.get('noplaylist'):
|
||||
self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
|
||||
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
|
||||
return self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||
else:
|
||||
self.to_screen(u'Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
||||
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
||||
|
||||
if playlist_id.startswith('RD'):
|
||||
# Mixes require a custom extraction process
|
||||
return self._extract_mix(playlist_id)
|
||||
if playlist_id.startswith('TL'):
|
||||
raise ExtractorError(u'For downloading YouTube.com top lists, use '
|
||||
raise ExtractorError('For downloading YouTube.com top lists, use '
|
||||
'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
|
||||
|
||||
url = self._TEMPLATE_URL % playlist_id
|
||||
@@ -1152,19 +1164,28 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
class YoutubeTopListIE(YoutubePlaylistIE):
|
||||
IE_NAME = 'youtube:toplist'
|
||||
IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"'
|
||||
IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"'
|
||||
' (Example: "yttoplist:music:Top Tracks")')
|
||||
_VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
|
||||
_TESTS = []
|
||||
_TESTS = [{
|
||||
'url': 'yttoplist:music:Trending',
|
||||
'playlist_mincount': 5,
|
||||
'skip': 'Only works for logged-in users',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
channel = mobj.group('chann')
|
||||
title = mobj.group('title')
|
||||
query = compat_urllib_parse.urlencode({'title': title})
|
||||
playlist_re = 'href="([^"]+?%s.*?)"' % re.escape(query)
|
||||
channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title)
|
||||
link = self._html_search_regex(playlist_re, channel_page, 'list')
|
||||
channel_page = self._download_webpage(
|
||||
'https://www.youtube.com/%s' % channel, title)
|
||||
link = self._html_search_regex(
|
||||
r'''(?x)
|
||||
<a\s+href="([^"]+)".*?>\s*
|
||||
<span\s+class="branded-page-module-title-text">\s*
|
||||
<span[^>]*>.*?%s.*?</span>''' % re.escape(query),
|
||||
channel_page, 'list')
|
||||
url = compat_urlparse.urljoin('https://www.youtube.com/', link)
|
||||
|
||||
video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
|
||||
@@ -1190,6 +1211,11 @@ class YoutubeChannelIE(InfoExtractor):
|
||||
_MORE_PAGES_INDICATOR = 'yt-uix-load-more'
|
||||
_MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
|
||||
IE_NAME = 'youtube:channel'
|
||||
_TESTS = [{
|
||||
'note': 'paginated channel',
|
||||
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
|
||||
'playlist_mincount': 91,
|
||||
}]
|
||||
|
||||
def extract_videos_from_page(self, page):
|
||||
ids_in_page = []
|
||||
@@ -1202,7 +1228,7 @@ class YoutubeChannelIE(InfoExtractor):
|
||||
# Extract channel id
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
raise ExtractorError('Invalid URL: %s' % url)
|
||||
|
||||
# Download channel page
|
||||
channel_id = mobj.group(1)
|
||||
@@ -1224,7 +1250,7 @@ class YoutubeChannelIE(InfoExtractor):
|
||||
for pagenum in itertools.count(1):
|
||||
url = self._MORE_PAGES_URL % (pagenum, channel_id)
|
||||
page = self._download_json(
|
||||
url, channel_id, note=u'Downloading page #%s' % pagenum,
|
||||
url, channel_id, note='Downloading page #%s' % pagenum,
|
||||
transform_source=uppercase_escape)
|
||||
|
||||
ids_in_page = self.extract_videos_from_page(page['content_html'])
|
||||
@@ -1233,7 +1259,7 @@ class YoutubeChannelIE(InfoExtractor):
|
||||
if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
|
||||
break
|
||||
|
||||
self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
|
||||
self._downloader.to_screen('[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
|
||||
|
||||
url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||
for video_id in video_ids]
|
||||
@@ -1248,6 +1274,17 @@ class YoutubeUserIE(InfoExtractor):
|
||||
_GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
|
||||
IE_NAME = 'youtube:user'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.youtube.com/user/TheLinuxFoundation',
|
||||
'playlist_mincount': 320,
|
||||
'info_dict': {
|
||||
'title': 'TheLinuxFoundation',
|
||||
}
|
||||
}, {
|
||||
'url': 'ytuser:phihag',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
# Don't return True if the url can be extracted with other youtube
|
||||
@@ -1260,7 +1297,7 @@ class YoutubeUserIE(InfoExtractor):
|
||||
# Extract username
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
if mobj is None:
|
||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
||||
raise ExtractorError('Invalid URL: %s' % url)
|
||||
|
||||
username = mobj.group(1)
|
||||
|
||||
@@ -1281,7 +1318,7 @@ class YoutubeUserIE(InfoExtractor):
|
||||
try:
|
||||
response = json.loads(page)
|
||||
except ValueError as err:
|
||||
raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
|
||||
raise ExtractorError('Invalid JSON in API response: ' + compat_str(err))
|
||||
if 'entry' not in response['feed']:
|
||||
return
|
||||
|
||||
@@ -1322,9 +1359,9 @@ class YoutubeSearchIE(SearchInfoExtractor):
|
||||
compat_urllib_parse.quote_plus(query.encode('utf-8')),
|
||||
(PAGE_SIZE * pagenum) + 1)
|
||||
data_json = self._download_webpage(
|
||||
result_url, video_id=u'query "%s"' % query,
|
||||
note=u'Downloading page %s' % (pagenum + 1),
|
||||
errnote=u'Unable to download API page')
|
||||
result_url, video_id='query "%s"' % query,
|
||||
note='Downloading page %s' % (pagenum + 1),
|
||||
errnote='Unable to download API page')
|
||||
data = json.loads(data_json)
|
||||
api_response = data['data']
|
||||
|
||||
@@ -1356,6 +1393,13 @@ class YoutubeSearchURLIE(InfoExtractor):
|
||||
IE_DESC = 'YouTube.com search URLs'
|
||||
IE_NAME = 'youtube:search_url'
|
||||
_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
|
||||
'playlist_mincount': 5,
|
||||
'info_dict': {
|
||||
'title': 'youtube-dl test video',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -1390,17 +1434,38 @@ class YoutubeSearchURLIE(InfoExtractor):
|
||||
|
||||
class YoutubeShowIE(InfoExtractor):
|
||||
IE_DESC = 'YouTube.com (multi-season) shows'
|
||||
_VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
|
||||
_VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
|
||||
IE_NAME = 'youtube:show'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.youtube.com/show/airdisasters',
|
||||
'playlist_mincount': 3,
|
||||
'info_dict': {
|
||||
'id': 'airdisasters',
|
||||
'title': 'Air Disasters',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
show_name = mobj.group(1)
|
||||
webpage = self._download_webpage(url, show_name, 'Downloading show webpage')
|
||||
playlist_id = mobj.group('id')
|
||||
webpage = self._download_webpage(
|
||||
url, playlist_id, 'Downloading show webpage')
|
||||
# There's one playlist for each season of the show
|
||||
m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
|
||||
self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
|
||||
return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
|
||||
self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons)))
|
||||
entries = [
|
||||
self.url_result(
|
||||
'https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
|
||||
for season in m_seasons
|
||||
]
|
||||
title = self._og_search_title(webpage, fatal=False)
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': playlist_id,
|
||||
'title': title,
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
|
||||
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
||||
|
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2014.09.18'
|
||||
__version__ = '2014.09.24.1'
|
||||
|
Reference in New Issue
Block a user