Compare commits

..

32 Commits

Author SHA1 Message Date
f20b52778b release 2014.12.10 2014-12-10 12:21:40 +01:00
83e865a370 Fix PEP8 issue E713 2014-12-09 23:11:26 +01:00
b89a938687 [bet] Add extractor (Closes #4416) 2014-12-09 22:29:01 +06:00
e89a2aabed [extractor/common] Add generic SMIL formats extraction routine 2014-12-09 22:28:28 +06:00
f58766ce5c [extractor/common] Document ie_key in url results 2014-12-09 10:58:06 +01:00
15644a40df Merge pull request #4395 from cryptonaut/issue2883
Handle --get-url with merged formats (fixes #2883)
2014-12-08 17:21:56 +01:00
d4800f3c3f Merge branch 'master' of github.com:rg3/youtube-dl 2014-12-08 17:17:31 +01:00
09a5dd2d3b [bliptv] Add support for audio-only files (Fixes #4404) 2014-12-08 17:17:22 +01:00
819039ee63 [tvigle] Update test and modernize 2014-12-08 22:03:02 +06:00
603c92080f [nhl] Make sure we add '_sd' before the extension (fixes #4397)
'.replace' would find the first dot in the path.
2014-12-07 11:26:07 +01:00
16ae61f655 Handle --get-url with merged formats (fixes #2883)
Outputs one URL per line
2014-12-06 12:55:07 -08:00
0ef4d4ab7e Credit @akretz for prosiebensat1 playlist support (#4394) 2014-12-07 01:48:45 +06:00
4542535f94 Merge branch 'akretz-master' 2014-12-07 01:47:09 +06:00
6a52eed80e [prosiebensat1] Improve and simplify 2014-12-07 01:46:44 +06:00
acf5cbfe93 [extractor/common] Add description to playlist_result 2014-12-07 01:46:30 +06:00
8d1c8cae9c [prosiebensat1] Fix broken tests 2014-12-06 19:21:05 +01:00
c84890f708 [prosiebensat1] Add support for playlists (fixes #4357) 2014-12-06 19:05:22 +01:00
6d0886204a [radio.de] Add support for radio.de websites (Closes #4393) 2014-12-06 23:01:52 +06:00
04d02a9d57 [twitch] Add login support (#3986) 2014-12-06 21:24:20 +06:00
b82f815f37 Allow iterators for playlist result entries 2014-12-06 14:02:19 +01:00
158f8cadc0 [adultswim] PEP8 2014-12-06 14:01:59 +01:00
7d70cf4157 [nba] Remove unused import 2014-12-06 13:59:37 +01:00
6591fdf51f [tagesschau] Look at the right place for download links 2014-12-06 13:59:10 +01:00
47d7c64274 [test_utils] Make test more realistically (#4377) 2014-12-06 12:36:23 +01:00
db175341c7 Credit @cryptonaut for adultswim (#4388) 2014-12-06 12:32:01 +01:00
9ff6772790 [youtube] Modernize 2014-12-06 12:20:54 +01:00
5f9b83944d [ffmpeg] Improve version check and call it from hls (Fixes #4377) 2014-12-06 12:14:26 +01:00
f6735be4da Merge remote-tracking branch 'cryptonaut/adultswim' 2014-12-06 11:55:24 +01:00
6a3e0103bb [nba] Add test for #4387 2014-12-06 11:26:17 +01:00
0b5cc1983e [nba] Modernize 2014-12-06 11:15:25 +01:00
1a9f8b1ad4 [nba] Improve _VALID_URL regex (fixes #4387)
Allows for optional trailing / or /index.html
2014-12-06 01:49:22 -08:00
7115599121 [adultswim] Updated to work with new site format (fixes #4317) 2014-12-05 21:55:47 -08:00
23 changed files with 530 additions and 173 deletions

View File

@ -90,3 +90,5 @@ Matthew Rayfield
t0mm0
Tithen-Firion
Zack Fernandes
cryptonaut
Adrian Kretz

View File

@ -48,6 +48,7 @@ from youtube_dl.utils import (
intlist_to_bytes,
args_to_str,
parse_filesize,
version_tuple,
)
@ -381,5 +382,10 @@ class TestUtil(unittest.TestCase):
self.assertEqual(parse_filesize('1.2Tb'), 1200000000000)
self.assertEqual(parse_filesize('1,24 KB'), 1240)
def test_version_tuple(self):
self.assertEqual(version_tuple('1'), (1,))
self.assertEqual(version_tuple('10.23.344'), (10, 23, 344))
self.assertEqual(version_tuple('10.1-6'), (10, 1, 6)) # avconv style
if __name__ == '__main__':
unittest.main()

View File

@ -7,6 +7,7 @@ import collections
import datetime
import errno
import io
import itertools
import json
import locale
import os
@ -654,21 +655,28 @@ class YoutubeDL(object):
if playlistend == -1:
playlistend = None
if isinstance(ie_result['entries'], list):
n_all_entries = len(ie_result['entries'])
entries = ie_result['entries'][playliststart:playlistend]
ie_entries = ie_result['entries']
if isinstance(ie_entries, list):
n_all_entries = len(ie_entries)
entries = ie_entries[playliststart:playlistend]
n_entries = len(entries)
self.to_screen(
"[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
(ie_result['extractor'], playlist, n_all_entries, n_entries))
else:
assert isinstance(ie_result['entries'], PagedList)
entries = ie_result['entries'].getslice(
elif isinstance(ie_entries, PagedList):
entries = ie_entries.getslice(
playliststart, playlistend)
n_entries = len(entries)
self.to_screen(
"[%s] playlist %s: Downloading %d videos" %
(ie_result['extractor'], playlist, n_entries))
else: # iterable
entries = list(itertools.islice(
ie_entries, playliststart, playlistend))
n_entries = len(entries)
self.to_screen(
"[%s] playlist %s: Downloading %d videos" %
(ie_result['extractor'], playlist, n_entries))
for i, entry in enumerate(entries, 1):
self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
@ -934,8 +942,12 @@ class YoutubeDL(object):
if self.params.get('forceid', False):
self.to_stdout(info_dict['id'])
if self.params.get('forceurl', False):
# For RTMP URLs, also include the playpath
self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
if info_dict.get('requested_formats') is not None:
for f in info_dict['requested_formats']:
self.to_stdout(f['url'] + f.get('play_path', ''))
else:
# For RTMP URLs, also include the playpath
self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
self.to_stdout(info_dict['thumbnail'])
if self.params.get('forcedescription', False) and info_dict.get('description') is not None:

View File

@ -247,7 +247,7 @@ else:
userhome = compat_getenv('HOME')
elif 'USERPROFILE' in os.environ:
userhome = compat_getenv('USERPROFILE')
elif not 'HOMEPATH' in os.environ:
elif 'HOMEPATH' not in os.environ:
return path
else:
try:

View File

@ -4,6 +4,7 @@ import os
import re
import subprocess
from ..postprocessor.ffmpeg import FFmpegPostProcessor
from .common import FileDownloader
from ..utils import (
compat_urlparse,
@ -32,6 +33,9 @@ class HlsFD(FileDownloader):
return False
cmd = [program] + args
ffpp = FFmpegPostProcessor(downloader=self)
ffpp.check_version()
retval = subprocess.call(cmd)
if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename))

View File

@ -30,6 +30,7 @@ from .bandcamp import BandcampIE, BandcampAlbumIE
from .bbccouk import BBCCoUkIE
from .beeg import BeegIE
from .behindkink import BehindKinkIE
from .bet import BetIE
from .bild import BildIE
from .bilibili import BiliBiliIE
from .blinkx import BlinkxIE
@ -307,6 +308,7 @@ from .promptfile import PromptFileIE
from .prosiebensat1 import ProSiebenSat1IE
from .pyvideo import PyvideoIE
from .quickvid import QuickVidIE
from .radiode import RadioDeIE
from .radiofrance import RadioFranceIE
from .rai import RaiIE
from .rbmaradio import RBMARadioIE

View File

@ -2,123 +2,147 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
)
class AdultSwimIE(InfoExtractor):
_VALID_URL = r'https?://video\.adultswim\.com/(?P<path>.+?)(?:\.html)?(?:\?.*)?(?:#.*)?$'
_TEST = {
'url': 'http://video.adultswim.com/rick-and-morty/close-rick-counters-of-the-rick-kind.html?x=y#title',
_VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<is_playlist>playlists/)?(?P<show_path>[^/]+)/(?P<episode_path>[^/?#]+)/?'
_TESTS = [{
'url': 'http://adultswim.com/videos/rick-and-morty/pilot',
'playlist': [
{
'md5': '4da359ec73b58df4575cd01a610ba5dc',
'md5': '247572debc75c7652f253c8daa51a14d',
'info_dict': {
'id': '8a250ba1450996e901453d7f02ca02f5',
'id': 'rQxZvXQ4ROaSOqq-or2Mow-0',
'ext': 'flv',
'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 1',
'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
'uploader': 'Rick and Morty',
'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
}
'title': 'Rick and Morty - Pilot Part 1',
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
},
},
{
'md5': 'ffbdf55af9331c509d95350bd0cc1819',
'md5': '77b0e037a4b20ec6b98671c4c379f48d',
'info_dict': {
'id': '8a250ba1450996e901453d7f4bd102f6',
'id': 'rQxZvXQ4ROaSOqq-or2Mow-3',
'ext': 'flv',
'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 2',
'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
'uploader': 'Rick and Morty',
'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
}
'title': 'Rick and Morty - Pilot Part 4',
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
},
},
],
'info_dict': {
'title': 'Rick and Morty - Pilot',
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
}
}, {
'url': 'http://www.adultswim.com/videos/playlists/american-parenting/putting-francine-out-of-business/',
'playlist': [
{
'md5': 'b92409635540304280b4b6c36bd14a0a',
'md5': '2eb5c06d0f9a1539da3718d897f13ec5',
'info_dict': {
'id': '8a250ba1450996e901453d7fa73c02f7',
'id': '-t8CamQlQ2aYZ49ItZCFog-0',
'ext': 'flv',
'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 3',
'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
'uploader': 'Rick and Morty',
'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
}
},
{
'md5': 'e8818891d60e47b29cd89d7b0278156d',
'info_dict': {
'id': '8a250ba1450996e901453d7fc8ba02f8',
'ext': 'flv',
'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 4',
'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
'uploader': 'Rick and Morty',
'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
}
'title': 'American Dad - Putting Francine Out of Business',
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
},
}
]
}
],
'info_dict': {
'title': 'American Dad - Putting Francine Out of Business',
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
},
}]
_video_extensions = {
'3500': 'flv',
'640': 'mp4',
'150': 'mp4',
'ipad': 'm3u8',
'iphone': 'm3u8'
}
_video_dimensions = {
'3500': (1280, 720),
'640': (480, 270),
'150': (320, 180)
}
@staticmethod
def find_video_info(collection, slug):
for video in collection.get('videos'):
if video.get('slug') == slug:
return video
@staticmethod
def find_collection_by_linkURL(collections, linkURL):
for collection in collections:
if collection.get('linkURL') == linkURL:
return collection
@staticmethod
def find_collection_containing_video(collections, slug):
for collection in collections:
for video in collection.get('videos'):
if video.get('slug') == slug:
return collection, video
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_path = mobj.group('path')
show_path = mobj.group('show_path')
episode_path = mobj.group('episode_path')
is_playlist = True if mobj.group('is_playlist') else False
webpage = self._download_webpage(url, video_path)
episode_id = self._html_search_regex(
r'<link rel="video_src" href="http://i\.adultswim\.com/adultswim/adultswimtv/tools/swf/viralplayer.swf\?id=([0-9a-f]+?)"\s*/?\s*>',
webpage, 'episode_id')
title = self._og_search_title(webpage)
webpage = self._download_webpage(url, episode_path)
index_url = 'http://asfix.adultswim.com/asfix-svc/episodeSearch/getEpisodesByIDs?networkName=AS&ids=%s' % episode_id
idoc = self._download_xml(index_url, title, 'Downloading episode index', 'Unable to download episode index')
# Extract the value of `bootstrappedData` from the Javascript in the page.
bootstrappedDataJS = self._search_regex(r'var bootstrappedData = ({.*});', webpage, episode_path)
episode_el = idoc.find('.//episode')
show_title = episode_el.attrib.get('collectionTitle')
episode_title = episode_el.attrib.get('title')
thumbnail = episode_el.attrib.get('thumbnailUrl')
description = episode_el.find('./description').text.strip()
try:
bootstrappedData = json.loads(bootstrappedDataJS)
except ValueError as ve:
errmsg = '%s: Failed to parse JSON ' % episode_path
raise ExtractorError(errmsg, cause=ve)
# Downloading videos from a /videos/playlist/ URL needs to be handled differently.
# NOTE: We are only downloading one video (the current one) not the playlist
if is_playlist:
collections = bootstrappedData['playlists']['collections']
collection = self.find_collection_by_linkURL(collections, show_path)
video_info = self.find_video_info(collection, episode_path)
show_title = video_info['showTitle']
segment_ids = [video_info['videoPlaybackID']]
else:
collections = bootstrappedData['show']['collections']
collection, video_info = self.find_collection_containing_video(collections, episode_path)
show = bootstrappedData['show']
show_title = show['title']
segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
episode_id = video_info['id']
episode_title = video_info['title']
episode_description = video_info['description']
episode_duration = video_info.get('duration')
entries = []
segment_els = episode_el.findall('./segments/segment')
for part_num, segment_id in enumerate(segment_ids):
segment_url = 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=mobile' % segment_id
for part_num, segment_el in enumerate(segment_els):
segment_id = segment_el.attrib.get('id')
segment_title = '%s %s part %d' % (show_title, episode_title, part_num + 1)
thumbnail = segment_el.attrib.get('thumbnailUrl')
duration = segment_el.attrib.get('duration')
segment_title = '%s - %s' % (show_title, episode_title)
if len(segment_ids) > 1:
segment_title += ' Part %d' % (part_num + 1)
segment_url = 'http://asfix.adultswim.com/asfix-svc/episodeservices/getCvpPlaylist?networkName=AS&id=%s' % segment_id
idoc = self._download_xml(
segment_url, segment_title,
'Downloading segment information', 'Unable to download segment information')
segment_duration = idoc.find('.//trt').text.strip()
formats = []
file_els = idoc.findall('.//files/file')
for file_el in file_els:
bitrate = file_el.attrib.get('bitrate')
type = file_el.attrib.get('type')
width, height = self._video_dimensions.get(bitrate, (None, None))
ftype = file_el.attrib.get('type')
formats.append({
'format_id': '%s-%s' % (bitrate, type),
'url': file_el.text,
'ext': self._video_extensions.get(bitrate, 'mp4'),
'format_id': '%s_%s' % (bitrate, ftype),
'url': file_el.text.strip(),
# The bitrate may not be a number (for example: 'iphone')
'tbr': int(bitrate) if bitrate.isdigit() else None,
'height': height,
'width': width
'quality': 1 if ftype == 'hd' else -1
})
self._sort_formats(formats)
@ -127,18 +151,16 @@ class AdultSwimIE(InfoExtractor):
'id': segment_id,
'title': segment_title,
'formats': formats,
'uploader': show_title,
'thumbnail': thumbnail,
'duration': duration,
'description': description
'duration': segment_duration,
'description': episode_description
})
return {
'_type': 'playlist',
'id': episode_id,
'display_id': video_path,
'display_id': episode_path,
'entries': entries,
'title': '%s %s' % (show_title, episode_title),
'description': description,
'thumbnail': thumbnail
'title': '%s - %s' % (show_title, episode_title),
'description': episode_description,
'duration': episode_duration
}

108
youtube_dl/extractor/bet.py Normal file
View File

@ -0,0 +1,108 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
xpath_text,
xpath_with_ns,
int_or_none,
parse_iso8601,
)
class BetIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html'
_TESTS = [
{
'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
'info_dict': {
'id': '417cd61c-c793-4e8e-b006-e445ecc45add',
'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
'ext': 'flv',
'title': 'BET News Presents: A Conversation With President Obama',
'description': 'md5:5a88d8ae912c1b33e090290af7ec33c6',
'duration': 1534,
'timestamp': 1418075340,
'upload_date': '20141208',
'uploader': 'admin',
'thumbnail': 're:(?i)^https?://.*\.jpg$',
},
'params': {
# rtmp download
'skip_download': True,
},
},
{
'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html',
'info_dict': {
'id': '4160e53b-ad41-43b1-980f-8d85f63121f4',
'display_id': 'justice-for-ferguson-a-community-reacts',
'ext': 'flv',
'title': 'Justice for Ferguson: A Community Reacts',
'description': 'A BET News special.',
'duration': 1696,
'timestamp': 1416942360,
'upload_date': '20141125',
'uploader': 'admin',
'thumbnail': 're:(?i)^https?://.*\.jpg$',
},
'params': {
# rtmp download
'skip_download': True,
},
}
]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
media_url = compat_urllib_parse.unquote(self._search_regex(
[r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"],
webpage, 'media URL'))
mrss = self._download_xml(media_url, display_id)
item = mrss.find('./channel/item')
NS_MAP = {
'dc': 'http://purl.org/dc/elements/1.1/',
'media': 'http://search.yahoo.com/mrss/',
'ka': 'http://kickapps.com/karss',
}
title = xpath_text(item, './title', 'title')
description = xpath_text(
item, './description', 'description', fatal=False)
video_id = xpath_text(item, './guid', 'video id', fatal=False)
timestamp = parse_iso8601(xpath_text(
item, xpath_with_ns('./dc:date', NS_MAP),
'upload date', fatal=False))
uploader = xpath_text(
item, xpath_with_ns('./dc:creator', NS_MAP),
'uploader', fatal=False)
media_content = item.find(
xpath_with_ns('./media:content', NS_MAP))
duration = int_or_none(media_content.get('duration'))
smil_url = media_content.get('url')
thumbnail = media_content.find(
xpath_with_ns('./media:thumbnail', NS_MAP)).get('url')
formats = self._extract_smil_formats(smil_url, display_id)
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'timestamp': timestamp,
'uploader': uploader,
'duration': duration,
'formats': formats,
}

View File

@ -4,13 +4,17 @@ import re
from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..utils import (
compat_urllib_request,
unescapeHTML,
parse_iso8601,
compat_urlparse,
clean_html,
from ..compat import (
compat_str,
compat_urllib_request,
compat_urlparse,
)
from ..utils import (
clean_html,
int_or_none,
parse_iso8601,
unescapeHTML,
)
@ -78,7 +82,25 @@ class BlipTVIE(SubtitlesInfoExtractor):
'uploader': 'NostalgiaCritic',
'uploader_id': '246467',
}
}
},
{
# https://github.com/rg3/youtube-dl/pull/4404
'note': 'Audio only',
'url': 'http://blip.tv/hilarios-productions/weekly-manga-recap-kingdom-7119982',
'md5': '76c0a56f24e769ceaab21fbb6416a351',
'info_dict': {
'id': '7103299',
'ext': 'flv',
'title': 'Weekly Manga Recap: Kingdom',
'description': 'And then Shin breaks the enemy line, and he&apos;s all like HWAH! And then he slices a guy and it&apos;s all like FWASHING! And... it&apos;s really hard to describe the best parts of this series without breaking down into sound effects, okay?',
'timestamp': 1417660321,
'upload_date': '20141204',
'uploader': 'The Rollo T',
'uploader_id': '407429',
'duration': 7251,
'vcodec': 'none',
}
},
]
def _real_extract(self, url):
@ -145,11 +167,11 @@ class BlipTVIE(SubtitlesInfoExtractor):
'url': real_url,
'format_id': role,
'format_note': media_type,
'vcodec': media_content.get(blip('vcodec')),
'vcodec': media_content.get(blip('vcodec')) or 'none',
'acodec': media_content.get(blip('acodec')),
'filesize': media_content.get('filesize'),
'width': int(media_content.get('width')),
'height': int(media_content.get('height')),
'width': int_or_none(media_content.get('width')),
'height': int_or_none(media_content.get('height')),
})
self._sort_formats(formats)

View File

@ -158,8 +158,8 @@ class InfoExtractor(object):
_type "playlist" indicates multiple videos.
There must be a key "entries", which is a list or a PagedList object, each
element of which is a valid dictionary under this specfication.
There must be a key "entries", which is a list, an iterable, or a PagedList
object, each element of which is a valid dictionary by this specification.
Additionally, playlists can have "title" and "id" attributes with the same
semantics as videos (see above).
@ -174,9 +174,10 @@ class InfoExtractor(object):
_type "url" indicates that the video must be extracted from another
location, possibly by a different extractor. Its only required key is:
"url" - the next URL to extract.
Additionally, it may have properties believed to be identical to the
resolved entity, for example "title" if the title of the referred video is
The key "ie_key" can be set to the class name (minus the trailing "IE",
e.g. "Youtube") if the extractor class is known in advance.
Additionally, the dictionary may have any properties of the resolved entity
known in advance, for example "title" if the title of the referred video is
known ahead of time.
@ -439,7 +440,7 @@ class InfoExtractor(object):
return video_info
@staticmethod
def playlist_result(entries, playlist_id=None, playlist_title=None):
def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
"""Returns a playlist"""
video_info = {'_type': 'playlist',
'entries': entries}
@ -447,6 +448,8 @@ class InfoExtractor(object):
video_info['id'] = playlist_id
if playlist_title:
video_info['title'] = playlist_title
if playlist_description:
video_info['description'] = playlist_description
return video_info
def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
@ -790,6 +793,49 @@ class InfoExtractor(object):
self._sort_formats(formats)
return formats
# TODO: improve extraction
def _extract_smil_formats(self, smil_url, video_id):
smil = self._download_xml(
smil_url, video_id, 'Downloading SMIL file',
'Unable to download SMIL file')
base = smil.find('./head/meta').get('base')
formats = []
rtmp_count = 0
for video in smil.findall('./body/switch/video'):
src = video.get('src')
if not src:
continue
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
width = int_or_none(video.get('width'))
height = int_or_none(video.get('height'))
proto = video.get('proto')
if not proto:
if base:
if base.startswith('rtmp'):
proto = 'rtmp'
elif base.startswith('http'):
proto = 'http'
ext = video.get('ext')
if proto == 'm3u8':
formats.extend(self._extract_m3u8_formats(src, video_id, ext))
elif proto == 'rtmp':
rtmp_count += 1
streamer = video.get('streamer') or base
formats.append({
'url': streamer,
'play_path': src,
'ext': 'flv',
'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
'tbr': bitrate,
'width': width,
'height': height,
})
self._sort_formats(formats)
return formats
def _live_title(self, name):
""" Generate the title for a live video """
now = datetime.datetime.now()

View File

@ -1,7 +1,5 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
remove_end,
@ -10,8 +8,8 @@ from ..utils import (
class NBAIE(InfoExtractor):
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)(?:/index\.html)?(?:\?.*)?$'
_TEST = {
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)/?(?:/index\.html)?(?:\?.*)?$'
_TESTS = [{
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
'md5': 'c0edcfc37607344e2ff8f13c378c88a4',
'info_dict': {
@ -21,12 +19,13 @@ class NBAIE(InfoExtractor):
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
'duration': 181,
},
}
}, {
'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
@ -37,7 +36,7 @@ class NBAIE(InfoExtractor):
description = self._og_search_description(webpage)
duration = parse_duration(
self._html_search_meta('duration', webpage, 'duration', fatal=False))
self._html_search_meta('duration', webpage, 'duration'))
return {
'id': shortened_video_id,

View File

@ -2,6 +2,7 @@ from __future__ import unicode_literals
import re
import json
import os
from .common import InfoExtractor
from ..compat import (
@ -26,7 +27,8 @@ class NHLBaseInfoExtractor(InfoExtractor):
initial_video_url = info['publishPoint']
if info['formats'] == '1':
parsed_url = compat_urllib_parse_urlparse(initial_video_url)
path = parsed_url.path.replace('.', '_sd.', 1)
filename, ext = os.path.splitext(parsed_url.path)
path = '%s_sd%s' % (filename, ext)
data = compat_urllib_parse.urlencode({
'type': 'fvod',
'path': compat_urlparse.urlunparse(parsed_url[:2] + (path,) + parsed_url[3:])

View File

@ -85,7 +85,7 @@ class ProSiebenSat1IE(InfoExtractor):
'ext': 'mp4',
'title': 'Im Interview: Kai Wiesinger',
'description': 'md5:e4e5370652ec63b95023e914190b4eb9',
'upload_date': '20140225',
'upload_date': '20140203',
'duration': 522.56,
},
'params': {
@ -100,7 +100,7 @@ class ProSiebenSat1IE(InfoExtractor):
'ext': 'mp4',
'title': 'Jagd auf Fertigkost im Elsthal - Teil 2',
'description': 'md5:2669cde3febe9bce13904f701e774eb6',
'upload_date': '20140225',
'upload_date': '20141014',
'duration': 2410.44,
},
'params': {
@ -152,12 +152,22 @@ class ProSiebenSat1IE(InfoExtractor):
'skip_download': True,
},
},
{
'url': 'http://www.prosieben.de/tv/joko-gegen-klaas/videos/playlists/episode-8-ganze-folge-playlist',
'info_dict': {
'id': '439664',
'title': 'Episode 8 - Ganze Folge - Playlist',
'description': 'md5:63b8963e71f481782aeea877658dec84',
},
'playlist_count': 2,
},
]
_CLIPID_REGEXES = [
r'"clip_id"\s*:\s+"(\d+)"',
r'clipid: "(\d+)"',
r'clip[iI]d=(\d+)',
r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
]
_TITLE_REGEXES = [
r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
@ -178,11 +188,19 @@ class ProSiebenSat1IE(InfoExtractor):
r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',
r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>',
]
_PAGE_TYPE_REGEXES = [
r'<meta name="page_type" content="([^"]+)">',
r"'itemType'\s*:\s*'([^']*)'",
]
_PLAYLIST_ID_REGEXES = [
r'content[iI]d=(\d+)',
r"'itemId'\s*:\s*'([^']*)'",
]
_PLAYLIST_CLIP_REGEXES = [
r'(?s)data-qvt=.+?<a href="([^"]+)"',
]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
def _extract_clip(self, url, webpage):
clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id')
access_token = 'testclient'
@ -281,3 +299,31 @@ class ProSiebenSat1IE(InfoExtractor):
'duration': duration,
'formats': formats,
}
def _extract_playlist(self, url, webpage):
playlist_id = self._html_search_regex(
self._PLAYLIST_ID_REGEXES, webpage, 'playlist id')
for regex in self._PLAYLIST_CLIP_REGEXES:
playlist_clips = re.findall(regex, webpage)
if playlist_clips:
title = self._html_search_regex(
self._TITLE_REGEXES, webpage, 'title')
description = self._html_search_regex(
self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False)
entries = [
self.url_result(
re.match('(.+?//.+?)/', url).group(1) + clip_path,
'ProSiebenSat1')
for clip_path in playlist_clips]
return self.playlist_result(entries, playlist_id, title, description)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
page_type = self._search_regex(
self._PAGE_TYPE_REGEXES, webpage,
'page type', default='clip').lower()
if page_type == 'clip':
return self._extract_clip(url, webpage)
elif page_type == 'playlist':
return self._extract_playlist(url, webpage)

View File

@ -0,0 +1,55 @@
from __future__ import unicode_literals
import json
from .common import InfoExtractor
class RadioDeIE(InfoExtractor):
IE_NAME = 'radio.de'
_VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)'
_TEST = {
'url': 'http://ndr2.radio.de/',
'md5': '3b4cdd011bc59174596b6145cda474a4',
'info_dict': {
'id': 'ndr2',
'ext': 'mp3',
'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'md5:591c49c702db1a33751625ebfb67f273',
'thumbnail': 're:^https?://.*\.png',
},
'params': {
'skip_download': True,
}
}
def _real_extract(self, url):
radio_id = self._match_id(url)
webpage = self._download_webpage(url, radio_id)
broadcast = json.loads(self._search_regex(
r'_getBroadcast\s*=\s*function\(\s*\)\s*{\s*return\s+({.+?})\s*;\s*}',
webpage, 'broadcast'))
title = self._live_title(broadcast['name'])
description = broadcast.get('description') or broadcast.get('shortDescription')
thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl')
formats = [{
'url': stream['streamUrl'],
'ext': stream['streamContentFormat'].lower(),
'acodec': stream['streamContentFormat'],
'abr': stream['bitRate'],
'asr': stream['sampleRate']
} for stream in broadcast['streamUrls']]
self._sort_formats(formats)
return {
'id': radio_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'is_live': True,
'formats': formats,
}

View File

@ -70,7 +70,7 @@ class TagesschauIE(InfoExtractor):
webpage, 'download links')
links = re.finditer(
r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>',
webpage)
download_text)
formats = []
for l in links:
format_id = self._search_regex(

View File

@ -1,32 +1,30 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
float_or_none,
str_to_int,
parse_age_limit,
)
class TvigleIE(InfoExtractor):
IE_NAME = 'tvigle'
IE_DESC = 'Интернет-телевидение Tvigle.ru'
_VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$'
_VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<id>[^/]+)/$'
_TESTS = [
{
'url': 'http://www.tvigle.ru/video/brat/',
'md5': 'ff4344a4894b0524441fb6f8218dc716',
'url': 'http://www.tvigle.ru/video/sokrat/',
'md5': '36514aed3657d4f70b4b2cef8eb520cd',
'info_dict': {
'id': '5118490',
'display_id': 'brat',
'ext': 'mp4',
'title': 'Брат',
'description': 'md5:d16ac7c0b47052ea51fddb92c4e413eb',
'duration': 5722.6,
'age_limit': 16,
'id': '1848932',
'display_id': 'sokrat',
'ext': 'flv',
'title': 'Сократ',
'description': 'md5:a05bd01be310074d5833efc6743be95e',
'duration': 6586,
'age_limit': 0,
},
},
{
@ -44,8 +42,7 @@ class TvigleIE(InfoExtractor):
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('display_id')
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
@ -60,8 +57,8 @@ class TvigleIE(InfoExtractor):
title = item['title']
description = item['description']
thumbnail = item['thumbnail']
duration = float_or_none(item['durationMilliseconds'], 1000)
age_limit = str_to_int(item['ageRestrictions'])
duration = float_or_none(item.get('durationMilliseconds'), 1000)
age_limit = parse_age_limit(item.get('ageRestrictions'))
formats = []
for vcodec, fmts in item['videos'].items():

View File

@ -1,3 +1,4 @@
# coding: utf-8
from __future__ import unicode_literals
import itertools
@ -5,6 +6,8 @@ import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
compat_urllib_request,
ExtractorError,
parse_iso8601,
)
@ -24,6 +27,7 @@ class TwitchIE(InfoExtractor):
"""
_PAGE_LIMIT = 100
_API_BASE = 'https://api.twitch.tv'
_LOGIN_URL = 'https://secure.twitch.tv/user/login'
_TESTS = [{
'url': 'http://www.twitch.tv/riotgames/b/577357806',
'info_dict': {
@ -109,6 +113,44 @@ class TwitchIE(InfoExtractor):
'view_count': info['views'],
}
def _real_initialize(self):
self._login()
def _login(self):
(username, password) = self._get_login_info()
if username is None:
return
login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page')
authenticity_token = self._search_regex(
r'<input name="authenticity_token" type="hidden" value="([^"]+)"',
login_page, 'authenticity token')
login_form = {
'utf8': ''.encode('utf-8'),
'authenticity_token': authenticity_token,
'redirect_on_login': '',
'embed_form': 'false',
'mp_source_action': '',
'follow': '',
'user[login]': username,
'user[password]': password,
}
request = compat_urllib_request.Request(
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
request.add_header('Referer', self._LOGIN_URL)
response = self._download_webpage(
request, None, 'Logging in as %s' % username)
m = re.search(
r"id=([\"'])login_error_message\1[^>]*>(?P<msg>[^<]+)", response)
if m:
raise ExtractorError(
'Unable to login: %s' % m.group('msg').strip(), expected=True)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj.group('chapterid'):

View File

@ -1229,7 +1229,7 @@ class YoutubeTopListIE(YoutubePlaylistIE):
class YoutubeChannelIE(InfoExtractor):
IE_DESC = 'YouTube.com channels'
_VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
_VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
_MORE_PAGES_INDICATOR = 'yt-uix-load-more'
_MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
IE_NAME = 'youtube:channel'
@ -1247,13 +1247,8 @@ class YoutubeChannelIE(InfoExtractor):
return ids_in_page
def _real_extract(self, url):
# Extract channel id
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError('Invalid URL: %s' % url)
channel_id = self._match_id(url)
# Download channel page
channel_id = mobj.group(1)
video_ids = []
url = 'https://www.youtube.com/channel/%s/videos' % channel_id
channel_page = self._download_webpage(url, channel_id)
@ -1267,8 +1262,12 @@ class YoutubeChannelIE(InfoExtractor):
# The videos are contained in a single page
# the ajax pages can't be used, they are empty
video_ids = self.extract_videos_from_page(channel_page)
else:
# Download all channel pages using the json-based channel_ajax query
entries = [
self.url_result(video_id, 'Youtube', video_id=video_id)
for video_id in video_ids]
return self.playlist_result(entries, channel_id)
def _entries():
for pagenum in itertools.count(1):
url = self._MORE_PAGES_URL % (pagenum, channel_id)
page = self._download_json(
@ -1276,21 +1275,19 @@ class YoutubeChannelIE(InfoExtractor):
transform_source=uppercase_escape)
ids_in_page = self.extract_videos_from_page(page['content_html'])
video_ids.extend(ids_in_page)
for video_id in ids_in_page:
yield self.url_result(
video_id, 'Youtube', video_id=video_id)
if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
break
self._downloader.to_screen('[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
for video_id in video_ids]
return self.playlist_result(url_entries, channel_id)
return self.playlist_result(_entries(), channel_id)
class YoutubeUserIE(InfoExtractor):
IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
_TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
_GDATA_PAGE_SIZE = 50
_GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
@ -1318,12 +1315,7 @@ class YoutubeUserIE(InfoExtractor):
return super(YoutubeUserIE, cls).suitable(url)
def _real_extract(self, url):
# Extract username
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError('Invalid URL: %s' % url)
username = mobj.group(1)
username = self._match_id(url)
# Download video ids using YouTube Data API. Result size per
# query is limited (currently to 50 videos) so we need to query

View File

@ -14,7 +14,7 @@ class ExecAfterDownloadPP(PostProcessor):
def run(self, information):
cmd = self.exec_cmd
if not '{}' in cmd:
if '{}' not in cmd:
cmd += ' {}'
cmd = cmd.replace('{}', shlex_quote(information['filepath']))

View File

@ -37,11 +37,11 @@ class FFmpegPostProcessor(PostProcessor):
if not self._executable:
raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.')
REQUIRED_VERSION = '1.0'
required_version = '10-0' if self._uses_avconv() else '1.0'
if is_outdated_version(
self._versions[self._executable], REQUIRED_VERSION):
self._versions[self._executable], required_version):
warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
self._executable, self._executable, REQUIRED_VERSION)
self._executable, self._executable, required_version)
if self._downloader:
self._downloader.report_warning(warning)

View File

@ -79,7 +79,7 @@ def update_self(to_screen, verbose):
to_screen(compat_str(traceback.format_exc()))
to_screen('ERROR: can\'t obtain versions info. Please try again later.')
return
if not 'signature' in versions_info:
if 'signature' not in versions_info:
to_screen('ERROR: the versions file is not signed or corrupted. Aborting.')
return
signature = versions_info['signature']

View File

@ -1024,7 +1024,7 @@ def smuggle_url(url, data):
def unsmuggle_url(smug_url, default=None):
if not '#__youtubedl_smuggle' in smug_url:
if '#__youtubedl_smuggle' not in smug_url:
return smug_url, default
url, _, sdata = smug_url.rpartition('#')
jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
@ -1502,7 +1502,7 @@ def limit_length(s, length):
def version_tuple(v):
return [int(e) for e in v.split('.')]
return tuple(int(e) for e in re.split(r'[-.]', v))
def is_outdated_version(version, limit, assume_new=True):

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2014.12.06.1'
__version__ = '2014.12.10'