Merge remote-tracking branch 'upstream/master'
This commit is contained in:
		| @@ -99,6 +99,7 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|  | ||||
|     def test_facebook_matching(self): | ||||
|         self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268')) | ||||
|         self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793')) | ||||
|  | ||||
|     def test_no_duplicates(self): | ||||
|         ies = gen_extractors() | ||||
|   | ||||
| @@ -280,7 +280,7 @@ class TestUtil(unittest.TestCase): | ||||
|         d = json.loads(stripped) | ||||
|         self.assertEqual(d, [{"id": "532cb", "x": 3}]) | ||||
|  | ||||
|     def test_uppercase_escpae(self): | ||||
|     def test_uppercase_escape(self): | ||||
|         self.assertEqual(uppercase_escape(u'aä'), u'aä') | ||||
|         self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐') | ||||
|  | ||||
|   | ||||
| @@ -68,6 +68,7 @@ __authors__  = ( | ||||
|     'Hassaan Ali', | ||||
|     'Dobrosław Żybort', | ||||
|     'David Fabijan', | ||||
|     'Sebastian Haas', | ||||
| ) | ||||
|  | ||||
| __license__ = 'Public Domain' | ||||
|   | ||||
| @@ -295,7 +295,7 @@ class FileDownloader(object): | ||||
|  | ||||
|     def real_download(self, filename, info_dict): | ||||
|         """Real download process. Redefine in subclasses.""" | ||||
|         raise NotImplementedError(u'This method must be implemented by sublcasses') | ||||
|         raise NotImplementedError(u'This method must be implemented by subclasses') | ||||
|  | ||||
|     def _hook_progress(self, status): | ||||
|         for ph in self._progress_hooks: | ||||
|   | ||||
| @@ -225,9 +225,12 @@ from .nrk import ( | ||||
| from .ntv import NTVIE | ||||
| from .nytimes import NYTimesIE | ||||
| from .nuvid import NuvidIE | ||||
| from .oe1 import OE1IE | ||||
| from .ooyala import OoyalaIE | ||||
| from .orf import ORFIE | ||||
| from .orf import ( | ||||
|     ORFTVthekIE, | ||||
|     ORFOE1IE, | ||||
|     ORFFM4IE, | ||||
| ) | ||||
| from .parliamentliveuk import ParliamentLiveUKIE | ||||
| from .pbs import PBSIE | ||||
| from .photobucket import PhotobucketIE | ||||
|   | ||||
| @@ -6,6 +6,7 @@ import json | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urlparse, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -110,8 +111,8 @@ class AppleTrailersIE(InfoExtractor): | ||||
|                 formats.append({ | ||||
|                     'url': format_url, | ||||
|                     'format': format['type'], | ||||
|                     'width': format['width'], | ||||
|                     'height': int(format['height']), | ||||
|                     'width': int_or_none(format['width']), | ||||
|                     'height': int_or_none(format['height']), | ||||
|                 }) | ||||
|  | ||||
|             self._sort_formats(formats) | ||||
|   | ||||
| @@ -51,6 +51,9 @@ class ARDIE(InfoExtractor): | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage: | ||||
|             raise ExtractorError('Video %s is no longer available' % video_id, expected=True) | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', | ||||
|              r'<meta name="dcterms.title" content="(.*?)"/>', | ||||
|   | ||||
| @@ -109,15 +109,19 @@ class ArteTVPlus7IE(InfoExtractor): | ||||
|             regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l] | ||||
|             return any(re.match(r, f['versionCode']) for r in regexes) | ||||
|         # Some formats may not be in the same language as the url | ||||
|         # TODO: Instead of dropping formats that do not match the requested language, | ||||
|         # we might want to keep them and process them with lower precedence | ||||
|         formats = filter(_match_lang, all_formats) | ||||
|         formats = list(formats) # in python3 filter returns an iterator | ||||
|         formats = list(formats)  # in python3 filter returns an iterator | ||||
|         if not formats: | ||||
|             # Some videos are only available in the 'Originalversion' | ||||
|             # they aren't tagged as being in French or German | ||||
|             if all(f['versionCode'] == 'VO' or f['versionCode'] == 'VA' for f in all_formats): | ||||
|                 formats = all_formats | ||||
|             else: | ||||
|                 raise ExtractorError(u'The formats list is empty') | ||||
|             # Sometimes there are neither videos of requested lang code | ||||
|             # nor original version videos available | ||||
|             # For such cases we just take all_formats as is | ||||
|             formats = all_formats | ||||
|             if not formats: | ||||
|                 raise ExtractorError('The formats list is empty') | ||||
|  | ||||
|         if re.match(r'[A-Z]Q', formats[0]['quality']) is not None: | ||||
|             def sort_key(f): | ||||
|   | ||||
| @@ -20,7 +20,7 @@ from ..utils import ( | ||||
| class FacebookIE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x) | ||||
|         https?://(?:\w+\.)?facebook\.com/ | ||||
|         (?:[^#?]*\#!/)? | ||||
|         (?:[^#]*?\#!/)? | ||||
|         (?:video/video\.php|photo\.php|video/embed)\?(?:.*?) | ||||
|         (?:v|video_id)=(?P<id>[0-9]+) | ||||
|         (?:.*)''' | ||||
|   | ||||
| @@ -1,40 +0,0 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import calendar | ||||
| import datetime | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
| # audios on oe1.orf.at are only available for 7 days, so we can't | ||||
| # add tests. | ||||
|  | ||||
|  | ||||
| class OE1IE(InfoExtractor): | ||||
|     IE_DESC = 'oe1.orf.at' | ||||
|     _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         show_id = mobj.group('id') | ||||
|  | ||||
|         data = self._download_json( | ||||
|             'http://oe1.orf.at/programm/%s/konsole' % show_id, | ||||
|             show_id | ||||
|         ) | ||||
|  | ||||
|         timestamp = datetime.datetime.strptime('%s %s' % ( | ||||
|             data['item']['day_label'], | ||||
|             data['item']['time'] | ||||
|         ), '%d.%m.%Y %H:%M') | ||||
|         unix_timestamp = calendar.timegm(timestamp.utctimetuple()) | ||||
|  | ||||
|         return { | ||||
|             'id': show_id, | ||||
|             'title': data['item']['title'], | ||||
|             'url': data['item']['url_stream'], | ||||
|             'ext': 'mp3', | ||||
|             'description': data['item'].get('info'), | ||||
|             'timestamp': unix_timestamp | ||||
|         } | ||||
| @@ -3,23 +3,38 @@ import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import unescapeHTML | ||||
| from ..utils import ( | ||||
|     unescapeHTML, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class OoyalaIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(&|$)' | ||||
|  | ||||
|     _TEST = { | ||||
|         # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video | ||||
|         'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8', | ||||
|         'md5': '3f5cceb3a7bf461d6c29dc466cf8033c', | ||||
|         'info_dict': { | ||||
|             'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Explaining Data Recovery from Hard Drives and SSDs', | ||||
|             'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.', | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video | ||||
|             'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8', | ||||
|             'md5': '3f5cceb3a7bf461d6c29dc466cf8033c', | ||||
|             'info_dict': { | ||||
|                 'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Explaining Data Recovery from Hard Drives and SSDs', | ||||
|                 'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.', | ||||
|             }, | ||||
|         }, { | ||||
|             # Only available for ipad | ||||
|             'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0', | ||||
|             'md5': '4b9754921fddb68106e48c142e2a01e6', | ||||
|             'info_dict': { | ||||
|                 'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Simulation Overview - Levels of Simulation', | ||||
|                 'description': '', | ||||
|             }, | ||||
|         }, | ||||
|     } | ||||
|     ] | ||||
|  | ||||
|     @staticmethod | ||||
|     def _url_for_embed_code(embed_code): | ||||
| @@ -47,13 +62,30 @@ class OoyalaIE(InfoExtractor): | ||||
|         player = self._download_webpage(player_url, embedCode) | ||||
|         mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="', | ||||
|                                         player, 'mobile player url') | ||||
|         mobile_player = self._download_webpage(mobile_url, embedCode) | ||||
|         videos_info = self._search_regex( | ||||
|             r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);', | ||||
|             mobile_player, 'info').replace('\\"','"') | ||||
|         videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"','"') | ||||
|         # Looks like some videos are only available for particular devices | ||||
|         # (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0 | ||||
|         # is only available for ipad) | ||||
|         # Work around this by fetching the mobile player for every device found, | ||||
|         # starting with 'unknown', until one succeeds or all of them have failed. | ||||
|         devices = re.findall(r'device\s*=\s*"([^"]+)";', player) | ||||
|         devices.remove('unknown') | ||||
|         devices.insert(0, 'unknown') | ||||
|         for device in devices: | ||||
|             mobile_player = self._download_webpage( | ||||
|                 '%s&device=%s' % (mobile_url, device), embedCode, | ||||
|                 'Downloading mobile player JS for %s device' % device) | ||||
|             videos_info = self._search_regex( | ||||
|                 r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);', | ||||
|                 mobile_player, 'info', fatal=False, default=None) | ||||
|             if videos_info: | ||||
|                 break | ||||
|         if not videos_info: | ||||
|             raise ExtractorError('Unable to extract info') | ||||
|         videos_info = videos_info.replace('\\"', '"') | ||||
|         videos_more_info = self._search_regex( | ||||
|             r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"') | ||||
|         videos_info = json.loads(videos_info) | ||||
|         videos_more_info =json.loads(videos_more_info) | ||||
|         videos_more_info = json.loads(videos_more_info) | ||||
|  | ||||
|         if videos_more_info.get('lineup'): | ||||
|             videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])] | ||||
|   | ||||
| @@ -3,6 +3,8 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
| import calendar | ||||
| import datetime | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
| @@ -12,7 +14,9 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ORFIE(InfoExtractor): | ||||
| class ORFTVthekIE(InfoExtractor): | ||||
|     IE_NAME = 'orf:tvthek' | ||||
|     IE_DESC = 'ORF TVthek' | ||||
|     _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
| @@ -105,3 +109,73 @@ class ORFIE(InfoExtractor): | ||||
|             'entries': entries, | ||||
|             'id': playlist_id, | ||||
|         } | ||||
|  | ||||
|  | ||||
| # Audios on ORF radio are only available for 7 days, so we can't add tests. | ||||
|  | ||||
|  | ||||
| class ORFOE1IE(InfoExtractor): | ||||
|     IE_NAME = 'orf:oe1' | ||||
|     IE_DESC = 'Radio Österreich 1' | ||||
|     _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         show_id = mobj.group('id') | ||||
|  | ||||
|         data = self._download_json( | ||||
|             'http://oe1.orf.at/programm/%s/konsole' % show_id, | ||||
|             show_id | ||||
|         ) | ||||
|  | ||||
|         timestamp = datetime.datetime.strptime('%s %s' % ( | ||||
|             data['item']['day_label'], | ||||
|             data['item']['time'] | ||||
|         ), '%d.%m.%Y %H:%M') | ||||
|         unix_timestamp = calendar.timegm(timestamp.utctimetuple()) | ||||
|  | ||||
|         return { | ||||
|             'id': show_id, | ||||
|             'title': data['item']['title'], | ||||
|             'url': data['item']['url_stream'], | ||||
|             'ext': 'mp3', | ||||
|             'description': data['item'].get('info'), | ||||
|             'timestamp': unix_timestamp | ||||
|         } | ||||
|  | ||||
|  | ||||
| class ORFFM4IE(InfoExtractor): | ||||
|     IE_DESC = 'orf:fm4' | ||||
|     IE_DESC = 'radio FM4' | ||||
|     _VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P<date>[0-9]+)/(?P<show>\w+)' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         show_date = mobj.group('date') | ||||
|         show_id = mobj.group('show') | ||||
|  | ||||
|         data = self._download_json( | ||||
|             'http://audioapi.orf.at/fm4/json/2.0/broadcasts/%s/4%s' % (show_date, show_id), | ||||
|             show_id | ||||
|         ) | ||||
|  | ||||
|         def extract_entry_dict(info, title, subtitle): | ||||
|             return { | ||||
|                 'id': info['loopStreamId'].replace('.mp3', ''), | ||||
|                 'url': 'http://loopstream01.apa.at/?channel=fm4&id=%s' % info['loopStreamId'], | ||||
|                 'title': title, | ||||
|                 'description': subtitle, | ||||
|                 'duration': (info['end'] - info['start']) / 1000, | ||||
|                 'timestamp': info['start'] / 1000, | ||||
|                 'ext': 'mp3' | ||||
|             } | ||||
|  | ||||
|         entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']] | ||||
|  | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'id': show_id, | ||||
|             'title': data['title'], | ||||
|             'description': data['subtitle'], | ||||
|             'entries': entries | ||||
|         } | ||||
| @@ -1,23 +1,23 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import time | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import strip_jsonp | ||||
| from ..utils import str_or_none | ||||
|  | ||||
|  | ||||
| class ReverbNationIE(InfoExtractor): | ||||
|     _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa', | ||||
|         'file': '16965047.mp3', | ||||
|         'md5': '3da12ebca28c67c111a7f8b262d3f7a7', | ||||
|         'info_dict': { | ||||
|             "id": "16965047", | ||||
|             "ext": "mp3", | ||||
|             "title": "MONA LISA", | ||||
|             "uploader": "ALKILADOS", | ||||
|             "uploader_id": 216429, | ||||
|             "thumbnail": "//gp1.wac.edgecastcdn.net/802892/production_public/Photo/13761700/image/1366002176_AVATAR_MONA_LISA.jpg" | ||||
|             "uploader_id": "216429", | ||||
|             "thumbnail": "re:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$" | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
| @@ -26,10 +26,8 @@ class ReverbNationIE(InfoExtractor): | ||||
|         song_id = mobj.group('id') | ||||
|  | ||||
|         api_res = self._download_json( | ||||
|             'https://api.reverbnation.com/song/%s?callback=api_response_5&_=%d' | ||||
|                 % (song_id, int(time.time() * 1000)), | ||||
|             'https://api.reverbnation.com/song/%s' % song_id, | ||||
|             song_id, | ||||
|             transform_source=strip_jsonp, | ||||
|             note='Downloading information of song %s' % song_id | ||||
|         ) | ||||
|  | ||||
| @@ -38,8 +36,9 @@ class ReverbNationIE(InfoExtractor): | ||||
|             'title': api_res.get('name'), | ||||
|             'url': api_res.get('url'), | ||||
|             'uploader': api_res.get('artist', {}).get('name'), | ||||
|             'uploader_id': api_res.get('artist', {}).get('id'), | ||||
|             'thumbnail': api_res.get('image', api_res.get('thumbnail')), | ||||
|             'uploader_id': str_or_none(api_res.get('artist', {}).get('id')), | ||||
|             'thumbnail': self._proto_relative_url( | ||||
|                 api_res.get('image', api_res.get('thumbnail'))), | ||||
|             'ext': 'mp3', | ||||
|             'vcodec': 'none', | ||||
|         } | ||||
|   | ||||
| @@ -1273,9 +1273,15 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1): | ||||
|     if get_attr: | ||||
|         if v is not None: | ||||
|             v = getattr(v, get_attr, None) | ||||
|     if v == '': | ||||
|         v = None | ||||
|     return default if v is None else (int(v) * invscale // scale) | ||||
|  | ||||
|  | ||||
| def str_or_none(v, default=None): | ||||
|     return default if v is None else compat_str(v) | ||||
|  | ||||
|  | ||||
| def str_to_int(int_str): | ||||
|     if int_str is None: | ||||
|         return None | ||||
|   | ||||
| @@ -1,2 +1,2 @@ | ||||
|  | ||||
| __version__ = '2014.08.05' | ||||
| __version__ = '2014.08.10' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user