Merge branch 'bleacherreport' of github.com:remitamine/youtube-dl into remitamine-bleacherreport
This commit is contained in:
		| @@ -61,6 +61,10 @@ from .beatportpro import BeatportProIE | ||||
| from .bet import BetIE | ||||
| from .bild import BildIE | ||||
| from .bilibili import BiliBiliIE | ||||
| from .bleacherreport import ( | ||||
|     BleacherReportIE, | ||||
|     BleacherReportCMSIE, | ||||
| ) | ||||
| from .blinkx import BlinkxIE | ||||
| from .bloomberg import BloombergIE | ||||
| from .bpb import BpbIE | ||||
|   | ||||
							
								
								
									
										84
									
								
								youtube_dl/extractor/amp.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										84
									
								
								youtube_dl/extractor/amp.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,84 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
| ) | ||||
|  | ||||
|  | ||||
class AMPIE(InfoExtractor):
    """Shared base for extractors whose metadata comes from an Akamai
    Adaptive Media Player (AMP) JSON feed.

    Subclasses are expected to build the feed URL themselves and call
    ``_extract_feed_info`` to turn the feed into an info dict.
    """

    # parse Akamai Adaptive Media Player feed
    def _extract_feed_info(self, url):
        """Download the AMP feed at *url* and convert it to an info dict.

        The feed is read from ``['channel']['item']`` of the downloaded JSON.
        Media nodes are looked up first inside ``item['media-group']`` (when
        present), then on the item itself under the ``media-`` prefixed name,
        and finally under the bare name.

        Returns a dict with ``id``, ``title``, ``description``,
        ``thumbnails``, ``subtitles``, ``timestamp``, ``duration`` and
        ``formats``.  ``id`` is the item's ``guid``; callers may overwrite it.

        Raises ``KeyError`` when the feed lacks ``channel``/``item``/``guid``
        or when a media node lacks its ``@attributes``/``url``/``type``.
        """
        item = self._download_json(
            url, None, 'Downloading Akamai AMP feed',
            'Unable to download Akamai AMP feed')['channel']['item']

        video_id = item['guid']

        def get_media_node(name, default=None):
            media_name = 'media-%s' % name
            media_group = item.get('media-group') or item
            return media_group.get(media_name) or item.get(media_name) or item.get(name, default)

        def get_media_nodes(name):
            # A node holding a single entry is a plain dict, several entries
            # come as a list; normalise both (and a missing node) to a list.
            node = get_media_node(name)
            if not node:
                return []
            if isinstance(node, dict):
                return [node]
            return node

        thumbnails = []
        for thumbnail_data in get_media_nodes('thumbnail'):
            thumbnail = thumbnail_data['@attributes']
            thumbnails.append({
                'url': self._proto_relative_url(thumbnail['url'], 'http:'),
                'width': int_or_none(thumbnail.get('width')),
                'height': int_or_none(thumbnail.get('height')),
            })

        subtitles = {}
        for subtitle_data in get_media_nodes('subTitle'):
            subtitle = subtitle_data['@attributes']
            # Entries without a language attribute are filed under 'en'.
            lang = subtitle.get('lang') or 'en'
            subtitles[lang] = [{'url': subtitle['href']}]

        formats = []
        # A feed without any content node yields an empty list here instead of
        # failing with TypeError while iterating None.
        media_content = get_media_nodes('content')
        for media_data in media_content:
            media = media_data['@attributes']
            media_type = media['type']
            if media_type == 'video/f4m':
                f4m_formats = self._extract_f4m_formats(
                    media['url'] + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
                    video_id, f4m_id='hds', fatal=False)
                if f4m_formats:
                    formats.extend(f4m_formats)
            elif media_type == 'application/x-mpegURL':
                m3u8_formats = self._extract_m3u8_formats(
                    media['url'], video_id, 'mp4', m3u8_id='hls', fatal=False)
                if m3u8_formats:
                    formats.extend(m3u8_formats)
            else:
                # 'media-category' is optional; a missing label simply leaves
                # the format without an explicit format_id.
                category = media_data.get('media-category') or {}
                formats.append({
                    'format_id': (category.get('@attributes') or {}).get('label'),
                    'url': media['url'],
                    'tbr': int_or_none(media.get('bitrate')),
                    'filesize': int_or_none(media.get('fileSize')),
                })

        # NOTE(review): _sort_formats is presumably where an empty format list
        # gets reported to the user - confirm against InfoExtractor.
        self._sort_formats(formats)

        # Duration is taken from the first content entry only.
        first_content = media_content[0] if media_content else {}

        return {
            'id': video_id,
            'title': get_media_node('title'),
            'description': get_media_node('description'),
            'thumbnails': thumbnails,
            # Previously collected but never returned, which silently dropped
            # every subtitle track found in the feed.
            'subtitles': subtitles,
            'timestamp': parse_iso8601(item.get('pubDate'), ' '),
            'duration': int_or_none(first_content.get('@attributes', {}).get('duration')),
            'formats': formats,
        }
							
								
								
									
										106
									
								
								youtube_dl/extractor/bleacherreport.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										106
									
								
								youtube_dl/extractor/bleacherreport.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,106 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .amp import AMPIE | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
| ) | ||||
|  | ||||
|  | ||||
class BleacherReportIE(InfoExtractor):
    """Extractor for bleacherreport.com article pages.

    The article metadata is fetched from the Bleacher Report JSON API and the
    embedded video is delegated to another extractor through a
    ``url_transparent`` result, so the article's own title, uploader and
    counters are passed along with the delegated URL.
    """
    _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/articles/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://bleacherreport.com/articles/2496438-fsu-stat-projections-is-jalen-ramsey-best-defensive-player-in-college-football',
        'md5': 'a3ffc3dc73afdbc2010f02d98f990f20',
        'info_dict': {
            'id': '2496438',
            'ext': 'mp4',
            'title': 'FSU Stat Projections: Is Jalen Ramsey Best Defensive Player in College Football?',
            'uploader_id': 3992341,
            'description': 'CFB, ACC, Florida State',
            'timestamp': 1434380212,
            'upload_date': '20150615',
            'uploader': 'Team Stream Now ',
        },
        'add_ie': ['Ooyala'],
    }, {
        'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo',
        'md5': 'af5f90dc9c7ba1c19d0a3eac806bbf50',
        'info_dict': {
            'id': '2586817',
            'ext': 'mp4',
            'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',
            'timestamp': 1446839961,
            'uploader': 'Sean Fay',
            'description': 'md5:825e94e0f3521df52fa83b2ed198fa20',
            'uploader_id': 6466954,
            'upload_date': '20151011',
        },
        'add_ie': ['Youtube'],
    }]

    def _real_extract(self, url):
        """Return a ``url_transparent`` result for the article's video.

        Raises ``ExtractorError`` (marked as expected) when the article has
        no video attached, and ``KeyError`` when the API response lacks the
        ``article`` or ``title`` fields or the video lacks ``type``/``id``.
        """
        article_id = self._match_id(url)

        article_data = self._download_json(
            'http://api.bleacherreport.com/api/v1/articles/%s' % article_id,
            article_id)['article']

        # Bail out before building any metadata: nothing else is useful when
        # the article carries no video.
        video = article_data.get('video')
        if not video:
            raise ExtractorError('no video in the article', expected=True)

        thumbnails = []
        primary_photo = article_data.get('primaryPhoto')
        if primary_photo:
            thumbnails = [{
                'url': primary_photo['url'],
                'width': int_or_none(primary_photo.get('width')),
                'height': int_or_none(primary_photo.get('height')),
            }]

        # 'author' may be present but null, in which case a plain
        # .get('author', {}) would hand back None and break the chained lookup.
        author = article_data.get('author') or {}

        info = {
            '_type': 'url_transparent',
            'id': article_id,
            'title': article_data['title'],
            'uploader': author.get('name'),
            'uploader_id': article_data.get('authorId'),
            'timestamp': parse_iso8601(article_data.get('createdAt')),
            'thumbnails': thumbnails,
            'comment_count': int_or_none(article_data.get('commentsCount')),
            'view_count': int_or_none(article_data.get('hitCount')),
        }

        # Map the hosting service named by the API to a URL that the matching
        # extractor can pick up.
        video_type = video['type']
        if video_type == 'cms.bleacherreport.com':
            info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id']
        elif video_type == 'ooyala.com':
            info['url'] = 'ooyala:%s' % video['id']
        elif video_type == 'youtube.com':
            # The bare video id is handed over as the URL.
            info['url'] = video['id']
        elif video_type == 'vine.co':
            info['url'] = 'https://vine.co/v/%s' % video['id']
        else:
            # Unknown host: fall back to plain concatenation of type and id.
            info['url'] = video_type + video['id']
        return info
|  | ||||
|  | ||||
class BleacherReportCMSIE(AMPIE):
    """Extractor for Bleacher Report ``video_embed`` URLs.

    The embed id selects an Akamai AMP feed on cms.bleacherreport.com, which
    is parsed by the inherited ``_extract_feed_info``.
    """
    _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})'
    _TESTS = [{
        'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
        'md5': 'f0ca220af012d4df857b54f792c586bb',
        'info_dict': {
            'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
            'ext': 'flv',
            'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
            'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
        },
    }]

    def _real_extract(self, url):
        """Fetch the AMP feed for the embed id found in *url*."""
        embed_id = self._match_id(url)
        feed_url = 'http://cms.bleacherreport.com/media/items/%s/akamai.json' % embed_id
        result = self._extract_feed_info(feed_url)
        # Report the id from the URL rather than the guid carried by the feed.
        result['id'] = embed_id
        return result
| @@ -3,7 +3,7 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import itertools | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .amp import AMPIE | ||||
| from ..compat import ( | ||||
|     compat_HTTPError, | ||||
|     compat_urllib_parse, | ||||
| @@ -12,14 +12,11 @@ from ..compat import ( | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     clean_html, | ||||
|     determine_ext, | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
|     sanitized_Request, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class DramaFeverBaseIE(InfoExtractor): | ||||
| class DramaFeverBaseIE(AMPIE): | ||||
|     _LOGIN_URL = 'https://www.dramafever.com/accounts/login/' | ||||
|     _NETRC_MACHINE = 'dramafever' | ||||
|  | ||||
| @@ -80,60 +77,25 @@ class DramaFeverIE(DramaFeverBaseIE): | ||||
|             'timestamp': 1404336058, | ||||
|             'upload_date': '20140702', | ||||
|             'duration': 343, | ||||
|         } | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url).replace('/', '.') | ||||
|  | ||||
|         try: | ||||
|             feed = self._download_json( | ||||
|                 'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id, | ||||
|                 video_id, 'Downloading episode JSON')['channel']['item'] | ||||
|             info = self._extract_feed_info( | ||||
|                 'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id) | ||||
|         except ExtractorError as e: | ||||
|             if isinstance(e.cause, compat_HTTPError): | ||||
|                 raise ExtractorError( | ||||
|                     'Currently unavailable in your country.', expected=True) | ||||
|             raise | ||||
|  | ||||
|         media_group = feed.get('media-group', {}) | ||||
|  | ||||
|         formats = [] | ||||
|         for media_content in media_group['media-content']: | ||||
|             src = media_content.get('@attributes', {}).get('url') | ||||
|             if not src: | ||||
|                 continue | ||||
|             ext = determine_ext(src) | ||||
|             if ext == 'f4m': | ||||
|                 formats.extend(self._extract_f4m_formats( | ||||
|                     src, video_id, f4m_id='hds')) | ||||
|             elif ext == 'm3u8': | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     src, video_id, 'mp4', m3u8_id='hls')) | ||||
|             else: | ||||
|                 formats.append({ | ||||
|                     'url': src, | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         title = media_group.get('media-title') | ||||
|         description = media_group.get('media-description') | ||||
|         duration = int_or_none(media_group['media-content'][0].get('@attributes', {}).get('duration')) | ||||
|         thumbnail = self._proto_relative_url( | ||||
|             media_group.get('media-thumbnail', {}).get('@attributes', {}).get('url')) | ||||
|         timestamp = parse_iso8601(feed.get('pubDate'), ' ') | ||||
|  | ||||
|         subtitles = {} | ||||
|         for media_subtitle in media_group.get('media-subTitle', []): | ||||
|             lang = media_subtitle.get('@attributes', {}).get('lang') | ||||
|             href = media_subtitle.get('@attributes', {}).get('href') | ||||
|             if not lang or not href: | ||||
|                 continue | ||||
|             subtitles[lang] = [{ | ||||
|                 'ext': 'ttml', | ||||
|                 'url': href, | ||||
|             }] | ||||
|  | ||||
|         series_id, episode_number = video_id.split('.') | ||||
|         episode_info = self._download_json( | ||||
|             # We only need a single episode info, so restricting page size to one episode | ||||
| @@ -146,21 +108,12 @@ class DramaFeverIE(DramaFeverBaseIE): | ||||
|             if value: | ||||
|                 subfile = value[0].get('subfile') or value[0].get('new_subfile') | ||||
|                 if subfile and subfile != 'http://www.dramafever.com/st/': | ||||
|                     subtitles.setdefault('English', []).append({ | ||||
|                     info['subtitiles'].setdefault('English', []).append({ | ||||
|                         'ext': 'srt', | ||||
|                         'url': subfile, | ||||
|                     }) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'timestamp': timestamp, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|         return info | ||||
|  | ||||
|  | ||||
| class DramaFeverSeriesIE(DramaFeverBaseIE): | ||||
|   | ||||
| @@ -2,14 +2,10 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     parse_iso8601, | ||||
|     int_or_none, | ||||
| ) | ||||
| from .amp import AMPIE | ||||
|  | ||||
|  | ||||
| class FoxNewsIE(InfoExtractor): | ||||
| class FoxNewsIE(AMPIE): | ||||
|     IE_DESC = 'Fox News and Fox Business Video' | ||||
|     _VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)' | ||||
|     _TESTS = [ | ||||
| @@ -20,10 +16,10 @@ class FoxNewsIE(InfoExtractor): | ||||
|                 'id': '3937480', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Frozen in Time', | ||||
|                 'description': 'Doctors baffled by 16-year-old girl that is the size of a toddler', | ||||
|                 'description': '16-year-old girl is size of toddler', | ||||
|                 'duration': 265, | ||||
|                 'timestamp': 1304411491, | ||||
|                 'upload_date': '20110503', | ||||
|                 # 'timestamp': 1304411491, | ||||
|                 # 'upload_date': '20110503', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             }, | ||||
|         }, | ||||
| @@ -34,10 +30,10 @@ class FoxNewsIE(InfoExtractor): | ||||
|                 'id': '3922535568001', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal", | ||||
|                 'description': "Congressman discusses the president's executive action", | ||||
|                 'description': "Congressman discusses president's plan", | ||||
|                 'duration': 292, | ||||
|                 'timestamp': 1417662047, | ||||
|                 'upload_date': '20141204', | ||||
|                 # 'timestamp': 1417662047, | ||||
|                 # 'upload_date': '20141204', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             }, | ||||
|         }, | ||||
| @@ -52,52 +48,9 @@ class FoxNewsIE(InfoExtractor): | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         host = mobj.group('host') | ||||
|         host, video_id = re.match(self._VALID_URL, url).groups() | ||||
|  | ||||
|         video = self._download_json( | ||||
|             'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id), video_id) | ||||
|  | ||||
|         item = video['channel']['item'] | ||||
|         title = item['title'] | ||||
|         description = item['description'] | ||||
|         timestamp = parse_iso8601(item['dc-date']) | ||||
|  | ||||
|         media_group = item['media-group'] | ||||
|         duration = None | ||||
|         formats = [] | ||||
|         for media in media_group['media-content']: | ||||
|             attributes = media['@attributes'] | ||||
|             video_url = attributes['url'] | ||||
|             if video_url.endswith('.f4m'): | ||||
|                 formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', video_id)) | ||||
|             elif video_url.endswith('.m3u8'): | ||||
|                 formats.extend(self._extract_m3u8_formats(video_url, video_id, 'flv')) | ||||
|             elif not video_url.endswith('.smil'): | ||||
|                 duration = int_or_none(attributes.get('duration')) | ||||
|                 formats.append({ | ||||
|                     'url': video_url, | ||||
|                     'format_id': media['media-category']['@attributes']['label'], | ||||
|                     'preference': 1, | ||||
|                     'vbr': int_or_none(attributes.get('bitrate')), | ||||
|                     'filesize': int_or_none(attributes.get('fileSize')) | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         media_thumbnail = media_group['media-thumbnail']['@attributes'] | ||||
|         thumbnails = [{ | ||||
|             'url': media_thumbnail['url'], | ||||
|             'width': int_or_none(media_thumbnail.get('width')), | ||||
|             'height': int_or_none(media_thumbnail.get('height')), | ||||
|         }] if media_thumbnail else [] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'duration': duration, | ||||
|             'timestamp': timestamp, | ||||
|             'formats': formats, | ||||
|             'thumbnails': thumbnails, | ||||
|         } | ||||
|         info = self._extract_feed_info( | ||||
|             'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id)) | ||||
|         info['id'] = video_id | ||||
|         return info | ||||
|   | ||||
		Reference in New Issue
	
	Block a user