Compare commits

...

11 Commits

Author SHA1 Message Date
Sergey M․
1730878167 release 2017.04.11 2017-04-11 02:17:53 +07:00
Sergey M․
689cd458a6 [ChangeLog] Actualize 2017-04-11 02:16:20 +07:00
Sergey M․
6b9466de2f [afreecatv] Fix extraction (closes #12706) 2017-04-11 02:05:53 +07:00
John Hawkinson
61568e50cf [generic] Add support for <object> youtube embeds (closes #12637) 2017-04-11 00:04:32 +07:00
Sergey M․
364a69e8c6 [test_download] Fix testing playlists with single video and add comments 2017-04-11 00:01:02 +07:00
Sergey M․
6240925b40 [bbccouk] Treat bitrate as audio+video bitrate in media selector 2017-04-10 22:56:22 +07:00
Sergey M․
964744af95 [bbccouk] Skip unrecognized formats in media selector (#12701) 2017-04-10 22:53:51 +07:00
Sergey M․
1af959ef9f [bbccouk] Add support for https protocol in media selector (closes #12701) 2017-04-10 22:53:06 +07:00
Remita Amine
a206ef62df [curiositystream] fix extraction (closes #12638) 2017-04-10 13:50:08 +01:00
Remita Amine
3f2ce6896a [adn] update subtitle decryption key 2017-04-09 12:33:29 +01:00
Sergey M․
a6f7263cf4 [chaturbate] Fix extraction (closes #12665) 2017-04-09 01:39:40 +07:00
10 changed files with 219 additions and 34 deletions

View File

@@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.04.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.04.09**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.04.11*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.04.11**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2017.04.09
[debug] youtube-dl version 2017.04.11
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@@ -1,3 +1,16 @@
version 2017.04.11
Extractors
* [afreecatv] Fix extraction (#12706)
+ [generic] Add support for <object> YouTube embeds (#12637)
* [bbccouk] Treat bitrate as audio+video bitrate in media selector
+ [bbccouk] Skip unrecognized formats in media selector (#12701)
+ [bbccouk] Add support for https protocol in media selector (#12701)
* [curiositystream] Fix extraction (#12638)
* [adn] Update subtitle decryption key
* [chaturbate] Fix extraction (#12665, #12688, #12690)
version 2017.04.09
Extractors

View File

@@ -199,9 +199,16 @@ def generator(test_case, tname):
self.assertEqual(
test_case['playlist_duration_sum'], got_duration)
# Generalize both playlists and single videos to unified format for
# simplicity
if 'entries' not in res_dict:
res_dict['entries'] = [res_dict]
for tc_num, tc in enumerate(test_cases):
tc_res_dict = res_dict['entries'][tc_num] if is_playlist else res_dict
tc_res_dict = res_dict['entries'][tc_num]
# First, check test cases' data against extracted data alone
expect_info_dict(self, tc_res_dict, tc.get('info_dict', {}))
# Now, check downloaded file consistency
tc_filename = get_tc_filename(tc)
if not test_case.get('params', {}).get('skip_download', False):
self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
@@ -219,13 +226,14 @@ def generator(test_case, tname):
if 'md5' in tc:
md5_for_file = _file_md5(tc_filename)
self.assertEqual(md5_for_file, tc['md5'])
# Finally, check test cases' data again but this time against
# extracted data from info JSON file written during processing
info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
self.assertTrue(
os.path.exists(info_json_fn),
'Missing info file %s' % info_json_fn)
with io.open(info_json_fn, encoding='utf-8') as infof:
info_dict = json.load(infof)
expect_info_dict(self, info_dict, tc.get('info_dict', {}))
finally:
try_rm_tcs_files()

View File

@@ -45,7 +45,7 @@ class ADNIE(InfoExtractor):
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
bytes_to_intlist(base64.b64decode(enc_subtitles[24:])),
bytes_to_intlist(b'\xb5@\xcfq\xa3\x98"N\xe4\xf3\x12\x98}}\x16\xd8'),
bytes_to_intlist(b'\nd\xaf\xd2J\xd0\xfc\xe1\xfc\xdf\xb61\xe8\xe1\xf0\xcc'),
bytes_to_intlist(base64.b64decode(enc_subtitles[:24]))
))
subtitles_json = self._parse_json(

View File

@@ -6,6 +6,7 @@ import re
from .common import InfoExtractor
from ..compat import compat_xpath
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
xpath_text,
@@ -72,13 +73,54 @@ class AfreecaTVIE(InfoExtractor):
'url': 'http://vod.afreecatv.com/PLAYER/STATION/18650793',
'info_dict': {
'id': '18650793',
'ext': 'flv',
'ext': 'mp4',
'title': '오늘은 다르다! 쏘님의 우월한 위아래~ 댄스리액션!',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': '윈아디',
'uploader_id': 'badkids',
'title': '오늘은 다르다! 쏘님의 우월한 위아래~ 댄스리액션!',
'duration': 107,
},
'params': {
'skip_download': True, # requires rtmpdump
'skip_download': True,
},
}, {
'url': 'http://vod.afreecatv.com/PLAYER/STATION/10481652',
'info_dict': {
'id': '10481652',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'dailyapril',
'uploader_id': 'dailyapril',
'duration': 6492,
},
'playlist_count': 2,
'playlist': [{
'md5': 'd8b7c174568da61d774ef0203159bf97',
'info_dict': {
'id': '10481652_1',
'ext': 'mp4',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 1)",
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'dailyapril',
'uploader_id': 'dailyapril',
'upload_date': '20160502',
'duration': 3601,
},
}, {
'md5': '58f2ce7f6044e34439ab2d50612ab02b',
'info_dict': {
'id': '10481652_2',
'ext': 'mp4',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 2)",
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'dailyapril',
'uploader_id': 'dailyapril',
'upload_date': '20160502',
'duration': 2891,
},
}],
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
@@ -94,7 +136,7 @@ class AfreecaTVIE(InfoExtractor):
m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key)
if m:
video_key['upload_date'] = m.group('upload_date')
video_key['part'] = m.group('part')
video_key['part'] = int(m.group('part'))
return video_key
def _real_extract(self, url):
@@ -109,23 +151,64 @@ class AfreecaTVIE(InfoExtractor):
raise ExtractorError('Specified AfreecaTV video does not exist',
expected=True)
video_url_raw = video_element.text
app, playpath = video_url_raw.split('mp4:')
video_url = video_element.text.strip()
title = xpath_text(video_xml, './track/title', 'title', fatal=True)
uploader = xpath_text(video_xml, './track/nickname', 'uploader')
uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id')
duration = int_or_none(xpath_text(video_xml, './track/duration',
'duration'))
duration = int_or_none(xpath_text(
video_xml, './track/duration', 'duration'))
thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail')
return {
common_entry = {
'uploader': uploader,
'uploader_id': uploader_id,
'thumbnail': thumbnail,
}
info = common_entry.copy()
info.update({
'id': video_id,
'title': title,
'duration': duration,
})
if not video_url:
entries = []
for file_num, file_element in enumerate(
video_element.findall(compat_xpath('./file')), start=1):
file_url = file_element.text
if not file_url:
continue
video_key = self.parse_video_key(file_element.get('key', ''))
if not video_key:
continue
file_duration = int_or_none(file_element.get('duration'))
part = video_key.get('part', file_num)
format_id = '%s_%s' % (video_id, part)
formats = self._extract_m3u8_formats(
file_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls',
note='Downloading part %d m3u8 information' % file_num)
file_info = common_entry.copy()
file_info.update({
'id': format_id,
'title': '%s (part %d)' % (title, part),
'upload_date': video_key.get('upload_date'),
'duration': file_duration,
'formats': formats,
})
entries.append(file_info)
entries_info = info.copy()
entries_info.update({
'_type': 'multi_video',
'entries': entries,
})
return entries_info
info = {
'id': video_id,
'url': app,
'ext': 'flv',
'play_path': 'mp4:' + playpath,
'rtmp_live': True, # downloading won't end without this
'title': title,
'uploader': uploader,
'uploader_id': uploader_id,
@@ -133,6 +216,21 @@ class AfreecaTVIE(InfoExtractor):
'thumbnail': thumbnail,
}
if determine_ext(video_url) == 'm3u8':
info['formats'] = self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
else:
app, playpath = video_url.split('mp4:')
info.update({
'url': app,
'ext': 'flv',
'play_path': 'mp4:' + playpath,
'rtmp_live': True, # downloading won't end without this
})
return info
class AfreecaTVGlobalIE(AfreecaTVIE):
IE_NAME = 'afreecatv:global'

View File

@@ -361,7 +361,7 @@ class BBCCoUkIE(InfoExtractor):
fmt.update({
'width': width,
'height': height,
'vbr': bitrate,
'tbr': bitrate,
'vcodec': encoding,
})
else:
@@ -370,7 +370,7 @@ class BBCCoUkIE(InfoExtractor):
'acodec': encoding,
'vcodec': 'none',
})
if protocol == 'http':
if protocol in ('http', 'https'):
# Direct link
fmt.update({
'url': href,
@@ -389,6 +389,8 @@ class BBCCoUkIE(InfoExtractor):
'rtmp_live': False,
'ext': 'flv',
})
else:
continue
formats.append(fmt)
elif kind == 'captions':
subtitles = self.extract_subtitles(media, programme_id)

View File

@@ -33,10 +33,17 @@ class ChaturbateIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
m3u8_formats = [(m.group('id').lower(), m.group('url')) for m in re.finditer(
r'hlsSource(?P<id>.+?)\s*=\s*(?P<q>["\'])(?P<url>http.+?)(?P=q)', webpage)]
m3u8_urls = []
if not m3u8_formats:
for m in re.finditer(
r'(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage):
m3u8_fast_url, m3u8_no_fast_url = m.group('url'), m.group(
'url').replace('_fast', '')
for m3u8_url in (m3u8_fast_url, m3u8_no_fast_url):
if m3u8_url not in m3u8_urls:
m3u8_urls.append(m3u8_url)
if not m3u8_urls:
error = self._search_regex(
[r'<span[^>]+class=(["\'])desc_span\1[^>]*>(?P<error>[^<]+)</span>',
r'<div[^>]+id=(["\'])defchat\1[^>]*>\s*<p><strong>(?P<error>[^<]+)<'],
@@ -50,7 +57,8 @@ class ChaturbateIE(InfoExtractor):
raise ExtractorError('Unable to find stream URL')
formats = []
for m3u8_id, m3u8_url in m3u8_formats:
for m3u8_url in m3u8_urls:
m3u8_id = 'fast' if '_fast' in m3u8_url else 'slow'
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, ext='mp4',
# ffmpeg skips segments for fast m3u8

View File

@@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -49,6 +51,48 @@ class CuriosityStreamBaseIE(InfoExtractor):
limelight_media_id = media['limelight_media_id']
title = media['title']
formats = []
for encoding in media.get('encodings', []):
m3u8_url = encoding.get('master_playlist_url')
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
encoding_url = encoding.get('url')
file_url = encoding.get('file_url')
if not encoding_url and not file_url:
continue
f = {
'width': int_or_none(encoding.get('width')),
'height': int_or_none(encoding.get('height')),
'vbr': int_or_none(encoding.get('video_bitrate')),
'abr': int_or_none(encoding.get('audio_bitrate')),
'filesize': int_or_none(encoding.get('size_in_bytes')),
'vcodec': encoding.get('video_codec'),
'acodec': encoding.get('audio_codec'),
'container': encoding.get('container_type'),
}
for f_url in (encoding_url, file_url):
if not f_url:
continue
fmt = f.copy()
rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
if rtmp:
fmt.update({
'url': rtmp.group('url'),
'play_path': rtmp.group('playpath'),
'app': rtmp.group('app'),
'ext': 'flv',
'format_id': 'rtmp',
})
else:
fmt.update({
'url': f_url,
'format_id': 'http',
})
formats.append(fmt)
self._sort_formats(formats)
subtitles = {}
for closed_caption in media.get('closed_captions', []):
sub_url = closed_caption.get('file')
@@ -60,16 +104,14 @@ class CuriosityStreamBaseIE(InfoExtractor):
})
return {
'_type': 'url_transparent',
'id': video_id,
'url': 'limelight:media:' + limelight_media_id,
'formats': formats,
'title': title,
'description': media.get('description'),
'thumbnail': media.get('image_large') or media.get('image_medium') or media.get('image_small'),
'duration': int_or_none(media.get('duration')),
'tags': media.get('tags'),
'subtitles': subtitles,
'ie_key': 'LimelightMedia',
}
@@ -78,14 +120,12 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
_VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)'
_TEST = {
'url': 'https://app.curiositystream.com/video/2',
'md5': 'a0074c190e6cddaf86900b28d3e9ee7a',
'md5': '262bb2f257ff301115f1973540de8983',
'info_dict': {
'id': '2',
'ext': 'mp4',
'title': 'How Did You Develop The Internet?',
'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
'timestamp': 1448388615,
'upload_date': '20151124',
}
}
@@ -105,7 +145,7 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
'title': 'Curious Minds: The Internet',
'description': 'How is the internet shaping our lives in the 21st Century?',
},
'playlist_mincount': 17,
'playlist_mincount': 12,
}
def _real_extract(self, url):

View File

@@ -730,6 +730,21 @@ class GenericIE(InfoExtractor):
'skip_download': True,
}
},
# YouTube <object> embed
{
'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
'md5': '516718101ec834f74318df76259fb3cc',
'info_dict': {
'id': 'msN87y-iEx0',
'ext': 'webm',
'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
'upload_date': '20080526',
'description': 'md5:0ffc78ea3f01b2e2c247d5f8d1d3c18d',
'uploader': 'Christopher Sykes',
'uploader_id': 'ChristopherJSykes',
},
'add_ie': ['Youtube'],
},
# Camtasia studio
{
'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
@@ -1938,6 +1953,7 @@ class GenericIE(InfoExtractor):
data-video-url=|
<embed[^>]+?src=|
embedSWF\(?:\s*|
<object[^>]+data=|
new\s+SWFObject\(
)
(["\'])

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2017.04.09'
__version__ = '2017.04.11'