Compare commits

..

51 Commits

Author SHA1 Message Date
Sergey M․
1730878167 release 2017.04.11 2017-04-11 02:17:53 +07:00
Sergey M․
689cd458a6 [ChangeLog] Actualize 2017-04-11 02:16:20 +07:00
Sergey M․
6b9466de2f [afreecatv] Fix extraction (closes #12706) 2017-04-11 02:05:53 +07:00
John Hawkinson
61568e50cf [generic] Add support for <object> youtube embeds (closes #12637) 2017-04-11 00:04:32 +07:00
Sergey M․
364a69e8c6 [test_download] Fix testing playlists with single video and add comments 2017-04-11 00:01:02 +07:00
Sergey M․
6240925b40 [bbccouk] Treat bitrate as audio+video bitrate in media selector 2017-04-10 22:56:22 +07:00
Sergey M․
964744af95 [bbccouk] Skip unrecognized formats in media selector (#12701) 2017-04-10 22:53:51 +07:00
Sergey M․
1af959ef9f [bbccouk] Add support for https protocol in media selector (closes #12701) 2017-04-10 22:53:06 +07:00
Remita Amine
a206ef62df [curiositystream] fix extraction(closes #12638) 2017-04-10 13:50:08 +01:00
Remita Amine
3f2ce6896a [adn] update subtitle decryption key 2017-04-09 12:33:29 +01:00
Sergey M․
a6f7263cf4 [chaturbate] Fix extraction (closes #12665) 2017-04-09 01:39:40 +07:00
Sergey M․
4372436504 release 2017.04.09 2017-04-09 00:01:28 +07:00
Sergey M․
eb8cc8ea3b [ChangeLog] Actualize 2017-04-08 23:59:17 +07:00
Sergey M․
41b263ac8a [canvas] Add IE_DESC (closes #12478) 2017-04-08 22:45:45 +07:00
Sergey M․
ca8fca9d9d [vrt] Add IE_DESC (closes #12477) 2017-04-08 22:44:31 +07:00
midas02
e129fa0846 [vier] Add IE_DESC 2017-04-08 22:43:29 +07:00
Sergey M․
2bd875edfe [medici] Add extractor (closes #3406) 2017-04-08 22:38:37 +07:00
Steven Maude
95152630db [rbmaradio] Add support for redbullradio.com URLs 2017-04-08 21:39:07 +07:00
Sergey M․
04e431cf97 [npo:live] Improve (closes #12555) 2017-04-08 21:31:22 +07:00
Aldo Gunsing
1591ba258a [npo:live] Add support for default url 2017-04-08 21:30:38 +07:00
Sergey M․
29c6726646 [mixcloud] Fix view count extraction and modernize 2017-04-08 21:11:08 +07:00
Sergey M․
a66e25859a [mixcloud:playlist] Relax title extraction and fix description extraction (closes #12582) 2017-04-08 21:04:09 +07:00
Kfir Breger
c93c0fc2fd [mixcloud:playlist] Fix title extraction 2017-04-08 20:47:07 +07:00
Sergey M․
90e3f18fc1 [thesun] Extract playlists (closes #11298, closes #12674) 2017-04-08 20:08:31 +07:00
Entropy
5f3e0b69ef [TheSun] Add new extractor 2017-04-08 19:54:04 +07:00
Sergey M․
28b674ca23 [ceskateleveize:porady] Add extractor (closes #7411, closes #12645) 2017-04-08 19:46:42 +07:00
Sergey M․
e18f1da97a [ceskateleveize] Improve extraction and remove URL replacement hacks 2017-04-08 19:41:14 +07:00
Sergey M․
78280352ca [kaltura] Cleanup regexes from redundant escaping 2017-04-08 16:48:27 +07:00
John Hawkinson
a01825a541 [kaltura] Add support for iframe embeds 2017-04-08 16:40:57 +07:00
Sergey M․
f8f2da25ab [wshh] Extract html5 entries and delegate to generic extractor (closes #12676) 2017-04-08 16:01:56 +07:00
Yen Chi Hsuan
4c03973296 [airmozilla] Fix extraction (closes #12670) 2017-04-08 15:39:58 +08:00
Sergey M․
60e5016199 [test_download] Remove unused import 2017-04-08 14:19:01 +07:00
Sergey M․
c4d6fc6d65 [test_subtitles] Fix raiplay test 2017-04-08 14:11:20 +07:00
Sergey M․
1b3feca0a7 [raiplay] Extract subtitles 2017-04-08 14:11:03 +07:00
Sergey M․
80b2fdf9ac [test_download] Match info dicts against tests before matching info file 2017-04-08 14:10:12 +07:00
John Hawkinson
3bef10a50c [test_download] typo in comment 2017-04-08 13:07:25 +07:00
John Hawkinson
a84da06f49 [test_download] Improve diagnostic on wrong 'id' 2017-04-08 13:07:25 +07:00
Sergey M․
3461f5db06 [xfileshare] Add support for vidlo.us (closes #12660) 2017-04-06 23:39:09 +07:00
Sergey M․
0378b8b917 [xfileshare] Add support for vidbom.com (closes #12661) 2017-04-06 23:38:16 +07:00
Sergey M․
7f04386b89 [aenetworks] Add more video URL regexes (closes #12657) 2017-04-06 02:36:48 +07:00
Remita Amine
fac39cccd4 [odnoklassniki] fix format sorting for 1080p quality 2017-04-03 23:39:56 +01:00
Remita Amine
b68e00b08a [rtl2] add support for you.rtl2.de(closes #10257) 2017-04-03 21:36:35 +01:00
Sergey M․
2ab0bfcd81 [vshare] Add extractor (closes #12278) 2017-04-04 03:05:18 +07:00
Sergey M․
b022f4f600 release 2017.04.03 2017-04-03 03:53:55 +07:00
Sergey M․
e2435ba5f3 [ChangeLog] Actualize 2017-04-03 03:52:44 +07:00
Remita Amine
a9bb61a425 [discoveryvr] Add new extractor(closes #12578) 2017-04-02 09:22:09 +01:00
Remita Amine
dbf70c489f [tv5mondeplus] clean description and use stable id 2017-04-02 00:26:48 +01:00
Remita Amine
61e2331ad8 [tv5mondeplus] Add new extractor(closes #11386) 2017-04-01 23:49:40 +01:00
Sergey M․
fd47550885 [extractor/common] Add coding cookie 2017-04-02 04:42:10 +07:00
Sergey M․
4457823dda [extractor/common] Move censorship checks to a separate method and add check for just another ISP 2017-04-02 03:57:44 +07:00
Sergey M․
b3633fa0ce [pericope] Add support for pscp.tv URLs 2017-04-02 03:20:28 +07:00
35 changed files with 894 additions and 208 deletions

View File

@@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.04.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.04.02**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.04.11*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.04.11**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2017.04.02
[debug] youtube-dl version 2017.04.11
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@@ -1,7 +1,54 @@
version 2017.04.11
Extractors
* [afreecatv] Fix extraction (#12706)
+ [generic] Add support for <object> YouTube embeds (#12637)
* [bbccouk] Treat bitrate as audio+video bitrate in media selector
+ [bbccouk] Skip unrecognized formats in media selector (#12701)
+ [bbccouk] Add support for https protocol in media selector (#12701)
* [curiositystream] Fix extraction (#12638)
* [adn] Update subtitle decryption key
* [chaturbate] Fix extraction (#12665, #12688, #12690)
version 2017.04.09
Extractors
+ [medici] Add support for medici.tv (#3406)
+ [rbmaradio] Add support for redbullradio.com URLs (#12687)
+ [npo:live] Add support for default URL (#12555)
* [mixcloud:playlist] Fix title, description and view count extraction (#12582)
+ [thesun] Add support for thesun.co.uk (#11298, #12674)
+ [ceskatelevize:porady] Add support for porady (#7411, #12645)
* [ceskatelevize] Improve extraction and remove URL replacement hacks
+ [kaltura] Add support for iframe embeds (#12679)
* [airmozilla] Fix extraction (#12670)
* [wshh] Extract html5 entries and delegate to generic extractor (#12676)
+ [raiplay] Extract subtitles
+ [xfileshare] Add support for vidlo.us (#12660)
+ [xfileshare] Add support for vidbom.com (#12661)
+ [aenetworks] Add more video URL regular expressions (#12657)
+ [odnoklassniki] Fix format sorting for 1080p quality
+ [rtl2] Add support for you.rtl2.de (#10257)
+ [vshare] Add support for vshare.io (#12278)
version 2017.04.03
Core
+ [extractor/common] Add censorship check for TransTelekom ISP
* [extractor/common] Move censorship checks to a separate method
Extractors
+ [discoveryvr] Add support for discoveryvr.com (#12578)
+ [tv5mondeplus] Add support for tv5mondeplus.com (#11386)
+ [periscope] Add support for pscp.tv URLs (#12618, #12625)
version 2017.04.02
Core
[YoutubeDL] Return early when extraction of url_transparent fails
* [YoutubeDL] Return early when extraction of url_transparent fails
Extractors
* [rai] Fix and improve extraction (#11790)

View File

@@ -127,7 +127,7 @@
- **CamWithHer**
- **canalc2.tv**
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
- **Canvas**
- **Canvas**: canvas.be and een.be
- **CarambaTV**
- **CarambaTVPage**
- **CartoonNetwork**
@@ -145,6 +145,7 @@
- **CCTV**: 央视网
- **CDA**
- **CeskaTelevize**
- **CeskaTelevizePorady**
- **channel9**: Channel 9
- **CharlieRose**
- **Chaturbate**
@@ -213,6 +214,7 @@
- **DiscoveryGo**
- **DiscoveryGoPlaylist**
- **DiscoveryNetworksDe**
- **DiscoveryVR**
- **Disney**
- **Dotsub**
- **DouyuTV**: 斗鱼
@@ -430,6 +432,7 @@
- **MDR**: MDR.DE and KiKA
- **media.ccc.de**
- **Medialaan**
- **Medici**
- **Meipai**: 美拍
- **MelonVOD**
- **META**
@@ -657,7 +660,9 @@
- **rte**: Raidió Teilifís Éireann TV
- **rte:radio**: Raidió Teilifís Éireann radio
- **rtl.nl**: rtl.nl and rtlxl.nl
- **RTL2**
- **rtl2**
- **rtl2:you**
- **rtl2:you:series**
- **RTP**
- **RTS**: RTS.ch
- **rtve.es:alacarta**: RTVE a la carta
@@ -779,6 +784,7 @@
- **TheScene**
- **TheSixtyOne**
- **TheStar**
- **TheSun**
- **TheWeatherChannel**
- **ThisAmericanLife**
- **ThisAV**
@@ -815,6 +821,7 @@
- **TV2Article**
- **TV3**
- **TV4**: tv4.se and tv4play.se
- **TV5MondePlus**: TV5MONDE+
- **TVA**
- **TVANouvelles**
- **TVANouvellesArticle**
@@ -891,7 +898,7 @@
- **vidme:user**
- **vidme:user:likes**
- **Vidzi**
- **vier**
- **vier**: vier.be and vijf.be
- **vier:videos**
- **ViewLift**
- **ViewLiftEmbed**
@@ -928,9 +935,10 @@
- **Vporn**
- **vpro**: npo.nl and ntr.nl
- **Vrak**
- **VRT**
- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
- **vrv**
- **vrv:series**
- **VShare**
- **vube**: Vube.com
- **VuClip**
- **VVVVID**
@@ -958,7 +966,7 @@
- **WSJ**: Wall Street Journal
- **XBef**
- **XboxClips**
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo
- **XHamster**
- **XHamsterEmbed**
- **xiami:album**: 虾米音乐 - 专辑

View File

@@ -151,7 +151,7 @@ def generator(test_case, tname):
try_num = 1
while True:
try:
# We're not using .download here sine that is just a shim
# We're not using .download here since that is just a shim
# for outside error handling, and returns the exit code
# instead of the result dict.
res_dict = ydl.extract_info(
@@ -199,7 +199,16 @@ def generator(test_case, tname):
self.assertEqual(
test_case['playlist_duration_sum'], got_duration)
for tc in test_cases:
# Generalize both playlists and single videos to unified format for
# simplicity
if 'entries' not in res_dict:
res_dict['entries'] = [res_dict]
for tc_num, tc in enumerate(test_cases):
tc_res_dict = res_dict['entries'][tc_num]
# First, check test cases' data against extracted data alone
expect_info_dict(self, tc_res_dict, tc.get('info_dict', {}))
# Now, check downloaded file consistency
tc_filename = get_tc_filename(tc)
if not test_case.get('params', {}).get('skip_download', False):
self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
@@ -217,13 +226,14 @@ def generator(test_case, tname):
if 'md5' in tc:
md5_for_file = _file_md5(tc_filename)
self.assertEqual(md5_for_file, tc['md5'])
# Finally, check test cases' data again but this time against
# extracted data from info JSON file written during processing
info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
self.assertTrue(
os.path.exists(info_json_fn),
'Missing info file %s' % info_json_fn)
with io.open(info_json_fn, encoding='utf-8') as infof:
info_dict = json.load(infof)
expect_info_dict(self, info_dict, tc.get('info_dict', {}))
finally:
try_rm_tcs_files()

View File

@@ -21,7 +21,7 @@ from youtube_dl.extractor import (
NPOIE,
ComedyCentralIE,
NRKTVIE,
RaiTVIE,
RaiPlayIE,
VikiIE,
ThePlatformIE,
ThePlatformFeedIE,
@@ -258,9 +258,9 @@ class TestNRKSubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2')
class TestRaiSubtitles(BaseTestSubtitles):
url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
IE = RaiTVIE
class TestRaiPlaySubtitles(BaseTestSubtitles):
url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
IE = RaiPlayIE
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True

View File

@@ -45,7 +45,7 @@ class ADNIE(InfoExtractor):
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
bytes_to_intlist(base64.b64decode(enc_subtitles[24:])),
bytes_to_intlist(b'\xb5@\xcfq\xa3\x98"N\xe4\xf3\x12\x98}}\x16\xd8'),
bytes_to_intlist(b'\nd\xaf\xd2J\xd0\xfc\xe1\xfc\xdf\xb61\xe8\xe1\xf0\xcc'),
bytes_to_intlist(base64.b64decode(enc_subtitles[:24]))
))
subtitles_json = self._parse_json(

View File

@@ -107,7 +107,10 @@ class AENetworksIE(AENetworksBaseIE):
}
video_id = self._html_search_meta('aetn:VideoID', webpage)
media_url = self._search_regex(
r"media_url\s*=\s*'([^']+)'", webpage, 'video url')
[r"media_url\s*=\s*'(?P<url>[^']+)'",
r'data-media-url=(?P<url>(?:https?:)?//[^\s>]+)',
r'data-media-url=(["\'])(?P<url>(?:(?!\1).)+?)\1'],
webpage, 'video url', group='url')
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
info = self._parse_theplatform_metadata(theplatform_metadata)

View File

@@ -6,6 +6,7 @@ import re
from .common import InfoExtractor
from ..compat import compat_xpath
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
xpath_text,
@@ -72,13 +73,54 @@ class AfreecaTVIE(InfoExtractor):
'url': 'http://vod.afreecatv.com/PLAYER/STATION/18650793',
'info_dict': {
'id': '18650793',
'ext': 'flv',
'ext': 'mp4',
'title': '오늘은 다르다! 쏘님의 우월한 위아래~ 댄스리액션!',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': '윈아디',
'uploader_id': 'badkids',
'title': '오늘은 다르다! 쏘님의 우월한 위아래~ 댄스리액션!',
'duration': 107,
},
'params': {
'skip_download': True, # requires rtmpdump
'skip_download': True,
},
}, {
'url': 'http://vod.afreecatv.com/PLAYER/STATION/10481652',
'info_dict': {
'id': '10481652',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'dailyapril',
'uploader_id': 'dailyapril',
'duration': 6492,
},
'playlist_count': 2,
'playlist': [{
'md5': 'd8b7c174568da61d774ef0203159bf97',
'info_dict': {
'id': '10481652_1',
'ext': 'mp4',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 1)",
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'dailyapril',
'uploader_id': 'dailyapril',
'upload_date': '20160502',
'duration': 3601,
},
}, {
'md5': '58f2ce7f6044e34439ab2d50612ab02b',
'info_dict': {
'id': '10481652_2',
'ext': 'mp4',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 2)",
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'dailyapril',
'uploader_id': 'dailyapril',
'upload_date': '20160502',
'duration': 2891,
},
}],
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
@@ -94,7 +136,7 @@ class AfreecaTVIE(InfoExtractor):
m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key)
if m:
video_key['upload_date'] = m.group('upload_date')
video_key['part'] = m.group('part')
video_key['part'] = int(m.group('part'))
return video_key
def _real_extract(self, url):
@@ -109,23 +151,64 @@ class AfreecaTVIE(InfoExtractor):
raise ExtractorError('Specified AfreecaTV video does not exist',
expected=True)
video_url_raw = video_element.text
app, playpath = video_url_raw.split('mp4:')
video_url = video_element.text.strip()
title = xpath_text(video_xml, './track/title', 'title', fatal=True)
uploader = xpath_text(video_xml, './track/nickname', 'uploader')
uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id')
duration = int_or_none(xpath_text(video_xml, './track/duration',
'duration'))
duration = int_or_none(xpath_text(
video_xml, './track/duration', 'duration'))
thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail')
return {
common_entry = {
'uploader': uploader,
'uploader_id': uploader_id,
'thumbnail': thumbnail,
}
info = common_entry.copy()
info.update({
'id': video_id,
'title': title,
'duration': duration,
})
if not video_url:
entries = []
for file_num, file_element in enumerate(
video_element.findall(compat_xpath('./file')), start=1):
file_url = file_element.text
if not file_url:
continue
video_key = self.parse_video_key(file_element.get('key', ''))
if not video_key:
continue
file_duration = int_or_none(file_element.get('duration'))
part = video_key.get('part', file_num)
format_id = '%s_%s' % (video_id, part)
formats = self._extract_m3u8_formats(
file_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls',
note='Downloading part %d m3u8 information' % file_num)
file_info = common_entry.copy()
file_info.update({
'id': format_id,
'title': '%s (part %d)' % (title, part),
'upload_date': video_key.get('upload_date'),
'duration': file_duration,
'formats': formats,
})
entries.append(file_info)
entries_info = info.copy()
entries_info.update({
'_type': 'multi_video',
'entries': entries,
})
return entries_info
info = {
'id': video_id,
'url': app,
'ext': 'flv',
'play_path': 'mp4:' + playpath,
'rtmp_live': True, # downloading won't end without this
'title': title,
'uploader': uploader,
'uploader_id': uploader_id,
@@ -133,6 +216,21 @@ class AfreecaTVIE(InfoExtractor):
'thumbnail': thumbnail,
}
if determine_ext(video_url) == 'm3u8':
info['formats'] = self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
else:
app, playpath = video_url.split('mp4:')
info.update({
'url': app,
'ext': 'flv',
'play_path': 'mp4:' + playpath,
'rtmp_live': True, # downloading won't end without this
})
return info
class AfreecaTVGlobalIE(AfreecaTVIE):
IE_NAME = 'afreecatv:global'

View File

@@ -15,12 +15,12 @@ class AirMozillaIE(InfoExtractor):
_VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
_TEST = {
'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
'md5': '2e3e7486ba5d180e829d453875b9b8bf',
'md5': '8d02f53ee39cf006009180e21df1f3ba',
'info_dict': {
'id': '6x4q2w',
'ext': 'mp4',
'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
'thumbnail': r're:https?://vid\.ly/(?P<id>[0-9a-z-]+)/poster',
'thumbnail': r're:https?://.*/poster\.jpg',
'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
'timestamp': 1422487800,
'upload_date': '20150128',
@@ -34,21 +34,13 @@ class AirMozillaIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._html_search_regex(r'//vid.ly/(.*?)/embed', webpage, 'id')
video_id = self._html_search_regex(r'//vid\.ly/(.*?)/embed', webpage, 'id')
embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id)
jwconfig = self._search_regex(r'\svar jwconfig = (\{.*?\});\s', embed_script, 'metadata')
metadata = self._parse_json(jwconfig, video_id)
formats = [{
'url': source['file'],
'ext': source['type'],
'format_id': self._search_regex(r'&format=(.*)$', source['file'], 'video format'),
'format': source['label'],
'height': int(source['label'].rstrip('p')),
} for source in metadata['playlist'][0]['sources']]
self._sort_formats(formats)
jwconfig = self._parse_json(self._search_regex(
r'initCallback\((.*)\);', embed_script, 'metadata'), video_id)['config']
info_dict = self._parse_jwplayer_data(jwconfig, video_id)
view_count = int_or_none(self._html_search_regex(
r'Views since archived: ([0-9]+)',
webpage, 'view count', fatal=False))
@@ -58,17 +50,17 @@ class AirMozillaIE(InfoExtractor):
r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
webpage, 'duration', fatal=False))
return {
info_dict.update({
'id': video_id,
'title': self._og_search_title(webpage),
'formats': formats,
'url': self._og_search_url(webpage),
'display_id': display_id,
'thumbnail': metadata['playlist'][0].get('image'),
'description': self._og_search_description(webpage),
'timestamp': timestamp,
'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None),
'duration': duration,
'view_count': view_count,
'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage),
}
})
return info_dict

View File

@@ -361,7 +361,7 @@ class BBCCoUkIE(InfoExtractor):
fmt.update({
'width': width,
'height': height,
'vbr': bitrate,
'tbr': bitrate,
'vcodec': encoding,
})
else:
@@ -370,7 +370,7 @@ class BBCCoUkIE(InfoExtractor):
'acodec': encoding,
'vcodec': 'none',
})
if protocol == 'http':
if protocol in ('http', 'https'):
# Direct link
fmt.update({
'url': href,
@@ -389,6 +389,8 @@ class BBCCoUkIE(InfoExtractor):
'rtmp_live': False,
'ext': 'flv',
})
else:
continue
formats.append(fmt)
elif kind == 'captions':
subtitles = self.extract_subtitles(media, programme_id)

View File

@@ -7,6 +7,7 @@ from ..utils import float_or_none
class CanvasIE(InfoExtractor):
IE_DESC = 'canvas.be and een.be'
_VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',

View File

@@ -12,13 +12,14 @@ from ..utils import (
ExtractorError,
float_or_none,
sanitized_Request,
unescapeHTML,
urlencode_postdata,
USER_AGENTS,
)
class CeskaTelevizeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P<id>[^/#?]+)/*(?:[#?].*)?$'
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
_TESTS = [{
'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
'info_dict': {
@@ -62,40 +63,12 @@ class CeskaTelevizeIE(InfoExtractor):
},
'skip': 'Georestricted to Czech Republic',
}, {
# video with 18+ caution trailer
'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
'info_dict': {
'id': '215562210900007-bogotart',
'title': 'Queer: Bogotart',
'description': 'Alternativní průvodce současným queer světem',
},
'playlist': [{
'info_dict': {
'id': '61924494876844842',
'ext': 'mp4',
'title': 'Queer: Bogotart (Varování 18+)',
'duration': 10.2,
},
}, {
'info_dict': {
'id': '61924494877068022',
'ext': 'mp4',
'title': 'Queer: Bogotart (Queer)',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 1558.3,
},
}],
'params': {
# m3u8 download
'skip_download': True,
},
'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
'only_matching': True,
}]
def _real_extract(self, url):
url = url.replace('/porady/', '/ivysilani/').replace('/video/', '')
mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id')
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
@@ -103,13 +76,28 @@ class CeskaTelevizeIE(InfoExtractor):
if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
typ = self._html_search_regex(
r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type')
episode_id = self._html_search_regex(
r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id')
type_ = None
episode_id = None
playlist = self._parse_json(
self._search_regex(
r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist',
default='{}'), playlist_id)
if playlist:
type_ = playlist.get('type')
episode_id = playlist.get('id')
if not type_:
type_ = self._html_search_regex(
r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],',
webpage, 'type')
if not episode_id:
episode_id = self._html_search_regex(
r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],',
webpage, 'episode_id')
data = {
'playlist[0][type]': typ,
'playlist[0][type]': type_,
'playlist[0][id]': episode_id,
'requestUrl': compat_urllib_parse_urlparse(url).path,
'requestSource': 'iVysilani',
@@ -245,3 +233,47 @@ class CeskaTelevizeIE(InfoExtractor):
yield line
return '\r\n'.join(_fix_subtitle(subtitles))
# Extractor for ceskatelevize.cz "/porady/" pages. It does not extract media
# itself: it locates the embedded player URL on the page and hands it over to
# CeskaTelevizeIE.
# NOTE(review): indentation is not visible in this view; the class/method
# nesting is assumed to follow the usual extractor layout — confirm against
# the real file.
class CeskaTelevizePoradyIE(InfoExtractor):
# Matches any path under /porady/; the last non-empty path segment becomes the id.
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
_TESTS = [{
# video with 18+ caution trailer
'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
'info_dict': {
'id': '215562210900007-bogotart',
'title': 'Queer: Bogotart',
'description': 'Alternativní průvodce současným queer světem',
},
'playlist': [{
'info_dict': {
'id': '61924494876844842',
'ext': 'mp4',
'title': 'Queer: Bogotart (Varování 18+)',
'duration': 10.2,
},
}, {
'info_dict': {
'id': '61924494877068022',
'ext': 'mp4',
'title': 'Queer: Bogotart (Queer)',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 1558.3,
},
}],
'params': {
# m3u8 download
'skip_download': True,
},
}]
# Download the page, read the `data-url` attribute of a <span> element,
# HTML-unescape it and delegate to CeskaTelevizeIE through url_result().
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
# The attribute value may be quoted with either " or '; the back-reference
# \1 requires the closing quote to match the opening one.
data_url = unescapeHTML(self._search_regex(
r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
webpage, 'iframe player url', group='url'))
return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())

View File

@@ -33,10 +33,17 @@ class ChaturbateIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
m3u8_formats = [(m.group('id').lower(), m.group('url')) for m in re.finditer(
r'hlsSource(?P<id>.+?)\s*=\s*(?P<q>["\'])(?P<url>http.+?)(?P=q)', webpage)]
m3u8_urls = []
if not m3u8_formats:
for m in re.finditer(
r'(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage):
m3u8_fast_url, m3u8_no_fast_url = m.group('url'), m.group(
'url').replace('_fast', '')
for m3u8_url in (m3u8_fast_url, m3u8_no_fast_url):
if m3u8_url not in m3u8_urls:
m3u8_urls.append(m3u8_url)
if not m3u8_urls:
error = self._search_regex(
[r'<span[^>]+class=(["\'])desc_span\1[^>]*>(?P<error>[^<]+)</span>',
r'<div[^>]+id=(["\'])defchat\1[^>]*>\s*<p><strong>(?P<error>[^<]+)<'],
@@ -50,7 +57,8 @@ class ChaturbateIE(InfoExtractor):
raise ExtractorError('Unable to find stream URL')
formats = []
for m3u8_id, m3u8_url in m3u8_formats:
for m3u8_url in m3u8_urls:
m3u8_id = 'fast' if '_fast' in m3u8_url else 'slow'
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, ext='mp4',
# ffmpeg skips segments for fast m3u8

View File

@@ -1,3 +1,4 @@
# coding: utf-8
from __future__ import unicode_literals
import base64
@@ -547,6 +548,34 @@ class InfoExtractor(object):
return encoding
# Inspect decoded page content for three known "access blocked" pages and
# raise ExtractorError(expected=True) with an explanatory message when one is
# recognised.
# NOTE(review): indentation is not visible in this view, so which statements
# sit under each `if` (in particular the `raise` lines) is inferred — confirm
# against the real file.
def __check_blocked(self, content):
# Leading slice of the page; used for the 'Websense' marker and the
# "URL you requested has been blocked" title checks below.
first_block = content[:512]
# Case 1: Websense filtering page (title anywhere in content, marker in
# the first 512 characters).
if ('<title>Access to this site is blocked</title>' in content and
'Websense' in first_block):
msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'
# presumably default=None makes a missing iframe non-fatal — verify
# against _html_search_regex
blocked_iframe = self._html_search_regex(
r'<iframe src="([^"]+)"', content,
'Websense information URL', default=None)
if blocked_iframe:
msg += ' Visit %s for more details' % blocked_iframe
raise ExtractorError(msg, expected=True)
# Case 2: block page reported as Indian censorship; the title must occur
# within the first 512 characters.
if '<title>The URL you requested has been blocked</title>' in first_block:
msg = (
'Access to this webpage has been blocked by Indian censorship. '
'Use a VPN or proxy server (with --proxy) to route around it.')
block_msg = self._html_search_regex(
r'</h1><p>(.*?)</p>',
content, 'block message', default=None)
if block_msg:
# Newlines in the extracted message are replaced by spaces before it
# is appended to the error text.
msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
raise ExtractorError(msg, expected=True)
# Case 3: TTK block page; both the title and the blocklist.rkn.gov.ru
# reference are searched for in the whole content.
if ('<title>TTK :: Доступ к ресурсу ограничен</title>' in content and
'blocklist.rkn.gov.ru' in content):
raise ExtractorError(
'Access to this webpage has been blocked by decision of the Russian government. '
'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
expected=True)
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
content_type = urlh.headers.get('Content-Type', '')
webpage_bytes = urlh.read()
@@ -588,25 +617,7 @@ class InfoExtractor(object):
except LookupError:
content = webpage_bytes.decode('utf-8', 'replace')
if ('<title>Access to this site is blocked</title>' in content and
'Websense' in content[:512]):
msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'
blocked_iframe = self._html_search_regex(
r'<iframe src="([^"]+)"', content,
'Websense information URL', default=None)
if blocked_iframe:
msg += ' Visit %s for more details' % blocked_iframe
raise ExtractorError(msg, expected=True)
if '<title>The URL you requested has been blocked</title>' in content[:512]:
msg = (
'Access to this webpage has been blocked by Indian censorship. '
'Use a VPN or proxy server (with --proxy) to route around it.')
block_msg = self._html_search_regex(
r'</h1><p>(.*?)</p>',
content, 'block message', default=None)
if block_msg:
msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
raise ExtractorError(msg, expected=True)
self.__check_blocked(content)
return content

View File

@@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -49,6 +51,48 @@ class CuriosityStreamBaseIE(InfoExtractor):
limelight_media_id = media['limelight_media_id']
title = media['title']
formats = []
for encoding in media.get('encodings', []):
m3u8_url = encoding.get('master_playlist_url')
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
encoding_url = encoding.get('url')
file_url = encoding.get('file_url')
if not encoding_url and not file_url:
continue
f = {
'width': int_or_none(encoding.get('width')),
'height': int_or_none(encoding.get('height')),
'vbr': int_or_none(encoding.get('video_bitrate')),
'abr': int_or_none(encoding.get('audio_bitrate')),
'filesize': int_or_none(encoding.get('size_in_bytes')),
'vcodec': encoding.get('video_codec'),
'acodec': encoding.get('audio_codec'),
'container': encoding.get('container_type'),
}
for f_url in (encoding_url, file_url):
if not f_url:
continue
fmt = f.copy()
rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
if rtmp:
fmt.update({
'url': rtmp.group('url'),
'play_path': rtmp.group('playpath'),
'app': rtmp.group('app'),
'ext': 'flv',
'format_id': 'rtmp',
})
else:
fmt.update({
'url': f_url,
'format_id': 'http',
})
formats.append(fmt)
self._sort_formats(formats)
subtitles = {}
for closed_caption in media.get('closed_captions', []):
sub_url = closed_caption.get('file')
@@ -60,16 +104,14 @@ class CuriosityStreamBaseIE(InfoExtractor):
})
return {
'_type': 'url_transparent',
'id': video_id,
'url': 'limelight:media:' + limelight_media_id,
'formats': formats,
'title': title,
'description': media.get('description'),
'thumbnail': media.get('image_large') or media.get('image_medium') or media.get('image_small'),
'duration': int_or_none(media.get('duration')),
'tags': media.get('tags'),
'subtitles': subtitles,
'ie_key': 'LimelightMedia',
}
@@ -78,14 +120,12 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
_VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)'
_TEST = {
'url': 'https://app.curiositystream.com/video/2',
'md5': 'a0074c190e6cddaf86900b28d3e9ee7a',
'md5': '262bb2f257ff301115f1973540de8983',
'info_dict': {
'id': '2',
'ext': 'mp4',
'title': 'How Did You Develop The Internet?',
'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
'timestamp': 1448388615,
'upload_date': '20151124',
}
}
@@ -105,7 +145,7 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
'title': 'Curious Minds: The Internet',
'description': 'How is the internet shaping our lives in the 21st Century?',
},
'playlist_mincount': 17,
'playlist_mincount': 12,
}
def _real_extract(self, url):

View File

@@ -0,0 +1,59 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import parse_duration
class DiscoveryVRIE(InfoExtractor):
    """Extractor for discoveryvr.com watch pages.

    The watch page embeds a "bootstrap data" blob as a JavaScript string
    literal; the video whose slug equals the id from the URL is looked up
    in that blob.
    """
    _VALID_URL = r'https?://(?:www\.)?discoveryvr\.com/watch/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://www.discoveryvr.com/watch/discovery-vr-an-introduction',
        'md5': '32b1929798c464a54356378b7912eca4',
        'info_dict': {
            'id': 'discovery-vr-an-introduction',
            'ext': 'mp4',
            'title': 'Discovery VR - An Introduction',
            'description': 'md5:80d418a10efb8899d9403e61d8790f06',
        }
    }

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The blob sits inside a double-quoted JS string, so the backslash
        # escapes are undone before it is parsed as JSON.
        escaped_bootstrap = self._search_regex(
            r'root\.DVR\.bootstrapData\s+=\s+"({.+?})";',
            webpage, 'bootstrap data')
        bootstrap = self._parse_json(
            escaped_bootstrap.encode('utf-8').decode('unicode_escape'),
            display_id)

        # 'videos' is itself a JSON-encoded string nested in the blob.
        all_videos = self._parse_json(
            bootstrap['videos'], display_id)['allVideos']
        video_data = next(
            entry for entry in all_videos
            if entry.get('slug') == display_id)

        series = video_data.get('showTitle')
        episode = video_data.get('title') or series
        title = episode
        if series and series != title:
            title = '%s - %s' % (series, title)

        # (key in the video data, format id reported to the user)
        format_sources = (
            ('cdnUriM3U8', 'mobi'),
            ('webVideoUrlSd', 'sd'),
            ('webVideoUrlHd', 'hd'),
        )
        formats = [
            {
                'format_id': format_id,
                'url': video_data[source_key],
            }
            for source_key, format_id in format_sources
            if video_data.get(source_key)
        ]

        return {
            'id': display_id,
            'display_id': display_id,
            'title': title,
            'description': video_data.get('description'),
            'thumbnail': video_data.get('thumbnail'),
            'duration': parse_duration(video_data.get('runTime')),
            'formats': formats,
            'episode': episode,
            'series': series,
        }

View File

@@ -165,7 +165,10 @@ from .ccc import CCCIE
from .ccma import CCMAIE
from .cctv import CCTVIE
from .cda import CDAIE
from .ceskatelevize import CeskaTelevizeIE
from .ceskatelevize import (
CeskaTelevizeIE,
CeskaTelevizePoradyIE,
)
from .channel9 import Channel9IE
from .charlierose import CharlieRoseIE
from .chaturbate import ChaturbateIE
@@ -273,6 +276,7 @@ from .discoverygo import (
DiscoveryGoPlaylistIE,
)
from .discoverynetworks import DiscoveryNetworksDeIE
from .discoveryvr import DiscoveryVRIE
from .disney import DisneyIE
from .dispeak import DigitallySpeakingIE
from .dropbox import DropboxIE
@@ -537,6 +541,7 @@ from .mangomolo import (
)
from .matchtv import MatchTVIE
from .mdr import MDRIE
from .medici import MediciIE
from .meipai import MeipaiIE
from .melonvod import MelonVODIE
from .meta import METAIE
@@ -833,7 +838,11 @@ from .rozhlas import RozhlasIE
from .rtbf import RTBFIE
from .rte import RteIE, RteRadioIE
from .rtlnl import RtlNlIE
from .rtl2 import RTL2IE
from .rtl2 import (
RTL2IE,
RTL2YouIE,
RTL2YouSeriesIE,
)
from .rtp import RTPIE
from .rts import RTSIE
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE
@@ -975,6 +984,7 @@ from .theplatform import (
from .thescene import TheSceneIE
from .thesixtyone import TheSixtyOneIE
from .thestar import TheStarIE
from .thesun import TheSunIE
from .theweatherchannel import TheWeatherChannelIE
from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE
@@ -1023,6 +1033,7 @@ from .tv2 import (
)
from .tv3 import TV3IE
from .tv4 import TV4IE
from .tv5mondeplus import TV5MondePlusIE
from .tva import TVAIE
from .tvanouvelles import (
TVANouvellesIE,
@@ -1186,6 +1197,7 @@ from .vrv import (
VRVIE,
VRVSeriesIE,
)
from .vshare import VShareIE
from .medialaan import MedialaanIE
from .vube import VubeIE
from .vuclip import VuClipIE

View File

@@ -730,6 +730,21 @@ class GenericIE(InfoExtractor):
'skip_download': True,
}
},
# YouTube <object> embed
{
'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
'md5': '516718101ec834f74318df76259fb3cc',
'info_dict': {
'id': 'msN87y-iEx0',
'ext': 'webm',
'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
'upload_date': '20080526',
'description': 'md5:0ffc78ea3f01b2e2c247d5f8d1d3c18d',
'uploader': 'Christopher Sykes',
'uploader_id': 'ChristopherJSykes',
},
'add_ie': ['Youtube'],
},
# Camtasia studio
{
'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
@@ -1080,6 +1095,21 @@ class GenericIE(InfoExtractor):
},
'add_ie': ['Kaltura'],
},
{
# Kaltura iframe embed
'url': 'http://www.gsd.harvard.edu/event/i-m-pei-a-centennial-celebration/',
'md5': 'ae5ace8eb09dc1a35d03b579a9c2cc44',
'info_dict': {
'id': '0_f2cfbpwy',
'ext': 'mp4',
'title': 'I. M. Pei: A Centennial Celebration',
'description': 'md5:1db8f40c69edc46ca180ba30c567f37c',
'upload_date': '20170403',
'uploader_id': 'batchUser',
'timestamp': 1491232186,
},
'add_ie': ['Kaltura'],
},
# Eagle.Platform embed (generic URL)
{
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
@@ -1923,6 +1953,7 @@ class GenericIE(InfoExtractor):
data-video-url=|
<embed[^>]+?src=|
embedSWF\(?:\s*|
<object[^>]+data=|
new\s+SWFObject\(
)
(["\'])

View File

@@ -91,6 +91,7 @@ class KalturaIE(InfoExtractor):
}],
},
},
'skip': 'Gone. Maybe https://www.safaribooksonline.com/library/tutorials/introduction-to-python-anon/3469/',
'params': {
'skip_download': True,
},
@@ -107,27 +108,37 @@ class KalturaIE(InfoExtractor):
@staticmethod
def _extract_url(webpage):
# Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
mobj = (
re.search(
r"""(?xs)
kWidget\.(?:thumb)?[Ee]mbed\(
\{.*?
(?P<q1>['\"])wid(?P=q1)\s*:\s*
(?P<q2>['\"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*
(?P<q4>['\"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
(?P<q1>['"])wid(?P=q1)\s*:\s*
(?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
(?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
(?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
""", webpage) or
re.search(
r'''(?xs)
(?P<q1>["\'])
(?P<q1>["'])
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
(?P=q1).*?
(?:
entry_?[Ii]d|
(?P<q2>["\'])entry_?[Ii]d(?P=q2)
(?P<q2>["'])entry_?[Ii]d(?P=q2)
)\s*:\s*
(?P<q3>["\'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
''', webpage))
(?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
''', webpage) or
re.search(
r'''(?xs)
<iframe[^>]+src=(?P<q1>["'])
(?:https?:)?//(?:www\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
(?:(?!(?P=q1)).)*
[?&]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
(?P=q1)
''', webpage)
)
if mobj:
embed_info = mobj.groupdict()
url = 'kaltura:%(partner_id)s:%(id)s' % embed_info

View File

@@ -0,0 +1,70 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
unified_strdate,
update_url_query,
urlencode_postdata,
)
class MediciIE(InfoExtractor):
    """Extractor for medici.tv hash-bang (``#!/<slug>``) pages.

    The landing page is fetched first to obtain the ``csrftoken`` cookie,
    then the page data is requested as JSON via an XHR-style POST and the
    HDS (f4m) manifest referenced by it is expanded into formats.
    """
    _VALID_URL = r'https?://(?:www\.)?medici\.tv/#!/(?P<id>[^?#&]+)'
    _TEST = {
        'url': 'http://www.medici.tv/#!/daniel-harding-frans-helmerson-verbier-festival-music-camp',
        'md5': '004c21bb0a57248085b6ff3fec72719d',
        'info_dict': {
            'id': '3059',
            'ext': 'flv',
            'title': 'Daniel Harding conducts the Verbier Festival Music Camp \u2013 With Frans Helmerson',
            'description': 'md5:322a1e952bafb725174fd8c1a8212f58',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20170408',
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        # Sets csrftoken cookie
        self._download_webpage(url, video_id)

        MEDICI_URL = 'http://www.medici.tv/'

        data = self._download_json(
            MEDICI_URL, video_id,
            data=urlencode_postdata({
                'json': 'true',
                'page': '/%s' % video_id,
                'timezone_offset': -420,
            }), headers={
                # The token from the cookie must be echoed back in a header
                'X-CSRFToken': self._get_cookies(url)['csrftoken'].value,
                'X-Requested-With': 'XMLHttpRequest',
                'Referer': MEDICI_URL,
                'Content-Type': 'application/x-www-form-urlencoded',
            })

        video = data['video']['videos']['video1']

        title = video.get('nom') or data['title']

        # Prefer the id reported by the site over the URL slug
        video_id = video.get('id') or video_id
        formats = self._extract_f4m_formats(
            update_url_query(video['url_akamai'], {
                'hdcore': '3.1.0',
                # Key and value are urlencoded separately, so an '=' inside
                # the key would be escaped to %3D; the parameter has to be
                # spelled as plugin=aasp-<version>.
                'plugin': 'aasp-3.1.0.43.124',
            }), video_id, f4m_id='hds')

        description = data.get('meta_description')
        thumbnail = video.get('url_thumbnail') or data.get('main_image')
        upload_date = unified_strdate(data['video'].get('date'))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'formats': formats,
        }

View File

@@ -97,7 +97,7 @@ class MixcloudIE(InfoExtractor):
view_count = str_to_int(self._search_regex(
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
r'/listeners/?">([0-9,.]+)</a>',
r'm-tooltip=["\']([\d,.]+) plays'],
r'(?:m|data)-tooltip=["\']([\d,.]+) plays'],
webpage, 'play count', default=None))
return {
@@ -138,12 +138,12 @@ class MixcloudPlaylistBaseIE(InfoExtractor):
def _get_user_description(self, page_content):
return self._html_search_regex(
r'<div[^>]+class="description-text"[^>]*>(.+?)</div>',
r'<div[^>]+class="profile-bio"[^>]*>(.+?)</div>',
page_content, 'user description', fatal=False)
class MixcloudUserIE(MixcloudPlaylistBaseIE):
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/(?P<type>uploads|favorites|listens)?/?$'
_VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/(?P<type>uploads|favorites|listens)?/?$'
IE_NAME = 'mixcloud:user'
_TESTS = [{
@@ -151,7 +151,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'info_dict': {
'id': 'dholbach_uploads',
'title': 'Daniel Holbach (uploads)',
'description': 'md5:327af72d1efeb404a8216c27240d1370',
'description': 'md5:def36060ac8747b3aabca54924897e47',
},
'playlist_mincount': 11,
}, {
@@ -159,7 +159,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'info_dict': {
'id': 'dholbach_uploads',
'title': 'Daniel Holbach (uploads)',
'description': 'md5:327af72d1efeb404a8216c27240d1370',
'description': 'md5:def36060ac8747b3aabca54924897e47',
},
'playlist_mincount': 11,
}, {
@@ -167,7 +167,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'info_dict': {
'id': 'dholbach_favorites',
'title': 'Daniel Holbach (favorites)',
'description': 'md5:327af72d1efeb404a8216c27240d1370',
'description': 'md5:def36060ac8747b3aabca54924897e47',
},
'params': {
'playlist_items': '1-100',
@@ -178,7 +178,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'info_dict': {
'id': 'dholbach_listens',
'title': 'Daniel Holbach (listens)',
'description': 'md5:327af72d1efeb404a8216c27240d1370',
'description': 'md5:def36060ac8747b3aabca54924897e47',
},
'params': {
'playlist_items': '1-100',
@@ -216,7 +216,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'
_VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'
IE_NAME = 'mixcloud:playlist'
_TESTS = [{
@@ -229,12 +229,7 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
'playlist_mincount': 16,
}, {
'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
'info_dict': {
'id': 'maxvibes_jazzcat-on-ness-radio',
'title': 'Jazzcat on Ness Radio',
'description': 'md5:7bbbf0d6359a0b8cda85224be0f8f263',
},
'playlist_mincount': 23
'only_matching': True,
}]
def _real_extract(self, url):
@@ -243,15 +238,16 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
playlist_id = mobj.group('playlist')
video_id = '%s_%s' % (user_id, playlist_id)
profile = self._download_webpage(
webpage = self._download_webpage(
url, user_id,
note='Downloading playlist page',
errnote='Unable to download playlist page')
description = self._get_user_description(profile)
playlist_title = self._html_search_regex(
r'<span[^>]+class="[^"]*list-playlist-title[^"]*"[^>]*>(.*?)</span>',
profile, 'playlist title')
title = self._html_search_regex(
r'<a[^>]+class="parent active"[^>]*><b>\d+</b><span[^>]*>([^<]+)',
webpage, 'playlist title',
default=None) or self._og_search_title(webpage, fatal=False)
description = self._get_user_description(webpage)
entries = OnDemandPagedList(
functools.partial(
@@ -259,11 +255,11 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
'%s/playlists/%s' % (user_id, playlist_id), video_id, 'tracklist'),
self._PAGE_SIZE)
return self.playlist_result(entries, video_id, playlist_title, description)
return self.playlist_result(entries, video_id, title, description)
class MixcloudStreamIE(MixcloudPlaylistBaseIE):
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/stream/?$'
_VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/stream/?$'
IE_NAME = 'mixcloud:stream'
_TEST = {

View File

@@ -313,9 +313,9 @@ class NPOIE(NPOBaseIE):
class NPOLiveIE(NPOBaseIE):
IE_NAME = 'npo.nl:live'
_VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:www\.)?npo\.nl/live(?:/(?P<id>[^/?#&]+))?'
_TEST = {
_TESTS = [{
'url': 'http://www.npo.nl/live/npo-1',
'info_dict': {
'id': 'LI_NL1_4188102',
@@ -327,10 +327,13 @@ class NPOLiveIE(NPOBaseIE):
'params': {
'skip_download': True,
}
}
}, {
'url': 'http://www.npo.nl/live',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
display_id = self._match_id(url) or 'npo-1'
webpage = self._download_webpage(url, display_id)

View File

@@ -176,7 +176,7 @@ class OdnoklassnikiIE(InfoExtractor):
})
return info
quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd'))
quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd', 'full'))
formats = [{
'url': f['url'],

View File

@@ -20,7 +20,7 @@ class PeriscopeBaseIE(InfoExtractor):
class PeriscopeIE(PeriscopeBaseIE):
IE_DESC = 'Periscope'
IE_NAME = 'periscope'
_VALID_URL = r'https?://(?:www\.)?periscope\.tv/[^/]+/(?P<id>[^/?#]+)'
_VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/]+/(?P<id>[^/?#]+)'
# Alive example URLs can be found here http://onperiscope.com/
_TESTS = [{
'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
@@ -41,6 +41,9 @@ class PeriscopeIE(PeriscopeBaseIE):
}, {
'url': 'https://www.periscope.tv/bastaakanoggano/1OdKrlkZZjOJX',
'only_matching': True,
}, {
'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv',
'only_matching': True,
}]
@staticmethod
@@ -103,7 +106,7 @@ class PeriscopeIE(PeriscopeBaseIE):
class PeriscopeUserIE(PeriscopeBaseIE):
_VALID_URL = r'https?://(?:www\.)?periscope\.tv/(?P<id>[^/]+)/?$'
_VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/(?P<id>[^/]+)/?$'
IE_DESC = 'Periscope user videos'
IE_NAME = 'periscope:user'

View File

@@ -97,6 +97,25 @@ class RaiBaseIE(InfoExtractor):
'formats': formats,
}.items() if v is not None)
@staticmethod
def _extract_subtitles(url, subtitle_url):
    """Build the subtitles dict for an Italian subtitle track.

    Returns an empty dict unless subtitle_url is a non-empty string.
    Otherwise the URL is resolved against url and listed under 'it' as
    an 'stl' track; when it ends in '.stl', a sibling '.srt' URL is
    listed as a second track.
    """
    if not subtitle_url or not isinstance(subtitle_url, compat_str):
        return {}

    subtitle_url = urljoin(url, subtitle_url)
    italian_tracks = [{
        'ext': 'stl',
        'url': subtitle_url,
    }]

    STL_EXT = '.stl'
    SRT_EXT = '.srt'
    if subtitle_url.endswith(STL_EXT):
        # Same path with the extension swapped
        italian_tracks.append({
            'ext': 'srt',
            'url': subtitle_url[:-len(STL_EXT)] + SRT_EXT,
        })

    return {'it': italian_tracks}
class RaiPlayIE(RaiBaseIE):
_VALID_URL = r'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.html)' % RaiBaseIE._UUID_RE
@@ -168,6 +187,8 @@ class RaiPlayIE(RaiBaseIE):
timestamp = unified_timestamp(try_get(
media, lambda x: x['availabilities'][0]['start'], compat_str))
subtitles = self._extract_subtitles(url, video.get('subtitles'))
info = {
'id': video_id,
'title': title,
@@ -183,6 +204,7 @@ class RaiPlayIE(RaiBaseIE):
'season_number': int_or_none(try_get(
media, lambda x: x['isPartOf']['numeroStagioni'])),
'season': media.get('stagione') or None,
'subtitles': subtitles,
}
info.update(relinker_info)
@@ -307,17 +329,7 @@ class RaiIE(RaiBaseIE):
'url': compat_urlparse.urljoin(url, thumbnail_url),
})
subtitles = {}
captions = media.get('subtitlesUrl')
if captions:
STL_EXT = '.stl'
SRT_EXT = '.srt'
if captions.endswith(STL_EXT):
captions = captions[:-len(STL_EXT)] + SRT_EXT
subtitles['it'] = [{
'ext': 'srt',
'url': captions,
}]
subtitles = self._extract_subtitles(url, media.get('subtitlesUrl'))
info = {
'id': content_id,

View File

@@ -13,15 +13,15 @@ from ..utils import (
class RBMARadioIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<show_id>[^/]+)/episodes/(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:www\.)?(?:rbmaradio|redbullradio)\.com/shows/(?P<show_id>[^/]+)/episodes/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'https://www.rbmaradio.com/shows/main-stage/episodes/ford-lopatin-live-at-primavera-sound-2011',
'md5': '6bc6f9bcb18994b4c983bc3bf4384d95',
'info_dict': {
'id': 'ford-lopatin-live-at-primavera-sound-2011',
'ext': 'mp3',
'title': 'Main Stage - Ford & Lopatin',
'description': 'md5:4f340fb48426423530af5a9d87bd7b91',
'title': 'Main Stage - Ford & Lopatin at Primavera Sound',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 2452,
'timestamp': 1307103164,

View File

@@ -1,13 +1,26 @@
# coding: utf-8
from __future__ import unicode_literals
import base64
import re
from .common import InfoExtractor
from ..utils import int_or_none
from ..aes import aes_cbc_decrypt
from ..compat import (
compat_ord,
compat_str,
)
from ..utils import (
bytes_to_intlist,
ExtractorError,
intlist_to_bytes,
int_or_none,
strip_or_none,
)
class RTL2IE(InfoExtractor):
IE_NAME = 'rtl2'
_VALID_URL = r'http?://(?:www\.)?rtl2\.de/[^?#]*?/(?P<id>[^?#/]*?)(?:$|/(?:$|[?#]))'
_TESTS = [{
'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0',
@@ -98,3 +111,98 @@ class RTL2IE(InfoExtractor):
'duration': int_or_none(video_info.get('duration')),
'formats': formats,
}
# Shared base for the you.rtl2.de extractors; it only holds the root URL of
# the backend API that the subclasses query.
class RTL2YouBaseIE(InfoExtractor):
# Keep the trailing slash: subclasses append endpoint paths such as
# 'stream/video/<id>' by plain string concatenation.
_BACKWERK_BASE_URL = 'https://p-you-backwerk.rtl2apps.de/'
class RTL2YouIE(RTL2YouBaseIE):
    """Extractor for single you.rtl2.de videos.

    The backend returns the HLS manifest URL AES-CBC encrypted as
    base64("<base64 ciphertext>:<base64 IV>"); it is decrypted with
    _AES_KEY before the formats are extracted.
    """
    IE_NAME = 'rtl2:you'
    # 'https?' rather than 'http?': the latter makes the 'p' optional and
    # therefore never matches https URLs.
    _VALID_URL = r'https?://you\.rtl2\.de/(?:video/\d+/|youplayer/index\.html\?.*?\bvid=)(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://you.rtl2.de/video/3002/15740/MJUNIK%20%E2%80%93%20Home%20of%20YOU/307-hirn-wo-bist-du',
        'info_dict': {
            'id': '15740',
            'ext': 'mp4',
            'title': 'MJUNIK Home of YOU - #307 Hirn, wo bist du?!',
            'description': 'md5:ddaa95c61b372b12b66e115b2772fe01',
            'age_limit': 12,
        },
    }, {
        'url': 'http://you.rtl2.de/youplayer/index.html?vid=15712',
        'only_matching': True,
    }]
    _AES_KEY = b'\xe9W\xe4.<*\xb8\x1a\xd2\xb6\x92\xf3C\xd3\xefL\x1b\x03*\xbbbH\xc0\x03\xffo\xc2\xf2(\xaa\xaa!'
    _GEO_COUNTRIES = ['DE']

    def _real_extract(self, url):
        video_id = self._match_id(url)

        stream_data = self._download_json(
            self._BACKWERK_BASE_URL + 'stream/video/' + video_id, video_id)

        # streamUrl decodes to '<ciphertext>:<iv>', both parts base64 again
        data, iv = base64.b64decode(stream_data['streamUrl']).decode().split(':')
        stream_url = intlist_to_bytes(aes_cbc_decrypt(
            bytes_to_intlist(base64.b64decode(data)),
            bytes_to_intlist(self._AES_KEY),
            bytes_to_intlist(base64.b64decode(iv))
        ))
        if b'rtl2_you_video_not_found' in stream_url:
            raise ExtractorError('video not found', expected=True)

        # The last byte holds the padding length; strip that many bytes
        formats = self._extract_m3u8_formats(
            stream_url[:-compat_ord(stream_url[-1])].decode(),
            video_id, 'mp4', 'm3u8_native')
        self._sort_formats(formats)

        video_data = self._download_json(
            self._BACKWERK_BASE_URL + 'video/' + video_id, video_id)

        series = video_data.get('formatTitle')
        title = episode = video_data.get('title') or series
        if series and series != title:
            title = '%s - %s' % (series, title)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': strip_or_none(video_data.get('description')),
            'thumbnail': video_data.get('image'),
            # Scaled down by 1000 via int_or_none's scale argument
            'duration': int_or_none(stream_data.get('duration') or video_data.get('duration'), 1000),
            'series': series,
            'episode': episode,
            'age_limit': int_or_none(video_data.get('minimumAge')),
        }
class RTL2YouSeriesIE(RTL2YouBaseIE):
    """Extractor for you.rtl2.de series pages.

    Returns a playlist with one RTL2You url_result per video listed by the
    backend for the series ("format") id taken from the URL.
    """
    IE_NAME = 'rtl2:you:series'
    # 'https?' rather than 'http?': the latter makes the 'p' optional and
    # therefore never matches https URLs.
    _VALID_URL = r'https?://you\.rtl2\.de/videos/(?P<id>\d+)'
    _TEST = {
        'url': 'http://you.rtl2.de/videos/115/dragon-ball',
        'info_dict': {
            'id': '115',
        },
        'playlist_mincount': 5,
    }

    def _real_extract(self, url):
        series_id = self._match_id(url)
        stream_data = self._download_json(
            self._BACKWERK_BASE_URL + 'videos',
            series_id, query={
                'formatId': series_id,
                # Effectively "no limit": ask for everything in one request
                'limit': 1000000000,
            })

        entries = []
        for video in stream_data.get('videos', []):
            raw_video_id = video.get('videoId')
            # Validate before stringifying: compat_str(None) is the truthy
            # string 'None', and a missing key must not abort the playlist.
            if raw_video_id is None or raw_video_id == '':
                continue
            video_id = compat_str(raw_video_id)
            entries.append(self.url_result(
                'http://you.rtl2.de/video/%s/%s' % (series_id, video_id),
                'RTL2You', video_id))
        return self.playlist_result(entries, series_id)

View File

@@ -0,0 +1,32 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .ooyala import OoyalaIE
class TheSunIE(InfoExtractor):
    """Extractor for thesun.co.uk articles.

    Every Ooyala player found in the article becomes one playlist entry.
    """
    _VALID_URL = r'https://(?:www\.)?thesun\.co\.uk/[^/]+/(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.thesun.co.uk/tvandshowbiz/2261604/orlando-bloom-and-katy-perry-post-adorable-instagram-video-together-celebrating-thanksgiving-after-split-rumours/',
        'info_dict': {
            'id': '2261604',
            'title': 'md5:cba22f48bad9218b64d5bbe0e16afddf',
        },
        'playlist_count': 2,
    }

    def _real_extract(self, url):
        article_id = self._match_id(url)
        webpage = self._download_webpage(url, article_id)

        # The Ooyala id is either the tail of the player element's id
        # attribute or the value of a data-content-id attribute.
        ooyala_ids = re.findall(
            r'<[^>]+\b(?:id\s*=\s*"thesun-ooyala-player-|data-content-id\s*=\s*")([^"]+)',
            webpage)
        entries = [
            OoyalaIE._build_url_result(ooyala_id)
            for ooyala_id in ooyala_ids]

        playlist_title = self._og_search_title(webpage, fatal=False)
        return self.playlist_result(entries, article_id, playlist_title)

View File

@@ -0,0 +1,79 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
clean_html,
determine_ext,
extract_attributes,
get_element_by_class,
int_or_none,
parse_duration,
parse_iso8601,
)
class TV5MondePlusIE(InfoExtractor):
    """Extractor for tv5mondeplus.com video pages.

    Media files are read from the JSON held in the ``data-broadcast``
    attribute of the page's video player loader element.
    """
    IE_DESC = 'TV5MONDE+'
    _VALID_URL = r'https?://(?:www\.)?tv5mondeplus\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://www.tv5mondeplus.com/toutes-les-videos/documentaire/tdah-mon-amour-tele-quebec-tdah-mon-amour-ep001-enfants',
        'md5': '12130fc199f020673138a83466542ec6',
        'info_dict': {
            'id': 'tdah-mon-amour-tele-quebec-tdah-mon-amour-ep001-enfants',
            'ext': 'mp4',
            'title': 'Tdah, mon amour - Enfants',
            'description': 'md5:230e3aca23115afcf8006d1bece6df74',
            'upload_date': '20170401',
            'timestamp': 1491022860,
        }
    }
    _GEO_BYPASS = False

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Notice shown by the site when the programme is geo-blocked
        geo_block_notice = ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<"
        if geo_block_notice in webpage:
            self.raise_geo_restricted(countries=['FR'])

        series = get_element_by_class('video-detail__title', webpage)
        episode = get_element_by_class(
            'video-detail__subtitle', webpage) or series
        title = episode
        if series and series != title:
            title = '%s - %s' % (series, title)

        loader_tag = self._search_regex(
            r'(<[^>]+class="video_player_loader"[^>]+>)',
            webpage, 'video player loader')
        vpl_data = extract_attributes(loader_tag)
        broadcast = self._parse_json(vpl_data['data-broadcast'], display_id)

        formats = []
        for video_file in broadcast.get('files', []):
            v_url = video_file.get('url')
            if v_url:
                # Fall back to the URL's extension when no format is given
                video_format = video_file.get('format') or determine_ext(v_url)
                if video_format != 'm3u8':
                    formats.append({
                        'url': v_url,
                        'format_id': video_format,
                    })
                else:
                    formats.extend(self._extract_m3u8_formats(
                        v_url, display_id, 'mp4', 'm3u8_native',
                        m3u8_id='hls', fatal=False))
        self._sort_formats(formats)

        description = clean_html(
            get_element_by_class('video-detail__description', webpage))
        # The player attribute wins; the page meta tag is the fallback
        duration = int_or_none(vpl_data.get('data-duration')) or parse_duration(
            self._html_search_meta('duration', webpage))
        timestamp = parse_iso8601(
            self._html_search_meta('uploadDate', webpage))

        return {
            'id': display_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': vpl_data.get('data-image'),
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
            'episode': episode,
            'series': series,
        }

View File

@@ -9,6 +9,7 @@ from .common import InfoExtractor
class VierIE(InfoExtractor):
IE_NAME = 'vier'
IE_DESC = 'vier.be and vijf.be'
_VALID_URL = r'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))'
_TESTS = [{
'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129',

View File

@@ -10,6 +10,7 @@ from ..utils import (
class VRTIE(InfoExtractor):
IE_DESC = 'deredactie.be, sporza.be, cobra.be and cobra.canvas.be'
_VALID_URL = r'https?://(?:deredactie|sporza|cobra(?:\.canvas)?)\.be/cm/(?:[^/]+/)+(?P<id>[^/]+)/*'
_TESTS = [
# deredactie.be

View File

@@ -0,0 +1,38 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class VShareIE(InfoExtractor):
    """Extractor for vshare.io download (/d/) and embed (/v/) URLs.

    Both URL forms are resolved through the /d/<id> page, which carries
    the title and a direct "click here" link to the media file.
    """
    _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://vshare.io/d/0f64ce6',
        'md5': '16d7b8fef58846db47419199ff1ab3e7',
        'info_dict': {
            'id': '0f64ce6',
            'title': 'vl14062007715967',
            'ext': 'mp4',
        }
    }, {
        'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        # Always fetch the /d/ page, whichever form the input URL had
        download_page_url = 'https://vshare.io/d/%s' % video_id
        webpage = self._download_webpage(download_page_url, video_id)

        info = {'id': video_id}
        info['title'] = self._html_search_regex(
            r'(?s)<div id="root-container">(.+?)<br/>', webpage, 'title')
        info['url'] = self._search_regex(
            r'<a[^>]+href=(["\'])(?P<url>(?:https?:)?//.+?)\1[^>]*>[Cc]lick\s+here',
            webpage, 'video url', group='url')
        return info

View File

@@ -1,12 +1,10 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class WorldStarHipHopIE(InfoExtractor):
_VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/(?:videos|android)/video\.php\?v=(?P<id>.*)'
_VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/(?:videos|android)/video\.php\?.*?\bv=(?P<id>[^&]+)'
_TESTS = [{
'url': 'http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO',
'md5': '9d04de741161603bf7071bbf4e883186',
@@ -17,48 +15,26 @@ class WorldStarHipHopIE(InfoExtractor):
}
}, {
'url': 'http://m.worldstarhiphop.com/android/video.php?v=wshh6a7q1ny0G34ZwuIO',
'md5': 'dc1c76c83ecc4190bb1eb143899b87d3',
'info_dict': {
'id': 'wshh6a7q1ny0G34ZwuIO',
'ext': 'mp4',
'title': 'KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!'
}
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
m_vevo_id = re.search(r'videoId=(.*?)&amp?', webpage)
if m_vevo_id is not None:
return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')
entries = self._parse_html5_media_entries(url, webpage, video_id)
video_url = self._search_regex(
[r'so\.addVariable\("file","(.*?)"\)',
r'<div class="artlist">\s*<a[^>]+href="([^"]+)">'],
webpage, 'video URL')
if not entries:
return self.url_result(url, 'Generic')
if 'youtube' in video_url:
return self.url_result(video_url, ie='Youtube')
video_title = self._html_search_regex(
title = self._html_search_regex(
[r'(?s)<div class="content-heading">\s*<h1>(.*?)</h1>',
r'<span[^>]+class="tc-sp-pinned-title">(.*)</span>'],
webpage, 'title')
# Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.
thumbnail = self._html_search_regex(
r'rel="image_src" href="(.*)" />', webpage, 'thumbnail',
default=None)
if not thumbnail:
_title = r'candytitles.*>(.*)</span>'
mobj = re.search(_title, webpage)
if mobj is not None:
video_title = mobj.group(1)
return {
info = entries[0]
info.update({
'id': video_id,
'url': video_url,
'title': video_title,
'thumbnail': thumbnail,
}
'title': title,
})
return info

View File

@@ -28,6 +28,8 @@ class XFileShareIE(InfoExtractor):
('streamin.to', 'Streamin.To'),
('xvidstage.com', 'XVIDSTAGE'),
('vidabc.com', 'Vid ABC'),
('vidbom.com', 'VidBom'),
('vidlo.us', 'vidlo'),
)
IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2017.04.02'
__version__ = '2017.04.11'