Compare commits


51 Commits

Author SHA1 Message Date
Sergey M․
1730878167 release 2017.04.11 2017-04-11 02:17:53 +07:00
Sergey M․
689cd458a6 [ChangeLog] Actualize 2017-04-11 02:16:20 +07:00
Sergey M․
6b9466de2f [afreecatv] Fix extraction (closes #12706) 2017-04-11 02:05:53 +07:00
John Hawkinson
61568e50cf [generic] Add support for <object> youtube embeds (closes #12637) 2017-04-11 00:04:32 +07:00
Sergey M․
364a69e8c6 [test_download] Fix testing playlists with single video and add comments 2017-04-11 00:01:02 +07:00
Sergey M․
6240925b40 [bbccouk] Treat bitrate as audio+video bitrate in media selector 2017-04-10 22:56:22 +07:00
Sergey M․
964744af95 [bbccouk] Skip unrecognized formats in media selector (#12701) 2017-04-10 22:53:51 +07:00
Sergey M․
1af959ef9f [bbccouk] Add support for https protocol in media selector (closes #12701) 2017-04-10 22:53:06 +07:00
Remita Amine
a206ef62df [curiositystream] fix extraction(closes #12638) 2017-04-10 13:50:08 +01:00
Remita Amine
3f2ce6896a [adn] update subtitle decryption key 2017-04-09 12:33:29 +01:00
Sergey M․
a6f7263cf4 [chaturbate] Fix extraction (closes #12665) 2017-04-09 01:39:40 +07:00
Sergey M․
4372436504 release 2017.04.09 2017-04-09 00:01:28 +07:00
Sergey M․
eb8cc8ea3b [ChangeLog] Actualize 2017-04-08 23:59:17 +07:00
Sergey M․
41b263ac8a [canvas] Add IE_DESC (closes #12478) 2017-04-08 22:45:45 +07:00
Sergey M․
ca8fca9d9d [vrt] Add IE_DESC (closes #12477) 2017-04-08 22:44:31 +07:00
midas02
e129fa0846 [vier] Add IE_DESC 2017-04-08 22:43:29 +07:00
Sergey M․
2bd875edfe [medici] Add extractor (closes #3406) 2017-04-08 22:38:37 +07:00
Steven Maude
95152630db [rbmaradio] Add support for redbullradio.com URLs 2017-04-08 21:39:07 +07:00
Sergey M․
04e431cf97 [npo:live] Improve (closes #12555) 2017-04-08 21:31:22 +07:00
Aldo Gunsing
1591ba258a [npo:live] Add support for default url 2017-04-08 21:30:38 +07:00
Sergey M․
29c6726646 [mixcloud] Fix view count extraction and modernize 2017-04-08 21:11:08 +07:00
Sergey M․
a66e25859a [mixcloud:playlist] Relax title extraction and fix description extraction (closes #12582) 2017-04-08 21:04:09 +07:00
Kfir Breger
c93c0fc2fd [mixcloud:playlist] Fix title extraction 2017-04-08 20:47:07 +07:00
Sergey M․
90e3f18fc1 [thesun] Extract playlists (closes #11298, closes #12674) 2017-04-08 20:08:31 +07:00
Entropy
5f3e0b69ef [TheSun] Add new extractor 2017-04-08 19:54:04 +07:00
Sergey M․
28b674ca23 [ceskateleveize:porady] Add extractor (closes #7411, closes #12645) 2017-04-08 19:46:42 +07:00
Sergey M․
e18f1da97a [ceskateleveize] Improve extraction and remove URL replacement hacks 2017-04-08 19:41:14 +07:00
Sergey M․
78280352ca [kaltura] Cleanup regexes from redundant escaping 2017-04-08 16:48:27 +07:00
John Hawkinson
a01825a541 [kaltura] Add support for iframe embeds 2017-04-08 16:40:57 +07:00
Sergey M․
f8f2da25ab [wshh] Extract html5 entries and delegate to generic extractor (closes #12676) 2017-04-08 16:01:56 +07:00
Yen Chi Hsuan
4c03973296 [airmozilla] Fix extraction (closes #12670) 2017-04-08 15:39:58 +08:00
Sergey M․
60e5016199 [test_download] Remove unused import 2017-04-08 14:19:01 +07:00
Sergey M․
c4d6fc6d65 [test_subtitles] Fix raiplay test 2017-04-08 14:11:20 +07:00
Sergey M․
1b3feca0a7 [raiplay] Extract subtitles 2017-04-08 14:11:03 +07:00
Sergey M․
80b2fdf9ac [test_download] Match info dicts against tests before matching info file 2017-04-08 14:10:12 +07:00
John Hawkinson
3bef10a50c [test_download] typo in comment 2017-04-08 13:07:25 +07:00
John Hawkinson
a84da06f49 [test_download] Improve diagnostic on wrong 'id' 2017-04-08 13:07:25 +07:00
Sergey M․
3461f5db06 [xfileshare] Add support for vidlo.us (closes #12660) 2017-04-06 23:39:09 +07:00
Sergey M․
0378b8b917 [xfileshare] Add support for vidbom.com (closes #12661) 2017-04-06 23:38:16 +07:00
Sergey M․
7f04386b89 [aenetworks] Add more video URL regexes (closes #12657) 2017-04-06 02:36:48 +07:00
Remita Amine
fac39cccd4 [odnoklassniki] fix format sorting for 1080p quality 2017-04-03 23:39:56 +01:00
Remita Amine
b68e00b08a [rtl2] add support for you.rtl2.de(closes #10257) 2017-04-03 21:36:35 +01:00
Sergey M․
2ab0bfcd81 [vshare] Add extractor (closes #12278) 2017-04-04 03:05:18 +07:00
Sergey M․
b022f4f600 release 2017.04.03 2017-04-03 03:53:55 +07:00
Sergey M․
e2435ba5f3 [ChangeLog] Actualize 2017-04-03 03:52:44 +07:00
Remita Amine
a9bb61a425 [discoveryvr] Add new extractor(closes #12578) 2017-04-02 09:22:09 +01:00
Remita Amine
dbf70c489f [tv5mondeplus] clean description and use stable id 2017-04-02 00:26:48 +01:00
Remita Amine
61e2331ad8 [tv5mondeplus] Add new extractor(closes #11386) 2017-04-01 23:49:40 +01:00
Sergey M․
fd47550885 [extractor/common] Add coding cookie 2017-04-02 04:42:10 +07:00
Sergey M․
4457823dda [extractor/common] Move censorship checks to a separate method and add check for just another ISP 2017-04-02 03:57:44 +07:00
Sergey M․
b3633fa0ce [pericope] Add support for pscp.tv URLs 2017-04-02 03:20:28 +07:00
35 changed files with 894 additions and 208 deletions

View File

@@ -6,8 +6,8 @@
---
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.04.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.04.11*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.04.02**
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.04.11**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2017.04.02
+[debug] youtube-dl version 2017.04.11
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@@ -1,7 +1,54 @@
version 2017.04.11
Extractors
* [afreecatv] Fix extraction (#12706)
+ [generic] Add support for <object> YouTube embeds (#12637)
* [bbccouk] Treat bitrate as audio+video bitrate in media selector
+ [bbccouk] Skip unrecognized formats in media selector (#12701)
+ [bbccouk] Add support for https protocol in media selector (#12701)
* [curiositystream] Fix extraction (#12638)
* [adn] Update subtitle decryption key
* [chaturbate] Fix extraction (#12665, #12688, #12690)
version 2017.04.09
Extractors
+ [medici] Add support for medici.tv (#3406)
+ [rbmaradio] Add support for redbullradio.com URLs (#12687)
+ [npo:live] Add support for default URL (#12555)
* [mixcloud:playlist] Fix title, description and view count extraction (#12582)
+ [thesun] Add support for thesun.co.uk (#11298, #12674)
+ [ceskatelevize:porady] Add support for porady (#7411, #12645)
* [ceskatelevize] Improve extraction and remove URL replacement hacks
+ [kaltura] Add support for iframe embeds (#12679)
* [airmozilla] Fix extraction (#12670)
* [wshh] Extract html5 entries and delegate to generic extractor (#12676)
+ [raiplay] Extract subtitles
+ [xfileshare] Add support for vidlo.us (#12660)
+ [xfileshare] Add support for vidbom.com (#12661)
+ [aenetworks] Add more video URL regular expressions (#12657)
+ [odnoklassniki] Fix format sorting for 1080p quality
+ [rtl2] Add support for you.rtl2.de (#10257)
+ [vshare] Add support for vshare.io (#12278)
version 2017.04.03
Core
+ [extractor/common] Add censorship check for TransTelekom ISP
* [extractor/common] Move censorship checks to a separate method
Extractors
+ [discoveryvr] Add support for discoveryvr.com (#12578)
+ [tv5mondeplus] Add support for tv5mondeplus.com (#11386)
+ [periscope] Add support for pscp.tv URLs (#12618, #12625)
version 2017.04.02
Core
-[YoutubeDL] Return early when extraction of url_transparent fails
+* [YoutubeDL] Return early when extraction of url_transparent fails
Extractors
* [rai] Fix and improve extraction (#11790)

View File

@@ -127,7 +127,7 @@
- **CamWithHer**
- **canalc2.tv**
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
-- **Canvas**
+- **Canvas**: canvas.be and een.be
- **CarambaTV**
- **CarambaTVPage**
- **CartoonNetwork**
@@ -145,6 +145,7 @@
- **CCTV**: 央视网
- **CDA**
- **CeskaTelevize**
+- **CeskaTelevizePorady**
- **channel9**: Channel 9
- **CharlieRose**
- **Chaturbate**
@@ -213,6 +214,7 @@
- **DiscoveryGo**
- **DiscoveryGoPlaylist**
- **DiscoveryNetworksDe**
+- **DiscoveryVR**
- **Disney**
- **Dotsub**
- **DouyuTV**: 斗鱼
@@ -430,6 +432,7 @@
- **MDR**: MDR.DE and KiKA
- **media.ccc.de**
- **Medialaan**
+- **Medici**
- **Meipai**: 美拍
- **MelonVOD**
- **META**
@@ -657,7 +660,9 @@
- **rte**: Raidió Teilifís Éireann TV
- **rte:radio**: Raidió Teilifís Éireann radio
- **rtl.nl**: rtl.nl and rtlxl.nl
-- **RTL2**
+- **rtl2**
+- **rtl2:you**
+- **rtl2:you:series**
- **RTP**
- **RTS**: RTS.ch
- **rtve.es:alacarta**: RTVE a la carta
@@ -779,6 +784,7 @@
- **TheScene**
- **TheSixtyOne**
- **TheStar**
+- **TheSun**
- **TheWeatherChannel**
- **ThisAmericanLife**
- **ThisAV**
@@ -815,6 +821,7 @@
- **TV2Article**
- **TV3**
- **TV4**: tv4.se and tv4play.se
+- **TV5MondePlus**: TV5MONDE+
- **TVA**
- **TVANouvelles**
- **TVANouvellesArticle**
@@ -891,7 +898,7 @@
- **vidme:user**
- **vidme:user:likes**
- **Vidzi**
-- **vier**
+- **vier**: vier.be and vijf.be
- **vier:videos**
- **ViewLift**
- **ViewLiftEmbed**
@@ -928,9 +935,10 @@
- **Vporn**
- **vpro**: npo.nl and ntr.nl
- **Vrak**
-- **VRT**
+- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
- **vrv**
- **vrv:series**
+- **VShare**
- **vube**: Vube.com
- **VuClip**
- **VVVVID**
@@ -958,7 +966,7 @@
- **WSJ**: Wall Street Journal
- **XBef**
- **XboxClips**
-- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC
+- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo
- **XHamster**
- **XHamsterEmbed**
- **xiami:album**: 虾米音乐 - 专辑

View File

@@ -151,7 +151,7 @@ def generator(test_case, tname):
try_num = 1
while True:
try:
-# We're not using .download here sine that is just a shim
+# We're not using .download here since that is just a shim
# for outside error handling, and returns the exit code
# instead of the result dict.
res_dict = ydl.extract_info(
@@ -199,7 +199,16 @@ def generator(test_case, tname):
self.assertEqual(
test_case['playlist_duration_sum'], got_duration)
-for tc in test_cases:
+# Generalize both playlists and single videos to unified format for
+# simplicity
+if 'entries' not in res_dict:
+res_dict['entries'] = [res_dict]
+for tc_num, tc in enumerate(test_cases):
+tc_res_dict = res_dict['entries'][tc_num]
+# First, check test cases' data against extracted data alone
+expect_info_dict(self, tc_res_dict, tc.get('info_dict', {}))
+# Now, check downloaded file consistency
tc_filename = get_tc_filename(tc)
if not test_case.get('params', {}).get('skip_download', False):
self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
@@ -217,13 +226,14 @@ def generator(test_case, tname):
if 'md5' in tc:
md5_for_file = _file_md5(tc_filename)
self.assertEqual(md5_for_file, tc['md5'])
+# Finally, check test cases' data again but this time against
+# extracted data from info JSON file written during processing
info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
self.assertTrue(
os.path.exists(info_json_fn),
'Missing info file %s' % info_json_fn)
with io.open(info_json_fn, encoding='utf-8') as infof:
info_dict = json.load(infof)
expect_info_dict(self, info_dict, tc.get('info_dict', {}))
finally:
try_rm_tcs_files()
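For context, a minimal standalone sketch of the "unified format" idea in the hunk above: a single-video result dict is wrapped into a one-entry playlist so the same per-entry loop serves both shapes. The result dict contents here are hypothetical; only the wrapping step mirrors the test code.

# Hedged sketch of the playlist/single-video unification used above.
res_dict = {'id': 'BaW_jenozKc', 'title': 'youtube-dl test video'}  # hypothetical extractor output

if 'entries' not in res_dict:
    # a single video is treated as a one-entry playlist
    res_dict['entries'] = [res_dict]

for tc_num, entry in enumerate(res_dict['entries']):
    # each test case tc would be checked against res_dict['entries'][tc_num]
    print(tc_num, entry['id'])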

View File

@@ -21,7 +21,7 @@ from youtube_dl.extractor import (
NPOIE,
ComedyCentralIE,
NRKTVIE,
-RaiTVIE,
+RaiPlayIE,
VikiIE,
ThePlatformIE,
ThePlatformFeedIE,
@@ -258,9 +258,9 @@ class TestNRKSubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2')
-class TestRaiSubtitles(BaseTestSubtitles):
-url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
-IE = RaiTVIE
+class TestRaiPlaySubtitles(BaseTestSubtitles):
+url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
+IE = RaiPlayIE
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True

View File

@@ -45,7 +45,7 @@ class ADNIE(InfoExtractor):
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
bytes_to_intlist(base64.b64decode(enc_subtitles[24:])),
-bytes_to_intlist(b'\xb5@\xcfq\xa3\x98"N\xe4\xf3\x12\x98}}\x16\xd8'),
+bytes_to_intlist(b'\nd\xaf\xd2J\xd0\xfc\xe1\xfc\xdf\xb61\xe8\xe1\xf0\xcc'),
bytes_to_intlist(base64.b64decode(enc_subtitles[:24]))
))
subtitles_json = self._parse_json(
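As a self-contained sketch of the decryption path touched above (the payload value is hypothetical; only the new key and the IV/ciphertext split mirror the extractor code):

# Hedged sketch: decrypt ADN subtitles with the updated key.
import base64
from youtube_dl.aes import aes_cbc_decrypt
from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes

NEW_KEY = b'\nd\xaf\xd2J\xd0\xfc\xe1\xfc\xdf\xb61\xe8\xe1\xf0\xcc'  # 16-byte AES key from the diff

def decrypt_subtitles(enc_subtitles):
    # the first 24 base64 characters carry the IV, the remainder the ciphertext
    return intlist_to_bytes(aes_cbc_decrypt(
        bytes_to_intlist(base64.b64decode(enc_subtitles[24:])),
        bytes_to_intlist(NEW_KEY),
        bytes_to_intlist(base64.b64decode(enc_subtitles[:24])),
    ))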

View File

@@ -107,7 +107,10 @@ class AENetworksIE(AENetworksBaseIE):
}
video_id = self._html_search_meta('aetn:VideoID', webpage)
media_url = self._search_regex(
-r"media_url\s*=\s*'([^']+)'", webpage, 'video url')
+[r"media_url\s*=\s*'(?P<url>[^']+)'",
+r'data-media-url=(?P<url>(?:https?:)?//[^\s>]+)',
+r'data-media-url=(["\'])(?P<url>(?:(?!\1).)+?)\1'],
+webpage, 'video url', group='url')
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
info = self._parse_theplatform_metadata(theplatform_metadata)

View File

@@ -6,6 +6,7 @@ import re
from .common import InfoExtractor
from ..compat import compat_xpath
from ..utils import (
+determine_ext,
ExtractorError,
int_or_none,
xpath_text,
@@ -72,13 +73,54 @@ class AfreecaTVIE(InfoExtractor):
'url': 'http://vod.afreecatv.com/PLAYER/STATION/18650793',
'info_dict': {
'id': '18650793',
-'ext': 'flv',
+'ext': 'mp4',
+'title': '오늘은 다르다! 쏘님의 우월한 위아래~ 댄스리액션!',
+'thumbnail': r're:^https?://.*\.jpg$',
'uploader': '윈아디',
'uploader_id': 'badkids',
-'title': '오늘은 다르다! 쏘님의 우월한 위아래~ 댄스리액션!',
+'duration': 107,
},
'params': {
-'skip_download': True, # requires rtmpdump
+'skip_download': True,
},
}, {
'url': 'http://vod.afreecatv.com/PLAYER/STATION/10481652',
'info_dict': {
'id': '10481652',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'dailyapril',
'uploader_id': 'dailyapril',
'duration': 6492,
},
'playlist_count': 2,
'playlist': [{
'md5': 'd8b7c174568da61d774ef0203159bf97',
'info_dict': {
'id': '10481652_1',
'ext': 'mp4',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 1)",
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'dailyapril',
'uploader_id': 'dailyapril',
'upload_date': '20160502',
'duration': 3601,
},
}, {
'md5': '58f2ce7f6044e34439ab2d50612ab02b',
'info_dict': {
'id': '10481652_2',
'ext': 'mp4',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 2)",
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'dailyapril',
'uploader_id': 'dailyapril',
'upload_date': '20160502',
'duration': 2891,
},
}],
'params': {
'skip_download': True,
}, },
}, {
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
@@ -94,7 +136,7 @@ class AfreecaTVIE(InfoExtractor):
m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key)
if m:
video_key['upload_date'] = m.group('upload_date')
-video_key['part'] = m.group('part')
+video_key['part'] = int(m.group('part'))
return video_key
def _real_extract(self, url):
@@ -109,23 +151,64 @@ class AfreecaTVIE(InfoExtractor):
raise ExtractorError('Specified AfreecaTV video does not exist',
expected=True)
-video_url_raw = video_element.text
-app, playpath = video_url_raw.split('mp4:')
+video_url = video_element.text.strip()
title = xpath_text(video_xml, './track/title', 'title', fatal=True)
uploader = xpath_text(video_xml, './track/nickname', 'uploader')
uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id')
-duration = int_or_none(xpath_text(video_xml, './track/duration',
-'duration'))
+duration = int_or_none(xpath_text(
+video_xml, './track/duration', 'duration'))
thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail')
-return {
+common_entry = {
'uploader': uploader,
'uploader_id': uploader_id,
'thumbnail': thumbnail,
}
info = common_entry.copy()
info.update({
'id': video_id,
'title': title,
'duration': duration,
})
if not video_url:
entries = []
for file_num, file_element in enumerate(
video_element.findall(compat_xpath('./file')), start=1):
file_url = file_element.text
if not file_url:
continue
video_key = self.parse_video_key(file_element.get('key', ''))
if not video_key:
continue
file_duration = int_or_none(file_element.get('duration'))
part = video_key.get('part', file_num)
format_id = '%s_%s' % (video_id, part)
formats = self._extract_m3u8_formats(
file_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls',
note='Downloading part %d m3u8 information' % file_num)
file_info = common_entry.copy()
file_info.update({
'id': format_id,
'title': '%s (part %d)' % (title, part),
'upload_date': video_key.get('upload_date'),
'duration': file_duration,
'formats': formats,
})
entries.append(file_info)
entries_info = info.copy()
entries_info.update({
'_type': 'multi_video',
'entries': entries,
})
return entries_info
+info = {
'id': video_id,
-'url': app,
-'ext': 'flv',
-'play_path': 'mp4:' + playpath,
-'rtmp_live': True, # downloading won't end without this
'title': title,
'uploader': uploader,
'uploader_id': uploader_id,
@@ -133,6 +216,21 @@ class AfreecaTVIE(InfoExtractor):
'thumbnail': thumbnail,
}
if determine_ext(video_url) == 'm3u8':
info['formats'] = self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
else:
app, playpath = video_url.split('mp4:')
info.update({
'url': app,
'ext': 'flv',
'play_path': 'mp4:' + playpath,
'rtmp_live': True, # downloading won't end without this
})
return info
class AfreecaTVGlobalIE(AfreecaTVIE):
IE_NAME = 'afreecatv:global'
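A quick illustration of the parse_video_key() change above, run on a hypothetical file key from the video XML; the int() conversion is what lets the part number be used with %d formatting for the per-part titles and format ids.

# Hedged sketch of AfreecaTVIE.parse_video_key() after this change; the key
# string is a made-up example of the '<date>_<hash>_<part>' shape.
import re

def parse_video_key(key):
    video_key = {}
    m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key)
    if m:
        video_key['upload_date'] = m.group('upload_date')
        video_key['part'] = int(m.group('part'))
    return video_key

print(parse_video_key('20160502_c5fe4f4a_1'))
# -> {'upload_date': '20160502', 'part': 1}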

View File

@@ -15,12 +15,12 @@ class AirMozillaIE(InfoExtractor):
_VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?' _VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
_TEST = { _TEST = {
'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
-'md5': '2e3e7486ba5d180e829d453875b9b8bf',
+'md5': '8d02f53ee39cf006009180e21df1f3ba',
'info_dict': {
'id': '6x4q2w',
'ext': 'mp4',
'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
-'thumbnail': r're:https?://vid\.ly/(?P<id>[0-9a-z-]+)/poster',
+'thumbnail': r're:https?://.*/poster\.jpg',
'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...', 'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
'timestamp': 1422487800, 'timestamp': 1422487800,
'upload_date': '20150128', 'upload_date': '20150128',
@@ -34,21 +34,13 @@ class AirMozillaIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
-video_id = self._html_search_regex(r'//vid.ly/(.*?)/embed', webpage, 'id')
+video_id = self._html_search_regex(r'//vid\.ly/(.*?)/embed', webpage, 'id')
embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id)
-jwconfig = self._search_regex(r'\svar jwconfig = (\{.*?\});\s', embed_script, 'metadata')
-metadata = self._parse_json(jwconfig, video_id)
-formats = [{
-'url': source['file'],
-'ext': source['type'],
-'format_id': self._search_regex(r'&format=(.*)$', source['file'], 'video format'),
-'format': source['label'],
-'height': int(source['label'].rstrip('p')),
-} for source in metadata['playlist'][0]['sources']]
-self._sort_formats(formats)
+jwconfig = self._parse_json(self._search_regex(
+r'initCallback\((.*)\);', embed_script, 'metadata'), video_id)['config']
+info_dict = self._parse_jwplayer_data(jwconfig, video_id)
view_count = int_or_none(self._html_search_regex( view_count = int_or_none(self._html_search_regex(
r'Views since archived: ([0-9]+)', r'Views since archived: ([0-9]+)',
webpage, 'view count', fatal=False)) webpage, 'view count', fatal=False))
@@ -58,17 +50,17 @@ class AirMozillaIE(InfoExtractor):
r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)', r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
webpage, 'duration', fatal=False)) webpage, 'duration', fatal=False))
-return {
+info_dict.update({
'id': video_id,
'title': self._og_search_title(webpage),
-'formats': formats,
'url': self._og_search_url(webpage),
'display_id': display_id,
-'thumbnail': metadata['playlist'][0].get('image'),
'description': self._og_search_description(webpage),
'timestamp': timestamp,
'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None),
'duration': duration,
'view_count': view_count,
'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage),
-}
+})
+return info_dict

View File

@@ -361,7 +361,7 @@ class BBCCoUkIE(InfoExtractor):
fmt.update({
'width': width,
'height': height,
-'vbr': bitrate,
+'tbr': bitrate,
'vcodec': encoding,
})
else:
@@ -370,7 +370,7 @@ class BBCCoUkIE(InfoExtractor):
'acodec': encoding,
'vcodec': 'none',
})
-if protocol == 'http':
+if protocol in ('http', 'https'):
# Direct link
fmt.update({
'url': href,
@@ -389,6 +389,8 @@ class BBCCoUkIE(InfoExtractor):
'rtmp_live': False,
'ext': 'flv',
})
+else:
+continue
formats.append(fmt)
elif kind == 'captions':
subtitles = self.extract_subtitles(media, programme_id)

View File

@@ -7,6 +7,7 @@ from ..utils import float_or_none
class CanvasIE(InfoExtractor):
+IE_DESC = 'canvas.be and een.be'
_VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',

View File

@@ -12,13 +12,14 @@ from ..utils import (
ExtractorError,
float_or_none,
sanitized_Request,
+unescapeHTML,
urlencode_postdata,
USER_AGENTS,
)
class CeskaTelevizeIE(InfoExtractor):
-_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P<id>[^/#?]+)/*(?:[#?].*)?$'
+_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
_TESTS = [{
'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
'info_dict': {
@@ -62,40 +63,12 @@ class CeskaTelevizeIE(InfoExtractor):
},
'skip': 'Georestricted to Czech Republic',
}, {
-# video with 18+ caution trailer
-'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
-'info_dict': {
-'id': '215562210900007-bogotart',
-'title': 'Queer: Bogotart',
-'description': 'Alternativní průvodce současným queer světem',
-},
-'playlist': [{
-'info_dict': {
-'id': '61924494876844842',
-'ext': 'mp4',
-'title': 'Queer: Bogotart (Varování 18+)',
-'duration': 10.2,
-},
-}, {
-'info_dict': {
-'id': '61924494877068022',
-'ext': 'mp4',
-'title': 'Queer: Bogotart (Queer)',
-'thumbnail': r're:^https?://.*\.jpg',
-'duration': 1558.3,
-},
-}],
-'params': {
-# m3u8 download
-'skip_download': True,
-},
+'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
+'only_matching': True,
}]
def _real_extract(self, url):
-url = url.replace('/porady/', '/ivysilani/').replace('/video/', '')
-mobj = re.match(self._VALID_URL, url)
-playlist_id = mobj.group('id')
+playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
@@ -103,13 +76,28 @@ class CeskaTelevizeIE(InfoExtractor):
if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
-typ = self._html_search_regex(
-r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', webpage, 'type')
-episode_id = self._html_search_regex(
-r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', webpage, 'episode_id')
+type_ = None
+episode_id = None
+playlist = self._parse_json(
+self._search_regex(
+r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist',
+default='{}'), playlist_id)
+if playlist:
+type_ = playlist.get('type')
+episode_id = playlist.get('id')
+if not type_:
+type_ = self._html_search_regex(
+r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],',
+webpage, 'type')
+if not episode_id:
+episode_id = self._html_search_regex(
+r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],',
+webpage, 'episode_id')
data = {
-'playlist[0][type]': typ,
+'playlist[0][type]': type_,
'playlist[0][id]': episode_id,
'requestUrl': compat_urllib_parse_urlparse(url).path,
'requestSource': 'iVysilani',
@@ -245,3 +233,47 @@ class CeskaTelevizeIE(InfoExtractor):
yield line yield line
return '\r\n'.join(_fix_subtitle(subtitles)) return '\r\n'.join(_fix_subtitle(subtitles))
class CeskaTelevizePoradyIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
_TESTS = [{
# video with 18+ caution trailer
'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
'info_dict': {
'id': '215562210900007-bogotart',
'title': 'Queer: Bogotart',
'description': 'Alternativní průvodce současným queer světem',
},
'playlist': [{
'info_dict': {
'id': '61924494876844842',
'ext': 'mp4',
'title': 'Queer: Bogotart (Varování 18+)',
'duration': 10.2,
},
}, {
'info_dict': {
'id': '61924494877068022',
'ext': 'mp4',
'title': 'Queer: Bogotart (Queer)',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 1558.3,
},
}],
'params': {
# m3u8 download
'skip_download': True,
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
data_url = unescapeHTML(self._search_regex(
r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
webpage, 'iframe player url', group='url'))
return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())

View File

@@ -33,10 +33,17 @@ class ChaturbateIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
-m3u8_formats = [(m.group('id').lower(), m.group('url')) for m in re.finditer(
-r'hlsSource(?P<id>.+?)\s*=\s*(?P<q>["\'])(?P<url>http.+?)(?P=q)', webpage)]
-if not m3u8_formats:
+m3u8_urls = []
+for m in re.finditer(
+r'(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage):
+m3u8_fast_url, m3u8_no_fast_url = m.group('url'), m.group(
+'url').replace('_fast', '')
+for m3u8_url in (m3u8_fast_url, m3u8_no_fast_url):
+if m3u8_url not in m3u8_urls:
+m3u8_urls.append(m3u8_url)
+if not m3u8_urls:
error = self._search_regex(
[r'<span[^>]+class=(["\'])desc_span\1[^>]*>(?P<error>[^<]+)</span>',
r'<div[^>]+id=(["\'])defchat\1[^>]*>\s*<p><strong>(?P<error>[^<]+)<'],
@@ -50,7 +57,8 @@ class ChaturbateIE(InfoExtractor):
raise ExtractorError('Unable to find stream URL')
formats = []
-for m3u8_id, m3u8_url in m3u8_formats:
+for m3u8_url in m3u8_urls:
+m3u8_id = 'fast' if '_fast' in m3u8_url else 'slow'
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, ext='mp4',
# ffmpeg skips segments for fast m3u8
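A small sketch of the URL pairing introduced above, on a hypothetical playlist URL: whichever variant the page exposes, both the '_fast' and plain playlists end up in the list and are labelled accordingly.

# Hedged sketch of the fast/slow m3u8 pairing above; the URL is hypothetical.
found = 'https://edge.example.com/live-hls/amlst:room/playlist_fast.m3u8'

m3u8_urls = []
for m3u8_url in (found, found.replace('_fast', '')):
    if m3u8_url not in m3u8_urls:
        m3u8_urls.append(m3u8_url)

for m3u8_url in m3u8_urls:
    m3u8_id = 'fast' if '_fast' in m3u8_url else 'slow'
    print(m3u8_id, m3u8_url)
# fast https://edge.example.com/live-hls/amlst:room/playlist_fast.m3u8
# slow https://edge.example.com/live-hls/amlst:room/playlist.m3u8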

View File

@@ -1,3 +1,4 @@
# coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import base64 import base64
@@ -547,6 +548,34 @@ class InfoExtractor(object):
return encoding return encoding
def __check_blocked(self, content):
first_block = content[:512]
if ('<title>Access to this site is blocked</title>' in content and
'Websense' in first_block):
msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'
blocked_iframe = self._html_search_regex(
r'<iframe src="([^"]+)"', content,
'Websense information URL', default=None)
if blocked_iframe:
msg += ' Visit %s for more details' % blocked_iframe
raise ExtractorError(msg, expected=True)
if '<title>The URL you requested has been blocked</title>' in first_block:
msg = (
'Access to this webpage has been blocked by Indian censorship. '
'Use a VPN or proxy server (with --proxy) to route around it.')
block_msg = self._html_search_regex(
r'</h1><p>(.*?)</p>',
content, 'block message', default=None)
if block_msg:
msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
raise ExtractorError(msg, expected=True)
if ('<title>TTK :: Доступ к ресурсу ограничен</title>' in content and
'blocklist.rkn.gov.ru' in content):
raise ExtractorError(
'Access to this webpage has been blocked by decision of the Russian government. '
'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
expected=True)
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None): def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
content_type = urlh.headers.get('Content-Type', '') content_type = urlh.headers.get('Content-Type', '')
webpage_bytes = urlh.read() webpage_bytes = urlh.read()
@@ -588,25 +617,7 @@ class InfoExtractor(object):
except LookupError:
content = webpage_bytes.decode('utf-8', 'replace')
-if ('<title>Access to this site is blocked</title>' in content and
-'Websense' in content[:512]):
-msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'
-blocked_iframe = self._html_search_regex(
-r'<iframe src="([^"]+)"', content,
-'Websense information URL', default=None)
-if blocked_iframe:
-msg += ' Visit %s for more details' % blocked_iframe
-raise ExtractorError(msg, expected=True)
-if '<title>The URL you requested has been blocked</title>' in content[:512]:
-msg = (
-'Access to this webpage has been blocked by Indian censorship. '
-'Use a VPN or proxy server (with --proxy) to route around it.')
-block_msg = self._html_search_regex(
-r'</h1><p>(.*?)</p>',
-content, 'block message', default=None)
-if block_msg:
-msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
-raise ExtractorError(msg, expected=True)
+self.__check_blocked(content)
return content

View File

@@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -49,6 +51,48 @@ class CuriosityStreamBaseIE(InfoExtractor):
limelight_media_id = media['limelight_media_id'] limelight_media_id = media['limelight_media_id']
title = media['title'] title = media['title']
formats = []
for encoding in media.get('encodings', []):
m3u8_url = encoding.get('master_playlist_url')
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
encoding_url = encoding.get('url')
file_url = encoding.get('file_url')
if not encoding_url and not file_url:
continue
f = {
'width': int_or_none(encoding.get('width')),
'height': int_or_none(encoding.get('height')),
'vbr': int_or_none(encoding.get('video_bitrate')),
'abr': int_or_none(encoding.get('audio_bitrate')),
'filesize': int_or_none(encoding.get('size_in_bytes')),
'vcodec': encoding.get('video_codec'),
'acodec': encoding.get('audio_codec'),
'container': encoding.get('container_type'),
}
for f_url in (encoding_url, file_url):
if not f_url:
continue
fmt = f.copy()
rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
if rtmp:
fmt.update({
'url': rtmp.group('url'),
'play_path': rtmp.group('playpath'),
'app': rtmp.group('app'),
'ext': 'flv',
'format_id': 'rtmp',
})
else:
fmt.update({
'url': f_url,
'format_id': 'http',
})
formats.append(fmt)
self._sort_formats(formats)
subtitles = {} subtitles = {}
for closed_caption in media.get('closed_captions', []): for closed_caption in media.get('closed_captions', []):
sub_url = closed_caption.get('file') sub_url = closed_caption.get('file')
@@ -60,16 +104,14 @@ class CuriosityStreamBaseIE(InfoExtractor):
})
return {
-'_type': 'url_transparent',
'id': video_id,
-'url': 'limelight:media:' + limelight_media_id,
+'formats': formats,
'title': title,
'description': media.get('description'),
'thumbnail': media.get('image_large') or media.get('image_medium') or media.get('image_small'),
'duration': int_or_none(media.get('duration')),
'tags': media.get('tags'),
'subtitles': subtitles,
-'ie_key': 'LimelightMedia',
}
@@ -78,14 +120,12 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
_VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)' _VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)'
_TEST = { _TEST = {
'url': 'https://app.curiositystream.com/video/2',
-'md5': 'a0074c190e6cddaf86900b28d3e9ee7a',
+'md5': '262bb2f257ff301115f1973540de8983',
'info_dict': {
'id': '2',
'ext': 'mp4',
'title': 'How Did You Develop The Internet?',
'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
-'timestamp': 1448388615,
-'upload_date': '20151124',
}
}
@@ -105,7 +145,7 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
'title': 'Curious Minds: The Internet',
'description': 'How is the internet shaping our lives in the 21st Century?',
},
-'playlist_mincount': 17,
+'playlist_mincount': 12,
}
def _real_extract(self, url):
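For reference, the RTMP splitting used in the new formats loop above, applied to a hypothetical encoding URL:

# Hedged sketch of the rtmp URL split above; the input URL is made up.
import re

f_url = 'rtmpe://cdn.example.com/cs/vod/mp4:video/12345/2.mp4'
rtmp = re.search(
    r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
if rtmp:
    print(rtmp.group('url'))       # rtmpe://cdn.example.com/cs/vod
    print(rtmp.group('app'))       # cs/vod
    print(rtmp.group('playpath'))  # mp4:video/12345/2.mp4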

View File

@@ -0,0 +1,59 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import parse_duration
class DiscoveryVRIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?discoveryvr\.com/watch/(?P<id>[^/?#]+)'
_TEST = {
'url': 'http://www.discoveryvr.com/watch/discovery-vr-an-introduction',
'md5': '32b1929798c464a54356378b7912eca4',
'info_dict': {
'id': 'discovery-vr-an-introduction',
'ext': 'mp4',
'title': 'Discovery VR - An Introduction',
'description': 'md5:80d418a10efb8899d9403e61d8790f06',
}
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
bootstrap_data = self._search_regex(
r'root\.DVR\.bootstrapData\s+=\s+"({.+?})";',
webpage, 'bootstrap data')
bootstrap_data = self._parse_json(
bootstrap_data.encode('utf-8').decode('unicode_escape'),
display_id)
videos = self._parse_json(bootstrap_data['videos'], display_id)['allVideos']
video_data = next(video for video in videos if video.get('slug') == display_id)
series = video_data.get('showTitle')
title = episode = video_data.get('title') or series
if series and series != title:
title = '%s - %s' % (series, title)
formats = []
for f, format_id in (('cdnUriM3U8', 'mobi'), ('webVideoUrlSd', 'sd'), ('webVideoUrlHd', 'hd')):
f_url = video_data.get(f)
if not f_url:
continue
formats.append({
'format_id': format_id,
'url': f_url,
})
return {
'id': display_id,
'display_id': display_id,
'title': title,
'description': video_data.get('description'),
'thumbnail': video_data.get('thumbnail'),
'duration': parse_duration(video_data.get('runTime')),
'formats': formats,
'episode': episode,
'series': series,
}

View File

@@ -165,7 +165,10 @@ from .ccc import CCCIE
from .ccma import CCMAIE
from .cctv import CCTVIE
from .cda import CDAIE
-from .ceskatelevize import CeskaTelevizeIE
+from .ceskatelevize import (
+CeskaTelevizeIE,
+CeskaTelevizePoradyIE,
+)
from .channel9 import Channel9IE
from .charlierose import CharlieRoseIE
from .chaturbate import ChaturbateIE
@@ -273,6 +276,7 @@ from .discoverygo import (
DiscoveryGoPlaylistIE,
)
from .discoverynetworks import DiscoveryNetworksDeIE
+from .discoveryvr import DiscoveryVRIE
from .disney import DisneyIE
from .dispeak import DigitallySpeakingIE
from .dropbox import DropboxIE
@@ -537,6 +541,7 @@ from .mangomolo import (
)
from .matchtv import MatchTVIE
from .mdr import MDRIE
+from .medici import MediciIE
from .meipai import MeipaiIE
from .melonvod import MelonVODIE
from .meta import METAIE
@@ -833,7 +838,11 @@ from .rozhlas import RozhlasIE
from .rtbf import RTBFIE
from .rte import RteIE, RteRadioIE
from .rtlnl import RtlNlIE
-from .rtl2 import RTL2IE
+from .rtl2 import (
+RTL2IE,
+RTL2YouIE,
+RTL2YouSeriesIE,
+)
from .rtp import RTPIE
from .rts import RTSIE
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE
@@ -975,6 +984,7 @@ from .theplatform import (
from .thescene import TheSceneIE
from .thesixtyone import TheSixtyOneIE
from .thestar import TheStarIE
+from .thesun import TheSunIE
from .theweatherchannel import TheWeatherChannelIE
from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE
@@ -1023,6 +1033,7 @@ from .tv2 import (
)
from .tv3 import TV3IE
from .tv4 import TV4IE
+from .tv5mondeplus import TV5MondePlusIE
from .tva import TVAIE
from .tvanouvelles import (
TVANouvellesIE,
@@ -1186,6 +1197,7 @@ from .vrv import (
VRVIE,
VRVSeriesIE,
)
+from .vshare import VShareIE
from .medialaan import MedialaanIE
from .vube import VubeIE
from .vuclip import VuClipIE

View File

@@ -730,6 +730,21 @@ class GenericIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
} }
}, },
# YouTube <object> embed
{
'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
'md5': '516718101ec834f74318df76259fb3cc',
'info_dict': {
'id': 'msN87y-iEx0',
'ext': 'webm',
'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
'upload_date': '20080526',
'description': 'md5:0ffc78ea3f01b2e2c247d5f8d1d3c18d',
'uploader': 'Christopher Sykes',
'uploader_id': 'ChristopherJSykes',
},
'add_ie': ['Youtube'],
},
# Camtasia studio # Camtasia studio
{ {
'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/', 'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
@@ -1080,6 +1095,21 @@ class GenericIE(InfoExtractor):
}, },
'add_ie': ['Kaltura'], 'add_ie': ['Kaltura'],
}, },
{
# Kaltura iframe embed
'url': 'http://www.gsd.harvard.edu/event/i-m-pei-a-centennial-celebration/',
'md5': 'ae5ace8eb09dc1a35d03b579a9c2cc44',
'info_dict': {
'id': '0_f2cfbpwy',
'ext': 'mp4',
'title': 'I. M. Pei: A Centennial Celebration',
'description': 'md5:1db8f40c69edc46ca180ba30c567f37c',
'upload_date': '20170403',
'uploader_id': 'batchUser',
'timestamp': 1491232186,
},
'add_ie': ['Kaltura'],
},
# Eagle.Platform embed (generic URL)
{
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
@@ -1923,6 +1953,7 @@ class GenericIE(InfoExtractor):
data-video-url=|
<embed[^>]+?src=|
embedSWF\(?:\s*|
+<object[^>]+data=|
new\s+SWFObject\(
)
(["\'])
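The new `<object[^>]+data=|` alternative targets old-style YouTube object embeds. A simplified, hypothetical stand-in (not the full generic.py pattern) showing the kind of markup that branch picks up:

# Hedged, reduced example of the <object ... data=...> case added above;
# the sample HTML and the cut-down regex are illustrative only.
import re

sample = '<object width="425" height="344" data="https://www.youtube.com/v/msN87y-iEx0?fs=1"></object>'
m = re.search(
    r'<object[^>]+data=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/\S+?)\1',
    sample)
print(m.group('url'))  # https://www.youtube.com/v/msN87y-iEx0?fs=1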

View File

@@ -91,6 +91,7 @@ class KalturaIE(InfoExtractor):
}],
},
},
+'skip': 'Gone. Maybe https://www.safaribooksonline.com/library/tutorials/introduction-to-python-anon/3469/',
'params': {
'skip_download': True,
},
@@ -107,27 +108,37 @@ class KalturaIE(InfoExtractor):
@staticmethod
def _extract_url(webpage):
+# Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
mobj = (
re.search(
r"""(?xs)
kWidget\.(?:thumb)?[Ee]mbed\(
\{.*?
-(?P<q1>['\"])wid(?P=q1)\s*:\s*
-(?P<q2>['\"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
-(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*
-(?P<q4>['\"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
+(?P<q1>['"])wid(?P=q1)\s*:\s*
+(?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
+(?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
+(?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
""", webpage) or
re.search(
r'''(?xs)
-(?P<q1>["\'])
+(?P<q1>["'])
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
(?P=q1).*?
(?:
entry_?[Ii]d|
-(?P<q2>["\'])entry_?[Ii]d(?P=q2)
+(?P<q2>["'])entry_?[Ii]d(?P=q2)
)\s*:\s*
-(?P<q3>["\'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
+(?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
-''', webpage))
+''', webpage) or
re.search(
r'''(?xs)
<iframe[^>]+src=(?P<q1>["'])
(?:https?:)?//(?:www\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
(?:(?!(?P=q1)).)*
[?&]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
(?P=q1)
''', webpage)
)
if mobj:
embed_info = mobj.groupdict()
url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
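A quick check of the new iframe branch above against a hypothetical embed; only the entry id matches the test case added in generic.py, the partner id and uiconf values are made up.

# Hedged sketch: the third re.search() pattern added above, run on a
# hypothetical Kaltura iframe embed.
import re

KALTURA_IFRAME_RE = r'''(?xs)
    <iframe[^>]+src=(?P<q1>["'])
      (?:https?:)?//(?:www\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
      (?:(?!(?P=q1)).)*
      [?&]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
    (?P=q1)
'''

webpage = ('<iframe src="https://www.kaltura.com/p/1234567/sp/123456700/embedIframeJs/'
           'uiconf_id/12345678/partner_id/1234567?iframeembed=true&entry_id=0_f2cfbpwy" '
           'width="560" height="395"></iframe>')
m = re.search(KALTURA_IFRAME_RE, webpage)
print(m.group('partner_id'), m.group('id'))  # 1234567 0_f2cfbpwy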

View File

@@ -0,0 +1,70 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
unified_strdate,
update_url_query,
urlencode_postdata,
)
class MediciIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?medici\.tv/#!/(?P<id>[^?#&]+)'
_TEST = {
'url': 'http://www.medici.tv/#!/daniel-harding-frans-helmerson-verbier-festival-music-camp',
'md5': '004c21bb0a57248085b6ff3fec72719d',
'info_dict': {
'id': '3059',
'ext': 'flv',
'title': 'Daniel Harding conducts the Verbier Festival Music Camp \u2013 With Frans Helmerson',
'description': 'md5:322a1e952bafb725174fd8c1a8212f58',
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20170408',
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
# Sets csrftoken cookie
self._download_webpage(url, video_id)
MEDICI_URL = 'http://www.medici.tv/'
data = self._download_json(
MEDICI_URL, video_id,
data=urlencode_postdata({
'json': 'true',
'page': '/%s' % video_id,
'timezone_offset': -420,
}), headers={
'X-CSRFToken': self._get_cookies(url)['csrftoken'].value,
'X-Requested-With': 'XMLHttpRequest',
'Referer': MEDICI_URL,
'Content-Type': 'application/x-www-form-urlencoded',
})
video = data['video']['videos']['video1']
title = video.get('nom') or data['title']
video_id = video.get('id') or video_id
formats = self._extract_f4m_formats(
update_url_query(video['url_akamai'], {
'hdcore': '3.1.0',
'plugin=aasp': '3.1.0.43.124',
}), video_id, f4m_id='hds')
description = data.get('meta_description')
thumbnail = video.get('url_thumbnail') or data.get('main_image')
upload_date = unified_strdate(data['video'].get('date'))
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'upload_date': upload_date,
'formats': formats,
}

View File

@@ -97,7 +97,7 @@ class MixcloudIE(InfoExtractor):
view_count = str_to_int(self._search_regex( view_count = str_to_int(self._search_regex(
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"', [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
r'/listeners/?">([0-9,.]+)</a>', r'/listeners/?">([0-9,.]+)</a>',
-r'm-tooltip=["\']([\d,.]+) plays'],
+r'(?:m|data)-tooltip=["\']([\d,.]+) plays'],
webpage, 'play count', default=None)) webpage, 'play count', default=None))
return { return {
@@ -138,12 +138,12 @@ class MixcloudPlaylistBaseIE(InfoExtractor):
def _get_user_description(self, page_content): def _get_user_description(self, page_content):
return self._html_search_regex( return self._html_search_regex(
-r'<div[^>]+class="description-text"[^>]*>(.+?)</div>',
+r'<div[^>]+class="profile-bio"[^>]*>(.+?)</div>',
page_content, 'user description', fatal=False) page_content, 'user description', fatal=False)
class MixcloudUserIE(MixcloudPlaylistBaseIE): class MixcloudUserIE(MixcloudPlaylistBaseIE):
-_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/(?P<type>uploads|favorites|listens)?/?$'
+_VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/(?P<type>uploads|favorites|listens)?/?$'
IE_NAME = 'mixcloud:user' IE_NAME = 'mixcloud:user'
_TESTS = [{ _TESTS = [{
@@ -151,7 +151,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'info_dict': { 'info_dict': {
'id': 'dholbach_uploads', 'id': 'dholbach_uploads',
'title': 'Daniel Holbach (uploads)', 'title': 'Daniel Holbach (uploads)',
-'description': 'md5:327af72d1efeb404a8216c27240d1370',
+'description': 'md5:def36060ac8747b3aabca54924897e47',
}, },
'playlist_mincount': 11, 'playlist_mincount': 11,
}, { }, {
@@ -159,7 +159,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'info_dict': { 'info_dict': {
'id': 'dholbach_uploads', 'id': 'dholbach_uploads',
'title': 'Daniel Holbach (uploads)', 'title': 'Daniel Holbach (uploads)',
-'description': 'md5:327af72d1efeb404a8216c27240d1370',
+'description': 'md5:def36060ac8747b3aabca54924897e47',
}, },
'playlist_mincount': 11, 'playlist_mincount': 11,
}, { }, {
@@ -167,7 +167,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'info_dict': { 'info_dict': {
'id': 'dholbach_favorites', 'id': 'dholbach_favorites',
'title': 'Daniel Holbach (favorites)', 'title': 'Daniel Holbach (favorites)',
-'description': 'md5:327af72d1efeb404a8216c27240d1370',
+'description': 'md5:def36060ac8747b3aabca54924897e47',
}, },
'params': { 'params': {
'playlist_items': '1-100', 'playlist_items': '1-100',
@@ -178,7 +178,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'info_dict': { 'info_dict': {
'id': 'dholbach_listens', 'id': 'dholbach_listens',
'title': 'Daniel Holbach (listens)', 'title': 'Daniel Holbach (listens)',
-'description': 'md5:327af72d1efeb404a8216c27240d1370',
+'description': 'md5:def36060ac8747b3aabca54924897e47',
}, },
'params': { 'params': {
'playlist_items': '1-100', 'playlist_items': '1-100',
@@ -216,7 +216,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
class MixcloudPlaylistIE(MixcloudPlaylistBaseIE): class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
-_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'
+_VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'
IE_NAME = 'mixcloud:playlist' IE_NAME = 'mixcloud:playlist'
_TESTS = [{ _TESTS = [{
@@ -229,12 +229,7 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
'playlist_mincount': 16, 'playlist_mincount': 16,
}, { }, {
'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
-'info_dict': {
-'id': 'maxvibes_jazzcat-on-ness-radio',
-'title': 'Jazzcat on Ness Radio',
-'description': 'md5:7bbbf0d6359a0b8cda85224be0f8f263',
-},
-'playlist_mincount': 23
+'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
@@ -243,15 +238,16 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
playlist_id = mobj.group('playlist')
video_id = '%s_%s' % (user_id, playlist_id)
-profile = self._download_webpage(
+webpage = self._download_webpage(
url, user_id,
note='Downloading playlist page',
errnote='Unable to download playlist page')
-description = self._get_user_description(profile)
-playlist_title = self._html_search_regex(
-r'<span[^>]+class="[^"]*list-playlist-title[^"]*"[^>]*>(.*?)</span>',
-profile, 'playlist title')
+title = self._html_search_regex(
+r'<a[^>]+class="parent active"[^>]*><b>\d+</b><span[^>]*>([^<]+)',
+webpage, 'playlist title',
+default=None) or self._og_search_title(webpage, fatal=False)
+description = self._get_user_description(webpage)
entries = OnDemandPagedList(
functools.partial(
@@ -259,11 +255,11 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
'%s/playlists/%s' % (user_id, playlist_id), video_id, 'tracklist'), '%s/playlists/%s' % (user_id, playlist_id), video_id, 'tracklist'),
self._PAGE_SIZE) self._PAGE_SIZE)
return self.playlist_result(entries, video_id, playlist_title, description) return self.playlist_result(entries, video_id, title, description)
class MixcloudStreamIE(MixcloudPlaylistBaseIE): class MixcloudStreamIE(MixcloudPlaylistBaseIE):
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/stream/?$' _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/stream/?$'
IE_NAME = 'mixcloud:stream' IE_NAME = 'mixcloud:stream'
_TEST = { _TEST = {
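
The playlist title is now taken from a narrower markup pattern first, with the Open Graph title as a fallback. A minimal standalone sketch of that fallback chain using plain re; the HTML snippet and the search() helper are illustrative only, not real mixcloud.com markup:

import re

# Hypothetical page snippet for illustration only (not real mixcloud.com markup).
HTML = '''
<meta property="og:title" content="Jazzcat on Ness Radio" />
<a class="parent active" href="/maxvibes/playlists/"><b>23</b><span>Jazzcat on Ness Radio</span></a>
'''

def search(pattern, html):
    # Return the first capture group or None, mimicking a "default=None" lookup.
    m = re.search(pattern, html)
    return m.group(1) if m else None

# Primary pattern from the diff above; og:title is only consulted when it misses.
title = (
    search(r'<a[^>]+class="parent active"[^>]*><b>\d+</b><span[^>]*>([^<]+)', HTML)
    or search(r'<meta[^>]+property="og:title"[^>]+content="([^"]+)"', HTML)
)
print(title)  # -> Jazzcat on Ness Radio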


@@ -313,9 +313,9 @@ class NPOIE(NPOBaseIE):

class NPOLiveIE(NPOBaseIE):
    IE_NAME = 'npo.nl:live'
-    _VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?npo\.nl/live(?:/(?P<id>[^/?#&]+))?'

-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.npo.nl/live/npo-1',
        'info_dict': {
            'id': 'LI_NL1_4188102',
@@ -327,10 +327,13 @@ class NPOLiveIE(NPOBaseIE):
        'params': {
            'skip_download': True,
        }
-    }
+    }, {
+        'url': 'http://www.npo.nl/live',
+        'only_matching': True,
+    }]

    def _real_extract(self, url):
-        display_id = self._match_id(url)
+        display_id = self._match_id(url) or 'npo-1'

        webpage = self._download_webpage(url, display_id)
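
Since the id group in _VALID_URL is now optional, _match_id() returns None for the bare /live URL and the extractor falls back to the default channel. A small re-only sketch of that behaviour outside the extractor framework:

import re

VALID_URL = r'https?://(?:www\.)?npo\.nl/live(?:/(?P<id>[^/?#&]+))?'

for url in ('http://www.npo.nl/live/npo-1', 'http://www.npo.nl/live'):
    mobj = re.match(VALID_URL, url)
    # group('id') is None for the bare /live URL, so fall back to a default channel.
    display_id = mobj.group('id') or 'npo-1'
    print(url, '->', display_id)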


@@ -176,7 +176,7 @@ class OdnoklassnikiIE(InfoExtractor):
            })
            return info

-        quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd'))
+        quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd', 'full'))

        formats = [{
            'url': f['url'],
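
The qualities() helper from youtube_dl.utils builds a preference function that ranks a format id by its position in the tuple, so appending 'full' makes it sort above 'hd'. A minimal re-implementation sketch of what that helper does:

def qualities(quality_ids):
    # Map a format id to its rank in the tuple, -1 when unknown,
    # mirroring the behaviour of youtube_dl.utils.qualities.
    def q(qid):
        try:
            return quality_ids.index(qid)
        except ValueError:
            return -1
    return q

quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd', 'full'))
print(quality('hd'), quality('full'), quality('unknown'))  # -> 4 5 -1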


@@ -20,7 +20,7 @@ class PeriscopeBaseIE(InfoExtractor):
class PeriscopeIE(PeriscopeBaseIE):
    IE_DESC = 'Periscope'
    IE_NAME = 'periscope'
-    _VALID_URL = r'https?://(?:www\.)?periscope\.tv/[^/]+/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/]+/(?P<id>[^/?#]+)'
    # Alive example URLs can be found here http://onperiscope.com/
    _TESTS = [{
        'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
@@ -41,6 +41,9 @@ class PeriscopeIE(PeriscopeBaseIE):
    }, {
        'url': 'https://www.periscope.tv/bastaakanoggano/1OdKrlkZZjOJX',
        'only_matching': True,
+    }, {
+        'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv',
+        'only_matching': True,
    }]

    @staticmethod
@@ -103,7 +106,7 @@ class PeriscopeIE(PeriscopeBaseIE):
class PeriscopeUserIE(PeriscopeBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?periscope\.tv/(?P<id>[^/]+)/?$'
+    _VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/(?P<id>[^/]+)/?$'
    IE_DESC = 'Periscope user videos'
    IE_NAME = 'periscope:user'
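
The widened pattern accepts the pscp.tv short domain alongside periscope.tv. A quick standalone check with plain re; the second URL is an illustrative short-domain variant, not one taken from the tests:

import re

VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/]+/(?P<id>[^/?#]+)'

for url in (
        'https://www.periscope.tv/w/1ZkKzPbMVggJv',  # from the test above
        'https://pscp.tv/w/1ZkKzPbMVggJv',           # short-domain form, illustrative
):
    mobj = re.match(VALID_URL, url)
    print(url, '->', mobj.group('id') if mobj else None)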


@@ -97,6 +97,25 @@ class RaiBaseIE(InfoExtractor):
            'formats': formats,
        }.items() if v is not None)

+    @staticmethod
+    def _extract_subtitles(url, subtitle_url):
+        subtitles = {}
+        if subtitle_url and isinstance(subtitle_url, compat_str):
+            subtitle_url = urljoin(url, subtitle_url)
+            STL_EXT = '.stl'
+            SRT_EXT = '.srt'
+            subtitles['it'] = [{
+                'ext': 'stl',
+                'url': subtitle_url,
+            }]
+            if subtitle_url.endswith(STL_EXT):
+                srt_url = subtitle_url[:-len(STL_EXT)] + SRT_EXT
+                subtitles['it'].append({
+                    'ext': 'srt',
+                    'url': srt_url,
+                })
+        return subtitles


class RaiPlayIE(RaiBaseIE):
    _VALID_URL = r'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.html)' % RaiBaseIE._UUID_RE
@@ -168,6 +187,8 @@ class RaiPlayIE(RaiBaseIE):
        timestamp = unified_timestamp(try_get(
            media, lambda x: x['availabilities'][0]['start'], compat_str))

+        subtitles = self._extract_subtitles(url, video.get('subtitles'))
+
        info = {
            'id': video_id,
            'title': title,
@@ -183,6 +204,7 @@ class RaiPlayIE(RaiBaseIE):
            'season_number': int_or_none(try_get(
                media, lambda x: x['isPartOf']['numeroStagioni'])),
            'season': media.get('stagione') or None,
+            'subtitles': subtitles,
        }

        info.update(relinker_info)
@@ -307,17 +329,7 @@ class RaiIE(RaiBaseIE):
            'url': compat_urlparse.urljoin(url, thumbnail_url),
        })

-        subtitles = {}
-        captions = media.get('subtitlesUrl')
-        if captions:
-            STL_EXT = '.stl'
-            SRT_EXT = '.srt'
-            if captions.endswith(STL_EXT):
-                captions = captions[:-len(STL_EXT)] + SRT_EXT
-            subtitles['it'] = [{
-                'ext': 'srt',
-                'url': captions,
-            }]
+        subtitles = self._extract_subtitles(url, media.get('subtitlesUrl'))

        info = {
            'id': content_id,
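
The new shared helper returns the original STL track and, when the path ends in .stl, a derived SRT variant as well. A standalone sketch of the same derivation using the Python 3 stdlib urljoin instead of the compat wrapper; the URLs are made up for illustration:

from urllib.parse import urljoin

def extract_subtitles(url, subtitle_url):
    # Mirrors the logic of RaiBaseIE._extract_subtitles above (sketch only).
    subtitles = {}
    if subtitle_url and isinstance(subtitle_url, str):
        subtitle_url = urljoin(url, subtitle_url)
        subtitles['it'] = [{'ext': 'stl', 'url': subtitle_url}]
        if subtitle_url.endswith('.stl'):
            subtitles['it'].append({
                'ext': 'srt',
                'url': subtitle_url[:-len('.stl')] + '.srt',
            })
    return subtitles

# Illustrative relative path, not a real RAI asset:
print(extract_subtitles('http://www.raiplay.it/video/example.html', '/sub/example.stl'))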


@@ -13,15 +13,15 @@ from ..utils import (

class RBMARadioIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<show_id>[^/]+)/episodes/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:rbmaradio|redbullradio)\.com/shows/(?P<show_id>[^/]+)/episodes/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.rbmaradio.com/shows/main-stage/episodes/ford-lopatin-live-at-primavera-sound-2011',
        'md5': '6bc6f9bcb18994b4c983bc3bf4384d95',
        'info_dict': {
            'id': 'ford-lopatin-live-at-primavera-sound-2011',
            'ext': 'mp3',
-            'title': 'Main Stage - Ford & Lopatin',
-            'description': 'md5:4f340fb48426423530af5a9d87bd7b91',
+            'title': 'Main Stage - Ford & Lopatin at Primavera Sound',
+            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 2452,
            'timestamp': 1307103164,


@@ -1,13 +1,26 @@
# coding: utf-8
from __future__ import unicode_literals

+import base64
import re

from .common import InfoExtractor
-from ..utils import int_or_none
+from ..aes import aes_cbc_decrypt
+from ..compat import (
+    compat_ord,
+    compat_str,
+)
+from ..utils import (
+    bytes_to_intlist,
+    ExtractorError,
+    intlist_to_bytes,
+    int_or_none,
+    strip_or_none,
+)


class RTL2IE(InfoExtractor):
+    IE_NAME = 'rtl2'
    _VALID_URL = r'http?://(?:www\.)?rtl2\.de/[^?#]*?/(?P<id>[^?#/]*?)(?:$|/(?:$|[?#]))'
    _TESTS = [{
        'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0',
@@ -98,3 +111,98 @@ class RTL2IE(InfoExtractor):
            'duration': int_or_none(video_info.get('duration')),
            'formats': formats,
        }
+
+
+class RTL2YouBaseIE(InfoExtractor):
+    _BACKWERK_BASE_URL = 'https://p-you-backwerk.rtl2apps.de/'
+
+
+class RTL2YouIE(RTL2YouBaseIE):
+    IE_NAME = 'rtl2:you'
+    _VALID_URL = r'http?://you\.rtl2\.de/(?:video/\d+/|youplayer/index\.html\?.*?\bvid=)(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://you.rtl2.de/video/3002/15740/MJUNIK%20%E2%80%93%20Home%20of%20YOU/307-hirn-wo-bist-du',
+        'info_dict': {
+            'id': '15740',
+            'ext': 'mp4',
+            'title': 'MJUNIK Home of YOU - #307 Hirn, wo bist du?!',
+            'description': 'md5:ddaa95c61b372b12b66e115b2772fe01',
+            'age_limit': 12,
+        },
+    }, {
+        'url': 'http://you.rtl2.de/youplayer/index.html?vid=15712',
+        'only_matching': True,
+    }]
+    _AES_KEY = b'\xe9W\xe4.<*\xb8\x1a\xd2\xb6\x92\xf3C\xd3\xefL\x1b\x03*\xbbbH\xc0\x03\xffo\xc2\xf2(\xaa\xaa!'
+    _GEO_COUNTRIES = ['DE']
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        stream_data = self._download_json(
+            self._BACKWERK_BASE_URL + 'stream/video/' + video_id, video_id)
+
+        data, iv = base64.b64decode(stream_data['streamUrl']).decode().split(':')
+        stream_url = intlist_to_bytes(aes_cbc_decrypt(
+            bytes_to_intlist(base64.b64decode(data)),
+            bytes_to_intlist(self._AES_KEY),
+            bytes_to_intlist(base64.b64decode(iv))
+        ))
+        if b'rtl2_you_video_not_found' in stream_url:
+            raise ExtractorError('video not found', expected=True)
+
+        formats = self._extract_m3u8_formats(
+            stream_url[:-compat_ord(stream_url[-1])].decode(),
+            video_id, 'mp4', 'm3u8_native')
+        self._sort_formats(formats)
+
+        video_data = self._download_json(
+            self._BACKWERK_BASE_URL + 'video/' + video_id, video_id)
+
+        series = video_data.get('formatTitle')
+        title = episode = video_data.get('title') or series
+        if series and series != title:
+            title = '%s - %s' % (series, title)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'description': strip_or_none(video_data.get('description')),
+            'thumbnail': video_data.get('image'),
+            'duration': int_or_none(stream_data.get('duration') or video_data.get('duration'), 1000),
+            'series': series,
+            'episode': episode,
+            'age_limit': int_or_none(video_data.get('minimumAge')),
+        }
+
+
+class RTL2YouSeriesIE(RTL2YouBaseIE):
+    IE_NAME = 'rtl2:you:series'
+    _VALID_URL = r'http?://you\.rtl2\.de/videos/(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://you.rtl2.de/videos/115/dragon-ball',
+        'info_dict': {
+            'id': '115',
+        },
+        'playlist_mincount': 5,
+    }
+
+    def _real_extract(self, url):
+        series_id = self._match_id(url)
+        stream_data = self._download_json(
+            self._BACKWERK_BASE_URL + 'videos',
+            series_id, query={
+                'formatId': series_id,
+                'limit': 1000000000,
+            })
+
+        entries = []
+        for video in stream_data.get('videos', []):
+            video_id = compat_str(video['videoId'])
+            if not video_id:
+                continue
+            entries.append(self.url_result(
+                'http://you.rtl2.de/video/%s/%s' % (series_id, video_id),
+                'RTL2You', video_id))
+        return self.playlist_result(entries, series_id)
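
The decrypted stream URL comes back PKCS#7 padded, and stream_url[:-compat_ord(stream_url[-1])] strips that padding by reading the pad length from the last byte (compat_ord is only needed on Python 2, where indexing a byte string yields a one-character string). A self-contained sketch of just that unpadding step on made-up plaintext, with no AES involved:

# Made-up plaintext plus 5 bytes of PKCS#7 padding, standing in for the decrypted payload.
padded = b'https://example.invalid/master.m3u8' + b'\x05' * 5

# padded[-1] is 5 (the pad length), so drop the last 5 bytes to recover the URL.
unpadded = padded[:-padded[-1]]
print(unpadded.decode())  # -> https://example.invalid/master.m3u8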


@@ -0,0 +1,32 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from .ooyala import OoyalaIE


class TheSunIE(InfoExtractor):
    _VALID_URL = r'https://(?:www\.)?thesun\.co\.uk/[^/]+/(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.thesun.co.uk/tvandshowbiz/2261604/orlando-bloom-and-katy-perry-post-adorable-instagram-video-together-celebrating-thanksgiving-after-split-rumours/',
        'info_dict': {
            'id': '2261604',
            'title': 'md5:cba22f48bad9218b64d5bbe0e16afddf',
        },
        'playlist_count': 2,
    }

    def _real_extract(self, url):
        article_id = self._match_id(url)

        webpage = self._download_webpage(url, article_id)

        entries = []
        for ooyala_id in re.findall(
                r'<[^>]+\b(?:id\s*=\s*"thesun-ooyala-player-|data-content-id\s*=\s*")([^"]+)',
                webpage):
            entries.append(OoyalaIE._build_url_result(ooyala_id))

        return self.playlist_result(
            entries, article_id, self._og_search_title(webpage, fatal=False))
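
The extractor collects every Ooyala embed id exposed either through the player div id prefix or a data-content-id attribute. A quick re.findall demo against a hypothetical article snippet; the markup and ids are illustrative, not copied from thesun.co.uk:

import re

HTML = '''
<div id="thesun-ooyala-player-abc123DEF456"></div>
<div class="video" data-content-id="ghi789JKL012"></div>
'''

# Same pattern as in the extractor above; findall returns the captured embed ids.
ooyala_ids = re.findall(
    r'<[^>]+\b(?:id\s*=\s*"thesun-ooyala-player-|data-content-id\s*=\s*")([^"]+)',
    HTML)
print(ooyala_ids)  # -> ['abc123DEF456', 'ghi789JKL012']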


@@ -0,0 +1,79 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    clean_html,
    determine_ext,
    extract_attributes,
    get_element_by_class,
    int_or_none,
    parse_duration,
    parse_iso8601,
)


class TV5MondePlusIE(InfoExtractor):
    IE_DESC = 'TV5MONDE+'
    _VALID_URL = r'https?://(?:www\.)?tv5mondeplus\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://www.tv5mondeplus.com/toutes-les-videos/documentaire/tdah-mon-amour-tele-quebec-tdah-mon-amour-ep001-enfants',
        'md5': '12130fc199f020673138a83466542ec6',
        'info_dict': {
            'id': 'tdah-mon-amour-tele-quebec-tdah-mon-amour-ep001-enfants',
            'ext': 'mp4',
            'title': 'Tdah, mon amour - Enfants',
            'description': 'md5:230e3aca23115afcf8006d1bece6df74',
            'upload_date': '20170401',
            'timestamp': 1491022860,
        }
    }
    _GEO_BYPASS = False

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage:
            self.raise_geo_restricted(countries=['FR'])

        series = get_element_by_class('video-detail__title', webpage)
        title = episode = get_element_by_class(
            'video-detail__subtitle', webpage) or series
        if series and series != title:
            title = '%s - %s' % (series, title)
        vpl_data = extract_attributes(self._search_regex(
            r'(<[^>]+class="video_player_loader"[^>]+>)',
            webpage, 'video player loader'))

        video_files = self._parse_json(
            vpl_data['data-broadcast'], display_id).get('files', [])
        formats = []
        for video_file in video_files:
            v_url = video_file.get('url')
            if not v_url:
                continue
            video_format = video_file.get('format') or determine_ext(v_url)
            if video_format == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    v_url, display_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
            else:
                formats.append({
                    'url': v_url,
                    'format_id': video_format,
                })
        self._sort_formats(formats)

        return {
            'id': display_id,
            'display_id': display_id,
            'title': title,
            'description': clean_html(get_element_by_class('video-detail__description', webpage)),
            'thumbnail': vpl_data.get('data-image'),
            'duration': int_or_none(vpl_data.get('data-duration')) or parse_duration(self._html_search_meta('duration', webpage)),
            'timestamp': parse_iso8601(self._html_search_meta('uploadDate', webpage)),
            'formats': formats,
            'episode': episode,
            'series': series,
        }


@@ -9,6 +9,7 @@ from .common import InfoExtractor

class VierIE(InfoExtractor):
    IE_NAME = 'vier'
+    IE_DESC = 'vier.be and vijf.be'
    _VALID_URL = r'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))'
    _TESTS = [{
        'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129',


@@ -10,6 +10,7 @@ from ..utils import (

class VRTIE(InfoExtractor):
+    IE_DESC = 'deredactie.be, sporza.be, cobra.be and cobra.canvas.be'
    _VALID_URL = r'https?://(?:deredactie|sporza|cobra(?:\.canvas)?)\.be/cm/(?:[^/]+/)+(?P<id>[^/]+)/*'
    _TESTS = [
        # deredactie.be


@@ -0,0 +1,38 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor


class VShareIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://vshare.io/d/0f64ce6',
        'md5': '16d7b8fef58846db47419199ff1ab3e7',
        'info_dict': {
            'id': '0f64ce6',
            'title': 'vl14062007715967',
            'ext': 'mp4',
        }
    }, {
        'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'https://vshare.io/d/%s' % video_id, video_id)

        title = self._html_search_regex(
            r'(?s)<div id="root-container">(.+?)<br/>', webpage, 'title')
        video_url = self._search_regex(
            r'<a[^>]+href=(["\'])(?P<url>(?:https?:)?//.+?)\1[^>]*>[Cc]lick\s+here',
            webpage, 'video url', group='url')

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
        }
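
The two regexes pull the title from the root-container div and the direct file URL from the "click here" anchor. A small demo against a made-up page; the markup and URL are illustrative, not actual vshare.io output:

import re

HTML = '''
<div id="root-container">vl14062007715967<br/>some other text</div>
<a href="https://cdn.example.invalid/files/0f64ce6.mp4">Click here</a> to download
'''

title = re.search(
    r'(?s)<div id="root-container">(.+?)<br/>', HTML).group(1)
video_url = re.search(
    r'<a[^>]+href=(["\'])(?P<url>(?:https?:)?//.+?)\1[^>]*>[Cc]lick\s+here',
    HTML).group('url')
print(title, video_url)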


@@ -1,12 +1,10 @@
from __future__ import unicode_literals

-import re
-
from .common import InfoExtractor


class WorldStarHipHopIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/(?:videos|android)/video\.php\?v=(?P<id>.*)'
+    _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/(?:videos|android)/video\.php\?.*?\bv=(?P<id>[^&]+)'
    _TESTS = [{
        'url': 'http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO',
        'md5': '9d04de741161603bf7071bbf4e883186',
@@ -17,48 +15,26 @@ class WorldStarHipHopIE(InfoExtractor):
        }
    }, {
        'url': 'http://m.worldstarhiphop.com/android/video.php?v=wshh6a7q1ny0G34ZwuIO',
-        'md5': 'dc1c76c83ecc4190bb1eb143899b87d3',
-        'info_dict': {
-            'id': 'wshh6a7q1ny0G34ZwuIO',
-            'ext': 'mp4',
-            'title': 'KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!'
-        }
+        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

-        m_vevo_id = re.search(r'videoId=(.*?)&amp?', webpage)
-        if m_vevo_id is not None:
-            return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')
+        entries = self._parse_html5_media_entries(url, webpage, video_id)

-        video_url = self._search_regex(
-            [r'so\.addVariable\("file","(.*?)"\)',
-             r'<div class="artlist">\s*<a[^>]+href="([^"]+)">'],
-            webpage, 'video URL')
+        if not entries:
+            return self.url_result(url, 'Generic')

-        if 'youtube' in video_url:
-            return self.url_result(video_url, ie='Youtube')
-
-        video_title = self._html_search_regex(
+        title = self._html_search_regex(
            [r'(?s)<div class="content-heading">\s*<h1>(.*?)</h1>',
             r'<span[^>]+class="tc-sp-pinned-title">(.*)</span>'],
            webpage, 'title')

-        # Getting thumbnail and if not thumbnail sets correct title for WSHH candy video.
-        thumbnail = self._html_search_regex(
-            r'rel="image_src" href="(.*)" />', webpage, 'thumbnail',
-            default=None)
-        if not thumbnail:
-            _title = r'candytitles.*>(.*)</span>'
-            mobj = re.search(_title, webpage)
-            if mobj is not None:
-                video_title = mobj.group(1)
-
-        return {
+        info = entries[0]
+        info.update({
            'id': video_id,
-            'url': video_url,
-            'title': video_title,
-            'thumbnail': thumbnail,
-        }
+            'title': title,
+        })
+        return info
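
The relaxed _VALID_URL now tolerates extra query parameters around v= and stops the id at the next &, which the old \?v=(?P<id>.*) form did not. A quick re check; the second URL is an illustrative variant, not one of the tests:

import re

VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/(?:videos|android)/video\.php\?.*?\bv=(?P<id>[^&]+)'

for url in (
        'http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO',
        # Illustrative variant with a leading and a trailing parameter:
        'http://m.worldstarhiphop.com/android/video.php?autoplay=1&v=wshh6a7q1ny0G34ZwuIO&t=10',
):
    # Both print wshh6a7q1ny0G34ZwuIO; the id no longer swallows trailing parameters.
    print(re.match(VALID_URL, url).group('id'))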


@@ -28,6 +28,8 @@ class XFileShareIE(InfoExtractor):
        ('streamin.to', 'Streamin.To'),
        ('xvidstage.com', 'XVIDSTAGE'),
        ('vidabc.com', 'Vid ABC'),
+        ('vidbom.com', 'VidBom'),
+        ('vidlo.us', 'vidlo'),
    )

    IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
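
IE_DESC is generated from the second column of _SITES, so the two new entries show up in the description automatically. A standalone sketch with the tuple trimmed to the rows shown above:

_SITES = (
    ('streamin.to', 'Streamin.To'),
    ('xvidstage.com', 'XVIDSTAGE'),
    ('vidabc.com', 'Vid ABC'),
    ('vidbom.com', 'VidBom'),
    ('vidlo.us', 'vidlo'),
)

# zip(*_SITES) transposes the table; index 1 is the display-name column.
IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
print(IE_DESC)
# -> XFileShare based sites: Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo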


@@ -1,3 +1,3 @@
from __future__ import unicode_literals

-__version__ = '2017.04.02'
+__version__ = '2017.04.11'