Compare commits

...

46 Commits

Author SHA1 Message Date
Sergey M․
b3d7dce429 release 2016.08.17 2016-08-17 06:21:21 +07:00
Sergey M․
a44694ab4e [ChangeLog] Actualize 2016-08-17 06:19:22 +07:00
Sergey M․
ab19b46b88 [extremetube] Modernize 2016-08-17 06:02:12 +07:00
Sergey M․
8804f10e6b [tube8] Modernize 2016-08-17 05:46:45 +07:00
Sergey M․
6be17c0870 [mofosex] Extract all formats and modernize (Closes #10335) 2016-08-17 05:45:49 +07:00
Sergey M․
8652770bd2 [keezmovies] Improve and modernize 2016-08-17 05:44:46 +07:00
Sergey M․
2a1321a272 [vbox7:generic] Add support for vbox7 embeds 2016-08-17 01:02:59 +07:00
Sergey M․
9c0fa60bf3 [vbox7] Add support for embed URLs 2016-08-17 00:42:02 +07:00
Sergey M․
502d87c546 [mtg] Improve view count extraction 2016-08-17 00:32:28 +07:00
Sergey M․
b35b0d73d8 [viafree] Add extractor (Closes #10358) 2016-08-17 00:21:30 +07:00
Sergey M․
6e7e4a6edf [mtg] Add support for viafree URLs (#10358) 2016-08-17 00:19:43 +07:00
Remita Amine
53fef319f1 [fxnetworks] extend _VALID_URL to support simpsonsworld.com 2016-08-16 16:22:34 +01:00
Remita Amine
2cabee2a7d [amcnetworks] fix typo 2016-08-16 16:22:34 +01:00
Remita Amine
11f502fac1 [theplatform] extract subtitles with multiple formats from the metadata 2016-08-16 16:22:34 +01:00
Sergey M․
98affc1a48 [xvideos] Fix test 2016-08-16 21:20:15 +07:00
Sergey M․
70a2829fee [xvideos] Fix HLS extraction (Closes #10356) 2016-08-16 21:17:52 +07:00
Remita Amine
837e56c8ee [amcnetworks] extract episode metadata 2016-08-16 14:49:32 +01:00
Remita Amine
b5ddee8c77 [amcnetworks] Add new extractor 2016-08-16 13:44:01 +01:00
Sergey M․
fb64adcbd3 [adobepass] PEP 8 2016-08-16 04:45:21 +07:00
Sergey M․
4f640f2890 [bbc:playlist] Fix tests 2016-08-16 04:43:10 +07:00
Sergey M․
254e64a20a [bbc:playlist] Add support for pagination (Closes #10349) 2016-08-16 04:36:23 +07:00
Remita Amine
818ac213eb [adobepass] add IE suffix to the extractor and remove duplicate constant 2016-08-15 21:36:34 +01:00
Remita Amine
cbef4d5c9f [fxnetworks] add test and check geo restriction 2016-08-15 17:10:45 +01:00
Remita Amine
bf90c46790 [fxnetworks] Add new extractor(closes #9462) 2016-08-15 16:34:32 +01:00
Yen Chi Hsuan
69eb4d699f [cbsnews] Remove invalid tests. CBS Live videos get deleted soon. 2016-08-15 20:29:22 +08:00
Yen Chi Hsuan
6d8ec8c3b7 [ChangeLog] Update for CBSLocal and related changes 2016-08-15 13:39:43 +08:00
Yen Chi Hsuan
760845ce99 [cbslocal] Adapt to SendtoNewsIE 2016-08-15 13:37:37 +08:00
Yen Chi Hsuan
5c2d087221 [sendtonews] Fix extraction 2016-08-15 13:31:08 +08:00
Yen Chi Hsuan
b6c4e36728 [jwplatform] Parse video_id from JWPlayer data
And remove a mysterious comma from 115c65793a
2016-08-15 13:29:01 +08:00
Sergey M․
1a57b8c18c [zippcast] Remove extractor (Closes #10332)
ZippCast is shut down
2016-08-15 08:25:24 +07:00
Remita Amine
24eb13b1c6 [uplynk,viceland] update tests and change uplynk extractors names 2016-08-14 22:45:43 +01:00
Remita Amine
525e0316c0 [adobepass] fix check for pendingLogout errors 2016-08-14 21:25:43 +01:00
Remita Amine
7e60ce9cf7 [adobepass] clear cache in case of pendingLogout errors 2016-08-14 21:24:33 +01:00
Remita Amine
e811bcf8f8 [viceland] raise ExtractorError for errors other than HTTP 400 2016-08-14 20:13:35 +01:00
Remita Amine
6103f59095 [viceland] remove outdated comment 2016-08-14 19:08:35 +01:00
Remita Amine
9fa5789279 [viceland] fix info extraction(closes #8799) 2016-08-14 19:04:23 +01:00
Remita Amine
d2ac04674d [viceland] Add new extractor(#8799) 2016-08-14 18:04:50 +01:00
Remita Amine
1fd6e30988 [adobepass] create separate class for adobe pass authentication 2016-08-14 18:04:50 +01:00
Sergey M․
884cdb6cd9 [life:embed] Improve extraction 2016-08-14 20:49:11 +07:00
Remita Amine
9771b1f901 [theplatform] use _get_netrc_login_info and fix session expiration check(#10345) 2016-08-14 11:55:28 +01:00
Remita Amine
2118fdd1a9 [common] add separate method for getting netrc login info 2016-08-14 11:55:28 +01:00
Sergey M․
320d597c21 [vgtv] Detect geo restricted videos (#10348) 2016-08-14 16:25:14 +07:00
Remita Amine
aaf44a2f47 [uplynk] Add new extractor 2016-08-13 22:53:41 +01:00
Yen Chi Hsuan
fafabc0712 Update ChangeLog for #10342
[skip ci]
2016-08-14 02:33:15 +08:00
Yen Chi Hsuan
409760a932 Merge pull request #10342 from muphil/patch-1
[xiami] bug fix for extractor xiami.py
2016-08-14 02:30:50 +08:00
phi
097eba019d bug fix for extractor xiami.py
Before applying this patch, when downloading resources from xiami.com, it crashes with these:
Traceback (most recent call last):
  File "/home/phi/.local/bin/youtube-dl", line 11, in <module>
    sys.exit(main())
  File "/home/phi/.local/lib/python3.5/site-packages/youtube_dl/__init__.py", line 433, in main
    _real_main(argv)
  File "/home/phi/.local/lib/python3.5/site-packages/youtube_dl/__init__.py", line 423, in _real_main
    retcode = ydl.download(all_urls)
  File "/home/phi/.local/lib/python3.5/site-packages/youtube_dl/YoutubeDL.py", line 1786, in download
    url, force_generic_extractor=self.params.get('force_generic_extractor', False))
  File "/home/phi/.local/lib/python3.5/site-packages/youtube_dl/YoutubeDL.py", line 691, in extract_info
    ie_result = ie.extract(url)
  File "/home/phi/.local/lib/python3.5/site-packages/youtube_dl/extractor/common.py", line 347, in extract
    return self._real_extract(url)
  File "/home/phi/.local/lib/python3.5/site-packages/youtube_dl/extractor/xiami.py", line 116, in _real_extract
    return self._extract_tracks(self._match_id(url))[0]
  File "/home/phi/.local/lib/python3.5/site-packages/youtube_dl/extractor/xiami.py", line 43, in _extract_tracks
    '%s/%s%s' % (self._API_BASE_URL, item_id, '/type/%s' % typ if typ else ''), item_id)
  File "/home/phi/.local/lib/python3.5/site-packages/youtube_dl/extractor/common.py", line 562, in _download_json
    json_string, video_id, transform_source=transform_source, fatal=fatal)
  File "/home/phi/.local/lib/python3.5/site-packages/youtube_dl/extractor/common.py", line 568, in _parse_json
    return json.loads(json_string)
  File "/usr/lib/python3.5/json/__init__.py", line 312, in loads
    s.__class__.__name__))
TypeError: the JSON object must be str, not 'NoneType'

This patch solves exactly this problem.
2016-08-14 02:18:59 +08:00
33 changed files with 1049 additions and 539 deletions

View File

@@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.13*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.13**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.17*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.17**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2016.08.13
[debug] youtube-dl version 2016.08.17
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@@ -1,3 +1,31 @@
version 2016.08.17
Core
+ Add _get_netrc_login_info
Extractors
* [mofosex] Extract all formats (#10335)
+ [generic] Add support for vbox7 embeds
+ [vbox7] Add support for embed URLs
+ [viafree] Add extractor (#10358)
+ [mtg] Add support for viafree URLs (#10358)
* [theplatform] Extract all subtitles per language
+ [xvideos] Fix HLS extraction (#10356)
+ [amcnetworks] Add extractor
+ [bbc:playlist] Add support for pagination (#10349)
+ [fxnetworks] Add extractor (#9462)
* [cbslocal] Fix extraction for SendtoNews-based videos
* [sendtonews] Fix extraction
* [jwplatform] Extract video id from JWPlayer data
- [zippcast] Remove extractor (#10332)
+ [viceland] Add extractor (#8799)
+ [adobepass] Add base extractor for Adobe Pass Authentication
* [life:embed] Improve extraction
* [vgtv] Detect geo restricted videos (#10348)
+ [uplynk] Add extractor
* [xiami] Fix extraction (#10342)
version 2016.08.13
Core
@@ -23,6 +51,7 @@ Extractors
+ [pbs] Add support for high quality HTTP formats
+ [crunchyroll] Add support for HLS formats (#10301)
version 2016.08.12
Core

View File

@@ -35,6 +35,7 @@
- **AlJazeera**
- **Allocine**
- **AlphaPorno**
- **AMCNetworks**
- **AnimeOnDemand**
- **anitube.se**
- **AnySex**
@@ -247,6 +248,7 @@
- **Funimation**
- **FunnyOrDie**
- **Fusion**
- **FXNetworks**
- **GameInformer**
- **GameOne**
- **gameone:playlist**
@@ -398,6 +400,7 @@
- **Moviezine**
- **MPORA**
- **MSN**
- **mtg**: MTG services
- **MTV**
- **mtv.de**
- **mtvservices:embedded**
@@ -731,7 +734,6 @@
- **tvp**: Telewizja Polska
- **tvp:embed**: Telewizja Polska
- **tvp:series**
- **TVPlay**: TV3Play and related services
- **Tweakers**
- **twitch:chapter**
- **twitch:clips**
@@ -748,6 +750,8 @@
- **UDNEmbed**: 聯合影音
- **Unistra**
- **uol.com.br**
- **uplynk**
- **uplynk:preplay**
- **Urort**: NRK P3 Urørt
- **URPlay**
- **USAToday**
@@ -765,7 +769,9 @@
- **VevoPlaylist**
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
- **vh1.com**
- **Viafree**
- **Vice**
- **Viceland**
- **ViceShow**
- **Vidbit**
- **Viddler**
@@ -887,4 +893,3 @@
- **ZDFChannel**
- **zingmp3:album**: mp3.zing.vn albums
- **zingmp3:song**: mp3.zing.vn songs
- **ZippCast**

View File

@@ -20,6 +20,7 @@ from ..utils import (
encodeFilename,
sanitize_open,
parse_m3u8_attributes,
update_url_query,
)
@@ -82,6 +83,7 @@ class HlsFD(FragmentFD):
self._prepare_and_start_frag_download(ctx)
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
i = 0
media_sequence = 0
decrypt_info = {'METHOD': 'NONE'}
@@ -95,6 +97,8 @@ class HlsFD(FragmentFD):
if re.match(r'^https?://', line)
else compat_urlparse.urljoin(man_url, line))
frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i)
if extra_param_to_segment_url:
frag_url = update_url_query(frag_url, extra_param_to_segment_url)
success = ctx['dl'].download(frag_filename, {'url': frag_url})
if not success:
return False
@@ -120,6 +124,8 @@ class HlsFD(FragmentFD):
if not re.match(r'^https?://', decrypt_info['URI']):
decrypt_info['URI'] = compat_urlparse.urljoin(
man_url, decrypt_info['URI'])
if extra_param_to_segment_url:
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_param_to_segment_url)
decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read()
elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
media_sequence = int(line[22:])

View File

@@ -0,0 +1,134 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
import time
import xml.etree.ElementTree as etree
from .common import InfoExtractor
from ..utils import (
unescapeHTML,
urlencode_postdata,
unified_timestamp,
)
class AdobePassIE(InfoExtractor):
    """Base extractor implementing authentication against Adobe Pass.

    Subclasses describe the protected video with _get_mvpd_resource() and
    exchange that description for a media token with _extract_mvpd_auth().
    """

    # Endpoint template; filled with the service name ('authenticate/saml',
    # 'session', 'authorize', 'shortAuthorize').
    _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
    _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'

    @staticmethod
    def _get_mvpd_resource(provider_id, title, guid, rating):
        """Build the RSS resource document sent to the authorize endpoint.

        The document has a <channel> titled with provider_id that contains a
        single <item> carrying title, guid and a 'media:rating' element with
        the 'urn:v-chip' scheme.  Returns the serialized document as text.
        """
        channel = etree.Element('channel')
        channel_title = etree.SubElement(channel, 'title')
        channel_title.text = provider_id
        item = etree.SubElement(channel, 'item')
        resource_title = etree.SubElement(item, 'title')
        resource_title.text = title
        resource_guid = etree.SubElement(item, 'guid')
        resource_guid.text = guid
        resource_rating = etree.SubElement(item, 'media:rating')
        resource_rating.attrib = {'scheme': 'urn:v-chip'}
        resource_rating.text = rating
        return '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">' + etree.tostring(channel).decode() + '</rss>'

    def _extract_mvpd_auth(self, url, video_id, requestor_id, resource):
        """Return the 'shortAuthorize' response body for the given resource.

        Tokens are kept in the downloader cache under section 'mvpd', keyed by
        requestor_id: the entry holds 'authn_token' plus one authorization
        token per resource guid.  Whenever a response contains
        '<pendingLogout' the cache entry is reset and the whole procedure is
        started over.

        Returns an empty string when no login credentials are available
        from .netrc.
        """
        def xml_text(xml_str, tag):
            # Text content of the first <tag>...</tag> pair in xml_str
            return self._search_regex(
                '<%s>(.+?)</%s>' % (tag, tag), xml_str, tag)

        mvpd_headers = {
            'ap_42': 'anonymous',
            'ap_11': 'Linux i686',
            'ap_z': self._USER_AGENT,
            'User-Agent': self._USER_AGENT,
        }

        guid = xml_text(resource, 'guid')
        requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {}
        authn_token = requestor_info.get('authn_token')
        if authn_token:
            token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(authn_token, 'simpleTokenExpires')))
            if token_expires and token_expires <= int(time.time()):
                # Expired: forget the authentication token together with
                # every authorization token cached alongside it
                authn_token = None
                requestor_info = {}
        if not authn_token:
            # TODO add support for other TV Providers
            mso_id = 'DTV'
            username, password = self._get_netrc_login_info(mso_id)
            if not username or not password:
                return ''

            def post_form(form_page, note, data=None):
                # Submit the first <form> found in form_page; when no explicit
                # data is supplied the form's hidden inputs are posted as is
                post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
                return self._download_webpage(
                    post_url, video_id, note, data=urlencode_postdata(data or self._hidden_inputs(form_page)), headers={
                        'Content-Type': 'application/x-www-form-urlencoded',
                    })

            provider_redirect_page = self._download_webpage(
                self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
                'Downloading Provider Redirect Page', query={
                    'noflash': 'true',
                    'mso_id': mso_id,
                    'requestor_id': requestor_id,
                    'no_iframe': 'false',
                    'domain_name': 'adobe.com',
                    'redirect_url': url,
                })
            provider_login_page = post_form(
                provider_redirect_page, 'Downloading Provider Login Page')
            mvpd_confirm_page = post_form(provider_login_page, 'Logging in', {
                'username': username,
                'password': password,
            })
            post_form(mvpd_confirm_page, 'Confirming Login')

            session = self._download_webpage(
                self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id,
                'Retrieving Session', data=urlencode_postdata({
                    '_method': 'GET',
                    'requestor_id': requestor_id,
                }), headers=mvpd_headers)
            if '<pendingLogout' in session:
                self._downloader.cache.store('mvpd', requestor_id, {})
                return self._extract_mvpd_auth(url, video_id, requestor_id, resource)
            authn_token = unescapeHTML(xml_text(session, 'authnToken'))
            requestor_info['authn_token'] = authn_token
            self._downloader.cache.store('mvpd', requestor_id, requestor_info)

        # Authorization tokens are cached per resource guid
        authz_token = requestor_info.get(guid)
        if not authz_token:
            authorize = self._download_webpage(
                self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id,
                'Retrieving Authorization Token', data=urlencode_postdata({
                    'resource_id': resource,
                    'requestor_id': requestor_id,
                    'authentication_token': authn_token,
                    'mso_id': xml_text(authn_token, 'simpleTokenMsoID'),
                    'userMeta': '1',
                }), headers=mvpd_headers)
            if '<pendingLogout' in authorize:
                self._downloader.cache.store('mvpd', requestor_id, {})
                return self._extract_mvpd_auth(url, video_id, requestor_id, resource)
            authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
            requestor_info[guid] = authz_token
            self._downloader.cache.store('mvpd', requestor_id, requestor_info)

        mvpd_headers.update({
            'ap_19': xml_text(authn_token, 'simpleSamlNameID'),
            'ap_23': xml_text(authn_token, 'simpleSamlSessionIndex'),
        })

        short_authorize = self._download_webpage(
            self._SERVICE_PROVIDER_TEMPLATE % 'shortAuthorize',
            video_id, 'Retrieving Media Token', data=urlencode_postdata({
                'authz_token': authz_token,
                'requestor_id': requestor_id,
                'session_guid': xml_text(authn_token, 'simpleTokenAuthenticationGuid'),
                'hashed_guid': 'false',
            }), headers=mvpd_headers)
        if '<pendingLogout' in short_authorize:
            self._downloader.cache.store('mvpd', requestor_id, {})
            return self._extract_mvpd_auth(url, video_id, requestor_id, resource)
        return short_authorize

View File

@@ -109,7 +109,10 @@ class AENetworksIE(AENetworksBaseIE):
info = self._parse_theplatform_metadata(theplatform_metadata)
if theplatform_metadata.get('AETN$isBehindWall'):
requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain]
resource = '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/"><channel><title>%s</title><item><title>%s</title><guid>%s</guid><media:rating scheme="urn:v-chip">%s</media:rating></item></channel></rss>' % (requestor_id, theplatform_metadata['title'], theplatform_metadata['AETN$PPL_pplProgramId'], theplatform_metadata['ratings'][0]['rating'])
resource = self._get_mvpd_resource(
requestor_id, theplatform_metadata['title'],
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
theplatform_metadata['ratings'][0]['rating'])
query['auth'] = self._extract_mvpd_auth(
url, video_id, requestor_id, resource)
info.update(self._search_json_ld(webpage, video_id, fatal=False))

View File

@@ -0,0 +1,91 @@
# coding: utf-8
from __future__ import unicode_literals
from .theplatform import ThePlatformIE
from ..utils import (
update_url_query,
parse_age_limit,
int_or_none,
)
class AMCNetworksIE(ThePlatformIE):
    """Extractor for movie and episode pages on amc.com, bbcamerica.com,
    ifc.com and wetv.com (see _VALID_URL)."""

    _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?season-\d+/episode-\d+(?:-(?:[^/]+/)?|/))(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
        'md5': '',
        'info_dict': {
            'id': 's3MX01Nl4vPH',
            'ext': 'mp4',
            'title': 'Maron - Season 4 - Step 1',
            'description': 'In denial about his current situation, Marc is reluctantly convinced by his friends to enter rehab. Starring Marc Maron and Constance Zimmer.',
            'age_limit': 17,
            'upload_date': '20160505',
            'timestamp': 1462468831,
            'uploader': 'AMCN',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
        'only_matching': True,
    }, {
        'url': 'http://www.amc.com/shows/preacher/full-episodes/season-01/episode-00/pilot',
        'only_matching': True,
    }, {
        'url': 'http://www.wetv.com/shows/million-dollar-matchmaker/season-01/episode-06-the-dumped-dj-and-shallow-hal',
        'only_matching': True,
    }, {
        'url': 'http://www.ifc.com/movies/chaos',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the video embedded in the page at url.

        The page supplies the thePlatform link URL, whether authentication
        is required and, in that case, the requestor id used to obtain an
        auth token that is appended to the media URL query.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        query = {
            'mbr': 'true',
            'manifest': 'm3u',
        }
        media_url = self._search_regex(r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)', webpage, 'media url')
        # Dots are escaped so that only the literal link.theplatform.com
        # host is accepted
        theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
            r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), display_id)
        info = self._parse_theplatform_metadata(theplatform_metadata)
        video_id = theplatform_metadata['pid']
        title = theplatform_metadata['title']
        rating = theplatform_metadata['ratings'][0]['rating']
        auth_required = self._search_regex(r'window\.authRequired\s*=\s*(true|false);', webpage, 'auth required')
        if auth_required == 'true':
            requestor_id = self._search_regex(r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)', webpage, 'requestor id')
            resource = self._get_mvpd_resource(requestor_id, title, video_id, rating)
            query['auth'] = self._extract_mvpd_auth(url, video_id, requestor_id, resource)
        media_url = update_url_query(media_url, query)
        formats, subtitles = self._extract_theplatform_smil(media_url, video_id)
        self._sort_formats(formats)
        info.update({
            'id': video_id,
            'subtitles': subtitles,
            'formats': formats,
            # The rating string is converted exactly once; feeding the
            # converted value back into parse_age_limit is pointless
            'age_limit': parse_age_limit(rating),
        })
        ns_keys = theplatform_metadata.get('$xmlns', {}).keys()
        if ns_keys:
            # Custom metadata fields are prefixed with the first declared
            # namespace key
            ns = list(ns_keys)[0]
            series = theplatform_metadata.get(ns + '$show')
            season_number = int_or_none(theplatform_metadata.get(ns + '$season'))
            episode = theplatform_metadata.get(ns + '$episodeTitle')
            episode_number = int_or_none(theplatform_metadata.get(ns + '$episode'))
            if season_number:
                title = 'Season %d - %s' % (season_number, title)
            if series:
                title = '%s - %s' % (series, title)
            info.update({
                'title': title,
                'series': series,
                'season_number': season_number,
                'episode': episode,
                'episode_number': episode_number,
            })
        return info

View File

@@ -2,6 +2,7 @@
from __future__ import unicode_literals
import re
import itertools
from .common import InfoExtractor
from ..utils import (
@@ -17,6 +18,7 @@ from ..utils import (
from ..compat import (
compat_etree_fromstring,
compat_HTTPError,
compat_urlparse,
)
@@ -1056,19 +1058,35 @@ class BBCCoUkArticleIE(InfoExtractor):
class BBCCoUkPlaylistBaseIE(InfoExtractor):
def _entries(self, webpage, url, playlist_id):
    """Yield a url_result for every video id found on the playlist pages.

    Starts with the already downloaded webpage.  When the URL query carries
    a 'page' parameter only that page is processed; otherwise the "next"
    pagination link is followed until there is none left.
    """
    url_query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
    only_requested_page = 'page' in url_query
    # The page passed in is page 1, so the first page fetched here is page 2
    page_num = 2
    while True:
        found_ids = re.findall(
            self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage)
        for video_id in found_ids:
            yield self.url_result(
                self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
        if only_requested_page:
            return
        next_page = self._search_regex(
            r'<li[^>]+class=(["\'])pagination_+next\1[^>]*><a[^>]+href=(["\'])(?P<url>(?:(?!\2).)+)\2',
            webpage, 'next page url', default=None, group='url')
        if not next_page:
            break
        # NOTE(review): the bare page number is passed as the fourth
        # positional argument of _download_webpage - confirm that this is
        # the parameter it is meant for
        webpage = self._download_webpage(
            compat_urlparse.urljoin(url, next_page), playlist_id,
            'Downloading page %d' % page_num, page_num)
        page_num += 1
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
entries = [
self.url_result(self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
for video_id in re.findall(
self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage)]
title, description = self._extract_title_and_description(webpage)
return self.playlist_result(entries, playlist_id, title, description)
return self.playlist_result(
self._entries(webpage, url, playlist_id),
playlist_id, title, description)
class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
@@ -1117,6 +1135,24 @@ class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
'description': 'French thriller serial about a missing teenager.',
},
'playlist_mincount': 7,
}, {
# multipage playlist, explicit page
'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1',
'info_dict': {
'id': 'b00mfl7n',
'title': 'Frozen Planet - Clips - BBC One',
'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
},
'playlist_mincount': 24,
}, {
# multipage playlist, all pages
'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips',
'info_dict': {
'id': 'b00mfl7n',
'title': 'Frozen Planet - Clips - BBC One',
'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
},
'playlist_mincount': 142,
}, {
'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06',
'only_matching': True,

View File

@@ -41,13 +41,8 @@ class CBSLocalIE(AnvatoIE):
'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/',
'info_dict': {
'id': 'GxfCe0Zo7D-175909-5588',
'ext': 'mp4',
'title': 'Recap: CLE 15, CIN 6',
'description': '5/16/16: Indians\' bats explode for 15 runs in a win',
'upload_date': '20160516',
'timestamp': 1463433840,
'duration': 49,
},
'playlist_count': 9,
'params': {
# m3u8 download
'skip_download': True,
@@ -60,12 +55,11 @@ class CBSLocalIE(AnvatoIE):
sendtonews_url = SendtoNewsIE._extract_url(webpage)
if sendtonews_url:
info_dict = {
'_type': 'url_transparent',
'url': compat_urlparse.urljoin(url, sendtonews_url),
}
else:
info_dict = self._extract_anvato_videos(webpage, display_id)
return self.url_result(
compat_urlparse.urljoin(url, sendtonews_url),
ie=SendtoNewsIE.ie_key())
info_dict = self._extract_anvato_videos(webpage, display_id)
time_str = self._html_search_regex(
r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False)

View File

@@ -70,7 +70,8 @@ class CBSNewsLiveVideoIE(InfoExtractor):
IE_DESC = 'CBS News Live Videos'
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
_TESTS = [{
# Live videos get deleted soon. See http://www.cbsnews.com/live/ for the latest examples
_TEST = {
'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
'info_dict': {
'id': 'clinton-sanders-prepare-to-face-off-in-nh',
@@ -78,15 +79,8 @@ class CBSNewsLiveVideoIE(InfoExtractor):
'title': 'Clinton, Sanders Prepare To Face Off In NH',
'duration': 334,
},
'skip': 'Video gone, redirected to http://www.cbsnews.com/live/',
}, {
'url': 'http://www.cbsnews.com/live/video/video-shows-intense-paragliding-accident/',
'info_dict': {
'id': 'video-shows-intense-paragliding-accident',
'ext': 'flv',
'title': 'Video Shows Intense Paragliding Accident',
},
}]
'skip': 'Video gone',
}
def _real_extract(self, url):
video_id = self._match_id(url)

View File

@@ -662,6 +662,24 @@ class InfoExtractor(object):
else:
return res
def _get_netrc_login_info(self, netrc_machine=None):
username = None
password = None
netrc_machine = netrc_machine or self._NETRC_MACHINE
if self._downloader.params.get('usenetrc', False):
try:
info = netrc.netrc().authenticators(netrc_machine)
if info is not None:
username = info[0]
password = info[2]
else:
raise netrc.NetrcParseError('No authenticators for %s' % netrc_machine)
except (IOError, netrc.NetrcParseError) as err:
self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err))
return (username, password)
def _get_login_info(self):
"""
Get the login info as (username, password)
@@ -679,16 +697,8 @@ class InfoExtractor(object):
if downloader_params.get('username') is not None:
username = downloader_params['username']
password = downloader_params['password']
elif downloader_params.get('usenetrc', False):
try:
info = netrc.netrc().authenticators(self._NETRC_MACHINE)
if info is not None:
username = info[0]
password = info[2]
else:
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
except (IOError, netrc.NetrcParseError) as err:
self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err))
else:
username, password = self._get_netrc_login_info()
return (username, password)

View File

@@ -29,6 +29,7 @@ from .aftonbladet import AftonbladetIE
from .airmozilla import AirMozillaIE
from .aljazeera import AlJazeeraIE
from .alphaporno import AlphaPornoIE
from .amcnetworks import AMCNetworksIE
from .animeondemand import AnimeOnDemandIE
from .anitube import AnitubeIE
from .anysex import AnySexIE
@@ -287,6 +288,7 @@ from .freevideo import FreeVideoIE
from .funimation import FunimationIE
from .funnyordie import FunnyOrDieIE
from .fusion import FusionIE
from .fxnetworks import FXNetworksIE
from .gameinformer import GameInformerIE
from .gameone import (
GameOneIE,
@@ -896,7 +898,10 @@ from .tvp import (
TVPIE,
TVPSeriesIE,
)
from .tvplay import TVPlayIE
from .tvplay import (
TVPlayIE,
ViafreeIE,
)
from .tweakers import TweakersIE
from .twentyfourvideo import TwentyFourVideoIE
from .twentymin import TwentyMinutenIE
@@ -926,6 +931,10 @@ from .udn import UDNEmbedIE
from .digiteka import DigitekaIE
from .unistra import UnistraIE
from .uol import UOLIE
from .uplynk import (
UplynkIE,
UplynkPreplayIE,
)
from .urort import UrortIE
from .urplay import URPlayIE
from .usatoday import USATodayIE
@@ -954,6 +963,7 @@ from .vice import (
ViceIE,
ViceShowIE,
)
from .viceland import VicelandIE
from .vidbit import VidbitIE
from .viddler import ViddlerIE
from .videodetective import VideoDetectiveIE
@@ -1104,4 +1114,3 @@ from .zingmp3 import (
ZingMp3SongIE,
ZingMp3AlbumIE,
)
from .zippcast import ZippCastIE

View File

@@ -1,22 +1,17 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
sanitized_Request,
str_to_int,
)
from ..utils import str_to_int
from .keezmovies import KeezMoviesIE
class ExtremeTubeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?P<id>[^/#?&]+)'
class ExtremeTubeIE(KeezMoviesIE):
_VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?:(?P<display_id>[^/]+)-)(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
'md5': '344d0c6d50e2f16b06e49ca011d8ac69',
'md5': '1fb9228f5e3332ec8c057d6ac36f33e0',
'info_dict': {
'id': 'music-video-14-british-euro-brit-european-cumshots-swallow-652431',
'id': '652431',
'display_id': 'music-video-14-british-euro-brit-european-cumshots-swallow',
'ext': 'mp4',
'title': 'Music Video 14 british euro brit european cumshots swallow',
'uploader': 'unknown',
@@ -35,58 +30,22 @@ class ExtremeTubeIE(InfoExtractor):
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage, info = self._extract_info(url)
req = sanitized_Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)
if not info['title']:
info['title'] = self._search_regex(
r'<h1[^>]+title="([^"]+)"[^>]*>', webpage, 'title')
video_title = self._html_search_regex(
r'<h1 [^>]*?title="([^"]+)"[^>]*>', webpage, 'title')
uploader = self._html_search_regex(
r'Uploaded by:\s*</strong>\s*(.+?)\s*</div>',
webpage, 'uploader', fatal=False)
view_count = str_to_int(self._html_search_regex(
view_count = str_to_int(self._search_regex(
r'Views:\s*</strong>\s*<span>([\d,\.]+)</span>',
webpage, 'view count', fatal=False))
flash_vars = self._parse_json(
self._search_regex(
r'var\s+flashvars\s*=\s*({.+?});', webpage, 'flash vars'),
video_id)
formats = []
for quality_key, video_url in flash_vars.items():
height = int_or_none(self._search_regex(
r'quality_(\d+)[pP]$', quality_key, 'height', default=None))
if not height:
continue
f = {
'url': video_url,
}
mobj = re.search(
r'/(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
if mobj:
height = int(mobj.group('height'))
bitrate = int(mobj.group('bitrate'))
f.update({
'format_id': '%dp-%dk' % (height, bitrate),
'height': height,
'tbr': bitrate,
})
else:
f.update({
'format_id': '%dp' % height,
'height': height,
})
formats.append(f)
self._sort_formats(formats)
return {
'id': video_id,
'title': video_title,
'formats': formats,
info.update({
'uploader': uploader,
'view_count': view_count,
'age_limit': 18,
}
})
return info

View File

@@ -0,0 +1,70 @@
# coding: utf-8
from __future__ import unicode_literals
from .adobepass import AdobePassIE
from ..utils import (
update_url_query,
extract_attributes,
parse_age_limit,
smuggle_url,
)
class FXNetworksIE(AdobePassIE):
    """Extractor for fxnetworks.com and simpsonsworld.com video pages; the
    actual media extraction is delegated to the ThePlatform extractor."""

    _VALID_URL = r'https?://(?:www\.)?(?:fxnetworks|simpsonsworld)\.com/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.fxnetworks.com/video/719841347694',
        'md5': '1447d4722e42ebca19e5232ab93abb22',
        'info_dict': {
            'id': '719841347694',
            'ext': 'mp4',
            'title': 'Vanpage',
            'description': 'F*ck settling down. You\'re the Worst returns for an all new season August 31st on FXX.',
            'age_limit': 14,
            'uploader': 'NEWA-FNG-FX',
            'upload_date': '20160706',
            'timestamp': 1467844741,
        },
        'add_ie': ['ThePlatform'],
    }, {
        'url': 'http://www.simpsonsworld.com/video/716094019682',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_transparent result pointing at the release URL.

        Video details come from the attributes of the page's anchor whose
        rel points at link.theplatform.com.  An auth token is added to the
        query when the anchor is flagged with data-req-auth="1".
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        geo_block_notice = 'The content you are trying to access is not available in your region.'
        if geo_block_notice in webpage:
            self.raise_geo_restricted()

        anchor_html = self._search_regex(
            r'(<a.+?rel="http://link\.theplatform\.com/s/.+?</a>)', webpage, 'video data')
        video_data = extract_attributes(anchor_html)
        player_type = self._search_regex(r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', default=None)
        release_url = video_data['rel']
        title = video_data['data-title']
        rating = video_data.get('data-rating')

        # Movies are requested as an m3u manifest, everything else over http
        query = {'mbr': 'true'}
        if player_type == 'movies':
            query['manifest'] = 'm3u'
        else:
            query['switch'] = 'http'

        needs_auth = video_data.get('data-req-auth') == '1'
        if needs_auth:
            resource = self._get_mvpd_resource(
                video_data['data-channel'], title,
                video_data.get('data-guid'), rating)
            query['auth'] = self._extract_mvpd_auth(url, video_id, 'fx', resource)

        smuggled_url = smuggle_url(
            update_url_query(release_url, query), {'force_smil_url': True})

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'title': title,
            'url': smuggled_url,
            'thumbnail': video_data.get('data-large-thumb'),
            'age_limit': parse_age_limit(rating),
            'ie_key': 'ThePlatform',
        }

View File

@@ -72,6 +72,7 @@ from .kaltura import KalturaIE
from .eagleplatform import EaglePlatformIE
from .facebook import FacebookIE
from .soundcloud import SoundcloudIE
from .vbox7 import Vbox7IE
class GenericIE(InfoExtractor):
@@ -1373,6 +1374,18 @@ class GenericIE(InfoExtractor):
},
'add_ie': [ArkenaIE.ie_key()],
},
{
'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
'info_dict': {
'id': '1c7141f46c',
'ext': 'mp4',
'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив',
},
'params': {
'skip_download': True,
},
'add_ie': [Vbox7IE.ie_key()],
},
# {
# # TODO: find another test
# # http://schema.org/VideoObject
@@ -2239,6 +2252,11 @@ class GenericIE(InfoExtractor):
'uploader': video_uploader,
}
# Look for VBOX7 embeds
vbox7_url = Vbox7IE._extract_url(webpage)
if vbox7_url:
return self.url_result(vbox7_url, Vbox7IE.ie_key())
# Looking for http://schema.org/VideoObject
json_ld = self._search_json_ld(
webpage, video_id, default={}, expected_type='VideoObject')

View File

@@ -30,7 +30,7 @@ class JWPlatformBaseIE(InfoExtractor):
return self._parse_jwplayer_data(
jwplayer_data, video_id, *args, **kwargs)
def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True, m3u8_id=None, rtmp_params=None, base_url=None):
def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, m3u8_id=None, rtmp_params=None, base_url=None):
# JWPlayer backward compatibility: flattened playlists
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
if 'playlist' not in jwplayer_data:
@@ -43,6 +43,8 @@ class JWPlatformBaseIE(InfoExtractor):
if 'sources' not in video_data:
video_data['sources'] = [video_data]
this_video_id = video_id or video_data['mediaid']
formats = []
for source in video_data['sources']:
source_url = self._proto_relative_url(source['file'])
@@ -52,7 +54,7 @@ class JWPlatformBaseIE(InfoExtractor):
ext = mimetype2ext(source_type) or determine_ext(source_url)
if source_type == 'hls' or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
source_url, video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
# https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
formats.append({
@@ -68,7 +70,7 @@ class JWPlatformBaseIE(InfoExtractor):
'ext': ext,
}
if source_url.startswith('rtmp'):
a_format['ext'] = 'flv',
a_format['ext'] = 'flv'
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
# of jwplayer.flash.swf
@@ -95,7 +97,7 @@ class JWPlatformBaseIE(InfoExtractor):
})
entries.append({
'id': video_id,
'id': this_video_id,
'title': video_data['title'] if require_title else video_data.get('title'),
'description': video_data.get('description'),
'thumbnail': self._proto_relative_url(video_data.get('image')),

View File

@@ -3,64 +3,124 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..aes import aes_decrypt_text
from ..compat import (
compat_str,
compat_urllib_parse_unquote,
)
from ..utils import (
sanitized_Request,
url_basename,
determine_ext,
ExtractorError,
int_or_none,
str_to_int,
strip_or_none,
)
class KeezMoviesIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P<id>[0-9]+)(?:[/?&]|$)'
_TEST = {
_VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/(?:(?P<display_id>[^/]+)-)?(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
'md5': '1c1e75d22ffa53320f45eeb07bc4cdc0',
'info_dict': {
'id': '1214711',
'display_id': 'petite-asian-lady-mai-playing-in-bathtub',
'ext': 'mp4',
'title': 'Petite Asian Lady Mai Playing In Bathtub',
'age_limit': 18,
'thumbnail': 're:^https?://.*\.jpg$',
'view_count': int,
'age_limit': 18,
}
}
}, {
'url': 'http://www.keezmovies.com/video/1214711',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
def _extract_info(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id
req = sanitized_Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)
# embedded video
mobj = re.search(r'href="([^"]+)"></iframe>', webpage)
if mobj:
embedded_url = mobj.group(1)
return self.url_result(embedded_url)
video_title = self._html_search_regex(
r'<h1 [^>]*>([^<]+)', webpage, 'title')
flashvars = self._parse_json(self._search_regex(
r'var\s+flashvars\s*=\s*([^;]+);', webpage, 'flashvars'), video_id)
webpage = self._download_webpage(
url, display_id, headers={'Cookie': 'age_verified=1'})
formats = []
for height in (180, 240, 480):
if flashvars.get('quality_%dp' % height):
video_url = flashvars['quality_%dp' % height]
a_format = {
'url': video_url,
'height': height,
'format_id': '%dp' % height,
}
filename_parts = url_basename(video_url).split('_')
if len(filename_parts) >= 2 and re.match(r'\d+[Kk]', filename_parts[1]):
a_format['tbr'] = int(filename_parts[1][:-1])
formats.append(a_format)
format_urls = set()
age_limit = self._rta_search(webpage)
title = None
thumbnail = None
duration = None
encrypted = False
return {
def extract_format(format_url, height=None):
if not isinstance(format_url, compat_str) or not format_url.startswith('http'):
return
if format_url in format_urls:
return
format_urls.add(format_url)
tbr = int_or_none(self._search_regex(
r'[/_](\d+)[kK][/_]', format_url, 'tbr', default=None))
if not height:
height = int_or_none(self._search_regex(
r'[/_](\d+)[pP][/_]', format_url, 'height', default=None))
if encrypted:
format_url = aes_decrypt_text(
video_url, title, 32).decode('utf-8')
formats.append({
'url': format_url,
'format_id': '%dp' % height if height else None,
'height': height,
'tbr': tbr,
})
flashvars = self._parse_json(
self._search_regex(
r'flashvars\s*=\s*({.+?});', webpage,
'flashvars', default='{}'),
display_id, fatal=False)
if flashvars:
title = flashvars.get('video_title')
thumbnail = flashvars.get('image_url')
duration = int_or_none(flashvars.get('video_duration'))
encrypted = flashvars.get('encrypted') is True
for key, value in flashvars.items():
mobj = re.search(r'quality_(\d+)[pP]', key)
if mobj:
extract_format(value, int(mobj.group(1)))
video_url = flashvars.get('video_url')
if video_url and determine_ext(video_url, None):
extract_format(video_url)
video_url = self._html_search_regex(
r'flashvars\.video_url\s*=\s*(["\'])(?P<url>http.+?)\1',
webpage, 'video url', default=None, group='url')
if video_url:
extract_format(compat_urllib_parse_unquote(video_url))
if not formats:
if 'title="This video is no longer available"' in webpage:
raise ExtractorError(
'Video %s is no longer available' % video_id, expected=True)
self._sort_formats(formats)
if not title:
title = self._html_search_regex(
r'<h1[^>]*>([^<]+)', webpage, 'title')
return webpage, {
'id': video_id,
'title': video_title,
'display_id': display_id,
'title': strip_or_none(title),
'thumbnail': thumbnail,
'duration': duration,
'age_limit': 18,
'formats': formats,
'age_limit': age_limit,
'thumbnail': flashvars.get('image_url')
}
def _real_extract(self, url):
webpage, info = self._extract_info(url)
info['view_count'] = str_to_int(self._search_regex(
r'<b>([\d,.]+)</b> Views?', webpage, 'view count', fatal=False))
return info

View File

@@ -4,7 +4,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..compat import (
compat_str,
compat_urlparse,
)
from ..utils import (
determine_ext,
ExtractorError,
@@ -96,7 +99,7 @@ class LifeNewsIE(InfoExtractor):
r'<video[^>]+><source[^>]+src=["\'](.+?)["\']', webpage)
iframe_links = re.findall(
r'<iframe[^>]+src=["\']((?:https?:)?//embed\.life\.ru/embed/.+?)["\']',
r'<iframe[^>]+src=["\']((?:https?:)?//embed\.life\.ru/(?:embed|video)/.+?)["\']',
webpage)
if not video_urls and not iframe_links:
@@ -164,9 +167,9 @@ class LifeNewsIE(InfoExtractor):
class LifeEmbedIE(InfoExtractor):
IE_NAME = 'life:embed'
_VALID_URL = r'https?://embed\.life\.ru/embed/(?P<id>[\da-f]{32})'
_VALID_URL = r'https?://embed\.life\.ru/(?:embed|video)/(?P<id>[\da-f]{32})'
_TEST = {
_TESTS = [{
'url': 'http://embed.life.ru/embed/e50c2dec2867350528e2574c899b8291',
'md5': 'b889715c9e49cb1981281d0e5458fbbe',
'info_dict': {
@@ -175,30 +178,57 @@ class LifeEmbedIE(InfoExtractor):
'title': 'e50c2dec2867350528e2574c899b8291',
'thumbnail': 're:http://.*\.jpg',
}
}
}, {
# with 1080p
'url': 'https://embed.life.ru/video/e50c2dec2867350528e2574c899b8291',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
thumbnail = None
formats = []
for video_url in re.findall(r'"file"\s*:\s*"([^"]+)', webpage):
video_url = compat_urlparse.urljoin(url, video_url)
ext = determine_ext(video_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='m3u8'))
else:
formats.append({
'url': video_url,
'format_id': ext,
'preference': 1,
})
def extract_m3u8(manifest_url):
formats.extend(self._extract_m3u8_formats(
manifest_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='m3u8'))
def extract_original(original_url):
formats.append({
'url': original_url,
'format_id': determine_ext(original_url, None),
'preference': 1,
})
playlist = self._parse_json(
self._search_regex(
r'options\s*=\s*({.+?});', webpage, 'options', default='{}'),
video_id).get('playlist', {})
if playlist:
master = playlist.get('master')
if isinstance(master, compat_str) and determine_ext(master) == 'm3u8':
extract_m3u8(compat_urlparse.urljoin(url, master))
original = playlist.get('original')
if isinstance(original, compat_str):
extract_original(original)
thumbnail = playlist.get('image')
# Old rendition fallback
if not formats:
for video_url in re.findall(r'"file"\s*:\s*"([^"]+)', webpage):
video_url = compat_urlparse.urljoin(url, video_url)
if determine_ext(video_url) == 'm3u8':
extract_m3u8(video_url)
else:
extract_original(video_url)
self._sort_formats(formats)
thumbnail = self._search_regex(
thumbnail = thumbnail or self._search_regex(
r'"image"\s*:\s*"([^"]+)', webpage, 'thumbnail', default=None)
return {

View File

@@ -1,53 +1,56 @@
from __future__ import unicode_literals
import os
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_unquote,
compat_urllib_parse_urlparse,
from ..utils import (
int_or_none,
str_to_int,
unified_strdate,
)
from ..utils import sanitized_Request
from .keezmovies import KeezMoviesIE
class MofosexIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?P<url>mofosex\.com/videos/(?P<id>[0-9]+)/.*?\.html)'
_TEST = {
'url': 'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
'md5': '1b2eb47ac33cc75d4a80e3026b613c5a',
class MofosexIE(KeezMoviesIE):
_VALID_URL = r'https?://(?:www\.)?mofosex\.com/videos/(?P<id>\d+)/(?P<display_id>[^/?#&.]+)\.html'
_TESTS = [{
'url': 'http://www.mofosex.com/videos/318131/amateur-teen-playing-and-masturbating-318131.html',
'md5': '39a15853632b7b2e5679f92f69b78e91',
'info_dict': {
'id': '5018',
'id': '318131',
'display_id': 'amateur-teen-playing-and-masturbating-318131',
'ext': 'mp4',
'title': 'Japanese Teen Music Video',
'title': 'amateur teen playing and masturbating',
'thumbnail': 're:^https?://.*\.jpg$',
'upload_date': '20121114',
'view_count': int,
'like_count': int,
'dislike_count': int,
'age_limit': 18,
}
}
}, {
# This video is no longer available
'url': 'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
url = 'http://www.' + mobj.group('url')
webpage, info = self._extract_info(url)
req = sanitized_Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)
view_count = str_to_int(self._search_regex(
r'VIEWS:</span>\s*([\d,.]+)', webpage, 'view count', fatal=False))
like_count = int_or_none(self._search_regex(
r'id=["\']amountLikes["\'][^>]*>(\d+)', webpage,
'like count', fatal=False))
dislike_count = int_or_none(self._search_regex(
r'id=["\']amountDislikes["\'][^>]*>(\d+)', webpage,
'like count', fatal=False))
upload_date = unified_strdate(self._html_search_regex(
r'Added:</span>([^<]+)', webpage, 'upload date', fatal=False))
video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, 'title')
video_url = compat_urllib_parse_unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, 'video_url'))
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]
format = path.split('/')[5].split('_')[:2]
format = '-'.join(format)
info.update({
'view_count': view_count,
'like_count': like_count,
'dislike_count': dislike_count,
'upload_date': upload_date,
'thumbnail': self._og_search_thumbnail(webpage),
})
age_limit = self._rta_search(webpage)
return {
'id': video_id,
'title': video_title,
'url': video_url,
'ext': extension,
'format': format,
'format_id': format,
'age_limit': age_limit,
}
return info

View File

@@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .theplatform import ThePlatformIE
from .adobepass import AdobePassIE
from ..utils import (
smuggle_url,
url_basename,
@@ -65,7 +65,7 @@ class NationalGeographicVideoIE(InfoExtractor):
}
class NationalGeographicIE(ThePlatformIE):
class NationalGeographicIE(AdobePassIE):
IE_NAME = 'natgeo'
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?[^/]+/(?:videos|episodes)/(?P<id>[^/?]+)'
@@ -119,7 +119,7 @@ class NationalGeographicIE(ThePlatformIE):
auth_resource_id = self._search_regex(
r"video_auth_resourceId\s*=\s*'([^']+)'",
webpage, 'auth resource id')
query['auth'] = self._extract_mvpd_auth(url, display_id, 'natgeo', auth_resource_id) or ''
query['auth'] = self._extract_mvpd_auth(url, display_id, 'natgeo', auth_resource_id)
return {
'_type': 'url_transparent',
@@ -131,7 +131,7 @@ class NationalGeographicIE(ThePlatformIE):
}
class NationalGeographicEpisodeGuideIE(ThePlatformIE):
class NationalGeographicEpisodeGuideIE(InfoExtractor):
IE_NAME = 'natgeo:episodeguide'
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?(?P<id>[^/]+)/episode-guide'
_TESTS = [

View File

@@ -4,33 +4,43 @@ from __future__ import unicode_literals
import re
from .jwplatform import JWPlatformBaseIE
from ..compat import compat_parse_qs
from ..utils import (
ExtractorError,
parse_duration,
float_or_none,
parse_iso8601,
update_url_query,
)
class SendtoNewsIE(JWPlatformBaseIE):
_VALID_URL = r'https?://embed\.sendtonews\.com/player/embed\.php\?(?P<query>[^#]+)'
_VALID_URL = r'https?://embed\.sendtonews\.com/player2/embedplayer\.php\?.*\bSC=(?P<id>[0-9A-Za-z-]+)'
_TEST = {
# From http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/
'url': 'http://embed.sendtonews.com/player/embed.php?SK=GxfCe0Zo7D&MK=175909&PK=5588&autoplay=on&sound=yes',
'url': 'http://embed.sendtonews.com/player2/embedplayer.php?SC=GxfCe0Zo7D-175909-5588&type=single&autoplay=on&sound=YES',
'info_dict': {
'id': 'GxfCe0Zo7D-175909-5588',
'ext': 'mp4',
'title': 'Recap: CLE 15, CIN 6',
'description': '5/16/16: Indians\' bats explode for 15 runs in a win',
'duration': 49,
'id': 'GxfCe0Zo7D-175909-5588'
},
'playlist_count': 9,
# test the first video only to prevent lengthy tests
'playlist': [{
'info_dict': {
'id': '198180',
'ext': 'mp4',
'title': 'Recap: CLE 5, LAA 4',
'description': '8/14/16: Naquin, Almonte lead Indians in 5-4 win',
'duration': 57.343,
'thumbnail': 're:https?://.*\.jpg$',
'upload_date': '20160815',
'timestamp': 1471221961,
},
}],
'params': {
# m3u8 download
'skip_download': True,
},
}
_URL_TEMPLATE = '//embed.sendtonews.com/player/embed.php?SK=%s&MK=%s&PK=%s'
_URL_TEMPLATE = '//embed.sendtonews.com/player2/embedplayer.php?SC=%s'
@classmethod
def _extract_url(cls, webpage):
@@ -39,48 +49,41 @@ class SendtoNewsIE(JWPlatformBaseIE):
.*\bSC=(?P<SC>[0-9a-zA-Z-]+).*
\1>''', webpage)
if mobj:
sk, mk, pk = mobj.group('SC').split('-')
return cls._URL_TEMPLATE % (sk, mk, pk)
sc = mobj.group('SC')
return cls._URL_TEMPLATE % sc
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
params = compat_parse_qs(mobj.group('query'))
playlist_id = self._match_id(url)
if 'SK' not in params or 'MK' not in params or 'PK' not in params:
raise ExtractorError('Invalid URL', expected=True)
data_url = update_url_query(
url.replace('embedplayer.php', 'data_read.php'),
{'cmd': 'loadInitial'})
playlist_data = self._download_json(data_url, playlist_id)
video_id = '-'.join([params['SK'][0], params['MK'][0], params['PK'][0]])
entries = []
for video in playlist_data['playlistData'][0]:
info_dict = self._parse_jwplayer_data(
video['jwconfiguration'],
require_title=False, rtmp_params={'no_resume': True})
webpage = self._download_webpage(url, video_id)
thumbnails = []
if video.get('thumbnailUrl'):
thumbnails.append({
'id': 'normal',
'url': video['thumbnailUrl'],
})
if video.get('smThumbnailUrl'):
thumbnails.append({
'id': 'small',
'url': video['smThumbnailUrl'],
})
info_dict.update({
'title': video['S_headLine'],
'description': video.get('S_fullStory'),
'thumbnails': thumbnails,
'duration': float_or_none(video.get('SM_length')),
'timestamp': parse_iso8601(video.get('S_sysDate'), delimiter=' '),
})
entries.append(info_dict)
jwplayer_data_str = self._search_regex(
r'jwplayer\("[^"]+"\)\.setup\((.+?)\);', webpage, 'JWPlayer data')
js_vars = {
'w': 1024,
'h': 768,
'modeVar': 'html5',
}
for name, val in js_vars.items():
js_val = '%d' % val if isinstance(val, int) else '"%s"' % val
jwplayer_data_str = jwplayer_data_str.replace(':%s,' % name, ':%s,' % js_val)
info_dict = self._parse_jwplayer_data(
self._parse_json(jwplayer_data_str, video_id),
video_id, require_title=False, rtmp_params={'no_resume': True})
title = self._html_search_regex(
r'<div[^>]+class="embedTitle">([^<]+)</div>', webpage, 'title')
description = self._html_search_regex(
r'<div[^>]+class="embedSubTitle">([^<]+)</div>', webpage,
'description', fatal=False)
duration = parse_duration(self._html_search_regex(
r'<div[^>]+class="embedDetails">([0-9:]+)', webpage,
'duration', fatal=False))
info_dict.update({
'title': title,
'description': description,
'duration': duration,
})
return info_dict
return self.playlist_result(entries, playlist_id)

View File

@@ -1,13 +1,13 @@
from __future__ import unicode_literals
from .theplatform import ThePlatformIE
from .adobepass import AdobePassIE
from ..utils import (
update_url_query,
smuggle_url,
)
class SyfyIE(ThePlatformIE):
class SyfyIE(AdobePassIE):
_VALID_URL = r'https?://www\.syfy\.com/(?:[^/]+/)?videos/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'http://www.syfy.com/theinternetruinedmylife/videos/the-internet-ruined-my-life-season-1-trailer',
@@ -40,7 +40,9 @@ class SyfyIE(ThePlatformIE):
'manifest': 'm3u',
}
if syfy_mpx.get('entitlement') == 'auth':
resource = '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/"><channel><title>syfy</title><item><title><![CDATA[%s]]></title><guid>%s</guid><media:rating scheme="urn:v-chip">%s</media:rating></item></channel></rss>' % (title, video_id, syfy_mpx.get('mpxRating', 'TV-14'))
resource = self._get_mvpd_resource(
'syfy', title, video_id,
syfy_mpx.get('mpxRating', 'TV-14'))
query['auth'] = self._extract_mvpd_auth(
url, video_id, 'syfy', resource)

View File

@@ -6,10 +6,10 @@ import time
import hmac
import binascii
import hashlib
import netrc
from .once import OnceIE
from .adobepass import AdobePassIE
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
@@ -25,9 +25,6 @@ from ..utils import (
xpath_with_ns,
mimetype2ext,
find_xpath_attr,
unescapeHTML,
urlencode_postdata,
unified_timestamp,
)
default_ns = 'http://www.w3.org/2005/SMIL21/Language'
@@ -76,10 +73,10 @@ class ThePlatformBaseIE(OnceIE):
if isinstance(captions, list):
for caption in captions:
lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
subtitles[lang] = [{
subtitles.setdefault(lang, []).append({
'ext': mimetype2ext(mime),
'url': src,
}]
})
return {
'title': info['title'],
@@ -96,7 +93,7 @@ class ThePlatformBaseIE(OnceIE):
return self._parse_theplatform_metadata(info)
class ThePlatformIE(ThePlatformBaseIE):
class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
_VALID_URL = r'''(?x)
(?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
(?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
@@ -167,7 +164,6 @@ class ThePlatformIE(ThePlatformBaseIE):
'url': 'http://player.theplatform.com/p/NnzsPC/onsite_universal/select/media/guid/2410887629/2928790?fwsitesection=nbc_the_blacklist_video_library&autoPlay=true&carouselID=137781',
'only_matching': True,
}]
_SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
@classmethod
def _extract_urls(cls, webpage):
@@ -202,96 +198,6 @@ class ThePlatformIE(ThePlatformBaseIE):
sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
return '%s&sig=%s' % (url, sig)
def _extract_mvpd_auth(self, url, video_id, requestor_id, resource):
def xml_text(xml_str, tag):
return self._search_regex(
'<%s>(.+?)</%s>' % (tag, tag), xml_str, tag)
mvpd_headers = {
'ap_42': 'anonymous',
'ap_11': 'Linux i686',
'ap_z': 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0',
'User-Agent': 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0',
}
guid = xml_text(resource, 'guid')
requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {}
authn_token = requestor_info.get('authn_token')
if authn_token:
token_expires = unified_timestamp(xml_text(authn_token, 'simpleTokenExpires').replace('_GMT', ''))
if token_expires and token_expires >= time.time():
authn_token = None
if not authn_token:
# TODO add support for other TV Providers
mso_id = 'DTV'
login_info = netrc.netrc().authenticators(mso_id)
if not login_info:
return None
def post_form(form_page, note, data={}):
post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
return self._download_webpage(
post_url, video_id, note, data=urlencode_postdata(data or self._hidden_inputs(form_page)), headers={
'Content-Type': 'application/x-www-form-urlencoded',
})
provider_redirect_page = self._download_webpage(
self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
'Downloading Provider Redirect Page', query={
'noflash': 'true',
'mso_id': mso_id,
'requestor_id': requestor_id,
'no_iframe': 'false',
'domain_name': 'adobe.com',
'redirect_url': url,
})
provider_login_page = post_form(
provider_redirect_page, 'Downloading Provider Login Page')
mvpd_confirm_page = post_form(provider_login_page, 'Logging in', {
'username': login_info[0],
'password': login_info[2],
})
post_form(mvpd_confirm_page, 'Confirming Login')
session = self._download_webpage(
self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id,
'Retrieving Session', data=urlencode_postdata({
'_method': 'GET',
'requestor_id': requestor_id,
}), headers=mvpd_headers)
authn_token = unescapeHTML(xml_text(session, 'authnToken'))
requestor_info['authn_token'] = authn_token
self._downloader.cache.store('mvpd', requestor_id, requestor_info)
authz_token = requestor_info.get(guid)
if not authz_token:
authorize = self._download_webpage(
self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id,
'Retrieving Authorization Token', data=urlencode_postdata({
'resource_id': resource,
'requestor_id': requestor_id,
'authentication_token': authn_token,
'mso_id': xml_text(authn_token, 'simpleTokenMsoID'),
'userMeta': '1',
}), headers=mvpd_headers)
authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
requestor_info[guid] = authz_token
self._downloader.cache.store('mvpd', requestor_id, requestor_info)
mvpd_headers.update({
'ap_19': xml_text(authn_token, 'simpleSamlNameID'),
'ap_23': xml_text(authn_token, 'simpleSamlSessionIndex'),
})
return self._download_webpage(
self._SERVICE_PROVIDER_TEMPLATE % 'shortAuthorize',
video_id, 'Retrieving Media Token', data=urlencode_postdata({
'authz_token': authz_token,
'requestor_id': requestor_id,
'session_guid': xml_text(authn_token, 'simpleTokenAuthenticationGuid'),
'hashed_guid': 'false',
}), headers=mvpd_headers)
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})

View File

@@ -1,18 +1,13 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
sanitized_Request,
str_to_int,
)
from ..aes import aes_decrypt_text
from .keezmovies import KeezMoviesIE
class Tube8IE(InfoExtractor):
class Tube8IE(KeezMoviesIE):
_VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.tube8.com/teen/kasia-music-video/229795/',
@@ -33,47 +28,17 @@ class Tube8IE(InfoExtractor):
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
webpage, info = self._extract_info(url)
req = sanitized_Request(url)
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, display_id)
if not info['title']:
info['title'] = self._html_search_regex(
r'videoTitle\s*=\s*"([^"]+)', webpage, 'title')
flashvars = self._parse_json(
self._search_regex(
r'flashvars\s*=\s*({.+?});\r?\n', webpage, 'flashvars'),
video_id)
formats = []
for key, video_url in flashvars.items():
if not isinstance(video_url, compat_str) or not video_url.startswith('http'):
continue
height = self._search_regex(
r'quality_(\d+)[pP]', key, 'height', default=None)
if not height:
continue
if flashvars.get('encrypted') is True:
video_url = aes_decrypt_text(
video_url, flashvars['video_title'], 32).decode('utf-8')
formats.append({
'url': video_url,
'format_id': '%sp' % height,
'height': int(height),
})
self._sort_formats(formats)
thumbnail = flashvars.get('image_url')
title = self._html_search_regex(
r'videoTitle\s*=\s*"([^"]+)', webpage, 'title')
description = self._html_search_regex(
r'>Description:</strong>\s*(.+?)\s*<', webpage, 'description', fatal=False)
uploader = self._html_search_regex(
r'<span class="username">\s*(.+?)\s*<',
webpage, 'uploader', fatal=False)
duration = int_or_none(flashvars.get('video_duration'))
like_count = int_or_none(self._search_regex(
r'rupVar\s*=\s*"(\d+)"', webpage, 'like count', fatal=False))
@@ -86,18 +51,13 @@ class Tube8IE(InfoExtractor):
r'<span id="allCommentsCount">(\d+)</span>',
webpage, 'comment count', fatal=False))
return {
'id': video_id,
'display_id': display_id,
'title': title,
info.update({
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
'duration': duration,
'view_count': view_count,
'like_count': like_count,
'dislike_count': dislike_count,
'comment_count': comment_count,
'age_limit': 18,
'formats': formats,
}
})
return info

View File

@@ -15,21 +15,31 @@ from ..utils import (
int_or_none,
parse_iso8601,
qualities,
try_get,
update_url_query,
)
class TVPlayIE(InfoExtractor):
IE_DESC = 'TV3Play and related services'
_VALID_URL = r'''(?x)https?://(?:www\.)?
(?:tvplay(?:\.skaties)?\.lv/parraides|
(?:tv3play|play\.tv3)\.lt/programos|
tv3play(?:\.tv3)?\.ee/sisu|
tv(?:3|6|8|10)play\.se/program|
(?:(?:tv3play|viasat4play|tv6play)\.no|tv3play\.dk)/programmer|
play\.novatv\.bg/programi
)/[^/]+/(?P<id>\d+)
'''
IE_NAME = 'mtg'
IE_DESC = 'MTG services'
_VALID_URL = r'''(?x)
(?:
mtg:|
https?://
(?:www\.)?
(?:
tvplay(?:\.skaties)?\.lv/parraides|
(?:tv3play|play\.tv3)\.lt/programos|
tv3play(?:\.tv3)?\.ee/sisu|
(?:tv(?:3|6|8|10)play|viafree)\.se/program|
(?:(?:tv3play|viasat4play|tv6play|viafree)\.no|(?:tv3play|viafree)\.dk)/programmer|
play\.novatv\.bg/programi
)
/(?:[^/]+/)+
)
(?P<id>\d+)
'''
_TESTS = [
{
'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true',
@@ -194,9 +204,22 @@ class TVPlayIE(InfoExtractor):
'url': 'http://tvplay.skaties.lv/parraides/vinas-melo-labak/418113?autostart=true',
'only_matching': True,
},
{
# views is null
'url': 'http://tvplay.skaties.lv/parraides/tv3-zinas/760183',
'only_matching': True,
},
{
'url': 'http://tv3play.tv3.ee/sisu/kodu-keset-linna/238551?autostart=true',
'only_matching': True,
},
{
'url': 'http://www.viafree.se/program/underhallning/i-like-radio-live/sasong-1/676869',
'only_matching': True,
},
{
'url': 'mtg:418113',
'only_matching': True,
}
]
@@ -204,13 +227,13 @@ class TVPlayIE(InfoExtractor):
video_id = self._match_id(url)
video = self._download_json(
'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON')
'http://playapi.mtgx.tv/v3/videos/%s' % video_id, video_id, 'Downloading video JSON')
title = video['title']
try:
streams = self._download_json(
'http://playapi.mtgx.tv/v1/videos/stream/%s' % video_id,
'http://playapi.mtgx.tv/v3/videos/stream/%s' % video_id,
video_id, 'Downloading streams JSON')
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
@@ -289,8 +312,61 @@ class TVPlayIE(InfoExtractor):
'season_number': season_number,
'duration': int_or_none(video.get('duration')),
'timestamp': parse_iso8601(video.get('created_at')),
'view_count': int_or_none(video.get('views', {}).get('total')),
'view_count': try_get(video, lambda x: x['views']['total'], int),
'age_limit': int_or_none(video.get('age_limit', 0)),
'formats': formats,
'subtitles': subtitles,
}
class ViafreeIE(InfoExtractor):
    """Extractor for viafree.{se,no,dk} programme pages addressed by slug.

    The page is downloaded only to find the numeric video id, which is then
    delegated to TVPlayIE through an ``mtg:<id>`` URL.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?
                        viafree\.
                        (?:
                            (?:dk|no)/programmer|
                            se/program
                        )
                        /(?:[^/]+/)+(?P<id>[^/?#&]+)
                    '''
    _TESTS = [{
        'url': 'http://www.viafree.se/program/livsstil/husraddarna/sasong-2/avsnitt-2',
        'info_dict': {
            'id': '395375',
            'ext': 'mp4',
            'title': 'Husräddarna S02E02',
            'description': 'md5:4db5c933e37db629b5a2f75dfb34829e',
            'series': 'Husräddarna',
            'season': 'Säsong 2',
            'season_number': 2,
            'duration': 2576,
            'timestamp': 1400596321,
            'upload_date': '20140520',
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': [TVPlayIE.ie_key()],
    }, {
        'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1',
        'only_matching': True,
    }, {
        'url': 'http://www.viafree.dk/programmer/reality/paradise-hotel/saeson-7/episode-5',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # URLs that TVPlayIE already accepts are left to it.
        if TVPlayIE.suitable(url):
            return False
        return super(ViafreeIE, cls).suitable(url)

    def _real_extract(self, url):
        # The URL only carries a slug; it is used for the page download.
        page_slug = self._match_id(url)
        webpage = self._download_webpage(url, page_slug)

        # The numeric id (six or more digits) sits in the "currentVideo"
        # object embedded in the page.
        video_id = self._search_regex(
            r'currentVideo["\']\s*:\s*.+?["\']id["\']\s*:\s*["\'](?P<id>\d{6,})',
            webpage, 'video id')

        return self.url_result('mtg:%s' % video_id, TVPlayIE.ie_key())

View File

@@ -0,0 +1,70 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
float_or_none,
ExtractorError,
)
class UplynkIE(InfoExtractor):
    """Extractor for uplynk.com content URLs (``.m3u8`` / ``.json``).

    Accepts either a 32-hex-digit asset id or an ``ext/<owner>/<external id>``
    path, optionally followed by a ``pbs`` session parameter.
    """

    IE_NAME = 'uplynk'
    _VALID_URL = r'https?://.*?\.uplynk\.com/(?P<path>ext/[0-9a-f]{32}/(?P<external_id>[^/?&]+)|(?P<id>[0-9a-f]{32}))\.(?:m3u8|json)(?:.*?\bpbs=(?P<session_id>[^&]+))?'
    _TEST = {
        'url': 'http://content.uplynk.com/e89eaf2ce9054aa89d92ddb2d817a52e.m3u8',
        'info_dict': {
            'id': 'e89eaf2ce9054aa89d92ddb2d817a52e',
            'ext': 'mp4',
            'title': '030816-kgo-530pm-solar-eclipse-vid_web.mp4',
            'uploader_id': '4413701bf5a1488db55b767f8ae9d4fa',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _extract_uplynk_info(self, uplynk_content_url):
        """Build the info dict for an uplynk content URL.

        The URL is always matched against ``UplynkIE._VALID_URL`` (not the
        subclass pattern), so subclasses can pass a content URL they built
        themselves. Raises ExtractorError (expected) when the asset info
        response reports ``error == 1``.
        """
        path, external_id, video_id, session_id = re.match(UplynkIE._VALID_URL, uplynk_content_url).groups()
        display_id = video_id or external_id
        formats = self._extract_m3u8_formats('http://content.uplynk.com/%s.m3u8' % path, display_id, 'mp4')
        if session_id:
            # Attach the playback session to every format so it is carried
            # along as an extra segment URL parameter.
            for f in formats:
                f['extra_param_to_segment_url'] = {
                    'pbs': session_id,
                }
        self._sort_formats(formats)
        asset = self._download_json('http://content.uplynk.com/player/assetinfo/%s.json' % path, display_id)
        if asset.get('error') == 1:
            # Was '% said: %s': '% s' is parsed as a '%s' conversion with a
            # space flag, which swallowed the "s" and garbled the message.
            raise ExtractorError('%s said: %s' % (self.IE_NAME, asset['msg']), expected=True)

        return {
            'id': asset['asset'],
            'title': asset['desc'],
            'thumbnail': asset.get('default_poster_url'),
            'duration': float_or_none(asset.get('duration')),
            'uploader_id': asset.get('owner'),
            'formats': formats,
        }

    def _real_extract(self, url):
        return self._extract_uplynk_info(url)
class UplynkPreplayIE(UplynkIE):
    """Extractor for uplynk ``preplay`` / ``preplay2`` JSON URLs."""

    IE_NAME = 'uplynk:preplay'
    _VALID_URL = r'https?://.*?\.uplynk\.com/preplay2?/(?P<path>ext/[0-9a-f]{32}/(?P<external_id>[^/?&]+)|(?P<id>[0-9a-f]{32}))\.json'
    _TEST = None

    def _real_extract(self, url):
        """Fetch the preplay JSON and extract from the matching content URL.

        If the preplay response has a ``sid``, it is passed along as the
        ``pbs`` query parameter of the content URL.
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('id') or mobj.group('external_id')

        preplay = self._download_json(url, display_id)
        sid = preplay.get('sid')

        content_url = 'http://content.uplynk.com/%s.m3u8' % mobj.group('path')
        if sid:
            content_url = content_url + '?pbs=' + sid

        return self._extract_uplynk_info(content_url)

View File

@@ -1,12 +1,14 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import urlencode_postdata
class Vbox7IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?vbox7\.com/play:(?P<id>[^/]+)'
_VALID_URL = r'https?://(?:www\.)?vbox7\.com/(?:play:|emb/external\.php\?.*?\bvid=)(?P<id>[\da-fA-F]+)'
_TESTS = [{
'url': 'http://vbox7.com/play:0946fff23c',
'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
@@ -24,15 +26,27 @@ class Vbox7IE(InfoExtractor):
'title': 'Смях! Чудо - чист за секунди - Скрита камера',
},
'skip': 'georestricted',
}, {
'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1',
'only_matching': True,
}]
@staticmethod
def _extract_url(webpage):
mobj = re.search(
'<iframe[^>]+src=(?P<q>["\'])(?P<url>(?:https?:)?//vbox7\.com/emb/external\.php.+?)(?P=q)',
webpage)
if mobj:
return mobj.group('url')
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
webpage = self._download_webpage(
'http://vbox7.com/play:%s' % video_id, video_id)
title = self._html_search_regex(
r'<title>(.*)</title>', webpage, 'title').split('/')[0].strip()
r'<title>(.+?)</title>', webpage, 'title').split('/')[0].strip()
video_url = self._search_regex(
r'src\s*:\s*(["\'])(?P<url>.+?.mp4.*?)\1',

View File

@@ -8,6 +8,7 @@ from .xstream import XstreamIE
from ..utils import (
ExtractorError,
float_or_none,
try_get,
)
@@ -129,6 +130,11 @@ class VGTVIE(XstreamIE):
'url': 'http://ap.vgtv.no/webtv#!/video/111084/de-nye-bysyklene-lettere-bedre-gir-stoerre-hjul-og-feste-til-mobil',
'only_matching': True,
},
{
# geoblocked
'url': 'http://www.vgtv.no/#!/video/127205/inside-the-mind-of-favela-funk',
'only_matching': True,
},
]
def _real_extract(self, url):
@@ -196,6 +202,12 @@ class VGTVIE(XstreamIE):
info['formats'].extend(formats)
if not info['formats']:
properties = try_get(
data, lambda x: x['streamConfiguration']['properties'], list)
if properties and 'geoblocked' in properties:
raise self.raise_geo_restricted()
self._sort_formats(info['formats'])
info.update({

View File

@@ -0,0 +1,107 @@
# coding: utf-8
from __future__ import unicode_literals
import time
import hashlib
import json
from .adobepass import AdobePassIE
from ..compat import compat_HTTPError
from ..utils import (
int_or_none,
parse_age_limit,
str_or_none,
parse_duration,
ExtractorError,
extract_attributes,
)
class VicelandIE(AdobePassIE):
    """Extractor for viceland.com video pages.

    Reads the ``<watch-hub>`` element of the page, requests a signed preplay
    document and returns a ``url_transparent`` result handled by
    UplynkPreplay.
    """

    _VALID_URL = r'https?://(?:www\.)?viceland\.com/[^/]+/video/[^/]+/(?P<id>[a-f0-9]+)'
    _TEST = {
        'url': 'https://www.viceland.com/en_us/video/cyberwar-trailer/57608447973ee7705f6fbd4e',
        'info_dict': {
            'id': '57608447973ee7705f6fbd4e',
            'ext': 'mp4',
            'title': 'CYBERWAR (Trailer)',
            'description': 'Tapping into the geopolitics of hacking and surveillance, Ben Makuch travels the world to meet with hackers, government officials, and dissidents to investigate the ecosystem of cyberwarfare.',
            'age_limit': 14,
            'timestamp': 1466008539,
            'upload_date': '20160615',
            'uploader_id': '11',
            'uploader': 'Viceland',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['UplynkPreplay'],
    }

    def _real_extract(self, url):
        """Extract metadata and the uplynk preplay URL for a Viceland video.

        Raises ExtractorError (expected) with the server-provided details
        when the preplay request is answered with HTTP 400.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        watch_hub_data = extract_attributes(self._search_regex(
            r'(?s)(<watch-hub\s*.+?</watch-hub>)', webpage, 'watch hub'))
        # From here on the id used is the one announced by the page itself.
        video_id = watch_hub_data['vms-id']
        title = watch_hub_data['video-title']

        query = {}
        if watch_hub_data.get('video-locked') == '1':
            # Locked videos need a TV provider (Adobe Pass) token.
            resource = self._get_mvpd_resource(
                'VICELAND', title, video_id,
                watch_hub_data.get('video-rating'))
            query['tvetoken'] = self._extract_mvpd_auth(url, video_id, 'VICELAND', resource)

        # signature generation algorithm is reverse engineered from signatureGenerator in
        # webpack:///../shared/~/vice-player/dist/js/vice-player.js in
        # https://www.viceland.com/assets/common/js/web.vendor.bundle.js
        exp = int(time.time()) + 14400
        query.update({
            'exp': exp,
            'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
        })

        try:
            preplay = self._download_json('https://www.viceland.com/en_us/preplay/%s' % video_id, video_id, query=query)
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
                error = json.loads(e.cause.read().decode())
                raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True)
            raise

        video_data = preplay['video']
        base = video_data['base']
        uplynk_preplay_url = preplay['preplayURL']
        # `or {}` rather than a .get() default: a JSON null value would
        # otherwise come back as None and break the lookups below.
        episode = video_data.get('episode') or {}
        channel = video_data.get('channel') or {}
        channel_base = channel.get('base') or {}

        subtitles = {}
        cc_url = preplay.get('ccURL')
        if cc_url:
            subtitles['en'] = [{
                'url': cc_url,
            }]

        return {
            '_type': 'url_transparent',
            'url': uplynk_preplay_url,
            'id': video_id,
            'title': title,
            'description': base.get('body'),
            'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'),
            'duration': parse_duration(video_data.get('video_duration') or watch_hub_data.get('video-duration')),
            'timestamp': int_or_none(video_data.get('created_at')),
            'age_limit': parse_age_limit(video_data.get('video_rating')),
            'series': video_data.get('show_title') or watch_hub_data.get('show-title'),
            'episode_number': int_or_none(episode.get('episode_number') or watch_hub_data.get('episode')),
            'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
            'season_number': int_or_none(watch_hub_data.get('season')),
            'season_id': str_or_none(episode.get('season_id')),
            'uploader': channel_base.get('title') or watch_hub_data.get('channel-title'),
            'uploader_id': str_or_none(channel.get('id')),
            'subtitles': subtitles,
            'ie_key': 'UplynkPreplay',
        }

View File

@@ -13,6 +13,7 @@ class XiamiBaseIE(InfoExtractor):
webpage = super(XiamiBaseIE, self)._download_webpage(*args, **kwargs)
if '>Xiami is currently not available in your country.<' in webpage:
self.raise_geo_restricted('Xiami is currently not available in your country')
return webpage
def _extract_track(self, track, track_id=None):
title = track['title']

View File

@@ -15,10 +15,10 @@ class XVideosIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?xvideos\.com/video(?P<id>[0-9]+)(?:.*)'
_TEST = {
'url': 'http://www.xvideos.com/video4588838/biker_takes_his_girl',
'md5': '4b46ae6ea5e6e9086e714d883313c0c9',
'md5': '14cea69fcb84db54293b1e971466c2e1',
'info_dict': {
'id': '4588838',
'ext': 'flv',
'ext': 'mp4',
'title': 'Biker Takes his Girl',
'age_limit': 18,
}
@@ -42,24 +42,24 @@ class XVideosIE(InfoExtractor):
video_url = compat_urllib_parse_unquote(self._search_regex(
r'flv_url=(.+?)&', webpage, 'video URL', default=''))
if video_url:
formats.append({'url': video_url})
formats.append({
'url': video_url,
'format_id': 'flv',
})
player_args = self._search_regex(
r'(?s)new\s+HTML5Player\((.+?)\)', webpage, ' html5 player', default=None)
if player_args:
for arg in player_args.split(','):
format_url = self._search_regex(
r'(["\'])(?P<url>https?://.+?)\1', arg, 'url',
default=None, group='url')
if not format_url:
continue
ext = determine_ext(format_url)
if ext == 'mp4':
formats.append({'url': format_url})
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
for kind, _, format_url in re.findall(
r'setVideo([^(]+)\((["\'])(http.+?)\2\)', webpage):
format_id = kind.lower()
if format_id == 'hls':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
elif format_id in ('urllow', 'urlhigh'):
formats.append({
'url': format_url,
'format_id': '%s-%s' % (determine_ext(format_url, 'mp4'), format_id[3:]),
'quality': -2 if format_id.endswith('low') else None,
})
self._sort_formats(formats)

View File

@@ -1,94 +0,0 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
str_to_int,
)
class ZippCastIE(InfoExtractor):
    """Extractor for zippcast.com video pages and ``videoview.php`` URLs."""

    _VALID_URL = r'https?://(?:www\.)?zippcast\.com/(?:video/|videoview\.php\?.*\bvplay=)(?P<id>[0-9a-zA-Z]+)'
    _TESTS = [{
        # m3u8, hq direct link
        'url': 'http://www.zippcast.com/video/c9cfd5c7e44dbc29c81',
        'md5': '5ea0263b5606866c4d6cda0fc5e8c6b6',
        'info_dict': {
            'id': 'c9cfd5c7e44dbc29c81',
            'ext': 'mp4',
            'title': '[Vinesauce] Vinny - Digital Space Traveler',
            'description': 'Muted on youtube, but now uploaded in it\'s original form.',
            # Raw string: "\." is a regex escape, not a string escape.
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'vinesauce',
            'view_count': int,
            'categories': ['Entertainment'],
            'tags': list,
        },
    }, {
        # f4m, lq ipod direct link
        'url': 'http://www.zippcast.com/video/b79c0a233e9c6581775',
        'only_matching': True,
    }, {
        'url': 'http://www.zippcast.com/videoview.php?vplay=c9cfd5c7e44dbc29c81&auto=no',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Collect formats and page metadata for a ZippCast video.

        Formats come from two places on the canonical ``/video/<id>`` page:
        the ``<source>`` tag (direct link) and the player ``src`` value,
        which is expanded as HLS or HDS depending on its extension.
        """
        video_id = self._match_id(url)

        # Always fetch the canonical page, whichever URL form was given.
        webpage = self._download_webpage(
            'http://www.zippcast.com/video/%s' % video_id, video_id)

        formats = []
        video_url = self._search_regex(
            r'<source[^>]+src=(["\'])(?P<url>.+?)\1', webpage,
            'video url', default=None, group='url')
        if video_url:
            formats.append({
                'url': video_url,
                'format_id': 'http',
                'preference': 0,  # direct link is almost always of worse quality
            })
        src_url = self._search_regex(
            r'src\s*:\s*(?:escape\()?(["\'])(?P<url>http://.+?)\1',
            webpage, 'src', default=None, group='url')
        ext = determine_ext(src_url)
        if ext == 'm3u8':
            formats.extend(self._extract_m3u8_formats(
                src_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False))
        elif ext == 'f4m':
            formats.extend(self._extract_f4m_formats(
                src_url, video_id, f4m_id='hds', fatal=False))
        self._sort_formats(formats)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage) or self._html_search_meta(
            'description', webpage)
        uploader = self._search_regex(
            r'<a[^>]+href="https?://[^/]+/profile/[^>]+>([^<]+)</a>',
            webpage, 'uploader', fatal=False)
        thumbnail = self._og_search_thumbnail(webpage)
        view_count = str_to_int(self._search_regex(
            r'>([\d,.]+) views!', webpage, 'view count', fatal=False))

        categories = re.findall(
            r'<a[^>]+href="https?://[^/]+/categories/[^"]+">([^<]+),?<',
            webpage)
        tags = re.findall(
            r'<a[^>]+href="https?://[^/]+/search/tags/[^"]+">([^<]+),?<',
            webpage)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'view_count': view_count,
            'categories': categories,
            'tags': tags,
            'formats': formats,
        }

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2016.08.13'
__version__ = '2016.08.17'