Compare commits
20 Commits
2017.02.04
...
2017.02.07
Author | SHA1 | Date | |
---|---|---|---|
013877298d | |||
c87f95f991 | |||
f28aeff264 | |||
242a14a1f6 | |||
d5d904ff7d | |||
5620f840f6 | |||
b7a8c1bcfa | |||
7097bffba6 | |||
2aec7256ae | |||
815482d4eb | |||
9c14fe9681 | |||
e705755739 | |||
019f4c0371 | |||
2ab2c0d1f5 | |||
caf0f5f8b7 | |||
e4e50f60b1 | |||
6ef3e65a7b | |||
6fd138bed8 | |||
49bd8d5e2e | |||
3d2c2752c5 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.04.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.04.1**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.07*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated versions will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.07**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2017.02.04.1
|
||||
[debug] youtube-dl version 2017.02.07
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
@ -6,6 +6,7 @@ python:
|
||||
- "3.3"
|
||||
- "3.4"
|
||||
- "3.5"
|
||||
- "3.6"
|
||||
sudo: false
|
||||
script: nosetests test --verbose
|
||||
notifications:
|
||||
|
19
ChangeLog
19
ChangeLog
@ -1,3 +1,22 @@
|
||||
version 2017.02.07
|
||||
|
||||
Core
|
||||
* [extractor/common] Fix audio only with audio group in m3u8 (#11995)
|
||||
+ [downloader/fragment] Respect --no-part
|
||||
* [extractor/common] Speed-up HTML5 media entries extraction (#11979)
|
||||
|
||||
Extractors
|
||||
* [pornhub] Fix extraction (#11997)
|
||||
+ [canalplus] Add support for cstar.fr (#11990)
|
||||
+ [extractor/generic] Improve RTMP support (#11993)
|
||||
+ [gaskrank] Add support for gaskrank.tv (#11685)
|
||||
* [bandcamp] Fix extraction for incomplete albums (#11727)
|
||||
* [iwara] Fix extraction (#11781)
|
||||
* [googledrive] Fix extraction on Python 3.6
|
||||
+ [videopress] Add support for videopress.com
|
||||
+ [afreecatv] Extract RTMP formats
|
||||
|
||||
|
||||
version 2017.02.04.1
|
||||
|
||||
Extractors
|
||||
|
@ -282,6 +282,7 @@
|
||||
- **Gamersyde**
|
||||
- **GameSpot**
|
||||
- **GameStar**
|
||||
- **Gaskrank**
|
||||
- **Gazeta**
|
||||
- **GDCVault**
|
||||
- **generic**: Generic downloader that works on some sites
|
||||
@ -859,6 +860,7 @@
|
||||
- **videomore:season**
|
||||
- **videomore:video**
|
||||
- **VideoPremium**
|
||||
- **VideoPress**
|
||||
- **videoweed**: VideoWeed
|
||||
- **Vidio**
|
||||
- **vidme**
|
||||
|
@ -61,6 +61,7 @@ class FragmentFD(FileDownloader):
|
||||
'noprogress': True,
|
||||
'ratelimit': self.params.get('ratelimit'),
|
||||
'retries': self.params.get('retries', 0),
|
||||
'nopart': self.params.get('nopart', False),
|
||||
'test': self.params.get('test', False),
|
||||
}
|
||||
)
|
||||
|
@ -221,10 +221,23 @@ class AfreecaTVGlobalIE(AfreecaTVIE):
|
||||
s_url = s.get('purl')
|
||||
if not s_url:
|
||||
continue
|
||||
# TODO: extract rtmp formats
|
||||
if s.get('stype') == 'HLS':
|
||||
stype = s.get('stype')
|
||||
if stype == 'HLS':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
s_url, channel_id, 'mp4', fatal=False))
|
||||
s_url, channel_id, 'mp4', m3u8_id=stype, fatal=False))
|
||||
elif stype == 'RTMP':
|
||||
format_id = [stype]
|
||||
label = s.get('label')
|
||||
if label:
|
||||
format_id.append(label)
|
||||
formats.append({
|
||||
'format_id': '-'.join(format_id),
|
||||
'url': s_url,
|
||||
'tbr': int_or_none(s.get('bps')),
|
||||
'height': int_or_none(s.get('brt')),
|
||||
'ext': 'flv',
|
||||
'rtmp_live': True,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
info.update({
|
||||
|
@ -209,6 +209,15 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
'id': 'entropy-ep',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}, {
|
||||
# not all tracks have songs
|
||||
'url': 'https://insulters.bandcamp.com/album/we-are-the-plague',
|
||||
'info_dict': {
|
||||
'id': 'we-are-the-plague',
|
||||
'title': 'WE ARE THE PLAGUE',
|
||||
'uploader_id': 'insulters',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -217,12 +226,16 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
album_id = mobj.group('album_id')
|
||||
playlist_id = album_id or uploader_id
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
|
||||
if not tracks_paths:
|
||||
track_elements = re.findall(
|
||||
r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage)
|
||||
if not track_elements:
|
||||
raise ExtractorError('The page doesn\'t contain any tracks')
|
||||
# Only tracks with duration info have songs
|
||||
entries = [
|
||||
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
|
||||
for t_path in tracks_paths]
|
||||
for elem_content, t_path in track_elements
|
||||
if self._html_search_meta('duration', elem_content, default=None)]
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
|
||||
webpage, 'title', fatal=False)
|
||||
|
@ -27,6 +27,7 @@ class CanalplusIE(InfoExtractor):
|
||||
(?:www\.)?d8\.tv|
|
||||
(?:www\.)?c8\.fr|
|
||||
(?:www\.)?d17\.tv|
|
||||
(?:(?:football|www)\.)?cstar\.fr|
|
||||
(?:www\.)?itele\.fr
|
||||
)/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?|
|
||||
player\.canalplus\.fr/#/(?P<id>\d+)
|
||||
@ -40,6 +41,7 @@ class CanalplusIE(InfoExtractor):
|
||||
'd8': 'd8',
|
||||
'c8': 'd8',
|
||||
'd17': 'd17',
|
||||
'cstar': 'd17',
|
||||
'itele': 'itele',
|
||||
}
|
||||
|
||||
@ -86,6 +88,19 @@ class CanalplusIE(InfoExtractor):
|
||||
'description': 'Chaque matin du lundi au vendredi, Michaël Darmon reçoit un invité politique à 8h25.',
|
||||
'upload_date': '20161014',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://football.cstar.fr/cstar-minisite-foot/pid7566-feminines-videos.html?vid=1416769',
|
||||
'info_dict': {
|
||||
'id': '1416769',
|
||||
'display_id': 'pid7566-feminines-videos',
|
||||
'ext': 'mp4',
|
||||
'title': 'France - Albanie : les temps forts de la soirée - 20/09/2016',
|
||||
'description': 'md5:c3f30f2aaac294c1c969b3294de6904e',
|
||||
'upload_date': '20160921',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://m.canalplus.fr/?vid=1398231',
|
||||
'only_matching': True,
|
||||
|
@ -1315,8 +1315,8 @@ class InfoExtractor(object):
|
||||
'abr': abr,
|
||||
})
|
||||
f.update(parse_codecs(last_info.get('CODECS')))
|
||||
if audio_in_video_stream.get(last_info.get('AUDIO')) is False:
|
||||
# TODO: update acodec for for audio only formats with the same GROUP-ID
|
||||
if audio_in_video_stream.get(last_info.get('AUDIO')) is False and f['vcodec'] != 'none':
|
||||
# TODO: update acodec for audio only formats with the same GROUP-ID
|
||||
f['acodec'] = 'none'
|
||||
formats.append(f)
|
||||
last_info = {}
|
||||
@ -1959,7 +1959,12 @@ class InfoExtractor(object):
|
||||
media_tags = [(media_tag, media_type, '')
|
||||
for media_tag, media_type
|
||||
in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
|
||||
media_tags.extend(re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage))
|
||||
media_tags.extend(re.findall(
|
||||
# We only allow video|audio followed by a whitespace or '>'.
|
||||
# Allowing more characters may end up in significant slow down (see
|
||||
# https://github.com/rg3/youtube-dl/issues/11979, example URL:
|
||||
# http://www.porntrex.com/maps/videositemap.xml).
|
||||
r'(?s)(<(?P<tag>video|audio)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
|
||||
for media_tag, media_type, media_content in media_tags:
|
||||
media_info = {
|
||||
'formats': [],
|
||||
|
@ -349,6 +349,7 @@ from .gameone import (
|
||||
from .gamersyde import GamersydeIE
|
||||
from .gamespot import GameSpotIE
|
||||
from .gamestar import GameStarIE
|
||||
from .gaskrank import GaskrankIE
|
||||
from .gazeta import GazetaIE
|
||||
from .gdcvault import GDCVaultIE
|
||||
from .generic import GenericIE
|
||||
@ -1095,6 +1096,7 @@ from .videomore import (
|
||||
VideomoreSeasonIE,
|
||||
)
|
||||
from .videopremium import VideoPremiumIE
|
||||
from .videopress import VideoPressIE
|
||||
from .vidio import VidioIE
|
||||
from .vidme import (
|
||||
VidmeIE,
|
||||
|
123
youtube_dl/extractor/gaskrank.py
Normal file
123
youtube_dl/extractor/gaskrank.py
Normal file
@ -0,0 +1,123 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class GaskrankIE(InfoExtractor):
    """InfoExtractor for gaskrank.tv"""
    _VALID_URL = r'https?://(?:www\.)?gaskrank\.tv/tv/(?P<categories>[^/]+)/(?P<id>[^/]+)\.html?'
    _TESTS = [
        {
            'url': 'http://www.gaskrank.tv/tv/motorrad-fun/strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden.htm',
            'md5': '1ae88dbac97887d85ebd1157a95fc4f9',
            'info_dict': {
                'id': '201601/26955',
                'ext': 'mp4',
                'title': 'Strike! Einparken können nur Männer - Flurschaden hält sich in Grenzen *lol*',
                'thumbnail': r're:^https?://.*\.jpg$',
                'categories': ['motorrad-fun'],
                'display_id': 'strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden',
                'uploader_id': 'Bikefun',
                'upload_date': '20170110',
                'uploader_url': None,
            }
        },
        {
            'url': 'http://www.gaskrank.tv/tv/racing/isle-of-man-tt-2011-michael-du-15920.htm',
            'md5': 'c33ee32c711bc6c8224bfcbe62b23095',
            'info_dict': {
                'id': '201106/15920',
                'ext': 'mp4',
                'title': 'Isle of Man - Michael Dunlop vs Guy Martin - schwindelig kucken',
                'thumbnail': r're:^https?://.*\.jpg$',
                'categories': ['racing'],
                'display_id': 'isle-of-man-tt-2011-michael-du-15920',
                'uploader_id': 'IOM',
                'upload_date': '20160506',
                'uploader_url': 'www.iomtt.com',
            }
        }
    ]

    def _real_extract(self, url):
        """Extract metadata and formats from a gaskrank.tv video page.

        The page is downloaded once; title, thumbnail, uploader details,
        tags, view count and rating are scraped from it with regular
        expressions, and the formats are read from the player's inline
        ``playlist`` object.

        Only the title and the video id (taken from the first playlist
        source URL) are required; every other field is left as ``None``
        (or an empty list for tags) when it cannot be found.
        """
        def fix_json(code):
            """Removes trailing comma in json: {{},} --> {{}}"""
            return re.sub(r',\s*}', r'}', js_to_json(code))

        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The category is the path component right after /tv/ in the URL.
        categories = [re.match(self._VALID_URL, url).group('categories')]

        title = self._search_regex(
            r'movieName\s*:\s*\'([^\']*)\'',
            webpage, 'title')
        thumbnail = self._search_regex(
            r'poster\s*:\s*\'([^\']*)\'',
            webpage, 'thumbnail', default=None)

        # Both values come from a single "Video von: X | vom: DD.MM.YYYY"
        # line.  Pre-set them so the result dict can always be built even
        # when that line is missing from the page.
        uploader_id = None
        upload_date = None
        mobj = re.search(
            r'Video von:\s*(?P<uploader_id>[^|]*?)\s*\|\s*vom:\s*(?P<upload_date>[0-9][0-9]\.[0-9][0-9]\.[0-9][0-9][0-9][0-9])',
            webpage)
        if mobj is not None:
            uploader_id = mobj.group('uploader_id')
            upload_date = unified_strdate(mobj.group('upload_date'))

        uploader_url = self._search_regex(
            r'Homepage:\s*<[^>]*>(?P<uploader_url>[^<]*)',
            webpage, 'uploader_url', default=None)
        tags = re.findall(
            r'/tv/tags/[^/]+/"\s*>(?P<tag>[^<]*?)<',
            webpage)

        view_count = self._search_regex(
            r'class\s*=\s*"gkRight"(?:[^>]*>\s*<[^>]*)*icon-eye-open(?:[^>]*>\s*<[^>]*)*>\s*(?P<view_count>[0-9\.]*)',
            webpage, 'view_count', default=None)
        if view_count:
            # The site uses '.' as a thousands separator.
            view_count = int_or_none(view_count.replace('.', ''))

        # The rating is optional metadata, so it gets a default like the
        # other optional lookups above.
        average_rating = self._search_regex(
            r'itemprop\s*=\s*"ratingValue"[^>]*>\s*(?P<average_rating>[0-9,]+)',
            webpage, 'average_rating', default=None)
        if average_rating:
            # The site uses ',' as the decimal separator.
            average_rating = float_or_none(average_rating.replace(',', '.'))

        playlist = self._parse_json(
            self._search_regex(
                r'playlist\s*:\s*\[([^\]]*)\]',
                webpage, 'playlist', default='{}'),
            display_id, transform_source=fix_json, fatal=False)
        # The parse is non-fatal, so guard against a missing or oddly
        # shaped result before indexing into it.
        if not isinstance(playlist, dict):
            playlist = {}

        first_item = playlist.get('0')
        first_src = first_item.get('src') if isinstance(first_item, dict) else None
        # Passing '' instead of None lets _search_regex report the missing
        # id itself rather than failing on a non-string argument
        # (presumably as an extraction error - confirm against
        # InfoExtractor._search_regex).
        video_id = self._search_regex(
            r'https?://movies\.gaskrank\.tv/([^-]*?)(-[^\.]*)?\.mp4',
            first_src or '', 'video id')

        formats = []
        for key, item in playlist.items():
            # Skip playlist entries that carry no usable source URL.
            if not isinstance(item, dict) or not item.get('src'):
                continue
            formats.append({
                'url': item['src'],
                'format_id': key,
                'quality': item.get('quality'),
            })
        self._sort_formats(formats, field_preference=['format_id'])

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'categories': categories,
            'display_id': display_id,
            'uploader_id': uploader_id,
            'upload_date': upload_date,
            'uploader_url': uploader_url,
            'tags': tags,
            'view_count': view_count,
            'average_rating': average_rating,
        }
|
@ -29,6 +29,7 @@ from ..utils import (
|
||||
UnsupportedError,
|
||||
xpath_text,
|
||||
)
|
||||
from .commonprotocols import RtmpIE
|
||||
from .brightcove import (
|
||||
BrightcoveLegacyIE,
|
||||
BrightcoveNewIE,
|
||||
@ -81,6 +82,7 @@ from .videa import VideaIE
|
||||
from .twentymin import TwentyMinutenIE
|
||||
from .ustream import UstreamIE
|
||||
from .openload import OpenloadIE
|
||||
from .videopress import VideoPressIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@ -946,6 +948,19 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
|
||||
},
|
||||
},
|
||||
# jwplayer rtmp
|
||||
{
|
||||
'url': 'http://www.suffolk.edu/sjc/',
|
||||
'info_dict': {
|
||||
'id': 'sjclive',
|
||||
'ext': 'flv',
|
||||
'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
|
||||
'uploader': 'www.suffolk.edu',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
# rtl.nl embed
|
||||
{
|
||||
'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
|
||||
@ -1473,6 +1488,21 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [TwentyMinutenIE.ie_key()],
|
||||
},
|
||||
{
|
||||
# VideoPress embed
|
||||
'url': 'https://en.support.wordpress.com/videopress/',
|
||||
'info_dict': {
|
||||
'id': 'OcobLTqC',
|
||||
'ext': 'm4v',
|
||||
'title': 'IMG_5786',
|
||||
'timestamp': 1435711927,
|
||||
'upload_date': '20150701',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [VideoPressIE.ie_key()],
|
||||
}
|
||||
# {
|
||||
# # TODO: find another test
|
||||
@ -2438,6 +2468,12 @@ class GenericIE(InfoExtractor):
|
||||
return _playlist_from_matches(
|
||||
openload_urls, ie=OpenloadIE.ie_key())
|
||||
|
||||
# Look for VideoPress embeds
|
||||
videopress_urls = VideoPressIE._extract_urls(webpage)
|
||||
if videopress_urls:
|
||||
return _playlist_from_matches(
|
||||
videopress_urls, ie=VideoPressIE.ie_key())
|
||||
|
||||
# Looking for http://schema.org/VideoObject
|
||||
json_ld = self._search_json_ld(
|
||||
webpage, video_id, default={}, expected_type='VideoObject')
|
||||
@ -2465,6 +2501,8 @@ class GenericIE(InfoExtractor):
|
||||
def check_video(vurl):
|
||||
if YoutubeIE.suitable(vurl):
|
||||
return True
|
||||
if RtmpIE.suitable(vurl):
|
||||
return True
|
||||
vpath = compat_urlparse.urlparse(vurl).path
|
||||
vext = determine_ext(vpath)
|
||||
return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')
|
||||
@ -2572,6 +2610,15 @@ class GenericIE(InfoExtractor):
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
|
||||
if RtmpIE.suitable(video_url):
|
||||
entry_info_dict.update({
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': RtmpIE.ie_key(),
|
||||
'url': video_url,
|
||||
})
|
||||
entries.append(entry_info_dict)
|
||||
continue
|
||||
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'smil':
|
||||
entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
|
||||
|
@ -6,6 +6,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
lowercase_escape,
|
||||
)
|
||||
|
||||
|
||||
@ -13,12 +14,12 @@ class GoogleDriveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
|
||||
_TESTS = [{
|
||||
'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
|
||||
'md5': '881f7700aec4f538571fa1e0eed4a7b6',
|
||||
'md5': 'd109872761f7e7ecf353fa108c0dbe1e',
|
||||
'info_dict': {
|
||||
'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Big Buck Bunny.mp4',
|
||||
'duration': 46,
|
||||
'duration': 45,
|
||||
}
|
||||
}, {
|
||||
# video id is longer than 28 characters
|
||||
@ -55,7 +56,7 @@ class GoogleDriveIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
'http://docs.google.com/file/d/%s' % video_id, video_id, encoding='unicode_escape')
|
||||
'http://docs.google.com/file/d/%s' % video_id, video_id)
|
||||
|
||||
reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
|
||||
if reason:
|
||||
@ -74,7 +75,7 @@ class GoogleDriveIE(InfoExtractor):
|
||||
resolution = fmt.split('/')[1]
|
||||
width, height = resolution.split('x')
|
||||
formats.append({
|
||||
'url': fmt_url,
|
||||
'url': lowercase_escape(fmt_url),
|
||||
'format_id': fmt_id,
|
||||
'resolution': resolution,
|
||||
'width': int_or_none(width),
|
||||
|
@ -3,14 +3,18 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlparse
|
||||
from ..utils import remove_end
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class IwaraIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos/(?P<id>[a-zA-Z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://iwara.tv/videos/amVwUl1EHpAD9RD',
|
||||
'md5': '1d53866b2c514b23ed69e4352fdc9839',
|
||||
# md5 is unstable
|
||||
'info_dict': {
|
||||
'id': 'amVwUl1EHpAD9RD',
|
||||
'ext': 'mp4',
|
||||
@ -23,17 +27,17 @@ class IwaraIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '0B1LvuHnL-sRFNXB1WHNqbGw4SXc',
|
||||
'ext': 'mp4',
|
||||
'title': '[3D Hentai] Kyonyu Ã\x97 Genkai Ã\x97 Emaki Shinobi Girls.mp4',
|
||||
'title': '[3D Hentai] Kyonyu × Genkai × Emaki Shinobi Girls.mp4',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'add_ie': ['GoogleDrive'],
|
||||
}, {
|
||||
'url': 'http://www.iwara.tv/videos/nawkaumd6ilezzgq',
|
||||
'md5': '1d85f1e5217d2791626cff5ec83bb189',
|
||||
# md5 is unstable
|
||||
'info_dict': {
|
||||
'id': '6liAP9s2Ojc',
|
||||
'ext': 'mp4',
|
||||
'age_limit': 0,
|
||||
'age_limit': 18,
|
||||
'title': '[MMD] Do It Again Ver.2 [1080p 60FPS] (Motion,Camera,Wav+DL)',
|
||||
'description': 'md5:590c12c0df1443d833fbebe05da8c47a',
|
||||
'upload_date': '20160910',
|
||||
@ -52,9 +56,9 @@ class IwaraIE(InfoExtractor):
|
||||
# ecchi is 'sexy' in Japanese
|
||||
age_limit = 18 if hostname.split('.')[0] == 'ecchi' else 0
|
||||
|
||||
entries = self._parse_html5_media_entries(url, webpage, video_id)
|
||||
video_data = self._download_json('http://www.iwara.tv/api/video/%s' % video_id, video_id)
|
||||
|
||||
if not entries:
|
||||
if not video_data:
|
||||
iframe_url = self._html_search_regex(
|
||||
r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1',
|
||||
webpage, 'iframe URL', group='url')
|
||||
@ -67,11 +71,25 @@ class IwaraIE(InfoExtractor):
|
||||
title = remove_end(self._html_search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title'), ' | Iwara')
|
||||
|
||||
info_dict = entries[0]
|
||||
info_dict.update({
|
||||
formats = []
|
||||
for a_format in video_data:
|
||||
format_id = a_format.get('resolution')
|
||||
height = int_or_none(self._search_regex(
|
||||
r'(\d+)p', format_id, 'height', default=None))
|
||||
formats.append({
|
||||
'url': a_format['uri'],
|
||||
'format_id': format_id,
|
||||
'ext': mimetype2ext(a_format.get('mime')) or 'mp4',
|
||||
'height': height,
|
||||
'width': int_or_none(height / 9.0 * 16.0 if height else None),
|
||||
'quality': 1 if format_id == 'Source' else 0,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'age_limit': age_limit,
|
||||
})
|
||||
|
||||
return info_dict
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -156,7 +156,12 @@ class PornHubIE(InfoExtractor):
|
||||
comment_count = self._extract_count(
|
||||
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
||||
|
||||
video_urls = list(map(compat_urllib_parse_unquote, re.findall(r"player_quality_[0-9]{3}p\s*=\s*'([^']+)'", webpage)))
|
||||
video_urls = []
|
||||
for quote, video_url in re.findall(
|
||||
r'player_quality_[0-9]{3,4}p\s*=\s*(["\'])(.+?)\1;', webpage):
|
||||
video_urls.append(compat_urllib_parse_unquote(re.sub(
|
||||
r'{0}\s*\+\s*{0}'.format(quote), '', video_url)))
|
||||
|
||||
if webpage.find('"encrypted":true') != -1:
|
||||
password = compat_urllib_parse_unquote_plus(
|
||||
self._search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
|
||||
|
@ -4,11 +4,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
unified_strdate,
|
||||
)
|
||||
from ..utils import js_to_json
|
||||
|
||||
|
||||
class SportBoxEmbedIE(InfoExtractor):
|
||||
|
99
youtube_dl/extractor/videopress.py
Normal file
99
youtube_dl/extractor/videopress.py
Normal file
@ -0,0 +1,99 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import random
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
parse_age_limit,
|
||||
qualities,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class VideoPressIE(InfoExtractor):
    """Extractor for videopress.com embed pages."""
    _VALID_URL = r'https?://videopress\.com/embed/(?P<id>[\da-zA-Z]+)'
    _TESTS = [{
        'url': 'https://videopress.com/embed/kUJmAcSf',
        'md5': '706956a6c875873d51010921310e4bc6',
        'info_dict': {
            'id': 'kUJmAcSf',
            'ext': 'mp4',
            'title': 'VideoPress Demo',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 634.6,
            'timestamp': 1434983935,
            'upload_date': '20150622',
            'age_limit': 0,
        },
    }, {
        # 17+, requires birth_* params
        'url': 'https://videopress.com/embed/iH3gstfZ',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the videopress.com embed URLs found in <iframe> tags of webpage."""
        return re.findall(
            r'<iframe[^>]+src=["\']((?:https?://)?videopress\.com/embed/[\da-zA-Z]+)',
            webpage)

    def _real_extract(self, url):
        """Fetch the video description from the WordPress REST API.

        Builds one format per ``mp4``/``ogg`` file listed under ``files``
        plus, when present, the ``original`` upload, which is ranked above
        all of the transcoded qualities.
        """
        video_id = self._match_id(url)

        video = self._download_json(
            'https://public-api.wordpress.com/rest/v1.1/videos/%s' % video_id,
            video_id, query={
                # A random birth date is sent along for age-restricted
                # videos (see the second test case).
                'birth_month': random.randint(1, 12),
                # 28 is the largest day that exists in every month, so the
                # generated date is always a real calendar date.
                'birth_day': random.randint(1, 28),
                'birth_year': random.randint(1950, 1995),
            })

        title = video['title']

        def file_url_base(scheme):
            # Base URL for the given scheme, or a falsy value when absent.
            return try_get(
                video, lambda x: x['file_url_base'][scheme], compat_str)

        # Prefer the https base, fall back to plain http.
        base_url = file_url_base('https') or file_url_base('http')

        QUALITIES = ('std', 'dvd', 'hd')
        quality = qualities(QUALITIES)

        # A missing or malformed 'files' entry must not prevent the
        # 'original' file below from being offered.
        files = video.get('files')
        if not isinstance(files, dict):
            files = {}

        formats = []
        for format_id, f in files.items():
            if not isinstance(f, dict):
                continue
            for ext, path in f.items():
                if ext not in ('mp4', 'ogg'):
                    continue
                format_url = urljoin(base_url, path)
                # Skip entries for which no usable URL could be built
                # (e.g. no base URL in the API response).
                if not format_url:
                    continue
                formats.append({
                    'url': format_url,
                    'format_id': '%s-%s' % (format_id, ext),
                    'ext': determine_ext(path, ext),
                    'quality': quality(format_id),
                })
        original_url = try_get(video, lambda x: x['original'], compat_str)
        if original_url:
            formats.append({
                'url': original_url,
                'format_id': 'original',
                # One step above the best transcoded quality.
                'quality': len(QUALITIES),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': video.get('description'),
            'thumbnail': video.get('poster'),
            # Divided by 1000: presumably the API reports milliseconds
            # (the test expects 634.6) - confirm against the API docs.
            'duration': float_or_none(video.get('duration'), 1000),
            'timestamp': unified_timestamp(video.get('upload_date')),
            'age_limit': parse_age_limit(video.get('rating')),
            'formats': formats,
        }
|
@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2017.02.04.1'
|
||||
__version__ = '2017.02.07'
|
||||
|
Reference in New Issue
Block a user