Compare commits
20 Commits
2017.02.04
...
2017.02.07
Author | SHA1 | Date | |
---|---|---|---|
013877298d | |||
c87f95f991 | |||
f28aeff264 | |||
242a14a1f6 | |||
d5d904ff7d | |||
5620f840f6 | |||
b7a8c1bcfa | |||
7097bffba6 | |||
2aec7256ae | |||
815482d4eb | |||
9c14fe9681 | |||
e705755739 | |||
019f4c0371 | |||
2ab2c0d1f5 | |||
caf0f5f8b7 | |||
e4e50f60b1 | |||
6ef3e65a7b | |||
6fd138bed8 | |||
49bd8d5e2e | |||
3d2c2752c5 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.04.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.07*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
|
||||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.04.1**
|
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.07**
|
||||||
|
|
||||||
### Before submitting an *issue* make sure you have:
|
### Before submitting an *issue* make sure you have:
|
||||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||||
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2017.02.04.1
|
[debug] youtube-dl version 2017.02.07
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
@ -6,6 +6,7 @@ python:
|
|||||||
- "3.3"
|
- "3.3"
|
||||||
- "3.4"
|
- "3.4"
|
||||||
- "3.5"
|
- "3.5"
|
||||||
|
- "3.6"
|
||||||
sudo: false
|
sudo: false
|
||||||
script: nosetests test --verbose
|
script: nosetests test --verbose
|
||||||
notifications:
|
notifications:
|
||||||
|
1
AUTHORS
1
AUTHORS
@ -201,3 +201,4 @@ Stephen Chen
|
|||||||
Fabian Stahl
|
Fabian Stahl
|
||||||
Bagira
|
Bagira
|
||||||
Odd Stråbø
|
Odd Stråbø
|
||||||
|
Philip Herzog
|
||||||
|
19
ChangeLog
19
ChangeLog
@ -1,3 +1,22 @@
|
|||||||
|
version 2017.02.07
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [extractor/common] Fix audio only with audio group in m3u8 (#11995)
|
||||||
|
+ [downloader/fragment] Respect --no-part
|
||||||
|
* [extractor/common] Speed-up HTML5 media entries extraction (#11979)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [pornhub] Fix extraction (#11997)
|
||||||
|
+ [canalplus] Add support for cstar.fr (#11990)
|
||||||
|
+ [extractor/generic] Improve RTMP support (#11993)
|
||||||
|
+ [gaskrank] Add support for gaskrank.tv (#11685)
|
||||||
|
* [bandcamp] Fix extraction for incomplete albums (#11727)
|
||||||
|
* [iwara] Fix extraction (#11781)
|
||||||
|
* [googledrive] Fix extraction on Python 3.6
|
||||||
|
+ [videopress] Add support for videopress.com
|
||||||
|
+ [afreecatv] Extract RTMP formats
|
||||||
|
|
||||||
|
|
||||||
version 2017.02.04.1
|
version 2017.02.04.1
|
||||||
|
|
||||||
Extractors
|
Extractors
|
||||||
|
@ -282,6 +282,7 @@
|
|||||||
- **Gamersyde**
|
- **Gamersyde**
|
||||||
- **GameSpot**
|
- **GameSpot**
|
||||||
- **GameStar**
|
- **GameStar**
|
||||||
|
- **Gaskrank**
|
||||||
- **Gazeta**
|
- **Gazeta**
|
||||||
- **GDCVault**
|
- **GDCVault**
|
||||||
- **generic**: Generic downloader that works on some sites
|
- **generic**: Generic downloader that works on some sites
|
||||||
@ -859,6 +860,7 @@
|
|||||||
- **videomore:season**
|
- **videomore:season**
|
||||||
- **videomore:video**
|
- **videomore:video**
|
||||||
- **VideoPremium**
|
- **VideoPremium**
|
||||||
|
- **VideoPress**
|
||||||
- **videoweed**: VideoWeed
|
- **videoweed**: VideoWeed
|
||||||
- **Vidio**
|
- **Vidio**
|
||||||
- **vidme**
|
- **vidme**
|
||||||
|
@ -61,6 +61,7 @@ class FragmentFD(FileDownloader):
|
|||||||
'noprogress': True,
|
'noprogress': True,
|
||||||
'ratelimit': self.params.get('ratelimit'),
|
'ratelimit': self.params.get('ratelimit'),
|
||||||
'retries': self.params.get('retries', 0),
|
'retries': self.params.get('retries', 0),
|
||||||
|
'nopart': self.params.get('nopart', False),
|
||||||
'test': self.params.get('test', False),
|
'test': self.params.get('test', False),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
@ -221,10 +221,23 @@ class AfreecaTVGlobalIE(AfreecaTVIE):
|
|||||||
s_url = s.get('purl')
|
s_url = s.get('purl')
|
||||||
if not s_url:
|
if not s_url:
|
||||||
continue
|
continue
|
||||||
# TODO: extract rtmp formats
|
stype = s.get('stype')
|
||||||
if s.get('stype') == 'HLS':
|
if stype == 'HLS':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
s_url, channel_id, 'mp4', fatal=False))
|
s_url, channel_id, 'mp4', m3u8_id=stype, fatal=False))
|
||||||
|
elif stype == 'RTMP':
|
||||||
|
format_id = [stype]
|
||||||
|
label = s.get('label')
|
||||||
|
if label:
|
||||||
|
format_id.append(label)
|
||||||
|
formats.append({
|
||||||
|
'format_id': '-'.join(format_id),
|
||||||
|
'url': s_url,
|
||||||
|
'tbr': int_or_none(s.get('bps')),
|
||||||
|
'height': int_or_none(s.get('brt')),
|
||||||
|
'ext': 'flv',
|
||||||
|
'rtmp_live': True,
|
||||||
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
info.update({
|
info.update({
|
||||||
|
@ -209,6 +209,15 @@ class BandcampAlbumIE(InfoExtractor):
|
|||||||
'id': 'entropy-ep',
|
'id': 'entropy-ep',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 3,
|
'playlist_mincount': 3,
|
||||||
|
}, {
|
||||||
|
# not all tracks have songs
|
||||||
|
'url': 'https://insulters.bandcamp.com/album/we-are-the-plague',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'we-are-the-plague',
|
||||||
|
'title': 'WE ARE THE PLAGUE',
|
||||||
|
'uploader_id': 'insulters',
|
||||||
|
},
|
||||||
|
'playlist_count': 2,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -217,12 +226,16 @@ class BandcampAlbumIE(InfoExtractor):
|
|||||||
album_id = mobj.group('album_id')
|
album_id = mobj.group('album_id')
|
||||||
playlist_id = album_id or uploader_id
|
playlist_id = album_id or uploader_id
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage)
|
track_elements = re.findall(
|
||||||
if not tracks_paths:
|
r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage)
|
||||||
|
if not track_elements:
|
||||||
raise ExtractorError('The page doesn\'t contain any tracks')
|
raise ExtractorError('The page doesn\'t contain any tracks')
|
||||||
|
# Only tracks with duration info have songs
|
||||||
entries = [
|
entries = [
|
||||||
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
|
self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
|
||||||
for t_path in tracks_paths]
|
for elem_content, t_path in track_elements
|
||||||
|
if self._html_search_meta('duration', elem_content, default=None)]
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
|
r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
|
||||||
webpage, 'title', fatal=False)
|
webpage, 'title', fatal=False)
|
||||||
|
@ -27,6 +27,7 @@ class CanalplusIE(InfoExtractor):
|
|||||||
(?:www\.)?d8\.tv|
|
(?:www\.)?d8\.tv|
|
||||||
(?:www\.)?c8\.fr|
|
(?:www\.)?c8\.fr|
|
||||||
(?:www\.)?d17\.tv|
|
(?:www\.)?d17\.tv|
|
||||||
|
(?:(?:football|www)\.)?cstar\.fr|
|
||||||
(?:www\.)?itele\.fr
|
(?:www\.)?itele\.fr
|
||||||
)/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?|
|
)/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?|
|
||||||
player\.canalplus\.fr/#/(?P<id>\d+)
|
player\.canalplus\.fr/#/(?P<id>\d+)
|
||||||
@ -40,6 +41,7 @@ class CanalplusIE(InfoExtractor):
|
|||||||
'd8': 'd8',
|
'd8': 'd8',
|
||||||
'c8': 'd8',
|
'c8': 'd8',
|
||||||
'd17': 'd17',
|
'd17': 'd17',
|
||||||
|
'cstar': 'd17',
|
||||||
'itele': 'itele',
|
'itele': 'itele',
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -86,6 +88,19 @@ class CanalplusIE(InfoExtractor):
|
|||||||
'description': 'Chaque matin du lundi au vendredi, Michaël Darmon reçoit un invité politique à 8h25.',
|
'description': 'Chaque matin du lundi au vendredi, Michaël Darmon reçoit un invité politique à 8h25.',
|
||||||
'upload_date': '20161014',
|
'upload_date': '20161014',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://football.cstar.fr/cstar-minisite-foot/pid7566-feminines-videos.html?vid=1416769',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1416769',
|
||||||
|
'display_id': 'pid7566-feminines-videos',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'France - Albanie : les temps forts de la soirée - 20/09/2016',
|
||||||
|
'description': 'md5:c3f30f2aaac294c1c969b3294de6904e',
|
||||||
|
'upload_date': '20160921',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://m.canalplus.fr/?vid=1398231',
|
'url': 'http://m.canalplus.fr/?vid=1398231',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -1315,8 +1315,8 @@ class InfoExtractor(object):
|
|||||||
'abr': abr,
|
'abr': abr,
|
||||||
})
|
})
|
||||||
f.update(parse_codecs(last_info.get('CODECS')))
|
f.update(parse_codecs(last_info.get('CODECS')))
|
||||||
if audio_in_video_stream.get(last_info.get('AUDIO')) is False:
|
if audio_in_video_stream.get(last_info.get('AUDIO')) is False and f['vcodec'] != 'none':
|
||||||
# TODO: update acodec for for audio only formats with the same GROUP-ID
|
# TODO: update acodec for audio only formats with the same GROUP-ID
|
||||||
f['acodec'] = 'none'
|
f['acodec'] = 'none'
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
last_info = {}
|
last_info = {}
|
||||||
@ -1959,7 +1959,12 @@ class InfoExtractor(object):
|
|||||||
media_tags = [(media_tag, media_type, '')
|
media_tags = [(media_tag, media_type, '')
|
||||||
for media_tag, media_type
|
for media_tag, media_type
|
||||||
in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
|
in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
|
||||||
media_tags.extend(re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage))
|
media_tags.extend(re.findall(
|
||||||
|
# We only allow video|audio followed by a whitespace or '>'.
|
||||||
|
# Allowing more characters may end up in significant slow down (see
|
||||||
|
# https://github.com/rg3/youtube-dl/issues/11979, example URL:
|
||||||
|
# http://www.porntrex.com/maps/videositemap.xml).
|
||||||
|
r'(?s)(<(?P<tag>video|audio)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
|
||||||
for media_tag, media_type, media_content in media_tags:
|
for media_tag, media_type, media_content in media_tags:
|
||||||
media_info = {
|
media_info = {
|
||||||
'formats': [],
|
'formats': [],
|
||||||
|
@ -349,6 +349,7 @@ from .gameone import (
|
|||||||
from .gamersyde import GamersydeIE
|
from .gamersyde import GamersydeIE
|
||||||
from .gamespot import GameSpotIE
|
from .gamespot import GameSpotIE
|
||||||
from .gamestar import GameStarIE
|
from .gamestar import GameStarIE
|
||||||
|
from .gaskrank import GaskrankIE
|
||||||
from .gazeta import GazetaIE
|
from .gazeta import GazetaIE
|
||||||
from .gdcvault import GDCVaultIE
|
from .gdcvault import GDCVaultIE
|
||||||
from .generic import GenericIE
|
from .generic import GenericIE
|
||||||
@ -1095,6 +1096,7 @@ from .videomore import (
|
|||||||
VideomoreSeasonIE,
|
VideomoreSeasonIE,
|
||||||
)
|
)
|
||||||
from .videopremium import VideoPremiumIE
|
from .videopremium import VideoPremiumIE
|
||||||
|
from .videopress import VideoPressIE
|
||||||
from .vidio import VidioIE
|
from .vidio import VidioIE
|
||||||
from .vidme import (
|
from .vidme import (
|
||||||
VidmeIE,
|
VidmeIE,
|
||||||
|
123
youtube_dl/extractor/gaskrank.py
Normal file
123
youtube_dl/extractor/gaskrank.py
Normal file
@ -0,0 +1,123 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class GaskrankIE(InfoExtractor):
    """InfoExtractor for gaskrank.tv

    Matches video pages of the form
    ``gaskrank.tv/tv/<category>/<display id>.htm[l]`` and scrapes the
    metadata and the player playlist out of the page source.
    """
    _VALID_URL = r'https?://(?:www\.)?gaskrank\.tv/tv/(?P<categories>[^/]+)/(?P<id>[^/]+)\.html?'
    _TESTS = [
        {
            'url': 'http://www.gaskrank.tv/tv/motorrad-fun/strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden.htm',
            'md5': '1ae88dbac97887d85ebd1157a95fc4f9',
            'info_dict': {
                'id': '201601/26955',
                'ext': 'mp4',
                'title': 'Strike! Einparken können nur Männer - Flurschaden hält sich in Grenzen *lol*',
                'thumbnail': r're:^https?://.*\.jpg$',
                'categories': ['motorrad-fun'],
                'display_id': 'strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden',
                'uploader_id': 'Bikefun',
                'upload_date': '20170110',
                'uploader_url': None,
            }
        },
        {
            'url': 'http://www.gaskrank.tv/tv/racing/isle-of-man-tt-2011-michael-du-15920.htm',
            'md5': 'c33ee32c711bc6c8224bfcbe62b23095',
            'info_dict': {
                'id': '201106/15920',
                'ext': 'mp4',
                'title': 'Isle of Man - Michael Dunlop vs Guy Martin - schwindelig kucken',
                'thumbnail': r're:^https?://.*\.jpg$',
                'categories': ['racing'],
                'display_id': 'isle-of-man-tt-2011-michael-du-15920',
                'uploader_id': 'IOM',
                'upload_date': '20160506',
                'uploader_url': 'www.iomtt.com',
            }
        }
    ]

    def _real_extract(self, url):
        """Extract information from a gaskrank.tv video page.

        Only the title and the video id (taken from the first playlist
        entry's media URL) are mandatory; every other field falls back to
        None (or an empty list for tags) when it cannot be found.
        """
        def fix_json(code):
            """Removes trailing comma in json: {{},} --> {{}}"""
            return re.sub(r',\s*}', r'}', js_to_json(code))

        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        categories = [re.match(self._VALID_URL, url).group('categories')]
        title = self._search_regex(
            r'movieName\s*:\s*\'([^\']*)\'',
            webpage, 'title')
        thumbnail = self._search_regex(
            r'poster\s*:\s*\'([^\']*)\'',
            webpage, 'thumbnail', default=None)

        # Both values must be bound even when the "Video von: ... | vom: ..."
        # line is absent, otherwise building the result dict below fails
        # with UnboundLocalError.
        uploader_id = upload_date = None
        mobj = re.search(
            r'Video von:\s*(?P<uploader_id>[^|]*?)\s*\|\s*vom:\s*(?P<upload_date>[0-9][0-9]\.[0-9][0-9]\.[0-9][0-9][0-9][0-9])',
            webpage)
        if mobj is not None:
            uploader_id = mobj.group('uploader_id')
            upload_date = unified_strdate(mobj.group('upload_date'))

        uploader_url = self._search_regex(
            r'Homepage:\s*<[^>]*>(?P<uploader_url>[^<]*)',
            webpage, 'uploader_url', default=None)
        tags = re.findall(
            r'/tv/tags/[^/]+/"\s*>(?P<tag>[^<]*?)<',
            webpage)

        view_count = self._search_regex(
            r'class\s*=\s*"gkRight"(?:[^>]*>\s*<[^>]*)*icon-eye-open(?:[^>]*>\s*<[^>]*)*>\s*(?P<view_count>[0-9\.]*)',
            webpage, 'view_count', default=None)
        # The pattern may match an empty string; report that as None rather
        # than ''.  Dots are stripped before conversion (presumably they are
        # thousands separators - confirm against a live page).
        view_count = (
            int_or_none(view_count.replace('.', '')) if view_count else None)

        # The rating is optional metadata, so - like the other optional
        # fields above - a missing value must not abort the extraction.
        average_rating = self._search_regex(
            r'itemprop\s*=\s*"ratingValue"[^>]*>\s*(?P<average_rating>[0-9,]+)',
            webpage, 'average_rating', default=None)
        # The page uses a decimal comma; float_or_none needs a decimal point.
        average_rating = (
            float_or_none(average_rating.replace(',', '.'))
            if average_rating else None)

        # _parse_json is called with fatal=False, so guard against a falsy
        # result here instead of crashing on attribute access below.
        playlist = self._parse_json(
            self._search_regex(
                r'playlist\s*:\s*\[([^\]]*)\]',
                webpage, 'playlist', default='{}'),
            display_id, transform_source=fix_json, fatal=False) or {}

        # The video id is derived from the first entry's media URL.  With a
        # missing entry/src we search an empty string so that the mandatory
        # _search_regex call reports the failure itself (presumably as the
        # usual "unable to extract" error) instead of an AttributeError.
        first_src = (playlist.get('0') or {}).get('src') or ''
        video_id = self._search_regex(
            r'https?://movies\.gaskrank\.tv/([^-]*?)(-[^\.]*)?\.mp4',
            first_src, 'video id')

        formats = []
        for key, entry in playlist.items():
            src = entry.get('src') if isinstance(entry, dict) else None
            if not src:
                # An entry without a media URL cannot become a format.
                continue
            formats.append({
                'url': src,
                'format_id': key,
                'quality': entry.get('quality')})
        self._sort_formats(formats, field_preference=['format_id'])

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'categories': categories,
            'display_id': display_id,
            'uploader_id': uploader_id,
            'upload_date': upload_date,
            'uploader_url': uploader_url,
            'tags': tags,
            'view_count': view_count,
            'average_rating': average_rating,
        }
|
@ -29,6 +29,7 @@ from ..utils import (
|
|||||||
UnsupportedError,
|
UnsupportedError,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
)
|
)
|
||||||
|
from .commonprotocols import RtmpIE
|
||||||
from .brightcove import (
|
from .brightcove import (
|
||||||
BrightcoveLegacyIE,
|
BrightcoveLegacyIE,
|
||||||
BrightcoveNewIE,
|
BrightcoveNewIE,
|
||||||
@ -81,6 +82,7 @@ from .videa import VideaIE
|
|||||||
from .twentymin import TwentyMinutenIE
|
from .twentymin import TwentyMinutenIE
|
||||||
from .ustream import UstreamIE
|
from .ustream import UstreamIE
|
||||||
from .openload import OpenloadIE
|
from .openload import OpenloadIE
|
||||||
|
from .videopress import VideoPressIE
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
@ -946,6 +948,19 @@ class GenericIE(InfoExtractor):
|
|||||||
'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
|
'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# jwplayer rtmp
|
||||||
|
{
|
||||||
|
'url': 'http://www.suffolk.edu/sjc/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'sjclive',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
|
||||||
|
'uploader': 'www.suffolk.edu',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
},
|
||||||
# rtl.nl embed
|
# rtl.nl embed
|
||||||
{
|
{
|
||||||
'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
|
'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
|
||||||
@ -1473,6 +1488,21 @@ class GenericIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'add_ie': [TwentyMinutenIE.ie_key()],
|
'add_ie': [TwentyMinutenIE.ie_key()],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# VideoPress embed
|
||||||
|
'url': 'https://en.support.wordpress.com/videopress/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'OcobLTqC',
|
||||||
|
'ext': 'm4v',
|
||||||
|
'title': 'IMG_5786',
|
||||||
|
'timestamp': 1435711927,
|
||||||
|
'upload_date': '20150701',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'add_ie': [VideoPressIE.ie_key()],
|
||||||
}
|
}
|
||||||
# {
|
# {
|
||||||
# # TODO: find another test
|
# # TODO: find another test
|
||||||
@ -2438,6 +2468,12 @@ class GenericIE(InfoExtractor):
|
|||||||
return _playlist_from_matches(
|
return _playlist_from_matches(
|
||||||
openload_urls, ie=OpenloadIE.ie_key())
|
openload_urls, ie=OpenloadIE.ie_key())
|
||||||
|
|
||||||
|
# Look for VideoPress embeds
|
||||||
|
videopress_urls = VideoPressIE._extract_urls(webpage)
|
||||||
|
if videopress_urls:
|
||||||
|
return _playlist_from_matches(
|
||||||
|
videopress_urls, ie=VideoPressIE.ie_key())
|
||||||
|
|
||||||
# Looking for http://schema.org/VideoObject
|
# Looking for http://schema.org/VideoObject
|
||||||
json_ld = self._search_json_ld(
|
json_ld = self._search_json_ld(
|
||||||
webpage, video_id, default={}, expected_type='VideoObject')
|
webpage, video_id, default={}, expected_type='VideoObject')
|
||||||
@ -2465,6 +2501,8 @@ class GenericIE(InfoExtractor):
|
|||||||
def check_video(vurl):
|
def check_video(vurl):
|
||||||
if YoutubeIE.suitable(vurl):
|
if YoutubeIE.suitable(vurl):
|
||||||
return True
|
return True
|
||||||
|
if RtmpIE.suitable(vurl):
|
||||||
|
return True
|
||||||
vpath = compat_urlparse.urlparse(vurl).path
|
vpath = compat_urlparse.urlparse(vurl).path
|
||||||
vext = determine_ext(vpath)
|
vext = determine_ext(vpath)
|
||||||
return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')
|
return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')
|
||||||
@ -2572,6 +2610,15 @@ class GenericIE(InfoExtractor):
|
|||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if RtmpIE.suitable(video_url):
|
||||||
|
entry_info_dict.update({
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'ie_key': RtmpIE.ie_key(),
|
||||||
|
'url': video_url,
|
||||||
|
})
|
||||||
|
entries.append(entry_info_dict)
|
||||||
|
continue
|
||||||
|
|
||||||
ext = determine_ext(video_url)
|
ext = determine_ext(video_url)
|
||||||
if ext == 'smil':
|
if ext == 'smil':
|
||||||
entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
|
entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
|
||||||
|
@ -6,6 +6,7 @@ from .common import InfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
lowercase_escape,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -13,12 +14,12 @@ class GoogleDriveIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
|
_VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
|
'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
|
||||||
'md5': '881f7700aec4f538571fa1e0eed4a7b6',
|
'md5': 'd109872761f7e7ecf353fa108c0dbe1e',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
|
'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Big Buck Bunny.mp4',
|
'title': 'Big Buck Bunny.mp4',
|
||||||
'duration': 46,
|
'duration': 45,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# video id is longer than 28 characters
|
# video id is longer than 28 characters
|
||||||
@ -55,7 +56,7 @@ class GoogleDriveIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'http://docs.google.com/file/d/%s' % video_id, video_id, encoding='unicode_escape')
|
'http://docs.google.com/file/d/%s' % video_id, video_id)
|
||||||
|
|
||||||
reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
|
reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
|
||||||
if reason:
|
if reason:
|
||||||
@ -74,7 +75,7 @@ class GoogleDriveIE(InfoExtractor):
|
|||||||
resolution = fmt.split('/')[1]
|
resolution = fmt.split('/')[1]
|
||||||
width, height = resolution.split('x')
|
width, height = resolution.split('x')
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': fmt_url,
|
'url': lowercase_escape(fmt_url),
|
||||||
'format_id': fmt_id,
|
'format_id': fmt_id,
|
||||||
'resolution': resolution,
|
'resolution': resolution,
|
||||||
'width': int_or_none(width),
|
'width': int_or_none(width),
|
||||||
|
@ -3,14 +3,18 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urllib_parse_urlparse
|
from ..compat import compat_urllib_parse_urlparse
|
||||||
from ..utils import remove_end
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
mimetype2ext,
|
||||||
|
remove_end,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class IwaraIE(InfoExtractor):
|
class IwaraIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos/(?P<id>[a-zA-Z0-9]+)'
|
_VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos/(?P<id>[a-zA-Z0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://iwara.tv/videos/amVwUl1EHpAD9RD',
|
'url': 'http://iwara.tv/videos/amVwUl1EHpAD9RD',
|
||||||
'md5': '1d53866b2c514b23ed69e4352fdc9839',
|
# md5 is unstable
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'amVwUl1EHpAD9RD',
|
'id': 'amVwUl1EHpAD9RD',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -23,17 +27,17 @@ class IwaraIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '0B1LvuHnL-sRFNXB1WHNqbGw4SXc',
|
'id': '0B1LvuHnL-sRFNXB1WHNqbGw4SXc',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '[3D Hentai] Kyonyu Ã\x97 Genkai Ã\x97 Emaki Shinobi Girls.mp4',
|
'title': '[3D Hentai] Kyonyu × Genkai × Emaki Shinobi Girls.mp4',
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
},
|
},
|
||||||
'add_ie': ['GoogleDrive'],
|
'add_ie': ['GoogleDrive'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.iwara.tv/videos/nawkaumd6ilezzgq',
|
'url': 'http://www.iwara.tv/videos/nawkaumd6ilezzgq',
|
||||||
'md5': '1d85f1e5217d2791626cff5ec83bb189',
|
# md5 is unstable
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '6liAP9s2Ojc',
|
'id': '6liAP9s2Ojc',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'age_limit': 0,
|
'age_limit': 18,
|
||||||
'title': '[MMD] Do It Again Ver.2 [1080p 60FPS] (Motion,Camera,Wav+DL)',
|
'title': '[MMD] Do It Again Ver.2 [1080p 60FPS] (Motion,Camera,Wav+DL)',
|
||||||
'description': 'md5:590c12c0df1443d833fbebe05da8c47a',
|
'description': 'md5:590c12c0df1443d833fbebe05da8c47a',
|
||||||
'upload_date': '20160910',
|
'upload_date': '20160910',
|
||||||
@ -52,9 +56,9 @@ class IwaraIE(InfoExtractor):
|
|||||||
# ecchi is 'sexy' in Japanese
|
# ecchi is 'sexy' in Japanese
|
||||||
age_limit = 18 if hostname.split('.')[0] == 'ecchi' else 0
|
age_limit = 18 if hostname.split('.')[0] == 'ecchi' else 0
|
||||||
|
|
||||||
entries = self._parse_html5_media_entries(url, webpage, video_id)
|
video_data = self._download_json('http://www.iwara.tv/api/video/%s' % video_id, video_id)
|
||||||
|
|
||||||
if not entries:
|
if not video_data:
|
||||||
iframe_url = self._html_search_regex(
|
iframe_url = self._html_search_regex(
|
||||||
r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1',
|
r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1',
|
||||||
webpage, 'iframe URL', group='url')
|
webpage, 'iframe URL', group='url')
|
||||||
@ -67,11 +71,25 @@ class IwaraIE(InfoExtractor):
|
|||||||
title = remove_end(self._html_search_regex(
|
title = remove_end(self._html_search_regex(
|
||||||
r'<title>([^<]+)</title>', webpage, 'title'), ' | Iwara')
|
r'<title>([^<]+)</title>', webpage, 'title'), ' | Iwara')
|
||||||
|
|
||||||
info_dict = entries[0]
|
formats = []
|
||||||
info_dict.update({
|
for a_format in video_data:
|
||||||
|
format_id = a_format.get('resolution')
|
||||||
|
height = int_or_none(self._search_regex(
|
||||||
|
r'(\d+)p', format_id, 'height', default=None))
|
||||||
|
formats.append({
|
||||||
|
'url': a_format['uri'],
|
||||||
|
'format_id': format_id,
|
||||||
|
'ext': mimetype2ext(a_format.get('mime')) or 'mp4',
|
||||||
|
'height': height,
|
||||||
|
'width': int_or_none(height / 9.0 * 16.0 if height else None),
|
||||||
|
'quality': 1 if format_id == 'Source' else 0,
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
})
|
'formats': formats,
|
||||||
|
}
|
||||||
return info_dict
|
|
||||||
|
@ -156,7 +156,12 @@ class PornHubIE(InfoExtractor):
|
|||||||
comment_count = self._extract_count(
|
comment_count = self._extract_count(
|
||||||
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
||||||
|
|
||||||
video_urls = list(map(compat_urllib_parse_unquote, re.findall(r"player_quality_[0-9]{3}p\s*=\s*'([^']+)'", webpage)))
|
video_urls = []
|
||||||
|
for quote, video_url in re.findall(
|
||||||
|
r'player_quality_[0-9]{3,4}p\s*=\s*(["\'])(.+?)\1;', webpage):
|
||||||
|
video_urls.append(compat_urllib_parse_unquote(re.sub(
|
||||||
|
r'{0}\s*\+\s*{0}'.format(quote), '', video_url)))
|
||||||
|
|
||||||
if webpage.find('"encrypted":true') != -1:
|
if webpage.find('"encrypted":true') != -1:
|
||||||
password = compat_urllib_parse_unquote_plus(
|
password = compat_urllib_parse_unquote_plus(
|
||||||
self._search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
|
self._search_regex(r'"video_title":"([^"]+)', webpage, 'password'))
|
||||||
|
@ -4,11 +4,7 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urlparse
|
from ..utils import js_to_json
|
||||||
from ..utils import (
|
|
||||||
js_to_json,
|
|
||||||
unified_strdate,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class SportBoxEmbedIE(InfoExtractor):
|
class SportBoxEmbedIE(InfoExtractor):
|
||||||
|
99
youtube_dl/extractor/videopress.py
Normal file
99
youtube_dl/extractor/videopress.py
Normal file
@ -0,0 +1,99 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import random
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
float_or_none,
|
||||||
|
parse_age_limit,
|
||||||
|
qualities,
|
||||||
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class VideoPressIE(InfoExtractor):
    """Extractor for videopress.com embed pages.

    Metadata and file locations are read from the public WordPress REST
    endpoint for the video rather than from the embed page itself.
    """
    _VALID_URL = r'https?://videopress\.com/embed/(?P<id>[\da-zA-Z]+)'
    _TESTS = [{
        'url': 'https://videopress.com/embed/kUJmAcSf',
        'md5': '706956a6c875873d51010921310e4bc6',
        'info_dict': {
            'id': 'kUJmAcSf',
            'ext': 'mp4',
            'title': 'VideoPress Demo',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 634.6,
            'timestamp': 1434983935,
            'upload_date': '20150622',
            'age_limit': 0,
        },
    }, {
        # 17+, requires birth_* params
        'url': 'https://videopress.com/embed/iH3gstfZ',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the src of every VideoPress embed iframe found in webpage."""
        embed_re = r'<iframe[^>]+src=["\']((?:https?://)?videopress\.com/embed/[\da-zA-Z]+)'
        return re.findall(embed_re, webpage)

    def _real_extract(self, url):
        """Build the info dict for a single VideoPress embed URL."""
        video_id = self._match_id(url)

        # A random birth date is always sent; per the test note above,
        # age-restricted (17+) videos require the birth_* params.
        birth_month = random.randint(1, 12)
        birth_day = random.randint(1, 31)
        birth_year = random.randint(1950, 1995)
        api_url = 'https://public-api.wordpress.com/rest/v1.1/videos/%s' % video_id
        video = self._download_json(
            api_url, video_id, query={
                'birth_month': birth_month,
                'birth_day': birth_day,
                'birth_year': birth_year,
            })

        title = video['title']

        def scheme_base(scheme):
            # Base URL advertised for the given scheme, if it is a string.
            return try_get(
                video, lambda x: x['file_url_base'][scheme], compat_str)

        # Prefer the https base and only fall back to plain http.
        base_url = scheme_base('https') or scheme_base('http')

        QUALITIES = ('std', 'dvd', 'hd')
        quality = qualities(QUALITIES)

        formats = []
        for format_id, files in video['files'].items():
            if isinstance(files, dict):
                for ext, path in files.items():
                    # Only the mp4 and ogg renditions are turned into formats.
                    if ext not in ('mp4', 'ogg'):
                        continue
                    formats.append({
                        'url': urljoin(base_url, path),
                        'format_id': '%s-%s' % (format_id, ext),
                        'ext': determine_ext(path, ext),
                        'quality': quality(format_id),
                    })

        original_url = try_get(video, lambda x: x['original'], compat_str)
        if original_url:
            # Quality index one past the named tiers, i.e. above all of them.
            original_format = {
                'url': original_url,
                'format_id': 'original',
                'quality': len(QUALITIES),
            }
            formats.append(original_format)
        self._sort_formats(formats)

        info = {
            'id': video_id,
            'title': title,
            'description': video.get('description'),
            'thumbnail': video.get('poster'),
            'duration': float_or_none(video.get('duration'), 1000),
            'timestamp': unified_timestamp(video.get('upload_date')),
            'age_limit': parse_age_limit(video.get('rating')),
            'formats': formats,
        }
        return info
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2017.02.04.1'
|
__version__ = '2017.02.07'
|
||||||
|
Reference in New Issue
Block a user