Compare commits
51 Commits
2015.02.11
...
2015.02.17
Author | SHA1 | Date | |
---|---|---|---|
|
46312e0b46 | ||
|
f9216ed6ad | ||
|
65bf37ef83 | ||
|
f740fae2a4 | ||
|
fbc503d696 | ||
|
662435f728 | ||
|
163d966707 | ||
|
85729c51af | ||
|
1db5fbcfe3 | ||
|
59b8ab5834 | ||
|
a568180441 | ||
|
85e80f71cd | ||
|
bfa6bdcd8b | ||
|
03cd72b007 | ||
|
5bfd430f81 | ||
|
73fac4e911 | ||
|
8fb474fb17 | ||
|
f813928e4b | ||
|
b9c7a97318 | ||
|
9fb2f1cd6d | ||
|
6ca7732d5e | ||
|
b0ab0fac49 | ||
|
a294bce82f | ||
|
76d1466b08 | ||
|
1888d3f7b3 | ||
|
c2787701cc | ||
|
52e1d0ccc4 | ||
|
10e3c4c221 | ||
|
68f2d273bf | ||
|
7c86c21662 | ||
|
ae1580d790 | ||
|
3215c50f25 | ||
|
36f73e8044 | ||
|
a4f3d779db | ||
|
d9aa2b784d | ||
|
cffcbc02de | ||
|
9347fddbfc | ||
|
037e9437e4 | ||
|
36e7a4ca2e | ||
|
ae6423d704 | ||
|
7105440cec | ||
|
c80b9cd280 | ||
|
171ca612af | ||
|
c3d64fc1b3 | ||
|
7c24ce225d | ||
|
08b38d5401 | ||
|
024c53694d | ||
|
7e6011101f | ||
|
c40feaba77 | ||
|
8367d3f3cb | ||
|
8708d76425 |
1
AUTHORS
1
AUTHORS
@@ -110,3 +110,4 @@ Shaya Goldberg
|
||||
Paul Hartmann
|
||||
Frans de Jonge
|
||||
Robin de Rooij
|
||||
Ryan Schmidt
|
||||
|
@@ -161,6 +161,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--playlist-reverse Download playlist videos in reverse order
|
||||
--xattr-set-filesize (experimental) set file xattribute
|
||||
ytdl.filesize with expected filesize
|
||||
--hls-prefer-native (experimental) Use the native HLS
|
||||
downloader instead of ffmpeg.
|
||||
--external-downloader COMMAND (experimental) Use the specified external
|
||||
downloader. Currently supports
|
||||
aria2c,curl,wget
|
||||
@@ -397,6 +399,9 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
postprocessors (default)
|
||||
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
|
||||
postprocessors
|
||||
--ffmpeg-location PATH Location of the ffmpeg/avconv binary;
|
||||
either the path to the binary or its
|
||||
containing directory.
|
||||
--exec CMD Execute a command on the file after
|
||||
downloading, similar to find's -exec
|
||||
syntax. Example: --exec 'adb push {}
|
||||
|
@@ -1,4 +1,5 @@
|
||||
# Supported sites
|
||||
- **1tv**: Первый канал
|
||||
- **1up.com**
|
||||
- **220.ro**
|
||||
- **24video**
|
||||
@@ -60,6 +61,8 @@
|
||||
- **Brightcove**
|
||||
- **BuzzFeed**
|
||||
- **BYUtv**
|
||||
- **Camdemy**
|
||||
- **CamdemyFolder**
|
||||
- **Canal13cl**
|
||||
- **canalc2.tv**
|
||||
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
|
||||
@@ -134,7 +137,6 @@
|
||||
- **fernsehkritik.tv:postecke**
|
||||
- **Firedrive**
|
||||
- **Firstpost**
|
||||
- **firsttv**: Видеоархив - Первый канал
|
||||
- **Flickr**
|
||||
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
||||
- **Foxgay**
|
||||
@@ -174,6 +176,7 @@
|
||||
- **Helsinki**: helsinki.fi
|
||||
- **HentaiStigma**
|
||||
- **HistoricFilms**
|
||||
- **History**
|
||||
- **hitbox**
|
||||
- **hitbox:live**
|
||||
- **HornBunny**
|
||||
@@ -287,6 +290,8 @@
|
||||
- **nowvideo**: NowVideo
|
||||
- **npo.nl**
|
||||
- **npo.nl:live**
|
||||
- **npo.nl:radio**
|
||||
- **npo.nl:radio:fragment**
|
||||
- **NRK**
|
||||
- **NRKTV**
|
||||
- **ntv.ru**
|
||||
@@ -333,9 +338,9 @@
|
||||
- **Roxwel**
|
||||
- **RTBF**
|
||||
- **Rte**
|
||||
- **rtl.nl**: rtl.nl and rtlxl.nl
|
||||
- **RTL2**
|
||||
- **RTLnow**
|
||||
- **rtlxl.nl**
|
||||
- **RTP**
|
||||
- **RTS**: RTS.ch
|
||||
- **rtve.es:alacarta**: RTVE a la carta
|
||||
@@ -527,6 +532,7 @@
|
||||
- **XVideos**
|
||||
- **XXXYMovies**
|
||||
- **Yahoo**: Yahoo screen and movies
|
||||
- **Yam**
|
||||
- **YesJapan**
|
||||
- **Ynet**
|
||||
- **YouJizz**
|
||||
|
@@ -138,7 +138,7 @@ class TestDailymotionSubtitles(BaseTestSubtitles):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(len(subtitles.keys()), 5)
|
||||
self.assertTrue(len(subtitles.keys()) >= 6)
|
||||
|
||||
def test_list_subtitles(self):
|
||||
self.DL.expect_warning('Automatic Captions not supported by this server')
|
||||
@@ -247,7 +247,7 @@ class TestVimeoSubtitles(BaseTestSubtitles):
|
||||
def test_subtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(md5(subtitles['en']), '26399116d23ae3cf2c087cea94bc43b4')
|
||||
self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
|
||||
|
||||
def test_subtitles_lang(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
@@ -334,7 +334,7 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['cs']))
|
||||
self.assertEqual(md5(subtitles['cs']), '9bf52d9549533c32c427e264bf0847d4')
|
||||
self.assertTrue(len(subtitles['cs']) > 20000)
|
||||
|
||||
def test_nosubtitles(self):
|
||||
self.DL.expect_warning('video doesn\'t have subtitles')
|
||||
|
@@ -225,7 +225,6 @@ class YoutubeDL(object):
|
||||
call_home: Boolean, true iff we are allowed to contact the
|
||||
youtube-dl servers for debugging.
|
||||
sleep_interval: Number of seconds to sleep before each download.
|
||||
external_downloader: Executable of the external downloader to call.
|
||||
listformats: Print an overview of available video formats and exit.
|
||||
list_thumbnails: Print a table of all thumbnails and exit.
|
||||
match_filter: A function that gets called with the info_dict of
|
||||
@@ -235,6 +234,10 @@ class YoutubeDL(object):
|
||||
match_filter_func in utils.py is one example for this.
|
||||
no_color: Do not emit color codes in output.
|
||||
|
||||
The following options determine which downloader is picked:
|
||||
external_downloader: Executable of the external downloader to call.
|
||||
None or unset for standard (built-in) downloader.
|
||||
hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
|
||||
|
||||
The following parameters are not used by YoutubeDL itself, they are used by
|
||||
the FileDownloader:
|
||||
@@ -951,30 +954,9 @@ class YoutubeDL(object):
|
||||
return res
|
||||
|
||||
def _calc_cookies(self, info_dict):
|
||||
class _PseudoRequest(object):
|
||||
def __init__(self, url):
|
||||
self.url = url
|
||||
self.headers = {}
|
||||
self.unverifiable = False
|
||||
|
||||
def add_unredirected_header(self, k, v):
|
||||
self.headers[k] = v
|
||||
|
||||
def get_full_url(self):
|
||||
return self.url
|
||||
|
||||
def is_unverifiable(self):
|
||||
return self.unverifiable
|
||||
|
||||
def has_header(self, h):
|
||||
return h in self.headers
|
||||
|
||||
def get_header(self, h, default=None):
|
||||
return self.headers.get(h, default)
|
||||
|
||||
pr = _PseudoRequest(info_dict['url'])
|
||||
pr = compat_urllib_request.Request(info_dict['url'])
|
||||
self.cookiejar.add_cookie_header(pr)
|
||||
return pr.headers.get('Cookie')
|
||||
return pr.get_header('Cookie')
|
||||
|
||||
def process_video_result(self, info_dict, download=True):
|
||||
assert info_dict.get('_type', 'video') == 'video'
|
||||
@@ -1298,7 +1280,7 @@ class YoutubeDL(object):
|
||||
downloaded = []
|
||||
success = True
|
||||
merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
|
||||
if not merger._executable:
|
||||
if not merger.available:
|
||||
postprocessors = []
|
||||
self.report_warning('You have requested multiple '
|
||||
'formats but ffmpeg or avconv are not installed.'
|
||||
@@ -1647,7 +1629,7 @@ class YoutubeDL(object):
|
||||
self._write_string('[debug] Python version %s - %s\n' % (
|
||||
platform.python_version(), platform_name()))
|
||||
|
||||
exe_versions = FFmpegPostProcessor.get_versions()
|
||||
exe_versions = FFmpegPostProcessor.get_versions(self)
|
||||
exe_versions['rtmpdump'] = rtmpdump_version()
|
||||
exe_str = ', '.join(
|
||||
'%s %s' % (exe, v)
|
||||
|
@@ -350,6 +350,8 @@ def _real_main(argv=None):
|
||||
'xattr_set_filesize': opts.xattr_set_filesize,
|
||||
'match_filter': match_filter,
|
||||
'no_color': opts.no_color,
|
||||
'ffmpeg_location': opts.ffmpeg_location,
|
||||
'hls_prefer_native': opts.hls_prefer_native,
|
||||
}
|
||||
|
||||
with YoutubeDL(ydl_opts) as ydl:
|
||||
|
@@ -34,6 +34,9 @@ def get_suitable_downloader(info_dict, params={}):
|
||||
if ed.supports(info_dict):
|
||||
return ed
|
||||
|
||||
if protocol == 'm3u8' and params.get('hls_prefer_native'):
|
||||
return NativeHlsFD
|
||||
|
||||
return PROTOCOL_MAP.get(protocol, HttpFD)
|
||||
|
||||
|
||||
|
@@ -75,7 +75,7 @@ class ExternalFD(FileDownloader):
|
||||
|
||||
class CurlFD(ExternalFD):
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = [self.exe, '-o', tmpfilename]
|
||||
cmd = [self.exe, '--location', '-o', tmpfilename]
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', '%s: %s' % (key, val)]
|
||||
cmd += self._source_address('--interface')
|
||||
|
@@ -23,15 +23,14 @@ class HlsFD(FileDownloader):
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
||||
ffpp = FFmpegPostProcessor(downloader=self)
|
||||
program = ffpp._executable
|
||||
if program is None:
|
||||
if not ffpp.available():
|
||||
self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
|
||||
return False
|
||||
ffpp.check_version()
|
||||
|
||||
args = [
|
||||
encodeArgument(opt)
|
||||
for opt in (program, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
|
||||
for opt in (ffpp.executable, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
|
||||
args.append(encodeFilename(tmpfilename, True))
|
||||
|
||||
retval = subprocess.call(args)
|
||||
@@ -48,7 +47,7 @@ class HlsFD(FileDownloader):
|
||||
return True
|
||||
else:
|
||||
self.to_stderr('\n')
|
||||
self.report_error('%s exited with code %d' % (program, retval))
|
||||
self.report_error('%s exited with code %d' % (ffpp.basename, retval))
|
||||
return False
|
||||
|
||||
|
||||
|
@@ -49,6 +49,10 @@ from .brightcove import BrightcoveIE
|
||||
from .buzzfeed import BuzzFeedIE
|
||||
from .byutv import BYUtvIE
|
||||
from .c56 import C56IE
|
||||
from .camdemy import (
|
||||
CamdemyIE,
|
||||
CamdemyFolderIE
|
||||
)
|
||||
from .canal13cl import Canal13clIE
|
||||
from .canalplus import CanalplusIE
|
||||
from .canalc2 import Canalc2IE
|
||||
@@ -185,6 +189,7 @@ from .hellporno import HellPornoIE
|
||||
from .helsinki import HelsinkiIE
|
||||
from .hentaistigma import HentaiStigmaIE
|
||||
from .historicfilms import HistoricFilmsIE
|
||||
from .history import HistoryIE
|
||||
from .hitbox import HitboxIE, HitboxLiveIE
|
||||
from .hornbunny import HornBunnyIE
|
||||
from .hostingbulk import HostingBulkIE
|
||||
@@ -314,6 +319,8 @@ from .nowvideo import NowVideoIE
|
||||
from .npo import (
|
||||
NPOIE,
|
||||
NPOLiveIE,
|
||||
NPORadioIE,
|
||||
NPORadioFragmentIE,
|
||||
TegenlichtVproIE,
|
||||
)
|
||||
from .nrk import (
|
||||
@@ -364,7 +371,7 @@ from .rottentomatoes import RottenTomatoesIE
|
||||
from .roxwel import RoxwelIE
|
||||
from .rtbf import RTBFIE
|
||||
from .rte import RteIE
|
||||
from .rtlnl import RtlXlIE
|
||||
from .rtlnl import RtlNlIE
|
||||
from .rtlnow import RTLnowIE
|
||||
from .rtl2 import RTL2IE
|
||||
from .rtp import RTPIE
|
||||
@@ -572,6 +579,7 @@ from .yahoo import (
|
||||
YahooIE,
|
||||
YahooSearchIE,
|
||||
)
|
||||
from .yam import YamIE
|
||||
from .yesjapan import YesJapanIE
|
||||
from .ynet import YnetIE
|
||||
from .youjizz import YouJizzIE
|
||||
|
@@ -50,7 +50,7 @@ class BambuserIE(InfoExtractor):
|
||||
'duration': int(info['length']),
|
||||
'view_count': int(info['views_total']),
|
||||
'uploader': info['username'],
|
||||
'uploader_id': info['uid'],
|
||||
'uploader_id': info['owner']['uid'],
|
||||
}
|
||||
|
||||
|
||||
|
@@ -273,7 +273,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
return programme_id, title, description, duration, formats, subtitles
|
||||
except ExtractorError as ee:
|
||||
if not isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
|
||||
if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
|
||||
raise
|
||||
|
||||
# fallback to legacy playlist
|
||||
|
@@ -9,7 +9,7 @@ class BeegIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://beeg.com/5416503',
|
||||
'md5': '634526ae978711f6b748fe0dd6c11f57',
|
||||
'md5': '1bff67111adb785c51d1b42959ec10e5',
|
||||
'info_dict': {
|
||||
'id': '5416503',
|
||||
'ext': 'mp4',
|
||||
|
153
youtube_dl/extractor/camdemy.py
Normal file
153
youtube_dl/extractor/camdemy.py
Normal file
@@ -0,0 +1,153 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class CamdemyIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# single file
|
||||
'url': 'http://www.camdemy.com/media/5181/',
|
||||
'md5': '5a5562b6a98b37873119102e052e311b',
|
||||
'info_dict': {
|
||||
'id': '5181',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': '',
|
||||
'creator': 'ss11spring',
|
||||
'upload_date': '20130114',
|
||||
'timestamp': 1358154556,
|
||||
'view_count': int,
|
||||
}
|
||||
}, {
|
||||
# With non-empty description
|
||||
'url': 'http://www.camdemy.com/media/13885',
|
||||
'md5': '4576a3bb2581f86c61044822adbd1249',
|
||||
'info_dict': {
|
||||
'id': '13885',
|
||||
'ext': 'mp4',
|
||||
'title': 'EverCam + Camdemy QuickStart',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'description': 'md5:050b62f71ed62928f8a35f1a41e186c9',
|
||||
'creator': 'evercam',
|
||||
'upload_date': '20140620',
|
||||
'timestamp': 1403271569,
|
||||
}
|
||||
}, {
|
||||
# External source
|
||||
'url': 'http://www.camdemy.com/media/14842',
|
||||
'md5': '50e1c3c3aa233d3d7b7daa2fa10b1cf7',
|
||||
'info_dict': {
|
||||
'id': '2vsYQzNIsJo',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20130211',
|
||||
'uploader': 'Hun Kim',
|
||||
'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
|
||||
'uploader_id': 'hunkimtutorials',
|
||||
'title': 'Excel 2013 Tutorial - How to add Password Protection',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
page = self._download_webpage(url, video_id)
|
||||
|
||||
src_from = self._html_search_regex(
|
||||
r"<div class='srcFrom'>Source: <a title='([^']+)'", page,
|
||||
'external source', default=None)
|
||||
if src_from:
|
||||
return self.url_result(src_from)
|
||||
|
||||
oembed_obj = self._download_json(
|
||||
'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id)
|
||||
|
||||
thumb_url = oembed_obj['thumbnail_url']
|
||||
video_folder = compat_urlparse.urljoin(thumb_url, 'video/')
|
||||
file_list_doc = self._download_xml(
|
||||
compat_urlparse.urljoin(video_folder, 'fileList.xml'),
|
||||
video_id, 'Filelist XML')
|
||||
file_name = file_list_doc.find('./video/item/fileName').text
|
||||
video_url = compat_urlparse.urljoin(video_folder, file_name)
|
||||
|
||||
timestamp = parse_iso8601(self._html_search_regex(
|
||||
r"<div class='title'>Posted\s*:</div>\s*<div class='value'>([^<>]+)<",
|
||||
page, 'creation time', fatal=False),
|
||||
delimiter=' ', timezone=datetime.timedelta(hours=8))
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
r"<div class='title'>Views\s*:</div>\s*<div class='value'>([^<>]+)<",
|
||||
page, 'view count', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': oembed_obj['title'],
|
||||
'thumbnail': thumb_url,
|
||||
'description': self._html_search_meta('description', page),
|
||||
'creator': oembed_obj['author_name'],
|
||||
'duration': oembed_obj['duration'],
|
||||
'timestamp': timestamp,
|
||||
'view_count': view_count,
|
||||
}
|
||||
|
||||
|
||||
class CamdemyFolderIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www.camdemy.com/folder/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# links with trailing slash
|
||||
'url': 'http://www.camdemy.com/folder/450',
|
||||
'info_dict': {
|
||||
'id': '450',
|
||||
'title': '信號與系統 2012 & 2011 (Signals and Systems)',
|
||||
},
|
||||
'playlist_mincount': 145
|
||||
}, {
|
||||
# links without trailing slash
|
||||
# and multi-page
|
||||
'url': 'http://www.camdemy.com/folder/853',
|
||||
'info_dict': {
|
||||
'id': '853',
|
||||
'title': '科學計算 - 使用 Matlab'
|
||||
},
|
||||
'playlist_mincount': 20
|
||||
}, {
|
||||
# with displayMode parameter. For testing the codes to add parameters
|
||||
'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg',
|
||||
'info_dict': {
|
||||
'id': '853',
|
||||
'title': '科學計算 - 使用 Matlab'
|
||||
},
|
||||
'playlist_mincount': 20
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
folder_id = self._match_id(url)
|
||||
|
||||
# Add displayMode=list so that all links are displayed in a single page
|
||||
parsed_url = list(compat_urlparse.urlparse(url))
|
||||
query = dict(compat_urlparse.parse_qsl(parsed_url[4]))
|
||||
query.update({'displayMode': 'list'})
|
||||
parsed_url[4] = compat_urllib_parse.urlencode(query)
|
||||
final_url = compat_urlparse.urlunparse(parsed_url)
|
||||
|
||||
page = self._download_webpage(final_url, folder_id)
|
||||
matches = re.findall(r"href='(/media/\d+/?)'", page)
|
||||
|
||||
entries = [self.url_result('http://www.camdemy.com' + media_path)
|
||||
for media_path in matches]
|
||||
|
||||
folder_title = self._html_search_meta('keywords', page)
|
||||
|
||||
return self.playlist_result(entries, folder_id, folder_title)
|
@@ -665,7 +665,7 @@ class InfoExtractor(object):
|
||||
return RATING_TABLE.get(rating.lower(), None)
|
||||
|
||||
def _family_friendly_search(self, html):
|
||||
# See http://schema.org/VideoObj
|
||||
# See http://schema.org/VideoObject
|
||||
family_friendly = self._html_search_meta('isFamilyFriendly', html)
|
||||
|
||||
if not family_friendly:
|
||||
@@ -729,6 +729,7 @@ class InfoExtractor(object):
|
||||
f.get('language_preference') if f.get('language_preference') is not None else -1,
|
||||
f.get('quality') if f.get('quality') is not None else -1,
|
||||
f.get('tbr') if f.get('tbr') is not None else -1,
|
||||
f.get('filesize') if f.get('filesize') is not None else -1,
|
||||
f.get('vbr') if f.get('vbr') is not None else -1,
|
||||
f.get('height') if f.get('height') is not None else -1,
|
||||
f.get('width') if f.get('width') is not None else -1,
|
||||
@@ -736,7 +737,6 @@ class InfoExtractor(object):
|
||||
f.get('abr') if f.get('abr') is not None else -1,
|
||||
audio_ext_preference,
|
||||
f.get('fps') if f.get('fps') is not None else -1,
|
||||
f.get('filesize') if f.get('filesize') is not None else -1,
|
||||
f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
|
||||
f.get('source_preference') if f.get('source_preference') is not None else -1,
|
||||
f.get('format_id'),
|
||||
|
@@ -15,7 +15,7 @@ class DrTuberIE(InfoExtractor):
|
||||
'id': '1740434',
|
||||
'display_id': 'hot-perky-blonde-naked-golf',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hot Perky Blonde Naked Golf',
|
||||
'title': 'hot perky blonde naked golf',
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
@@ -36,7 +36,8 @@ class DrTuberIE(InfoExtractor):
|
||||
r'<source src="([^"]+)"', webpage, 'video URL')
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>([^<]+)\s*-\s*Free', webpage, 'title')
|
||||
[r'class="hd_title" style="[^"]+">([^<]+)</h1>', r'<title>([^<]+) - \d+'],
|
||||
webpage, 'title')
|
||||
|
||||
thumbnail = self._html_search_regex(
|
||||
r'poster="([^"]+)"',
|
||||
|
@@ -1,52 +1,71 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class FirstTVIE(InfoExtractor):
|
||||
IE_NAME = 'firsttv'
|
||||
IE_DESC = 'Видеоархив - Первый канал'
|
||||
_VALID_URL = r'http://(?:www\.)?1tv\.ru/videoarchive/(?P<id>\d+)'
|
||||
IE_NAME = '1tv'
|
||||
IE_DESC = 'Первый канал'
|
||||
_VALID_URL = r'http://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.1tv.ru/videoarchive/73390',
|
||||
'md5': '3de6390cf0cca4a5eae1d1d83895e5ad',
|
||||
'md5': '777f525feeec4806130f4f764bc18a4f',
|
||||
'info_dict': {
|
||||
'id': '73390',
|
||||
'ext': 'mp4',
|
||||
'title': 'Олимпийские канатные дороги',
|
||||
'description': 'md5:cc730d2bf4215463e37fff6a1e277b13',
|
||||
'thumbnail': 'http://img1.1tv.ru/imgsize640x360/PR20140210114657.JPG',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
|
||||
'duration': 149,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
},
|
||||
'skip': 'Only works from Russia',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930',
|
||||
'md5': 'a1b6b60d530ebcf8daacf4565762bbaf',
|
||||
'info_dict': {
|
||||
'id': '35930',
|
||||
'ext': 'mp4',
|
||||
'title': 'Наедине со всеми. Людмила Сенчина',
|
||||
'description': 'md5:89553aed1d641416001fe8d450f06cb9',
|
||||
'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
|
||||
'duration': 2694,
|
||||
},
|
||||
'skip': 'Only works from Russia',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id, 'Downloading page')
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'''(?s)jwplayer\('flashvideoportal_1'\)\.setup\({.*?'file': '([^']+)'.*?}\);''', webpage, 'video URL')
|
||||
r'''(?s)(?:jwplayer\('flashvideoportal_1'\)\.setup\({|var\s+playlistObj\s*=).*?'file'\s*:\s*'([^']+)'.*?}\);''',
|
||||
webpage, 'video URL')
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', webpage, 'title')
|
||||
[r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
|
||||
r"'title'\s*:\s*'([^']+)'"], webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>', webpage, 'description', fatal=False)
|
||||
r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>',
|
||||
webpage, 'description', default=None) or self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
duration = self._og_search_property('video:duration', webpage, 'video duration', fatal=False)
|
||||
duration = self._og_search_property(
|
||||
'video:duration', webpage,
|
||||
'video duration', fatal=False)
|
||||
|
||||
like_count = self._html_search_regex(r'title="Понравилось".*?/></label> \[(\d+)\]',
|
||||
webpage, 'like count', fatal=False)
|
||||
dislike_count = self._html_search_regex(r'title="Не понравилось".*?/></label> \[(\d+)\]',
|
||||
webpage, 'dislike count', fatal=False)
|
||||
like_count = self._html_search_regex(
|
||||
r'title="Понравилось".*?/></label> \[(\d+)\]',
|
||||
webpage, 'like count', default=None)
|
||||
dislike_count = self._html_search_regex(
|
||||
r'title="Не понравилось".*?/></label> \[(\d+)\]',
|
||||
webpage, 'dislike count', default=None)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -537,6 +537,15 @@ class GenericIE(InfoExtractor):
|
||||
'uploader_id': 'NationalArchives08',
|
||||
'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
|
||||
},
|
||||
},
|
||||
# rtl.nl embed
|
||||
{
|
||||
'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
|
||||
'playlist_mincount': 5,
|
||||
'info_dict': {
|
||||
'id': 'aanslagen-kopenhagen',
|
||||
'title': 'Aanslagen Kopenhagen | RTL Nieuws',
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
@@ -782,6 +791,13 @@ class GenericIE(InfoExtractor):
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
# Look for embedded rtl.nl player
|
||||
matches = re.findall(
|
||||
r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"',
|
||||
webpage)
|
||||
if matches:
|
||||
return _playlist_from_matches(matches, ie='RtlNl')
|
||||
|
||||
# Look for embedded (iframe) Vimeo player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
|
||||
@@ -789,7 +805,6 @@ class GenericIE(InfoExtractor):
|
||||
player_url = unescapeHTML(mobj.group('url'))
|
||||
surl = smuggle_url(player_url, {'Referer': url})
|
||||
return self.url_result(surl)
|
||||
|
||||
# Look for embedded (swf embed) Vimeo player
|
||||
mobj = re.search(
|
||||
r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
|
||||
|
31
youtube_dl/extractor/history.py
Normal file
31
youtube_dl/extractor/history.py
Normal file
@@ -0,0 +1,31 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
|
||||
|
||||
class HistoryIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?history\.com/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
|
||||
'md5': '6fe632d033c92aa10b8d4a9be047a7c5',
|
||||
'info_dict': {
|
||||
'id': 'bLx5Dv5Aka1G',
|
||||
'ext': 'mp4',
|
||||
'title': "Bet You Didn't Know: Valentine's Day",
|
||||
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id,
|
||||
webpage, 'video url')
|
||||
|
||||
return self.url_result(smuggle_url(video_url, {'sig': {'key': 'crazyjava', 'secret': 's3cr3t'}}))
|
@@ -1,7 +1,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
@@ -52,9 +51,9 @@ class NBCIE(InfoExtractor):
|
||||
|
||||
|
||||
class NBCNewsIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://www\.nbcnews\.com/
|
||||
((video/.+?/(?P<id>\d+))|
|
||||
(feature/[^/]+/(?P<title>.+)))
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
|
||||
(?:video/.+?/(?P<id>\d+)|
|
||||
(?:feature|nightly-news)/[^/]+/(?P<title>.+))
|
||||
'''
|
||||
|
||||
_TESTS = [
|
||||
@@ -89,6 +88,16 @@ class NBCNewsIE(InfoExtractor):
|
||||
'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
|
||||
'md5': 'b5dda8cddd8650baa0dcb616dd2cf60d',
|
||||
'info_dict': {
|
||||
'id': 'sekXqyTVnmN3',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
|
||||
'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -107,13 +116,13 @@ class NBCNewsIE(InfoExtractor):
|
||||
'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
|
||||
}
|
||||
else:
|
||||
# "feature" pages use theplatform.com
|
||||
# "feature" and "nightly-news" pages use theplatform.com
|
||||
title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, title)
|
||||
bootstrap_json = self._search_regex(
|
||||
r'var bootstrapJson = ({.+})\s*$', webpage, 'bootstrap json',
|
||||
flags=re.MULTILINE)
|
||||
bootstrap = json.loads(bootstrap_json)
|
||||
r'var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$',
|
||||
webpage, 'bootstrap json', flags=re.MULTILINE)
|
||||
bootstrap = self._parse_json(bootstrap_json, video_id)
|
||||
info = bootstrap['results'][0]['video']
|
||||
mpxid = info['mpxId']
|
||||
|
||||
|
@@ -1,6 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
fix_xml_ampersands,
|
||||
parse_duration,
|
||||
@@ -22,7 +23,7 @@ class NPOBaseIE(SubtitlesInfoExtractor):
|
||||
|
||||
class NPOIE(NPOBaseIE):
|
||||
IE_NAME = 'npo.nl'
|
||||
_VALID_URL = r'https?://www\.npo\.nl/[^/]+/[^/]+/(?P<id>[^/?]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?npo\.nl/(?!live|radio)[^/]+/[^/]+/(?P<id>[^/?]+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -185,7 +186,7 @@ class NPOIE(NPOBaseIE):
|
||||
|
||||
class NPOLiveIE(NPOBaseIE):
|
||||
IE_NAME = 'npo.nl:live'
|
||||
_VALID_URL = r'https?://www\.npo\.nl/live/(?P<id>.+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P<id>.+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.npo.nl/live/npo-1',
|
||||
@@ -260,6 +261,84 @@ class NPOLiveIE(NPOBaseIE):
|
||||
}
|
||||
|
||||
|
||||
class NPORadioIE(InfoExtractor):
|
||||
IE_NAME = 'npo.nl:radio'
|
||||
_VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)/?$'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.npo.nl/radio/radio-1',
|
||||
'info_dict': {
|
||||
'id': 'radio-1',
|
||||
'ext': 'mp3',
|
||||
'title': 're:^NPO Radio 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _html_get_attribute_regex(attribute):
|
||||
return r'{0}\s*=\s*\'([^\']+)\''.format(attribute)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
self._html_get_attribute_regex('data-channel'), webpage, 'title')
|
||||
|
||||
stream = self._parse_json(
|
||||
self._html_search_regex(self._html_get_attribute_regex('data-streams'), webpage, 'data-streams'),
|
||||
video_id)
|
||||
|
||||
codec = stream.get('codec')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': stream['url'],
|
||||
'title': self._live_title(title),
|
||||
'acodec': codec,
|
||||
'ext': codec,
|
||||
'is_live': True,
|
||||
}
|
||||
|
||||
|
||||
class NPORadioFragmentIE(InfoExtractor):
|
||||
IE_NAME = 'npo.nl:radio:fragment'
|
||||
_VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/[^/]+/fragment/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.npo.nl/radio/radio-5/fragment/174356',
|
||||
'md5': 'dd8cc470dad764d0fdc70a9a1e2d18c2',
|
||||
'info_dict': {
|
||||
'id': '174356',
|
||||
'ext': 'mp3',
|
||||
'title': 'Jubileumconcert Willeke Alberti',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
audio_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, audio_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'href="/radio/[^/]+/fragment/%s" title="([^"]+)"' % audio_id,
|
||||
webpage, 'title')
|
||||
|
||||
audio_url = self._search_regex(
|
||||
r"data-streams='([^']+)'", webpage, 'audio url')
|
||||
|
||||
return {
|
||||
'id': audio_id,
|
||||
'url': audio_url,
|
||||
'title': title,
|
||||
}
|
||||
|
||||
|
||||
class TegenlichtVproIE(NPOIE):
|
||||
IE_NAME = 'tegenlicht.vpro.nl'
|
||||
_VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?'
|
||||
|
@@ -1,16 +1,25 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_duration
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class RtlXlIE(InfoExtractor):
|
||||
IE_NAME = 'rtlxl.nl'
|
||||
_VALID_URL = r'https?://(www\.)?rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)'
|
||||
class RtlNlIE(InfoExtractor):
|
||||
IE_NAME = 'rtl.nl'
|
||||
IE_DESC = 'rtl.nl and rtlxl.nl'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(www\.)?
|
||||
(?:
|
||||
rtlxl\.nl/\#!/[^/]+/|
|
||||
rtl\.nl/system/videoplayer/[^?#]+?/video_embed\.html\#uuid=
|
||||
)
|
||||
(?P<id>[0-9a-f-]+)'''
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
|
||||
'md5': 'cc16baa36a6c169391f0764fa6b16654',
|
||||
'info_dict': {
|
||||
@@ -22,21 +31,30 @@ class RtlXlIE(InfoExtractor):
|
||||
'upload_date': '20140814',
|
||||
'duration': 576.880,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
|
||||
'md5': 'dea7474214af1271d91ef332fb8be7ea',
|
||||
'info_dict': {
|
||||
'id': '84ae5571-ac25-4225-ae0c-ef8d9efb2aed',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1424039400,
|
||||
'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag',
|
||||
'thumbnail': 're:^https?://screenshots\.rtl\.nl/system/thumb/sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
|
||||
'upload_date': '20150215',
|
||||
'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
uuid = mobj.group('uuid')
|
||||
|
||||
uuid = self._match_id(url)
|
||||
info = self._download_json(
|
||||
'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
|
||||
uuid)
|
||||
|
||||
material = info['material'][0]
|
||||
episode_info = info['episodes'][0]
|
||||
|
||||
progname = info['abstracts'][0]['name']
|
||||
subtitle = material['title'] or info['episodes'][0]['name']
|
||||
description = material.get('synopsis') or info['episodes'][0]['synopsis']
|
||||
|
||||
# Use unencrypted m3u8 streams (See https://github.com/rg3/youtube-dl/issues/4118)
|
||||
videopath = material['videopath'].replace('.f4m', '.m3u8')
|
||||
@@ -58,14 +76,29 @@ class RtlXlIE(InfoExtractor):
|
||||
'quality': 0,
|
||||
}
|
||||
])
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = []
|
||||
meta = info.get('meta', {})
|
||||
for p in ('poster_base_url', '"thumb_base_url"'):
|
||||
if not meta.get(p):
|
||||
continue
|
||||
|
||||
thumbnails.append({
|
||||
'url': self._proto_relative_url(meta[p] + uuid),
|
||||
'width': int_or_none(self._search_regex(
|
||||
r'/sz=([0-9]+)', meta[p], 'thumbnail width', fatal=False)),
|
||||
'height': int_or_none(self._search_regex(
|
||||
r'/sz=[0-9]+x([0-9]+)',
|
||||
meta[p], 'thumbnail height', fatal=False))
|
||||
})
|
||||
|
||||
return {
|
||||
'id': uuid,
|
||||
'title': '%s - %s' % (progname, subtitle),
|
||||
'formats': formats,
|
||||
'timestamp': material['original_date'],
|
||||
'description': episode_info['synopsis'],
|
||||
'description': description,
|
||||
'duration': parse_duration(material.get('duration')),
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
|
@@ -1,14 +1,30 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
def _get_api_key(api_path):
|
||||
if api_path.endswith('?'):
|
||||
api_path = api_path[:-1]
|
||||
|
||||
api_key = 'fb5f58a820353bd7095de526253c14fd'
|
||||
a = '{0:}{1:}{2:}'.format(api_key, api_path, int(round(time.time() / 24 / 3600)))
|
||||
return hashlib.md5(a.encode('ascii')).hexdigest()
|
||||
|
||||
|
||||
class StreamCZIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<id>[0-9]+)'
|
||||
_API_URL = 'http://www.stream.cz/API'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti',
|
||||
@@ -36,8 +52,11 @@ class StreamCZIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
data = self._download_json(
|
||||
'http://www.stream.cz/API/episode/%s' % video_id, video_id)
|
||||
api_path = '/episode/%s' % video_id
|
||||
|
||||
req = compat_urllib_request.Request(self._API_URL + api_path)
|
||||
req.add_header('Api-Password', _get_api_key(api_path))
|
||||
data = self._download_json(req, video_id)
|
||||
|
||||
formats = []
|
||||
for quality, video in enumerate(data['video_qualities']):
|
||||
|
@@ -52,7 +52,7 @@ class SunPornoIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
quality = qualities(['mp4', 'flv'])
|
||||
for video_url in re.findall(r'<source src="([^"]+)"', webpage):
|
||||
for video_url in re.findall(r'<(?:source|video) src="([^"]+)"', webpage):
|
||||
video_ext = determine_ext(video_url)
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
|
@@ -2,6 +2,11 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
import time
|
||||
import hmac
|
||||
import binascii
|
||||
import hashlib
|
||||
|
||||
|
||||
from .subtitles import SubtitlesInfoExtractor
|
||||
from ..compat import (
|
||||
@@ -11,6 +16,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
xpath_with_ns,
|
||||
unsmuggle_url,
|
||||
)
|
||||
|
||||
_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
|
||||
@@ -18,7 +24,7 @@ _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language
|
||||
|
||||
class ThePlatformIE(SubtitlesInfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://(?:link|player)\.theplatform\.com/[sp]/[^/]+/
|
||||
(?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
|
||||
(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
|
||||
|theplatform:)(?P<id>[^/\?&]+)'''
|
||||
|
||||
@@ -38,9 +44,33 @@ class ThePlatformIE(SubtitlesInfoExtractor):
|
||||
},
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
|
||||
flags = '10' if include_qs else '00'
|
||||
expiration_date = '%x' % (int(time.time()) + life)
|
||||
|
||||
def str_to_hex(str):
|
||||
return binascii.b2a_hex(str.encode('ascii')).decode('ascii')
|
||||
|
||||
def hex_to_str(hex):
|
||||
return binascii.a2b_hex(hex)
|
||||
|
||||
relative_path = url.split('http://link.theplatform.com/s/')[1].split('?')[0]
|
||||
clear_text = hex_to_str(flags + expiration_date + str_to_hex(relative_path))
|
||||
checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
|
||||
sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
|
||||
return '%s&sig=%s' % (url, sig)
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
provider_id = mobj.group('provider_id')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
if not provider_id:
|
||||
provider_id = 'dJ5BDC'
|
||||
|
||||
if mobj.group('config'):
|
||||
config_url = url + '&form=json'
|
||||
config_url = config_url.replace('swf/', 'config/')
|
||||
@@ -48,8 +78,12 @@ class ThePlatformIE(SubtitlesInfoExtractor):
|
||||
config = self._download_json(config_url, video_id, 'Downloading config')
|
||||
smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
|
||||
else:
|
||||
smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
|
||||
'format=smil&mbr=true'.format(video_id))
|
||||
smil_url = ('http://link.theplatform.com/s/{0}/{1}/meta.smil?'
|
||||
'format=smil&mbr=true'.format(provider_id, video_id))
|
||||
|
||||
sig = smuggled_data.get('sig')
|
||||
if sig:
|
||||
smil_url = self._sign_url(smil_url, sig['key'], sig['secret'])
|
||||
|
||||
meta = self._download_xml(smil_url, video_id)
|
||||
try:
|
||||
@@ -62,7 +96,7 @@ class ThePlatformIE(SubtitlesInfoExtractor):
|
||||
else:
|
||||
raise ExtractorError(error_msg, expected=True)
|
||||
|
||||
info_url = 'http://link.theplatform.com/s/dJ5BDC/{0}?format=preview'.format(video_id)
|
||||
info_url = 'http://link.theplatform.com/s/{0}/{1}?format=preview'.format(provider_id, video_id)
|
||||
info_json = self._download_webpage(info_url, video_id)
|
||||
info = json.loads(info_json)
|
||||
|
||||
|
81
youtube_dl/extractor/yam.py
Normal file
81
youtube_dl/extractor/yam.py
Normal file
@@ -0,0 +1,81 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
month_by_abbreviation,
|
||||
)
|
||||
|
||||
|
||||
class YamIE(InfoExtractor):
    """Extractor for media pages on mymedia.yam.com."""

    # Dots are escaped so that only the literal host name matches.
    _VALID_URL = r'http://mymedia\.yam\.com/m/(?P<id>\d+)'

    _TESTS = [{
        # An audio hosted on Yam
        'url': 'http://mymedia.yam.com/m/2283921',
        'md5': 'c011b8e262a52d5473d9c2e3c9963b9c',
        'info_dict': {
            'id': '2283921',
            'ext': 'mp3',
            'title': '發現 - 趙薇 京華煙雲主題曲',
            'uploader_id': 'princekt',
            'upload_date': '20080807',
            'duration': 313.0,
        }
    }, {
        # An external video hosted on YouTube
        'url': 'http://mymedia.yam.com/m/3598173',
        'md5': '0238ceec479c654e8c2f1223755bf3e9',
        'info_dict': {
            'id': 'pJ2Deys283c',
            'ext': 'mp4',
            'upload_date': '20150202',
            'uploader': '新莊社大瑜伽社',
            'description': 'md5:f5cc72f0baf259a70fb731654b0d2eff',
            'uploader_id': '2323agoy',
            'title': '外婆的澎湖灣KTV-潘安邦',
        }
    }]

    def _real_extract(self, url):
        """Return the info dict for a Yam media page.

        Pages that embed a YouTube player are delegated to the YouTube
        extractor; otherwise the media URL and duration come from the
        site's API response, which is parsed as a query string.
        """
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        # Is it hosted externally on YouTube?
        youtube_url = self._html_search_regex(
            r'<embed src="(http://www.youtube.com/[^"]+)"',
            page, 'YouTube url', default=None)
        if youtube_url:
            return self.url_result(youtube_url, 'Youtube')

        api_page = self._download_webpage(
            'http://mymedia.yam.com/api/a/?pID=' + video_id, video_id,
            note='Downloading API page')
        api_result_obj = compat_urlparse.parse_qs(api_page)

        uploader_id = self._html_search_regex(
            r'<!-- 發表作者 -->:[\n ]+<a href="/([a-z]+)"',
            page, 'uploader id', fatal=False)

        upload_date = None
        mobj = re.search(r'<!-- 發表於 -->(?P<mon>[A-Z][a-z]{2}) ' +
                         r'(?P<day>\d{1,2}), (?P<year>\d{4})', page)
        if mobj:
            month = month_by_abbreviation(mobj.group('mon'))
            # month_by_abbreviation returns None for an unknown
            # abbreviation; the upload date is optional metadata, so leave
            # it unset instead of failing in the %02d formatting.
            if month is not None:
                upload_date = '%s%02d%02d' % (
                    mobj.group('year'),
                    month,
                    int(mobj.group('day')))

        # The duration is optional as well: tolerate a response that has
        # no 'totaltime' entry.
        total_time = api_result_obj.get('totaltime')
        duration = (
            float_or_none(total_time[0], scale=1000) if total_time else None)

        return {
            'id': video_id,
            'url': api_result_obj['mp3file'][0],
            'title': self._html_search_meta('description', page),
            'duration': duration,
            'uploader_id': uploader_id,
            'upload_date': upload_date,
        }
|
@@ -424,6 +424,10 @@ def parseOpts(overrideArguments=None):
|
||||
'--xattr-set-filesize',
|
||||
dest='xattr_set_filesize', action='store_true',
|
||||
help='(experimental) set file xattribute ytdl.filesize with expected filesize')
|
||||
downloader.add_option(
|
||||
'--hls-prefer-native',
|
||||
dest='hls_prefer_native', action='store_true',
|
||||
help='(experimental) Use the native HLS downloader instead of ffmpeg.')
|
||||
downloader.add_option(
|
||||
'--external-downloader',
|
||||
dest='external_downloader', metavar='COMMAND',
|
||||
@@ -735,6 +739,10 @@ def parseOpts(overrideArguments=None):
|
||||
'--prefer-ffmpeg',
|
||||
action='store_true', dest='prefer_ffmpeg',
|
||||
help='Prefer ffmpeg over avconv for running the postprocessors')
|
||||
postproc.add_option(
|
||||
'--ffmpeg-location', '--avconv-location', metavar='PATH',
|
||||
dest='ffmpeg_location',
|
||||
help='Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory.')
|
||||
postproc.add_option(
|
||||
'--exec',
|
||||
metavar='CMD', dest='exec_cmd',
|
||||
|
@@ -30,54 +30,95 @@ class FFmpegPostProcessorError(PostProcessingError):
|
||||
class FFmpegPostProcessor(PostProcessor):
|
||||
def __init__(self, downloader=None, deletetempfiles=False):
|
||||
PostProcessor.__init__(self, downloader)
|
||||
self._versions = self.get_versions()
|
||||
self._deletetempfiles = deletetempfiles
|
||||
self._determine_executables()
|
||||
|
||||
def check_version(self):
|
||||
if not self._executable:
|
||||
if not self.available:
|
||||
raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.')
|
||||
|
||||
required_version = '10-0' if self._uses_avconv() else '1.0'
|
||||
required_version = '10-0' if self.basename == 'avconv' else '1.0'
|
||||
if is_outdated_version(
|
||||
self._versions[self._executable], required_version):
|
||||
self._versions[self.basename], required_version):
|
||||
warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
|
||||
self._executable, self._executable, required_version)
|
||||
self.basename, self.basename, required_version)
|
||||
if self._downloader:
|
||||
self._downloader.report_warning(warning)
|
||||
|
||||
@staticmethod
|
||||
def get_versions():
|
||||
def get_versions(downloader=None):
|
||||
return FFmpegPostProcessor(downloader)._versions
|
||||
|
||||
def _determine_executables(self):
|
||||
programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
|
||||
return dict((p, get_exe_version(p, args=['-version'])) for p in programs)
|
||||
prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', False)
|
||||
|
||||
@property
|
||||
def available(self):
|
||||
return self._executable is not None
|
||||
self.basename = None
|
||||
self.probe_basename = None
|
||||
|
||||
@property
|
||||
def _executable(self):
|
||||
if self._downloader.params.get('prefer_ffmpeg', False):
|
||||
self._paths = None
|
||||
self._versions = None
|
||||
if self._downloader:
|
||||
location = self._downloader.params.get('ffmpeg_location')
|
||||
if location is not None:
|
||||
if not os.path.exists(location):
|
||||
self._downloader.report_warning(
|
||||
'ffmpeg-location %s does not exist! '
|
||||
'Continuing without avconv/ffmpeg.' % (location))
|
||||
self._versions = {}
|
||||
return
|
||||
elif not os.path.isdir(location):
|
||||
basename = os.path.splitext(os.path.basename(location))[0]
|
||||
if basename not in programs:
|
||||
self._downloader.report_warning(
|
||||
'Cannot identify executable %s, its basename should be one of %s. '
|
||||
'Continuing without avconv/ffmpeg.' %
|
||||
(location, ', '.join(programs)))
|
||||
self._versions = {}
|
||||
return None
|
||||
location = os.path.dirname(os.path.abspath(location))
|
||||
if basename in ('ffmpeg', 'ffprobe'):
|
||||
prefer_ffmpeg = True
|
||||
|
||||
self._paths = dict(
|
||||
(p, os.path.join(location, p)) for p in programs)
|
||||
self._versions = dict(
|
||||
(p, get_exe_version(self._paths[p], args=['-version']))
|
||||
for p in programs)
|
||||
if self._versions is None:
|
||||
self._versions = dict(
|
||||
(p, get_exe_version(p, args=['-version'])) for p in programs)
|
||||
self._paths = dict((p, p) for p in programs)
|
||||
|
||||
if prefer_ffmpeg:
|
||||
prefs = ('ffmpeg', 'avconv')
|
||||
else:
|
||||
prefs = ('avconv', 'ffmpeg')
|
||||
for p in prefs:
|
||||
if self._versions[p]:
|
||||
return p
|
||||
return None
|
||||
self.basename = p
|
||||
break
|
||||
|
||||
@property
|
||||
def _probe_executable(self):
|
||||
if self._downloader.params.get('prefer_ffmpeg', False):
|
||||
if prefer_ffmpeg:
|
||||
prefs = ('ffprobe', 'avprobe')
|
||||
else:
|
||||
prefs = ('avprobe', 'ffprobe')
|
||||
for p in prefs:
|
||||
if self._versions[p]:
|
||||
return p
|
||||
return None
|
||||
self.probe_basename = p
|
||||
break
|
||||
|
||||
def _uses_avconv(self):
|
||||
return self._executable == 'avconv'
|
||||
@property
|
||||
def available(self):
|
||||
return self.basename is not None
|
||||
|
||||
@property
|
||||
def executable(self):
|
||||
return self._paths[self.basename]
|
||||
|
||||
@property
|
||||
def probe_executable(self):
|
||||
return self._paths[self.probe_basename]
|
||||
|
||||
def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
|
||||
self.check_version()
|
||||
@@ -88,14 +129,14 @@ class FFmpegPostProcessor(PostProcessor):
|
||||
files_cmd = []
|
||||
for path in input_paths:
|
||||
files_cmd.extend([encodeArgument('-i'), encodeFilename(path, True)])
|
||||
cmd = ([encodeFilename(self._executable, True), encodeArgument('-y')] +
|
||||
cmd = ([encodeFilename(self.executable, True), encodeArgument('-y')] +
|
||||
files_cmd +
|
||||
[encodeArgument(o) for o in opts] +
|
||||
[encodeFilename(self._ffmpeg_filename_argument(out_path), True)])
|
||||
|
||||
if self._downloader.params.get('verbose', False):
|
||||
self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd))
|
||||
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
|
||||
stdout, stderr = p.communicate()
|
||||
if p.returncode != 0:
|
||||
stderr = stderr.decode('utf-8', 'replace')
|
||||
@@ -127,14 +168,16 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
|
||||
|
||||
def get_audio_codec(self, path):
|
||||
|
||||
if not self._probe_executable:
|
||||
if not self.probe_executable:
|
||||
raise PostProcessingError('ffprobe or avprobe not found. Please install one.')
|
||||
try:
|
||||
cmd = [
|
||||
encodeFilename(self._probe_executable, True),
|
||||
encodeFilename(self.probe_executable, True),
|
||||
encodeArgument('-show_streams'),
|
||||
encodeFilename(self._ffmpeg_filename_argument(path), True)]
|
||||
handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
|
||||
if self._downloader.params.get('verbose', False):
|
||||
self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd)))
|
||||
handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE)
|
||||
output = handle.communicate()[0]
|
||||
if handle.wait() != 0:
|
||||
return None
|
||||
@@ -223,14 +266,14 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
|
||||
if self._nopostoverwrites and os.path.exists(encodeFilename(new_path)):
|
||||
self._downloader.to_screen('[youtube] Post-process file %s exists, skipping' % new_path)
|
||||
else:
|
||||
self._downloader.to_screen('[' + self._executable + '] Destination: ' + new_path)
|
||||
self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path)
|
||||
self.run_ffmpeg(path, new_path, acodec, more_opts)
|
||||
except:
|
||||
etype, e, tb = sys.exc_info()
|
||||
if isinstance(e, AudioConversionError):
|
||||
msg = 'audio conversion failed: ' + e.msg
|
||||
else:
|
||||
msg = 'error running ' + self._executable
|
||||
msg = 'error running ' + self.basename
|
||||
raise PostProcessingError(msg)
|
||||
|
||||
# Try to update the date time for extracted audio file.
|
||||
|
@@ -62,6 +62,11 @@ std_headers = {
|
||||
}
|
||||
|
||||
|
||||
ENGLISH_MONTH_NAMES = [
|
||||
'January', 'February', 'March', 'April', 'May', 'June',
|
||||
'July', 'August', 'September', 'October', 'November', 'December']
|
||||
|
||||
|
||||
def preferredencoding():
|
||||
"""Get preferred encoding.
|
||||
|
||||
@@ -666,26 +671,27 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
|
||||
req, **kwargs)
|
||||
|
||||
|
||||
def parse_iso8601(date_str, delimiter='T'):
|
||||
def parse_iso8601(date_str, delimiter='T', timezone=None):
|
||||
""" Return a UNIX timestamp from the given date """
|
||||
|
||||
if date_str is None:
|
||||
return None
|
||||
|
||||
m = re.search(
|
||||
r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
|
||||
date_str)
|
||||
if not m:
|
||||
timezone = datetime.timedelta()
|
||||
else:
|
||||
date_str = date_str[:-len(m.group(0))]
|
||||
if not m.group('sign'):
|
||||
if timezone is None:
|
||||
m = re.search(
|
||||
r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
|
||||
date_str)
|
||||
if not m:
|
||||
timezone = datetime.timedelta()
|
||||
else:
|
||||
sign = 1 if m.group('sign') == '+' else -1
|
||||
timezone = datetime.timedelta(
|
||||
hours=sign * int(m.group('hours')),
|
||||
minutes=sign * int(m.group('minutes')))
|
||||
date_str = date_str[:-len(m.group(0))]
|
||||
if not m.group('sign'):
|
||||
timezone = datetime.timedelta()
|
||||
else:
|
||||
sign = 1 if m.group('sign') == '+' else -1
|
||||
timezone = datetime.timedelta(
|
||||
hours=sign * int(m.group('hours')),
|
||||
minutes=sign * int(m.group('minutes')))
|
||||
date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
|
||||
dt = datetime.datetime.strptime(date_str, date_format) - timezone
|
||||
return calendar.timegm(dt.timetuple())
|
||||
@@ -1184,11 +1190,18 @@ def get_term_width():
|
||||
def month_by_name(name):
|
||||
""" Return the number of a month by (locale-independently) English name """
|
||||
|
||||
ENGLISH_NAMES = [
|
||||
'January', 'February', 'March', 'April', 'May', 'June',
|
||||
'July', 'August', 'September', 'October', 'November', 'December']
|
||||
try:
|
||||
return ENGLISH_NAMES.index(name) + 1
|
||||
return ENGLISH_MONTH_NAMES.index(name) + 1
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
def month_by_abbreviation(abbrev):
    """ Return the number (1-12) of the month whose English name starts
    with the given three-letter abbreviation (locale-independent), or
    None if no month matches """

    for number, month_name in enumerate(ENGLISH_MONTH_NAMES, 1):
        if month_name[:3] == abbrev:
            return number
    return None
|
||||
|
||||
|
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2015.02.11'
|
||||
__version__ = '2015.02.17'
|
||||
|
Reference in New Issue
Block a user