Compare commits
38 Commits
2014.02.20
...
2014.02.24
Author | SHA1 | Date | |
---|---|---|---|
0bf5cf9886 | |||
919052d094 | |||
a2dafe2887 | |||
92661c994b | |||
ffe8fe356a | |||
bc2f773b4f | |||
f919201ecc | |||
7ff5d5c2e2 | |||
9b77f951c7 | |||
a25f2f990a | |||
78b373975d | |||
2fcc873c4c | |||
23c2baadb3 | |||
521ee82334 | |||
1df96e59ce | |||
3e123c1e28 | |||
f38da66731 | |||
06aabfc422 | |||
1052d2bfec | |||
5e0b652344 | |||
0f8f097183 | |||
491ed3dda2 | |||
af284c6d1b | |||
41d3ec5fba | |||
0568c352f3 | |||
2e7b4cb714 | |||
9767726b66 | |||
9ddfd84e41 | |||
1cf563d84b | |||
7928024f57 | |||
3eb38acb43 | |||
f7300c5c90 | |||
3489b7d26c | |||
acd2bcc384 | |||
43e77ca455 | |||
da36297988 | |||
dbb94fb044 | |||
d68f0cdb23 |
@ -14,9 +14,9 @@
|
|||||||
|
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
skip_tests=false
|
skip_tests=true
|
||||||
if [ "$1" = '--skip-test' ]; then
|
if [ "$1" = '--run-tests' ]; then
|
||||||
skip_tests=true
|
skip_tests=false
|
||||||
shift
|
shift
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
@ -18,6 +18,7 @@ from test.helper import (
|
|||||||
import hashlib
|
import hashlib
|
||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
|
import re
|
||||||
import socket
|
import socket
|
||||||
|
|
||||||
import youtube_dl.YoutubeDL
|
import youtube_dl.YoutubeDL
|
||||||
@ -137,12 +138,21 @@ def generator(test_case):
|
|||||||
with io.open(info_json_fn, encoding='utf-8') as infof:
|
with io.open(info_json_fn, encoding='utf-8') as infof:
|
||||||
info_dict = json.load(infof)
|
info_dict = json.load(infof)
|
||||||
for (info_field, expected) in tc.get('info_dict', {}).items():
|
for (info_field, expected) in tc.get('info_dict', {}).items():
|
||||||
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
if isinstance(expected, compat_str) and expected.startswith('re:'):
|
||||||
got = 'md5:' + md5(info_dict.get(info_field))
|
|
||||||
else:
|
|
||||||
got = info_dict.get(info_field)
|
got = info_dict.get(info_field)
|
||||||
self.assertEqual(expected, got,
|
match_str = expected[len('re:'):]
|
||||||
u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
|
match_rex = re.compile(match_str)
|
||||||
|
|
||||||
|
self.assertTrue(
|
||||||
|
isinstance(got, compat_str) and match_rex.match(got),
|
||||||
|
u'field %s (value: %r) should match %r' % (info_field, got, match_str))
|
||||||
|
else:
|
||||||
|
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
||||||
|
got = 'md5:' + md5(info_dict.get(info_field))
|
||||||
|
else:
|
||||||
|
got = info_dict.get(info_field)
|
||||||
|
self.assertEqual(expected, got,
|
||||||
|
u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
|
||||||
|
|
||||||
# If checkable fields are missing from the test case, print the info_dict
|
# If checkable fields are missing from the test case, print the info_dict
|
||||||
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
|
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
|
||||||
|
@ -46,6 +46,7 @@ __authors__ = (
|
|||||||
'Andreas Schmitz',
|
'Andreas Schmitz',
|
||||||
'Michael Kaiser',
|
'Michael Kaiser',
|
||||||
'Niklas Laxström',
|
'Niklas Laxström',
|
||||||
|
'David Triendl',
|
||||||
)
|
)
|
||||||
|
|
||||||
__license__ = 'Public Domain'
|
__license__ = 'Public Domain'
|
||||||
|
@ -12,7 +12,6 @@ from .http import HttpFD
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
struct_pack,
|
struct_pack,
|
||||||
struct_unpack,
|
struct_unpack,
|
||||||
compat_urllib_request,
|
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
format_bytes,
|
format_bytes,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
@ -117,8 +116,8 @@ class FlvReader(io.BytesIO):
|
|||||||
self.read_unsigned_char()
|
self.read_unsigned_char()
|
||||||
# flags
|
# flags
|
||||||
self.read(3)
|
self.read(3)
|
||||||
# BootstrapinfoVersion
|
|
||||||
bootstrap_info_version = self.read_unsigned_int()
|
self.read_unsigned_int() # BootstrapinfoVersion
|
||||||
# Profile,Live,Update,Reserved
|
# Profile,Live,Update,Reserved
|
||||||
self.read(1)
|
self.read(1)
|
||||||
# time scale
|
# time scale
|
||||||
@ -127,15 +126,15 @@ class FlvReader(io.BytesIO):
|
|||||||
self.read_unsigned_long_long()
|
self.read_unsigned_long_long()
|
||||||
# SmpteTimeCodeOffset
|
# SmpteTimeCodeOffset
|
||||||
self.read_unsigned_long_long()
|
self.read_unsigned_long_long()
|
||||||
# MovieIdentifier
|
|
||||||
movie_identifier = self.read_string()
|
self.read_string() # MovieIdentifier
|
||||||
server_count = self.read_unsigned_char()
|
server_count = self.read_unsigned_char()
|
||||||
# ServerEntryTable
|
# ServerEntryTable
|
||||||
for i in range(server_count):
|
for i in range(server_count):
|
||||||
self.read_string()
|
self.read_string()
|
||||||
quality_count = self.read_unsigned_char()
|
quality_count = self.read_unsigned_char()
|
||||||
# QualityEntryTable
|
# QualityEntryTable
|
||||||
for i in range(server_count):
|
for i in range(quality_count):
|
||||||
self.read_string()
|
self.read_string()
|
||||||
# DrmData
|
# DrmData
|
||||||
self.read_string()
|
self.read_string()
|
||||||
|
@ -19,6 +19,7 @@ from .bbccouk import BBCCoUkIE
|
|||||||
from .blinkx import BlinkxIE
|
from .blinkx import BlinkxIE
|
||||||
from .bliptv import BlipTVIE, BlipTVUserIE
|
from .bliptv import BlipTVIE, BlipTVUserIE
|
||||||
from .bloomberg import BloombergIE
|
from .bloomberg import BloombergIE
|
||||||
|
from .br import BRIE
|
||||||
from .breakcom import BreakIE
|
from .breakcom import BreakIE
|
||||||
from .brightcove import BrightcoveIE
|
from .brightcove import BrightcoveIE
|
||||||
from .c56 import C56IE
|
from .c56 import C56IE
|
||||||
@ -186,6 +187,7 @@ from .rutube import (
|
|||||||
RutubeMovieIE,
|
RutubeMovieIE,
|
||||||
RutubePersonIE,
|
RutubePersonIE,
|
||||||
)
|
)
|
||||||
|
from .savefrom import SaveFromIE
|
||||||
from .servingsys import ServingSysIE
|
from .servingsys import ServingSysIE
|
||||||
from .sina import SinaIE
|
from .sina import SinaIE
|
||||||
from .slashdot import SlashdotIE
|
from .slashdot import SlashdotIE
|
||||||
@ -224,6 +226,7 @@ from .tinypic import TinyPicIE
|
|||||||
from .toutv import TouTvIE
|
from .toutv import TouTvIE
|
||||||
from .traileraddict import TrailerAddictIE
|
from .traileraddict import TrailerAddictIE
|
||||||
from .trilulilu import TriluliluIE
|
from .trilulilu import TriluliluIE
|
||||||
|
from .trutube import TruTubeIE
|
||||||
from .tube8 import Tube8IE
|
from .tube8 import Tube8IE
|
||||||
from .tudou import TudouIE
|
from .tudou import TudouIE
|
||||||
from .tumblr import TumblrIE
|
from .tumblr import TumblrIE
|
||||||
@ -238,6 +241,7 @@ from .vesti import VestiIE
|
|||||||
from .vevo import VevoIE
|
from .vevo import VevoIE
|
||||||
from .vice import ViceIE
|
from .vice import ViceIE
|
||||||
from .viddler import ViddlerIE
|
from .viddler import ViddlerIE
|
||||||
|
from .videobam import VideoBamIE
|
||||||
from .videodetective import VideoDetectiveIE
|
from .videodetective import VideoDetectiveIE
|
||||||
from .videofyme import VideofyMeIE
|
from .videofyme import VideofyMeIE
|
||||||
from .videopremium import VideoPremiumIE
|
from .videopremium import VideoPremiumIE
|
||||||
|
80
youtube_dl/extractor/br.py
Normal file
80
youtube_dl/extractor/br.py
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class BRIE(InfoExtractor):
|
||||||
|
IE_DESC = "Bayerischer Rundfunk Mediathek"
|
||||||
|
_VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?P<id>[a-z0-9\-]+)\.html$"
|
||||||
|
_BASE_URL = "http://www.br.de"
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
"url": "http://www.br.de/mediathek/video/anselm-gruen-114.html",
|
||||||
|
"md5": "c4f83cf0f023ba5875aba0bf46860df2",
|
||||||
|
"info_dict": {
|
||||||
|
"id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532",
|
||||||
|
"ext": "mp4",
|
||||||
|
"title": "Feiern und Verzichten",
|
||||||
|
"description": "Anselm Grün: Feiern und Verzichten",
|
||||||
|
"uploader": "BR/Birgit Baier",
|
||||||
|
"upload_date": "20140301"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
display_id = mobj.group('id')
|
||||||
|
page = self._download_webpage(url, display_id)
|
||||||
|
xml_url = self._search_regex(
|
||||||
|
r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL")
|
||||||
|
xml = self._download_xml(self._BASE_URL + xml_url, None)
|
||||||
|
|
||||||
|
videos = [{
|
||||||
|
"id": xml_video.get("externalId"),
|
||||||
|
"title": xml_video.find("title").text,
|
||||||
|
"formats": self._extract_formats(xml_video.find("assets")),
|
||||||
|
"thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")),
|
||||||
|
"description": " ".join(xml_video.find("shareTitle").text.splitlines()),
|
||||||
|
"uploader": xml_video.find("author").text,
|
||||||
|
"upload_date": "".join(reversed(xml_video.find("broadcastDate").text.split("."))),
|
||||||
|
"webpage_url": xml_video.find("permalink").text,
|
||||||
|
} for xml_video in xml.findall("video")]
|
||||||
|
|
||||||
|
if len(videos) > 1:
|
||||||
|
self._downloader.report_warning(
|
||||||
|
'found multiple videos; please '
|
||||||
|
'report this with the video URL to http://yt-dl.org/bug')
|
||||||
|
if not videos:
|
||||||
|
raise ExtractorError('No video entries found')
|
||||||
|
return videos[0]
|
||||||
|
|
||||||
|
def _extract_formats(self, assets):
|
||||||
|
formats = [{
|
||||||
|
"url": asset.find("downloadUrl").text,
|
||||||
|
"ext": asset.find("mediaType").text,
|
||||||
|
"format_id": asset.get("type"),
|
||||||
|
"width": int(asset.find("frameWidth").text),
|
||||||
|
"height": int(asset.find("frameHeight").text),
|
||||||
|
"tbr": int(asset.find("bitrateVideo").text),
|
||||||
|
"abr": int(asset.find("bitrateAudio").text),
|
||||||
|
"vcodec": asset.find("codecVideo").text,
|
||||||
|
"container": asset.find("mediaType").text,
|
||||||
|
"filesize": int(asset.find("size").text),
|
||||||
|
} for asset in assets.findall("asset")
|
||||||
|
if asset.find("downloadUrl") is not None]
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
return formats
|
||||||
|
|
||||||
|
def _extract_thumbnails(self, variants):
|
||||||
|
thumbnails = [{
|
||||||
|
"url": self._BASE_URL + variant.find("url").text,
|
||||||
|
"width": int(variant.find("width").text),
|
||||||
|
"height": int(variant.find("height").text),
|
||||||
|
} for variant in variants.findall("variant")]
|
||||||
|
thumbnails.sort(key=lambda x: x["width"] * x["height"], reverse=True)
|
||||||
|
return thumbnails
|
@ -23,8 +23,8 @@ class BreakIE(InfoExtractor):
|
|||||||
video_id = mobj.group(1).split("-")[-1]
|
video_id = mobj.group(1).split("-")[-1]
|
||||||
embed_url = 'http://www.break.com/embed/%s' % video_id
|
embed_url = 'http://www.break.com/embed/%s' % video_id
|
||||||
webpage = self._download_webpage(embed_url, video_id)
|
webpage = self._download_webpage(embed_url, video_id)
|
||||||
info_json = self._search_regex(r'var embedVars = ({.*?});', webpage,
|
info_json = self._search_regex(r'var embedVars = ({.*})\s*?</script>',
|
||||||
'info json', flags=re.DOTALL)
|
webpage, 'info json', flags=re.DOTALL)
|
||||||
info = json.loads(info_json)
|
info = json.loads(info_json)
|
||||||
video_url = info['videoUri']
|
video_url = info['videoUri']
|
||||||
m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url)
|
m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url)
|
||||||
|
@ -1,4 +1,6 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -9,11 +11,12 @@ class Canalc2IE(InfoExtractor):
|
|||||||
_VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P<id>\d+)'
|
_VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
|
'url': 'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
|
||||||
u'file': u'12163.mp4',
|
'md5': '060158428b650f896c542dfbb3d6487f',
|
||||||
u'md5': u'060158428b650f896c542dfbb3d6487f',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': '12163',
|
||||||
u'title': u'Terrasses du Numérique'
|
'ext': 'mp4',
|
||||||
|
'title': 'Terrasses du Numérique'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -28,10 +31,11 @@ class Canalc2IE(InfoExtractor):
|
|||||||
video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name
|
video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'class="evenement8">(.*?)</a>', webpage, u'title')
|
r'class="evenement8">(.*?)</a>', webpage, 'title')
|
||||||
|
|
||||||
return {'id': video_id,
|
return {
|
||||||
'ext': 'mp4',
|
'id': video_id,
|
||||||
'url': video_url,
|
'ext': 'mp4',
|
||||||
'title': title,
|
'url': video_url,
|
||||||
}
|
'title': title,
|
||||||
|
}
|
||||||
|
@ -13,6 +13,7 @@ from ..utils import (
|
|||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
|
compat_xml_parse_error,
|
||||||
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
HEADRequest,
|
HEADRequest,
|
||||||
@ -241,10 +242,10 @@ class GenericIE(InfoExtractor):
|
|||||||
|
|
||||||
# Is it an RSS feed?
|
# Is it an RSS feed?
|
||||||
try:
|
try:
|
||||||
doc = xml.etree.ElementTree.fromstring(webpage)
|
doc = xml.etree.ElementTree.fromstring(webpage.encode('utf-8'))
|
||||||
if doc.tag == 'rss':
|
if doc.tag == 'rss':
|
||||||
return self._extract_rss(url, video_id, doc)
|
return self._extract_rss(url, video_id, doc)
|
||||||
except xml.etree.ElementTree.ParseError:
|
except compat_xml_parse_error:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# it's tempting to parse this further, but you would
|
# it's tempting to parse this further, but you would
|
||||||
|
@ -1,61 +1,51 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class NormalbootsIE(InfoExtractor):
|
class NormalbootsIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$'
|
_VALID_URL = r'http://(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://normalboots.com/video/home-alone-games-jontron/',
|
'url': 'http://normalboots.com/video/home-alone-games-jontron/',
|
||||||
u'file': u'home-alone-games-jontron.mp4',
|
'md5': '8bf6de238915dd501105b44ef5f1e0f6',
|
||||||
u'md5': u'8bf6de238915dd501105b44ef5f1e0f6',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': 'home-alone-games-jontron',
|
||||||
u'title': u'Home Alone Games - JonTron - NormalBoots',
|
'ext': 'mp4',
|
||||||
u'description': u'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for \u2018Tense Battle Theme\u2019:\xa0http://www.youtube.com/Kiamet/',
|
'title': 'Home Alone Games - JonTron - NormalBoots',
|
||||||
u'uploader': u'JonTron',
|
'description': 'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for ‘Tense Battle Theme’:\xa0http://www.youtube.com/Kiamet/',
|
||||||
u'upload_date': u'20140125',
|
'uploader': 'JonTron',
|
||||||
|
'upload_date': '20140125',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
if mobj is None:
|
|
||||||
raise ExtractorError(u'Invalid URL: %s' % url)
|
|
||||||
video_id = mobj.group('videoid')
|
video_id = mobj.group('videoid')
|
||||||
|
|
||||||
info = {
|
|
||||||
'id': video_id,
|
|
||||||
'uploader': None,
|
|
||||||
'upload_date': None,
|
|
||||||
}
|
|
||||||
|
|
||||||
if url[:4] != 'http':
|
|
||||||
url = 'http://' + url
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
video_title = self._og_search_title(webpage)
|
|
||||||
video_description = self._og_search_description(webpage)
|
|
||||||
video_thumbnail = self._og_search_thumbnail(webpage)
|
|
||||||
video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
|
video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
|
||||||
webpage, 'uploader')
|
webpage, 'uploader')
|
||||||
raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
|
raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
|
||||||
webpage, 'date')
|
webpage, 'date')
|
||||||
video_upload_date = unified_strdate(raw_upload_date)
|
video_upload_date = unified_strdate(raw_upload_date)
|
||||||
video_upload_date = unified_strdate(raw_upload_date)
|
|
||||||
|
|
||||||
player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url')
|
player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url')
|
||||||
player_page = self._download_webpage(player_url, video_id)
|
player_page = self._download_webpage(player_url, video_id)
|
||||||
video_url = u'http://player.screenwavemedia.com/' + self._html_search_regex(r"'file':\s'(?P<file>[0-9A-Za-z-_\.]+)'", player_page, 'file')
|
video_url = self._html_search_regex(r"file:\s'(?P<file>[^']+\.mp4)'", player_page, 'file')
|
||||||
|
|
||||||
info['url'] = video_url
|
return {
|
||||||
info['title'] = video_title
|
'id': video_id,
|
||||||
info['description'] = video_description
|
'url': video_url,
|
||||||
info['thumbnail'] = video_thumbnail
|
'title': self._og_search_title(webpage),
|
||||||
info['uploader'] = video_uploader
|
'description': self._og_search_description(webpage),
|
||||||
info['upload_date'] = video_upload_date
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
|
'uploader': video_uploader,
|
||||||
return info
|
'upload_date': video_upload_date,
|
||||||
|
}
|
||||||
|
37
youtube_dl/extractor/savefrom.py
Normal file
37
youtube_dl/extractor/savefrom.py
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import os.path
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class SaveFromIE(InfoExtractor):
|
||||||
|
IE_NAME = 'savefrom.net'
|
||||||
|
_VALID_URL = r'https?://[^.]+\.savefrom\.net/\#url=(?P<url>.*)$'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://en.savefrom.net/#url=http://youtube.com/watch?v=UlVRAPW2WJY&utm_source=youtube.com&utm_medium=short_domains&utm_campaign=ssyoutube.com',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'UlVRAPW2WJY',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'About Team Radical MMA | MMA Fighting',
|
||||||
|
'upload_date': '20120816',
|
||||||
|
'uploader': 'Howcast',
|
||||||
|
'uploader_id': 'Howcast',
|
||||||
|
'description': 'md5:4f0aac94361a12e1ce57d74f85265175',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = os.path.splitext(url.split('/')[-1])[0]
|
||||||
|
return {
|
||||||
|
'_type': 'url',
|
||||||
|
'id': video_id,
|
||||||
|
'url': mobj.group('url'),
|
||||||
|
}
|
@ -1,6 +1,5 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import os
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -8,23 +7,27 @@ from ..utils import (
|
|||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
|
unified_strdate,
|
||||||
|
str_to_int,
|
||||||
|
int_or_none,
|
||||||
)
|
)
|
||||||
from ..aes import (
|
from ..aes import aes_decrypt_text
|
||||||
aes_decrypt_text
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class SpankwireIE(InfoExtractor):
|
class SpankwireIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
|
_VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
|
'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
|
||||||
'file': '103545.mp4',
|
'md5': '8bbfde12b101204b39e4b9fe7eb67095',
|
||||||
'md5': '1b3f55e345500552dbc252a3e9c1af43',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
"uploader": "oreusz",
|
'id': '103545',
|
||||||
"title": "Buckcherry`s X Rated Music Video Crazy Bitch",
|
'ext': 'mp4',
|
||||||
"description": "Crazy Bitch X rated music video.",
|
'title': 'Buckcherry`s X Rated Music Video Crazy Bitch',
|
||||||
"age_limit": 18,
|
'description': 'Crazy Bitch X rated music video.',
|
||||||
|
'uploader': 'oreusz',
|
||||||
|
'uploader_id': '124697',
|
||||||
|
'upload_date': '20070508',
|
||||||
|
'age_limit': 18,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -37,13 +40,26 @@ class SpankwireIE(InfoExtractor):
|
|||||||
req.add_header('Cookie', 'age_verified=1')
|
req.add_header('Cookie', 'age_verified=1')
|
||||||
webpage = self._download_webpage(req, video_id)
|
webpage = self._download_webpage(req, video_id)
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
|
title = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
|
||||||
video_uploader = self._html_search_regex(
|
|
||||||
r'by:\s*<a [^>]*>(.+?)</a>', webpage, 'uploader', fatal=False)
|
|
||||||
thumbnail = self._html_search_regex(
|
|
||||||
r'flashvars\.image_url = "([^"]+)', webpage, 'thumbnail', fatal=False)
|
|
||||||
description = self._html_search_regex(
|
description = self._html_search_regex(
|
||||||
r'<div\s+id="descriptionContent">([^<]+)<', webpage, 'description', fatal=False)
|
r'<div\s+id="descriptionContent">([^<]+)<', webpage, 'description', fatal=False)
|
||||||
|
thumbnail = self._html_search_regex(
|
||||||
|
r'flashvars\.image_url = "([^"]+)', webpage, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
|
uploader = self._html_search_regex(
|
||||||
|
r'by:\s*<a [^>]*>(.+?)</a>', webpage, 'uploader', fatal=False)
|
||||||
|
uploader_id = self._html_search_regex(
|
||||||
|
r'by:\s*<a href="/Profile\.aspx\?.*?UserId=(\d+).*?"', webpage, 'uploader id', fatal=False)
|
||||||
|
upload_date = self._html_search_regex(r'</a> on (.+?) at \d+:\d+', webpage, 'upload date', fatal=False)
|
||||||
|
if upload_date:
|
||||||
|
upload_date = unified_strdate(upload_date)
|
||||||
|
|
||||||
|
view_count = self._html_search_regex(
|
||||||
|
r'<div id="viewsCounter"><span>([^<]+)</span> views</div>', webpage, 'view count', fatal=False)
|
||||||
|
if view_count:
|
||||||
|
view_count = str_to_int(view_count)
|
||||||
|
comment_count = int_or_none(self._html_search_regex(
|
||||||
|
r'<span id="spCommentCount">\s*(\d+)</span> Comments</div>', webpage, 'comment count', fatal=False))
|
||||||
|
|
||||||
video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage)))
|
video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage)))
|
||||||
if webpage.find('flashvars\.encrypted = "true"') != -1:
|
if webpage.find('flashvars\.encrypted = "true"') != -1:
|
||||||
@ -53,16 +69,13 @@ class SpankwireIE(InfoExtractor):
|
|||||||
formats = []
|
formats = []
|
||||||
for video_url in video_urls:
|
for video_url in video_urls:
|
||||||
path = compat_urllib_parse_urlparse(video_url).path
|
path = compat_urllib_parse_urlparse(video_url).path
|
||||||
extension = os.path.splitext(path)[1][1:]
|
|
||||||
format = path.split('/')[4].split('_')[:2]
|
format = path.split('/')[4].split('_')[:2]
|
||||||
resolution, bitrate_str = format
|
resolution, bitrate_str = format
|
||||||
format = "-".join(format)
|
format = "-".join(format)
|
||||||
height = int(resolution.rstrip('P'))
|
height = int(resolution.rstrip('Pp'))
|
||||||
tbr = int(bitrate_str.rstrip('K'))
|
tbr = int(bitrate_str.rstrip('Kk'))
|
||||||
|
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': extension,
|
|
||||||
'resolution': resolution,
|
'resolution': resolution,
|
||||||
'format': format,
|
'format': format,
|
||||||
'tbr': tbr,
|
'tbr': tbr,
|
||||||
@ -75,10 +88,14 @@ class SpankwireIE(InfoExtractor):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'uploader': video_uploader,
|
'title': title,
|
||||||
'title': video_title,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'description': description,
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'uploader': uploader,
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
'upload_date': upload_date,
|
||||||
|
'view_count': view_count,
|
||||||
|
'comment_count': comment_count,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
}
|
}
|
||||||
|
44
youtube_dl/extractor/trutube.py
Normal file
44
youtube_dl/extractor/trutube.py
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class TruTubeIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?trutube\.tv/video/(?P<id>[0-9]+)/.*'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-',
|
||||||
|
'md5': 'c5b6e301b0a2040b074746cbeaa26ca1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '14880',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Ramses II - Proven To Be A Red Headed Caucasoid',
|
||||||
|
'thumbnail': 're:^http:.*\.jpg$',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
video_title = self._og_search_title(webpage).strip()
|
||||||
|
thumbnail = self._search_regex(
|
||||||
|
r"var splash_img = '([^']+)';", webpage, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
|
all_formats = re.finditer(
|
||||||
|
r"var (?P<key>[a-z]+)_video_file\s*=\s*'(?P<url>[^']+)';", webpage)
|
||||||
|
formats = [{
|
||||||
|
'format_id': m.group('key'),
|
||||||
|
'quality': -i,
|
||||||
|
'url': m.group('url'),
|
||||||
|
} for i, m in enumerate(all_formats)]
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': video_title,
|
||||||
|
'formats': formats,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
}
|
80
youtube_dl/extractor/videobam.py
Normal file
80
youtube_dl/extractor/videobam.py
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
|
class VideoBamIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'http://(?:www\.)?videobam\.com/(?:videos/download/)?(?P<id>[a-zA-Z]+)'
|
||||||
|
|
||||||
|
_TESTS = [
|
||||||
|
{
|
||||||
|
'url': 'http://videobam.com/OiJQM',
|
||||||
|
'md5': 'db471f27763a531f10416a0c58b5a1e0',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'OiJQM',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Is Alcohol Worse Than Ecstasy?',
|
||||||
|
'description': 'md5:d25b96151515c91debc42bfbb3eb2683',
|
||||||
|
'uploader': 'frihetsvinge',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://videobam.com/pqLvq',
|
||||||
|
'md5': 'd9a565b5379a99126ef94e1d7f9a383e',
|
||||||
|
'note': 'HD video',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'pqLvq',
|
||||||
|
'ext': 'mp4',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
page = self._download_webpage('http://videobam.com/%s' % video_id, video_id, 'Downloading page')
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
|
||||||
|
for preference, format_id in enumerate(['low', 'high']):
|
||||||
|
mobj = re.search(r"%s: '(?P<url>[^']+)'" % format_id, page)
|
||||||
|
if not mobj:
|
||||||
|
continue
|
||||||
|
formats.append({
|
||||||
|
'url': mobj.group('url'),
|
||||||
|
'ext': 'mp4',
|
||||||
|
'format_id': format_id,
|
||||||
|
'preference': preference,
|
||||||
|
})
|
||||||
|
|
||||||
|
if not formats:
|
||||||
|
player_config = json.loads(self._html_search_regex(r'var player_config = ({.+?});', page, 'player config'))
|
||||||
|
formats = [{
|
||||||
|
'url': item['url'],
|
||||||
|
'ext': 'mp4',
|
||||||
|
} for item in player_config['playlist'] if 'autoPlay' in item]
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
title = self._og_search_title(page, default='VideoBam', fatal=False)
|
||||||
|
description = self._og_search_description(page, default=None)
|
||||||
|
thumbnail = self._og_search_thumbnail(page)
|
||||||
|
uploader = self._html_search_regex(r'Upload by ([^<]+)</a>', page, 'uploader', fatal=False, default=None)
|
||||||
|
view_count = int_or_none(
|
||||||
|
self._html_search_regex(r'<strong>Views:</strong> (\d+) ', page, 'view count', fatal=False))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'uploader': uploader,
|
||||||
|
'view_count': view_count,
|
||||||
|
'formats': formats,
|
||||||
|
'age_limit': 18,
|
||||||
|
}
|
@ -1,8 +1,10 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import unified_strdate
|
||||||
|
|
||||||
|
|
||||||
class VineIE(InfoExtractor):
|
class VineIE(InfoExtractor):
|
||||||
@ -13,31 +15,46 @@ class VineIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'b9KOOWX7HUx',
|
'id': 'b9KOOWX7HUx',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'uploader': 'Jack Dorsey',
|
|
||||||
'title': 'Chicken.',
|
'title': 'Chicken.',
|
||||||
|
'description': 'Chicken.',
|
||||||
|
'upload_date': '20130519',
|
||||||
|
'uploader': 'Jack Dorsey',
|
||||||
|
'uploader_id': '76',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
webpage_url = 'https://vine.co/v/' + video_id
|
|
||||||
webpage = self._download_webpage(webpage_url, video_id)
|
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id)
|
||||||
|
|
||||||
video_url = self._html_search_meta('twitter:player:stream', webpage,
|
data = json.loads(self._html_search_regex(
|
||||||
'video URL')
|
r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data'))
|
||||||
|
|
||||||
uploader = self._html_search_regex(r'<p class="username">(.*?)</p>',
|
formats = [
|
||||||
webpage, 'uploader', fatal=False, flags=re.DOTALL)
|
{
|
||||||
|
'url': data['videoLowURL'],
|
||||||
|
'ext': 'mp4',
|
||||||
|
'format_id': 'low',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': data['videoUrl'],
|
||||||
|
'ext': 'mp4',
|
||||||
|
'format_id': 'standard',
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': self._og_search_title(webpage),
|
'title': self._og_search_title(webpage),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'description': data['description'],
|
||||||
'uploader': uploader,
|
'thumbnail': data['thumbnailUrl'],
|
||||||
}
|
'upload_date': unified_strdate(data['created']),
|
||||||
|
'uploader': data['username'],
|
||||||
|
'uploader_id': data['userIdStr'],
|
||||||
|
'like_count': data['likes']['count'],
|
||||||
|
'comment_count': data['comments']['count'],
|
||||||
|
'repost_count': data['reposts']['count'],
|
||||||
|
'formats': formats,
|
||||||
|
}
|
@ -6,14 +6,15 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class WimpIE(InfoExtractor):
|
class WimpIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:http://)?(?:www\.)?wimp\.com/([^/]+)/'
|
_VALID_URL = r'http://(?:www\.)?wimp\.com/([^/]+)/'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.wimp.com/deerfence/',
|
'url': 'http://www.wimp.com/maruexhausted/',
|
||||||
'file': 'deerfence.flv',
|
'md5': 'f1acced123ecb28d9bb79f2479f2b6a1',
|
||||||
'md5': '8b215e2e0168c6081a1cf84b2846a2b5',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
"title": "Watch Till End: Herd of deer jump over a fence.",
|
'id': 'maruexhausted',
|
||||||
"description": "These deer look as fluid as running water when they jump over this fence as a herd. This video is one that needs to be watched until the very end for the true majesty to be witnessed, but once it comes, it's sure to take your breath away.",
|
'ext': 'flv',
|
||||||
|
'title': 'Maru is exhausted.',
|
||||||
|
'description': 'md5:57e099e857c0a4ea312542b684a869b8',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -30,4 +31,4 @@ class WimpIE(InfoExtractor):
|
|||||||
'title': self._og_search_title(webpage),
|
'title': self._og_search_title(webpage),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'description': self._og_search_description(webpage),
|
'description': self._og_search_description(webpage),
|
||||||
}
|
}
|
@ -22,8 +22,8 @@ class WorldStarHipHopIE(InfoExtractor):
|
|||||||
webpage_src = self._download_webpage(url, video_id)
|
webpage_src = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
m_vevo_id = re.search(r'videoId=(.*?)&?',
|
m_vevo_id = re.search(r'videoId=(.*?)&?',
|
||||||
webpage_src)
|
webpage_src)
|
||||||
|
|
||||||
if m_vevo_id is not None:
|
if m_vevo_id is not None:
|
||||||
self.to_screen(u'Vevo video detected:')
|
self.to_screen(u'Vevo video detected:')
|
||||||
return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')
|
return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')
|
||||||
|
@ -29,7 +29,6 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
PagedList,
|
PagedList,
|
||||||
RegexNotFoundError,
|
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
@ -297,6 +296,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
u"format": "141",
|
u"format": "141",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# DASH manifest with encrypted signature
|
||||||
|
{
|
||||||
|
u'url': u'https://www.youtube.com/watch?v=IB3lcPjvWLA',
|
||||||
|
u'info_dict': {
|
||||||
|
u'id': u'IB3lcPjvWLA',
|
||||||
|
u'ext': u'm4a',
|
||||||
|
u'title': u'Afrojack - The Spark ft. Spree Wilson',
|
||||||
|
u'description': u'md5:3199ed45ee8836572865580804d7ac0f',
|
||||||
|
u'uploader': u'AfrojackVEVO',
|
||||||
|
u'uploader_id': u'AfrojackVEVO',
|
||||||
|
u'upload_date': u'20131011',
|
||||||
|
},
|
||||||
|
u"params": {
|
||||||
|
u'youtube_include_dash_manifest': True,
|
||||||
|
u'format': '141',
|
||||||
|
},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@ -1272,8 +1288,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
|
mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
|
||||||
if not mobj:
|
if not mobj:
|
||||||
raise ValueError('Could not find vevo ID')
|
raise ValueError('Could not find vevo ID')
|
||||||
info = json.loads(mobj.group(1))
|
ytplayer_config = json.loads(mobj.group(1))
|
||||||
args = info['args']
|
args = ytplayer_config['args']
|
||||||
# Easy way to know if the 's' value is in url_encoded_fmt_stream_map
|
# Easy way to know if the 's' value is in url_encoded_fmt_stream_map
|
||||||
# this signatures are encrypted
|
# this signatures are encrypted
|
||||||
if 'url_encoded_fmt_stream_map' not in args:
|
if 'url_encoded_fmt_stream_map' not in args:
|
||||||
@ -1366,12 +1382,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
|
raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
|
||||||
|
|
||||||
# Look for the DASH manifest
|
# Look for the DASH manifest
|
||||||
dash_manifest_url_lst = video_info.get('dashmpd')
|
if (self._downloader.params.get('youtube_include_dash_manifest', False)):
|
||||||
if (dash_manifest_url_lst and dash_manifest_url_lst[0] and
|
|
||||||
self._downloader.params.get('youtube_include_dash_manifest', False)):
|
|
||||||
try:
|
try:
|
||||||
|
# The DASH manifest used needs to be the one from the original video_webpage.
|
||||||
|
# The one found in get_video_info seems to be using different signatures.
|
||||||
|
# However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage.
|
||||||
|
# Luckily, it seems, this case uses some kind of default signature (len == 86), so the
|
||||||
|
# combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here.
|
||||||
|
if age_gate:
|
||||||
|
dash_manifest_url = video_info.get('dashmpd')[0]
|
||||||
|
else:
|
||||||
|
dash_manifest_url = ytplayer_config['args']['dashmpd']
|
||||||
|
def decrypt_sig(mobj):
|
||||||
|
s = mobj.group(1)
|
||||||
|
dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
|
||||||
|
return '/signature/%s' % dec_s
|
||||||
|
dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
|
||||||
dash_doc = self._download_xml(
|
dash_doc = self._download_xml(
|
||||||
dash_manifest_url_lst[0], video_id,
|
dash_manifest_url, video_id,
|
||||||
note=u'Downloading DASH manifest',
|
note=u'Downloading DASH manifest',
|
||||||
errnote=u'Could not download DASH manifest')
|
errnote=u'Could not download DASH manifest')
|
||||||
for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
|
for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
|
||||||
@ -1443,9 +1471,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
|
|
||||||
((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
|
((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
|
||||||
)"""
|
)"""
|
||||||
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
|
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
|
||||||
_MORE_PAGES_INDICATOR = r'data-link-type="next"'
|
_MORE_PAGES_INDICATOR = r'data-link-type="next"'
|
||||||
_VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)'
|
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)'
|
||||||
IE_NAME = u'youtube:playlist'
|
IE_NAME = u'youtube:playlist'
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
@ -1460,11 +1488,15 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
# the id of the playlist is just 'RD' + video_id
|
# the id of the playlist is just 'RD' + video_id
|
||||||
url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
|
url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
|
||||||
webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
|
webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
|
||||||
title_span = (get_element_by_attribute('class', 'title long-title', webpage) or
|
search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
|
||||||
get_element_by_attribute('class', 'title ', webpage))
|
title_span = (search_title('playlist-title') or
|
||||||
|
search_title('title long-title') or search_title('title'))
|
||||||
title = clean_html(title_span)
|
title = clean_html(title_span)
|
||||||
video_re = r'data-index="\d+".*?href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s' % re.escape(playlist_id)
|
video_re = r'''(?x)data-video-username="(.*?)".*?
|
||||||
ids = orderedSet(re.findall(video_re, webpage))
|
href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id)
|
||||||
|
matches = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
|
||||||
|
# Some of the videos may have been deleted, their username field is empty
|
||||||
|
ids = [video_id for (username, video_id) in matches if username]
|
||||||
url_results = self._ids_to_results(ids)
|
url_results = self._ids_to_results(ids)
|
||||||
|
|
||||||
return self.playlist_result(url_results, playlist_id, title)
|
return self.playlist_result(url_results, playlist_id, title)
|
||||||
@ -1493,29 +1525,31 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
|||||||
raise ExtractorError(u'For downloading YouTube.com top lists, use '
|
raise ExtractorError(u'For downloading YouTube.com top lists, use '
|
||||||
u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
|
u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
|
||||||
|
|
||||||
|
url = self._TEMPLATE_URL % playlist_id
|
||||||
|
page = self._download_webpage(url, playlist_id)
|
||||||
|
more_widget_html = content_html = page
|
||||||
|
|
||||||
# Extract the video ids from the playlist pages
|
# Extract the video ids from the playlist pages
|
||||||
ids = []
|
ids = []
|
||||||
|
|
||||||
for page_num in itertools.count(1):
|
for page_num in itertools.count(1):
|
||||||
url = self._TEMPLATE_URL % (playlist_id, page_num)
|
matches = re.finditer(self._VIDEO_RE, content_html)
|
||||||
page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
|
|
||||||
matches = re.finditer(self._VIDEO_RE, page)
|
|
||||||
# We remove the duplicates and the link with index 0
|
# We remove the duplicates and the link with index 0
|
||||||
# (it's not the first video of the playlist)
|
# (it's not the first video of the playlist)
|
||||||
new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
|
new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
|
||||||
ids.extend(new_ids)
|
ids.extend(new_ids)
|
||||||
|
|
||||||
if re.search(self._MORE_PAGES_INDICATOR, page) is None:
|
mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
|
||||||
|
if not mobj:
|
||||||
break
|
break
|
||||||
|
|
||||||
try:
|
more = self._download_json(
|
||||||
playlist_title = self._og_search_title(page)
|
'https://youtube.com/%s' % mobj.group('more'), playlist_id, 'Downloading page #%s' % page_num)
|
||||||
except RegexNotFoundError:
|
content_html = more['content_html']
|
||||||
self.report_warning(
|
more_widget_html = more['load_more_widget_html']
|
||||||
u'Playlist page is missing OpenGraph title, falling back ...',
|
|
||||||
playlist_id)
|
playlist_title = self._html_search_regex(
|
||||||
playlist_title = self._html_search_regex(
|
r'<h1 class="pl-header-title">\s*(.*?)\s*</h1>', page, u'title')
|
||||||
r'<h1 class="pl-header-title">(.*?)</h1>', page, u'title')
|
|
||||||
|
|
||||||
url_results = self._ids_to_results(ids)
|
url_results = self._ids_to_results(ids)
|
||||||
return self.playlist_result(url_results, playlist_id, playlist_title)
|
return self.playlist_result(url_results, playlist_id, playlist_title)
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
@ -13,52 +14,42 @@ class ZDFIE(InfoExtractor):
|
|||||||
_VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<video_id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
|
_VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<video_id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u"url": u"http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt",
|
'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt',
|
||||||
u"file": u"2037704.webm",
|
'info_dict': {
|
||||||
u"info_dict": {
|
'id': '2037704',
|
||||||
u"upload_date": u"20131127",
|
'ext': 'webm',
|
||||||
u"description": u"Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial \"Ende des Machtpokers - Große Koalition für Deutschland\".",
|
'title': 'ZDFspezial - Ende des Machtpokers',
|
||||||
u"uploader": u"spezial",
|
'description': 'Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial "Ende des Machtpokers - Große Koalition für Deutschland".',
|
||||||
u"title": u"ZDFspezial - Ende des Machtpokers"
|
'duration': 1022,
|
||||||
|
'uploader': 'spezial',
|
||||||
|
'uploader_id': '225948',
|
||||||
|
'upload_date': '20131127',
|
||||||
},
|
},
|
||||||
u"skip": u"Videos on ZDF.de are depublicised in short order",
|
'skip': 'Videos on ZDF.de are depublicised in short order',
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('video_id')
|
video_id = mobj.group('video_id')
|
||||||
|
|
||||||
xml_url = u'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
|
xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
|
||||||
doc = self._download_xml(
|
doc = self._download_xml(
|
||||||
xml_url, video_id,
|
xml_url, video_id,
|
||||||
note=u'Downloading video info',
|
note='Downloading video info',
|
||||||
errnote=u'Failed to download video info')
|
errnote='Failed to download video info')
|
||||||
|
|
||||||
title = doc.find('.//information/title').text
|
title = doc.find('.//information/title').text
|
||||||
description = doc.find('.//information/detail').text
|
description = doc.find('.//information/detail').text
|
||||||
|
duration = int(doc.find('.//details/lengthSec').text)
|
||||||
uploader_node = doc.find('.//details/originChannelTitle')
|
uploader_node = doc.find('.//details/originChannelTitle')
|
||||||
uploader = None if uploader_node is None else uploader_node.text
|
uploader = None if uploader_node is None else uploader_node.text
|
||||||
duration_str = doc.find('.//details/length').text
|
uploader_id_node = doc.find('.//details/originChannelId')
|
||||||
duration_m = re.match(r'''(?x)^
|
uploader_id = None if uploader_id_node is None else uploader_id_node.text
|
||||||
(?P<hours>[0-9]{2})
|
|
||||||
:(?P<minutes>[0-9]{2})
|
|
||||||
:(?P<seconds>[0-9]{2})
|
|
||||||
(?:\.(?P<ms>[0-9]+)?)
|
|
||||||
''', duration_str)
|
|
||||||
duration = (
|
|
||||||
(
|
|
||||||
(int(duration_m.group('hours')) * 60 * 60) +
|
|
||||||
(int(duration_m.group('minutes')) * 60) +
|
|
||||||
int(duration_m.group('seconds'))
|
|
||||||
)
|
|
||||||
if duration_m
|
|
||||||
else None
|
|
||||||
)
|
|
||||||
upload_date = unified_strdate(doc.find('.//details/airtime').text)
|
upload_date = unified_strdate(doc.find('.//details/airtime').text)
|
||||||
|
|
||||||
def xml_to_format(fnode):
|
def xml_to_format(fnode):
|
||||||
video_url = fnode.find('url').text
|
video_url = fnode.find('url').text
|
||||||
is_available = u'http://www.metafilegenerator' not in video_url
|
is_available = 'http://www.metafilegenerator' not in video_url
|
||||||
|
|
||||||
format_id = fnode.attrib['basetype']
|
format_id = fnode.attrib['basetype']
|
||||||
format_m = re.match(r'''(?x)
|
format_m = re.match(r'''(?x)
|
||||||
@ -71,22 +62,28 @@ class ZDFIE(InfoExtractor):
|
|||||||
|
|
||||||
quality = fnode.find('./quality').text
|
quality = fnode.find('./quality').text
|
||||||
abr = int(fnode.find('./audioBitrate').text) // 1000
|
abr = int(fnode.find('./audioBitrate').text) // 1000
|
||||||
vbr = int(fnode.find('./videoBitrate').text) // 1000
|
vbr_node = fnode.find('./videoBitrate')
|
||||||
|
vbr = None if vbr_node is None else int(vbr_node.text) // 1000
|
||||||
|
|
||||||
format_note = u''
|
width_node = fnode.find('./width')
|
||||||
|
width = None if width_node is None else int_or_none(width_node.text)
|
||||||
|
height_node = fnode.find('./height')
|
||||||
|
height = None if height_node is None else int_or_none(height_node.text)
|
||||||
|
|
||||||
|
format_note = ''
|
||||||
if not format_note:
|
if not format_note:
|
||||||
format_note = None
|
format_note = None
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'format_id': format_id + u'-' + quality,
|
'format_id': format_id + '-' + quality,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
'acodec': format_m.group('acodec'),
|
'acodec': format_m.group('acodec'),
|
||||||
'vcodec': format_m.group('vcodec'),
|
'vcodec': format_m.group('vcodec'),
|
||||||
'abr': abr,
|
'abr': abr,
|
||||||
'vbr': vbr,
|
'vbr': vbr,
|
||||||
'width': int_or_none(fnode.find('./width').text),
|
'width': width,
|
||||||
'height': int_or_none(fnode.find('./height').text),
|
'height': height,
|
||||||
'filesize': int_or_none(fnode.find('./filesize').text),
|
'filesize': int_or_none(fnode.find('./filesize').text),
|
||||||
'format_note': format_note,
|
'format_note': format_note,
|
||||||
'protocol': proto,
|
'protocol': proto,
|
||||||
@ -103,9 +100,10 @@ class ZDFIE(InfoExtractor):
|
|||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'formats': formats,
|
|
||||||
'description': description,
|
'description': description,
|
||||||
'uploader': uploader,
|
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
|
'uploader': uploader,
|
||||||
|
'uploader_id': uploader_id,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
}
|
'formats': formats,
|
||||||
|
}
|
@ -174,6 +174,11 @@ try:
|
|||||||
except NameError:
|
except NameError:
|
||||||
compat_chr = chr
|
compat_chr = chr
|
||||||
|
|
||||||
|
try:
|
||||||
|
from xml.etree.ElementTree import ParseError as compat_xml_parse_error
|
||||||
|
except ImportError: # Python 2.6
|
||||||
|
from xml.parsers.expat import ExpatError as compat_xml_parse_error
|
||||||
|
|
||||||
def compat_ord(c):
|
def compat_ord(c):
|
||||||
if type(c) is int: return c
|
if type(c) is int: return c
|
||||||
else: return ord(c)
|
else: return ord(c)
|
||||||
@ -774,6 +779,7 @@ def unified_strdate(date_str):
|
|||||||
'%Y-%m-%dT%H:%M:%S.%fZ',
|
'%Y-%m-%dT%H:%M:%S.%fZ',
|
||||||
'%Y-%m-%dT%H:%M:%S.%f0Z',
|
'%Y-%m-%dT%H:%M:%S.%f0Z',
|
||||||
'%Y-%m-%dT%H:%M:%S',
|
'%Y-%m-%dT%H:%M:%S',
|
||||||
|
'%Y-%m-%dT%H:%M:%S.%f',
|
||||||
'%Y-%m-%dT%H:%M',
|
'%Y-%m-%dT%H:%M',
|
||||||
]
|
]
|
||||||
for expression in format_expressions:
|
for expression in format_expressions:
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2014.02.20'
|
__version__ = '2014.02.24'
|
||||||
|
Reference in New Issue
Block a user