Compare commits

...

30 Commits

SHA1 Message Date
0bf5cf9886 release 2014.02.24 2014-02-24 09:44:22 +01:00
919052d094 [zdf] Fix podcast extraction and use unicode literals (Closes #2446) 2014-02-24 13:47:47 +07:00
a2dafe2887 [youtube] Fix mix video regex
Attributes' order in <li> is arbitrary and changes every time playlist
page is fetched, so we can't rely on `data-index` to be before
`data-video-username`.
2014-02-24 12:52:02 +07:00
92661c994b [normalboots] Modernize and simplify 2014-02-23 18:28:22 +01:00
ffe8fe356a [normalboots] Fix video url extraction 2014-02-23 18:06:51 +01:00
bc2f773b4f [youtube:playlist] Fix mixes extraction (fixes #2444) 2014-02-23 17:17:36 +01:00
f919201ecc [vine] Extract more metadata and support low format 2014-02-23 19:02:31 +07:00
7ff5d5c2e2 Add one more format to unified_strdate 2014-02-23 19:00:51 +07:00
9b77f951c7 [breakcom] Fix error when calling _search_regex
I passed `'webpage'` instead of the variable `webpage`.
2014-02-23 12:28:44 +01:00
a25f2f990a [breakcom] Fix info json extraction 2014-02-23 12:20:58 +01:00
78b373975d [vine] Fix uploader extraction 2014-02-23 12:08:30 +01:00
2fcc873c4c release 2014.02.22.1 2014-02-22 23:17:56 +01:00
23c2baadb3 [videobam] Set age_limit to 18
From [their ToS](http://videobam.com/terms): "User must be eighteen 18[sic] years of age or older to use or access this web site."
2014-02-22 23:15:41 +01:00
521ee82334 Fix imports 2014-02-22 23:03:12 +01:00
1df96e59ce [f4m] Clean up 2014-02-22 23:03:00 +01:00
3e123c1e28 [videobam] Add support for videobam.com (Closes #2411) 2014-02-23 04:50:05 +07:00
f38da66731 Credit @soult for br 2014-02-22 20:19:41 +01:00
06aabfc422 [br] Simplify 2014-02-22 20:17:26 +01:00
1052d2bfec Merge remote-tracking branch 'soult/br' 2014-02-22 17:14:47 +01:00
5e0b652344 release 2014.02.22 2014-02-22 15:07:25 +01:00
0f8f097183 [release.sh] Do not run tests by default
We are at the point that testing takes waay too long for a release cycle, and fails way too often.
Tests through travis are a better indicator than testing just before release.
2014-02-22 15:06:07 +01:00
491ed3dda2 [trutube] Support multiple formats (#2433) 2014-02-22 15:05:30 +01:00
af284c6d1b Merge remote-tracking branch 'JohnyMoSwag/master' 2014-02-22 14:38:42 +01:00
41d3ec5fba [savefrom] Add extractor (Fixes #2434) 2014-02-22 14:36:16 +01:00
0568c352f3 [canalc2] Modernize 2014-02-22 14:27:09 +01:00
2e7b4cb714 [spankwire] Fix uploader id regex 2014-02-22 16:50:08 +07:00
9767726b66 [spankwire] Improve and modernize 2014-02-22 16:45:03 +07:00
9ddfd84e41 added trutubeIE 2014-02-22 00:11:57 -08:00
7928024f57 [BR] Add basic test 2014-02-21 18:00:05 +01:00
3eb38acb43 [BR] Add "BR" extractor
Extractor for videos from the Bayerischer Rundfunk Mediathek[1]. Currently only
supports videos. Audio and podcasts do not work yet with this extractor.

1: http://br.de/mediathek
2014-02-21 17:58:52 +01:00
19 changed files with 434 additions and 149 deletions

View File

@@ -14,9 +14,9 @@
 set -e
-skip_tests=false
-if [ "$1" = '--skip-test' ]; then
-    skip_tests=true
+skip_tests=true
+if [ "$1" = '--run-tests' ]; then
+    skip_tests=false
     shift
 fi

View File

@@ -18,6 +18,7 @@ from test.helper import (
 import hashlib
 import io
 import json
+import re
 import socket

 import youtube_dl.YoutubeDL
@@ -137,12 +138,21 @@ def generator(test_case):
             with io.open(info_json_fn, encoding='utf-8') as infof:
                 info_dict = json.load(infof)
             for (info_field, expected) in tc.get('info_dict', {}).items():
-                if isinstance(expected, compat_str) and expected.startswith('md5:'):
-                    got = 'md5:' + md5(info_dict.get(info_field))
-                else:
+                if isinstance(expected, compat_str) and expected.startswith('re:'):
                     got = info_dict.get(info_field)
-                self.assertEqual(expected, got,
-                    u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
+                    match_str = expected[len('re:'):]
+                    match_rex = re.compile(match_str)
+
+                    self.assertTrue(
+                        isinstance(got, compat_str) and match_rex.match(got),
+                        u'field %s (value: %r) should match %r' % (info_field, got, match_str))
+                else:
+                    if isinstance(expected, compat_str) and expected.startswith('md5:'):
+                        got = 'md5:' + md5(info_dict.get(info_field))
+                    else:
+                        got = info_dict.get(info_field)
+                    self.assertEqual(expected, got,
+                        u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))

             # If checkable fields are missing from the test case, print the info_dict
             test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
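
The hunk above adds a 're:' prefix for test expectations: an info_dict value that starts with 're:' is now compiled and matched against the extracted field instead of being compared verbatim (the existing 'md5:' handling moves into the else branch). A minimal sketch of how an extractor test might use it; the URL and field values here are illustrative, not taken from a real test:

    _TEST = {
        'url': 'http://example.com/video/12345',
        'info_dict': {
            'id': '12345',
            'ext': 'mp4',
            # exact match, as before
            'title': 'Some example video',
            # pattern match via the new 're:' prefix, handy for volatile URLs
            'thumbnail': 're:^https?://.*\.jpg$',
        },
    }

The TruTube test added further down in this compare uses exactly this form for its thumbnail field.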

View File

@@ -46,6 +46,7 @@ __authors__ = (
     'Andreas Schmitz',
     'Michael Kaiser',
     'Niklas Laxström',
+    'David Triendl',
 )

 __license__ = 'Public Domain'

View File

@@ -12,7 +12,6 @@ from .http import HttpFD
 from ..utils import (
     struct_pack,
     struct_unpack,
-    compat_urllib_request,
     compat_urlparse,
     format_bytes,
     encodeFilename,
@@ -117,8 +116,8 @@ class FlvReader(io.BytesIO):
         self.read_unsigned_char()
         # flags
         self.read(3)
-        # BootstrapinfoVersion
-        bootstrap_info_version = self.read_unsigned_int()
+        self.read_unsigned_int()  # BootstrapinfoVersion
         # Profile,Live,Update,Reserved
         self.read(1)
         # time scale
@@ -127,15 +126,15 @@ class FlvReader(io.BytesIO):
         self.read_unsigned_long_long()
         # SmpteTimeCodeOffset
         self.read_unsigned_long_long()
-        # MovieIdentifier
-        movie_identifier = self.read_string()
+        self.read_string()  # MovieIdentifier
         server_count = self.read_unsigned_char()
         # ServerEntryTable
         for i in range(server_count):
             self.read_string()
         quality_count = self.read_unsigned_char()
         # QualityEntryTable
-        for i in range(server_count):
+        for i in range(quality_count):
             self.read_string()
         # DrmData
         self.read_string()
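
The last hunk above fixes the QualityEntryTable loop, which iterated with server_count instead of quality_count; once the counts disagree, every read after that table lands at the wrong offset. A standalone sketch of the failure mode (byte layout and names invented for illustration, not youtube-dl code):

    import io
    import struct

    def read_string(stream):
        # null-terminated string, in the style of the FLV bootstrap-info boxes
        buf = b''
        while True:
            c = stream.read(1)
            if c in (b'', b'\x00'):
                return buf
            buf += c

    data = io.BytesIO(b'\x01srv\x00\x02hd\x00sd\x00drm\x00')
    server_count = struct.unpack('B', data.read(1))[0]             # 1
    servers = [read_string(data) for _ in range(server_count)]     # [b'srv']
    quality_count = struct.unpack('B', data.read(1))[0]            # 2
    # Looping with server_count (1) here would leave b'sd\x00' unread, so the
    # DrmData string that follows would be parsed from the wrong position.
    qualities = [read_string(data) for _ in range(quality_count)]  # [b'hd', b'sd']
    drm_data = read_string(data)                                   # b'drm'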

View File

@@ -19,6 +19,7 @@ from .bbccouk import BBCCoUkIE
 from .blinkx import BlinkxIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bloomberg import BloombergIE
+from .br import BRIE
 from .breakcom import BreakIE
 from .brightcove import BrightcoveIE
 from .c56 import C56IE
@@ -186,6 +187,7 @@ from .rutube import (
     RutubeMovieIE,
     RutubePersonIE,
 )
+from .savefrom import SaveFromIE
 from .servingsys import ServingSysIE
 from .sina import SinaIE
 from .slashdot import SlashdotIE
@@ -224,6 +226,7 @@ from .tinypic import TinyPicIE
 from .toutv import TouTvIE
 from .traileraddict import TrailerAddictIE
 from .trilulilu import TriluliluIE
+from .trutube import TruTubeIE
 from .tube8 import Tube8IE
 from .tudou import TudouIE
 from .tumblr import TumblrIE
@@ -238,6 +241,7 @@ from .vesti import VestiIE
 from .vevo import VevoIE
 from .vice import ViceIE
 from .viddler import ViddlerIE
+from .videobam import VideoBamIE
 from .videodetective import VideoDetectiveIE
 from .videofyme import VideofyMeIE
 from .videopremium import VideoPremiumIE

View File

@@ -0,0 +1,80 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import ExtractorError


class BRIE(InfoExtractor):
    IE_DESC = "Bayerischer Rundfunk Mediathek"
    _VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?P<id>[a-z0-9\-]+)\.html$"
    _BASE_URL = "http://www.br.de"

    _TEST = {
        "url": "http://www.br.de/mediathek/video/anselm-gruen-114.html",
        "md5": "c4f83cf0f023ba5875aba0bf46860df2",
        "info_dict": {
            "id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532",
            "ext": "mp4",
            "title": "Feiern und Verzichten",
            "description": "Anselm Grün: Feiern und Verzichten",
            "uploader": "BR/Birgit Baier",
            "upload_date": "20140301"
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('id')
        page = self._download_webpage(url, display_id)
        xml_url = self._search_regex(
            r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL")
        xml = self._download_xml(self._BASE_URL + xml_url, None)

        videos = [{
            "id": xml_video.get("externalId"),
            "title": xml_video.find("title").text,
            "formats": self._extract_formats(xml_video.find("assets")),
            "thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")),
            "description": " ".join(xml_video.find("shareTitle").text.splitlines()),
            "uploader": xml_video.find("author").text,
            "upload_date": "".join(reversed(xml_video.find("broadcastDate").text.split("."))),
            "webpage_url": xml_video.find("permalink").text,
        } for xml_video in xml.findall("video")]

        if len(videos) > 1:
            self._downloader.report_warning(
                'found multiple videos; please '
                'report this with the video URL to http://yt-dl.org/bug')
        if not videos:
            raise ExtractorError('No video entries found')
        return videos[0]

    def _extract_formats(self, assets):
        formats = [{
            "url": asset.find("downloadUrl").text,
            "ext": asset.find("mediaType").text,
            "format_id": asset.get("type"),
            "width": int(asset.find("frameWidth").text),
            "height": int(asset.find("frameHeight").text),
            "tbr": int(asset.find("bitrateVideo").text),
            "abr": int(asset.find("bitrateAudio").text),
            "vcodec": asset.find("codecVideo").text,
            "container": asset.find("mediaType").text,
            "filesize": int(asset.find("size").text),
        } for asset in assets.findall("asset")
            if asset.find("downloadUrl") is not None]
        self._sort_formats(formats)
        return formats

    def _extract_thumbnails(self, variants):
        thumbnails = [{
            "url": self._BASE_URL + variant.find("url").text,
            "width": int(variant.find("width").text),
            "height": int(variant.find("height").text),
        } for variant in variants.findall("variant")]
        thumbnails.sort(key=lambda x: x["width"] * x["height"], reverse=True)
        return thumbnails
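
For context, a minimal sketch of exercising the new extractor through the embedding API rather than the command line; the URL is the one from the extractor's own _TEST, while the options and printed fields are illustrative assumptions:

    from youtube_dl import YoutubeDL

    # Probe the BR Mediathek extractor without downloading the media itself.
    ydl = YoutubeDL({'skip_download': True, 'quiet': True})
    info = ydl.extract_info('http://www.br.de/mediathek/video/anselm-gruen-114.html', download=False)
    print(info['id'], info['title'], info['upload_date'])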

View File

@@ -23,8 +23,8 @@ class BreakIE(InfoExtractor):
         video_id = mobj.group(1).split("-")[-1]
         embed_url = 'http://www.break.com/embed/%s' % video_id
         webpage = self._download_webpage(embed_url, video_id)
-        info_json = self._search_regex(r'var embedVars = ({.*?});', webpage,
-            'info json', flags=re.DOTALL)
+        info_json = self._search_regex(r'var embedVars = ({.*})\s*?</script>',
+            webpage, 'info json', flags=re.DOTALL)
         info = json.loads(info_json)
         video_url = info['videoUri']
         m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url)
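
The regression fixed here came from passing the literal string 'webpage' to _search_regex instead of the webpage variable, so the regex was run against seven characters of text rather than the downloaded page. A minimal sketch of the bug pattern with a generic regex helper (not the actual extractor code):

    import re

    def search_regex(pattern, string):
        # stand-in for InfoExtractor._search_regex: apply the regex to `string`
        m = re.search(pattern, string, re.DOTALL)
        return m.group(1) if m else None

    webpage = '<script>var embedVars = {"videoUri": "http://example.com/v.mp4"};</script>'

    search_regex(r'var embedVars = ({.*?});', 'webpage')   # None: searched the literal text 'webpage'
    search_regex(r'var embedVars = ({.*?});', webpage)     # '{"videoUri": "http://example.com/v.mp4"}'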

View File

@@ -1,4 +1,6 @@
 # coding: utf-8
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@@ -9,11 +11,12 @@ class Canalc2IE(InfoExtractor):
     _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P<id>\d+)'

     _TEST = {
-        u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
-        u'file': u'12163.mp4',
-        u'md5': u'060158428b650f896c542dfbb3d6487f',
-        u'info_dict': {
-            u'title': u'Terrasses du Numérique'
+        'url': 'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
+        'md5': '060158428b650f896c542dfbb3d6487f',
+        'info_dict': {
+            'id': '12163',
+            'ext': 'mp4',
+            'title': 'Terrasses du Numérique'
         }
     }
@@ -28,10 +31,11 @@ class Canalc2IE(InfoExtractor):
         video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name

         title = self._html_search_regex(
-            r'class="evenement8">(.*?)</a>', webpage, u'title')
-        return {'id': video_id,
-                'ext': 'mp4',
-                'url': video_url,
-                'title': title,
-                }
+            r'class="evenement8">(.*?)</a>', webpage, 'title')
+
+        return {
+            'id': video_id,
+            'ext': 'mp4',
+            'url': video_url,
+            'title': title,
+        }

View File

@@ -1,61 +1,51 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
 from ..utils import (
-    ExtractorError,
     unified_strdate,
 )


 class NormalbootsIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$'
+    _VALID_URL = r'http://(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$'
     _TEST = {
-        u'url': u'http://normalboots.com/video/home-alone-games-jontron/',
-        u'file': u'home-alone-games-jontron.mp4',
-        u'md5': u'8bf6de238915dd501105b44ef5f1e0f6',
-        u'info_dict': {
-            u'title': u'Home Alone Games - JonTron - NormalBoots',
-            u'description': u'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for \u2018Tense Battle Theme\u2019:\xa0http://www.youtube.com/Kiamet/',
-            u'uploader': u'JonTron',
-            u'upload_date': u'20140125',
+        'url': 'http://normalboots.com/video/home-alone-games-jontron/',
+        'md5': '8bf6de238915dd501105b44ef5f1e0f6',
+        'info_dict': {
+            'id': 'home-alone-games-jontron',
+            'ext': 'mp4',
+            'title': 'Home Alone Games - JonTron - NormalBoots',
+            'description': 'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for Tense Battle Theme:\xa0http://www.youtube.com/Kiamet/',
+            'uploader': 'JonTron',
+            'upload_date': '20140125',
         }
     }

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
         video_id = mobj.group('videoid')
-        info = {
-            'id': video_id,
-            'uploader': None,
-            'upload_date': None,
-        }
-        if url[:4] != 'http':
-            url = 'http://' + url
         webpage = self._download_webpage(url, video_id)
-        video_title = self._og_search_title(webpage)
-        video_description = self._og_search_description(webpage)
-        video_thumbnail = self._og_search_thumbnail(webpage)
         video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
             webpage, 'uploader')
         raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
             webpage, 'date')
         video_upload_date = unified_strdate(raw_upload_date)
-        video_upload_date = unified_strdate(raw_upload_date)

         player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url')
         player_page = self._download_webpage(player_url, video_id)
-        video_url = u'http://player.screenwavemedia.com/' + self._html_search_regex(r"'file':\s'(?P<file>[0-9A-Za-z-_\.]+)'", player_page, 'file')
+        video_url = self._html_search_regex(r"file:\s'(?P<file>[^']+\.mp4)'", player_page, 'file')

-        info['url'] = video_url
-        info['title'] = video_title
-        info['description'] = video_description
-        info['thumbnail'] = video_thumbnail
-        info['uploader'] = video_uploader
-        info['upload_date'] = video_upload_date
-        return info
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'uploader': video_uploader,
+            'upload_date': video_upload_date,
+        }

View File

@@ -0,0 +1,37 @@
# coding: utf-8
from __future__ import unicode_literals

import os.path
import re

from .common import InfoExtractor


class SaveFromIE(InfoExtractor):
    IE_NAME = 'savefrom.net'
    _VALID_URL = r'https?://[^.]+\.savefrom\.net/\#url=(?P<url>.*)$'

    _TEST = {
        'url': 'http://en.savefrom.net/#url=http://youtube.com/watch?v=UlVRAPW2WJY&utm_source=youtube.com&utm_medium=short_domains&utm_campaign=ssyoutube.com',
        'info_dict': {
            'id': 'UlVRAPW2WJY',
            'ext': 'mp4',
            'title': 'About Team Radical MMA | MMA Fighting',
            'upload_date': '20120816',
            'uploader': 'Howcast',
            'uploader_id': 'Howcast',
            'description': 'md5:4f0aac94361a12e1ce57d74f85265175',
        },
        'params': {
            'skip_download': True
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = os.path.splitext(url.split('/')[-1])[0]

        return {
            '_type': 'url',
            'id': video_id,
            'url': mobj.group('url'),
        }

View File

@@ -1,6 +1,5 @@
 from __future__ import unicode_literals

-import os
 import re

 from .common import InfoExtractor
@@ -8,23 +7,27 @@ from ..utils import (
     compat_urllib_parse_urlparse,
     compat_urllib_request,
     compat_urllib_parse,
+    unified_strdate,
+    str_to_int,
+    int_or_none,
 )
-from ..aes import (
-    aes_decrypt_text
-)
+from ..aes import aes_decrypt_text


 class SpankwireIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
     _TEST = {
         'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
-        'file': '103545.mp4',
-        'md5': '1b3f55e345500552dbc252a3e9c1af43',
+        'md5': '8bbfde12b101204b39e4b9fe7eb67095',
         'info_dict': {
-            "uploader": "oreusz",
-            "title": "Buckcherry`s X Rated Music Video Crazy Bitch",
-            "description": "Crazy Bitch X rated music video.",
-            "age_limit": 18,
+            'id': '103545',
+            'ext': 'mp4',
+            'title': 'Buckcherry`s X Rated Music Video Crazy Bitch',
+            'description': 'Crazy Bitch X rated music video.',
+            'uploader': 'oreusz',
+            'uploader_id': '124697',
+            'upload_date': '20070508',
+            'age_limit': 18,
         }
     }
@@ -37,13 +40,26 @@ class SpankwireIE(InfoExtractor):
         req.add_header('Cookie', 'age_verified=1')
         webpage = self._download_webpage(req, video_id)

-        video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
-        video_uploader = self._html_search_regex(
-            r'by:\s*<a [^>]*>(.+?)</a>', webpage, 'uploader', fatal=False)
-        thumbnail = self._html_search_regex(
-            r'flashvars\.image_url = "([^"]+)', webpage, 'thumbnail', fatal=False)
+        title = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
         description = self._html_search_regex(
             r'<div\s+id="descriptionContent">([^<]+)<', webpage, 'description', fatal=False)
+        thumbnail = self._html_search_regex(
+            r'flashvars\.image_url = "([^"]+)', webpage, 'thumbnail', fatal=False)
+
+        uploader = self._html_search_regex(
+            r'by:\s*<a [^>]*>(.+?)</a>', webpage, 'uploader', fatal=False)
+        uploader_id = self._html_search_regex(
+            r'by:\s*<a href="/Profile\.aspx\?.*?UserId=(\d+).*?"', webpage, 'uploader id', fatal=False)
+        upload_date = self._html_search_regex(r'</a> on (.+?) at \d+:\d+', webpage, 'upload date', fatal=False)
+        if upload_date:
+            upload_date = unified_strdate(upload_date)
+
+        view_count = self._html_search_regex(
+            r'<div id="viewsCounter"><span>([^<]+)</span> views</div>', webpage, 'view count', fatal=False)
+        if view_count:
+            view_count = str_to_int(view_count)
+        comment_count = int_or_none(self._html_search_regex(
+            r'<span id="spCommentCount">\s*(\d+)</span> Comments</div>', webpage, 'comment count', fatal=False))

         video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage)))
         if webpage.find('flashvars\.encrypted = "true"') != -1:
@@ -53,16 +69,13 @@ class SpankwireIE(InfoExtractor):
         formats = []
         for video_url in video_urls:
             path = compat_urllib_parse_urlparse(video_url).path
-            extension = os.path.splitext(path)[1][1:]
             format = path.split('/')[4].split('_')[:2]
             resolution, bitrate_str = format
             format = "-".join(format)
-            height = int(resolution.rstrip('P'))
-            tbr = int(bitrate_str.rstrip('K'))
+            height = int(resolution.rstrip('Pp'))
+            tbr = int(bitrate_str.rstrip('Kk'))
             formats.append({
                 'url': video_url,
-                'ext': extension,
                 'resolution': resolution,
                 'format': format,
                 'tbr': tbr,
@@ -75,10 +88,14 @@ class SpankwireIE(InfoExtractor):
         return {
             'id': video_id,
-            'uploader': video_uploader,
-            'title': video_title,
-            'thumbnail': thumbnail,
+            'title': title,
             'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'upload_date': upload_date,
+            'view_count': view_count,
+            'comment_count': comment_count,
             'formats': formats,
             'age_limit': age_limit,
         }
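
One easy-to-miss change above: rstrip('P') and rstrip('K') became rstrip('Pp') and rstrip('Kk'), so the numeric parse tolerates either letter case in the path segments. A quick illustration with invented sample values:

    for resolution, bitrate_str in [('720P', '1500K'), ('480p', '800k')]:
        height = int(resolution.rstrip('Pp'))   # 720, then 480
        tbr = int(bitrate_str.rstrip('Kk'))     # 1500, then 800
    # With the old rstrip('P'), the lowercase '480p' would reach int() unchanged
    # and raise ValueError.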

View File

@@ -0,0 +1,44 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor


class TruTubeIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?trutube\.tv/video/(?P<id>[0-9]+)/.*'
    _TEST = {
        'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-',
        'md5': 'c5b6e301b0a2040b074746cbeaa26ca1',
        'info_dict': {
            'id': '14880',
            'ext': 'flv',
            'title': 'Ramses II - Proven To Be A Red Headed Caucasoid',
            'thumbnail': 're:^http:.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        video_title = self._og_search_title(webpage).strip()
        thumbnail = self._search_regex(
            r"var splash_img = '([^']+)';", webpage, 'thumbnail', fatal=False)

        all_formats = re.finditer(
            r"var (?P<key>[a-z]+)_video_file\s*=\s*'(?P<url>[^']+)';", webpage)
        formats = [{
            'format_id': m.group('key'),
            'quality': -i,
            'url': m.group('url'),
        } for i, m in enumerate(all_formats)]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_title,
            'formats': formats,
            'thumbnail': thumbnail,
        }

View File

@@ -0,0 +1,80 @@
from __future__ import unicode_literals

import re
import json

from .common import InfoExtractor
from ..utils import int_or_none


class VideoBamIE(InfoExtractor):
    _VALID_URL = r'http://(?:www\.)?videobam\.com/(?:videos/download/)?(?P<id>[a-zA-Z]+)'

    _TESTS = [
        {
            'url': 'http://videobam.com/OiJQM',
            'md5': 'db471f27763a531f10416a0c58b5a1e0',
            'info_dict': {
                'id': 'OiJQM',
                'ext': 'mp4',
                'title': 'Is Alcohol Worse Than Ecstasy?',
                'description': 'md5:d25b96151515c91debc42bfbb3eb2683',
                'uploader': 'frihetsvinge',
            },
        },
        {
            'url': 'http://videobam.com/pqLvq',
            'md5': 'd9a565b5379a99126ef94e1d7f9a383e',
            'note': 'HD video',
            'info_dict': {
                'id': 'pqLvq',
                'ext': 'mp4',
            }
        },
    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        page = self._download_webpage('http://videobam.com/%s' % video_id, video_id, 'Downloading page')

        formats = []

        for preference, format_id in enumerate(['low', 'high']):
            mobj = re.search(r"%s: '(?P<url>[^']+)'" % format_id, page)
            if not mobj:
                continue
            formats.append({
                'url': mobj.group('url'),
                'ext': 'mp4',
                'format_id': format_id,
                'preference': preference,
            })

        if not formats:
            player_config = json.loads(self._html_search_regex(r'var player_config = ({.+?});', page, 'player config'))
            formats = [{
                'url': item['url'],
                'ext': 'mp4',
            } for item in player_config['playlist'] if 'autoPlay' in item]

        self._sort_formats(formats)

        title = self._og_search_title(page, default='VideoBam', fatal=False)
        description = self._og_search_description(page, default=None)
        thumbnail = self._og_search_thumbnail(page)
        uploader = self._html_search_regex(r'Upload by ([^<]+)</a>', page, 'uploader', fatal=False, default=None)
        view_count = int_or_none(
            self._html_search_regex(r'<strong>Views:</strong> (\d+) ', page, 'view count', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'view_count': view_count,
            'formats': formats,
            'age_limit': 18,
        }

View File

@@ -1,8 +1,10 @@
 from __future__ import unicode_literals

 import re
+import json

 from .common import InfoExtractor
+from ..utils import unified_strdate


 class VineIE(InfoExtractor):
@@ -13,31 +15,46 @@ class VineIE(InfoExtractor):
         'info_dict': {
             'id': 'b9KOOWX7HUx',
             'ext': 'mp4',
-            'uploader': 'Jack Dorsey',
             'title': 'Chicken.',
+            'description': 'Chicken.',
+            'upload_date': '20130519',
+            'uploader': 'Jack Dorsey',
+            'uploader_id': '76',
         },
     }

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
-        webpage_url = 'https://vine.co/v/' + video_id
-        webpage = self._download_webpage(webpage_url, video_id)
-        self.report_extraction(video_id)
-        video_url = self._html_search_meta('twitter:player:stream', webpage,
-            'video URL')
-        uploader = self._html_search_regex(r'<p class="username">(.*?)</p>',
-            webpage, 'uploader', fatal=False, flags=re.DOTALL)
+        webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id)
+
+        data = json.loads(self._html_search_regex(
+            r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data'))
+
+        formats = [
+            {
+                'url': data['videoLowURL'],
+                'ext': 'mp4',
+                'format_id': 'low',
+            },
+            {
+                'url': data['videoUrl'],
+                'ext': 'mp4',
+                'format_id': 'standard',
+            }
+        ]

         return {
             'id': video_id,
-            'url': video_url,
-            'ext': 'mp4',
             'title': self._og_search_title(webpage),
-            'thumbnail': self._og_search_thumbnail(webpage),
-            'uploader': uploader,
-        }
+            'description': data['description'],
+            'thumbnail': data['thumbnailUrl'],
+            'upload_date': unified_strdate(data['created']),
+            'uploader': data['username'],
+            'uploader_id': data['userIdStr'],
+            'like_count': data['likes']['count'],
+            'comment_count': data['comments']['count'],
+            'repost_count': data['reposts']['count'],
+            'formats': formats,
+        }

View File

@@ -22,8 +22,8 @@ class WorldStarHipHopIE(InfoExtractor):
         webpage_src = self._download_webpage(url, video_id)

         m_vevo_id = re.search(r'videoId=(.*?)&amp?',
             webpage_src)
         if m_vevo_id is not None:
             self.to_screen(u'Vevo video detected:')
             return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')

View File

@@ -29,7 +29,6 @@ from ..utils import (
     ExtractorError,
     int_or_none,
     PagedList,
-    RegexNotFoundError,
     unescapeHTML,
     unified_strdate,
     orderedSet,
@@ -1489,11 +1488,15 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         # the id of the playlist is just 'RD' + video_id
         url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
         webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
-        title_span = (get_element_by_attribute('class', 'title long-title', webpage) or
-            get_element_by_attribute('class', 'title ', webpage))
+        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
+        title_span = (search_title('playlist-title') or
+            search_title('title long-title') or search_title('title'))
         title = clean_html(title_span)
-        video_re = r'data-index="\d+".*?href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s' % re.escape(playlist_id)
-        ids = orderedSet(re.findall(video_re, webpage))
+        video_re = r'''(?x)data-video-username="(.*?)".*?
+                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)
+        matches = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
+        # Some of the videos may have been deleted, their username field is empty
+        ids = [video_id for (username, video_id) in matches if username]
         url_results = self._ids_to_results(ids)

         return self.playlist_result(url_results, playlist_id, title)
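
As the commit message earlier in this compare explains, the attribute order inside the mix playlist's <li> elements is not stable, so the new pattern keys on data-video-username (whose value is also used to skip deleted videos) rather than assuming data-index comes first. A small illustration of why fixing the relative order of attributes is fragile (markup invented for the example):

    import re

    # The same <li> may be served with its data-* attributes in either order.
    li_a = '<li data-index="3" data-video-username="Some User" data-video-id="dQw4w9WgXcQ">'
    li_b = '<li data-video-username="Some User" data-index="3" data-video-id="dQw4w9WgXcQ">'

    # A pattern that assumes data-index precedes data-video-username breaks on li_b...
    ordered = re.compile(r'data-index="(\d+)".*?data-video-username="(.*?)"')
    # ...while matching each attribute on its own does not care about the order.
    username = re.compile(r'data-video-username="(.*?)"')
    index = re.compile(r'data-index="(\d+)"')

    for li in (li_a, li_b):
        print(bool(ordered.search(li)), username.search(li).group(1), index.search(li).group(1))
    # -> True Some User 3
    # -> False Some User 3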

View File

@@ -1,4 +1,5 @@
 # coding: utf-8
+from __future__ import unicode_literals

 import re
@@ -13,52 +14,42 @@ class ZDFIE(InfoExtractor):
     _VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<video_id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'

     _TEST = {
-        u"url": u"http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt",
-        u"file": u"2037704.webm",
-        u"info_dict": {
-            u"upload_date": u"20131127",
-            u"description": u"Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial \"Ende des Machtpokers - Große Koalition für Deutschland\".",
-            u"uploader": u"spezial",
-            u"title": u"ZDFspezial - Ende des Machtpokers"
+        'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt',
+        'info_dict': {
+            'id': '2037704',
+            'ext': 'webm',
+            'title': 'ZDFspezial - Ende des Machtpokers',
+            'description': 'Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial "Ende des Machtpokers - Große Koalition für Deutschland".',
+            'duration': 1022,
+            'uploader': 'spezial',
+            'uploader_id': '225948',
+            'upload_date': '20131127',
         },
-        u"skip": u"Videos on ZDF.de are depublicised in short order",
+        'skip': 'Videos on ZDF.de are depublicised in short order',
     }

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('video_id')

-        xml_url = u'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
+        xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
         doc = self._download_xml(
             xml_url, video_id,
-            note=u'Downloading video info',
-            errnote=u'Failed to download video info')
+            note='Downloading video info',
+            errnote='Failed to download video info')

         title = doc.find('.//information/title').text
         description = doc.find('.//information/detail').text
+        duration = int(doc.find('.//details/lengthSec').text)
         uploader_node = doc.find('.//details/originChannelTitle')
         uploader = None if uploader_node is None else uploader_node.text
-        duration_str = doc.find('.//details/length').text
-        duration_m = re.match(r'''(?x)^
-            (?P<hours>[0-9]{2})
-            :(?P<minutes>[0-9]{2})
-            :(?P<seconds>[0-9]{2})
-            (?:\.(?P<ms>[0-9]+)?)
-            ''', duration_str)
-        duration = (
-            (
-                (int(duration_m.group('hours')) * 60 * 60) +
-                (int(duration_m.group('minutes')) * 60) +
-                int(duration_m.group('seconds'))
-            )
-            if duration_m
-            else None
-        )
+        uploader_id_node = doc.find('.//details/originChannelId')
+        uploader_id = None if uploader_id_node is None else uploader_id_node.text
         upload_date = unified_strdate(doc.find('.//details/airtime').text)

         def xml_to_format(fnode):
             video_url = fnode.find('url').text
-            is_available = u'http://www.metafilegenerator' not in video_url
+            is_available = 'http://www.metafilegenerator' not in video_url

             format_id = fnode.attrib['basetype']
             format_m = re.match(r'''(?x)
@@ -71,22 +62,28 @@ class ZDFIE(InfoExtractor):
             quality = fnode.find('./quality').text
             abr = int(fnode.find('./audioBitrate').text) // 1000
-            vbr = int(fnode.find('./videoBitrate').text) // 1000
+            vbr_node = fnode.find('./videoBitrate')
+            vbr = None if vbr_node is None else int(vbr_node.text) // 1000

-            format_note = u''
+            width_node = fnode.find('./width')
+            width = None if width_node is None else int_or_none(width_node.text)
+            height_node = fnode.find('./height')
+            height = None if height_node is None else int_or_none(height_node.text)
+
+            format_note = ''
             if not format_note:
                 format_note = None

             return {
-                'format_id': format_id + u'-' + quality,
+                'format_id': format_id + '-' + quality,
                 'url': video_url,
                 'ext': ext,
                 'acodec': format_m.group('acodec'),
                 'vcodec': format_m.group('vcodec'),
                 'abr': abr,
                 'vbr': vbr,
-                'width': int_or_none(fnode.find('./width').text),
-                'height': int_or_none(fnode.find('./height').text),
+                'width': width,
+                'height': height,
                 'filesize': int_or_none(fnode.find('./filesize').text),
                 'format_note': format_note,
                 'protocol': proto,
@@ -103,9 +100,10 @@ class ZDFIE(InfoExtractor):
         return {
             'id': video_id,
             'title': title,
-            'formats': formats,
             'description': description,
-            'uploader': uploader,
             'duration': duration,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
             'upload_date': upload_date,
+            'formats': formats,
         }

View File

@@ -779,6 +779,7 @@ def unified_strdate(date_str):
         '%Y-%m-%dT%H:%M:%S.%fZ',
         '%Y-%m-%dT%H:%M:%S.%f0Z',
         '%Y-%m-%dT%H:%M:%S',
+        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M',
    ]
    for expression in format_expressions:
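
The added expression lets unified_strdate handle ISO-8601 timestamps that carry fractional seconds but no timezone suffix, as produced by some of the JSON APIs touched in this compare (the Vine created field is a likely candidate, though that is an assumption). A small standalone sketch of what the new format string accepts:

    from datetime import datetime

    # unified_strdate() tries each expression in turn and keeps only %Y%m%d.
    dt = datetime.strptime('2014-02-23T19:00:51.123456', '%Y-%m-%dT%H:%M:%S.%f')
    print(dt.strftime('%Y%m%d'))  # 20140223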

View File

@@ -1,2 +1,2 @@
-__version__ = '2014.02.21.1'
+__version__ = '2014.02.24'