Compare commits

...

49 Commits

Author SHA1 Message Date
0bf5cf9886 release 2014.02.24 2014-02-24 09:44:22 +01:00
919052d094 [zdf] Fix podcast extraction and use unicode literals (Closes #2446) 2014-02-24 13:47:47 +07:00
a2dafe2887 [youtube] Fix mix video regex
Attributes' order in <li> is arbitrary and changes every time playlist
page is fetched, so we can't rely on `data-index` to be before
`data-video-username`.
2014-02-24 12:52:02 +07:00
92661c994b [normalboots] Modernize and simplify 2014-02-23 18:28:22 +01:00
ffe8fe356a [normalboots] Fix video url extraction 2014-02-23 18:06:51 +01:00
bc2f773b4f [youtube:playlist] Fix mixes extraction (fixes #2444) 2014-02-23 17:17:36 +01:00
f919201ecc [vine] Extract more metadata and support low format 2014-02-23 19:02:31 +07:00
7ff5d5c2e2 Add one more format to unified_strdate 2014-02-23 19:00:51 +07:00
9b77f951c7 [breakcom] Fix error when calling _search_regex
I passed `’webpage’` instead of the variable `webpage`.
2014-02-23 12:28:44 +01:00
a25f2f990a [breakcom] Fix info json extraction 2014-02-23 12:20:58 +01:00
78b373975d [vine] Fix uploader extraction 2014-02-23 12:08:30 +01:00
2fcc873c4c release 2014.02.22.1 2014-02-22 23:17:56 +01:00
23c2baadb3 [videobam] Set age_limit to 18
From [their ToS](http://videobam.com/terms): "User must be eighteen 18[sic] years of age or older to use or access this web site."
2014-02-22 23:15:41 +01:00
521ee82334 Fix imports 2014-02-22 23:03:12 +01:00
1df96e59ce [f4m] Clean up 2014-02-22 23:03:00 +01:00
3e123c1e28 [videobam] Add support for videobam.com (Closes #2411) 2014-02-23 04:50:05 +07:00
f38da66731 Credit @soult for br 2014-02-22 20:19:41 +01:00
06aabfc422 [br] Simplify 2014-02-22 20:17:26 +01:00
1052d2bfec Merge remote-tracking branch 'soult/br' 2014-02-22 17:14:47 +01:00
5e0b652344 release 2014.02.22 2014-02-22 15:07:25 +01:00
0f8f097183 [release.sh] Do not run tests by default
We are at the point that testing takes waay too long for a release cycle, and fails way too often.
Tests through travis are a better indicator than testing just before release.
2014-02-22 15:06:07 +01:00
491ed3dda2 [trutube] Support multiple formats (#2433) 2014-02-22 15:05:30 +01:00
af284c6d1b Merge remote-tracking branch 'JohnyMoSwag/master' 2014-02-22 14:38:42 +01:00
41d3ec5fba [savefrom] Add extractor (Fixes #2434) 2014-02-22 14:36:16 +01:00
0568c352f3 [canalc2] Modernize 2014-02-22 14:27:09 +01:00
2e7b4cb714 [spankwire] Fix uploader id regex 2014-02-22 16:50:08 +07:00
9767726b66 [spankwire] Improve and modernize 2014-02-22 16:45:03 +07:00
9ddfd84e41 added trutubeIE 2014-02-22 00:11:57 -08:00
1cf563d84b release 2014.02.21.1 2014-02-21 18:19:48 +01:00
7928024f57 [BR] Add basic test 2014-02-21 18:00:05 +01:00
3eb38acb43 [BR] Add "BR" extractor
Extractor for videos from the Bayerischer Rundfunk Mediathek[1]. Currently only
supports videos. Audio and podcasts do not work yet with this extractor.

1: http://br.de/mediathek
2014-02-21 17:58:52 +01:00
f7300c5c90 [generic] Fix on python 2.6
`ParseError` is not available, it raises `xml.parsers.expat.ExpatError`.
The webpage needs to be encoded.
2014-02-21 16:59:10 +01:00
3489b7d26c [youtube] Simplify the decryption process for the manifest urls and add a test (closes #2422) 2014-02-21 15:15:58 +01:00
acd2bcc384 Merge branch 'youtube-dash' of github.com:m0vie/youtube-dl 2014-02-21 15:02:47 +01:00
43e77ca455 release 2014.02.21 2014-02-21 12:16:03 +01:00
da36297988 [wimp] Modernize and replace test 2014-02-21 17:57:19 +07:00
dbb94fb044 [youtube] Fix playlist extraction (Closes #2423, #2424, #2425) 2014-02-21 17:19:55 +07:00
d68f0cdb23 [youtube] decrypt signature when downloading dash manifest 2014-02-21 03:24:56 +01:00
eae16eb67b release 2014.02.20 2014-02-20 13:14:21 +01:00
4fc946b546 [generic] Add support for RSS feeds (Fixes #667) 2014-02-20 13:14:09 +01:00
280bc5dad6 [bbccouk] Add friendly contry filter error message (#2184) 2014-02-20 18:50:34 +07:00
f43770d8c9 Merge pull request #2413 from bentley/optypo
Fix minor typo: “to to” → “to”.
2014-02-20 08:02:54 +01:00
98c4b8fa1b Fix minor typo: “to to” → “to”. 2014-02-19 20:02:29 -07:00
ccb079ee67 [xhamster] Fix and improve 2014-02-20 02:37:44 +07:00
2ea237472c Merge pull request #2408 from pulpe/_readme
[README.md] correct the test command
2014-02-19 16:45:14 +01:00
0d4b4865cc [README.md] correct the test command 2014-02-19 16:13:45 +01:00
fe52f9f956 Document prefered config location (#2407) 2014-02-19 11:35:35 +01:00
882907a818 release 2014.02.19.1 2014-02-19 01:27:22 +01:00
572a89cc4e [liveleak] Add support for prochan embeds (Fixes #2406) 2014-02-19 01:27:12 +01:00
26 changed files with 659 additions and 256 deletions

View File

@ -20,7 +20,7 @@ which means you can modify it, redistribute it or use it however you like.
sure that you have sufficient permissions
(run with sudo if needed)
-i, --ignore-errors continue on download errors, for example to
to skip unavailable videos in a playlist
skip unavailable videos in a playlist
--abort-on-error Abort downloading of further videos (in the
playlist or the command line) if an error
occurs
@ -246,7 +246,7 @@ which means you can modify it, redistribute it or use it however you like.
# CONFIGURATION
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl.conf`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<Yourname>\youtube-dl.conf`.
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<Yourname>\youtube-dl.conf`.
# OUTPUT TEMPLATE
@ -357,7 +357,7 @@ If you want to create a build of youtube-dl yourself, you'll need
### Adding support for a new site
If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor`, add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py Test_Download.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/).
If you want to add support for a new site, copy *any* [recently modified](https://github.com/rg3/youtube-dl/commits/master/youtube_dl/extractor) file in `youtube_dl/extractor`, add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Don't forget to run the tests with `python test/test_download.py TestDownload.test_YourExtractor`! For a detailed tutorial, refer to [this blog post](http://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/).
# BUGS

View File

@ -14,9 +14,9 @@
set -e
skip_tests=false
if [ "$1" = '--skip-test' ]; then
skip_tests=true
skip_tests=true
if [ "$1" = '--run-tests' ]; then
skip_tests=false
shift
fi

View File

@ -18,6 +18,7 @@ from test.helper import (
import hashlib
import io
import json
import re
import socket
import youtube_dl.YoutubeDL
@ -137,12 +138,21 @@ def generator(test_case):
with io.open(info_json_fn, encoding='utf-8') as infof:
info_dict = json.load(infof)
for (info_field, expected) in tc.get('info_dict', {}).items():
if isinstance(expected, compat_str) and expected.startswith('md5:'):
got = 'md5:' + md5(info_dict.get(info_field))
else:
if isinstance(expected, compat_str) and expected.startswith('re:'):
got = info_dict.get(info_field)
self.assertEqual(expected, got,
u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
match_str = expected[len('re:'):]
match_rex = re.compile(match_str)
self.assertTrue(
isinstance(got, compat_str) and match_rex.match(got),
u'field %s (value: %r) should match %r' % (info_field, got, match_str))
else:
if isinstance(expected, compat_str) and expected.startswith('md5:'):
got = 'md5:' + md5(info_dict.get(info_field))
else:
got = info_dict.get(info_field)
self.assertEqual(expected, got,
u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
# If checkable fields are missing from the test case, print the info_dict
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))

View File

@ -250,5 +250,14 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['title'], 'python language')
self.assertTrue(len(result['entries']) == 15)
def test_generic_rss_feed(self):
dl = FakeYDL()
ie = GenericIE(dl)
result = ie.extract('http://www.escapistmagazine.com/rss/videos/list/1.xml')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'http://www.escapistmagazine.com/rss/videos/list/1.xml')
self.assertEqual(result['title'], 'Zero Punctuation')
self.assertTrue(len(result['entries']) > 10)
if __name__ == '__main__':
unittest.main()

View File

@ -46,6 +46,7 @@ __authors__ = (
'Andreas Schmitz',
'Michael Kaiser',
'Niklas Laxström',
'David Triendl',
)
__license__ = 'Public Domain'
@ -208,7 +209,7 @@ def parseOpts(overrideArguments=None):
general.add_option('-U', '--update',
action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
general.add_option('-i', '--ignore-errors',
action='store_true', dest='ignoreerrors', help='continue on download errors, for example to to skip unavailable videos in a playlist', default=False)
action='store_true', dest='ignoreerrors', help='continue on download errors, for example to skip unavailable videos in a playlist', default=False)
general.add_option('--abort-on-error',
action='store_false', dest='ignoreerrors',
help='Abort downloading of further videos (in the playlist or the command line) if an error occurs')

View File

@ -12,7 +12,6 @@ from .http import HttpFD
from ..utils import (
struct_pack,
struct_unpack,
compat_urllib_request,
compat_urlparse,
format_bytes,
encodeFilename,
@ -117,8 +116,8 @@ class FlvReader(io.BytesIO):
self.read_unsigned_char()
# flags
self.read(3)
# BootstrapinfoVersion
bootstrap_info_version = self.read_unsigned_int()
self.read_unsigned_int() # BootstrapinfoVersion
# Profile,Live,Update,Reserved
self.read(1)
# time scale
@ -127,15 +126,15 @@ class FlvReader(io.BytesIO):
self.read_unsigned_long_long()
# SmpteTimeCodeOffset
self.read_unsigned_long_long()
# MovieIdentifier
movie_identifier = self.read_string()
self.read_string() # MovieIdentifier
server_count = self.read_unsigned_char()
# ServerEntryTable
for i in range(server_count):
self.read_string()
quality_count = self.read_unsigned_char()
# QualityEntryTable
for i in range(server_count):
for i in range(quality_count):
self.read_string()
# DrmData
self.read_string()

View File

@ -19,6 +19,7 @@ from .bbccouk import BBCCoUkIE
from .blinkx import BlinkxIE
from .bliptv import BlipTVIE, BlipTVUserIE
from .bloomberg import BloombergIE
from .br import BRIE
from .breakcom import BreakIE
from .brightcove import BrightcoveIE
from .c56 import C56IE
@ -186,6 +187,7 @@ from .rutube import (
RutubeMovieIE,
RutubePersonIE,
)
from .savefrom import SaveFromIE
from .servingsys import ServingSysIE
from .sina import SinaIE
from .slashdot import SlashdotIE
@ -224,6 +226,7 @@ from .tinypic import TinyPicIE
from .toutv import TouTvIE
from .traileraddict import TrailerAddictIE
from .trilulilu import TriluliluIE
from .trutube import TruTubeIE
from .tube8 import Tube8IE
from .tudou import TudouIE
from .tumblr import TumblrIE
@ -238,6 +241,7 @@ from .vesti import VestiIE
from .vevo import VevoIE
from .vice import ViceIE
from .viddler import ViddlerIE
from .videobam import VideoBamIE
from .videodetective import VideoDetectiveIE
from .videofyme import VideofyMeIE
from .videopremium import VideoPremiumIE

View File

@ -162,6 +162,11 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
mobj = re.match(self._VALID_URL, url)
group_id = mobj.group('id')
webpage = self._download_webpage(url, group_id, 'Downloading video page')
if re.search(r'id="emp-error" class="notinuk">', webpage):
raise ExtractorError('Currently BBC iPlayer TV programmes are available to play in the UK only',
expected=True)
playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
'Downloading playlist XML')

View File

@ -0,0 +1,80 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import ExtractorError
class BRIE(InfoExtractor):
IE_DESC = "Bayerischer Rundfunk Mediathek"
_VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?P<id>[a-z0-9\-]+)\.html$"
_BASE_URL = "http://www.br.de"
_TEST = {
"url": "http://www.br.de/mediathek/video/anselm-gruen-114.html",
"md5": "c4f83cf0f023ba5875aba0bf46860df2",
"info_dict": {
"id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532",
"ext": "mp4",
"title": "Feiern und Verzichten",
"description": "Anselm Grün: Feiern und Verzichten",
"uploader": "BR/Birgit Baier",
"upload_date": "20140301"
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('id')
page = self._download_webpage(url, display_id)
xml_url = self._search_regex(
r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL")
xml = self._download_xml(self._BASE_URL + xml_url, None)
videos = [{
"id": xml_video.get("externalId"),
"title": xml_video.find("title").text,
"formats": self._extract_formats(xml_video.find("assets")),
"thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")),
"description": " ".join(xml_video.find("shareTitle").text.splitlines()),
"uploader": xml_video.find("author").text,
"upload_date": "".join(reversed(xml_video.find("broadcastDate").text.split("."))),
"webpage_url": xml_video.find("permalink").text,
} for xml_video in xml.findall("video")]
if len(videos) > 1:
self._downloader.report_warning(
'found multiple videos; please '
'report this with the video URL to http://yt-dl.org/bug')
if not videos:
raise ExtractorError('No video entries found')
return videos[0]
def _extract_formats(self, assets):
formats = [{
"url": asset.find("downloadUrl").text,
"ext": asset.find("mediaType").text,
"format_id": asset.get("type"),
"width": int(asset.find("frameWidth").text),
"height": int(asset.find("frameHeight").text),
"tbr": int(asset.find("bitrateVideo").text),
"abr": int(asset.find("bitrateAudio").text),
"vcodec": asset.find("codecVideo").text,
"container": asset.find("mediaType").text,
"filesize": int(asset.find("size").text),
} for asset in assets.findall("asset")
if asset.find("downloadUrl") is not None]
self._sort_formats(formats)
return formats
def _extract_thumbnails(self, variants):
thumbnails = [{
"url": self._BASE_URL + variant.find("url").text,
"width": int(variant.find("width").text),
"height": int(variant.find("height").text),
} for variant in variants.findall("variant")]
thumbnails.sort(key=lambda x: x["width"] * x["height"], reverse=True)
return thumbnails

View File

@ -23,8 +23,8 @@ class BreakIE(InfoExtractor):
video_id = mobj.group(1).split("-")[-1]
embed_url = 'http://www.break.com/embed/%s' % video_id
webpage = self._download_webpage(embed_url, video_id)
info_json = self._search_regex(r'var embedVars = ({.*?});', webpage,
'info json', flags=re.DOTALL)
info_json = self._search_regex(r'var embedVars = ({.*})\s*?</script>',
webpage, 'info json', flags=re.DOTALL)
info = json.loads(info_json)
video_url = info['videoUri']
m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url)

View File

@ -1,4 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
@ -9,11 +11,12 @@ class Canalc2IE(InfoExtractor):
_VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P<id>\d+)'
_TEST = {
u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
u'file': u'12163.mp4',
u'md5': u'060158428b650f896c542dfbb3d6487f',
u'info_dict': {
u'title': u'Terrasses du Numérique'
'url': 'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
'md5': '060158428b650f896c542dfbb3d6487f',
'info_dict': {
'id': '12163',
'ext': 'mp4',
'title': 'Terrasses du Numérique'
}
}
@ -28,10 +31,11 @@ class Canalc2IE(InfoExtractor):
video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name
title = self._html_search_regex(
r'class="evenement8">(.*?)</a>', webpage, u'title')
return {'id': video_id,
'ext': 'mp4',
'url': video_url,
'title': title,
}
r'class="evenement8">(.*?)</a>', webpage, 'title')
return {
'id': video_id,
'ext': 'mp4',
'url': video_url,
'title': title,
}

View File

@ -4,6 +4,7 @@ from __future__ import unicode_literals
import os
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from .youtube import YoutubeIE
@ -12,6 +13,7 @@ from ..utils import (
compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
compat_xml_parse_error,
ExtractorError,
HEADRequest,
@ -159,6 +161,25 @@ class GenericIE(InfoExtractor):
raise ExtractorError('Invalid URL protocol')
return response
def _extract_rss(self, url, video_id, doc):
playlist_title = doc.find('./channel/title').text
playlist_desc_el = doc.find('./channel/description')
playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
entries = [{
'_type': 'url',
'url': e.find('link').text,
'title': e.find('title').text,
} for e in doc.findall('./channel/item')]
return {
'_type': 'playlist',
'id': url,
'title': playlist_title,
'description': playlist_desc,
'entries': entries,
}
def _real_extract(self, url):
parsed_url = compat_urlparse.urlparse(url)
if not parsed_url.scheme:
@ -219,6 +240,14 @@ class GenericIE(InfoExtractor):
self.report_extraction(video_id)
# Is it an RSS feed?
try:
doc = xml.etree.ElementTree.fromstring(webpage.encode('utf-8'))
if doc.tag == 'rss':
return self._extract_rss(url, video_id, doc)
except compat_xml_parse_error:
pass
# it's tempting to parse this further, but you would
# have to take into account all the variations like
# Video Title - Site Name

View File

@ -4,15 +4,17 @@ import json
import re
from .common import InfoExtractor
from ..utils import int_or_none
class LiveLeakIE(InfoExtractor):
_VALID_URL = r'^(?:http://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
_TESTS = [{
'url': 'http://www.liveleak.com/view?i=757_1364311680',
'file': '757_1364311680.mp4',
'md5': '0813c2430bea7a46bf13acf3406992f4',
'info_dict': {
'id': '757_1364311680',
'ext': 'mp4',
'description': 'extremely bad day for this guy..!',
'uploader': 'ljfriel2',
'title': 'Most unlucky car accident'
@ -20,25 +22,62 @@ class LiveLeakIE(InfoExtractor):
},
{
'url': 'http://www.liveleak.com/view?i=f93_1390833151',
'file': 'f93_1390833151.mp4',
'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
'info_dict': {
'id': 'f93_1390833151',
'ext': 'mp4',
'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
'uploader': 'ARD_Stinkt',
'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
}
},
{
'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
'md5': '42c6d97d54f1db107958760788c5f48f',
'info_dict': {
'id': '4f7_1392687779',
'ext': 'mp4',
'description': "The guy with the cigarette seems amazingly nonchalant about the whole thing... I really hope my friends' reactions would be a bit stronger.\r\n\r\nAction-go to 0:55.",
'uploader': 'CapObveus',
'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck',
'age_limit': 18,
}
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
webpage = self._download_webpage(url, video_id)
video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
video_description = self._og_search_description(webpage)
video_uploader = self._html_search_regex(
r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False)
age_limit = int_or_none(self._search_regex(
r'you confirm that you are ([0-9]+) years and over.',
webpage, 'age limit', default=None))
sources_raw = self._search_regex(
r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs', default=None)
if sources_raw is None:
sources_raw = '[{ %s}]' % (
self._search_regex(r'(file: ".*?"),', webpage, 'video URL'))
alt_source = self._search_regex(
r'(file: ".*?"),', webpage, 'video URL', default=None)
if alt_source:
sources_raw = '[{ %s}]' % alt_source
else:
# Maybe an embed?
embed_url = self._search_regex(
r'<iframe[^>]+src="(http://www.prochan.com/embed\?[^"]+)"',
webpage, 'embed URL')
return {
'_type': 'url_transparent',
'url': embed_url,
'id': video_id,
'title': video_title,
'description': video_description,
'uploader': video_uploader,
'age_limit': age_limit,
}
sources_json = re.sub(r'\s([a-z]+):\s', r'"\1": ', sources_raw)
sources = json.loads(sources_json)
@ -49,15 +88,11 @@ class LiveLeakIE(InfoExtractor):
} for s in sources]
self._sort_formats(formats)
video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
video_description = self._og_search_description(webpage)
video_uploader = self._html_search_regex(
r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False)
return {
'id': video_id,
'title': video_title,
'description': video_description,
'uploader': video_uploader,
'formats': formats,
'age_limit': age_limit,
}

View File

@ -1,61 +1,51 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
unified_strdate,
)
class NormalbootsIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$'
_VALID_URL = r'http://(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$'
_TEST = {
u'url': u'http://normalboots.com/video/home-alone-games-jontron/',
u'file': u'home-alone-games-jontron.mp4',
u'md5': u'8bf6de238915dd501105b44ef5f1e0f6',
u'info_dict': {
u'title': u'Home Alone Games - JonTron - NormalBoots',
u'description': u'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for \u2018Tense Battle Theme\u2019:\xa0http://www.youtube.com/Kiamet/',
u'uploader': u'JonTron',
u'upload_date': u'20140125',
'url': 'http://normalboots.com/video/home-alone-games-jontron/',
'md5': '8bf6de238915dd501105b44ef5f1e0f6',
'info_dict': {
'id': 'home-alone-games-jontron',
'ext': 'mp4',
'title': 'Home Alone Games - JonTron - NormalBoots',
'description': 'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for Tense Battle Theme:\xa0http://www.youtube.com/Kiamet/',
'uploader': 'JonTron',
'upload_date': '20140125',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group('videoid')
info = {
'id': video_id,
'uploader': None,
'upload_date': None,
}
if url[:4] != 'http':
url = 'http://' + url
webpage = self._download_webpage(url, video_id)
video_title = self._og_search_title(webpage)
video_description = self._og_search_description(webpage)
video_thumbnail = self._og_search_thumbnail(webpage)
video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
webpage, 'uploader')
raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
webpage, 'date')
video_upload_date = unified_strdate(raw_upload_date)
video_upload_date = unified_strdate(raw_upload_date)
player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url')
player_page = self._download_webpage(player_url, video_id)
video_url = u'http://player.screenwavemedia.com/' + self._html_search_regex(r"'file':\s'(?P<file>[0-9A-Za-z-_\.]+)'", player_page, 'file')
info['url'] = video_url
info['title'] = video_title
info['description'] = video_description
info['thumbnail'] = video_thumbnail
info['uploader'] = video_uploader
info['upload_date'] = video_upload_date
return info
video_url = self._html_search_regex(r"file:\s'(?P<file>[^']+\.mp4)'", player_page, 'file')
return {
'id': video_id,
'url': video_url,
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'uploader': video_uploader,
'upload_date': video_upload_date,
}

View File

@ -0,0 +1,37 @@
# coding: utf-8
from __future__ import unicode_literals
import os.path
import re
from .common import InfoExtractor
class SaveFromIE(InfoExtractor):
IE_NAME = 'savefrom.net'
_VALID_URL = r'https?://[^.]+\.savefrom\.net/\#url=(?P<url>.*)$'
_TEST = {
'url': 'http://en.savefrom.net/#url=http://youtube.com/watch?v=UlVRAPW2WJY&utm_source=youtube.com&utm_medium=short_domains&utm_campaign=ssyoutube.com',
'info_dict': {
'id': 'UlVRAPW2WJY',
'ext': 'mp4',
'title': 'About Team Radical MMA | MMA Fighting',
'upload_date': '20120816',
'uploader': 'Howcast',
'uploader_id': 'Howcast',
'description': 'md5:4f0aac94361a12e1ce57d74f85265175',
},
'params': {
'skip_download': True
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = os.path.splitext(url.split('/')[-1])[0]
return {
'_type': 'url',
'id': video_id,
'url': mobj.group('url'),
}

View File

@ -1,6 +1,5 @@
from __future__ import unicode_literals
import os
import re
from .common import InfoExtractor
@ -8,23 +7,27 @@ from ..utils import (
compat_urllib_parse_urlparse,
compat_urllib_request,
compat_urllib_parse,
unified_strdate,
str_to_int,
int_or_none,
)
from ..aes import (
aes_decrypt_text
)
from ..aes import aes_decrypt_text
class SpankwireIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
_VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
_TEST = {
'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
'file': '103545.mp4',
'md5': '1b3f55e345500552dbc252a3e9c1af43',
'md5': '8bbfde12b101204b39e4b9fe7eb67095',
'info_dict': {
"uploader": "oreusz",
"title": "Buckcherry`s X Rated Music Video Crazy Bitch",
"description": "Crazy Bitch X rated music video.",
"age_limit": 18,
'id': '103545',
'ext': 'mp4',
'title': 'Buckcherry`s X Rated Music Video Crazy Bitch',
'description': 'Crazy Bitch X rated music video.',
'uploader': 'oreusz',
'uploader_id': '124697',
'upload_date': '20070508',
'age_limit': 18,
}
}
@ -37,13 +40,26 @@ class SpankwireIE(InfoExtractor):
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)
video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
video_uploader = self._html_search_regex(
r'by:\s*<a [^>]*>(.+?)</a>', webpage, 'uploader', fatal=False)
thumbnail = self._html_search_regex(
r'flashvars\.image_url = "([^"]+)', webpage, 'thumbnail', fatal=False)
title = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
description = self._html_search_regex(
r'<div\s+id="descriptionContent">([^<]+)<', webpage, 'description', fatal=False)
thumbnail = self._html_search_regex(
r'flashvars\.image_url = "([^"]+)', webpage, 'thumbnail', fatal=False)
uploader = self._html_search_regex(
r'by:\s*<a [^>]*>(.+?)</a>', webpage, 'uploader', fatal=False)
uploader_id = self._html_search_regex(
r'by:\s*<a href="/Profile\.aspx\?.*?UserId=(\d+).*?"', webpage, 'uploader id', fatal=False)
upload_date = self._html_search_regex(r'</a> on (.+?) at \d+:\d+', webpage, 'upload date', fatal=False)
if upload_date:
upload_date = unified_strdate(upload_date)
view_count = self._html_search_regex(
r'<div id="viewsCounter"><span>([^<]+)</span> views</div>', webpage, 'view count', fatal=False)
if view_count:
view_count = str_to_int(view_count)
comment_count = int_or_none(self._html_search_regex(
r'<span id="spCommentCount">\s*(\d+)</span> Comments</div>', webpage, 'comment count', fatal=False))
video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage)))
if webpage.find('flashvars\.encrypted = "true"') != -1:
@ -53,16 +69,13 @@ class SpankwireIE(InfoExtractor):
formats = []
for video_url in video_urls:
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]
format = path.split('/')[4].split('_')[:2]
resolution, bitrate_str = format
format = "-".join(format)
height = int(resolution.rstrip('P'))
tbr = int(bitrate_str.rstrip('K'))
height = int(resolution.rstrip('Pp'))
tbr = int(bitrate_str.rstrip('Kk'))
formats.append({
'url': video_url,
'ext': extension,
'resolution': resolution,
'format': format,
'tbr': tbr,
@ -75,10 +88,14 @@ class SpankwireIE(InfoExtractor):
return {
'id': video_id,
'uploader': video_uploader,
'title': video_title,
'thumbnail': thumbnail,
'title': title,
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
'uploader_id': uploader_id,
'upload_date': upload_date,
'view_count': view_count,
'comment_count': comment_count,
'formats': formats,
'age_limit': age_limit,
}

View File

@ -0,0 +1,44 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class TruTubeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?trutube\.tv/video/(?P<id>[0-9]+)/.*'
_TEST = {
'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-',
'md5': 'c5b6e301b0a2040b074746cbeaa26ca1',
'info_dict': {
'id': '14880',
'ext': 'flv',
'title': 'Ramses II - Proven To Be A Red Headed Caucasoid',
'thumbnail': 're:^http:.*\.jpg$',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
video_title = self._og_search_title(webpage).strip()
thumbnail = self._search_regex(
r"var splash_img = '([^']+)';", webpage, 'thumbnail', fatal=False)
all_formats = re.finditer(
r"var (?P<key>[a-z]+)_video_file\s*=\s*'(?P<url>[^']+)';", webpage)
formats = [{
'format_id': m.group('key'),
'quality': -i,
'url': m.group('url'),
} for i, m in enumerate(all_formats)]
self._sort_formats(formats)
return {
'id': video_id,
'title': video_title,
'formats': formats,
'thumbnail': thumbnail,
}

View File

@ -0,0 +1,80 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..utils import int_or_none
class VideoBamIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?videobam\.com/(?:videos/download/)?(?P<id>[a-zA-Z]+)'
_TESTS = [
{
'url': 'http://videobam.com/OiJQM',
'md5': 'db471f27763a531f10416a0c58b5a1e0',
'info_dict': {
'id': 'OiJQM',
'ext': 'mp4',
'title': 'Is Alcohol Worse Than Ecstasy?',
'description': 'md5:d25b96151515c91debc42bfbb3eb2683',
'uploader': 'frihetsvinge',
},
},
{
'url': 'http://videobam.com/pqLvq',
'md5': 'd9a565b5379a99126ef94e1d7f9a383e',
'note': 'HD video',
'info_dict': {
'id': 'pqLvq',
'ext': 'mp4',
}
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
page = self._download_webpage('http://videobam.com/%s' % video_id, video_id, 'Downloading page')
formats = []
for preference, format_id in enumerate(['low', 'high']):
mobj = re.search(r"%s: '(?P<url>[^']+)'" % format_id, page)
if not mobj:
continue
formats.append({
'url': mobj.group('url'),
'ext': 'mp4',
'format_id': format_id,
'preference': preference,
})
if not formats:
player_config = json.loads(self._html_search_regex(r'var player_config = ({.+?});', page, 'player config'))
formats = [{
'url': item['url'],
'ext': 'mp4',
} for item in player_config['playlist'] if 'autoPlay' in item]
self._sort_formats(formats)
title = self._og_search_title(page, default='VideoBam', fatal=False)
description = self._og_search_description(page, default=None)
thumbnail = self._og_search_thumbnail(page)
uploader = self._html_search_regex(r'Upload by ([^<]+)</a>', page, 'uploader', fatal=False, default=None)
view_count = int_or_none(
self._html_search_regex(r'<strong>Views:</strong> (\d+) ', page, 'view count', fatal=False))
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
'view_count': view_count,
'formats': formats,
'age_limit': 18,
}

View File

@ -1,8 +1,10 @@
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..utils import unified_strdate
class VineIE(InfoExtractor):
@ -13,31 +15,46 @@ class VineIE(InfoExtractor):
'info_dict': {
'id': 'b9KOOWX7HUx',
'ext': 'mp4',
'uploader': 'Jack Dorsey',
'title': 'Chicken.',
'description': 'Chicken.',
'upload_date': '20130519',
'uploader': 'Jack Dorsey',
'uploader_id': '76',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage_url = 'https://vine.co/v/' + video_id
webpage = self._download_webpage(webpage_url, video_id)
self.report_extraction(video_id)
webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id)
video_url = self._html_search_meta('twitter:player:stream', webpage,
'video URL')
data = json.loads(self._html_search_regex(
r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data'))
uploader = self._html_search_regex(r'<p class="username">(.*?)</p>',
webpage, 'uploader', fatal=False, flags=re.DOTALL)
formats = [
{
'url': data['videoLowURL'],
'ext': 'mp4',
'format_id': 'low',
},
{
'url': data['videoUrl'],
'ext': 'mp4',
'format_id': 'standard',
}
]
return {
'id': video_id,
'url': video_url,
'ext': 'mp4',
'title': self._og_search_title(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'uploader': uploader,
}
'description': data['description'],
'thumbnail': data['thumbnailUrl'],
'upload_date': unified_strdate(data['created']),
'uploader': data['username'],
'uploader_id': data['userIdStr'],
'like_count': data['likes']['count'],
'comment_count': data['comments']['count'],
'repost_count': data['reposts']['count'],
'formats': formats,
}

View File

@ -6,14 +6,15 @@ from .common import InfoExtractor
class WimpIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?wimp\.com/([^/]+)/'
_VALID_URL = r'http://(?:www\.)?wimp\.com/([^/]+)/'
_TEST = {
'url': 'http://www.wimp.com/deerfence/',
'file': 'deerfence.flv',
'md5': '8b215e2e0168c6081a1cf84b2846a2b5',
'url': 'http://www.wimp.com/maruexhausted/',
'md5': 'f1acced123ecb28d9bb79f2479f2b6a1',
'info_dict': {
"title": "Watch Till End: Herd of deer jump over a fence.",
"description": "These deer look as fluid as running water when they jump over this fence as a herd. This video is one that needs to be watched until the very end for the true majesty to be witnessed, but once it comes, it's sure to take your breath away.",
'id': 'maruexhausted',
'ext': 'flv',
'title': 'Maru is exhausted.',
'description': 'md5:57e099e857c0a4ea312542b684a869b8',
}
}
@ -30,4 +31,4 @@ class WimpIE(InfoExtractor):
'title': self._og_search_title(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'description': self._og_search_description(webpage),
}
}

View File

@ -22,8 +22,8 @@ class WorldStarHipHopIE(InfoExtractor):
webpage_src = self._download_webpage(url, video_id)
m_vevo_id = re.search(r'videoId=(.*?)&amp?',
webpage_src)
webpage_src)
if m_vevo_id is not None:
self.to_screen(u'Vevo video detected:')
return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')

View File

@ -4,51 +4,51 @@ import re
from .common import InfoExtractor
from ..utils import (
compat_urllib_parse,
ExtractorError,
unified_strdate,
str_to_int,
int_or_none,
parse_duration,
)
class XHamsterIE(InfoExtractor):
"""Information Extractor for xHamster"""
_VALID_URL = r'(?:http://)?(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
_TESTS = [{
'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
'file': '1509445.mp4',
'md5': '8281348b8d3c53d39fffb377d24eac4e',
'info_dict': {
"upload_date": "20121014",
"uploader_id": "Ruseful2011",
"title": "FemaleAgent Shy beauty takes the bait",
"age_limit": 18,
_VALID_URL = r'http://(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
_TESTS = [
{
'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
'md5': '8281348b8d3c53d39fffb377d24eac4e',
'info_dict': {
'id': '1509445',
'ext': 'mp4',
'title': 'FemaleAgent Shy beauty takes the bait',
'upload_date': '20121014',
'uploader_id': 'Ruseful2011',
'duration': 893,
'age_limit': 18,
}
},
{
'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
'md5': '4cbd8d56708ecb4fb4124c23e4acb81a',
'info_dict': {
'id': '2221348',
'ext': 'mp4',
'title': 'Britney Spears Sexy Booty',
'upload_date': '20130914',
'uploader_id': 'jojo747400',
'duration': 200,
'age_limit': 18,
}
}
},
{
'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
'file': '2221348.flv',
'md5': 'e767b9475de189320f691f49c679c4c7',
'info_dict': {
"upload_date": "20130914",
"uploader_id": "jojo747400",
"title": "Britney Spears Sexy Booty",
"age_limit": 18,
}
}]
]
def _real_extract(self,url):
def extract_video_url(webpage):
mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
if mobj is None:
raise ExtractorError('Unable to extract media URL')
if len(mobj.group('server')) == 0:
return compat_urllib_parse.unquote(mobj.group('file'))
else:
return mobj.group('server')+'/key='+mobj.group('file')
def extract_mp4_video_url(webpage):
mp4 = re.search(r'<a href=\"(.+?)\" class=\"mp4Play\"',webpage)
mp4 = re.search(r'<video\s+.*?file="([^"]+)".*?>', webpage)
if mp4 is None:
return None
raise ExtractorError('Unable to extract media URL')
else:
return mp4.group(1)
@ -62,50 +62,48 @@ class XHamsterIE(InfoExtractor):
mrss_url = 'http://xhamster.com/movies/%s/%s.html' % (video_id, seo)
webpage = self._download_webpage(mrss_url, video_id)
video_title = self._html_search_regex(
r'<title>(?P<title>.+?) - xHamster\.com</title>', webpage, 'title')
title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>', webpage, 'title')
# Only a few videos have an description
mobj = re.search(r'<span>Description: </span>([^<]+)', webpage)
video_description = mobj.group(1) if mobj else None
description = mobj.group(1) if mobj else None
mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
if mobj:
video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d')
else:
video_upload_date = None
self._downloader.report_warning('Unable to extract upload date')
upload_date = self._html_search_regex(r'hint=\'(\d{4}-\d{2}-\d{2}) \d{2}:\d{2}:\d{2} [A-Z]{3,4}\'',
webpage, 'upload date', fatal=False)
if upload_date:
upload_date = unified_strdate(upload_date)
video_uploader_id = self._html_search_regex(
r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
webpage, 'uploader id', default='anonymous')
video_thumbnail = self._search_regex(
r'\'image\':\'(?P<thumbnail>[^\']+)\'',
webpage, 'thumbnail', fatal=False)
thumbnail = self._html_search_regex(r'<video\s+.*?poster="([^"]+)".*?>', webpage, 'thumbnail', fatal=False)
duration = parse_duration(self._html_search_regex(r'<span>Runtime:</span> (\d+:\d+)</div>',
webpage, 'duration', fatal=False))
view_count = self._html_search_regex(r'<span>Views:</span> ([^<]+)</div>', webpage, 'view count', fatal=False)
if view_count:
view_count = str_to_int(view_count)
mobj = re.search(r"hint='(?P<likecount>\d+) Likes / (?P<dislikecount>\d+) Dislikes'", webpage)
(like_count, dislike_count) = (mobj.group('likecount'), mobj.group('dislikecount')) if mobj else (None, None)
mobj = re.search(r'</label>Comments \((?P<commentcount>\d+)\)</div>', webpage)
comment_count = mobj.group('commentcount') if mobj else 0
age_limit = self._rta_search(webpage)
hd = is_hd(webpage)
video_url = extract_video_url(webpage)
formats = [{
'url': video_url,
'format_id': 'hd' if hd else 'sd',
'preference': 0,
'preference': 1,
}]
video_mp4_url = extract_mp4_video_url(webpage)
if video_mp4_url is not None:
formats.append({
'url': video_mp4_url,
'ext': 'mp4',
'format_id': 'mp4-hd' if hd else 'mp4-sd',
'preference': 1,
})
if not hd:
webpage = self._download_webpage(
mrss_url + '?hd', video_id, note='Downloading HD webpage')
webpage = self._download_webpage(mrss_url + '?hd', video_id, note='Downloading HD webpage')
if is_hd(webpage):
video_url = extract_video_url(webpage)
formats.append({
@ -118,11 +116,16 @@ class XHamsterIE(InfoExtractor):
return {
'id': video_id,
'title': video_title,
'formats': formats,
'description': video_description,
'upload_date': video_upload_date,
'uploader_id': video_uploader_id,
'thumbnail': video_thumbnail,
'title': title,
'description': description,
'upload_date': upload_date,
'uploader_id': uploader_id,
'thumbnail': thumbnail,
'duration': duration,
'view_count': view_count,
'like_count': int_or_none(like_count),
'dislike_count': int_or_none(dislike_count),
'comment_count': int_or_none(comment_count),
'age_limit': age_limit,
'formats': formats,
}

View File

@ -29,7 +29,6 @@ from ..utils import (
ExtractorError,
int_or_none,
PagedList,
RegexNotFoundError,
unescapeHTML,
unified_strdate,
orderedSet,
@ -297,6 +296,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
u"format": "141",
},
},
# DASH manifest with encrypted signature
{
u'url': u'https://www.youtube.com/watch?v=IB3lcPjvWLA',
u'info_dict': {
u'id': u'IB3lcPjvWLA',
u'ext': u'm4a',
u'title': u'Afrojack - The Spark ft. Spree Wilson',
u'description': u'md5:3199ed45ee8836572865580804d7ac0f',
u'uploader': u'AfrojackVEVO',
u'uploader_id': u'AfrojackVEVO',
u'upload_date': u'20131011',
},
u"params": {
u'youtube_include_dash_manifest': True,
u'format': '141',
},
},
]
@ -1272,8 +1288,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
if not mobj:
raise ValueError('Could not find vevo ID')
info = json.loads(mobj.group(1))
args = info['args']
ytplayer_config = json.loads(mobj.group(1))
args = ytplayer_config['args']
# Easy way to know if the 's' value is in url_encoded_fmt_stream_map
# this signatures are encrypted
if 'url_encoded_fmt_stream_map' not in args:
@ -1366,12 +1382,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
# Look for the DASH manifest
dash_manifest_url_lst = video_info.get('dashmpd')
if (dash_manifest_url_lst and dash_manifest_url_lst[0] and
self._downloader.params.get('youtube_include_dash_manifest', False)):
if (self._downloader.params.get('youtube_include_dash_manifest', False)):
try:
# The DASH manifest used needs to be the one from the original video_webpage.
# The one found in get_video_info seems to be using different signatures.
# However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage.
# Luckily, it seems, this case uses some kind of default signature (len == 86), so the
# combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here.
if age_gate:
dash_manifest_url = video_info.get('dashmpd')[0]
else:
dash_manifest_url = ytplayer_config['args']['dashmpd']
def decrypt_sig(mobj):
s = mobj.group(1)
dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
return '/signature/%s' % dec_s
dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
dash_doc = self._download_xml(
dash_manifest_url_lst[0], video_id,
dash_manifest_url, video_id,
note=u'Downloading DASH manifest',
errnote=u'Could not download DASH manifest')
for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
@ -1443,9 +1471,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
)"""
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
_MORE_PAGES_INDICATOR = r'data-link-type="next"'
_VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
IE_NAME = u'youtube:playlist'
def _real_initialize(self):
@ -1460,11 +1488,15 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
# the id of the playlist is just 'RD' + video_id
url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
title_span = (get_element_by_attribute('class', 'title long-title', webpage) or
get_element_by_attribute('class', 'title ', webpage))
search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
title_span = (search_title('playlist-title') or
search_title('title long-title') or search_title('title'))
title = clean_html(title_span)
video_re = r'data-index="\d+".*?href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s' % re.escape(playlist_id)
ids = orderedSet(re.findall(video_re, webpage))
video_re = r'''(?x)data-video-username="(.*?)".*?
href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)
matches = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
# Some of the videos may have been deleted, their username field is empty
ids = [video_id for (username, video_id) in matches if username]
url_results = self._ids_to_results(ids)
return self.playlist_result(url_results, playlist_id, title)
@ -1493,29 +1525,31 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
raise ExtractorError(u'For downloading YouTube.com top lists, use '
u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
url = self._TEMPLATE_URL % playlist_id
page = self._download_webpage(url, playlist_id)
more_widget_html = content_html = page
# Extract the video ids from the playlist pages
ids = []
for page_num in itertools.count(1):
url = self._TEMPLATE_URL % (playlist_id, page_num)
page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
matches = re.finditer(self._VIDEO_RE, page)
matches = re.finditer(self._VIDEO_RE, content_html)
# We remove the duplicates and the link with index 0
# (it's not the first video of the playlist)
new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
ids.extend(new_ids)
if re.search(self._MORE_PAGES_INDICATOR, page) is None:
mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
if not mobj:
break
try:
playlist_title = self._og_search_title(page)
except RegexNotFoundError:
self.report_warning(
u'Playlist page is missing OpenGraph title, falling back ...',
playlist_id)
playlist_title = self._html_search_regex(
r'<h1 class="pl-header-title">(.*?)</h1>', page, u'title')
more = self._download_json(
'https://youtube.com/%s' % mobj.group('more'), playlist_id, 'Downloading page #%s' % page_num)
content_html = more['content_html']
more_widget_html = more['load_more_widget_html']
playlist_title = self._html_search_regex(
r'<h1 class="pl-header-title">\s*(.*?)\s*</h1>', page, u'title')
url_results = self._ids_to_results(ids)
return self.playlist_result(url_results, playlist_id, playlist_title)

View File

@ -1,4 +1,5 @@
# coding: utf-8
from __future__ import unicode_literals
import re
@ -13,52 +14,42 @@ class ZDFIE(InfoExtractor):
_VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<video_id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
_TEST = {
u"url": u"http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt",
u"file": u"2037704.webm",
u"info_dict": {
u"upload_date": u"20131127",
u"description": u"Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial \"Ende des Machtpokers - Große Koalition für Deutschland\".",
u"uploader": u"spezial",
u"title": u"ZDFspezial - Ende des Machtpokers"
'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt',
'info_dict': {
'id': '2037704',
'ext': 'webm',
'title': 'ZDFspezial - Ende des Machtpokers',
'description': 'Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial "Ende des Machtpokers - Große Koalition für Deutschland".',
'duration': 1022,
'uploader': 'spezial',
'uploader_id': '225948',
'upload_date': '20131127',
},
u"skip": u"Videos on ZDF.de are depublicised in short order",
'skip': 'Videos on ZDF.de are depublicised in short order',
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
xml_url = u'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
doc = self._download_xml(
xml_url, video_id,
note=u'Downloading video info',
errnote=u'Failed to download video info')
note='Downloading video info',
errnote='Failed to download video info')
title = doc.find('.//information/title').text
description = doc.find('.//information/detail').text
duration = int(doc.find('.//details/lengthSec').text)
uploader_node = doc.find('.//details/originChannelTitle')
uploader = None if uploader_node is None else uploader_node.text
duration_str = doc.find('.//details/length').text
duration_m = re.match(r'''(?x)^
(?P<hours>[0-9]{2})
:(?P<minutes>[0-9]{2})
:(?P<seconds>[0-9]{2})
(?:\.(?P<ms>[0-9]+)?)
''', duration_str)
duration = (
(
(int(duration_m.group('hours')) * 60 * 60) +
(int(duration_m.group('minutes')) * 60) +
int(duration_m.group('seconds'))
)
if duration_m
else None
)
uploader_id_node = doc.find('.//details/originChannelId')
uploader_id = None if uploader_id_node is None else uploader_id_node.text
upload_date = unified_strdate(doc.find('.//details/airtime').text)
def xml_to_format(fnode):
video_url = fnode.find('url').text
is_available = u'http://www.metafilegenerator' not in video_url
is_available = 'http://www.metafilegenerator' not in video_url
format_id = fnode.attrib['basetype']
format_m = re.match(r'''(?x)
@ -71,22 +62,28 @@ class ZDFIE(InfoExtractor):
quality = fnode.find('./quality').text
abr = int(fnode.find('./audioBitrate').text) // 1000
vbr = int(fnode.find('./videoBitrate').text) // 1000
vbr_node = fnode.find('./videoBitrate')
vbr = None if vbr_node is None else int(vbr_node.text) // 1000
format_note = u''
width_node = fnode.find('./width')
width = None if width_node is None else int_or_none(width_node.text)
height_node = fnode.find('./height')
height = None if height_node is None else int_or_none(height_node.text)
format_note = ''
if not format_note:
format_note = None
return {
'format_id': format_id + u'-' + quality,
'format_id': format_id + '-' + quality,
'url': video_url,
'ext': ext,
'acodec': format_m.group('acodec'),
'vcodec': format_m.group('vcodec'),
'abr': abr,
'vbr': vbr,
'width': int_or_none(fnode.find('./width').text),
'height': int_or_none(fnode.find('./height').text),
'width': width,
'height': height,
'filesize': int_or_none(fnode.find('./filesize').text),
'format_note': format_note,
'protocol': proto,
@ -103,9 +100,10 @@ class ZDFIE(InfoExtractor):
return {
'id': video_id,
'title': title,
'formats': formats,
'description': description,
'uploader': uploader,
'duration': duration,
'uploader': uploader,
'uploader_id': uploader_id,
'upload_date': upload_date,
}
'formats': formats,
}

View File

@ -174,6 +174,11 @@ try:
except NameError:
compat_chr = chr
try:
from xml.etree.ElementTree import ParseError as compat_xml_parse_error
except ImportError: # Python 2.6
from xml.parsers.expat import ExpatError as compat_xml_parse_error
def compat_ord(c):
if type(c) is int: return c
else: return ord(c)
@ -774,6 +779,7 @@ def unified_strdate(date_str):
'%Y-%m-%dT%H:%M:%S.%fZ',
'%Y-%m-%dT%H:%M:%S.%f0Z',
'%Y-%m-%dT%H:%M:%S',
'%Y-%m-%dT%H:%M:%S.%f',
'%Y-%m-%dT%H:%M',
]
for expression in format_expressions:

View File

@ -1,2 +1,2 @@
__version__ = '2014.02.19'
__version__ = '2014.02.24'