Compare commits

...

44 Commits

Author SHA1 Message Date
Philipp Hagemeister
650d688d10 release 2014.04.07 2014-04-07 13:11:37 +02:00
Philipp Hagemeister
0ba77818f3 [ted] Add width and height (Fixes #2716) 2014-04-07 13:11:30 +02:00
Sergey M․
09baa7da7e [rts] Update test 2014-04-07 00:34:23 +07:00
Sergey M․
85e787f51d [cbsnews] Add support for cbsnews.com (Closes #2691) 2014-04-06 06:03:58 +07:00
Philipp Hagemeister
2a9e1e453a Merge branch 'master' of github.com:rg3/youtube-dl 2014-04-05 20:05:47 +02:00
Philipp Hagemeister
ee1e199685 [justin.tv] Modernize (Fixes #2705) 2014-04-05 17:56:36 +02:00
Sergey M․
17c5a00774 [novamov] Simplify 2014-04-05 19:36:22 +07:00
Sergey M․
15c0e8e7b2 [generic] Generalize novamov based embeds 2014-04-05 17:20:05 +07:00
Sergey M․
cca37fba48 [divxstage] Fix typo in IE_NAME 2014-04-05 17:15:43 +07:00
Sergey M․
9d0993ec4a [movshare] Support more domains 2014-04-05 17:00:18 +07:00
Sergey M․
342f33bf9e [divxstage] Support more domains 2014-04-05 16:50:05 +07:00
Sergey M․
7cd3bc5f99 [nowvideo] Support more domains 2014-04-05 16:38:57 +07:00
Sergey M․
931055e6cb [videoweed] Revert _FILE_DELETED_REGEX 2014-04-05 16:32:14 +07:00
Sergey M․
d0e4cf82f1 [movshare] Add _FILE_DELETED_REGEX 2014-04-05 16:31:38 +07:00
Sergey M․
6f88df2c57 [divxstage] Add support for divxstage.eu 2014-04-05 16:29:44 +07:00
Sergey M․
4479bf2762 [videoweed] Simplify 2014-04-05 16:09:28 +07:00
Sergey M․
1ff7c0f7d8 [movshare] Add support for movshare.net 2014-04-05 16:09:03 +07:00
Sergey M․
610e47c87e Credit @sainyamkapoor for videoweed extractor 2014-04-05 15:53:50 +07:00
Sergey M․
50f566076f [generic] Add support for videoweed embeds 2014-04-05 15:49:45 +07:00
Sergey M․
92810ff497 [nowvideo] Improve _VALID_URL 2014-04-05 15:35:21 +07:00
Sergey M․
60ccc59a1c [novamov] Improve _VALID_URL 2014-04-05 15:34:54 +07:00
Sergey M․
91745595d3 [videoweed] Simplify 2014-04-05 15:32:55 +07:00
Sainyam Kapoor
d6e40507d0 [videoweed]Cleanup 2014-04-05 10:53:22 +05:30
Sainyam Kapoor
deed48b472 [Videoweed] Added support for videoweed. 2014-04-05 10:40:03 +05:30
Philipp Hagemeister
e4d41bfca5 Merge pull request #2696 from anovicecodemonkey/support-ustream-embeds
[UstreamIE] [generic] Added support for Ustream embed URLs (Fixes #2694)
2014-04-04 23:33:08 +02:00
Philipp Hagemeister
a355b70f27 [cspan] Do not test number of playlist entries
Apparently, CSpan switches between single-file and multiple-file results. Either one is fine as long as we get the full four hours.
2014-04-04 23:16:22 +02:00
Philipp Hagemeister
f8514f6186 [rts] Use visible id in file names
Maybe the internal ID is more precise, but it's totally confusing, and the obvious ID still allows a google search.
2014-04-04 23:13:55 +02:00
Philipp Hagemeister
e09b8fcd9d [ro220] Make test case more flexible
Either one or two spaces is fine here.
2014-04-04 23:08:33 +02:00
Philipp Hagemeister
7d1b527ff9 [motorsport] Fix on Python 3 2014-04-04 23:06:27 +02:00
Philipp Hagemeister
f943c7b622 release 2014.04.04.7 2014-04-04 23:01:45 +02:00
Philipp Hagemeister
676eb3f2dd Fix unicode_escape (Fixes #2695) 2014-04-04 23:00:51 +02:00
Philipp Hagemeister
98b7cf1ace release 2014.04.04.6 2014-04-04 22:48:35 +02:00
Philipp Hagemeister
c465afd736 [teamcoco] Fix regex in 2.6 (#2700)
The re engine does not want to repeat an empty string, for fear that something like

    (.*)*

could be matching the tokens ...

    ""
    "" ""
    "" "" ""

Of course, that's harmless with a question mark, although still somewhat strange.
2014-04-04 22:46:47 +02:00
Philipp Hagemeister
b84d6e7fc4 Merge remote-tracking branch 'AGSPhoenix/teamcoco-fix' 2014-04-04 22:44:49 +02:00
Philipp Hagemeister
2efd5d78c1 release 2014.04.04.5 2014-04-04 22:24:45 +02:00
Philipp Hagemeister
c8edf47b3a [yahoo] Support https and -uploader URLs (Fixes #2701) 2014-04-04 22:23:59 +02:00
Philipp Hagemeister
3b4c26a428 [pornhd] Avoid shadowing variable url 2014-04-04 22:22:30 +02:00
Philipp Hagemeister
1525148114 Remove unused imports 2014-04-04 22:22:11 +02:00
AGSPhoenix
fa387d2d99 Revert "Workaround for regex engine limitation"
This reverts commit 6d0d573eca.
2014-04-04 15:37:49 -04:00
AGSPhoenix
6d0d573eca Workaround for regex engine limitation 2014-04-04 15:25:28 -04:00
AGSPhoenix
bb799e811b Add a test for the new URL pages
Add a test for the pages with the video_id in the URL.
2014-04-04 13:52:35 -04:00
AGSPhoenix
04ee53eca1 Support TeamCoco URLs with video_id in the title
If the URL has the video_id in it, use that since the current method of
finding the id breaks on those pages.

Fixes 2698.
2014-04-04 13:42:34 -04:00
anovicecodemonkey
ca6aada48e Fix _TEST for Ustream embed URLs 2014-04-05 03:26:29 +10:30
anovicecodemonkey
5c38625259 [UstreamIE] [generic] Added support for Ustream embed URLs (Fixes #2694) 2014-04-05 00:53:09 +10:30
25 changed files with 316 additions and 95 deletions

View File

@@ -157,5 +157,11 @@ class TestAllURLsMatching(unittest.TestCase):
'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
['ComedyCentralShows'])
def test_yahoo_https(self):
# https://github.com/rg3/youtube-dl/issues/2701
self.assertMatch(
'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html',
['Yahoo'])
if __name__ == '__main__':
unittest.main()

View File

@@ -324,7 +324,6 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['id'], '342759')
self.assertEqual(
result['title'], 'General Motors Ignition Switch Recall')
self.assertEqual(len(result['entries']), 9)
whole_duration = sum(e['duration'] for e in result['entries'])
self.assertEqual(whole_duration, 14855)

View File

@@ -38,6 +38,7 @@ from youtube_dl.utils import (
xpath_with_ns,
parse_iso8601,
strip_jsonp,
uppercase_escape,
)
if sys.version_info < (3, 0):
@@ -279,6 +280,9 @@ class TestUtil(unittest.TestCase):
d = json.loads(stripped)
self.assertEqual(d, [{"id": "532cb", "x": 3}])
def test_uppercase_escpae(self):
self.assertEqual(uppercase_escape(u''), u'')
self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐')
if __name__ == '__main__':
unittest.main()

View File

@@ -52,6 +52,7 @@ __authors__ = (
'Juan C. Olivares',
'Mattias Harrysson',
'phaer',
'Sainyam Kapoor',
)
__license__ = 'Public Domain'

View File

@@ -32,6 +32,7 @@ from .canal13cl import Canal13clIE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .cbs import CBSIE
from .cbsnews import CBSNewsIE
from .ceskatelevize import CeskaTelevizeIE
from .channel9 import Channel9IE
from .chilloutzone import ChilloutzoneIE
@@ -62,6 +63,7 @@ from .dotsub import DotsubIE
from .dreisat import DreiSatIE
from .defense import DefenseGouvFrIE
from .discovery import DiscoveryIE
from .divxstage import DivxStageIE
from .dropbox import DropboxIE
from .ebaumsworld import EbaumsWorldIE
from .ehow import EHowIE
@@ -156,6 +158,7 @@ from .mofosex import MofosexIE
from .mooshare import MooshareIE
from .morningstar import MorningstarIE
from .motorsport import MotorsportIE
from .movshare import MovShareIE
from .mtv import (
MTVIE,
MTVIggyIE,
@@ -276,6 +279,7 @@ from .videodetective import VideoDetectiveIE
from .videolecturesnet import VideoLecturesNetIE
from .videofyme import VideofyMeIE
from .videopremium import VideoPremiumIE
from .videoweed import VideoWeedIE
from .vimeo import (
VimeoIE,
VimeoChannelIE,

View File

@@ -0,0 +1,87 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
class CBSNewsIE(InfoExtractor):
IE_DESC = 'CBS News'
_VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:[^/]+/)+(?P<id>[\da-z_-]+)'
_TESTS = [
{
'url': 'http://www.cbsnews.com/news/tesla-and-spacex-elon-musks-industrial-empire/',
'info_dict': {
'id': 'tesla-and-spacex-elon-musks-industrial-empire',
'ext': 'flv',
'title': 'Tesla and SpaceX: Elon Musk\'s industrial empire',
'thumbnail': 'http://beta.img.cbsnews.com/i/2014/03/30/60147937-2f53-4565-ad64-1bdd6eb64679/60-0330-pelley-640x360.jpg',
'duration': 791,
},
'params': {
# rtmp download
'skip_download': True,
},
},
{
'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
'info_dict': {
'id': 'fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack',
'ext': 'flv',
'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
'thumbnail': 'http://cbsnews2.cbsistatic.com/hub/i/r/2014/04/04/0c9fbc66-576b-41ca-8069-02d122060dd2/thumbnail/140x90/6dad7a502f88875ceac38202984b6d58/en-0404-werner-replace-640x360.jpg',
'duration': 205,
},
'params': {
# rtmp download
'skip_download': True,
},
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
video_info = json.loads(self._html_search_regex(
r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
webpage, 'video JSON info'))
item = video_info['item'] if 'item' in video_info else video_info
title = item.get('articleTitle') or item.get('hed')
duration = item.get('duration')
thumbnail = item.get('mediaImage') or item.get('thumbnail')
formats = []
for format_id in ['RtmpMobileLow', 'RtmpMobileHigh', 'Hls', 'RtmpDesktop']:
uri = item.get('media' + format_id + 'URI')
if not uri:
continue
fmt = {
'url': uri,
'format_id': format_id,
}
if uri.startswith('rtmp'):
fmt.update({
'app': 'ondemand?auth=cbs',
'play_path': 'mp4:' + uri.split('<break>')[-1],
'player_url': 'http://www.cbsnews.com/[[IMPORT]]/vidtech.cbsinteractive.com/player/3_3_0/CBSI_PLAYER_HD.swf',
'page_url': 'http://www.cbsnews.com',
'ext': 'flv',
})
elif uri.endswith('.m3u8'):
fmt['ext'] = 'mp4'
formats.append(fmt)
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
}

View File

@@ -8,7 +8,6 @@ from .subtitles import SubtitlesInfoExtractor
from ..utils import (
compat_urllib_request,
compat_str,
get_element_by_attribute,
get_element_by_id,
orderedSet,
str_to_int,

View File

@@ -0,0 +1,27 @@
from __future__ import unicode_literals
from .novamov import NovaMovIE
class DivxStageIE(NovaMovIE):
IE_NAME = 'divxstage'
IE_DESC = 'DivxStage'
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'divxstage\.(?:eu|net|ch|co|at|ag)'}
_HOST = 'www.divxstage.eu'
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
_TITLE_REGEX = r'<div class="video_det">\s*<strong>([^<]+)</strong>'
_DESCRIPTION_REGEX = r'<div class="video_det">\s*<strong>[^<]+</strong>\s*<p>([^<]+)</p>'
_TEST = {
'url': 'http://www.divxstage.eu/video/57f238e2e5e01',
'md5': '63969f6eb26533a1968c4d325be63e72',
'info_dict': {
'id': '57f238e2e5e01',
'ext': 'flv',
'title': 'youtubedl test video',
'description': 'This is a test video for youtubedl.',
}
}

View File

@@ -184,6 +184,17 @@ class GenericIE(InfoExtractor):
'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',
}
},
# Embeded Ustream video
{
'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
'md5': '27b99cdb639c9b12a79bca876a073417',
'info_dict': {
'id': '45734260',
'ext': 'flv',
'uploader': 'AU SPA: The NSA and Privacy',
'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
}
},
# nowvideo embed hidden behind percent encoding
{
'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
@@ -500,17 +511,18 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group(1), 'Mpora')
# Look for embedded NovaMov player
# Look for embedded NovaMov-based player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?novamov\.com/embed\.php.+?)\1', webpage)
r'''(?x)<iframe[^>]+?src=(["\'])
(?P<url>http://(?:(?:embed|www)\.)?
(?:novamov\.com|
nowvideo\.(?:ch|sx|eu|at|ag|co)|
videoweed\.(?:es|com)|
movshare\.(?:net|sx|ag)|
divxstage\.(?:eu|net|ch|co|at|ag))
/embed\.php.+?)\1''', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'NovaMov')
# Look for embedded NowVideo player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?nowvideo\.(?:ch|sx|eu)/embed\.php.+?)\1', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'NowVideo')
return self.url_result(mobj.group('url'))
# Look for embedded Facebook player
mobj = re.search(
@@ -556,6 +568,12 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group('url'), 'TED')
# Look for embedded Ustream videos
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'Ustream')
# Look for embedded arte.tv player
mobj = re.search(
r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',

View File

@@ -1,9 +1,12 @@
from __future__ import unicode_literals
import json
import os
import re
from .common import InfoExtractor
from ..utils import (
compat_str,
ExtractorError,
formatSeconds,
)
@@ -24,34 +27,31 @@ class JustinTVIE(InfoExtractor):
/?(?:\#.*)?$
"""
_JUSTIN_PAGE_LIMIT = 100
IE_NAME = u'justin.tv'
IE_NAME = 'justin.tv'
IE_DESC = 'justin.tv and twitch.tv'
_TEST = {
u'url': u'http://www.twitch.tv/thegamedevhub/b/296128360',
u'file': u'296128360.flv',
u'md5': u'ecaa8a790c22a40770901460af191c9a',
u'info_dict': {
u"upload_date": u"20110927",
u"uploader_id": 25114803,
u"uploader": u"thegamedevhub",
u"title": u"Beginner Series - Scripting With Python Pt.1"
'url': 'http://www.twitch.tv/thegamedevhub/b/296128360',
'md5': 'ecaa8a790c22a40770901460af191c9a',
'info_dict': {
'id': '296128360',
'ext': 'flv',
'upload_date': '20110927',
'uploader_id': 25114803,
'uploader': 'thegamedevhub',
'title': 'Beginner Series - Scripting With Python Pt.1'
}
}
def report_download_page(self, channel, offset):
"""Report attempt to download a single page of videos."""
self.to_screen(u'%s: Downloading video information from %d to %d' %
(channel, offset, offset + self._JUSTIN_PAGE_LIMIT))
# Return count of items, list of *valid* items
def _parse_page(self, url, video_id):
info_json = self._download_webpage(url, video_id,
u'Downloading video info JSON',
u'unable to download video info JSON')
'Downloading video info JSON',
'unable to download video info JSON')
response = json.loads(info_json)
if type(response) != list:
error_text = response.get('error', 'unknown error')
raise ExtractorError(u'Justin.tv API: %s' % error_text)
raise ExtractorError('Justin.tv API: %s' % error_text)
info = []
for clip in response:
video_url = clip['video_file_url']
@@ -62,7 +62,7 @@ class JustinTVIE(InfoExtractor):
video_id = clip['id']
video_title = clip.get('title', video_id)
info.append({
'id': video_id,
'id': compat_str(video_id),
'url': video_url,
'title': video_title,
'uploader': clip.get('channel_name', video_uploader_id),
@@ -74,8 +74,6 @@ class JustinTVIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'invalid URL: %s' % url)
api_base = 'http://api.justin.tv'
paged = False
@@ -89,40 +87,41 @@ class JustinTVIE(InfoExtractor):
webpage = self._download_webpage(url, chapter_id)
m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
if not m:
raise ExtractorError(u'Cannot find archive of a chapter')
raise ExtractorError('Cannot find archive of a chapter')
archive_id = m.group(1)
api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
doc = self._download_xml(api, chapter_id,
note=u'Downloading chapter information',
errnote=u'Chapter information download failed')
doc = self._download_xml(
api, chapter_id,
note='Downloading chapter information',
errnote='Chapter information download failed')
for a in doc.findall('.//archive'):
if archive_id == a.find('./id').text:
break
else:
raise ExtractorError(u'Could not find chapter in chapter information')
raise ExtractorError('Could not find chapter in chapter information')
video_url = a.find('./video_file_url').text
video_ext = video_url.rpartition('.')[2] or u'flv'
video_ext = video_url.rpartition('.')[2] or 'flv'
chapter_api_url = u'https://api.twitch.tv/kraken/videos/c' + chapter_id
chapter_info_json = self._download_webpage(chapter_api_url, u'c' + chapter_id,
note='Downloading chapter metadata',
errnote='Download of chapter metadata failed')
chapter_info = json.loads(chapter_info_json)
chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id
chapter_info = self._download_json(
chapter_api_url, 'c' + chapter_id,
note='Downloading chapter metadata',
errnote='Download of chapter metadata failed')
bracket_start = int(doc.find('.//bracket_start').text)
bracket_end = int(doc.find('.//bracket_end').text)
# TODO determine start (and probably fix up file)
# youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
#video_url += u'?start=' + TODO:start_timestamp
#video_url += '?start=' + TODO:start_timestamp
# bracket_start is 13290, but we want 51670615
self._downloader.report_warning(u'Chapter detected, but we can just download the whole file. '
u'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))
self._downloader.report_warning('Chapter detected, but we can just download the whole file. '
'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))
info = {
'id': u'c' + chapter_id,
'id': 'c' + chapter_id,
'url': video_url,
'ext': video_ext,
'title': chapter_info['title'],
@@ -131,14 +130,12 @@ class JustinTVIE(InfoExtractor):
'uploader': chapter_info['channel']['display_name'],
'uploader_id': chapter_info['channel']['name'],
}
return [info]
return info
else:
video_id = mobj.group('videoid')
api = api_base + '/broadcast/by_archive/%s.json' % video_id
self.report_extraction(video_id)
info = []
entries = []
offset = 0
limit = self._JUSTIN_PAGE_LIMIT
while True:
@@ -146,8 +143,12 @@ class JustinTVIE(InfoExtractor):
self.report_download_page(video_id, offset)
page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
page_count, page_info = self._parse_page(page_url, video_id)
info.extend(page_info)
entries.extend(page_info)
if not paged or page_count != limit:
break
offset += limit
return info
return {
'_type': 'playlist',
'id': video_id,
'entries': entries,
}

View File

@@ -1,17 +1,9 @@
# coding: utf-8
from __future__ import unicode_literals
import hashlib
import json
import re
import time
from .common import InfoExtractor
from ..utils import (
compat_parse_qs,
compat_str,
int_or_none,
)
class MorningstarIE(InfoExtractor):

View File

@@ -44,7 +44,7 @@ class MotorsportIE(InfoExtractor):
e = compat_str(int(time.time()) + 24 * 60 * 60)
base_video_url = params['location'] + '?e=' + e
s = 'h3hg713fh32'
h = hashlib.md5(s + base_video_url).hexdigest()
h = hashlib.md5((s + base_video_url).encode('utf-8')).hexdigest()
video_url = base_video_url + '&h=' + h
uploader = self._html_search_regex(

View File

@@ -0,0 +1,27 @@
from __future__ import unicode_literals
from .novamov import NovaMovIE
class MovShareIE(NovaMovIE):
IE_NAME = 'movshare'
IE_DESC = 'MovShare'
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'movshare\.(?:net|sx|ag)'}
_HOST = 'www.movshare.net'
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
_TITLE_REGEX = r'<strong>Title:</strong> ([^<]+)</p>'
_DESCRIPTION_REGEX = r'<strong>Description:</strong> ([^<]+)</p>'
_TEST = {
'url': 'http://www.movshare.net/video/559e28be54d96',
'md5': 'abd31a2132947262c50429e1d16c1bfd',
'info_dict': {
'id': '559e28be54d96',
'ext': 'flv',
'title': 'dissapeared image',
'description': 'optical illusion dissapeared image magic illusion',
}
}

View File

@@ -13,7 +13,8 @@ class NovaMovIE(InfoExtractor):
IE_NAME = 'novamov'
IE_DESC = 'NovaMov'
_VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'novamov\.com'}
_VALID_URL_TEMPLATE = r'http://(?:(?:www\.)?%(host)s/(?:file|video)/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<id>[a-z\d]{13})'
_VALID_URL = _VALID_URL_TEMPLATE % {'host': 'novamov\.com'}
_HOST = 'www.novamov.com'
@@ -36,18 +37,17 @@ class NovaMovIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
video_id = mobj.group('id')
page = self._download_webpage(
'http://%s/video/%s' % (self._HOST, video_id), video_id, 'Downloading video page')
if re.search(self._FILE_DELETED_REGEX, page) is not None:
raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
filekey = self._search_regex(self._FILEKEY_REGEX, page, 'filekey')
title = self._html_search_regex(self._TITLE_REGEX, page, 'title', fatal=False)
description = self._html_search_regex(self._DESCRIPTION_REGEX, page, 'description', default='', fatal=False)
api_response = self._download_webpage(

View File

@@ -7,7 +7,7 @@ class NowVideoIE(NovaMovIE):
IE_NAME = 'nowvideo'
IE_DESC = 'NowVideo'
_VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'nowvideo\.(?:ch|sx|eu)'}
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|sx|eu|at|ag|co)'}
_HOST = 'www.nowvideo.ch'

View File

@@ -39,11 +39,11 @@ class PornHdIE(InfoExtractor):
formats = [
{
'url': url,
'url': format_url,
'ext': format.lower(),
'format_id': '%s-%s' % (format.lower(), quality.lower()),
'quality': 1 if quality.lower() == 'high' else 0,
} for format, quality, url in re.findall(
} for format, quality, format_url in re.findall(
r'var __video([\da-zA-Z]+?)(Low|High)StreamUrl = \'(http://.+?)\?noProxy=1\'', webpage)
]

View File

@@ -18,7 +18,7 @@ class Ro220IE(InfoExtractor):
'md5': '03af18b73a07b4088753930db7a34add',
'info_dict': {
"title": "Luati-le Banii sez 4 ep 1",
"description": "Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
"description": "re:^Iata-ne reveniti dupa o binemeritata vacanta\. +Va astept si pe Facebook cu pareri si comentarii.$",
}
}

View File

@@ -35,13 +35,13 @@ class RTSIE(InfoExtractor):
},
{
'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html',
'md5': 'c197f0b2421995c63a64cc73d800f42e',
'md5': 'c148457a27bdc9e5b1ffe081a7a8337b',
'info_dict': {
'id': '5738317',
'id': '5624067',
'ext': 'mp4',
'duration': 55,
'title': 'Bande de lancement de Passe-moi les jumelles',
'description': '',
'duration': 3720,
'title': 'Les yeux dans les cieux - Mon homard au Canada',
'description': 'md5:d22ee46f5cc5bac0912e5a0c6d44a9f7',
'uploader': 'Passe-moi les jumelles',
'upload_date': '20140404',
'timestamp': 1396635300,
@@ -98,17 +98,20 @@ class RTSIE(InfoExtractor):
m = re.match(self._VALID_URL, url)
video_id = m.group('id')
def download_json(video_id):
def download_json(internal_id):
return self._download_json(
'http://www.rts.ch/a/%s.html?f=json/article' % video_id, video_id)
'http://www.rts.ch/a/%s.html?f=json/article' % internal_id,
video_id)
all_info = download_json(video_id)
# video_id extracted out of URL is not always a real id
if 'video' not in all_info and 'audio' not in all_info:
page = self._download_webpage(url, video_id)
video_id = self._html_search_regex(r'<(?:video|audio) data-id="(\d+)"', page, 'video id')
all_info = download_json(video_id)
internal_id = self._html_search_regex(
r'<(?:video|audio) data-id="([0-9]+)"', page,
'internal video id')
all_info = download_json(internal_id)
info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio']

View File

@@ -9,8 +9,18 @@ from ..utils import (
class TeamcocoIE(InfoExtractor):
_VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'
_TEST = {
_VALID_URL = r'http://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<url_title>.*)'
_TESTS = [
{
'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
'file': '80187.mp4',
'md5': '3f7746aa0dc86de18df7539903d399ea',
'info_dict': {
'title': 'Conan Becomes A Mary Kay Beauty Consultant',
'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
}
},
{
'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
'file': '19705.mp4',
'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
@@ -19,6 +29,7 @@ class TeamcocoIE(InfoExtractor):
"title": "Louis C.K. Interview Pt. 1 11/3/11"
}
}
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -26,11 +37,13 @@ class TeamcocoIE(InfoExtractor):
raise ExtractorError('Invalid URL: %s' % url)
url_title = mobj.group('url_title')
webpage = self._download_webpage(url, url_title)
video_id = self._html_search_regex(
r'<article class="video" data-id="(\d+?)"',
webpage, 'video id')
video_id = mobj.group("video_id")
if video_id == '':
video_id = self._html_search_regex(
r'<article class="video" data-id="(\d+?)"',
webpage, 'video id')
self.report_extraction(video_id)
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id

View File

@@ -37,6 +37,7 @@ class TEDIE(SubtitlesInfoExtractor):
'consciousness, but that half the time our brains are '
'actively fooling us.'),
'uploader': 'Dan Dennett',
'width': 854,
}
}, {
'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
@@ -50,10 +51,10 @@ class TEDIE(SubtitlesInfoExtractor):
}
}]
_FORMATS_PREFERENCE = {
'low': 1,
'medium': 2,
'high': 3,
_NATIVE_FORMATS = {
'low': {'preference': 1, 'width': 320, 'height': 180},
'medium': {'preference': 2, 'width': 512, 'height': 288},
'high': {'preference': 3, 'width': 854, 'height': 480},
}
def _extract_info(self, webpage):
@@ -98,12 +99,14 @@ class TEDIE(SubtitlesInfoExtractor):
talk_info = self._extract_info(webpage)['talks'][0]
formats = [{
'ext': 'mp4',
'url': format_url,
'format_id': format_id,
'format': format_id,
'preference': self._FORMATS_PREFERENCE.get(format_id, -1),
} for (format_id, format_url) in talk_info['nativeDownloads'].items()]
for f in formats:
finfo = self._NATIVE_FORMATS.get(f['format_id'])
if finfo:
f.update(finfo)
self._sort_formats(formats)
video_id = compat_str(talk_info['id'])

View File

@@ -11,7 +11,7 @@ from ..utils import (
class UstreamIE(InfoExtractor):
_VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
_VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed)/(?P<videoID>\d+)'
IE_NAME = 'ustream'
_TEST = {
'url': 'http://www.ustream.tv/recorded/20274954',
@@ -25,6 +25,13 @@ class UstreamIE(InfoExtractor):
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
if m.group('type') == 'embed':
video_id = m.group('videoID')
webpage = self._download_webpage(url, video_id)
desktop_video_id = self._html_search_regex(r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id')
desktop_url = 'http://www.ustream.tv/recorded/' + desktop_video_id
return self.url_result(desktop_url, 'Ustream')
video_id = m.group('videoID')
video_url = 'http://tcdn.ustream.tv/video/%s' % video_id

View File

@@ -0,0 +1,26 @@
from __future__ import unicode_literals
from .novamov import NovaMovIE
class VideoWeedIE(NovaMovIE):
IE_NAME = 'videoweed'
IE_DESC = 'VideoWeed'
_VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'videoweed\.(?:es|com)'}
_HOST = 'www.videoweed.es'
_FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
_TITLE_REGEX = r'<h1 class="text_shadow">([^<]+)</h1>'
_TEST = {
'url': 'http://www.videoweed.es/file/b42178afbea14',
'md5': 'abd31a2132947262c50429e1d16c1bfd',
'info_dict': {
'id': 'b42178afbea14',
'ext': 'flv',
'title': 'optical illusion dissapeared image magic illusion',
'description': ''
},
}

View File

@@ -1,6 +1,7 @@
from __future__ import unicode_literals
import itertools
import json
import re
from .common import InfoExtractor, SearchInfoExtractor
@@ -14,7 +15,7 @@ from ..utils import (
class YahooIE(InfoExtractor):
IE_DESC = 'Yahoo screen'
_VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html'
_VALID_URL = r'https?://screen\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html'
_TESTS = [
{
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',

View File

@@ -2,6 +2,7 @@
# -*- coding: utf-8 -*-
import calendar
import codecs
import contextlib
import ctypes
import datetime
@@ -1263,9 +1264,11 @@ class PagedList(object):
def uppercase_escape(s):
unicode_escape = codecs.getdecoder('unicode_escape')
return re.sub(
r'\\U[0-9a-fA-F]{8}',
lambda m: m.group(0).decode('unicode-escape'), s)
lambda m: unicode_escape(m.group(0))[0],
s)
try:
struct.pack(u'!I', 0)

View File

@@ -1,2 +1,2 @@
__version__ = '2014.04.04.4'
__version__ = '2014.04.07'