release 2014.04.07

[ted] Add width and height (Fixes #2716 )
[rts] Update test
2014-04-07 13:11:37 +02:00 · 2014-04-07 13:11:30 +02:00 · 2014-04-07 00:34:23 +07:00 · 2014-04-06 06:03:58 +07:00 · 2014-04-05 20:05:47 +02:00 · 2014-04-05 17:56:36 +02:00
25 changed files with 316 additions and 95 deletions
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -157,5 +157,11 @@ class TestAllURLsMatching(unittest.TestCase):
            'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
            ['ComedyCentralShows'])

+    def test_yahoo_https(self):
+        # https://github.com/rg3/youtube-dl/issues/2701
+        self.assertMatch(
+            'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html',
+            ['Yahoo'])
+
 if __name__ == '__main__':
    unittest.main()
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -324,7 +324,6 @@ class TestPlaylists(unittest.TestCase):
        self.assertEqual(result['id'], '342759')
        self.assertEqual(
            result['title'], 'General Motors Ignition Switch Recall')
-        self.assertEqual(len(result['entries']), 9)
        whole_duration = sum(e['duration'] for e in result['entries'])
        self.assertEqual(whole_duration, 14855)

--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -38,6 +38,7 @@ from youtube_dl.utils import (
    xpath_with_ns,
    parse_iso8601,
    strip_jsonp,
+    uppercase_escape,
 )

 if sys.version_info < (3, 0):
@@ -279,6 +280,9 @@ class TestUtil(unittest.TestCase):
        d = json.loads(stripped)
        self.assertEqual(d, [{"id": "532cb", "x": 3}])

+    def test_uppercase_escpae(self):
+        self.assertEqual(uppercase_escape(u'aä'), u'aä')
+        self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐')

 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -52,6 +52,7 @@ __authors__  = (
    'Juan C. Olivares',
    'Mattias Harrysson',
    'phaer',
+    'Sainyam Kapoor',
 )

 __license__ = 'Public Domain'
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -32,6 +32,7 @@ from .canal13cl import Canal13clIE
 from .canalplus import CanalplusIE
 from .canalc2 import Canalc2IE
 from .cbs import CBSIE
+from .cbsnews import CBSNewsIE
 from .ceskatelevize import CeskaTelevizeIE
 from .channel9 import Channel9IE
 from .chilloutzone import ChilloutzoneIE
@@ -62,6 +63,7 @@ from .dotsub import DotsubIE
 from .dreisat import DreiSatIE
 from .defense import DefenseGouvFrIE
 from .discovery import DiscoveryIE
+from .divxstage import DivxStageIE
 from .dropbox import DropboxIE
 from .ebaumsworld import EbaumsWorldIE
 from .ehow import EHowIE
@@ -156,6 +158,7 @@ from .mofosex import MofosexIE
 from .mooshare import MooshareIE
 from .morningstar import MorningstarIE
 from .motorsport import MotorsportIE
+from .movshare import MovShareIE
 from .mtv import (
    MTVIE,
    MTVIggyIE,
@@ -276,6 +279,7 @@ from .videodetective import VideoDetectiveIE
 from .videolecturesnet import VideoLecturesNetIE
 from .videofyme import VideofyMeIE
 from .videopremium import VideoPremiumIE
+from .videoweed import VideoWeedIE
 from .vimeo import (
    VimeoIE,
    VimeoChannelIE,
--- a/youtube_dl/extractor/cbsnews.py
+++ b/youtube_dl/extractor/cbsnews.py
@@ -0,0 +1,87 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+
+
+class CBSNewsIE(InfoExtractor):
+    IE_DESC = 'CBS News'
+    _VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:[^/]+/)+(?P<id>[\da-z_-]+)'
+
+    _TESTS = [
+        {
+            'url': 'http://www.cbsnews.com/news/tesla-and-spacex-elon-musks-industrial-empire/',
+            'info_dict': {
+                'id': 'tesla-and-spacex-elon-musks-industrial-empire',
+                'ext': 'flv',
+                'title': 'Tesla and SpaceX: Elon Musk\'s industrial empire',
+                'thumbnail': 'http://beta.img.cbsnews.com/i/2014/03/30/60147937-2f53-4565-ad64-1bdd6eb64679/60-0330-pelley-640x360.jpg',
+                'duration': 791,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
+        {
+            'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
+            'info_dict': {
+                'id': 'fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack',
+                'ext': 'flv',
+                'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
+                'thumbnail': 'http://cbsnews2.cbsistatic.com/hub/i/r/2014/04/04/0c9fbc66-576b-41ca-8069-02d122060dd2/thumbnail/140x90/6dad7a502f88875ceac38202984b6d58/en-0404-werner-replace-640x360.jpg',
+                'duration': 205,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_info = json.loads(self._html_search_regex(
+            r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
+            webpage, 'video JSON info'))
+
+        item = video_info['item'] if 'item' in video_info else video_info
+        title = item.get('articleTitle') or item.get('hed')
+        duration = item.get('duration')
+        thumbnail = item.get('mediaImage') or item.get('thumbnail')
+
+        formats = []
+        for format_id in ['RtmpMobileLow', 'RtmpMobileHigh', 'Hls', 'RtmpDesktop']:
+            uri = item.get('media' + format_id + 'URI')
+            if not uri:
+                continue
+            fmt = {
+                'url': uri,
+                'format_id': format_id,
+            }
+            if uri.startswith('rtmp'):
+                fmt.update({
+                    'app': 'ondemand?auth=cbs',
+                    'play_path': 'mp4:' + uri.split('<break>')[-1],
+                    'player_url': 'http://www.cbsnews.com/[[IMPORT]]/vidtech.cbsinteractive.com/player/3_3_0/CBSI_PLAYER_HD.swf',
+                    'page_url': 'http://www.cbsnews.com',
+                    'ext': 'flv',
+                })
+            elif uri.endswith('.m3u8'):
+                fmt['ext'] = 'mp4'
+            formats.append(fmt)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -8,7 +8,6 @@ from .subtitles import SubtitlesInfoExtractor
 from ..utils import (
    compat_urllib_request,
    compat_str,
-    get_element_by_attribute,
    get_element_by_id,
    orderedSet,
    str_to_int,
--- a/youtube_dl/extractor/divxstage.py
+++ b/youtube_dl/extractor/divxstage.py
@@ -0,0 +1,27 @@
+from __future__ import unicode_literals
+
+from .novamov import NovaMovIE
+
+
+class DivxStageIE(NovaMovIE):
+    IE_NAME = 'divxstage'
+    IE_DESC = 'DivxStage'
+
+    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'divxstage\.(?:eu|net|ch|co|at|ag)'}
+
+    _HOST = 'www.divxstage.eu'
+
+    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
+    _TITLE_REGEX = r'<div class="video_det">\s*<strong>([^<]+)</strong>'
+    _DESCRIPTION_REGEX = r'<div class="video_det">\s*<strong>[^<]+</strong>\s*<p>([^<]+)</p>'
+
+    _TEST = {
+        'url': 'http://www.divxstage.eu/video/57f238e2e5e01',
+        'md5': '63969f6eb26533a1968c4d325be63e72',
+        'info_dict': {
+            'id': '57f238e2e5e01',
+            'ext': 'flv',
+            'title': 'youtubedl test video',
+            'description': 'This is a test video for youtubedl.',
+        }
+    }
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -184,6 +184,17 @@ class GenericIE(InfoExtractor):
                'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',
            }
        },
+        # Embedded Ustream video
+        {
+            'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
+            'md5': '27b99cdb639c9b12a79bca876a073417',
+            'info_dict': {
+                'id': '45734260',
+                'ext': 'flv',
+                'uploader': 'AU SPA:  The NSA and Privacy',
+                'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
+            }
+        },
        # nowvideo embed hidden behind percent encoding
        {
            'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
@@ -500,17 +511,18 @@ class GenericIE(InfoExtractor):
        if mobj is not None:
            return self.url_result(mobj.group(1), 'Mpora')

-        # Look for embedded NovaMov player
+        # Look for embedded NovaMov-based player
        mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?novamov\.com/embed\.php.+?)\1', webpage)
+            r'''(?x)<iframe[^>]+?src=(["\'])
+                    (?P<url>http://(?:(?:embed|www)\.)?
+                        (?:novamov\.com|
+                           nowvideo\.(?:ch|sx|eu|at|ag|co)|
+                           videoweed\.(?:es|com)|
+                           movshare\.(?:net|sx|ag)|
+                           divxstage\.(?:eu|net|ch|co|at|ag))
+                        /embed\.php.+?)\1''', webpage)
        if mobj is not None:
-            return self.url_result(mobj.group('url'), 'NovaMov')
-
-        # Look for embedded NowVideo player
-        mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?nowvideo\.(?:ch|sx|eu)/embed\.php.+?)\1', webpage)
-        if mobj is not None:
-            return self.url_result(mobj.group('url'), 'NowVideo')
+            return self.url_result(mobj.group('url'))

        # Look for embedded Facebook player
        mobj = re.search(
@@ -556,6 +568,12 @@ class GenericIE(InfoExtractor):
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'TED')

+        # Look for embedded Ustream videos
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'Ustream')
+
        # Look for embedded arte.tv player
        mobj = re.search(
            r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
--- a/youtube_dl/extractor/justintv.py
+++ b/youtube_dl/extractor/justintv.py
@@ -1,9 +1,12 @@
+from __future__ import unicode_literals
+
 import json
 import os
 import re

 from .common import InfoExtractor
 from ..utils import (
+    compat_str,
    ExtractorError,
    formatSeconds,
 )
@@ -24,34 +27,31 @@ class JustinTVIE(InfoExtractor):
        /?(?:\#.*)?$
        """
    _JUSTIN_PAGE_LIMIT = 100
-    IE_NAME = u'justin.tv'
+    IE_NAME = 'justin.tv'
+    IE_DESC = 'justin.tv and twitch.tv'
    _TEST = {
-        u'url': u'http://www.twitch.tv/thegamedevhub/b/296128360',
-        u'file': u'296128360.flv',
-        u'md5': u'ecaa8a790c22a40770901460af191c9a',
-        u'info_dict': {
-            u"upload_date": u"20110927", 
-            u"uploader_id": 25114803, 
-            u"uploader": u"thegamedevhub", 
-            u"title": u"Beginner Series - Scripting With Python Pt.1"
+        'url': 'http://www.twitch.tv/thegamedevhub/b/296128360',
+        'md5': 'ecaa8a790c22a40770901460af191c9a',
+        'info_dict': {
+            'id': '296128360',
+            'ext': 'flv',
+            'upload_date': '20110927',
+            'uploader_id': 25114803,
+            'uploader': 'thegamedevhub',
+            'title': 'Beginner Series - Scripting With Python Pt.1'
        }
    }

-    def report_download_page(self, channel, offset):
-        """Report attempt to download a single page of videos."""
-        self.to_screen(u'%s: Downloading video information from %d to %d' %
-                (channel, offset, offset + self._JUSTIN_PAGE_LIMIT))
-
    # Return count of items, list of *valid* items
    def _parse_page(self, url, video_id):
        info_json = self._download_webpage(url, video_id,
-                                           u'Downloading video info JSON',
-                                           u'unable to download video info JSON')
+                                           'Downloading video info JSON',
+                                           'unable to download video info JSON')

        response = json.loads(info_json)
        if type(response) != list:
            error_text = response.get('error', 'unknown error')
-            raise ExtractorError(u'Justin.tv API: %s' % error_text)
+            raise ExtractorError('Justin.tv API: %s' % error_text)
        info = []
        for clip in response:
            video_url = clip['video_file_url']
@@ -62,7 +62,7 @@ class JustinTVIE(InfoExtractor):
                video_id = clip['id']
                video_title = clip.get('title', video_id)
                info.append({
-                    'id': video_id,
+                    'id': compat_str(video_id),
                    'url': video_url,
                    'title': video_title,
                    'uploader': clip.get('channel_name', video_uploader_id),
@@ -74,8 +74,6 @@ class JustinTVIE(InfoExtractor):

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'invalid URL: %s' % url)

        api_base = 'http://api.justin.tv'
        paged = False
@@ -89,40 +87,41 @@ class JustinTVIE(InfoExtractor):
            webpage = self._download_webpage(url, chapter_id)
            m = re.search(r'PP\.archive_id = "([0-9]+)";', webpage)
            if not m:
-                raise ExtractorError(u'Cannot find archive of a chapter')
+                raise ExtractorError('Cannot find archive of a chapter')
            archive_id = m.group(1)

            api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
-            doc = self._download_xml(api, chapter_id,
-                                             note=u'Downloading chapter information',
-                                             errnote=u'Chapter information download failed')
+            doc = self._download_xml(
+                api, chapter_id,
+                note='Downloading chapter information',
+                errnote='Chapter information download failed')
            for a in doc.findall('.//archive'):
                if archive_id == a.find('./id').text:
                    break
            else:
-                raise ExtractorError(u'Could not find chapter in chapter information')
+                raise ExtractorError('Could not find chapter in chapter information')

            video_url = a.find('./video_file_url').text
-            video_ext = video_url.rpartition('.')[2] or u'flv'
+            video_ext = video_url.rpartition('.')[2] or 'flv'

-            chapter_api_url = u'https://api.twitch.tv/kraken/videos/c' + chapter_id
-            chapter_info_json = self._download_webpage(chapter_api_url, u'c' + chapter_id,
-                                   note='Downloading chapter metadata',
-                                   errnote='Download of chapter metadata failed')
-            chapter_info = json.loads(chapter_info_json)
+            chapter_api_url = 'https://api.twitch.tv/kraken/videos/c' + chapter_id
+            chapter_info = self._download_json(
+                chapter_api_url, 'c' + chapter_id,
+                note='Downloading chapter metadata',
+                errnote='Download of chapter metadata failed')

            bracket_start = int(doc.find('.//bracket_start').text)
            bracket_end = int(doc.find('.//bracket_end').text)

            # TODO determine start (and probably fix up file)
            #  youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
-            #video_url += u'?start=' + TODO:start_timestamp
+            #video_url += '?start=' + TODO:start_timestamp
            # bracket_start is 13290, but we want 51670615
-            self._downloader.report_warning(u'Chapter detected, but we can just download the whole file. '
-                                            u'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))
+            self._downloader.report_warning('Chapter detected, but we can just download the whole file. '
+                                            'Chapter starts at %s and ends at %s' % (formatSeconds(bracket_start), formatSeconds(bracket_end)))

            info = {
-                'id': u'c' + chapter_id,
+                'id': 'c' + chapter_id,
                'url': video_url,
                'ext': video_ext,
                'title': chapter_info['title'],
@@ -131,14 +130,12 @@ class JustinTVIE(InfoExtractor):
                'uploader': chapter_info['channel']['display_name'],
                'uploader_id': chapter_info['channel']['name'],
            }
-            return [info]
+            return info
        else:
            video_id = mobj.group('videoid')
            api = api_base + '/broadcast/by_archive/%s.json' % video_id

-        self.report_extraction(video_id)
-
-        info = []
+        entries = []
        offset = 0
        limit = self._JUSTIN_PAGE_LIMIT
        while True:
@@ -146,8 +143,12 @@ class JustinTVIE(InfoExtractor):
                self.report_download_page(video_id, offset)
            page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
            page_count, page_info = self._parse_page(page_url, video_id)
-            info.extend(page_info)
+            entries.extend(page_info)
            if not paged or page_count != limit:
                break
            offset += limit
-        return info
+        return {
+            '_type': 'playlist',
+            'id': video_id,
+            'entries': entries,
+        }
--- a/youtube_dl/extractor/morningstar.py
+++ b/youtube_dl/extractor/morningstar.py
@@ -1,17 +1,9 @@
 # coding: utf-8
 from __future__ import unicode_literals

-import hashlib
-import json
 import re
-import time

 from .common import InfoExtractor
-from ..utils import (
-    compat_parse_qs,
-    compat_str,
-    int_or_none,
-)


 class MorningstarIE(InfoExtractor):
--- a/youtube_dl/extractor/motorsport.py
+++ b/youtube_dl/extractor/motorsport.py
@@ -44,7 +44,7 @@ class MotorsportIE(InfoExtractor):
        e = compat_str(int(time.time()) + 24 * 60 * 60)
        base_video_url = params['location'] + '?e=' + e
        s = 'h3hg713fh32'
-        h = hashlib.md5(s + base_video_url).hexdigest()
+        h = hashlib.md5((s + base_video_url).encode('utf-8')).hexdigest()
        video_url = base_video_url + '&h=' + h

        uploader = self._html_search_regex(
--- a/youtube_dl/extractor/movshare.py
+++ b/youtube_dl/extractor/movshare.py
@@ -0,0 +1,27 @@
+from __future__ import unicode_literals
+
+from .novamov import NovaMovIE
+
+
+class MovShareIE(NovaMovIE):
+    IE_NAME = 'movshare'
+    IE_DESC = 'MovShare'
+
+    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'movshare\.(?:net|sx|ag)'}
+
+    _HOST = 'www.movshare.net'
+
+    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
+    _TITLE_REGEX = r'<strong>Title:</strong> ([^<]+)</p>'
+    _DESCRIPTION_REGEX = r'<strong>Description:</strong> ([^<]+)</p>'
+
+    _TEST = {
+        'url': 'http://www.movshare.net/video/559e28be54d96',
+        'md5': 'abd31a2132947262c50429e1d16c1bfd',
+        'info_dict': {
+            'id': '559e28be54d96',
+            'ext': 'flv',
+            'title': 'dissapeared image',
+            'description': 'optical illusion  dissapeared image  magic illusion',
+        }
+    }
--- a/youtube_dl/extractor/novamov.py
+++ b/youtube_dl/extractor/novamov.py
@@ -13,7 +13,8 @@ class NovaMovIE(InfoExtractor):
    IE_NAME = 'novamov'
    IE_DESC = 'NovaMov'

-    _VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'novamov\.com'}
+    _VALID_URL_TEMPLATE = r'http://(?:(?:www\.)?%(host)s/(?:file|video)/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<id>[a-z\d]{13})'
+    _VALID_URL = _VALID_URL_TEMPLATE % {'host': 'novamov\.com'}

    _HOST = 'www.novamov.com'

@@ -36,18 +37,17 @@ class NovaMovIE(InfoExtractor):

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
+        video_id = mobj.group('id')

        page = self._download_webpage(
            'http://%s/video/%s' % (self._HOST, video_id), video_id, 'Downloading video page')

        if re.search(self._FILE_DELETED_REGEX, page) is not None:
-            raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
+            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        filekey = self._search_regex(self._FILEKEY_REGEX, page, 'filekey')

        title = self._html_search_regex(self._TITLE_REGEX, page, 'title', fatal=False)
-
        description = self._html_search_regex(self._DESCRIPTION_REGEX, page, 'description', default='', fatal=False)

        api_response = self._download_webpage(
--- a/youtube_dl/extractor/nowvideo.py
+++ b/youtube_dl/extractor/nowvideo.py
@@ -7,7 +7,7 @@ class NowVideoIE(NovaMovIE):
    IE_NAME = 'nowvideo'
    IE_DESC = 'NowVideo'

-    _VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'nowvideo\.(?:ch|sx|eu)'}
+    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|sx|eu|at|ag|co)'}

    _HOST = 'www.nowvideo.ch'

--- a/youtube_dl/extractor/pornhd.py
+++ b/youtube_dl/extractor/pornhd.py
@@ -39,11 +39,11 @@ class PornHdIE(InfoExtractor):

        formats = [
            {
-                'url': url,
+                'url': format_url,
                'ext': format.lower(),
                'format_id': '%s-%s' % (format.lower(), quality.lower()),
                'quality': 1 if quality.lower() == 'high' else 0,
-            } for format, quality, url in re.findall(
+            } for format, quality, format_url in re.findall(
                r'var __video([\da-zA-Z]+?)(Low|High)StreamUrl = \'(http://.+?)\?noProxy=1\'', webpage)
        ]

--- a/youtube_dl/extractor/ro220.py
+++ b/youtube_dl/extractor/ro220.py
@@ -18,7 +18,7 @@ class Ro220IE(InfoExtractor):
        'md5': '03af18b73a07b4088753930db7a34add',
        'info_dict': {
            "title": "Luati-le Banii sez 4 ep 1",
-            "description": "Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
+            "description": "re:^Iata-ne reveniti dupa o binemeritata vacanta\. +Va astept si pe Facebook cu pareri si comentarii.$",
        }
    }

--- a/youtube_dl/extractor/rts.py
+++ b/youtube_dl/extractor/rts.py
@@ -35,13 +35,13 @@ class RTSIE(InfoExtractor):
        },
        {
            'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html',
-            'md5': 'c197f0b2421995c63a64cc73d800f42e',
+            'md5': 'c148457a27bdc9e5b1ffe081a7a8337b',
            'info_dict': {
-                'id': '5738317',
+                'id': '5624067',
                'ext': 'mp4',
-                'duration': 55,
-                'title': 'Bande de lancement de Passe-moi les jumelles',
-                'description': '',
+                'duration': 3720,
+                'title': 'Les yeux dans les cieux - Mon homard au Canada',
+                'description': 'md5:d22ee46f5cc5bac0912e5a0c6d44a9f7',
                'uploader': 'Passe-moi les jumelles',
                'upload_date': '20140404',
                'timestamp': 1396635300,
@@ -98,17 +98,20 @@ class RTSIE(InfoExtractor):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')

-        def download_json(video_id):
+        def download_json(internal_id):
            return self._download_json(
-                'http://www.rts.ch/a/%s.html?f=json/article' % video_id, video_id)
+                'http://www.rts.ch/a/%s.html?f=json/article' % internal_id,
+                video_id)

        all_info = download_json(video_id)

        # video_id extracted out of URL is not always a real id
        if 'video' not in all_info and 'audio' not in all_info:
            page = self._download_webpage(url, video_id)
-            video_id = self._html_search_regex(r'<(?:video|audio) data-id="(\d+)"', page, 'video id')
-            all_info = download_json(video_id)
+            internal_id = self._html_search_regex(
+                r'<(?:video|audio) data-id="([0-9]+)"', page,
+                'internal video id')
+            all_info = download_json(internal_id)

        info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio']

--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -9,8 +9,18 @@ from ..utils import (


 class TeamcocoIE(InfoExtractor):
-    _VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'
-    _TEST = {
+    _VALID_URL = r'http://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<url_title>.*)'
+    _TESTS = [
+    {
+        'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
+        'file': '80187.mp4',
+        'md5': '3f7746aa0dc86de18df7539903d399ea',
+        'info_dict': {
+            'title': 'Conan Becomes A Mary Kay Beauty Consultant',
+            'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
+        }
+    },
+    {
        'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
        'file': '19705.mp4',
        'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
@@ -19,6 +29,7 @@ class TeamcocoIE(InfoExtractor):
            "title": "Louis C.K. Interview Pt. 1 11/3/11"
        }
    }
+    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@@ -26,11 +37,13 @@ class TeamcocoIE(InfoExtractor):
            raise ExtractorError('Invalid URL: %s' % url)
        url_title = mobj.group('url_title')
        webpage = self._download_webpage(url, url_title)
-
-        video_id = self._html_search_regex(
-            r'<article class="video" data-id="(\d+?)"',
-            webpage, 'video id')
-
+        
+        video_id = mobj.group("video_id")
+        if not video_id:  # unmatched optional group yields None (never ''), so fall back to the page
+            video_id = self._html_search_regex(
+                r'<article class="video" data-id="(\d+?)"',
+                webpage, 'video id')
+        
        self.report_extraction(video_id)

        data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -37,6 +37,7 @@ class TEDIE(SubtitlesInfoExtractor):
                'consciousness, but that half the time our brains are '
                'actively fooling us.'),
            'uploader': 'Dan Dennett',
+            'width': 854,
        }
    }, {
        'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
@@ -50,10 +51,10 @@ class TEDIE(SubtitlesInfoExtractor):
        }
    }]

-    _FORMATS_PREFERENCE = {
-        'low': 1,
-        'medium': 2,
-        'high': 3,
+    _NATIVE_FORMATS = {
+        'low': {'preference': 1, 'width': 320, 'height': 180},
+        'medium': {'preference': 2, 'width': 512, 'height': 288},
+        'high': {'preference': 3, 'width': 854, 'height': 480},
    }

    def _extract_info(self, webpage):
@@ -98,12 +99,14 @@ class TEDIE(SubtitlesInfoExtractor):
        talk_info = self._extract_info(webpage)['talks'][0]

        formats = [{
-            'ext': 'mp4',
            'url': format_url,
            'format_id': format_id,
            'format': format_id,
-            'preference': self._FORMATS_PREFERENCE.get(format_id, -1),
        } for (format_id, format_url) in talk_info['nativeDownloads'].items()]
+        for f in formats:
+            finfo = self._NATIVE_FORMATS.get(f['format_id'])
+            if finfo:
+                f.update(finfo)
        self._sort_formats(formats)

        video_id = compat_str(talk_info['id'])
--- a/youtube_dl/extractor/ustream.py
+++ b/youtube_dl/extractor/ustream.py
@@ -11,7 +11,7 @@ from ..utils import (


 class UstreamIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
+    _VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed)/(?P<videoID>\d+)'
    IE_NAME = 'ustream'
    _TEST = {
        'url': 'http://www.ustream.tv/recorded/20274954',
@@ -25,6 +25,13 @@ class UstreamIE(InfoExtractor):

    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
+        if m.group('type') == 'embed':
+            video_id = m.group('videoID')
+            webpage = self._download_webpage(url, video_id)
+            desktop_video_id = self._html_search_regex(r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id')
+            desktop_url = 'http://www.ustream.tv/recorded/' + desktop_video_id
+            return self.url_result(desktop_url, 'Ustream')
+
        video_id = m.group('videoID')

        video_url = 'http://tcdn.ustream.tv/video/%s' % video_id
--- a/youtube_dl/extractor/videoweed.py
+++ b/youtube_dl/extractor/videoweed.py
@@ -0,0 +1,26 @@
+from __future__ import unicode_literals
+
+from .novamov import NovaMovIE
+
+
+class VideoWeedIE(NovaMovIE):
+    IE_NAME = 'videoweed'
+    IE_DESC = 'VideoWeed'
+
+    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'videoweed\.(?:es|com)'}
+
+    _HOST = 'www.videoweed.es'
+
+    _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
+    _TITLE_REGEX = r'<h1 class="text_shadow">([^<]+)</h1>'
+
+    _TEST = {
+        'url': 'http://www.videoweed.es/file/b42178afbea14',
+        'md5': 'abd31a2132947262c50429e1d16c1bfd',
+        'info_dict': {
+            'id': 'b42178afbea14',
+            'ext': 'flv',
+            'title': 'optical illusion  dissapeared image magic illusion',
+            'description': ''
+        },
+    }
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -1,6 +1,7 @@
 from __future__ import unicode_literals

 import itertools
+import json
 import re

 from .common import InfoExtractor, SearchInfoExtractor
@@ -14,7 +15,7 @@ from ..utils import (

 class YahooIE(InfoExtractor):
    IE_DESC = 'Yahoo screen'
-    _VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html'
+    _VALID_URL = r'https?://screen\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html'
    _TESTS = [
        {
            'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-

 import calendar
+import codecs
 import contextlib
 import ctypes
 import datetime
@@ -1263,9 +1264,11 @@ class PagedList(object):


 def uppercase_escape(s):
+    unicode_escape = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
-        lambda m: m.group(0).decode('unicode-escape'), s)
+        lambda m: unicode_escape(m.group(0))[0],
+        s)

 try:
    struct.pack(u'!I', 0)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2014.04.04.4'
+__version__ = '2014.04.07'
Author	SHA1	Message	Date
Philipp Hagemeister	650d688d10	release 2014.04.07	2014-04-07 13:11:37 +02:00
Philipp Hagemeister	0ba77818f3	[ted] Add width and height (Fixes #2716 )	2014-04-07 13:11:30 +02:00
Sergey M․	09baa7da7e	[rts] Update test	2014-04-07 00:34:23 +07:00
Sergey M․	85e787f51d	[cbsnews] Add support for cbsnews.com (Closes #2691 )	2014-04-06 06:03:58 +07:00
Philipp Hagemeister	2a9e1e453a	Merge branch 'master' of github.com:rg3/youtube-dl	2014-04-05 20:05:47 +02:00
Philipp Hagemeister	ee1e199685	[justin.tv] Modernize (Fixes #2705 )	2014-04-05 17:56:36 +02:00
Sergey M․	17c5a00774	[novamov] Simplify	2014-04-05 19:36:22 +07:00
Sergey M․	15c0e8e7b2	[generic] Generalize novamov based embeds	2014-04-05 17:20:05 +07:00
Sergey M․	cca37fba48	[divxstage] Fix typo in IE_NAME	2014-04-05 17:15:43 +07:00
Sergey M․	9d0993ec4a	[movshare] Support more domains	2014-04-05 17:00:18 +07:00
Sergey M․	342f33bf9e	[divxstage] Support more domains	2014-04-05 16:50:05 +07:00
Sergey M․	7cd3bc5f99	[nowvideo] Support more domains	2014-04-05 16:38:57 +07:00
Sergey M․	931055e6cb	[videoweed] Revert _FILE_DELETED_REGEX	2014-04-05 16:32:14 +07:00
Sergey M․	d0e4cf82f1	[movshare] Add _FILE_DELETED_REGEX	2014-04-05 16:31:38 +07:00
Sergey M․	6f88df2c57	[divxstage] Add support for divxstage.eu	2014-04-05 16:29:44 +07:00
Sergey M․	4479bf2762	[videoweed] Simplify	2014-04-05 16:09:28 +07:00
Sergey M․	1ff7c0f7d8	[movshare] Add support for movshare.net	2014-04-05 16:09:03 +07:00
Sergey M․	610e47c87e	Credit @sainyamkapoor for videoweed extractor	2014-04-05 15:53:50 +07:00
Sergey M․	50f566076f	[generic] Add support for videoweed embeds	2014-04-05 15:49:45 +07:00
Sergey M․	92810ff497	[nowvideo] Improve _VALID_URL	2014-04-05 15:35:21 +07:00
Sergey M․	60ccc59a1c	[novamov] Improve _VALID_URL	2014-04-05 15:34:54 +07:00
Sergey M․	91745595d3	[videoweed] Simplify	2014-04-05 15:32:55 +07:00
Sainyam Kapoor	d6e40507d0	[videoweed]Cleanup	2014-04-05 10:53:22 +05:30
Sainyam Kapoor	deed48b472	[Videoweed] Added support for videoweed.	2014-04-05 10:40:03 +05:30
Philipp Hagemeister	e4d41bfca5	Merge pull request #2696 from anovicecodemonkey/support-ustream-embeds: [UstreamIE] [generic] Added support for Ustream embed URLs (Fixes #2694)	2014-04-04 23:33:08 +02:00
Philipp Hagemeister	a355b70f27	[cspan] Do not test number of playlist entries. Apparently, CSpan switches between single-file and multiple-file results. Either one is fine as long as we get the full four hours.	2014-04-04 23:16:22 +02:00
Philipp Hagemeister	f8514f6186	[rts] Use visible id in file names. Maybe the internal ID is more precise, but it's totally confusing, and the obvious ID still allows a google search.	2014-04-04 23:13:55 +02:00
Philipp Hagemeister	e09b8fcd9d	[ro220] Make test case more flexible. Either one or two spaces is fine here.	2014-04-04 23:08:33 +02:00
Philipp Hagemeister	7d1b527ff9	[motorsport] Fix on Python 3	2014-04-04 23:06:27 +02:00
Philipp Hagemeister	f943c7b622	release 2014.04.04.7	2014-04-04 23:01:45 +02:00
Philipp Hagemeister	676eb3f2dd	Fix unicode_escape (Fixes #2695 )	2014-04-04 23:00:51 +02:00
Philipp Hagemeister	98b7cf1ace	release 2014.04.04.6	2014-04-04 22:48:35 +02:00
Philipp Hagemeister	c465afd736	[teamcoco] Fix regex in 2.6 (#2700). The re engine does not want to repeat an empty string, for fear that something like `(.*)*` could be matching the tokens ... "" "" "" "" "" "". Of course, that's harmless with a question mark, although still somewhat strange.	2014-04-04 22:46:47 +02:00
Philipp Hagemeister	b84d6e7fc4	Merge remote-tracking branch 'AGSPhoenix/teamcoco-fix'	2014-04-04 22:44:49 +02:00
Philipp Hagemeister	2efd5d78c1	release 2014.04.04.5	2014-04-04 22:24:45 +02:00
Philipp Hagemeister	c8edf47b3a	[yahoo] Support https and -uploader URLs (Fixes #2701 )	2014-04-04 22:23:59 +02:00
Philipp Hagemeister	3b4c26a428	[pornhd] Avoid shadowing variable url	2014-04-04 22:22:30 +02:00
Philipp Hagemeister	1525148114	Remove unused imports	2014-04-04 22:22:11 +02:00
AGSPhoenix	fa387d2d99	Revert "Workaround for regex engine limitation". This reverts commit `6d0d573eca`.	2014-04-04 15:37:49 -04:00
AGSPhoenix	6d0d573eca	Workaround for regex engine limitation	2014-04-04 15:25:28 -04:00
AGSPhoenix	bb799e811b	Add a test for the new URL pages. Add a test for the pages with the video_id in the URL.	2014-04-04 13:52:35 -04:00
AGSPhoenix	04ee53eca1	Support TeamCoco URLs with video_id in the title. If the URL has the video_id in it, use that since the current method of finding the id breaks on those pages. Fixes 2698.	2014-04-04 13:42:34 -04:00
anovicecodemonkey	ca6aada48e	Fix _TEST for Ustream embed URLs	2014-04-05 03:26:29 +10:30
anovicecodemonkey	5c38625259	[UstreamIE] [generic] Added support for Ustream embed URLs (Fixes #2694 )	2014-04-05 00:53:09 +10:30