release 2014.02.22

[release.sh] Do not run tests by default
We are at the point where testing takes way too long for a release cycle and fails way too often. Tests run through Travis are a better indicator than testing just before release.
2014-02-22 15:07:25 +01:00 · 2014-02-22 15:06:07 +01:00 · 2014-02-22 15:05:30 +01:00 · 2014-02-22 14:38:42 +01:00 · 2014-02-22 14:36:16 +01:00 · 2014-02-22 14:27:09 +01:00
13 changed files with 231 additions and 76 deletions
--- a/devscripts/release.sh
+++ b/devscripts/release.sh
@ -14,9 +14,9 @@
 set -e
-skip_tests=false
+skip_tests=true
-if [ "$1" = '--skip-test' ]; then
+if [ "$1" = '--run-tests' ]; then
-    skip_tests=true
+    skip_tests=false
    shift
 fi
--- a/test/test_download.py
+++ b/test/test_download.py
@ -18,6 +18,7 @@ from test.helper import (
 import hashlib
 import io
 import json
 import re
 import socket
 import youtube_dl.YoutubeDL
@ -137,6 +138,15 @@ def generator(test_case):
                with io.open(info_json_fn, encoding='utf-8') as infof:
                    info_dict = json.load(infof)
                for (info_field, expected) in tc.get('info_dict', {}).items():
                    if isinstance(expected, compat_str) and expected.startswith('re:'):
                        got = info_dict.get(info_field)
                        match_str = expected[len('re:'):]
                        match_rex = re.compile(match_str)
                        self.assertTrue(
                            isinstance(got, compat_str) and match_rex.match(got),
                            u'field %s (value: %r) should match %r' % (info_field, got, match_str))
                    else:
                        if isinstance(expected, compat_str) and expected.startswith('md5:'):
                            got = 'md5:' + md5(info_dict.get(info_field))
                        else:
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@ -186,6 +186,7 @@ from .rutube import (
    RutubeMovieIE,
    RutubePersonIE,
 )
 from .savefrom import SaveFromIE
 from .servingsys import ServingSysIE
 from .sina import SinaIE
 from .slashdot import SlashdotIE
@ -224,6 +225,7 @@ from .tinypic import TinyPicIE
 from .toutv import TouTvIE
 from .traileraddict import TrailerAddictIE
 from .trilulilu import TriluliluIE
 from .trutube import TruTubeIE
 from .tube8 import Tube8IE
 from .tudou import TudouIE
 from .tumblr import TumblrIE
--- a/youtube_dl/extractor/canalc2.py
+++ b/youtube_dl/extractor/canalc2.py
@ -1,4 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
@ -9,11 +11,12 @@ class Canalc2IE(InfoExtractor):
    _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P<id>\d+)'
    _TEST = {
-        u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
+        'url': 'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
-        u'file': u'12163.mp4',
+        'md5': '060158428b650f896c542dfbb3d6487f',
-        u'md5': u'060158428b650f896c542dfbb3d6487f',
+        'info_dict': {
-        u'info_dict': {
+            'id': '12163',
-            u'title': u'Terrasses du Numérique'
+            'ext': 'mp4',
            'title': 'Terrasses du Numérique'
        }
    }
@ -28,9 +31,10 @@ class Canalc2IE(InfoExtractor):
        video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name
        title = self._html_search_regex(
-            r'class="evenement8">(.*?)</a>', webpage, u'title')
+            r'class="evenement8">(.*?)</a>', webpage, 'title')
-        return {'id': video_id,
+        return {
            'id': video_id,
            'ext': 'mp4',
            'url': video_url,
            'title': title,
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -13,6 +13,7 @@ from ..utils import (
    compat_urllib_parse,
    compat_urllib_request,
    compat_urlparse,
    compat_xml_parse_error,
    ExtractorError,
    HEADRequest,
@ -241,10 +242,10 @@ class GenericIE(InfoExtractor):
        # Is it an RSS feed?
        try:
-            doc = xml.etree.ElementTree.fromstring(webpage)
+            doc = xml.etree.ElementTree.fromstring(webpage.encode('utf-8'))
            if doc.tag == 'rss':
                return self._extract_rss(url, video_id, doc)
-        except xml.etree.ElementTree.ParseError:
+        except compat_xml_parse_error:
            pass
        # it's tempting to parse this further, but you would
--- a/youtube_dl/extractor/savefrom.py
+++ b/youtube_dl/extractor/savefrom.py
@ -0,0 +1,37 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import os.path
 import re
 from .common import InfoExtractor
 class SaveFromIE(InfoExtractor):
    IE_NAME = 'savefrom.net'
    _VALID_URL = r'https?://[^.]+\.savefrom\.net/\#url=(?P<url>.*)$'
    _TEST = {
        'url': 'http://en.savefrom.net/#url=http://youtube.com/watch?v=UlVRAPW2WJY&utm_source=youtube.com&utm_medium=short_domains&utm_campaign=ssyoutube.com',
        'info_dict': {
            'id': 'UlVRAPW2WJY',
            'ext': 'mp4',
            'title': 'About Team Radical MMA | MMA Fighting',
            'upload_date': '20120816',
            'uploader': 'Howcast',
            'uploader_id': 'Howcast',
            'description': 'md5:4f0aac94361a12e1ce57d74f85265175',
        },
        'params': {
            'skip_download': True
        }
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = os.path.splitext(url.split('/')[-1])[0]
        return {
            '_type': 'url',
            'id': video_id,
            'url': mobj.group('url'),
        }
--- a/youtube_dl/extractor/spankwire.py
+++ b/youtube_dl/extractor/spankwire.py
@ -1,6 +1,5 @@
 from __future__ import unicode_literals
 import os
 import re
 from .common import InfoExtractor
@ -8,23 +7,27 @@ from ..utils import (
    compat_urllib_parse_urlparse,
    compat_urllib_request,
    compat_urllib_parse,
    unified_strdate,
    str_to_int,
    int_or_none,
 )
-from ..aes import (
+from ..aes import aes_decrypt_text
    aes_decrypt_text
 )
 class SpankwireIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
    _TEST = {
        'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
-        'file': '103545.mp4',
+        'md5': '8bbfde12b101204b39e4b9fe7eb67095',
        'md5': '1b3f55e345500552dbc252a3e9c1af43',
        'info_dict': {
-            "uploader": "oreusz",
+            'id': '103545',
-            "title": "Buckcherry`s X Rated Music Video Crazy Bitch",
+            'ext': 'mp4',
-            "description": "Crazy Bitch X rated music video.",
+            'title': 'Buckcherry`s X Rated Music Video Crazy Bitch',
-            "age_limit": 18,
+            'description': 'Crazy Bitch X rated music video.',
            'uploader': 'oreusz',
            'uploader_id': '124697',
            'upload_date': '20070508',
            'age_limit': 18,
        }
    }
@ -37,13 +40,26 @@ class SpankwireIE(InfoExtractor):
        req.add_header('Cookie', 'age_verified=1')
        webpage = self._download_webpage(req, video_id)
-        video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
+        title = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
        video_uploader = self._html_search_regex(
            r'by:\s*<a [^>]*>(.+?)</a>', webpage, 'uploader', fatal=False)
        thumbnail = self._html_search_regex(
            r'flashvars\.image_url = "([^"]+)', webpage, 'thumbnail', fatal=False)
        description = self._html_search_regex(
            r'<div\s+id="descriptionContent">([^<]+)<', webpage, 'description', fatal=False)
        thumbnail = self._html_search_regex(
            r'flashvars\.image_url = "([^"]+)', webpage, 'thumbnail', fatal=False)
        uploader = self._html_search_regex(
            r'by:\s*<a [^>]*>(.+?)</a>', webpage, 'uploader', fatal=False)
        uploader_id = self._html_search_regex(
            r'by:\s*<a href="/Profile\.aspx\?.*?UserId=(\d+).*?"', webpage, 'uploader id', fatal=False)
        upload_date = self._html_search_regex(r'</a> on (.+?) at \d+:\d+', webpage, 'upload date', fatal=False)
        if upload_date:
            upload_date = unified_strdate(upload_date)
        view_count = self._html_search_regex(
            r'<div id="viewsCounter"><span>([^<]+)</span> views</div>', webpage, 'view count', fatal=False)
        if view_count:
            view_count = str_to_int(view_count)
        comment_count = int_or_none(self._html_search_regex(
            r'<span id="spCommentCount">\s*(\d+)</span> Comments</div>', webpage, 'comment count', fatal=False))
        video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage)))
        if webpage.find('flashvars\.encrypted = "true"') != -1:
@ -53,16 +69,13 @@ class SpankwireIE(InfoExtractor):
        formats = []
        for video_url in video_urls:
            path = compat_urllib_parse_urlparse(video_url).path
            extension = os.path.splitext(path)[1][1:]
            format = path.split('/')[4].split('_')[:2]
            resolution, bitrate_str = format
            format = "-".join(format)
-            height = int(resolution.rstrip('P'))
+            height = int(resolution.rstrip('Pp'))
-            tbr = int(bitrate_str.rstrip('K'))
+            tbr = int(bitrate_str.rstrip('Kk'))
            formats.append({
                'url': video_url,
                'ext': extension,
                'resolution': resolution,
                'format': format,
                'tbr': tbr,
@ -75,10 +88,14 @@ class SpankwireIE(InfoExtractor):
        return {
            'id': video_id,
-            'uploader': video_uploader,
+            'title': title,
            'title': video_title,
            'thumbnail': thumbnail,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'upload_date': upload_date,
            'view_count': view_count,
            'comment_count': comment_count,
            'formats': formats,
            'age_limit': age_limit,
        }
--- a/youtube_dl/extractor/trutube.py
+++ b/youtube_dl/extractor/trutube.py
@ -0,0 +1,47 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
 )
 class TruTubeIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?trutube\.tv/video/(?P<id>[0-9]+)/.*'
    _TEST = {
        'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-',
        'md5': 'c5b6e301b0a2040b074746cbeaa26ca1',
        'info_dict': {
            'id': '14880',
            'ext': 'flv',
            'title': 'Ramses II - Proven To Be A Red Headed Caucasoid',
            'thumbnail': 're:^http:.*\.jpg$',
        }
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        video_title = self._og_search_title(webpage).strip()
        thumbnail = self._search_regex(
            r"var splash_img = '([^']+)';", webpage, 'thumbnail', fatal=False)
        all_formats = re.finditer(
            r"var (?P<key>[a-z]+)_video_file\s*=\s*'(?P<url>[^']+)';", webpage)
        formats = [{
            'format_id': m.group('key'),
            'quality': -i,
            'url': m.group('url'),
        } for i, m in enumerate(all_formats)]
        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': video_title,
            'formats': formats,
            'thumbnail': thumbnail,
        }
--- a/youtube_dl/extractor/wimp.py
+++ b/youtube_dl/extractor/wimp.py
@ -6,14 +6,15 @@ from .common import InfoExtractor
 class WimpIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?wimp\.com/([^/]+)/'
+    _VALID_URL = r'http://(?:www\.)?wimp\.com/([^/]+)/'
    _TEST = {
-        'url': 'http://www.wimp.com/deerfence/',
+        'url': 'http://www.wimp.com/maruexhausted/',
-        'file': 'deerfence.flv',
+        'md5': 'f1acced123ecb28d9bb79f2479f2b6a1',
        'md5': '8b215e2e0168c6081a1cf84b2846a2b5',
        'info_dict': {
-            "title": "Watch Till End: Herd of deer jump over a fence.",
+            'id': 'maruexhausted',
-            "description": "These deer look as fluid as running water when they jump over this fence as a herd. This video is one that needs to be watched until the very end for the true majesty to be witnessed, but once it comes, it's sure to take your breath away.",
+            'ext': 'flv',
            'title': 'Maru is exhausted.',
            'description': 'md5:57e099e857c0a4ea312542b684a869b8',
        }
    }
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -297,6 +297,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                u"format": "141",
            },
        },
        # DASH manifest with encrypted signature
        {
            u'url': u'https://www.youtube.com/watch?v=IB3lcPjvWLA',
            u'info_dict': {
                u'id': u'IB3lcPjvWLA',
                u'ext': u'm4a',
                u'title': u'Afrojack - The Spark ft. Spree Wilson',
                u'description': u'md5:3199ed45ee8836572865580804d7ac0f',
                u'uploader': u'AfrojackVEVO',
                u'uploader_id': u'AfrojackVEVO',
                u'upload_date': u'20131011',
            },
            u"params": {
                u'youtube_include_dash_manifest': True,
                u'format': '141',
            },
        },
    ]
@ -1272,8 +1289,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
            mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
            if not mobj:
                raise ValueError('Could not find vevo ID')
-            info = json.loads(mobj.group(1))
+            ytplayer_config = json.loads(mobj.group(1))
-            args = info['args']
+            args = ytplayer_config['args']
            # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
            # this signatures are encrypted
            if 'url_encoded_fmt_stream_map' not in args:
@ -1366,12 +1383,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
            raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
        # Look for the DASH manifest
-        dash_manifest_url_lst = video_info.get('dashmpd')
+        if (self._downloader.params.get('youtube_include_dash_manifest', False)):
        if (dash_manifest_url_lst and dash_manifest_url_lst[0] and
                self._downloader.params.get('youtube_include_dash_manifest', False)):
            try:
                # The DASH manifest used needs to be the one from the original video_webpage.
                # The one found in get_video_info seems to be using different signatures.
                # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage.
                # Luckily, it seems, this case uses some kind of default signature (len == 86), so the
                # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here.
                if age_gate:
                    dash_manifest_url = video_info.get('dashmpd')[0]
                else:
                    dash_manifest_url = ytplayer_config['args']['dashmpd']
                def decrypt_sig(mobj):
                    s = mobj.group(1)
                    dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
                    return '/signature/%s' % dec_s
                dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url)
                dash_doc = self._download_xml(
-                    dash_manifest_url_lst[0], video_id,
+                    dash_manifest_url, video_id,
                    note=u'Downloading DASH manifest',
                    errnote=u'Could not download DASH manifest')
                for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
@ -1443,9 +1472,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
                     |
                        ((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})
                     )"""
-    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
+    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _MORE_PAGES_INDICATOR = r'data-link-type="next"'
-    _VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
+    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
    IE_NAME = u'youtube:playlist'
    def _real_initialize(self):
@ -1493,29 +1522,31 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
            raise ExtractorError(u'For downloading YouTube.com top lists, use '
                u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)
        more_widget_html = content_html = page
        # Extract the video ids from the playlist pages
        ids = []
        for page_num in itertools.count(1):
-            url = self._TEMPLATE_URL % (playlist_id, page_num)
+            matches = re.finditer(self._VIDEO_RE, content_html)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
            matches = re.finditer(self._VIDEO_RE, page)
            # We remove the duplicates and the link with index 0
            # (it's not the first video of the playlist)
            new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
            ids.extend(new_ids)
-            if re.search(self._MORE_PAGES_INDICATOR, page) is None:
+            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break
-        try:
+            more = self._download_json(
-            playlist_title = self._og_search_title(page)
+                'https://youtube.com/%s' % mobj.group('more'), playlist_id, 'Downloading page #%s' % page_num)
-        except RegexNotFoundError:
+            content_html = more['content_html']
-            self.report_warning(
+            more_widget_html = more['load_more_widget_html']
-                u'Playlist page is missing OpenGraph title, falling back ...',
+
                playlist_id)
        playlist_title = self._html_search_regex(
-                r'<h1 class="pl-header-title">(.*?)</h1>', page, u'title')
+                r'<h1 class="pl-header-title">\s*(.*?)\s*</h1>', page, u'title')
        url_results = self._ids_to_results(ids)
        return self.playlist_result(url_results, playlist_id, playlist_title)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -174,6 +174,11 @@ try:
 except NameError:
    compat_chr = chr
 try:
    from xml.etree.ElementTree import ParseError as compat_xml_parse_error
 except ImportError:  # Python 2.6
    from xml.parsers.expat import ExpatError as compat_xml_parse_error
 def compat_ord(c):
    if type(c) is int: return c
    else: return ord(c)
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,2 +1,2 @@
-__version__ = '2014.02.20'
+__version__ = '2014.02.22'
Author	SHA1	Message	Date
Philipp Hagemeister	5e0b652344	release 2014.02.22	2014-02-22 15:07:25 +01:00
Philipp Hagemeister	0f8f097183	[release.sh] Do not run tests by default. We are at the point where testing takes way too long for a release cycle and fails way too often. Tests run through Travis are a better indicator than testing just before release.	2014-02-22 15:06:07 +01:00
Philipp Hagemeister	491ed3dda2	[trutube] Support multiple formats (#2433)	2014-02-22 15:05:30 +01:00
Philipp Hagemeister	af284c6d1b	Merge remote-tracking branch 'JohnyMoSwag/master'	2014-02-22 14:38:42 +01:00
Philipp Hagemeister	41d3ec5fba	[savefrom] Add extractor (Fixes #2434)	2014-02-22 14:36:16 +01:00
Philipp Hagemeister	0568c352f3	[canalc2] Modernize	2014-02-22 14:27:09 +01:00
Sergey M.	2e7b4cb714	[spankwire] Fix uploader id regex	2014-02-22 16:50:08 +07:00
Sergey M.	9767726b66	[spankwire] Improve and modernize	2014-02-22 16:45:03 +07:00
Johny Mo Swag	9ddfd84e41	added trutubeIE	2014-02-22 00:11:57 -08:00
Philipp Hagemeister	1cf563d84b	release 2014.02.21.1	2014-02-21 18:19:48 +01:00
Jaime Marquínez Ferrándiz	f7300c5c90	[generic] Fix on Python 2.6: `ParseError` is not available there; the parser raises `xml.parsers.expat.ExpatError` instead. The webpage also needs to be encoded.	2014-02-21 16:59:10 +01:00
Jaime Marquínez Ferrándiz	3489b7d26c	[youtube] Simplify the decryption process for the manifest urls and add a test (closes #2422)	2014-02-21 15:15:58 +01:00
Jaime Marquínez Ferrándiz	acd2bcc384	Merge branch 'youtube-dash' of github.com:m0vie/youtube-dl	2014-02-21 15:02:47 +01:00
Philipp Hagemeister	43e77ca455	release 2014.02.21	2014-02-21 12:16:03 +01:00
Sergey M.	da36297988	[wimp] Modernize and replace test	2014-02-21 17:57:19 +07:00
Sergey M.	dbb94fb044	[youtube] Fix playlist extraction (Closes #2423, #2424, #2425)	2014-02-21 17:19:55 +07:00
m0viefreak	d68f0cdb23	[youtube] decrypt signature when downloading dash manifest	2014-02-21 03:24:56 +01:00
`@ -1,2 +1,2 @@`

	`__version__ = '2014.02.20'`	`__version__ = '2014.02.22'`