Compare commits

...

30 Commits

SHA1 Message Date
0bf5cf9886 release 2014.02.24 2014-02-24 09:44:22 +01:00
919052d094 [zdf] Fix podcast extraction and use unicode literals (Closes #2446) 2014-02-24 13:47:47 +07:00
a2dafe2887 [youtube] Fix mix video regex
Attributes' order in <li> is arbitrary and changes every time playlist
page is fetched, so we can't rely on `data-index` to be before
`data-video-username`.
2014-02-24 12:52:02 +07:00
92661c994b [normalboots] Modernize and simplify 2014-02-23 18:28:22 +01:00
ffe8fe356a [normalboots] Fix video url extraction 2014-02-23 18:06:51 +01:00
bc2f773b4f [youtube:playlist] Fix mixes extraction (fixes #2444) 2014-02-23 17:17:36 +01:00
f919201ecc [vine] Extract more metadata and support low format 2014-02-23 19:02:31 +07:00
7ff5d5c2e2 Add one more format to unified_strdate 2014-02-23 19:00:51 +07:00
9b77f951c7 [breakcom] Fix error when calling _search_regex
I passed `'webpage'` instead of the variable `webpage`.
2014-02-23 12:28:44 +01:00
a25f2f990a [breakcom] Fix info json extraction 2014-02-23 12:20:58 +01:00
78b373975d [vine] Fix uploader extraction 2014-02-23 12:08:30 +01:00
2fcc873c4c release 2014.02.22.1 2014-02-22 23:17:56 +01:00
23c2baadb3 [videobam] Set age_limit to 18
From [their ToS](http://videobam.com/terms): "User must be eighteen 18[sic] years of age or older to use or access this web site."
2014-02-22 23:15:41 +01:00
521ee82334 Fix imports 2014-02-22 23:03:12 +01:00
1df96e59ce [f4m] Clean up 2014-02-22 23:03:00 +01:00
3e123c1e28 [videobam] Add support for videobam.com (Closes #2411) 2014-02-23 04:50:05 +07:00
f38da66731 Credit @soult for br 2014-02-22 20:19:41 +01:00
06aabfc422 [br] Simplify 2014-02-22 20:17:26 +01:00
1052d2bfec Merge remote-tracking branch 'soult/br' 2014-02-22 17:14:47 +01:00
5e0b652344 release 2014.02.22 2014-02-22 15:07:25 +01:00
0f8f097183 [release.sh] Do not run tests by default
We are at the point that testing takes waay too long for a release cycle, and fails way too often.
Tests through travis are a better indicator than testing just before release.
2014-02-22 15:06:07 +01:00
491ed3dda2 [trutube] Support multiple formats (#2433) 2014-02-22 15:05:30 +01:00
af284c6d1b Merge remote-tracking branch 'JohnyMoSwag/master' 2014-02-22 14:38:42 +01:00
41d3ec5fba [savefrom] Add extractor (Fixes #2434) 2014-02-22 14:36:16 +01:00
0568c352f3 [canalc2] Modernize 2014-02-22 14:27:09 +01:00
2e7b4cb714 [spankwire] Fix uploader id regex 2014-02-22 16:50:08 +07:00
9767726b66 [spankwire] Improve and modernize 2014-02-22 16:45:03 +07:00
9ddfd84e41 added trutubeIE 2014-02-22 00:11:57 -08:00
7928024f57 [BR] Add basic test 2014-02-21 18:00:05 +01:00
3eb38acb43 [BR] Add "BR" extractor
Extractor for videos from the Bayerischer Rundfunk Mediathek[1]. Currently only
supports videos. Audio and podcasts do not work yet with this extractor.

1: http://br.de/mediathek
2014-02-21 17:58:52 +01:00
19 changed files with 434 additions and 149 deletions

View File

@@ -14,9 +14,9 @@
 set -e
-skip_tests=false
-if [ "$1" = '--skip-test' ]; then
-    skip_tests=true
+skip_tests=true
+if [ "$1" = '--run-tests' ]; then
+    skip_tests=false
     shift
 fi

View File

@@ -18,6 +18,7 @@ from test.helper import (
 import hashlib
 import io
 import json
+import re
 import socket

 import youtube_dl.YoutubeDL
@@ -137,12 +138,21 @@ def generator(test_case):
             with io.open(info_json_fn, encoding='utf-8') as infof:
                 info_dict = json.load(infof)
             for (info_field, expected) in tc.get('info_dict', {}).items():
-                if isinstance(expected, compat_str) and expected.startswith('md5:'):
-                    got = 'md5:' + md5(info_dict.get(info_field))
-                else:
+                if isinstance(expected, compat_str) and expected.startswith('re:'):
                     got = info_dict.get(info_field)
-                self.assertEqual(expected, got,
-                    u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
+                    match_str = expected[len('re:'):]
+                    match_rex = re.compile(match_str)
+
+                    self.assertTrue(
+                        isinstance(got, compat_str) and match_rex.match(got),
+                        u'field %s (value: %r) should match %r' % (info_field, got, match_str))
+                else:
+                    if isinstance(expected, compat_str) and expected.startswith('md5:'):
+                        got = 'md5:' + md5(info_dict.get(info_field))
+                    else:
+                        got = info_dict.get(info_field)
+                    self.assertEqual(expected, got,
+                        u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))

             # If checkable fields are missing from the test case, print the info_dict
             test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
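
The hunk above adds a 're:' prefix for test expectations: an info_dict value that starts with 're:' is now compiled and matched against the extracted field instead of being compared verbatim (the existing 'md5:' handling moves into the else branch). A minimal sketch of how an extractor test might use it; the URL and field values here are illustrative, not taken from a real test:

    _TEST = {
        'url': 'http://example.com/video/12345',
        'info_dict': {
            'id': '12345',
            'ext': 'mp4',
            # exact match, as before
            'title': 'Some example video',
            # pattern match via the new 're:' prefix, handy for volatile URLs
            'thumbnail': 're:^https?://.*\.jpg$',
        },
    }

The TruTube test added further down in this compare uses exactly this form for its thumbnail field.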

View File

@@ -46,6 +46,7 @@ __authors__ = (
     'Andreas Schmitz',
     'Michael Kaiser',
     'Niklas Laxström',
+    'David Triendl',
 )

 __license__ = 'Public Domain'

View File

@@ -12,7 +12,6 @@ from .http import HttpFD
 from ..utils import (
     struct_pack,
     struct_unpack,
-    compat_urllib_request,
     compat_urlparse,
     format_bytes,
     encodeFilename,
@@ -117,8 +116,8 @@ class FlvReader(io.BytesIO):
         self.read_unsigned_char()
         # flags
         self.read(3)
-        # BootstrapinfoVersion
-        bootstrap_info_version = self.read_unsigned_int()
+        self.read_unsigned_int()  # BootstrapinfoVersion
         # Profile,Live,Update,Reserved
         self.read(1)
         # time scale
@@ -127,15 +126,15 @@ class FlvReader(io.BytesIO):
         self.read_unsigned_long_long()
         # SmpteTimeCodeOffset
         self.read_unsigned_long_long()
-        # MovieIdentifier
-        movie_identifier = self.read_string()
+        self.read_string()  # MovieIdentifier
         server_count = self.read_unsigned_char()
         # ServerEntryTable
         for i in range(server_count):
             self.read_string()
         quality_count = self.read_unsigned_char()
         # QualityEntryTable
-        for i in range(server_count):
+        for i in range(quality_count):
             self.read_string()
         # DrmData
         self.read_string()
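
The last hunk above fixes the QualityEntryTable loop, which iterated with server_count instead of quality_count; once the counts disagree, every read after that table lands at the wrong offset. A standalone sketch of the failure mode (byte layout and names invented for illustration, not youtube-dl code):

    import io
    import struct

    def read_string(stream):
        # null-terminated string, in the style of the FLV bootstrap-info boxes
        buf = b''
        while True:
            c = stream.read(1)
            if c in (b'', b'\x00'):
                return buf
            buf += c

    data = io.BytesIO(b'\x01srv\x00\x02hd\x00sd\x00drm\x00')
    server_count = struct.unpack('B', data.read(1))[0]             # 1
    servers = [read_string(data) for _ in range(server_count)]     # [b'srv']
    quality_count = struct.unpack('B', data.read(1))[0]            # 2
    # Looping with server_count (1) here would leave b'sd\x00' unread, so the
    # DrmData string that follows would be parsed from the wrong position.
    qualities = [read_string(data) for _ in range(quality_count)]  # [b'hd', b'sd']
    drm_data = read_string(data)                                   # b'drm'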

View File

@@ -19,6 +19,7 @@ from .bbccouk import BBCCoUkIE
 from .blinkx import BlinkxIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bloomberg import BloombergIE
+from .br import BRIE
 from .breakcom import BreakIE
 from .brightcove import BrightcoveIE
 from .c56 import C56IE
@@ -186,6 +187,7 @@ from .rutube import (
     RutubeMovieIE,
     RutubePersonIE,
 )
+from .savefrom import SaveFromIE
 from .servingsys import ServingSysIE
 from .sina import SinaIE
 from .slashdot import SlashdotIE
@@ -224,6 +226,7 @@ from .tinypic import TinyPicIE
 from .toutv import TouTvIE
 from .traileraddict import TrailerAddictIE
 from .trilulilu import TriluliluIE
+from .trutube import TruTubeIE
 from .tube8 import Tube8IE
 from .tudou import TudouIE
 from .tumblr import TumblrIE
@@ -238,6 +241,7 @@ from .vesti import VestiIE
 from .vevo import VevoIE
 from .vice import ViceIE
 from .viddler import ViddlerIE
+from .videobam import VideoBamIE
 from .videodetective import VideoDetectiveIE
 from .videofyme import VideofyMeIE
 from .videopremium import VideoPremiumIE

View File

@@ -0,0 +1,80 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import ExtractorError


class BRIE(InfoExtractor):
    IE_DESC = "Bayerischer Rundfunk Mediathek"
    _VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?P<id>[a-z0-9\-]+)\.html$"
    _BASE_URL = "http://www.br.de"

    _TEST = {
        "url": "http://www.br.de/mediathek/video/anselm-gruen-114.html",
        "md5": "c4f83cf0f023ba5875aba0bf46860df2",
        "info_dict": {
            "id": "2c8d81c5-6fb7-4a74-88d4-e768e5856532",
            "ext": "mp4",
            "title": "Feiern und Verzichten",
            "description": "Anselm Grün: Feiern und Verzichten",
            "uploader": "BR/Birgit Baier",
            "upload_date": "20140301"
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('id')
        page = self._download_webpage(url, display_id)
        xml_url = self._search_regex(
            r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);", page, "XMLURL")
        xml = self._download_xml(self._BASE_URL + xml_url, None)

        videos = [{
            "id": xml_video.get("externalId"),
            "title": xml_video.find("title").text,
            "formats": self._extract_formats(xml_video.find("assets")),
            "thumbnails": self._extract_thumbnails(xml_video.find("teaserImage/variants")),
            "description": " ".join(xml_video.find("shareTitle").text.splitlines()),
            "uploader": xml_video.find("author").text,
            "upload_date": "".join(reversed(xml_video.find("broadcastDate").text.split("."))),
            "webpage_url": xml_video.find("permalink").text,
        } for xml_video in xml.findall("video")]

        if len(videos) > 1:
            self._downloader.report_warning(
                'found multiple videos; please '
                'report this with the video URL to http://yt-dl.org/bug')
        if not videos:
            raise ExtractorError('No video entries found')
        return videos[0]

    def _extract_formats(self, assets):
        formats = [{
            "url": asset.find("downloadUrl").text,
            "ext": asset.find("mediaType").text,
            "format_id": asset.get("type"),
            "width": int(asset.find("frameWidth").text),
            "height": int(asset.find("frameHeight").text),
            "tbr": int(asset.find("bitrateVideo").text),
            "abr": int(asset.find("bitrateAudio").text),
            "vcodec": asset.find("codecVideo").text,
            "container": asset.find("mediaType").text,
            "filesize": int(asset.find("size").text),
        } for asset in assets.findall("asset")
            if asset.find("downloadUrl") is not None]
        self._sort_formats(formats)
        return formats

    def _extract_thumbnails(self, variants):
        thumbnails = [{
            "url": self._BASE_URL + variant.find("url").text,
            "width": int(variant.find("width").text),
            "height": int(variant.find("height").text),
        } for variant in variants.findall("variant")]
        thumbnails.sort(key=lambda x: x["width"] * x["height"], reverse=True)
        return thumbnails
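
For context, a minimal sketch of exercising the new extractor through the embedding API rather than the command line; the URL is the one from the extractor's own _TEST, while the options and printed fields are illustrative assumptions:

    from youtube_dl import YoutubeDL

    # Probe the BR Mediathek extractor without downloading the media itself.
    ydl = YoutubeDL({'skip_download': True, 'quiet': True})
    info = ydl.extract_info('http://www.br.de/mediathek/video/anselm-gruen-114.html', download=False)
    print(info['id'], info['title'], info['upload_date'])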

View File

@@ -23,8 +23,8 @@ class BreakIE(InfoExtractor):
         video_id = mobj.group(1).split("-")[-1]
         embed_url = 'http://www.break.com/embed/%s' % video_id
         webpage = self._download_webpage(embed_url, video_id)
-        info_json = self._search_regex(r'var embedVars = ({.*?});', webpage,
-            'info json', flags=re.DOTALL)
+        info_json = self._search_regex(r'var embedVars = ({.*})\s*?</script>',
+            webpage, 'info json', flags=re.DOTALL)
         info = json.loads(info_json)
         video_url = info['videoUri']
         m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url)
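
The regression fixed here came from passing the literal string 'webpage' to _search_regex instead of the webpage variable, so the regex was run against seven characters of text rather than the downloaded page. A minimal sketch of the bug pattern with a generic regex helper (not the actual extractor code):

    import re

    def search_regex(pattern, string):
        # stand-in for InfoExtractor._search_regex: apply the regex to `string`
        m = re.search(pattern, string, re.DOTALL)
        return m.group(1) if m else None

    webpage = '<script>var embedVars = {"videoUri": "http://example.com/v.mp4"};</script>'

    search_regex(r'var embedVars = ({.*?});', 'webpage')   # None: searched the literal text 'webpage'
    search_regex(r'var embedVars = ({.*?});', webpage)     # '{"videoUri": "http://example.com/v.mp4"}'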

View File

@@ -1,4 +1,6 @@
 # coding: utf-8
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
@@ -9,11 +11,12 @@ class Canalc2IE(InfoExtractor):
     _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?.*?idVideo=(?P<id>\d+)'

     _TEST = {
-        u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
-        u'file': u'12163.mp4',
-        u'md5': u'060158428b650f896c542dfbb3d6487f',
-        u'info_dict': {
-            u'title': u'Terrasses du Numérique'
+        'url': 'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
+        'md5': '060158428b650f896c542dfbb3d6487f',
+        'info_dict': {
+            'id': '12163',
+            'ext': 'mp4',
+            'title': 'Terrasses du Numérique'
         }
     }
@@ -28,10 +31,11 @@ class Canalc2IE(InfoExtractor):
         video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name

         title = self._html_search_regex(
-            r'class="evenement8">(.*?)</a>', webpage, u'title')
-        return {'id': video_id,
-                'ext': 'mp4',
-                'url': video_url,
-                'title': title,
-                }
+            r'class="evenement8">(.*?)</a>', webpage, 'title')
+
+        return {
+            'id': video_id,
+            'ext': 'mp4',
+            'url': video_url,
+            'title': title,
+        }

View File

@@ -1,61 +1,51 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
 import re

 from .common import InfoExtractor
 from ..utils import (
-    ExtractorError,
     unified_strdate,
 )


 class NormalbootsIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$'
+    _VALID_URL = r'http://(?:www\.)?normalboots\.com/video/(?P<videoid>[0-9a-z-]*)/?$'
     _TEST = {
-        u'url': u'http://normalboots.com/video/home-alone-games-jontron/',
-        u'file': u'home-alone-games-jontron.mp4',
-        u'md5': u'8bf6de238915dd501105b44ef5f1e0f6',
-        u'info_dict': {
-            u'title': u'Home Alone Games - JonTron - NormalBoots',
-            u'description': u'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for \u2018Tense Battle Theme\u2019:\xa0http://www.youtube.com/Kiamet/',
-            u'uploader': u'JonTron',
-            u'upload_date': u'20140125',
+        'url': 'http://normalboots.com/video/home-alone-games-jontron/',
+        'md5': '8bf6de238915dd501105b44ef5f1e0f6',
+        'info_dict': {
+            'id': 'home-alone-games-jontron',
+            'ext': 'mp4',
+            'title': 'Home Alone Games - JonTron - NormalBoots',
+            'description': 'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for Tense Battle Theme:\xa0http://www.youtube.com/Kiamet/',
+            'uploader': 'JonTron',
+            'upload_date': '20140125',
         }
     }

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError(u'Invalid URL: %s' % url)
         video_id = mobj.group('videoid')
-        info = {
-            'id': video_id,
-            'uploader': None,
-            'upload_date': None,
-        }
-        if url[:4] != 'http':
-            url = 'http://' + url
         webpage = self._download_webpage(url, video_id)
-        video_title = self._og_search_title(webpage)
-        video_description = self._og_search_description(webpage)
-        video_thumbnail = self._og_search_thumbnail(webpage)
         video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
             webpage, 'uploader')
         raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
             webpage, 'date')
         video_upload_date = unified_strdate(raw_upload_date)
-        video_upload_date = unified_strdate(raw_upload_date)

         player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url')
         player_page = self._download_webpage(player_url, video_id)
-        video_url = u'http://player.screenwavemedia.com/' + self._html_search_regex(r"'file':\s'(?P<file>[0-9A-Za-z-_\.]+)'", player_page, 'file')
+        video_url = self._html_search_regex(r"file:\s'(?P<file>[^']+\.mp4)'", player_page, 'file')

-        info['url'] = video_url
-        info['title'] = video_title
-        info['description'] = video_description
-        info['thumbnail'] = video_thumbnail
-        info['uploader'] = video_uploader
-        info['upload_date'] = video_upload_date
-        return info
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'uploader': video_uploader,
+            'upload_date': video_upload_date,
+        }

View File

@@ -0,0 +1,37 @@
# coding: utf-8
from __future__ import unicode_literals

import os.path
import re

from .common import InfoExtractor


class SaveFromIE(InfoExtractor):
    IE_NAME = 'savefrom.net'
    _VALID_URL = r'https?://[^.]+\.savefrom\.net/\#url=(?P<url>.*)$'

    _TEST = {
        'url': 'http://en.savefrom.net/#url=http://youtube.com/watch?v=UlVRAPW2WJY&utm_source=youtube.com&utm_medium=short_domains&utm_campaign=ssyoutube.com',
        'info_dict': {
            'id': 'UlVRAPW2WJY',
            'ext': 'mp4',
            'title': 'About Team Radical MMA | MMA Fighting',
            'upload_date': '20120816',
            'uploader': 'Howcast',
            'uploader_id': 'Howcast',
            'description': 'md5:4f0aac94361a12e1ce57d74f85265175',
        },
        'params': {
            'skip_download': True
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = os.path.splitext(url.split('/')[-1])[0]

        return {
            '_type': 'url',
            'id': video_id,
            'url': mobj.group('url'),
        }

View File

@@ -1,6 +1,5 @@
 from __future__ import unicode_literals

-import os
 import re

 from .common import InfoExtractor
@@ -8,23 +7,27 @@ from ..utils import (
     compat_urllib_parse_urlparse,
     compat_urllib_request,
     compat_urllib_parse,
+    unified_strdate,
+    str_to_int,
+    int_or_none,
 )
-from ..aes import (
-    aes_decrypt_text
-)
+from ..aes import aes_decrypt_text


 class SpankwireIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
     _TEST = {
         'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
-        'file': '103545.mp4',
-        'md5': '1b3f55e345500552dbc252a3e9c1af43',
+        'md5': '8bbfde12b101204b39e4b9fe7eb67095',
         'info_dict': {
-            "uploader": "oreusz",
-            "title": "Buckcherry`s X Rated Music Video Crazy Bitch",
-            "description": "Crazy Bitch X rated music video.",
-            "age_limit": 18,
+            'id': '103545',
+            'ext': 'mp4',
+            'title': 'Buckcherry`s X Rated Music Video Crazy Bitch',
+            'description': 'Crazy Bitch X rated music video.',
+            'uploader': 'oreusz',
+            'uploader_id': '124697',
+            'upload_date': '20070508',
+            'age_limit': 18,
         }
     }
@@ -37,13 +40,26 @@ class SpankwireIE(InfoExtractor):
         req.add_header('Cookie', 'age_verified=1')
         webpage = self._download_webpage(req, video_id)

-        video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
-        video_uploader = self._html_search_regex(
-            r'by:\s*<a [^>]*>(.+?)</a>', webpage, 'uploader', fatal=False)
-        thumbnail = self._html_search_regex(
-            r'flashvars\.image_url = "([^"]+)', webpage, 'thumbnail', fatal=False)
+        title = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
         description = self._html_search_regex(
             r'<div\s+id="descriptionContent">([^<]+)<', webpage, 'description', fatal=False)
+        thumbnail = self._html_search_regex(
+            r'flashvars\.image_url = "([^"]+)', webpage, 'thumbnail', fatal=False)
+
+        uploader = self._html_search_regex(
+            r'by:\s*<a [^>]*>(.+?)</a>', webpage, 'uploader', fatal=False)
+        uploader_id = self._html_search_regex(
+            r'by:\s*<a href="/Profile\.aspx\?.*?UserId=(\d+).*?"', webpage, 'uploader id', fatal=False)
+        upload_date = self._html_search_regex(r'</a> on (.+?) at \d+:\d+', webpage, 'upload date', fatal=False)
+        if upload_date:
+            upload_date = unified_strdate(upload_date)
+
+        view_count = self._html_search_regex(
+            r'<div id="viewsCounter"><span>([^<]+)</span> views</div>', webpage, 'view count', fatal=False)
+        if view_count:
+            view_count = str_to_int(view_count)
+        comment_count = int_or_none(self._html_search_regex(
+            r'<span id="spCommentCount">\s*(\d+)</span> Comments</div>', webpage, 'comment count', fatal=False))

         video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage)))
         if webpage.find('flashvars\.encrypted = "true"') != -1:
@@ -53,16 +69,13 @@ class SpankwireIE(InfoExtractor):
         formats = []
         for video_url in video_urls:
             path = compat_urllib_parse_urlparse(video_url).path
-            extension = os.path.splitext(path)[1][1:]
             format = path.split('/')[4].split('_')[:2]
             resolution, bitrate_str = format
             format = "-".join(format)
-            height = int(resolution.rstrip('P'))
-            tbr = int(bitrate_str.rstrip('K'))
+            height = int(resolution.rstrip('Pp'))
+            tbr = int(bitrate_str.rstrip('Kk'))
             formats.append({
                 'url': video_url,
-                'ext': extension,
                 'resolution': resolution,
                 'format': format,
                 'tbr': tbr,
@@ -75,10 +88,14 @@ class SpankwireIE(InfoExtractor):
         return {
             'id': video_id,
-            'uploader': video_uploader,
-            'title': video_title,
-            'thumbnail': thumbnail,
+            'title': title,
             'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'upload_date': upload_date,
+            'view_count': view_count,
+            'comment_count': comment_count,
             'formats': formats,
             'age_limit': age_limit,
         }
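
One easy-to-miss change above: rstrip('P') and rstrip('K') became rstrip('Pp') and rstrip('Kk'), so the numeric parse tolerates either letter case in the path segments. A quick illustration with invented sample values:

    for resolution, bitrate_str in [('720P', '1500K'), ('480p', '800k')]:
        height = int(resolution.rstrip('Pp'))   # 720, then 480
        tbr = int(bitrate_str.rstrip('Kk'))     # 1500, then 800
    # With the old rstrip('P'), the lowercase '480p' would reach int() unchanged
    # and raise ValueError.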

View File

@@ -0,0 +1,44 @@
from __future__ import unicode_literals

import re

from .common import InfoExtractor


class TruTubeIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?trutube\.tv/video/(?P<id>[0-9]+)/.*'
    _TEST = {
        'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-',
        'md5': 'c5b6e301b0a2040b074746cbeaa26ca1',
        'info_dict': {
            'id': '14880',
            'ext': 'flv',
            'title': 'Ramses II - Proven To Be A Red Headed Caucasoid',
            'thumbnail': 're:^http:.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        video_title = self._og_search_title(webpage).strip()
        thumbnail = self._search_regex(
            r"var splash_img = '([^']+)';", webpage, 'thumbnail', fatal=False)

        all_formats = re.finditer(
            r"var (?P<key>[a-z]+)_video_file\s*=\s*'(?P<url>[^']+)';", webpage)
        formats = [{
            'format_id': m.group('key'),
            'quality': -i,
            'url': m.group('url'),
        } for i, m in enumerate(all_formats)]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_title,
            'formats': formats,
            'thumbnail': thumbnail,
        }

View File

@@ -0,0 +1,80 @@
from __future__ import unicode_literals

import re
import json

from .common import InfoExtractor
from ..utils import int_or_none


class VideoBamIE(InfoExtractor):
    _VALID_URL = r'http://(?:www\.)?videobam\.com/(?:videos/download/)?(?P<id>[a-zA-Z]+)'

    _TESTS = [
        {
            'url': 'http://videobam.com/OiJQM',
            'md5': 'db471f27763a531f10416a0c58b5a1e0',
            'info_dict': {
                'id': 'OiJQM',
                'ext': 'mp4',
                'title': 'Is Alcohol Worse Than Ecstasy?',
                'description': 'md5:d25b96151515c91debc42bfbb3eb2683',
                'uploader': 'frihetsvinge',
            },
        },
        {
            'url': 'http://videobam.com/pqLvq',
            'md5': 'd9a565b5379a99126ef94e1d7f9a383e',
            'note': 'HD video',
            'info_dict': {
                'id': 'pqLvq',
                'ext': 'mp4',
            }
        },
    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        page = self._download_webpage('http://videobam.com/%s' % video_id, video_id, 'Downloading page')

        formats = []

        for preference, format_id in enumerate(['low', 'high']):
            mobj = re.search(r"%s: '(?P<url>[^']+)'" % format_id, page)
            if not mobj:
                continue
            formats.append({
                'url': mobj.group('url'),
                'ext': 'mp4',
                'format_id': format_id,
                'preference': preference,
            })

        if not formats:
            player_config = json.loads(self._html_search_regex(r'var player_config = ({.+?});', page, 'player config'))
            formats = [{
                'url': item['url'],
                'ext': 'mp4',
            } for item in player_config['playlist'] if 'autoPlay' in item]

        self._sort_formats(formats)

        title = self._og_search_title(page, default='VideoBam', fatal=False)
        description = self._og_search_description(page, default=None)
        thumbnail = self._og_search_thumbnail(page)
        uploader = self._html_search_regex(r'Upload by ([^<]+)</a>', page, 'uploader', fatal=False, default=None)
        view_count = int_or_none(
            self._html_search_regex(r'<strong>Views:</strong> (\d+) ', page, 'view count', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'view_count': view_count,
            'formats': formats,
            'age_limit': 18,
        }

View File

@@ -1,8 +1,10 @@
 from __future__ import unicode_literals

 import re
+import json

 from .common import InfoExtractor
+from ..utils import unified_strdate


 class VineIE(InfoExtractor):
@@ -13,31 +15,46 @@ class VineIE(InfoExtractor):
         'info_dict': {
             'id': 'b9KOOWX7HUx',
             'ext': 'mp4',
-            'uploader': 'Jack Dorsey',
             'title': 'Chicken.',
+            'description': 'Chicken.',
+            'upload_date': '20130519',
+            'uploader': 'Jack Dorsey',
+            'uploader_id': '76',
         },
     }

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
-        webpage_url = 'https://vine.co/v/' + video_id
-        webpage = self._download_webpage(webpage_url, video_id)
-        self.report_extraction(video_id)
-        video_url = self._html_search_meta('twitter:player:stream', webpage,
-            'video URL')
-        uploader = self._html_search_regex(r'<p class="username">(.*?)</p>',
-            webpage, 'uploader', fatal=False, flags=re.DOTALL)
+        webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id)
+
+        data = json.loads(self._html_search_regex(
+            r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data'))
+
+        formats = [
+            {
+                'url': data['videoLowURL'],
+                'ext': 'mp4',
+                'format_id': 'low',
+            },
+            {
+                'url': data['videoUrl'],
+                'ext': 'mp4',
+                'format_id': 'standard',
+            }
+        ]

         return {
             'id': video_id,
-            'url': video_url,
-            'ext': 'mp4',
             'title': self._og_search_title(webpage),
-            'thumbnail': self._og_search_thumbnail(webpage),
-            'uploader': uploader,
-        }
+            'description': data['description'],
+            'thumbnail': data['thumbnailUrl'],
+            'upload_date': unified_strdate(data['created']),
+            'uploader': data['username'],
+            'uploader_id': data['userIdStr'],
+            'like_count': data['likes']['count'],
+            'comment_count': data['comments']['count'],
+            'repost_count': data['reposts']['count'],
+            'formats': formats,
+        }

View File

@@ -22,8 +22,8 @@ class WorldStarHipHopIE(InfoExtractor):
         webpage_src = self._download_webpage(url, video_id)

         m_vevo_id = re.search(r'videoId=(.*?)&amp?',
             webpage_src)
         if m_vevo_id is not None:
             self.to_screen(u'Vevo video detected:')
             return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo')

View File

@@ -29,7 +29,6 @@ from ..utils import (
     ExtractorError,
     int_or_none,
     PagedList,
-    RegexNotFoundError,
     unescapeHTML,
     unified_strdate,
     orderedSet,
@@ -1489,11 +1488,15 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         # the id of the playlist is just 'RD' + video_id
         url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
         webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
-        title_span = (get_element_by_attribute('class', 'title long-title', webpage) or
-            get_element_by_attribute('class', 'title ', webpage))
+        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
+        title_span = (search_title('playlist-title') or
+            search_title('title long-title') or search_title('title'))
         title = clean_html(title_span)
-        video_re = r'data-index="\d+".*?href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s' % re.escape(playlist_id)
-        ids = orderedSet(re.findall(video_re, webpage))
+        video_re = r'''(?x)data-video-username="(.*?)".*?
+                       href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)
+        matches = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
+        # Some of the videos may have been deleted, their username field is empty
+        ids = [video_id for (username, video_id) in matches if username]
         url_results = self._ids_to_results(ids)

         return self.playlist_result(url_results, playlist_id, title)
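
As the commit message earlier in this compare explains, the attribute order inside the mix playlist's <li> elements is not stable, so the new pattern keys on data-video-username (whose value is also used to skip deleted videos) rather than assuming data-index comes first. A small illustration of why fixing the relative order of attributes is fragile (markup invented for the example):

    import re

    # The same <li> may be served with its data-* attributes in either order.
    li_a = '<li data-index="3" data-video-username="Some User" data-video-id="dQw4w9WgXcQ">'
    li_b = '<li data-video-username="Some User" data-index="3" data-video-id="dQw4w9WgXcQ">'

    # A pattern that assumes data-index precedes data-video-username breaks on li_b...
    ordered = re.compile(r'data-index="(\d+)".*?data-video-username="(.*?)"')
    # ...while matching each attribute on its own does not care about the order.
    username = re.compile(r'data-video-username="(.*?)"')
    index = re.compile(r'data-index="(\d+)"')

    for li in (li_a, li_b):
        print(bool(ordered.search(li)), username.search(li).group(1), index.search(li).group(1))
    # -> True Some User 3
    # -> False Some User 3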

View File

@@ -1,4 +1,5 @@
 # coding: utf-8
+from __future__ import unicode_literals

 import re
@@ -13,52 +14,42 @@ class ZDFIE(InfoExtractor):
     _VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<video_id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'

     _TEST = {
-        u"url": u"http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt",
-        u"file": u"2037704.webm",
-        u"info_dict": {
-            u"upload_date": u"20131127",
-            u"description": u"Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial \"Ende des Machtpokers - Große Koalition für Deutschland\".",
-            u"uploader": u"spezial",
-            u"title": u"ZDFspezial - Ende des Machtpokers"
+        'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt',
+        'info_dict': {
+            'id': '2037704',
+            'ext': 'webm',
+            'title': 'ZDFspezial - Ende des Machtpokers',
+            'description': 'Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial "Ende des Machtpokers - Große Koalition für Deutschland".',
+            'duration': 1022,
+            'uploader': 'spezial',
+            'uploader_id': '225948',
+            'upload_date': '20131127',
         },
-        u"skip": u"Videos on ZDF.de are depublicised in short order",
+        'skip': 'Videos on ZDF.de are depublicised in short order',
     }

     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('video_id')

-        xml_url = u'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
+        xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
         doc = self._download_xml(
             xml_url, video_id,
-            note=u'Downloading video info',
-            errnote=u'Failed to download video info')
+            note='Downloading video info',
+            errnote='Failed to download video info')

         title = doc.find('.//information/title').text
         description = doc.find('.//information/detail').text
+        duration = int(doc.find('.//details/lengthSec').text)
         uploader_node = doc.find('.//details/originChannelTitle')
         uploader = None if uploader_node is None else uploader_node.text
-        duration_str = doc.find('.//details/length').text
-        duration_m = re.match(r'''(?x)^
-            (?P<hours>[0-9]{2})
-            :(?P<minutes>[0-9]{2})
-            :(?P<seconds>[0-9]{2})
-            (?:\.(?P<ms>[0-9]+)?)
-            ''', duration_str)
-        duration = (
-            (
-                (int(duration_m.group('hours')) * 60 * 60) +
-                (int(duration_m.group('minutes')) * 60) +
-                int(duration_m.group('seconds'))
-            )
-            if duration_m
-            else None
-        )
+        uploader_id_node = doc.find('.//details/originChannelId')
+        uploader_id = None if uploader_id_node is None else uploader_id_node.text
         upload_date = unified_strdate(doc.find('.//details/airtime').text)

         def xml_to_format(fnode):
             video_url = fnode.find('url').text
-            is_available = u'http://www.metafilegenerator' not in video_url
+            is_available = 'http://www.metafilegenerator' not in video_url

             format_id = fnode.attrib['basetype']
             format_m = re.match(r'''(?x)
@@ -71,22 +62,28 @@ class ZDFIE(InfoExtractor):
             quality = fnode.find('./quality').text
             abr = int(fnode.find('./audioBitrate').text) // 1000
-            vbr = int(fnode.find('./videoBitrate').text) // 1000
+            vbr_node = fnode.find('./videoBitrate')
+            vbr = None if vbr_node is None else int(vbr_node.text) // 1000

-            format_note = u''
+            width_node = fnode.find('./width')
+            width = None if width_node is None else int_or_none(width_node.text)
+            height_node = fnode.find('./height')
+            height = None if height_node is None else int_or_none(height_node.text)
+
+            format_note = ''
             if not format_note:
                 format_note = None

             return {
-                'format_id': format_id + u'-' + quality,
+                'format_id': format_id + '-' + quality,
                 'url': video_url,
                 'ext': ext,
                 'acodec': format_m.group('acodec'),
                 'vcodec': format_m.group('vcodec'),
                 'abr': abr,
                 'vbr': vbr,
-                'width': int_or_none(fnode.find('./width').text),
-                'height': int_or_none(fnode.find('./height').text),
+                'width': width,
+                'height': height,
                 'filesize': int_or_none(fnode.find('./filesize').text),
                 'format_note': format_note,
                 'protocol': proto,
@@ -103,9 +100,10 @@ class ZDFIE(InfoExtractor):
         return {
             'id': video_id,
             'title': title,
-            'formats': formats,
             'description': description,
-            'uploader': uploader,
             'duration': duration,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
             'upload_date': upload_date,
+            'formats': formats,
         }

View File

@@ -779,6 +779,7 @@ def unified_strdate(date_str):
         '%Y-%m-%dT%H:%M:%S.%fZ',
         '%Y-%m-%dT%H:%M:%S.%f0Z',
         '%Y-%m-%dT%H:%M:%S',
+        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M',
    ]
    for expression in format_expressions:
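
The added expression lets unified_strdate handle ISO-8601 timestamps that carry fractional seconds but no timezone suffix, as produced by some of the JSON APIs touched in this compare (the Vine created field is a likely candidate, though that is an assumption). A small standalone sketch of what the new format string accepts:

    from datetime import datetime

    # unified_strdate() tries each expression in turn and keeps only %Y%m%d.
    dt = datetime.strptime('2014-02-23T19:00:51.123456', '%Y-%m-%dT%H:%M:%S.%f')
    print(dt.strftime('%Y%m%d'))  # 20140223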

View File

@@ -1,2 +1,2 @@
-__version__ = '2014.02.21.1'
+__version__ = '2014.02.24'