release 2014.02.04

Merge remote-tracking branch 'origin/master'
[pbs] Add support for article pages (Fixes #870 )
2014-02-04 16:33:19 +01:00 · 2014-02-04 16:33:06 +01:00 · 2014-02-04 16:31:00 +01:00 · 2014-02-04 21:31:25 +07:00 · 2014-02-04 20:53:46 +07:00 · 2014-02-04 10:37:01 +01:00
11 changed files with 336 additions and 28 deletions
--- a/test/test_signatures.py
+++ b/test/test_signatures.py
@@ -0,0 +1,75 @@
 #!/usr/bin/env python
 # Allow direct execution
 import os
 import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import io
 import re
 import string
 from youtube_dl.extractor import YoutubeIE
 from youtube_dl.utils import compat_str, compat_urlretrieve
 # Signature test specifications. Each tuple is unpacked positionally into
 # make_tfunc(url, stype, sig_length, expected_sig):
 #   url          -- address of the player file to fetch
 #   stype        -- player type; make_tfunc handles 'js' and 'swf'
 #   sig_length   -- how many leading characters of string.printable form the input
 #   expected_sig -- output the parsed signature function has to produce
 _TESTS = [
    (
        u'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
        u'js',
        86,
        u'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
    ),
    (
        u'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
        u'js',
        85,
        u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
    ),
 ]
 class TestSignature(unittest.TestCase):
    """Test case that receives its test methods from make_tfunc."""

    def setUp(self):
        """Set TESTDATA_DIR to the 'testdata' folder beside this file, creating it if missing."""
        here = os.path.abspath(__file__)
        self.TESTDATA_DIR = os.path.join(os.path.dirname(here), 'testdata')
        if os.path.exists(self.TESTDATA_DIR):
            return
        os.mkdir(self.TESTDATA_DIR)
 def make_tfunc(url, stype, sig_length, expected_sig):
    """Create one signature test and attach it to TestSignature.

    url is the player file to fetch, stype is 'js' or 'swf', sig_length is
    the number of leading string.printable characters fed to the parsed
    signature function, and expected_sig is the result it must return.
    The generated method is named test_signature_<stype>_<id>, where <id>
    is taken from the file name at the end of url.
    """
    basename = url.rpartition('/')[2]
    id_match = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename)
    assert id_match, '%r should follow URL format' % basename
    test_id = id_match.group(1)

    def test_func(self):
        # The player file is fetched only when it is not already in TESTDATA_DIR.
        local_path = os.path.join(self.TESTDATA_DIR, basename)
        if not os.path.exists(local_path):
            compat_urlretrieve(url, local_path)

        extractor = YoutubeIE()
        if stype == 'js':
            with io.open(local_path, encoding='utf-8') as player_file:
                player_source = player_file.read()
            sig_func = extractor._parse_sig_js(player_source)
        else:
            assert stype == 'swf'
            with open(local_path, 'rb') as player_file:
                player_bytes = player_file.read()
            sig_func = extractor._parse_sig_swf(player_bytes)

        scrambled = compat_str(string.printable[:sig_length])
        self.assertEqual(sig_func(scrambled), expected_sig)

    test_func.__name__ = str('test_signature_%s_%s' % (stype, test_id))
    setattr(TestSignature, test_func.__name__, test_func)
 # Generate and attach one test method to TestSignature per entry in _TESTS.
 for test_spec in _TESTS:
    make_tfunc(*test_spec)
 # Run the tests when this file is executed directly.
 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -96,6 +96,7 @@ from .ina import InaIE
 from .infoq import InfoQIE
 from .instagram import InstagramIE
 from .internetvideoarchive import InternetVideoArchiveIE
 from .iprima import IPrimaIE
 from .ivi import (
    IviIE,
    IviCompilationIE
@@ -126,6 +127,7 @@ from .mit import TechTVMITIE, MITIE
 from .mixcloud import MixcloudIE
 from .mpora import MporaIE
 from .mofosex import MofosexIE
 from .mooshare import MooshareIE
 from .mtv import (
    MTVIE,
    MTVIggyIE,
--- a/youtube_dl/extractor/bliptv.py
+++ b/youtube_dl/extractor/bliptv.py
@@ -1,19 +1,14 @@
 from __future__ import unicode_literals
 import datetime
 import json
 import re
 import socket
 from .common import InfoExtractor
 from .subtitles import SubtitlesInfoExtractor
 from ..utils import (
    compat_http_client,
    compat_str,
    compat_urllib_error,
    compat_urllib_request,
    ExtractorError,
    unescapeHTML,
 )
--- a/youtube_dl/extractor/collegehumor.py
+++ b/youtube_dl/extractor/collegehumor.py
@@ -38,7 +38,7 @@ class CollegeHumorIE(InfoExtractor):
            'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]',
            'uploader': 'Funnyplox TV',
            'uploader_id': 'funnyploxtv',
-            'description': 'md5:506f69f7a297ed698ced3375f2363b0e',
+            'description': 'md5:11812366244110c3523968aa74f02521',
            'upload_date': '20140128',
        },
        'params': {
--- a/youtube_dl/extractor/iprima.py
+++ b/youtube_dl/extractor/iprima.py
@@ -0,0 +1,85 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 import re
 from random import random
 from math import floor
 from .common import InfoExtractor
 from ..utils import compat_urllib_request
 class IPrimaIE(InfoExtractor):
    """Information extractor for videos on play.iprima.cz."""

    _VALID_URL = r'https?://play\.iprima\.cz/(?P<videogroup>.+)/(?P<videoid>.+)'

    _TESTS = [{
        'url': 'http://play.iprima.cz/particka/particka-92',
        'info_dict': {
            'id': '39152',
            'ext': 'flv',
            'title': 'Partička (92)',
            'description': 'md5:3740fda51464da35a2d4d0670b8e4fd6',
            'thumbnail': 'http://play.iprima.cz/sites/default/files/image_crops/image_620x349/3/491483_particka-92_image_620x349.jpg',
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Return the info dict (id, title, thumbnail, formats, description) for url.

        The video page supplies the metadata, the geo zone and the per-quality
        file names; the embed player script supplies the stream base URL.
        """
        video_id = re.match(self._VALID_URL, url).group('videoid')
        webpage = self._download_webpage(url, video_id)

        # The player script URL carries two random numbers in its query
        # string, and the request names the video page as its Referer.
        token_name = floor(random()*1073741824)
        token_value = floor(random()*1073741824)
        player_url = 'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' % (
            token_name, token_value)
        player_request = compat_urllib_request.Request(player_url)
        player_request.add_header('Referer', url)
        playerpage = self._download_webpage(player_request, video_id)

        # The second match of the stream assignment is used; its three
        # captured pieces are concatenated into the stream URL.
        stream_matches = re.findall(
            r"embed\['stream'\] = '(.+?)'.+'(\?auth=)'.+'(.+?)';", playerpage)
        base_url = ''.join(stream_matches[1])

        zone_geo = self._html_search_regex(r'"zoneGEO":(.+?),', webpage, 'zoneGEO')
        if zone_geo != '0':
            base_url = base_url.replace('token', 'token_'+zone_geo)

        formats = []
        # The position in this list doubles as the quality rank (lq=0, hq=1, hd=2).
        for quality, format_id in enumerate(['lq', 'hq', 'hd']):
            filename = self._html_search_regex(
                r'"%s_id":(.+?),' % format_id, webpage, 'filename')
            if filename == 'null':
                continue

            real_id = self._search_regex(
                r'Prima-[0-9]{10}-([0-9]+)_', filename, 'real video id')

            if format_id == 'hd':
                filename = 'hq/'+filename

            # Drop the quotes captured from the page and the last four characters.
            play_path = 'mp4:'+filename.replace('"', '')[:-4]
            formats.append({
                'format_id': format_id,
                'url': base_url,
                'quality': quality,
                'play_path': play_path,
                'rtmp_live': True,
                'ext': 'flv',
            })

        self._sort_formats(formats)

        return {
            'id': real_id,
            'title': self._og_search_title(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'formats': formats,
            'description': self._og_search_description(webpage),
        }
--- a/youtube_dl/extractor/lifenews.py
+++ b/youtube_dl/extractor/lifenews.py
@@ -31,7 +31,7 @@ class LifeNewsIE(InfoExtractor):
        webpage = self._download_webpage('http://lifenews.ru/mobile/news/%s' % video_id, video_id, 'Downloading page')
        video_url = self._html_search_regex(
-            r'<video.*?src="([^"]+)"></video>', webpage, 'video URL')
+            r'<video.*?src="([^"]+)".*?></video>', webpage, 'video URL')
        thumbnail = self._html_search_regex(
            r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail')
--- a/youtube_dl/extractor/mooshare.py
+++ b/youtube_dl/extractor/mooshare.py
@@ -0,0 +1,114 @@
 from __future__ import unicode_literals
 import re
 import time
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    compat_urllib_request,
    compat_urllib_parse,
 )
 class MooshareIE(InfoExtractor):
    """Information extractor for videos on mooshare.biz."""

    IE_NAME = 'mooshare'
    IE_DESC = 'Mooshare.biz'
    _VALID_URL = r'http://mooshare\.biz/(?P<id>[\da-z]{12})'
    _TESTS = [
        {
            'url': 'http://mooshare.biz/8dqtk4bjbp8g',
            'md5': '4e14f9562928aecd2e42c6f341c8feba',
            'info_dict': {
                'id': '8dqtk4bjbp8g',
                'ext': 'mp4',
                'title': 'Comedy Football 2011 - (part 1-2)',
                'duration': 893,
            },
        },
        {
            'url': 'http://mooshare.biz/aipjtoc4g95j',
            'info_dict': {
                'id': 'aipjtoc4g95j',
                'ext': 'mp4',
                'title': 'Orange Caramel  Dashing Through the Snow',
                'duration': 212,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            }
        }
    ]

    def _real_extract(self, url):
        """Return the info dict (id, title, thumbnail, duration, formats) for url.

        The landing page provides the title and a hidden form hash; that form
        is then POSTed back to the same video's page to obtain the page that
        lists the media files.

        Raises ExtractorError (expected) when the page reports the video as
        not found or deleted.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        page = self._download_webpage(url, video_id, 'Downloading page')

        if re.search(r'>Video Not Found or Deleted<', page) is not None:
            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        hash_key = self._html_search_regex(r'<input type="hidden" name="hash" value="([^"]+)">', page, 'hash')
        title = self._html_search_regex(r'(?m)<div class="blockTitle">\s*<h2>Watch ([^<]+)</h2>', page, 'title')

        download_form = {
            'op': 'download1',
            'id': video_id,
            'hash': hash_key,
        }

        # Post the form to the page of the video being extracted (not to a
        # fixed video), and send the body as bytes, which Python 3 requires.
        request = compat_urllib_request.Request(
            'http://mooshare.biz/%s' % video_id,
            compat_urllib_parse.urlencode(download_form).encode('ascii'))
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')

        # NOTE(review): presumably the site only accepts the form after a
        # delay; the 5 seconds are kept from the original code -- confirm.
        self.to_screen('%s: Waiting for timeout' % video_id)
        time.sleep(5)

        video_page = self._download_webpage(request, video_id, 'Downloading video page')

        # Thumbnail and duration are optional: a failed lookup is non-fatal.
        thumbnail = self._html_search_regex(r'image:\s*"([^"]+)",', video_page, 'thumbnail', fatal=False)
        duration_str = self._html_search_regex(r'duration:\s*"(\d+)",', video_page, 'duration', fatal=False)
        duration = int(duration_str) if duration_str is not None else None

        formats = []

        # SD video
        mobj = re.search(r'(?m)file:\s*"(?P<url>[^"]+)",\s*provider:', video_page)
        if mobj is not None:
            formats.append({
                'url': mobj.group('url'),
                'format_id': 'sd',
                'format': 'SD',
            })

        # HD video
        mobj = re.search(r'\'hd-2\': { file: \'(?P<url>[^\']+)\' },', video_page)
        if mobj is not None:
            formats.append({
                'url': mobj.group('url'),
                'format_id': 'hd',
                'format': 'HD',
            })

        # rtmp video
        mobj = re.search(r'(?m)file: "(?P<playpath>[^"]+)",\s*streamer: "(?P<rtmpurl>rtmp://[^"]+)",', video_page)
        if mobj is not None:
            formats.append({
                'url': mobj.group('rtmpurl'),
                'play_path': mobj.group('playpath'),
                'rtmp_live': False,
                'ext': 'mp4',
                'format_id': 'rtmp',
                'format': 'HD',
            })

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -1,3 +1,5 @@
 from __future__ import unicode_literals
 import re
 import json
@@ -5,26 +7,59 @@ from .common import InfoExtractor
 class PBSIE(InfoExtractor):
-    _VALID_URL = r'https?://video\.pbs\.org/video/(?P<id>\d+)/?'
+    _VALID_URL = r'''(?x)https?://
        (?:
            # Direct video URL
            video\.pbs\.org/video/(?P<id>[0-9]+)/? |
            # Article with embedded player
           (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) |
           # Player
           video\.pbs\.org/partnerplayer/(?P<player_id>[^/]+)/
        )
    '''
    _TEST = {
-        u'url': u'http://video.pbs.org/video/2365006249/',
+        'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
-        u'file': u'2365006249.mp4',
+        'md5': 'ce1888486f0908d555a8093cac9a7362',
-        u'md5': 'ce1888486f0908d555a8093cac9a7362',
+        'info_dict': {
-        u'info_dict': {
+            'id': '2365006249',
-            u'title': u'A More Perfect Union',
+            'ext': 'mp4',
-            u'description': u'md5:ba0c207295339c8d6eced00b7c363c6a',
+            'title': 'A More Perfect Union',
-            u'duration': 3190,
+            'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
            'duration': 3190,
        },
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        presumptive_id = mobj.group('presumptive_id')
        display_id = presumptive_id
        if presumptive_id:
            webpage = self._download_webpage(url, display_id)
            url = self._search_regex(
                r'<iframe\s+id=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
                webpage, 'player URL')
            mobj = re.match(self._VALID_URL, url)
        player_id = mobj.group('player_id')
        if not display_id:
            display_id = player_id
        if player_id:
            player_page = self._download_webpage(
                url, display_id, note='Downloading player page',
                errnote='Could not download player page')
            video_id = self._search_regex(
                r'<div\s+id="video_([0-9]+)"', player_page, 'video ID')
        else:
            video_id = mobj.group('id')
            display_id = video_id
        info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
-        info_page = self._download_webpage(info_url, video_id)
+        info = self._download_json(info_url, display_id)
-        info =json.loads(info_page)
+
-        return {'id': video_id,
+        return {
            'id': video_id,
            'title': info['title'],
            'url': info['alternate_encoding']['url'],
            'ext': 'mp4',
--- a/youtube_dl/extractor/subtitles.py
+++ b/youtube_dl/extractor/subtitles.py
@@ -68,13 +68,14 @@ class SubtitlesInfoExtractor(InfoExtractor):
    def _request_subtitle_url(self, sub_lang, url):
        """ makes the http request for the subtitle """
        try:
-            return self._download_subtitle_url(sub_lang, url)
+            sub = self._download_subtitle_url(sub_lang, url)
        except ExtractorError as err:
            self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
            return
        if not sub:
            self._downloader.report_warning(u'Did not fetch video subtitles')
            return
        return sub
    def _get_available_subtitles(self, video_id, webpage):
        """
--- a/youtube_dl/extractor/traileraddict.py
+++ b/youtube_dl/extractor/traileraddict.py
@@ -6,6 +6,7 @@ from .common import InfoExtractor
 class TrailerAddictIE(InfoExtractor):
    _WORKING = False
    _VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)'
    _TEST = {
        'url': 'http://www.traileraddict.com/trailer/prince-avalanche/trailer',
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2014.02.03.1'
+__version__ = '2014.02.04'
Author	SHA1	Message	Date
Philipp Hagemeister	eef726c04b	release 2014.02.04	2014-02-04 16:33:19 +01:00
Philipp Hagemeister	acf1555d76	Merge remote-tracking branch 'origin/master'	2014-02-04 16:33:06 +01:00
Philipp Hagemeister	22e7f1a6ec	[pbs] Add support for article pages (Fixes #870 )	2014-02-04 16:31:00 +01:00
Sergey M.	3c49325658	[lifenews] Fix video URL extraction (Closes #2302 )	2014-02-04 21:31:25 +07:00
Sergey M	bb1cd2bea1	[mooshare] Add support for mooshare.biz (Closes #2149 )	2014-02-04 20:53:46 +07:00
Philipp Hagemeister	fdf1f8d4ce	[collegehumor] Adapt test to changed video description	2014-02-04 10:37:01 +01:00
Philipp Hagemeister	117c8c6b97	[bliptv] Remove unused imports	2014-02-04 10:25:19 +01:00
Philipp Hagemeister	5cef4ff09b	[subtittles] Check that the result is not empty	2014-02-04 10:24:17 +01:00
Philipp Hagemeister	91264ce572	[iprima] Use centralized format sorting	2014-02-04 10:24:00 +01:00
Philipp Hagemeister	c79ef8e1ae	Merge remote-tracking branch 'pulpe/_iprima'	2014-02-04 10:21:42 +01:00
Philipp Hagemeister	58d915df51	[traileraddict] mark as broken traileraddict has changed their URL encoding scheme. I'm working on restoring support, but that may take some time.	2014-02-04 10:13:52 +01:00
pulpe	7881a64499	[iprima] Add support for play.iprima.cz	2014-02-04 07:45:41 +01:00
`@@ -1,2 +1,2 @@`

	`__version__ = '2014.02.03.1'`	`__version__ = '2014.02.04'`