Compare commits

...

13 Commits

Author SHA1 Message Date
5277f09dfc release 2015.02.11 2015-02-11 19:02:39 +01:00
2d30521ab9 [youtube] Extract average rating (closes #2362) 2015-02-11 18:39:31 +01:00
050fa43561 flake8: Ignore some error added in pep8 1.6
* E402: we exectute code between imports, like modifying 'sys.path' in the tests
* E731: we assign to lambdas in a lot of places, we may want to consider defining functions in a single line instead (what pep8 recommends)
2015-02-11 18:15:15 +01:00
f36f92f4da [aes] style: Put __all__ variable at the end of the file 2015-02-11 18:15:15 +01:00
124f3bc67d [dotsub] Fix extraction and modernize 2015-02-11 22:33:03 +06:00
d304209a85 [test/parameters.json] Set 'fixup' to 'never'
The fixed audio files for YouTube have a size lower than the minimum required.
2015-02-11 17:25:04 +01:00
c56d7d899d [dctptv] Skip rtmp download 2015-02-11 22:10:33 +06:00
ea5db8469e [canalplus] Add support for itele.fr URLs (Closes #4931) 2015-02-11 16:21:52 +02:00
3811c567e7 [teamcoco] Fix video id extraction 2015-02-11 15:47:19 +02:00
054fe3cc40 [ntvru] Adapt to new direct delivery and modernize (Closes #4918) 2015-02-10 21:35:34 +06:00
af0d11f244 release 2015.02.10.5 2015-02-10 15:56:04 +01:00
9650885be9 [escapist] Filter video differently (Fixes #4919) 2015-02-10 15:55:51 +01:00
596ac6e31f [escapist] Modernize 2015-02-10 15:45:36 +01:00
12 changed files with 128 additions and 110 deletions

View File

@@ -3,4 +3,4 @@ universal = True
[flake8]
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,setup.py,build,.git
ignore = E501
ignore = E402,E501,E731
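
For reference, the two newly ignored codes correspond to patterns the commit message says are used on purpose. The sketch below is a minimal illustration of what each check flags; it is not code from the repository.

import os
import sys

# E402: executable statements between imports, like the tests prepending a
# directory to sys.path before importing their helpers
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

import unittest  # flake8 reports E402 here once code has run above

# E731: assigning a lambda to a name...
sanitize = lambda s: s.strip()

# ...versus the single-line def that pep8 recommends instead
def sanitize_def(s): return s.strip()

print(sanitize(' ok '), sanitize_def(' ok '))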

View File

@@ -39,5 +39,6 @@
"writesubtitles": false,
"allsubtitles": false,
"listssubtitles": false,
"socket_timeout": 20
"socket_timeout": 20,
"fixup": "never"
}
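
The test parameters mirror ordinary YoutubeDL options, so the same setting can be passed programmatically. A minimal sketch, assuming 'fixup' and 'socket_timeout' are accepted by youtube_dl.YoutubeDL exactly as they appear in this test file:

import youtube_dl

ydl_opts = {
    'fixup': 'never',       # skip post-download fixups, as set for the test suite above
    'socket_timeout': 20,
    'skip_download': True,  # only resolve metadata in this example
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    info = ydl.extract_info('http://www.youtube.com/watch?v=BaW_jenozKc')
    print(info.get('title'))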

View File

@@ -1,7 +1,5 @@
from __future__ import unicode_literals
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
import base64
from math import ceil
@@ -329,3 +327,5 @@ def inc(data):
data[i] = data[i] + 1
break
return data
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']

View File

@@ -15,12 +15,13 @@ from ..utils import (
class CanalplusIE(InfoExtractor):
IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv'
_VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'
_VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv|itele\.fr)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s'
_SITE_ID_MAP = {
'canalplus.fr': 'cplus',
'piwiplus.fr': 'teletoon',
'd8.tv': 'd8',
'itele.fr': 'itele',
}
_TESTS = [{
@@ -53,6 +54,16 @@ class CanalplusIE(InfoExtractor):
'upload_date': '20131108',
},
'skip': 'videos get deleted after a while',
}, {
'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559',
'md5': '65aa83ad62fe107ce29e564bb8712580',
'info_dict': {
'id': '1213714',
'ext': 'flv',
'title': 'Aubervilliers : un lycée en colère - Le 11/02/2015 à 06h45',
'description': 'md5:8216206ec53426ea6321321f3b3c16db',
'upload_date': '20150211',
},
}]
def _real_extract(self, url):
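
To see what the widened pattern captures for the new site, here is an illustrative check (not part of the change) that matches the test URL above; the 'site' group is what _SITE_ID_MAP translates to the 'itele' service id:

import re

_VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv|itele\.fr)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))'

mobj = re.match(_VALID_URL, 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559')
print(mobj.group('site'))  # itele.fr, looked up in _SITE_ID_MAP
print(mobj.group('path'))  # video/aubervilliers-un-lycee-en-colere-111559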

View File

@@ -157,6 +157,7 @@ class InfoExtractor(object):
view_count: How many users have watched the video on the platform.
like_count: Number of positive ratings of the video
dislike_count: Number of negative ratings of the video
average_rating: Average rating given by users, the scale used depends on the webpage
comment_count: Number of comments on the video
comments: A list of comments, each with one or more of the following
properties (all but one of text or html optional):
@@ -271,7 +272,7 @@ class InfoExtractor(object):
raise
except compat_http_client.IncompleteRead as e:
raise ExtractorError('A network error has occurred.', cause=e, expected=True)
except (KeyError,) as e:
except (KeyError, StopIteration) as e:
raise ExtractorError('An extractor error has occurred.', cause=e)
def set_downloader(self, downloader):
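
The new StopIteration case matters because extractors increasingly pick an entry with next() over a generator (the escapist change further down does exactly that). A minimal, standalone illustration of the failure mode that is now reported as an ExtractorError instead of leaking a bare StopIteration:

playlist = [{'eventCategory': 'Thumbnail', 'url': 'http://example.com/thumb.jpg'}]
try:
    video_url = next(
        p['url'] for p in playlist
        if p.get('eventCategory') == 'Video')
except StopIteration:
    # with the change above, extract() converts this into an ExtractorError
    print('no matching playlist entry')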

View File

@@ -14,6 +14,10 @@ class DctpTvIE(InfoExtractor):
'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
'ext': 'flv',
'title': 'Videoinstallation für eine Kaufhausfassade'
},
'params': {
# rtmp download
'skip_download': True,
}
}

View File

@@ -1,13 +1,14 @@
from __future__ import unicode_literals
import re
import time
from .common import InfoExtractor
from ..utils import (
float_or_none,
int_or_none,
)
class DotsubIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
_VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)'
_TEST = {
'url': 'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27',
'md5': '0914d4d69605090f623b7ac329fea66e',
@@ -15,28 +16,37 @@ class DotsubIE(InfoExtractor):
'id': 'aed3b8b2-1889-4df5-ae63-ad85f5572f27',
'ext': 'flv',
'title': 'Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary',
'description': 'md5:699a0f7f50aeec6042cb3b1db2d0d074',
'thumbnail': 're:^https?://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
'duration': 3169,
'uploader': '4v4l0n42',
'description': 'Pyramids of Waste (2010) also known as "The lightbulb conspiracy" is a documentary about how our economic system based on consumerism and planned obsolescence is breaking our planet down.\r\n\r\nSolutions to this can be found at:\r\nhttp://robotswillstealyourjob.com\r\nhttp://www.federicopistono.org\r\n\r\nhttp://opensourceecology.org\r\nhttp://thezeitgeistmovement.com',
'thumbnail': 'http://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p',
'timestamp': 1292248482.625,
'upload_date': '20101213',
'view_count': int,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
info_url = "https://dotsub.com/api/media/%s/metadata" % video_id
info = self._download_json(info_url, video_id)
date = time.gmtime(info['dateCreated'] / 1000) # The timestamp is in milliseconds
video_id = self._match_id(url)
info = self._download_json(
'https://dotsub.com/api/media/%s/metadata' % video_id, video_id)
video_url = info.get('mediaURI')
if not video_url:
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(
r'"file"\s*:\s*\'([^\']+)', webpage, 'video url')
return {
'id': video_id,
'url': info['mediaURI'],
'url': video_url,
'ext': 'flv',
'title': info['title'],
'thumbnail': info['screenshotURI'],
'description': info['description'],
'uploader': info['user'],
'view_count': info['numberOfViews'],
'upload_date': '%04i%02i%02i' % (date.tm_year, date.tm_mon, date.tm_mday),
'description': info.get('description'),
'thumbnail': info.get('screenshotURI'),
'duration': int_or_none(info.get('duration'), 1000),
'uploader': info.get('user'),
'timestamp': float_or_none(info.get('dateCreated'), 1000),
'view_count': int_or_none(info.get('numberOfViews')),
}
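
The scaling helpers replace the manual division by 1000. Assuming youtube_dl.utils exposes float_or_none and int_or_none with a divisor as the second argument, as used above, sample millisecond values from the dotsub API map onto the test expectations like this:

from youtube_dl.utils import float_or_none, int_or_none

print(float_or_none(1292248482625, 1000))  # 1292248482.625, the 'timestamp' in the test above
print(int_or_none(3169000, 1000))          # 3169, the 'duration' in the test above (sample input)
print(float_or_none(None, 1000))           # None, which is why the .get() lookups are safe here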

View File

@@ -1,18 +1,17 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
)
from ..utils import (
ExtractorError,
js_to_json,
)
class EscapistIE(InfoExtractor):
_VALID_URL = r'^https?://?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<id>[0-9]+)-'
_VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
_TEST = {
'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
@@ -20,31 +19,30 @@ class EscapistIE(InfoExtractor):
'id': '6618',
'ext': 'mp4',
'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
'uploader': 'the-escapist-presents',
'uploader_id': 'the-escapist-presents',
'uploader': 'The Escapist Presents',
'title': "Breaking Down Baldur's Gate",
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
showName = mobj.group('showname')
video_id = mobj.group('id')
self.report_extraction(video_id)
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
videoDesc = self._html_search_regex(
r'<meta name="description" content="([^"]*)"',
webpage, 'description', fatal=False)
uploader_id = self._html_search_regex(
r"<h1 class='headline'><a href='/videos/view/(.*?)'",
webpage, 'uploader ID', fatal=False)
uploader = self._html_search_regex(
r"<h1 class='headline'>(.*?)</a>",
webpage, 'uploader', fatal=False)
description = self._html_search_meta('description', webpage)
playerUrl = self._og_search_video_url(webpage, name='player URL')
raw_title = self._html_search_meta('title', webpage, fatal=True)
title = raw_title.partition(' : ')[2]
title = self._html_search_regex(
r'<meta name="title" content="([^"]*)"',
webpage, 'title').split(' : ')[-1]
configUrl = self._search_regex('config=(.*)$', playerUrl, 'config URL')
configUrl = compat_urllib_parse.unquote(configUrl)
player_url = self._og_search_video_url(webpage, name='player URL')
config_url = compat_urllib_parse.unquote(self._search_regex(
r'config=(.*)$', player_url, 'config URL'))
formats = []
@@ -53,18 +51,21 @@ class EscapistIE(InfoExtractor):
cfgurl, video_id,
'Downloading ' + name + ' configuration',
'Unable to download ' + name + ' configuration',
transform_source=lambda s: s.replace("'", '"'))
transform_source=js_to_json)
playlist = config['playlist']
video_url = next(
p['url'] for p in playlist
if p.get('eventCategory') == 'Video')
formats.append({
'url': playlist[1]['url'],
'url': video_url,
'format_id': name,
'quality': quality,
})
_add_format('normal', configUrl, quality=0)
hq_url = (configUrl +
('&hq=1' if '?' in configUrl else configUrl + '?hq=1'))
_add_format('normal', config_url, quality=0)
hq_url = (config_url +
('&hq=1' if '?' in config_url else config_url + '?hq=1'))
try:
_add_format('hq', hq_url, quality=1)
except ExtractorError:
@@ -75,9 +76,10 @@
return {
'id': video_id,
'formats': formats,
'uploader': showName,
'uploader': uploader,
'uploader_id': uploader_id,
'title': title,
'thumbnail': self._og_search_thumbnail(webpage),
'description': videoDesc,
'player_url': playerUrl,
'description': description,
'player_url': player_url,
}
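
The ad-hoc single-quote replacement is swapped for js_to_json. A small sketch, assuming youtube_dl.utils.js_to_json turns a single-quoted, JavaScript-style config (a hypothetical, abbreviated payload below) into parseable JSON:

import json
from youtube_dl.utils import js_to_json

raw_config = "{'playlist': [{'eventCategory': 'Video', 'url': 'http://example.com/video.mp4'}]}"
config = json.loads(js_to_json(raw_config))
video_url = next(
    p['url'] for p in config['playlist']
    if p.get('eventCategory') == 'Video')
print(video_url)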

View File

@@ -3,7 +3,9 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
unescapeHTML
clean_html,
xpath_text,
int_or_none,
)
@@ -14,73 +16,63 @@ class NTVRuIE(InfoExtractor):
_TESTS = [
{
'url': 'http://www.ntv.ru/novosti/863142/',
'md5': 'ba7ea172a91cb83eb734cad18c10e723',
'info_dict': {
'id': '746000',
'ext': 'flv',
'ext': 'mp4',
'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
'thumbnail': 're:^http://.*\.jpg',
'duration': 136,
},
'params': {
# rtmp download
'skip_download': True,
},
},
{
'url': 'http://www.ntv.ru/video/novosti/750370/',
'md5': 'adecff79691b4d71e25220a191477124',
'info_dict': {
'id': '750370',
'ext': 'flv',
'ext': 'mp4',
'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
'thumbnail': 're:^http://.*\.jpg',
'duration': 172,
},
'params': {
# rtmp download
'skip_download': True,
},
},
{
'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
'md5': '82dbd49b38e3af1d00df16acbeab260c',
'info_dict': {
'id': '747480',
'ext': 'flv',
'title': '«Сегодня». 21 марта 2014 года. 16:00 ',
'description': '«Сегодня». 21 марта 2014 года. 16:00 ',
'ext': 'mp4',
'title': '«Сегодня». 21 марта 2014 года. 16:00',
'description': '«Сегодня». 21 марта 2014 года. 16:00',
'thumbnail': 're:^http://.*\.jpg',
'duration': 1496,
},
'params': {
# rtmp download
'skip_download': True,
},
},
{
'url': 'http://www.ntv.ru/kino/Koma_film',
'md5': 'f825770930937aa7e5aca0dc0d29319a',
'info_dict': {
'id': '758100',
'ext': 'flv',
'id': '1007609',
'ext': 'mp4',
'title': 'Остросюжетный фильм «Кома»',
'description': 'Остросюжетный фильм «Кома»',
'thumbnail': 're:^http://.*\.jpg',
'duration': 5592,
},
'params': {
# rtmp download
'skip_download': True,
},
},
{
'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/',
'md5': '9320cd0e23f3ea59c330dc744e06ff3b',
'info_dict': {
'id': '751482',
'ext': 'flv',
'ext': 'mp4',
'title': '«Дело врачей»: «Деревце жизни»',
'description': '«Дело врачей»: «Деревце жизни»',
'thumbnail': 're:^http://.*\.jpg',
'duration': 2590,
},
'params': {
# rtmp download
'skip_download': True,
},
},
]
@@ -92,45 +84,36 @@ class NTVRuIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
page = self._download_webpage(url, video_id)
video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, page, 'video id')
webpage = self._download_webpage(url, video_id)
player = self._download_xml('http://www.ntv.ru/vi%s/' % video_id, video_id, 'Downloading video XML')
title = unescapeHTML(player.find('./data/title').text)
description = unescapeHTML(player.find('./data/description').text)
video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, webpage, 'video id')
player = self._download_xml(
'http://www.ntv.ru/vi%s/' % video_id,
video_id, 'Downloading video XML')
title = clean_html(xpath_text(player, './data/title', 'title', fatal=True))
description = clean_html(xpath_text(player, './data/description', 'description'))
video = player.find('./data/video')
video_id = video.find('./id').text
thumbnail = video.find('./splash').text
duration = int(video.find('./totaltime').text)
view_count = int(video.find('./views').text)
puid22 = video.find('./puid22').text
video_id = xpath_text(video, './id', 'video id')
thumbnail = xpath_text(video, './splash', 'thumbnail')
duration = int_or_none(xpath_text(video, './totaltime', 'duration'))
view_count = int_or_none(xpath_text(video, './views', 'view count'))
apps = {
'4': 'video1',
'7': 'video2',
}
app = apps.get(puid22, apps['4'])
token = self._download_webpage(
'http://stat.ntv.ru/services/access/token',
video_id, 'Downloading access token')
formats = []
for format_id in ['', 'hi', 'webm']:
file = video.find('./%sfile' % format_id)
if file is None:
file_ = video.find('./%sfile' % format_id)
if file_ is None:
continue
size = video.find('./%ssize' % format_id)
formats.append({
'url': 'rtmp://media.ntv.ru/%s' % app,
'app': app,
'play_path': file.text,
'rtmp_conn': 'B:1',
'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128',
'page_url': 'http://www.ntv.ru',
'flash_version': 'LNX 11,2,202,341',
'rtmp_live': True,
'ext': 'flv',
'filesize': int(size.text),
'url': 'http://media2.ntv.ru/vod/%s&tok=%s' % (file_.text, token),
'filesize': int_or_none(size.text if size is not None else None),
})
self._sort_formats(formats)
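
The xpath_text/int_or_none helpers make missing elements non-fatal. A small, self-contained illustration (sample XML, not the real player response) of the difference from the old .find(...).text chain:

import xml.etree.ElementTree as ET
from youtube_dl.utils import xpath_text, int_or_none

player = ET.fromstring(
    '<player><data><video><id>746000</id><totaltime>136</totaltime></video></data></player>')
video = player.find('./data/video')

print(xpath_text(video, './id', 'video id'))                      # 746000
print(int_or_none(xpath_text(video, './totaltime', 'duration')))  # 136
print(xpath_text(video, './views', 'view count'))                 # None, where .find('./views').text would raise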

View File

@@ -30,6 +30,11 @@ class TeamcocoIE(InfoExtractor):
}
}
]
_VIDEO_ID_REGEXES = (
r'"eVar42"\s*:\s*(\d+)',
r'Ginger\.TeamCoco\.openInApp\("video",\s*"([^"]+)"',
r'"id_not"\s*:\s*(\d+)'
)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -40,8 +45,7 @@ class TeamcocoIE(InfoExtractor):
video_id = mobj.group("video_id")
if not video_id:
video_id = self._html_search_regex(
r'<div\s+class="player".*?data-id="(\d+?)"',
webpage, 'video id')
self._VIDEO_ID_REGEXES, webpage, 'video id')
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
data = self._download_xml(
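
Since _html_search_regex is now handed the whole _VIDEO_ID_REGEXES tuple, the patterns are tried in order until one matches. A standalone sketch of that fallback against a made-up page fragment (the video id here is an arbitrary sample):

import re

_VIDEO_ID_REGEXES = (
    r'"eVar42"\s*:\s*(\d+)',
    r'Ginger\.TeamCoco\.openInApp\("video",\s*"([^"]+)"',
    r'"id_not"\s*:\s*(\d+)',
)

# hypothetical page fragment; only the second pattern matches
webpage = '<script>Ginger.TeamCoco.openInApp("video", "80187");</script>'

video_id = next(
    (m.group(1) for p in _VIDEO_ID_REGEXES
     for m in [re.search(p, webpage)] if m),
    None)
print(video_id)  # 80187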

View File

@@ -25,6 +25,7 @@ from ..compat import (
from ..utils import (
clean_html,
ExtractorError,
float_or_none,
get_element_by_attribute,
get_element_by_id,
int_or_none,
@@ -1124,6 +1125,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'view_count': view_count,
'like_count': like_count,
'dislike_count': dislike_count,
'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
'formats': formats,
}
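
video_info is parsed from a query string, so every field is a list of strings; the get('avg_rating', [None])[0] lookup plus float_or_none covers both the present and the absent case. A minimal check with a sample value, assuming float_or_none returns None for None input:

from youtube_dl.utils import float_or_none

video_info = {'avg_rating': ['4.8']}  # sample value, as parsed from the query string
print(float_or_none(video_info.get('avg_rating', [None])[0]))  # 4.8
print(float_or_none({}.get('avg_rating', [None])[0]))          # None when the field is missing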

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2015.02.10.4'
__version__ = '2015.02.11'