release 2014.12.11

[zdf:channel] Simplify (#4427 )
Merge remote-tracking branch 'akretz/master'
2014-12-11 17:08:31 +01:00 · 2014-12-11 17:07:59 +01:00 · 2014-12-11 16:35:45 +01:00 · 2014-12-11 16:34:37 +01:00 · 2014-12-11 16:33:28 +01:00 · 2014-12-11 16:32:48 +01:00
10 changed files with 128 additions and 58 deletions
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@@ -526,7 +526,7 @@ from .youtube import (
    YoutubeUserIE,
    YoutubeWatchLaterIE,
 )
-from .zdf import ZDFIE
+from .zdf import ZDFIE, ZDFChannelIE
 from .zingmp3 import (
    ZingMp3SongIE,
    ZingMp3AlbumIE,
--- a/youtube_dl/extractor/behindkink.py
+++ b/youtube_dl/extractor/behindkink.py
@@ -10,15 +10,15 @@ from ..utils import url_basename
 class BehindKinkIE(InfoExtractor):
    _VALID_URL = r'http://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
    _TEST = {
-        'url': 'http://www.behindkink.com/2014/08/14/ab1576-performers-voice-finally-heard-the-bill-is-killed/',
-        'md5': '41ad01222b8442089a55528fec43ec01',
+        'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/',
+        'md5': '507b57d8fdcd75a41a9a7bdb7989c762',
        'info_dict': {
-            'id': '36370',
+            'id': '37127',
            'ext': 'mp4',
-            'title': 'AB1576 - PERFORMERS VOICE FINALLY HEARD - THE BILL IS KILLED!',
-            'description': 'The adult industry voice was finally heard as Assembly Bill 1576 remained\xa0 in suspense today at the Senate Appropriations Hearing. AB1576 was, among other industry damaging issues, a condom mandate...',
-            'upload_date': '20140814',
-            'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/08/36370_AB1576_Win.jpg',
+            'title': 'What are you passionate about – Marley Blaze',
+            'description': 'md5:aee8e9611b4ff70186f752975d9b94b4',
+            'upload_date': '20141205',
+            'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg',
            'age_limit': 18,
        }
    }
@@ -26,26 +26,19 @@ class BehindKinkIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('id')
-        year = mobj.group('year')
-        month = mobj.group('month')
-        day = mobj.group('day')
-        upload_date = year + month + day

        webpage = self._download_webpage(url, display_id)

        video_url = self._search_regex(
-            r"'file':\s*'([^']+)'",
-            webpage, 'URL base')
-
-        video_id = url_basename(video_url)
-        video_id = video_id.split('_')[0]
+            r'<source src="([^"]+)"', webpage, 'video URL')
+        video_id = url_basename(video_url).split('_')[0]
+        upload_date = mobj.group('year') + mobj.group('month') + mobj.group('day')

        return {
            'id': video_id,
-            'url': video_url,
-            'ext': 'mp4',
-            'title': self._og_search_title(webpage),
            'display_id': display_id,
+            'url': video_url,
+            'title': self._og_search_title(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': self._og_search_description(webpage),
            'upload_date': upload_date,
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -13,9 +13,10 @@ from ..compat import (
    compat_urllib_request,
 )
 from ..utils import (
-    urlencode_postdata,
    ExtractorError,
+    int_or_none,
    limit_length,
+    urlencode_postdata,
 )


@@ -36,7 +37,6 @@ class FacebookIE(InfoExtractor):
        'info_dict': {
            'id': '637842556329505',
            'ext': 'mp4',
-            'duration': 38,
            'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam',
        }
    }, {
@@ -107,9 +107,7 @@ class FacebookIE(InfoExtractor):
        self._login()

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
        url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
        webpage = self._download_webpage(url, video_id)

@@ -149,6 +147,6 @@ class FacebookIE(InfoExtractor):
            'id': video_id,
            'title': video_title,
            'url': video_url,
-            'duration': int(video_data['video_duration']),
-            'thumbnail': video_data['thumbnail_src'],
+            'duration': int_or_none(video_data.get('video_duration')),
+            'thumbnail': video_data.get('thumbnail_src'),
        }
--- a/youtube_dl/extractor/ntv.py
+++ b/youtube_dl/extractor/ntv.py
@@ -130,7 +130,7 @@ class NTVIE(InfoExtractor):
                'rtmp_conn': 'B:1',
                'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128',
                'page_url': 'http://www.ntv.ru',
-                'flash_ver': 'LNX 11,2,202,341',
+                'flash_version': 'LNX 11,2,202,341',
                'rtmp_live': True,
                'ext': 'flv',
                'filesize': int(size.text),
--- a/youtube_dl/extractor/smotri.py
+++ b/youtube_dl/extractor/smotri.py
@@ -274,15 +274,18 @@ class SmotriBroadcastIE(InfoExtractor):
        broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page')

        if re.search('>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None:
-            raise ExtractorError('Broadcast %s does not exist' % broadcast_id, expected=True)
+            raise ExtractorError(
+                'Broadcast %s does not exist' % broadcast_id, expected=True)

        # Adult content
        if re.search('EroConfirmText">', broadcast_page) is not None:

            (username, password) = self._get_login_info()
            if username is None:
-                raise ExtractorError('Erotic broadcasts allowed only for registered users, '
-                                     'use --username and --password options to provide account credentials.', expected=True)
+                raise ExtractorError(
+                    'Erotic broadcasts allowed only for registered users, '
+                    'use --username and --password options to provide account credentials.',
+                    expected=True)

            login_form = {
                'login-hint53': '1',
@@ -291,9 +294,11 @@ class SmotriBroadcastIE(InfoExtractor):
                'password': password,
            }

-            request = compat_urllib_request.Request(broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))
+            request = compat_urllib_request.Request(
+                broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form))
            request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-            broadcast_page = self._download_webpage(request, broadcast_id, 'Logging in and confirming age')
+            broadcast_page = self._download_webpage(
+                request, broadcast_id, 'Logging in and confirming age')

            if re.search('>Неверный логин или пароль<', broadcast_page) is not None:
                raise ExtractorError('Unable to log in: bad username or password', expected=True)
@@ -303,7 +308,7 @@ class SmotriBroadcastIE(InfoExtractor):
            adult_content = False

        ticket = self._html_search_regex(
-            'window\.broadcast_control\.addFlashVar\\(\'file\', \'([^\']+)\'\\);',
+            r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'([^']+)'\)",
            broadcast_page, 'broadcast ticket')

        url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket
@@ -312,26 +317,31 @@ class SmotriBroadcastIE(InfoExtractor):
        if broadcast_password:
            url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()

-        broadcast_json_page = self._download_webpage(url, broadcast_id, 'Downloading broadcast JSON')
+        broadcast_json_page = self._download_webpage(
+            url, broadcast_id, 'Downloading broadcast JSON')

        try:
            broadcast_json = json.loads(broadcast_json_page)

            protected_broadcast = broadcast_json['_pass_protected'] == 1
            if protected_broadcast and not broadcast_password:
-                raise ExtractorError('This broadcast is protected by a password, use the --video-password option', expected=True)
+                raise ExtractorError(
+                    'This broadcast is protected by a password, use the --video-password option',
+                    expected=True)

            broadcast_offline = broadcast_json['is_play'] == 0
            if broadcast_offline:
                raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True)

            rtmp_url = broadcast_json['_server']
-            if not rtmp_url.startswith('rtmp://'):
+            mobj = re.search(r'^rtmp://[^/]+/(?P<app>.+)/?$', rtmp_url)
+            if not mobj:
                raise ExtractorError('Unexpected broadcast rtmp URL')

            broadcast_playpath = broadcast_json['_streamName']
+            broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL'])
            broadcast_thumbnail = broadcast_json['_imgURL']
-            broadcast_title = broadcast_json['title']
+            broadcast_title = self._live_title(broadcast_json['title'])
            broadcast_description = broadcast_json['description']
            broadcaster_nick = broadcast_json['nick']
            broadcaster_login = broadcast_json['login']
@@ -352,6 +362,9 @@ class SmotriBroadcastIE(InfoExtractor):
            'age_limit': 18 if adult_content else 0,
            'ext': 'flv',
            'play_path': broadcast_playpath,
+            'player_url': 'http://pics.smotri.com/broadcast_play.swf',
+            'app': broadcast_app,
            'rtmp_live': True,
-            'rtmp_conn': rtmp_conn
+            'rtmp_conn': rtmp_conn,
+            'is_live': True,
        }
--- a/youtube_dl/extractor/tvplay.py
+++ b/youtube_dl/extractor/tvplay.py
@@ -182,8 +182,8 @@ class TVPlayIE(InfoExtractor):
            'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON')

        if video['is_geo_blocked']:
-            raise ExtractorError(
-                'This content is not available in your country due to copyright reasons', expected=True)
+            self.report_warning(
+                'This content might not be available in your country due to copyright reasons')

        streams = self._download_json(
            'http://playapi.mtgx.tv/v1/videos/stream/%s' % video_id, video_id, 'Downloading streams JSON')
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -14,23 +14,24 @@ from .common import InfoExtractor, SearchInfoExtractor
 from .subtitles import SubtitlesInfoExtractor
 from ..jsinterp import JSInterpreter
 from ..swfinterp import SWFInterpreter
-from ..utils import (
+from ..compat import (
    compat_chr,
    compat_parse_qs,
    compat_urllib_parse,
    compat_urllib_request,
    compat_urlparse,
    compat_str,
-
+)
+from ..utils import (
    clean_html,
-    get_element_by_id,
-    get_element_by_attribute,
    ExtractorError,
+    get_element_by_attribute,
+    get_element_by_id,
    int_or_none,
    OnDemandPagedList,
+    orderedSet,
    unescapeHTML,
    unified_strdate,
-    orderedSet,
    uppercase_escape,
 )

@@ -432,7 +433,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
            'expected_warnings': [
                'DASH manifest missing',
            ]
-        }
+        },
+        # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
+        {
+            'url': 'lqQg6PlCWgI',
+            'info_dict': {
+                'id': 'lqQg6PlCWgI',
+                'ext': 'mp4',
+                'upload_date': '20120731',
+                'uploader_id': 'olympic',
+                'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
+                'uploader': 'Olympics',
+                'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
+            },
+            'params': {
+                'skip_download': 'requires avconv',
+            }
+        },
    ]

    def __init__(self, *args, **kwargs):
@@ -682,7 +699,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
        return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')

-    def _parse_dash_manifest(self, video_id, dash_manifest_url):
+    def _parse_dash_manifest(
+            self, video_id, dash_manifest_url, player_url, age_gate):
        def decrypt_sig(mobj):
            s = mobj.group(1)
            dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
@@ -855,7 +873,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):

        m_cat_container = self._search_regex(
            r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
-            video_webpage, 'categories', fatal=False)
+            video_webpage, 'categories', default=None)
        if m_cat_container:
            category = self._html_search_regex(
                r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
@@ -933,7 +951,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                'url': video_info['conn'][0],
                'player_url': player_url,
            }]
-        elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1:
+        elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
            encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
            if 'rtmpe%3Dyes' in encoded_url_map:
                raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
@@ -999,13 +1017,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        # Look for the DASH manifest
        if self._downloader.params.get('youtube_include_dash_manifest', True):
            dash_mpd = video_info.get('dashmpd')
-            if not dash_mpd:
-                self.report_warning('%s: DASH manifest missing' % video_id)
-            else:
+            if dash_mpd:
                dash_manifest_url = dash_mpd[0]
                try:
                    dash_formats = self._parse_dash_manifest(
-                        video_id, dash_manifest_url)
+                        video_id, dash_manifest_url, player_url, age_gate)
                except (ExtractorError, KeyError) as e:
                    self.report_warning(
                        'Skipping DASH manifest: %r' % e, video_id)
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -1,12 +1,14 @@
 # coding: utf-8
 from __future__ import unicode_literals

+import functools
 import re

 from .common import InfoExtractor
 from ..utils import (
    int_or_none,
    unified_strdate,
+    OnDemandPagedList,
 )


@@ -87,7 +89,7 @@ def extract_from_xml_url(ie, video_id, xml_url):


 class ZDFIE(InfoExtractor):
-    _VALID_URL = r'^https?://www\.zdf\.de/ZDFmediathek(?P<hash>#)?/(.*beitrag/(?:video/)?)(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'
+    _VALID_URL = r'(?:zdf:|zdf:video:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'

    _TEST = {
        'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt',
@@ -106,6 +108,52 @@ class ZDFIE(InfoExtractor):

    def _real_extract(self, url):
        video_id = self._match_id(url)
-
        xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
        return extract_from_xml_url(self, video_id, xml_url)
+
+
+class ZDFChannelIE(InfoExtractor):
+    _VALID_URL = r'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/)(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic',
+        'info_dict': {
+            'id': '1586442',
+        },
+        'playlist_count': 4,
+    }
+    _PAGE_SIZE = 50
+
+    def _fetch_page(self, channel_id, page):
+        offset = page * self._PAGE_SIZE
+        xml_url = (
+            'http://www.zdf.de/ZDFmediathek/xmlservice/web/aktuellste?ak=web&offset=%d&maxLength=%d&id=%s'
+            % (offset, self._PAGE_SIZE, channel_id))
+        doc = self._download_xml(
+            xml_url, channel_id,
+            note='Downloading channel info',
+            errnote='Failed to download channel info')
+
+        title = doc.find('.//information/title').text
+        description = doc.find('.//information/detail').text
+        for asset in doc.findall('.//teasers/teaser'):
+            a_type = asset.find('./type').text
+            a_id = asset.find('./details/assetId').text
+            if a_type not in ('video', 'topic'):
+                continue
+            yield {
+                '_type': 'url',
+                'playlist_title': title,
+                'playlist_description': description,
+                'url': 'zdf:%s:%s' % (a_type, a_id),
+            }
+
+    def _real_extract(self, url):
+        channel_id = self._match_id(url)
+        entries = OnDemandPagedList(
+            functools.partial(self._fetch_page, channel_id), self._PAGE_SIZE)
+
+        return {
+            '_type': 'playlist',
+            'id': channel_id,
+            'entries': entries,
+        }
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -712,8 +712,10 @@ def date_from_str(date_str):
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
-    if date_str == 'now'or date_str == 'today':
+    if date_str in ('now', 'today'):
        return today
+    if date_str == 'yesterday':
+        return today - datetime.timedelta(days=1)
    match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        sign = match.group('sign')
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2014.12.10.1'
+__version__ = '2014.12.11'
Author	SHA1	Message	Date
Philipp Hagemeister	cb007f47c1	release 2014.12.11	2014-12-11 17:08:31 +01:00
Philipp Hagemeister	9abd500a74	[zdf:channel] Simplify (#4427 )	2014-12-11 17:07:59 +01:00
Philipp Hagemeister	cf68bcaeff	Merge remote-tracking branch 'akretz/master'	2014-12-11 16:35:45 +01:00
Philipp Hagemeister	cbe2bd914d	[youtube] Amend test	2014-12-11 16:34:37 +01:00
Philipp Hagemeister	75111274ed	[youtube] Do not warn if DASH manifest is missing (#4442 )	2014-12-11 16:33:28 +01:00
Philipp Hagemeister	624dcebff6	[youtube] Make category optional (#4442 )	2014-12-11 16:32:48 +01:00
Philipp Hagemeister	9684f17cde	Merge remote-tracking branch 'akretz/youtube_fix'	2014-12-11 16:28:10 +01:00
Philipp Hagemeister	e52a40abf7	[youtube] Add test case for #4431	2014-12-11 16:28:07 +01:00
Philipp Hagemeister	0daa05961b	Merge branch 'master' of github.com:rg3/youtube-dl	2014-12-11 16:23:01 +01:00
Naglis Jonaitis	158731f83e	[tvplay] Don't raise an exception if `is_geo_blocked` is True Videos which return `is_geo_blocked' to be True can actually be downloaded from the country to which the video is restricted	2014-12-11 17:07:50 +02:00
Adrian Kretz	24270b0301	[youtube] The case that 'url_encoded_fmt_stream_map' or 'adaptive_fmts' is the empty string is handled accordingly (fixes #4431 )	2014-12-11 16:00:46 +01:00
Naglis Jonaitis	3c1b81b957	[ntv] Rename `flash_ver` to `flash_version` in the format dict RTMP downloader uses `flash_version`	2014-12-11 16:58:45 +02:00
Philipp Hagemeister	45c24df512	Merge branch 'master' of github.com:rg3/youtube-dl	2014-12-11 15:27:54 +01:00
Sergey M․	bf671b605e	[behindkink] Remove superfluous whitespace	2014-12-11 20:09:52 +06:00
Sergey M․	09c82fbc9a	[behindkink] Simplify	2014-12-11 20:06:19 +06:00
Sergey M.	3bca0409fe	Merge pull request #4440 from 5moufl/behindkink-fix [BehindKink] update	2014-12-11 19:58:31 +06:00
5moufl	d6f78a354d	[BehindKink] Replace test Old one is not accessible anymore	2014-12-11 14:26:59 +01:00
5moufl	e0b9d47387	[BehindKink] Update URL extraction	2014-12-11 14:25:26 +01:00
Philipp Hagemeister	f8795e102b	[utils] Add "yesterday" as a date keyword	2014-12-11 10:29:30 +01:00
Philipp Hagemeister	4bb4a18876	[youtube] Fix imports	2014-12-11 10:08:17 +01:00
Adrian Kretz	8560c61842	[zdf] Add support for channels	2014-12-10 17:29:03 +01:00
Sergey M․	a81bbebf44	[smotri:broadcast] Fix extraction	2014-12-10 20:22:49 +06:00
Philipp Hagemeister	72e3ffeb74	release 2014.12.10.3	2014-12-10 15:19:08 +01:00
Philipp Hagemeister	2fc9f2b41d	[facebook] Make thumbnail and duration optional Fixes #4425. Looks like both properties aren't given to us anymore. For now, just fall back to not returning them.	2014-12-10 15:18:36 +01:00
Philipp Hagemeister	5f3544baa3	release 2014.12.10.2	2014-12-10 14:39:06 +01:00
Philipp Hagemeister	da27660014	[youtube] Pass in all variables to DASH manifest (Fixes #4424 )	2014-12-10 14:39:00 +01:00