release 2015.02.02.3

[vevo] Restore SMIL support (#3656 )
release 2015.02.02.2
2015-02-02 22:56:15 +01:00 · 2015-02-02 22:48:12 +01:00 · 2015-02-02 21:58:58 +01:00 · 2015-02-02 21:58:17 +01:00 · 2015-02-02 21:57:48 +01:00 · 2015-02-02 21:48:54 +01:00
13 changed files with 195 additions and 22 deletions
--- a/README.md
+++ b/README.md
@ -368,11 +368,11 @@ which means you can modify it, redistribute it or use it however you like.
    --add-metadata                   write metadata to the video file
    --xattrs                         write metadata to the video file's xattrs
                                     (using dublin core and xdg standards)
-    --fixup POLICY                   (experimental) Automatically correct known
-                                     faults of the file. One of never (do
-                                     nothing), warn (only emit a warning),
-                                     detect_or_warn(check whether we can do
-                                     anything about it, warn otherwise
+    --fixup POLICY                   Automatically correct known faults of the
+                                     file. One of never (do nothing), warn (only
+                                     emit a warning), detect_or_warn(the
+                                     default; fix file if we can, warn
+                                     otherwise)
    --prefer-avconv                  Prefer avconv over ffmpeg for running the
                                     postprocessors (default)
    --prefer-ffmpeg                  Prefer ffmpeg over avconv for running the
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -238,6 +238,8 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(parse_duration('5 s'), 5)
        self.assertEqual(parse_duration('3 min'), 180)
        self.assertEqual(parse_duration('2.5 hours'), 9000)
+        self.assertEqual(parse_duration('02:03:04'), 7384)
+        self.assertEqual(parse_duration('01:02:03:04'), 93784)

    def test_fix_xml_ampersands(self):
        self.assertEqual(
@ -371,6 +373,16 @@ class TestUtil(unittest.TestCase):
        on = js_to_json('{"abc": true}')
        self.assertEqual(json.loads(on), {'abc': True})

+        # Ignore JavaScript code as well
+        on = js_to_json('''{
+            "x": 1,
+            y: "a",
+            z: some.code
+        }''')
+        d = json.loads(on)
+        self.assertEqual(d['x'], 1)
+        self.assertEqual(d['y'], 'a')
+
    def test_clean_html(self):
        self.assertEqual(clean_html('a:\nb'), 'a: b')
        self.assertEqual(clean_html('a:\n   "b"'), 'a:    "b"')
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@ -45,6 +45,12 @@ class ExternalFD(FileDownloader):
    def supports(cls, info_dict):
        return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')

+    def _source_address(self, command_option):
+        source_address = self.params.get('source_address')
+        if source_address is None:
+            return []
+        return [command_option, source_address]
+
    def _call_downloader(self, tmpfilename, info_dict):
        """ Either overwrite this or implement _make_cmd """
        cmd = self._make_cmd(tmpfilename, info_dict)
@ -72,6 +78,7 @@ class CurlFD(ExternalFD):
        cmd = [self.exe, '-o', tmpfilename]
        for key, val in info_dict['http_headers'].items():
            cmd += ['--header', '%s: %s' % (key, val)]
+        cmd += self._source_address('--interface')
        cmd += ['--', info_dict['url']]
        return cmd

@ -81,6 +88,7 @@ class WgetFD(ExternalFD):
        cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
        for key, val in info_dict['http_headers'].items():
            cmd += ['--header', '%s: %s' % (key, val)]
+        cmd += self._source_address('--bind-address')
        cmd += ['--', info_dict['url']]
        return cmd

@ -96,6 +104,7 @@ class Aria2cFD(ExternalFD):
        cmd += ['--out', os.path.basename(tmpfilename)]
        for key, val in info_dict['http_headers'].items():
            cmd += ['--header', '%s: %s' % (key, val)]
+        cmd += self._source_address('--interface')
        cmd += ['--', info_dict['url']]
        return cmd

--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@ -3,6 +3,9 @@ from __future__ import unicode_literals
 import os
 import time

+from socket import error as SocketError
+import errno
+
 from .common import FileDownloader
 from ..compat import (
    compat_urllib_request,
@ -99,6 +102,11 @@ class HttpFD(FileDownloader):
                            resume_len = 0
                            open_mode = 'wb'
                            break
+            except SocketError as e:
+                if e.errno != errno.ECONNRESET:
+                    # Connection reset is no problem, just retry
+                    raise
+
            # Retry
            count += 1
            if count <= retries:
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -182,6 +182,7 @@ from .heise import HeiseIE
 from .hellporno import HellPornoIE
 from .helsinki import HelsinkiIE
 from .hentaistigma import HentaiStigmaIE
+from .historicfilms import HistoricFilmsIE
 from .hitbox import HitboxIE, HitboxLiveIE
 from .hornbunny import HornBunnyIE
 from .hostingbulk import HostingBulkIE
@ -316,7 +317,8 @@ from .nrk import (
    NRKIE,
    NRKTVIE,
 )
-from .ntv import NTVIE
+from .ntvde import NTVDeIE
+from .ntvru import NTVRuIE
 from .nytimes import NYTimesIE
 from .nuvid import NuvidIE
 from .oktoberfesttv import OktoberfestTVIE
--- a/youtube_dl/extractor/drtv.py
+++ b/youtube_dl/extractor/drtv.py
@ -25,9 +25,15 @@ class DRTVIE(SubtitlesInfoExtractor):
    def _real_extract(self, url):
        video_id = self._match_id(url)

-        programcard = self._download_json(
-            'http://www.dr.dk/mu/programcard/expanded/%s' % video_id, video_id, 'Downloading video JSON')
+        webpage = self._download_webpage(url, video_id)

+        video_id = self._search_regex(
+            r'data-(?:material-identifier|episode-slug)="([^"]+)"',
+            webpage, 'video id')
+
+        programcard = self._download_json(
+            'http://www.dr.dk/mu/programcard/expanded/%s' % video_id,
+            video_id, 'Downloading video JSON')
        data = programcard['Data'][0]

        title = data['Title']
--- a/youtube_dl/extractor/historicfilms.py
+++ b/youtube_dl/extractor/historicfilms.py
@ -0,0 +1,46 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import parse_duration
+
+
+class HistoricFilmsIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?historicfilms\.com/(?:tapes/|play)(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.historicfilms.com/tapes/4728',
+        'md5': 'd4a437aec45d8d796a38a215db064e9a',
+        'info_dict': {
+            'id': '4728',
+            'ext': 'mov',
+            'title': 'Historic Films: GP-7',
+            'description': 'md5:1a86a0f3ac54024e419aba97210d959a',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 2096,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        tape_id = self._search_regex(
+            r'class="tapeId">([^<]+)<', webpage, 'tape id')
+
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
+        thumbnail = self._html_search_meta(
+            'thumbnailUrl', webpage, 'thumbnails') or self._og_search_thumbnail(webpage)
+        duration = parse_duration(self._html_search_meta(
+            'duration', webpage, 'duration'))
+
+        video_url = 'http://www.historicfilms.com/video/%s_%s_web.mov' % (tape_id, video_id)
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+        }
--- a/youtube_dl/extractor/ntvde.py
+++ b/youtube_dl/extractor/ntvde.py
@ -0,0 +1,68 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    js_to_json,
+    parse_duration,
+)
+
+
+class NTVDeIE(InfoExtractor):
+    IE_NAME = 'n-tv.de'
+    _VALID_URL = r'https?://(?:www\.)?n-tv\.de/mediathek/videos/[^/?#]+/[^/?#]+-article(?P<id>.+)\.html'
+
+    _TESTS = [{
+        'url': 'http://www.n-tv.de/mediathek/videos/panorama/Schnee-und-Glaette-fuehren-zu-zahlreichen-Unfaellen-und-Staus-article14438086.html',
+        'md5': '6ef2514d4b1e8e03ca24b49e2f167153',
+        'info_dict': {
+            'id': '14438086',
+            'ext': 'mp4',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'title': 'Schnee und Glätte führen zu zahlreichen Unfällen und Staus',
+            'alt_title': 'Winterchaos auf deutschen Straßen',
+            'description': 'Schnee und Glätte sorgen deutschlandweit für einen chaotischen Start in die Woche: Auf den Straßen kommt es zu kilometerlangen Staus und Dutzenden Glätteunfällen. In Düsseldorf und München wirbelt der Schnee zudem den Flugplan durcheinander. Dutzende Flüge landen zu spät, einige fallen ganz aus.',
+            'duration': 4020,
+            'timestamp': 1422892797,
+            'upload_date': '20150202',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        info = self._parse_json(self._search_regex(
+            r'(?s)ntv.pageInfo.article =\s(\{.*?\});', webpage, 'info'),
+            video_id, transform_source=js_to_json)
+        timestamp = int_or_none(info.get('publishedDateAsUnixTimeStamp'))
+        vdata = self._parse_json(self._search_regex(
+            r'(?s)\$\(\s*"\#player"\s*\)\s*\.data\(\s*"player",\s*(\{.*?\})\);',
+            webpage, 'player data'),
+            video_id, transform_source=js_to_json)
+        duration = parse_duration(vdata.get('duration'))
+        formats = [{
+            'format_id': 'flash',
+            'url': 'rtmp://fms.n-tv.de/' + vdata['video'],
+        }, {
+            'format_id': 'mobile',
+            'url': 'http://video.n-tv.de' + vdata['videoMp4'],
+            'tbr': 400,  # estimation
+        }]
+        m3u8_url = 'http://video.n-tv.de' + vdata['videoM3u8']
+        formats.extend(self._extract_m3u8_formats(
+            m3u8_url, video_id, ext='mp4',
+            entry_protocol='m3u8_native', preference=0))
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': info['headline'],
+            'description': info.get('intro'),
+            'alt_title': info.get('kicker'),
+            'timestamp': timestamp,
+            'thumbnail': vdata.get('html5VideoPoster'),
+            'duration': duration,
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/ntvru.py
+++ b/youtube_dl/extractor/ntvru.py
@ -1,15 +1,14 @@
 # encoding: utf-8
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor
 from ..utils import (
    unescapeHTML
 )


-class NTVIE(InfoExtractor):
+class NTVRuIE(InfoExtractor):
+    IE_NAME = 'ntv.ru'
    _VALID_URL = r'http://(?:www\.)?ntv\.ru/(?P<id>.+)'

    _TESTS = [
@ -92,9 +91,7 @@ class NTVIE(InfoExtractor):
    ]

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, page, 'video id')
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@ -9,6 +9,7 @@ from ..compat import (
 )
 from ..utils import (
    ExtractorError,
+    int_or_none,
 )


@ -192,9 +193,29 @@ class VevoIE(InfoExtractor):
        # Download via HLS API
        formats.extend(self._download_api_formats(video_id))

+        # Download SMIL
+        smil_blocks = sorted((
+            f for f in video_info['videoVersions']
+            if f['sourceType'] == 13),
+            key=lambda f: f['version'])
+        smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % (
+            self._SMIL_BASE_URL, video_id, video_id.lower())
+        if smil_blocks:
+            smil_url_m = self._search_regex(
+                r'url="([^"]+)"', smil_blocks[-1]['data'], 'SMIL URL',
+                default=None)
+            if smil_url_m is not None:
+                smil_url = smil_url_m
+        if smil_url:
+            smil_xml = self._download_webpage(
+                smil_url, video_id, 'Downloading SMIL info', fatal=False)
+            if smil_xml:
+                formats.extend(self._formats_from_smil(smil_xml))
+
        self._sort_formats(formats)
-        timestamp_ms = int(self._search_regex(
-            r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date'))
+        timestamp_ms = int_or_none(self._search_regex(
+            r'/Date\((\d+)\)/',
+            video_info['launchDate'], 'launch date', fatal=False))

        return {
            'id': video_id,
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@ -698,10 +698,9 @@ def parseOpts(overrideArguments=None):
    postproc.add_option(
        '--fixup',
        metavar='POLICY', dest='fixup', default='detect_or_warn',
-        help='(experimental) Automatically correct known faults of the file. '
+        help='Automatically correct known faults of the file. '
             'One of never (do nothing), warn (only emit a warning), '
-             'detect_or_warn(check whether we can do anything about it, warn '
-             'otherwise')
+             'detect_or_warn(the default; fix file if we can, warn otherwise)')
    postproc.add_option(
        '--prefer-avconv',
        action='store_false', dest='prefer_ffmpeg',
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -1275,7 +1275,10 @@ def parse_duration(s):
            (?P<only_hours>[0-9.]+)\s*(?:hours?)|

            (?:
-                (?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?
+                (?:
+                    (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
+                    (?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
+                )?
                (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
            )?
            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
@ -1293,6 +1296,8 @@ def parse_duration(s):
        res += int(m.group('mins')) * 60
    if m.group('hours'):
        res += int(m.group('hours')) * 60 * 60
+    if m.group('days'):
+        res += int(m.group('days')) * 24 * 60 * 60
    if m.group('ms'):
        res += float(m.group('ms'))
    return res
@ -1543,7 +1548,7 @@ def js_to_json(code):
    res = re.sub(r'''(?x)
        "(?:[^"\\]*(?:\\\\|\\")?)*"|
        '(?:[^'\\]*(?:\\\\|\\')?)*'|
-        [a-zA-Z_][a-zA-Z_0-9]*
+        [a-zA-Z_][.a-zA-Z_0-9]*
        ''', fix_kv, code)
    res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
    return res
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2015.02.02'
+__version__ = '2015.02.02.3'
Author	SHA1	Message	Date
Philipp Hagemeister	3a4cca687f	release 2015.02.02.3	2015-02-02 22:56:15 +01:00
Philipp Hagemeister	7d3d06a16c	[vevo] Restore SMIL support (#3656 )	2015-02-02 22:48:12 +01:00
Philipp Hagemeister	c21b1fbeeb	release 2015.02.02.2	2015-02-02 21:58:58 +01:00
Philipp Hagemeister	f920ce295e	[ntvru] Remove unused import	2015-02-02 21:58:17 +01:00
Philipp Hagemeister	7a7bd19c45	[n-tv.de] Use native m3u8 as best format	2015-02-02 21:57:48 +01:00
Philipp Hagemeister	8f4b58d70e	[ntvde] Add new extractor (Fixes #4850 )	2015-02-02 21:48:54 +01:00
Philipp Hagemeister	3fd45e03bf	[ntvru] Rename from NTV to clarify the difference between n-tv.de and ntv.ru	2015-02-02 20:43:02 +01:00
Philipp Hagemeister	869b4aeff4	release 2015.02.02.1	2015-02-02 20:35:04 +01:00
Philipp Hagemeister	cc9ca3ba6e	[downloader/external] Simplify source_address '' might actually be passed in, so check for None.	2015-02-02 20:33:25 +01:00
Philipp Hagemeister	ea71034bd3	Merge remote-tracking branch 'origin/master' Conflicts: youtube_dl/downloader/external.py	2015-02-02 20:32:07 +01:00
Philipp Hagemeister	9fffd0469f	[options] Mark --fixup as non-experimental and correct its help	2015-02-02 20:28:18 +01:00
Sergey M․	ae7773942e	[downloader/external] Simplify	2015-02-02 21:51:38 +06:00
Sergey M․	469a64cebf	[downloader/external] Simplify	2015-02-02 21:40:52 +06:00
Sergey M.	aae3fdcfae	Merge pull request #4845 from vijayanandnandam/master Passing source address option to external downloaders	2015-02-02 21:38:22 +06:00
vijayanand nandam	6a66904f8e	passing source address option to external downloaders	2015-02-02 20:51:40 +05:30
Sergey M․	78271e3319	[drtv] Extract material id (Closes #4814 )	2015-02-02 21:11:25 +06:00
Sergey M․	92bf0bcdf8	[historicfilms] Add extractor (Closes #4825 )	2015-02-02 20:52:37 +06:00
Philipp Hagemeister	1283204917	[http] PEP8 (#4831 )	2015-02-02 12:05:39 +01:00
Philipp Hagemeister	6789defea9	Merge pull request #4831 from light94/master Handling Connection Reset by Peer Error	2015-02-02 12:03:28 +01:00
light94	e77d2975af	Handling Connection Reset by Peer Error	2015-02-01 00:10:58 +05:30