release 2017.09.02

[ChangeLog] Actualize
[youtube] Force old layout for each webpage (closes #14083 )
2017-09-02 01:08:32 +07:00 · 2017-09-02 01:04:22 +07:00 · 2017-09-02 00:58:19 +07:00 · 2017-08-31 00:47:58 +07:00 · 2017-08-30 23:50:33 +07:00 · 2017-08-30 05:27:56 +07:00
9 changed files with 141 additions and 53 deletions
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -6,8 +6,8 @@

 ---

-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.08.27.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.08.27.1**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.09.02*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.09.02**

 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2017.08.27.1
+[debug] youtube-dl version 2017.09.02
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
--- a/13
+++ b/13
@@ -1,3 +1,16 @@
+version 2017.09.02
+
+Extractors
+* [youtube] Force old layout for each webpage (#14068, #14072, #14074, #14076,
+  #14077, #14079, #14082, #14083, #14094, #14095, #14096)
+* [youtube] Fix upload date extraction (#14065)
+ [charlierose] Add support for episodes (#14062)
+ [bbccouk] Add support for w-prefixed ids (#14056)
+* [googledrive] Extend URL regular expression (#9785)
+ [googledrive] Add support for source format (#14046)
+* [pornhd] Fix extraction (#14005)
+
+
 version 2017.08.27.1

 Extractors
--- a/4
+++ b/4
@@ -49,11 +49,11 @@ youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
 	mkdir -p zip
 	for d in youtube_dl youtube_dl/downloader youtube_dl/extractor youtube_dl/postprocessor ; do \
 	  mkdir -p zip/$$d ;\
-	  cp -a $$d/*.py zip/$$d/ ;\
+	  cp -pPR $$d/*.py zip/$$d/ ;\
 	done
 	touch -t 200001010101 zip/youtube_dl/*.py zip/youtube_dl/*/*.py
 	mv zip/youtube_dl/__main__.py zip/
-	cd zip ; zip --quiet ../youtube-dl youtube_dl/*.py youtube_dl/*/*.py __main__.py
+	cd zip ; zip -q ../youtube-dl youtube_dl/*.py youtube_dl/*/*.py __main__.py
 	rm -rf zip
 	echo '#!$(PYTHON)' > youtube-dl
 	cat youtube-dl.zip >> youtube-dl
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -29,7 +29,7 @@ from ..compat import (
 class BBCCoUkIE(InfoExtractor):
    IE_NAME = 'bbc.co.uk'
    IE_DESC = 'BBC iPlayer'
-    _ID_REGEX = r'[pb][\da-z]{7}'
+    _ID_REGEX = r'[pbw][\da-z]{7}'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?bbc\.co\.uk/
@@ -233,6 +233,9 @@ class BBCCoUkIE(InfoExtractor):
        }, {
            'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
            'only_matching': True,
+        }, {
+            'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
+            'only_matching': True,
        }]

    _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
--- a/youtube_dl/extractor/charlierose.py
+++ b/youtube_dl/extractor/charlierose.py
@@ -5,7 +5,7 @@ from ..utils import remove_end


 class CharlieRoseIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?charlierose\.com/video(?:s|/player)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?charlierose\.com/(?:video|episode)(?:s|/player)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://charlierose.com/videos/27996',
        'md5': 'fda41d49e67d4ce7c2411fd2c4702e09',
@@ -24,6 +24,9 @@ class CharlieRoseIE(InfoExtractor):
    }, {
        'url': 'https://charlierose.com/videos/27996',
        'only_matching': True,
+    }, {
+        'url': 'https://charlierose.com/episodes/30887?autoplay=true',
+        'only_matching': True,
    }]

    _PLAYER_BASE = 'https://charlierose.com/video/player/%s'
--- a/youtube_dl/extractor/googledrive.py
+++ b/youtube_dl/extractor/googledrive.py
@@ -4,6 +4,7 @@ import re

 from .common import InfoExtractor
 from ..utils import (
+    determine_ext,
    ExtractorError,
    int_or_none,
    lowercase_escape,
@@ -12,27 +13,53 @@ from ..utils import (


 class GoogleDriveIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
+    _VALID_URL = r'''(?x)
+                        https?://
+                            (?:
+                                (?:docs|drive)\.google\.com/
+                                (?:
+                                    (?:uc|open)\?.*?id=|
+                                    file/d/
+                                )|
+                                video\.google\.com/get_player\?.*?docid=
+                            )
+                            (?P<id>[a-zA-Z0-9_-]{28,})
+                    '''
    _TESTS = [{
        'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
-        'md5': 'd109872761f7e7ecf353fa108c0dbe1e',
+        'md5': '5c602afbbf2c1db91831f5d82f678554',
        'info_dict': {
            'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
            'ext': 'mp4',
            'title': 'Big Buck Bunny.mp4',
            'duration': 45,
        }
+    }, {
+        # video can't be watched anonymously due to view count limit reached,
+        # but can be downloaded (see https://github.com/rg3/youtube-dl/issues/14046)
+        'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
+        'md5': 'bfbd670d03a470bb1e6d4a257adec12e',
+        'info_dict': {
+            'id': '0B-vUyvmDLdWDcEt4WjBqcmI2XzQ',
+            'ext': 'mp4',
+            'title': 'Annabelle Creation (2017)- Z.V1 [TH].MP4',
+        }
    }, {
        # video id is longer than 28 characters
        'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
-        'md5': 'c230c67252874fddd8170e3fd1a45886',
        'info_dict': {
            'id': '1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ',
            'ext': 'mp4',
            'title': 'Andreea Banica feat Smiley - Hooky Song (Official Video).mp4',
            'duration': 189,
        },
-        'only_matching': True
+        'only_matching': True,
+    }, {
+        'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
+        'only_matching': True,
+    }, {
+        'url': 'https://drive.google.com/uc?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
+        'only_matching': True,
    }]
    _FORMATS_EXT = {
        '5': 'flv',
@@ -147,21 +174,21 @@ class GoogleDriveIE(InfoExtractor):
        webpage = self._download_webpage(
            'http://docs.google.com/file/d/%s' % video_id, video_id)

-        reason = self._search_regex(
-            r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
-        if reason:
-            raise ExtractorError(reason)
-
-        title = self._search_regex(r'"title"\s*,\s*"([^"]+)', webpage, 'title')
+        title = self._search_regex(
+            r'"title"\s*,\s*"([^"]+)', webpage, 'title',
+            default=None) or self._og_search_title(webpage)
        duration = int_or_none(self._search_regex(
            r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds',
            default=None))
+
+        formats = []
        fmt_stream_map = self._search_regex(
            r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage,
-            'fmt stream map').split(',')
+            'fmt stream map', default='').split(',')
        fmt_list = self._search_regex(
-            r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')
-
+            r'"fmt_list"\s*,\s*"([^"]+)', webpage,
+            'fmt_list', default='').split(',')
+        if fmt_stream_map and fmt_list:
            resolutions = {}
            for fmt in fmt_list:
                mobj = re.search(
@@ -170,7 +197,6 @@ class GoogleDriveIE(InfoExtractor):
                    resolutions[mobj.group('format_id')] = (
                        int(mobj.group('width')), int(mobj.group('height')))

-        formats = []
            for fmt_stream in fmt_stream_map:
                fmt_stream_split = fmt_stream.split('|')
                if len(fmt_stream_split) < 2:
@@ -188,6 +214,44 @@ class GoogleDriveIE(InfoExtractor):
                        'height': resolution[1],
                    })
                formats.append(f)
+
+        source_url = update_url_query(
+            'https://drive.google.com/uc', {
+                'id': video_id,
+                'export': 'download',
+            })
+        urlh = self._request_webpage(
+            source_url, video_id, note='Requesting source file',
+            errnote='Unable to request source file', fatal=False)
+        if urlh:
+            def add_source_format(src_url):
+                formats.append({
+                    'url': src_url,
+                    'ext': determine_ext(title, 'mp4').lower(),
+                    'format_id': 'source',
+                    'quality': 1,
+                })
+            if urlh.headers.get('Content-Disposition'):
+                add_source_format(source_url)
+            else:
+                confirmation_webpage = self._webpage_read_content(
+                    urlh, url, video_id, note='Downloading confirmation page',
+                    errnote='Unable to confirm download', fatal=False)
+                if confirmation_webpage:
+                    confirm = self._search_regex(
+                        r'confirm=([^&"\']+)', confirmation_webpage,
+                        'confirmation code', fatal=False)
+                    if confirm:
+                        add_source_format(update_url_query(source_url, {
+                            'confirm': confirm,
+                        }))
+
+        if not formats:
+            reason = self._search_regex(
+                r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
+            if reason:
+                raise ExtractorError(reason, expected=True)
+
        self._sort_formats(formats)

        hl = self._search_regex(
--- a/youtube_dl/extractor/pornhd.py
+++ b/youtube_dl/extractor/pornhd.py
@@ -54,7 +54,7 @@ class PornHdIE(InfoExtractor):
             r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')

        sources = self._parse_json(js_to_json(self._search_regex(
-            r"(?s)sources'?\s*:\s*(\{.+?\})\s*\}[;,)]",
+            r"(?s)sources'?\s*[:=]\s*(\{.+?\})",
            webpage, 'sources', default='{}')), video_id)

        if not sources:
@@ -82,7 +82,8 @@ class PornHdIE(InfoExtractor):
        view_count = int_or_none(self._html_search_regex(
            r'(\d+) views\s*<', webpage, 'view count', fatal=False))
        thumbnail = self._search_regex(
-            r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False)
+            r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage,
+            'thumbnail', fatal=False, group='url')

        return {
            'id': video_id,
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -16,6 +16,7 @@ from ..jsinterp import JSInterpreter
 from ..swfinterp import SWFInterpreter
 from ..compat import (
    compat_chr,
+    compat_kwargs,
    compat_parse_qs,
    compat_urllib_parse_unquote,
    compat_urllib_parse_unquote_plus,
@@ -245,6 +246,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor):

        return True

+    def _download_webpage(self, *args, **kwargs):
+        kwargs.setdefault('query', {})['disable_polymer'] = 'true'
+        return super(YoutubeBaseInfoExtractor, self)._download_webpage(
+            *args, **compat_kwargs(kwargs))
+
    def _real_initialize(self):
        if self._downloader is None:
            return
@@ -1665,10 +1671,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        if not upload_date:
            upload_date = self._search_regex(
                [r'(?s)id="eow-date.*?>(.*?)</span>',
-                 r'id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live|Started) on (.+?)</strong>'],
+                 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
                video_webpage, 'upload date', default=None)
-            if upload_date:
-                upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
        upload_date = unified_strdate(upload_date)

        video_license = self._html_search_regex(
@@ -2054,7 +2058,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
                     |
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
-    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true'
+    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2017.08.27.1'
+__version__ = '2017.09.02'
Author	SHA1	Message	Date
Sergey M․	a2022b0c40	release 2017.09.02	2017-09-02 01:08:32 +07:00
Sergey M․	8681ed7fc8	[ChangeLog] Actualize	2017-09-02 01:04:22 +07:00
Sergey M․	8d81f3e36d	[youtube] Force old layout for each webpage (closes #14083 )	2017-09-02 00:58:19 +07:00
Sergey M․	7998520933	[youtube] Fix upload date extraction (closes #14065 )	2017-08-31 00:47:58 +07:00
Sergey M․	5b4bfbfc3b	[charlierose] Add support for episodes (closes #14062 )	2017-08-30 23:50:33 +07:00
Sergey M․	53647dfd0a	[bbccouk] Add support for w-prefixed ids (closes #14056 )	2017-08-30 05:27:56 +07:00
Yen Chi Hsuan	22f65a9efc	Merge pull request #14048 from ryandesign/patch-1 Fix build failures with old cp and zip	2017-08-28 11:22:27 +08:00
Ryan Schmidt	c75c384fb6	Fix build failures with old cp and zip	2017-08-27 18:07:09 -05:00
Sergey M․	1b41da488d	[googledrive] Extend _VALID_URL (closes #9785 )	2017-08-28 00:50:41 +07:00
Sergey M․	fea82c1780	[googledrive] Add support for source format (closes #14046 )	2017-08-28 00:39:22 +07:00
Sergey M․	3902cdd0e3	[pornhd] Fix extraction (closes #14005 )	2017-08-27 22:37:26 +07:00