release 2015.02.19.2

[imgur] Add new extractor
[cbs] Modernize
2015-02-19 01:43:28 +01:00 · 2015-02-19 01:43:20 +01:00 · 2015-02-19 01:22:50 +01:00 · 2015-02-19 01:04:24 +01:00 · 2015-02-19 01:04:19 +01:00 · 2015-02-19 00:38:05 +01:00
7 changed files with 134 additions and 16 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@ -121,6 +121,7 @@
 - **EllenTV**
 - **EllenTV:clips**
 - **ElPais**: El País
+ - **Embedly**
 - **EMPFlix**
 - **Engadget**
 - **Eporner**
@ -190,6 +191,7 @@
 - **ign.com**
 - **imdb**: Internet Movie Database trailers
 - **imdb:list**: Internet Movie Database lists
+ - **Imgur**
 - **Ina**
 - **InfoQ**
 - **Instagram**
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -121,6 +121,7 @@ from .ellentv import (
    EllenTVClipsIE,
 )
 from .elpais import ElPaisIE
+from .embedly import EmbedlyIE
 from .empflix import EMPFlixIE
 from .engadget import EngadgetIE
 from .eporner import EpornerIE
@ -204,6 +205,7 @@ from .imdb import (
    ImdbIE,
    ImdbListIE
 )
+from .imgur import ImgurIE
 from .ina import InaIE
 from .infoq import InfoQIE
 from .instagram import InstagramIE, InstagramUserIE
--- a/youtube_dl/extractor/cbs.py
+++ b/youtube_dl/extractor/cbs.py
@ -1,7 +1,5 @@
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor


@ -39,8 +37,7 @@ class CBSIE(InfoExtractor):
    }]

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        real_id = self._search_regex(
            r"video\.settings\.pid\s*=\s*'([^']+)';",
--- a/youtube_dl/extractor/embedly.py
+++ b/youtube_dl/extractor/embedly.py
@ -0,0 +1,16 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+
+
+class EmbedlyIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www|cdn\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?url=(?P<id>[^#&]+)'
+    _TESTS = [{
+        'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        return self.url_result(compat_urllib_parse_unquote(self._match_id(url)))
--- a/youtube_dl/extractor/imgur.py
+++ b/youtube_dl/extractor/imgur.py
@ -0,0 +1,84 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    js_to_json,
+    mimetype2ext,
+)
+
+
+class ImgurIE(InfoExtractor):
+    _VALID_URL = r'https?://i\.imgur\.com/(?P<id>[a-zA-Z0-9]+)\.(?:mp4|gifv)'
+
+    _TESTS = [{
+        'url': 'https://i.imgur.com/A61SaA1.gifv',
+        'info_dict': {
+            'id': 'A61SaA1',
+            'ext': 'mp4',
+            'title': 'MRW gifv is up and running without any bugs',
+            'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        width = int_or_none(self._search_regex(
+            r'<param name="width" value="([0-9]+)"',
+            webpage, 'width', fatal=False))
+        height = int_or_none(self._search_regex(
+            r'<param name="height" value="([0-9]+)"',
+            webpage, 'height', fatal=False))
+
+        formats = []
+        video_elements = self._search_regex(
+            r'(?s)<div class="video-elements">(.*?)</div>',
+            webpage, 'video elements')
+        formats = []
+        for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements):
+            formats.append({
+                'format_id': m.group('type').partition('/')[2],
+                'url': self._proto_relative_url(m.group('src')),
+                'ext': mimetype2ext(m.group('type')),
+                'acodec': 'none',
+                'width': width,
+                'height': height,
+                'http_headers': {
+                    'User-Agent': 'youtube-dl (like wget)',
+                },
+            })
+
+        gif_json = self._search_regex(
+            r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
+            webpage, 'GIF code', fatal=False)
+        if gif_json:
+            gifd = self._parse_json(
+                gif_json, video_id, transform_source=js_to_json)
+            formats.append({
+                'format_id': 'gif',
+                'preference': -10,
+                'width': width,
+                'height': height,
+                'ext': 'gif',
+                'acodec': 'none',
+                'vcodec': 'gif',
+                'container': 'gif',
+                'url': self._proto_relative_url(gifd['gifUrl']),
+                'filesize': gifd.get('size'),
+                'http_headers': {
+                    'User-Agent': 'youtube-dl (like wget)',
+                },
+            })
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'description': self._og_search_description(webpage),
+            'title': self._og_search_title(webpage),
+        }
--- a/youtube_dl/extractor/patreon.py
+++ b/youtube_dl/extractor/patreon.py
@ -1,9 +1,6 @@
 # encoding: utf-8
 from __future__ import unicode_literals

-import json
-import re
-
 from .common import InfoExtractor
 from ..utils import (
    js_to_json,
@ -11,7 +8,7 @@ from ..utils import (


 class PatreonIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(.+)'
+    _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(?P<id>[^&#]+)'
    _TESTS = [
        {
            'url': 'http://www.patreon.com/creation?hid=743933',
@ -35,6 +32,23 @@ class PatreonIE(InfoExtractor):
                'thumbnail': 're:^https?://.*$',
            },
        },
+        {
+            'url': 'https://www.patreon.com/creation?hid=1682498',
+            'info_dict': {
+                'id': 'SU4fj_aEMVw',
+                'ext': 'mp4',
+                'title': 'I\'m on Patreon!',
+                'uploader': 'TraciJHines',
+                'thumbnail': 're:^https?://.*$',
+                'upload_date': '20150211',
+                'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
+                'uploader_id': 'TraciJHines',
+            },
+            'params': {
+                'noplaylist': True,
+                'skip_download': True,
+            }
+        }
    ]

    # Currently Patreon exposes download URL via hidden CSS, so login is not
@ -65,26 +79,29 @@ class PatreonIE(InfoExtractor):
    '''

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1)
-
+        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage).strip()

        attach_fn = self._html_search_regex(
            r'<div class="attach"><a target="_blank" href="([^"]+)">',
            webpage, 'attachment URL', default=None)
+        embed = self._html_search_regex(
+            r'<div id="watchCreation">\s*<iframe class="embedly-embed" src="([^"]+)"',
+            webpage, 'embedded URL', default=None)
+
        if attach_fn is not None:
            video_url = 'http://www.patreon.com' + attach_fn
            thumbnail = self._og_search_thumbnail(webpage)
            uploader = self._html_search_regex(
                r'<strong>(.*?)</strong> is creating', webpage, 'uploader')
+        elif embed is not None:
+            return self.url_result(embed)
        else:
-            playlist_js = self._search_regex(
+            playlist = self._parse_json(self._search_regex(
                r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])',
-                webpage, 'playlist JSON')
-            playlist_json = js_to_json(playlist_js)
-            playlist = json.loads(playlist_json)
+                webpage, 'playlist JSON'),
+                video_id, transform_source=js_to_json)
            data = playlist[0]
            video_url = self._proto_relative_url(data['mp3'])
            thumbnail = self._proto_relative_url(data.get('cover'))
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2015.02.19'
+__version__ = '2015.02.19.2'
Author	SHA1	Message	Date
Philipp Hagemeister	1ac1af9b47	release 2015.02.19.2	2015-02-19 01:43:28 +01:00
Philipp Hagemeister	3bf5705316	[imgur] Add new extractor	2015-02-19 01:43:20 +01:00
Philipp Hagemeister	1c2528c8a3	[cbs] Modernize	2015-02-19 01:22:50 +01:00
Philipp Hagemeister	7bd15b1a03	release 2015.02.19.1	2015-02-19 01:04:24 +01:00
Philipp Hagemeister	6b961a85fd	[patreon] Add support for embedlies (fixes #4969 )	2015-02-19 01:04:19 +01:00
Philipp Hagemeister	7707004043	[patreon] Modernize	2015-02-19 00:38:05 +01:00