release 2014.04.01.1

[comedycentral] Allow URLs with query parts (fixes #2661)
release 2014.04.01
2014-04-01 00:25:17 +02:00 · 2014-04-01 00:25:11 +02:00 · 2014-04-01 00:02:39 +02:00 · 2014-04-01 00:02:32 +02:00 · 2014-03-31 19:31:48 +07:00
4 changed files with 53 additions and 34 deletions
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -144,7 +144,15 @@ class TestAllURLsMatching(unittest.TestCase):
        self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS'])

    def test_ComedyCentralShows(self):
-        self.assertMatch('http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview', ['ComedyCentralShows'])
+        self.assertMatch(
+            'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
+            ['ComedyCentralShows'])
+        self.assertMatch(
+            'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news',
+            ['ComedyCentralShows'])
+        self.assertMatch(
+            'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
+            ['ComedyCentralShows'])

 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -43,11 +43,13 @@ class ComedyCentralShowsIE(InfoExtractor):
                          (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
                         (full-episodes/(?P<episode>.*)|
                          (?P<clip>
-                              (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
-                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))|
+                              (?:videos/[^/]+/(?P<videotitle>[^/?#]+))
+                              |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
+                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
+                          )|
                          (?P<interview>
                              extended-interviews/(?P<interID>[0-9a-z]+)/(?:playlist_tds_extended_)?(?P<interview_title>.*?)(/.*?)?)))
-                     $'''
+                     (?:[?#].*|$)'''
    _TEST = {
        'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
        'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
@@ -102,7 +104,9 @@ class ComedyCentralShowsIE(InfoExtractor):
            assert mobj is not None

        if mobj.group('clip'):
-            if mobj.group('showname') == 'thedailyshow':
+            if mobj.group('videotitle'):
+                epTitle = mobj.group('videotitle')
+            elif mobj.group('showname') == 'thedailyshow':
                epTitle = mobj.group('tdstitle')
            else:
                epTitle = mobj.group('cntitle')
--- a/youtube_dl/extractor/pyvideo.py
+++ b/youtube_dl/extractor/pyvideo.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 import os

@@ -5,45 +7,50 @@ from .common import InfoExtractor


 class PyvideoIE(InfoExtractor):
-    _VALID_URL = r'(?:http://)?(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
-    _TESTS = [{
-        u'url': u'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
-        u'file': u'24_4WWkSmNo.mp4',
-        u'md5': u'de317418c8bc76b1fd8633e4f32acbc6',
-        u'info_dict': {
-            u"title": u"Become a logging expert in 30 minutes",
-            u"description": u"md5:9665350d466c67fb5b1598de379021f7",
-            u"upload_date": u"20130320",
-            u"uploader": u"NextDayVideo",
-            u"uploader_id": u"NextDayVideo",
+    _VALID_URL = r'http://(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'
+
+    _TESTS = [
+        {
+            'url': 'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
+            'md5': 'de317418c8bc76b1fd8633e4f32acbc6',
+            'info_dict': {
+                'id': '24_4WWkSmNo',
+                'ext': 'mp4',
+                'title': 'Become a logging expert in 30 minutes',
+                'description': 'md5:9665350d466c67fb5b1598de379021f7',
+                'upload_date': '20130320',
+                'uploader': 'NextDayVideo',
+                'uploader_id': 'NextDayVideo',
+            },
+            'add_ie': ['Youtube'],
        },
-        u'add_ie': ['Youtube'],
-    },
-    {
-        u'url': u'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
-        u'md5': u'5fe1c7e0a8aa5570330784c847ff6d12',
-        u'info_dict': {
-            u'id': u'2542',
-            u'ext': u'm4v',
-            u'title': u'Gloriajw-SpotifyWithErikBernhardsson182',
+        {
+            'url': 'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
+            'md5': '5fe1c7e0a8aa5570330784c847ff6d12',
+            'info_dict': {
+                'id': '2542',
+                'ext': 'm4v',
+                'title': 'Gloriajw-SpotifyWithErikBernhardsson182',
+            },
        },
-    },
    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
-        webpage = self._download_webpage(url, video_id)
-        m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage)

+        webpage = self._download_webpage(url, video_id)
+
+        m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage)
        if m_youtube is not None:
            return self.url_result(m_youtube.group(1), 'Youtube')

-        title = self._html_search_regex(r'<div class="section">.*?<h3>([^>]+?)</h3>',
-            webpage, u'title', flags=re.DOTALL)
-        video_url = self._search_regex([r'<source src="(.*?)"',
-            r'<dt>Download</dt>.*?<a href="(.+?)"'],
-            webpage, u'video url', flags=re.DOTALL)
+        title = self._html_search_regex(
+            r'<div class="section">.*?<h3>([^>]+?)</h3>', webpage, 'title', flags=re.DOTALL)
+        video_url = self._search_regex(
+            [r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'],
+            webpage, 'video url', flags=re.DOTALL)
+
        return {
            'id': video_id,
            'title': os.path.splitext(title)[0],
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2014.03.30.1'
+__version__ = '2014.04.01.1'
Author	SHA1	Message	Date
Philipp Hagemeister	017e4dd58c	release 2014.04.01.1	2014-04-01 00:25:17 +02:00
Philipp Hagemeister	651486621d	[comedycentral] Allow URLs with query parts (fixes #2661)	2014-04-01 00:25:11 +02:00
Philipp Hagemeister	28d9032c88	release 2014.04.01	2014-04-01 00:02:39 +02:00
Philipp Hagemeister	16f4eb723a	[comedycentral] Add support for /videos URLs (Fixes #2660)	2014-04-01 00:02:32 +02:00
Sergey M․	1cbd410620	[pyvideo] Modernize	2014-03-31 19:31:48 +07:00