release 2014.12.13

Merge remote-tracking branch 'fstirlitz/master'
Merge branch 'master' of github.com:rg3/youtube-dl
2014-12-13 23:13:48 +01:00 · 2014-12-13 23:05:41 +01:00 · 2014-12-13 23:05:28 +01:00 · 2014-12-13 23:05:22 +01:00 · 2014-12-14 03:42:42 +06:00 · 2014-12-14 03:41:17 +06:00
141 changed files with 889 additions and 565 deletions
--- a/AUTHORS
+++ b/AUTHORS
@@ -92,3 +92,4 @@ Tithen-Firion
 Zack Fernandes
 cryptonaut
 Adrian Kretz
+Mathias Rav
--- a/test/test_unicode_literals.py
+++ b/test/test_unicode_literals.py
@@ -20,7 +20,7 @@ IGNORED_FILES = [
 ]


-from helper import assertRegexpMatches
+from test.helper import assertRegexpMatches


 class TestUnicodeLiterals(unittest.TestCase):
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -5,8 +5,8 @@ import re
 import sys
 import time

+from ..compat import compat_str
 from ..utils import (
-    compat_str,
    encodeFilename,
    format_bytes,
    timeconvert,
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@@ -9,10 +9,12 @@ import xml.etree.ElementTree as etree

 from .common import FileDownloader
 from .http import HttpFD
+from ..compat import (
+    compat_urlparse,
+)
 from ..utils import (
    struct_pack,
    struct_unpack,
-    compat_urlparse,
    format_bytes,
    encodeFilename,
    sanitize_open,
@@ -231,6 +233,7 @@ class F4mFD(FileDownloader):
                'continuedl': True,
                'quiet': True,
                'noprogress': True,
+                'ratelimit': self.params.get('ratelimit', None),
                'test': self.params.get('test', False),
            }
        )
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -6,9 +6,11 @@ import subprocess

 from ..postprocessor.ffmpeg import FFmpegPostProcessor
 from .common import FileDownloader
-from ..utils import (
+from ..compat import (
    compat_urlparse,
    compat_urllib_request,
+)
+from ..utils import (
    check_executable,
    encodeFilename,
 )
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -4,11 +4,12 @@ import os
 import time

 from .common import FileDownloader
-from ..utils import (
+from ..compat import (
    compat_urllib_request,
    compat_urllib_error,
+)
+from ..utils import (
    ContentTooShortError,
-
    encodeFilename,
    sanitize_open,
    format_bytes,
--- a/youtube_dl/downloader/rtmp.py
+++ b/youtube_dl/downloader/rtmp.py
@@ -7,9 +7,9 @@ import sys
 import time

 from .common import FileDownloader
+from ..compat import compat_str
 from ..utils import (
    check_executable,
-    compat_str,
    encodeFilename,
    format_bytes,
    get_exe_version,
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -65,6 +65,7 @@ from .cnn import (
 )
 from .collegehumor import CollegeHumorIE
 from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
+from .comcarcoff import ComCarCoffIE
 from .condenast import CondeNastIE
 from .cracked import CrackedIE
 from .criterion import CriterionIE
@@ -159,6 +160,7 @@ from .googlesearch import GoogleSearchIE
 from .gorillavid import GorillaVidIE
 from .goshgay import GoshgayIE
 from .grooveshark import GroovesharkIE
+from .groupon import GrouponIE
 from .hark import HarkIE
 from .heise import HeiseIE
 from .helsinki import HelsinkiIE
@@ -314,6 +316,7 @@ from .radiofrance import RadioFranceIE
 from .rai import RaiIE
 from .rbmaradio import RBMARadioIE
 from .redtube import RedTubeIE
+from .restudy import RestudyIE
 from .reverbnation import ReverbNationIE
 from .ringtv import RingTVIE
 from .ro220 import Ro220IE
@@ -322,6 +325,7 @@ from .roxwel import RoxwelIE
 from .rtbf import RTBFIE
 from .rtlnl import RtlXlIE
 from .rtlnow import RTLnowIE
+from .rtp import RTPIE
 from .rts import RTSIE
 from .rtve import RTVEALaCartaIE, RTVELiveIE
 from .ruhd import RUHDIE
--- a/youtube_dl/extractor/adultswim.py
+++ b/youtube_dl/extractor/adultswim.py
@@ -7,6 +7,8 @@ import json
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
+    xpath_text,
+    float_or_none,
 )


@@ -128,7 +130,8 @@ class AdultSwimIE(InfoExtractor):
                segment_url, segment_title,
                'Downloading segment information', 'Unable to download segment information')

-            segment_duration = idoc.find('.//trt').text.strip()
+            segment_duration = float_or_none(
+                xpath_text(idoc, './/trt', 'segment duration').strip())

            formats = []
            file_els = idoc.findall('.//files/file')
--- a/youtube_dl/extractor/allocine.py
+++ b/youtube_dl/extractor/allocine.py
@@ -5,10 +5,9 @@ import re
 import json

 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
-    compat_str,
    qualities,
-    determine_ext,
 )


@@ -75,9 +74,7 @@ class AllocineIE(InfoExtractor):
                    'format_id': format_id,
                    'quality': quality(format_id),
                    'url': v,
-                    'ext': determine_ext(v),
                })
-
        self._sort_formats(formats)

        return {
--- a/youtube_dl/extractor/aol.py
+++ b/youtube_dl/extractor/aol.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from .fivemin import FiveMinIE


 class AolIE(InfoExtractor):
@@ -42,9 +41,10 @@ class AolIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
-
        playlist_id = mobj.group('playlist_id')
-        if playlist_id and not self._downloader.params.get('noplaylist'):
+        if not playlist_id or self._downloader.params.get('noplaylist'):
+            return self.url_result('5min:%s' % video_id)
+
        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage = self._download_webpage(url, playlist_id)
@@ -68,5 +68,3 @@ class AolIE(InfoExtractor):
            'title': title,
            'entries': entries,
        }
-
-        return FiveMinIE._build_result(video_id)
--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@@ -4,8 +4,8 @@ import re
 import json

 from .common import InfoExtractor
+from ..compat import compat_urlparse
 from ..utils import (
-    compat_urlparse,
    int_or_none,
 )

--- a/youtube_dl/extractor/auengine.py
+++ b/youtube_dl/extractor/auengine.py
@@ -3,8 +3,8 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..compat import compat_urllib_parse
 from ..utils import (
-    compat_urllib_parse,
    determine_ext,
    ExtractorError,
 )
--- a/youtube_dl/extractor/bambuser.py
+++ b/youtube_dl/extractor/bambuser.py
@@ -5,7 +5,7 @@ import json
 import itertools

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_request,
 )

--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -4,9 +4,11 @@ import json
 import re

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_str,
    compat_urlparse,
+)
+from ..utils import (
    ExtractorError,
 )

@@ -104,7 +106,7 @@ class BandcampIE(InfoExtractor):

 class BandcampAlbumIE(InfoExtractor):
    IE_NAME = 'Bandcamp:album'
-    _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+))'
+    _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+))?'

    _TESTS = [{
        'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
@@ -139,6 +141,12 @@ class BandcampAlbumIE(InfoExtractor):
            'title': 'Hierophany of the Open Grave',
        },
        'playlist_mincount': 9,
+    }, {
+        'url': 'http://dotscale.bandcamp.com',
+        'info_dict': {
+            'title': 'Loom',
+        },
+        'playlist_mincount': 7,
    }]

    def _real_extract(self, url):
--- a/youtube_dl/extractor/bbccouk.py
+++ b/youtube_dl/extractor/bbccouk.py
@@ -209,7 +209,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
        webpage = self._download_webpage(url, group_id, 'Downloading video page')

        programme_id = self._search_regex(
-            r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False)
+            r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None)
        if programme_id:
            player = self._download_json(
                'http://www.bbc.co.uk/iplayer/episode/%s.json' % group_id,
--- a/youtube_dl/extractor/bet.py
+++ b/youtube_dl/extractor/bet.py
@@ -1,8 +1,8 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
+from ..compat import compat_urllib_parse
 from ..utils import (
-    compat_urllib_parse,
    xpath_text,
    xpath_with_ns,
    int_or_none,
--- a/youtube_dl/extractor/bilibili.py
+++ b/youtube_dl/extractor/bilibili.py
@@ -4,8 +4,8 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..compat import compat_parse_qs
 from ..utils import (
-    compat_parse_qs,
    ExtractorError,
    int_or_none,
    unified_strdate,
@@ -29,10 +29,9 @@ class BiliBiliIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
+
        video_code = self._search_regex(
            r'(?s)<div itemprop="video".*?>(.*?)</div>', webpage, 'video code')

--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -6,20 +6,21 @@ import json
 import xml.etree.ElementTree

 from .common import InfoExtractor
-from ..utils import (
-    compat_urllib_parse,
-    find_xpath_attr,
-    fix_xml_ampersands,
-    compat_urlparse,
-    compat_str,
-    compat_urllib_request,
+from ..compat import (
    compat_parse_qs,
+    compat_str,
+    compat_urllib_parse,
    compat_urllib_parse_urlparse,
-
+    compat_urllib_request,
+    compat_urlparse,
+)
+from ..utils import (
    determine_ext,
    ExtractorError,
-    unsmuggle_url,
+    find_xpath_attr,
+    fix_xml_ampersands,
    unescapeHTML,
+    unsmuggle_url,
 )


--- a/youtube_dl/extractor/ceskatelevize.py
+++ b/youtube_dl/extractor/ceskatelevize.py
@@ -4,10 +4,12 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_request,
    compat_urllib_parse,
    compat_urllib_parse_urlparse,
+)
+from ..utils import (
    ExtractorError,
 )

--- a/youtube_dl/extractor/comcarcoff.py
+++ b/youtube_dl/extractor/comcarcoff.py
@@ -0,0 +1,57 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import parse_iso8601
+
+
+class ComCarCoffIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]*)'
+    _TESTS = [{
+        'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
+        'info_dict': {
+            'id': 'miranda-sings-happy-thanksgiving-miranda',
+            'ext': 'mp4',
+            'upload_date': '20141127',
+            'timestamp': 1417107600,
+            'title': 'Happy Thanksgiving Miranda',
+            'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
+            'thumbnail': 'http://ccc.crackle.com/images/s5e4_thumb.jpg',
+        },
+        'params': {
+            'skip_download': 'requires ffmpeg',
+        }
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        if not display_id:
+            display_id = 'comediansincarsgettingcoffee.com'
+        webpage = self._download_webpage(url, display_id)
+
+        full_data = json.loads(self._search_regex(
+            r'<script type="application/json" id="videoData">(?P<json>.+?)</script>',
+            webpage, 'full data json'))
+
+        video_id = full_data['activeVideo']['video']
+        video_data = full_data['videos'][video_id]
+        thumbnails = [{
+            'url': video_data['images']['thumb'],
+        }, {
+            'url': video_data['images']['poster'],
+        }]
+        formats = self._extract_m3u8_formats(
+            video_data['mediaUrl'], video_id, ext='mp4')
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': video_data['title'],
+            'description': video_data.get('description'),
+            'timestamp': parse_iso8601(video_data.get('pubDate')),
+            'thumbnails': thumbnails,
+            'formats': formats,
+            'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),
+        }
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -3,9 +3,11 @@ from __future__ import unicode_literals
 import re

 from .mtv import MTVServicesInfoExtractor
-from ..utils import (
+from ..compat import (
    compat_str,
    compat_urllib_parse,
+)
+from ..utils import (
    ExtractorError,
    float_or_none,
    unified_strdate,
--- a/youtube_dl/extractor/condenast.py
+++ b/youtube_dl/extractor/condenast.py
@@ -5,12 +5,14 @@ import re
 import json

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_parse,
-    orderedSet,
    compat_urllib_parse_urlparse,
    compat_urlparse,
 )
+from ..utils import (
+    orderedSet,
+)


 class CondeNastIE(InfoExtractor):
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -10,10 +10,12 @@ import xml.etree.ElementTree
 from hashlib import sha1
 from math import pow, sqrt, floor
 from .subtitles import SubtitlesInfoExtractor
-from ..utils import (
-    ExtractorError,
+from ..compat import (
    compat_urllib_parse,
    compat_urllib_request,
+)
+from ..utils import (
+    ExtractorError,
    bytes_to_intlist,
    intlist_to_bytes,
    unified_strdate,
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -8,13 +8,15 @@ import itertools
 from .common import InfoExtractor
 from .subtitles import SubtitlesInfoExtractor

-from ..utils import (
-    compat_urllib_request,
+from ..compat import (
    compat_str,
+    compat_urllib_request,
+)
+from ..utils import (
+    ExtractorError,
+    int_or_none,
    orderedSet,
    str_to_int,
-    int_or_none,
-    ExtractorError,
    unescapeHTML,
 )

--- a/youtube_dl/extractor/daum.py
+++ b/youtube_dl/extractor/daum.py
@@ -5,7 +5,7 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_parse,
 )

--- a/youtube_dl/extractor/ebaumsworld.py
+++ b/youtube_dl/extractor/ebaumsworld.py
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor


@@ -20,8 +18,7 @@ class EbaumsWorldIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
        config = self._download_xml(
            'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id)
        video_url = config.find('file').text
--- a/youtube_dl/extractor/ehow.py
+++ b/youtube_dl/extractor/ehow.py
@@ -1,8 +1,6 @@
 from __future__ import unicode_literals

-import re
-
-from ..utils import (
+from ..compat import (
    compat_urllib_parse,
 )
 from .common import InfoExtractor
@@ -24,11 +22,10 @@ class EHowIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
-        video_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)',
-                                       webpage, 'video URL')
+        video_url = self._search_regex(
+            r'(?:file|source)=(http[^\'"&]*)', webpage, 'video URL')
        final_url = compat_urllib_parse.unquote(video_url)
        uploader = self._html_search_meta('uploader', webpage)
        title = self._og_search_title(webpage).replace(' | eHow', '')
--- a/youtube_dl/extractor/eighttracks.py
+++ b/youtube_dl/extractor/eighttracks.py
@@ -6,7 +6,7 @@ import random
 import re

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_str,
 )

--- a/youtube_dl/extractor/engadget.py
+++ b/youtube_dl/extractor/engadget.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from .fivemin import FiveMinIE
 from ..utils import (
    url_basename,
 )
@@ -27,11 +26,10 @@ class EngadgetIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)

        if video_id is not None:
-            return FiveMinIE._build_result(video_id)
+            return self.url_result('5min:%s' % video_id)
        else:
            title = url_basename(url)
            webpage = self._download_webpage(url, title)
@@ -39,5 +37,5 @@ class EngadgetIE(InfoExtractor):
            return {
                '_type': 'playlist',
                'title': title,
-                'entries': [FiveMinIE._build_result(id) for id in ids]
+                'entries': [self.url_result('5min:%s' % vid) for vid in ids]
            }
--- a/youtube_dl/extractor/escapist.py
+++ b/youtube_dl/extractor/escapist.py
@@ -3,9 +3,10 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_parse,
-
+)
+from ..utils import (
    ExtractorError,
 )

--- a/youtube_dl/extractor/everyonesmixtape.py
+++ b/youtube_dl/extractor/everyonesmixtape.py
@@ -3,8 +3,10 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_request,
+)
+from ..utils import (
    ExtractorError,
 )

--- a/youtube_dl/extractor/extremetube.py
+++ b/youtube_dl/extractor/extremetube.py
@@ -3,16 +3,18 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_parse_urlparse,
    compat_urllib_request,
    compat_urllib_parse,
+)
+from ..utils import (
    str_to_int,
 )


 class ExtremeTubeIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<videoid>[0-9]+))(?:[/?&]|$)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<id>[0-9]+))(?:[/?&]|$)'
    _TESTS = [{
        'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
        'md5': '1fb9228f5e3332ec8c057d6ac36f33e0',
@@ -31,7 +33,7 @@ class ExtremeTubeIE(InfoExtractor):

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
+        video_id = mobj.group('id')
        url = 'http://www.' + mobj.group('url')

        req = compat_urllib_request.Request(url)
--- a/youtube_dl/extractor/fc2.py
+++ b/youtube_dl/extractor/fc2.py
@@ -1,19 +1,20 @@
 #! -*- coding: utf-8 -*-
 from __future__ import unicode_literals

-import re
 import hashlib

 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
+from ..compat import (
    compat_urllib_request,
    compat_urlparse,
 )
+from ..utils import (
+    ExtractorError,
+)


 class FC2IE(InfoExtractor):
-    _VALID_URL = r'^http://video\.fc2\.com/((?P<lang>[^/]+)/)?content/(?P<id>[^/]+)'
+    _VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)?content/(?P<id>[^/]+)'
    IE_NAME = 'fc2'
    _TEST = {
        'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
@@ -26,9 +27,7 @@ class FC2IE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        self._downloader.cookiejar.clear_session_cookies()  # must clear

--- a/youtube_dl/extractor/firedrive.py
+++ b/youtube_dl/extractor/firedrive.py
@@ -4,11 +4,13 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
+from ..compat import (
    compat_urllib_parse,
    compat_urllib_request,
 )
+from ..utils import (
+    ExtractorError,
+)


 class FiredriveIE(InfoExtractor):
@@ -28,11 +30,8 @@ class FiredriveIE(InfoExtractor):
    }]

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
        url = 'http://firedrive.com/file/%s' % video_id
-
        webpage = self._download_webpage(url, video_id)

        if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
--- a/youtube_dl/extractor/fivemin.py
+++ b/youtube_dl/extractor/fivemin.py
@@ -1,11 +1,11 @@
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_str,
    compat_urllib_parse,
+)
+from ..utils import (
    ExtractorError,
 )

@@ -13,7 +13,7 @@ from ..utils import (
 class FiveMinIE(InfoExtractor):
    IE_NAME = '5min'
    _VALID_URL = r'''(?x)
-        (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(.*?&)?playList=|
+        (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
            5min:)
        (?P<id>\d+)
        '''
@@ -41,13 +41,8 @@ class FiveMinIE(InfoExtractor):
        },
    ]

-    @classmethod
-    def _build_result(cls, video_id):
-        return cls.url_result('5min:%s' % video_id, cls.ie_key())
-
    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
        embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
        embed_page = self._download_webpage(embed_url, video_id,
                                            'Downloading embed page')
--- a/youtube_dl/extractor/fourtube.py
+++ b/youtube_dl/extractor/fourtube.py
@@ -3,12 +3,14 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_request,
-    unified_strdate,
-    str_to_int,
-    parse_duration,
+)
+from ..utils import (
    clean_html,
+    parse_duration,
+    str_to_int,
+    unified_strdate,
 )


@@ -31,9 +33,7 @@ class FourTubeIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
        webpage_url = 'http://www.4tube.com/videos/' + video_id
        webpage = self._download_webpage(webpage_url, video_id)

--- a/youtube_dl/extractor/franceculture.py
+++ b/youtube_dl/extractor/franceculture.py
@@ -5,7 +5,7 @@ import json
 import re

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_parse_qs,
    compat_urlparse,
 )
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -6,13 +6,15 @@ import re
 import json

 from .common import InfoExtractor
-from ..utils import (
-    compat_urlparse,
-    ExtractorError,
-    clean_html,
-    parse_duration,
+from ..compat import (
    compat_urllib_parse_urlparse,
+    compat_urlparse,
+)
+from ..utils import (
+    clean_html,
+    ExtractorError,
    int_or_none,
+    parse_duration,
 )


--- a/youtube_dl/extractor/gamespot.py
+++ b/youtube_dl/extractor/gamespot.py
@@ -4,9 +4,11 @@ import re
 import json

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_parse,
    compat_urlparse,
+)
+from ..utils import (
    unescapeHTML,
 )

--- a/youtube_dl/extractor/gdcvault.py
+++ b/youtube_dl/extractor/gdcvault.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_parse,
    compat_urllib_request,
 )
--- a/youtube_dl/extractor/golem.py
+++ b/youtube_dl/extractor/golem.py
@@ -2,8 +2,10 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urlparse,
+)
+from ..utils import (
    determine_ext,
 )

--- a/youtube_dl/extractor/googlesearch.py
+++ b/youtube_dl/extractor/googlesearch.py
@@ -4,7 +4,7 @@ import itertools
 import re

 from .common import SearchInfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_parse,
 )

--- a/youtube_dl/extractor/gorillavid.py
+++ b/youtube_dl/extractor/gorillavid.py
@@ -4,11 +4,12 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-    determine_ext,
+from ..compat import (
    compat_urllib_parse,
    compat_urllib_request,
+)
+from ..utils import (
+    ExtractorError,
    int_or_none,
 )

@@ -106,7 +107,6 @@ class GorillaVidIE(InfoExtractor):
        formats = [{
            'format_id': 'sd',
            'url': video_url,
-            'ext': determine_ext(video_url),
            'quality': 1,
        }]

--- a/youtube_dl/extractor/groupon.py
+++ b/youtube_dl/extractor/groupon.py
@@ -0,0 +1,50 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class GrouponIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.groupon\.com/deals/(?P<id>[^?#]+)'
+
+    _TEST = {
+        'url': 'https://www.groupon.com/deals/bikram-yoga-huntington-beach-2#ooid=tubGNycTo_9Uxg82uESj4i61EYX8nyuf',
+        'info_dict': {
+            'id': 'bikram-yoga-huntington-beach-2',
+            'title': '$49 for 10 Yoga Classes or One Month of Unlimited Classes at Bikram Yoga Huntington Beach ($180 Value)',
+            'description': 'Studio kept at 105 degrees and 40% humidity with anti-microbial and anti-slip Flotex flooring; certified instructors',
+        },
+        'playlist': [{
+            'info_dict': {
+                'id': 'tubGNycTo_9Uxg82uESj4i61EYX8nyuf',
+                'ext': 'mp4',
+                'title': 'Bikram Yoga Huntington Beach | Orange County',
+            },
+        }],
+        'params': {
+            'skip_download': 'HLS',
+        }
+    }
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        webpage = self._download_webpage(url, playlist_id)
+
+        payload = self._parse_json(self._search_regex(
+            r'var\s+payload\s*=\s*(.*?);\n', webpage, 'payload'), playlist_id)
+        videos = payload['carousel'].get('dealVideos', [])
+        entries = []
+        for v in videos:
+            if v.get('provider') != 'OOYALA':
+                self.report_warning(
+                    '%s: Unsupported video provider %s, skipping video' %
+                    (playlist_id, v.get('provider')))
+                continue
+            entries.append(self.url_result('ooyala:%s' % v['media']))
+
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'entries': entries,
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage),
+        }
--- a/youtube_dl/extractor/hostingbulk.py
+++ b/youtube_dl/extractor/hostingbulk.py
@@ -4,9 +4,11 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_request,
+)
 from ..utils import (
    ExtractorError,
-    compat_urllib_request,
    int_or_none,
    urlencode_postdata,
 )
@@ -30,9 +32,7 @@ class HostingBulkIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
        url = 'http://hostingbulk.com/{0:}.html'.format(video_id)

        # Custom request with cookie to set language to English, so our file
--- a/youtube_dl/extractor/hypem.py
+++ b/youtube_dl/extractor/hypem.py
@@ -1,20 +1,20 @@
 from __future__ import unicode_literals

 import json
-import re
 import time

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_parse,
    compat_urllib_request,
-
+)
+from ..utils import (
    ExtractorError,
 )


 class HypemIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
+    _VALID_URL = r'http://(?:www\.)?hypem\.com/track/(?P<id>[^/]+)/'
    _TEST = {
        'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
        'md5': 'b9cc91b5af8995e9f0c1cee04c575828',
@@ -27,8 +27,7 @@ class HypemIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        track_id = mobj.group(1)
+        track_id = self._match_id(url)

        data = {'ax': 1, 'ts': time.time()}
        data_encoded = compat_urllib_parse.urlencode(data)
--- a/youtube_dl/extractor/imdb.py
+++ b/youtube_dl/extractor/imdb.py
@@ -4,7 +4,7 @@ import re
 import json

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urlparse,
 )

--- a/youtube_dl/extractor/infoq.py
+++ b/youtube_dl/extractor/infoq.py
@@ -1,10 +1,9 @@
 from __future__ import unicode_literals

 import base64
-import re

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_parse,
 )

@@ -24,9 +23,7 @@ class InfoQIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        video_title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
--- a/youtube_dl/extractor/internetvideoarchive.py
+++ b/youtube_dl/extractor/internetvideoarchive.py
@@ -3,9 +3,11 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urlparse,
    compat_urllib_parse,
+)
+from ..utils import (
    xpath_with_ns,
 )

--- a/youtube_dl/extractor/iprima.py
+++ b/youtube_dl/extractor/iprima.py
@@ -6,8 +6,10 @@ from random import random
 from math import floor

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_request,
+)
+from ..utils import (
    ExtractorError,
 )

--- a/youtube_dl/extractor/ivi.py
+++ b/youtube_dl/extractor/ivi.py
@@ -5,8 +5,10 @@ import re
 import json

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_request,
+)
+from ..utils import (
    ExtractorError,
 )

--- a/youtube_dl/extractor/keek.py
+++ b/youtube_dl/extractor/keek.py
@@ -1,34 +1,39 @@
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor


 class KeekIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<videoID>\w+)'
+    _VALID_URL = r'https?://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<id>\w+)'
    IE_NAME = 'keek'
    _TEST = {
        'url': 'https://www.keek.com/ytdl/keeks/NODfbab',
-        'file': 'NODfbab.mp4',
-        'md5': '9b0636f8c0f7614afa4ea5e4c6e57e83',
+        'md5': '09c5c109067536c1cec8bac8c21fea05',
        'info_dict': {
-            'uploader': 'ytdl',
+            'id': 'NODfbab',
+            'ext': 'mp4',
+            'uploader': 'youtube-dl project',
+            'uploader_id': 'ytdl',
            'title': 'test chars: "\'/\\\u00e4<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de .',
        },
    }

    def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
-        video_id = m.group('videoID')
+        video_id = self._match_id(url)

        video_url = 'http://cdn.keek.com/keek/video/%s' % video_id
        thumbnail = 'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
        webpage = self._download_webpage(url, video_id)

+        raw_desc = self._html_search_meta('description', webpage)
+        if raw_desc:
            uploader = self._html_search_regex(
-            r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>',
-            webpage, 'uploader', fatal=False)
+                r'Watch (.*?)\s+\(', raw_desc, 'uploader', fatal=False)
+            uploader_id = self._html_search_regex(
+                r'Watch .*?\(@(.+?)\)', raw_desc, 'uploader_id', fatal=False)
+        else:
+            uploader = None
+            uploader_id = None

        return {
            'id': video_id,
@@ -36,5 +41,6 @@ class KeekIE(InfoExtractor):
            'ext': 'mp4',
            'title': self._og_search_title(webpage),
            'thumbnail': thumbnail,
-            'uploader': uploader
+            'uploader': uploader,
+            'uploader_id': uploader_id,
        }
--- a/youtube_dl/extractor/keezmovies.py
+++ b/youtube_dl/extractor/keezmovies.py
@@ -4,7 +4,7 @@ import os
 import re

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_parse_urlparse,
    compat_urllib_request,
    compat_urllib_parse,
@@ -15,7 +15,7 @@ from ..aes import (


 class KeezMoviesIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?keezmovies\.com/video/.+?(?P<videoid>[0-9]+)(?:[/?&]|$)'
+    _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P<id>[0-9]+)(?:[/?&]|$)'
    _TEST = {
        'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711',
        'file': '1214711.mp4',
@@ -27,8 +27,7 @@ class KeezMoviesIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
+        video_id = self._match_id(url)

        req = compat_urllib_request.Request(url)
        req.add_header('Cookie', 'age_verified=1')
--- a/youtube_dl/extractor/livestream.py
+++ b/youtube_dl/extractor/livestream.py
@@ -4,10 +4,12 @@ import re
 import json

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_str,
    compat_urllib_parse_urlparse,
    compat_urlparse,
+)
+from ..utils import (
    ExtractorError,
    find_xpath_attr,
    int_or_none,
--- a/youtube_dl/extractor/lynda.py
+++ b/youtube_dl/extractor/lynda.py
@@ -5,12 +5,14 @@ import json

 from .subtitles import SubtitlesInfoExtractor
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
+    compat_str,
    compat_urllib_parse,
    compat_urllib_request,
+)
+from ..utils import (
    ExtractorError,
    int_or_none,
-    compat_str,
 )


--- a/youtube_dl/extractor/malemotion.py
+++ b/youtube_dl/extractor/malemotion.py
@@ -1,43 +1,33 @@
+# coding: utf-8
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_parse,
 )


 class MalemotionIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)'
+    _VALID_URL = r'https?://malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)'
    _TEST = {
-        'url': 'http://malemotion.com/video/bien-dur.10ew',
-        'file': '10ew.mp4',
-        'md5': 'b3cc49f953b107e4a363cdff07d100ce',
+        'url': 'http://malemotion.com/video/bete-de-concours.ltc',
+        'md5': '3013e53a0afbde2878bc39998c33e8a5',
        'info_dict': {
-            "title": "Bien dur",
-            "age_limit": 18,
+            'id': 'ltc',
+            'ext': 'mp4',
+            'title': 'Bête de Concours',
+            'age_limit': 18,
        },
-        'skip': 'This video has been deleted.'
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group("id")
-
+        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

-        self.report_extraction(video_id)
-
-        # Extract video URL
-        video_url = compat_urllib_parse.unquote(
-            self._search_regex(r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL'))
-
-        # Extract title
+        video_url = compat_urllib_parse.unquote(self._search_regex(
+            r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL'))
        video_title = self._html_search_regex(
            r'<title>(.*?)</title', webpage, 'title')
-
-        # Extract video thumbnail
        video_thumbnail = self._search_regex(
            r'<video .+?poster="(.+?)"', webpage, 'thumbnail', fatal=False)

@@ -47,14 +37,12 @@ class MalemotionIE(InfoExtractor):
            'format_id': 'mp4',
            'preference': 1,
        }]
+        self._sort_formats(formats)

        return {
            'id': video_id,
            'formats': formats,
-            'uploader': None,
-            'upload_date': None,
            'title': video_title,
            'thumbnail': video_thumbnail,
-            'description': None,
            'age_limit': 18,
        }
--- a/youtube_dl/extractor/metacafe.py
+++ b/youtube_dl/extractor/metacafe.py
@@ -3,10 +3,12 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_parse_qs,
    compat_urllib_parse,
    compat_urllib_request,
+)
+from ..utils import (
    determine_ext,
    ExtractorError,
    int_or_none,
--- a/youtube_dl/extractor/mit.py
+++ b/youtube_dl/extractor/mit.py
@@ -5,8 +5,10 @@ import json

 from .common import InfoExtractor
 from .youtube import YoutubeIE
-from ..utils import (
+from ..compat import (
    compat_urlparse,
+)
+from ..utils import (
    clean_html,
    ExtractorError,
    get_element_by_id,
--- a/youtube_dl/extractor/mitele.py
+++ b/youtube_dl/extractor/mitele.py
@@ -1,12 +1,13 @@
 from __future__ import unicode_literals

-import re
 import json

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_parse,
    compat_urlparse,
+)
+from ..utils import (
    get_element_by_attribute,
    parse_duration,
    strip_jsonp,
@@ -15,7 +16,7 @@ from ..utils import (

 class MiTeleIE(InfoExtractor):
    IE_NAME = 'mitele.es'
-    _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<episode>[^/]+)/'
+    _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'

    _TEST = {
        'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
@@ -31,12 +32,10 @@ class MiTeleIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        episode = mobj.group('episode')
+        episode = self._match_id(url)
        webpage = self._download_webpage(url, episode)
        embed_data_json = self._search_regex(
-            r'MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data',
-            flags=re.DOTALL
+            r'(?s)MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data',
        ).replace('\'', '"')
        embed_data = json.loads(embed_data_json)

--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -3,8 +3,10 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_parse,
+)
+from ..utils import (
    ExtractorError,
    HEADRequest,
    int_or_none,
--- a/youtube_dl/extractor/moevideo.py
+++ b/youtube_dl/extractor/moevideo.py
@@ -5,10 +5,12 @@ import json
 import re

 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
+from ..compat import (
    compat_urllib_parse,
    compat_urllib_request,
+)
+from ..utils import (
+    ExtractorError,
    int_or_none,
 )

--- a/youtube_dl/extractor/mofosex.py
+++ b/youtube_dl/extractor/mofosex.py
@@ -4,7 +4,7 @@ import os
 import re

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_parse_urlparse,
    compat_urllib_request,
    compat_urllib_parse,
@@ -12,7 +12,7 @@ from ..utils import (


 class MofosexIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?(?P<url>mofosex\.com/videos/(?P<videoid>[0-9]+)/.*?\.html)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>mofosex\.com/videos/(?P<id>[0-9]+)/.*?\.html)'
    _TEST = {
        'url': 'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
        'md5': '1b2eb47ac33cc75d4a80e3026b613c5a',
@@ -26,7 +26,7 @@ class MofosexIE(InfoExtractor):

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
+        video_id = mobj.group('id')
        url = 'http://www.' + mobj.group('url')

        req = compat_urllib_request.Request(url)
--- a/youtube_dl/extractor/moniker.py
+++ b/youtube_dl/extractor/moniker.py
@@ -5,7 +5,7 @@ import os.path
 import re

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_parse,
    compat_urllib_request,
 )
@@ -37,10 +37,9 @@ class MonikerIE(InfoExtractor):
    }]

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
        orig_webpage = self._download_webpage(url, video_id)
+
        fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)
        data = dict(fields)

--- a/youtube_dl/extractor/mooshare.py
+++ b/youtube_dl/extractor/mooshare.py
@@ -4,11 +4,13 @@ import re
 import time

 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
+from ..compat import (
    compat_urllib_request,
    compat_urllib_parse,
 )
+from ..utils import (
+    ExtractorError,
+)


 class MooshareIE(InfoExtractor):
@@ -43,9 +45,7 @@ class MooshareIE(InfoExtractor):
    ]

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id, 'Downloading page')

        if re.search(r'>Video Not Found or Deleted<', page) is not None:
--- a/youtube_dl/extractor/motorsport.py
+++ b/youtube_dl/extractor/motorsport.py
@@ -3,13 +3,14 @@ from __future__ import unicode_literals

 import hashlib
 import json
-import re
 import time

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_parse_qs,
    compat_str,
+)
+from ..utils import (
    int_or_none,
 )

@@ -32,10 +33,9 @@ class MotorsportIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('id')
-
+        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
+
        flashvars_code = self._html_search_regex(
            r'<embed id="player".*?flashvars="([^"]+)"', webpage, 'flashvars')
        flashvars = compat_parse_qs(flashvars_code)
--- a/youtube_dl/extractor/movieclips.py
+++ b/youtube_dl/extractor/movieclips.py
@@ -3,9 +3,11 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..compat import (
+    compat_str,
+)
 from ..utils import (
    ExtractorError,
-    compat_str,
    clean_html,
 )

--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -3,9 +3,11 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_parse,
    compat_urllib_request,
+)
+from ..utils import (
    ExtractorError,
    find_xpath_attr,
    fix_xml_ampersands,
--- a/youtube_dl/extractor/myspass.py
+++ b/youtube_dl/extractor/myspass.py
@@ -2,9 +2,10 @@ from __future__ import unicode_literals
 import os.path

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_parse_urlparse,
-
+)
+from ..utils import (
    ExtractorError,
 )

--- a/youtube_dl/extractor/naver.py
+++ b/youtube_dl/extractor/naver.py
@@ -4,8 +4,10 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_parse,
+)
+from ..utils import (
    ExtractorError,
    clean_html,
 )
@@ -26,9 +28,9 @@ class NaverIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group(1)
+        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
+
        m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
                         webpage)
        if m_id is None:
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -4,8 +4,10 @@ import re
 import json

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_str,
+)
+from ..utils import (
    ExtractorError,
    find_xpath_attr,
 )
--- a/youtube_dl/extractor/nfb.py
+++ b/youtube_dl/extractor/nfb.py
@@ -1,9 +1,7 @@
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_request,
    compat_urllib_parse,
 )
@@ -12,7 +10,7 @@ from ..utils import (
 class NFBIE(InfoExtractor):
    IE_NAME = 'nfb'
    IE_DESC = 'National Film Board of Canada'
-    _VALID_URL = r'https?://(?:www\.)?(nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)'

    _TEST = {
        'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny',
@@ -32,10 +30,10 @@ class NFBIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        page = self._download_webpage('https://www.nfb.ca/film/%s' % video_id, video_id, 'Downloading film page')
+        video_id = self._match_id(url)
+        page = self._download_webpage(
+            'https://www.nfb.ca/film/%s' % video_id, video_id,
+            'Downloading film page')

        uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"',
                                              page, 'director id', fatal=False)
--- a/youtube_dl/extractor/nfl.py
+++ b/youtube_dl/extractor/nfl.py
@@ -4,9 +4,11 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse_urlparse,
+)
 from ..utils import (
    ExtractorError,
-    compat_urllib_parse_urlparse,
    int_or_none,
    remove_end,
 )
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@@ -5,14 +5,16 @@ import re
 import json

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_parse,
    compat_urllib_request,
    compat_urlparse,
-    unified_strdate,
-    parse_duration,
-    int_or_none,
+)
+from ..utils import (
    ExtractorError,
+    int_or_none,
+    parse_duration,
+    unified_strdate,
 )


--- a/youtube_dl/extractor/ninegag.py
+++ b/youtube_dl/extractor/ninegag.py
@@ -23,6 +23,9 @@ class NineGagIE(InfoExtractor):
            "ext": "mp4",
            "description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
            "title": "\"People Are Awesome 2013\" Is Absolutely Awesome",
+            'uploader_id': 'UCdEH6EjDKwtTe-sO2f0_1XA',
+            'uploader': 'CompilationChannel',
+            'upload_date': '20131110',
            "view_count": int,
            "thumbnail": "re:^https?://",
        },
@@ -35,6 +38,9 @@ class NineGagIE(InfoExtractor):
            'display_id': 'alternate-banned-opening-scene-of-gravity',
            "description": "While Gravity was a pretty awesome movie already, YouTuber Krishna Shenoi came up with a way to improve upon it, introducing a much better solution to Sandra Bullock's seemingly endless tumble in space. The ending is priceless.",
            'title': "Banned Opening Scene Of \"Gravity\" That Changes The Whole Movie",
+            'uploader': 'Krishna Shenoi',
+            'upload_date': '20140401',
+            'uploader_id': 'krishnashenoi93',
        },
    }]

--- a/youtube_dl/extractor/noco.py
+++ b/youtube_dl/extractor/noco.py
@@ -6,13 +6,15 @@ import time
 import hashlib

 from .common import InfoExtractor
-from ..utils import (
-    compat_urllib_request,
-    compat_urllib_parse,
-    ExtractorError,
-    clean_html,
-    unified_strdate,
+from ..compat import (
    compat_str,
+    compat_urllib_parse,
+    compat_urllib_request,
+)
+from ..utils import (
+    clean_html,
+    ExtractorError,
+    unified_strdate,
 )


--- a/youtube_dl/extractor/nosvideo.py
+++ b/youtube_dl/extractor/nosvideo.py
@@ -4,9 +4,11 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_request,
+)
 from ..utils import (
    ExtractorError,
-    compat_urllib_request,
    urlencode_postdata,
    xpath_text,
    xpath_with_ns,
@@ -32,8 +34,7 @@ class NosVideoIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)

        fields = {
            'id': video_id,
--- a/youtube_dl/extractor/novamov.py
+++ b/youtube_dl/extractor/novamov.py
@@ -3,9 +3,11 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..compat import (
+    compat_urlparse,
+)
 from ..utils import (
    ExtractorError,
-    compat_urlparse
 )


--- a/youtube_dl/extractor/nowvideo.py
+++ b/youtube_dl/extractor/nowvideo.py
@@ -7,7 +7,7 @@ class NowVideoIE(NovaMovIE):
    IE_NAME = 'nowvideo'
    IE_DESC = 'NowVideo'

-    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|sx|eu|at|ag|co)'}
+    _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|sx|eu|at|ag|co|li)'}

    _HOST = 'www.nowvideo.ch'

--- a/youtube_dl/extractor/nuvid.py
+++ b/youtube_dl/extractor/nuvid.py
@@ -3,15 +3,17 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_request,
+)
 from ..utils import (
    parse_duration,
    unified_strdate,
-    compat_urllib_request,
 )


 class NuvidIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www|m)\.nuvid\.com/video/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www|m)\.nuvid\.com/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://m.nuvid.com/video/1310741/',
        'md5': 'eab207b7ac4fccfb4e23c86201f11277',
@@ -26,8 +28,7 @@ class NuvidIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)

        formats = []

--- a/youtube_dl/extractor/ooyala.py
+++ b/youtube_dl/extractor/ooyala.py
@@ -16,7 +16,6 @@ class OoyalaIE(InfoExtractor):
        {
            # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
            'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
-            'md5': '3f5cceb3a7bf461d6c29dc466cf8033c',
            'info_dict': {
                'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
                'ext': 'mp4',
@@ -26,7 +25,6 @@ class OoyalaIE(InfoExtractor):
        }, {
            # Only available for ipad
            'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
-            'md5': '4b9754921fddb68106e48c142e2a01e6',
            'info_dict': {
                'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
                'ext': 'mp4',
--- a/youtube_dl/extractor/orf.py
+++ b/youtube_dl/extractor/orf.py
@@ -20,21 +20,23 @@ class ORFTVthekIE(InfoExtractor):
    _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)'

    _TEST = {
-        'url': 'http://tvthek.orf.at/program/matinee-Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7317210/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319746/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319747',
-        'file': '7319747.mp4',
-        'md5': 'bd803c5d8c32d3c64a0ea4b4eeddf375',
+        'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389',
+        'playlist': [{
+            'md5': '2942210346ed779588f428a92db88712',
            'info_dict': {
-            'title': 'Was Sie schon immer über Klassik wissen wollten',
-            'description': 'md5:0ddf0d5f0060bd53f744edaa5c2e04a4',
-            'duration': 3508,
-            'upload_date': '20140105',
+                'id': '8896777',
+                'ext': 'mp4',
+                'title': 'Aufgetischt: Mit der Steirischen Tafelrunde',
+                'description': 'md5:c1272f0245537812d4e36419c207b67d',
+                'duration': 2668,
+                'upload_date': '20141208',
            },
+        }],
        'skip': 'Blocked outside of Austria',
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        playlist_id = mobj.group('id')
+        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        data_json = self._search_regex(
@@ -120,9 +122,7 @@ class ORFOE1IE(InfoExtractor):
    _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        show_id = mobj.group('id')
-
+        show_id = self._match_id(url)
        data = self._download_json(
            'http://oe1.orf.at/programm/%s/konsole' % show_id,
            show_id
--- a/youtube_dl/extractor/photobucket.py
+++ b/youtube_dl/extractor/photobucket.py
@@ -4,16 +4,17 @@ import json
 import re

 from .common import InfoExtractor
-from ..utils import compat_urllib_parse
+from ..compat import compat_urllib_parse


 class PhotobucketIE(InfoExtractor):
    _VALID_URL = r'http://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
    _TEST = {
        'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
-        'file': 'zpsc0c3b9fa.mp4',
        'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99',
        'info_dict': {
+            'id': 'zpsc0c3b9fa',
+            'ext': 'mp4',
            'timestamp': 1367669341,
            'upload_date': '20130504',
            'uploader': 'rachaneronas',
--- a/youtube_dl/extractor/played.py
+++ b/youtube_dl/extractor/played.py
@@ -5,11 +5,13 @@ import re
 import os.path

 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
+from ..compat import (
    compat_urllib_parse,
    compat_urllib_request,
 )
+from ..utils import (
+    ExtractorError,
+)


 class PlayedIE(InfoExtractor):
@@ -28,7 +30,6 @@ class PlayedIE(InfoExtractor):

    def _real_extract(self, url):
        video_id = self._match_id(url)
-
        orig_webpage = self._download_webpage(url, video_id)

        m_error = re.search(
--- a/youtube_dl/extractor/playfm.py
+++ b/youtube_dl/extractor/playfm.py
@@ -4,9 +4,11 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_parse,
    compat_urllib_request,
+)
+from ..utils import (
    ExtractorError,
    float_or_none,
    int_or_none,
--- a/youtube_dl/extractor/playvid.py
+++ b/youtube_dl/extractor/playvid.py
@@ -3,31 +3,31 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-    clean_html,
+from ..compat import (
    compat_urllib_parse,
 )
+from ..utils import (
+    clean_html,
+    ExtractorError,
+)


 class PlayvidIE(InfoExtractor):
-    _VALID_URL = r'^https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)'
+    _VALID_URL = r'https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)'
    _TEST = {
-        'url': 'http://www.playvid.com/watch/agbDDi7WZTV',
-        'md5': '44930f8afa616efdf9482daf4fe53e1e',
+        'url': 'http://www.playvid.com/watch/RnmBNgtrrJu',
+        'md5': 'ffa2f6b2119af359f544388d8c01eb6c',
        'info_dict': {
-            'id': 'agbDDi7WZTV',
+            'id': 'RnmBNgtrrJu',
            'ext': 'mp4',
-            'title': 'Michelle Lewin in Miami Beach',
-            'duration': 240,
+            'title': 'md5:9256d01c6317e3f703848b5906880dc8',
+            'duration': 82,
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        m_error = re.search(
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -4,10 +4,12 @@ import os
 import re

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
+    compat_urllib_parse,
    compat_urllib_parse_urlparse,
    compat_urllib_request,
-    compat_urllib_parse,
+)
+from ..utils import (
    str_to_int,
 )
 from ..aes import (
@@ -16,7 +18,7 @@ from ..aes import (


 class PornHubIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?pornhub\.com/view_video\.php\?viewkey=(?P<id>[0-9a-f]+)'
+    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/view_video\.php\?viewkey=(?P<id>[0-9a-f]+)'
    _TEST = {
        'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
        'md5': '882f488fa1f0026f023f33576004a2ed',
--- a/youtube_dl/extractor/pornotube.py
+++ b/youtube_dl/extractor/pornotube.py
@@ -1,56 +1,94 @@
 from __future__ import unicode_literals

-import re
+import json

 from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_request,
+)
 from ..utils import (
-    compat_urllib_parse,
-
-    unified_strdate,
+    int_or_none,
 )


 class PornotubeIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$'
+    _VALID_URL = r'https?://(?:\w+\.)?pornotube\.com/(?:[^?#]*?)/video/(?P<id>[0-9]+)'
    _TEST = {
-        'url': 'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing',
-        'md5': '374dd6dcedd24234453b295209aa69b6',
+        'url': 'http://www.pornotube.com/orientation/straight/video/4964/title/weird-hot-and-wet-science',
+        'md5': '60fc5a4f0d93a97968fc7999d98260c9',
        'info_dict': {
-            'id': '1689755',
-            'ext': 'flv',
-            'upload_date': '20090708',
-            'title': 'Marilyn-Monroe-Bathing',
-            'age_limit': 18
+            'id': '4964',
+            'ext': 'mp4',
+            'upload_date': '20141203',
+            'title': 'Weird Hot and Wet Science',
+            'description': 'md5:a8304bef7ef06cb4ab476ca6029b01b0',
+            'categories': ['Adult Humor', 'Blondes'],
+            'uploader': 'Alpha Blue Archives',
+            'thumbnail': 're:^https?://.*\\.jpg$',
+            'timestamp': 1417582800,
+            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        video_id = self._match_id(url)

-        video_id = mobj.group('videoid')
-        video_title = mobj.group('title')
+        # Fetch origin token (the space key is read out of the site's JS config)
+        js_config = self._download_webpage(
+            'http://www.pornotube.com/assets/src/app/config.js', video_id,
+            note='Download JS config')
+        originAuthenticationSpaceKey = self._search_regex(
+            r"constant\('originAuthenticationSpaceKey',\s*'([^']+)'",
+            js_config, 'originAuthenticationSpaceKey')

-        # Get webpage content
-        webpage = self._download_webpage(url, video_id)
+        # Fetch actual token: send the space key as a JSON body to the auth API
+        token_req_data = {
+            'authenticationSpaceKey': originAuthenticationSpaceKey,
+            'credentials': 'Clip Application',
+        }
+        token_req = compat_urllib_request.Request(
+            'https://api.aebn.net/auth/v1/token/primal',
+            data=json.dumps(token_req_data).encode('utf-8'))
+        token_req.add_header('Content-Type', 'application/json')
+        token_req.add_header('Origin', 'http://www.pornotube.com')
+        token_answer = self._download_json(
+            token_req, video_id, note='Requesting primal token')
+        token = token_answer['tokenKey']

-        # Get the video URL
-        VIDEO_URL_RE = r'url: "(?P<url>http://video[0-9].pornotube.com/.+\.flv)",'
-        video_url = self._search_regex(VIDEO_URL_RE, webpage, 'video url')
-        video_url = compat_urllib_parse.unquote(video_url)
+        # Get video URL (the token is passed in the Authorization header)
+        delivery_req = compat_urllib_request.Request(
+            'https://api.aebn.net/delivery/v1/clips/%s/MP4' % video_id)
+        delivery_req.add_header('Authorization', token)
+        delivery_info = self._download_json(
+            delivery_req, video_id, note='Downloading delivery information')
+        video_url = delivery_info['mediaUrl']

-        # Get the uploaded date
-        VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
-        upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, 'upload date', fatal=False)
-        if upload_date:
-            upload_date = unified_strdate(upload_date)
-        age_limit = self._rta_search(webpage)
+        # Get additional info (title etc.)
+        info_req = compat_urllib_request.Request(
+            'https://api.aebn.net/content/v1/clips/%s?expand='
+            'title,description,primaryImageNumber,startSecond,endSecond,'
+            'movie.title,movie.MovieId,movie.boxCoverFront,movie.stars,'
+            'movie.studios,stars.name,studios.name,categories.name,'
+            'clipActive,movieActive,publishDate,orientations' % video_id)
+        info_req.add_header('Authorization', token)
+        info = self._download_json(
+            info_req, video_id, note='Downloading metadata')
+
+        timestamp = int_or_none(info.get('publishDate'), scale=1000)
+        uploader = (info.get('studios') or [{}])[0].get('name')  # studios may be missing, null or empty
+        movie_id = info['movie']['movieId']
+        thumbnail = 'http://pic.aebn.net/dis/t/%s/%s_%08d.jpg' % (
+            movie_id, movie_id, info['primaryImageNumber'])
+        categories = [c['name'] for c in (info.get('categories') or [])]  # optional field: do not crash when absent

        return {
            'id': video_id,
            'url': video_url,
-            'upload_date': upload_date,
-            'title': video_title,
-            'ext': 'flv',
-            'format': 'flv',
-            'age_limit': age_limit,
+            'title': info['title'],
+            'description': info.get('description'),
+            'timestamp': timestamp,
+            'uploader': uploader,
+            'thumbnail': thumbnail,
+            'categories': categories,
+            'age_limit': 18,
        }
--- a/youtube_dl/extractor/promptfile.py
+++ b/youtube_dl/extractor/promptfile.py
@@ -4,12 +4,14 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-    determine_ext,
+from ..compat import (
    compat_urllib_parse,
    compat_urllib_request,
 )
+from ..utils import (
+    determine_ext,
+    ExtractorError,
+)


 class PromptFileIE(InfoExtractor):
--- a/youtube_dl/extractor/prosiebensat1.py
+++ b/youtube_dl/extractor/prosiebensat1.py
@@ -5,8 +5,10 @@ import re

 from hashlib import sha1
 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_parse,
+)
+from ..utils import (
    unified_strdate,
 )

--- a/youtube_dl/extractor/quickvid.py
+++ b/youtube_dl/extractor/quickvid.py
@@ -3,8 +3,10 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urlparse,
+)
+from ..utils import (
    determine_ext,
    int_or_none,
 )
--- a/youtube_dl/extractor/rai.py
+++ b/youtube_dl/extractor/rai.py
@@ -3,10 +3,12 @@ from __future__ import unicode_literals
 import re

 from .subtitles import SubtitlesInfoExtractor
+from ..compat import (
+    compat_urllib_parse,
+)
 from ..utils import (
    parse_duration,
    unified_strdate,
-    compat_urllib_parse,
 )


--- a/youtube_dl/extractor/restudy.py
+++ b/youtube_dl/extractor/restudy.py
@@ -0,0 +1,51 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class RestudyIE(InfoExtractor):
+    """Extractor for single video pages on restudy.dk."""
+
+    _VALID_URL = r'https?://(?:www\.)?restudy\.dk/video/play/id/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'https://www.restudy.dk/video/play/id/1637',
+        'info_dict': {
+            'id': '1637',
+            'ext': 'flv',
+            'title': 'Leiden-frosteffekt',
+            'description': 'Denne video er et eksperiment med flydende kvælstof.',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict for the video page at *url*.
+
+        Title and description are read with the ``_og_search_*`` helpers;
+        the per-video SmilDirectory XML URL is handed to
+        ``_extract_smil_formats`` to obtain the formats.
+        """
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._og_search_title(webpage).strip()
+        # Only strip when a description was found: calling .strip() on a
+        # missing (None) result would raise AttributeError.
+        description = self._og_search_description(webpage)
+        if description:
+            description = description.strip()
+
+        formats = self._extract_smil_formats(
+            'https://www.restudy.dk/awsmedia/SmilDirectory/video_%s.xml' % video_id,
+            video_id)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/rtp.py
+++ b/youtube_dl/extractor/rtp.py
@@ -0,0 +1,70 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import js_to_json
+
+
+class RTPIE(InfoExtractor):
+    """Extractor for rtp.pt/play episode pages (delivered over RTMP)."""
+
+    _VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/e(?P<id>[0-9]+)/?'
+    _TEST = {
+        'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas',
+        'info_dict': {
+            'id': '174042',
+            'ext': 'mp3',
+            'title': 'Paixões Cruzadas',
+            'description': 'As paixões musicais de António Cartaxo e António Macedo',
+            # Raw string: '\.' is not a valid escape in a plain literal.
+            'thumbnail': r're:^https?://.*\.jpg',
+        },
+        'params': {
+            'skip_download': True,  # RTMP download
+        },
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict for the episode page at *url*.
+
+        Title, description and thumbnail are read with the meta/OG search
+        helpers; the single format is assembled from the argument passed to
+        ``RTPPLAY.player.newPlayer(...)`` in the page source.
+        """
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+        title = self._html_search_meta(
+            'twitter:title', webpage, display_name='title', fatal=True)
+        description = self._html_search_meta('description', webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        player_config = self._search_regex(
+            r'(?s)RTPPLAY\.player\.newPlayer\(\s*(\{.*?\})\s*\)', webpage, 'player config')
+        # The captured player argument is not parsed directly; it is first
+        # passed through js_to_json and only then through json.loads.
+        config = json.loads(js_to_json(player_config))
+
+        # Split "name.ext" on the last dot; the play path puts ext first.
+        path, ext = config.get('file').rsplit('.', 1)
+        formats = [{
+            'app': config.get('application'),
+            'play_path': '{ext:s}:{path:s}'.format(ext=ext, path=path),
+            'page_url': url,
+            'url': 'rtmp://{streamer:s}/{application:s}'.format(**config),
+            'rtmp_live': config.get('live', False),
+            'ext': ext,
+            # Entries whose type is 'audio' get vcodec 'none', others None.
+            'vcodec': 'none' if config.get('type') == 'audio' else None,
+            'player_url': 'http://programas.rtp.pt/play/player.swf?v3',
+        }]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'description': description,
+            'thumbnail': thumbnail,
+        }
--- a/youtube_dl/extractor/rts.py
+++ b/youtube_dl/extractor/rts.py
@@ -4,12 +4,14 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..compat import (
+    compat_str,
+)
 from ..utils import (
    int_or_none,
    parse_duration,
    parse_iso8601,
    unescapeHTML,
-    compat_str,
 )


--- a/youtube_dl/extractor/rutube.py
+++ b/youtube_dl/extractor/rutube.py
@@ -5,10 +5,12 @@ import re
 import itertools

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_str,
-    unified_strdate,
+)
+from ..utils import (
    ExtractorError,
+    unified_strdate,
 )


@@ -36,9 +38,7 @@ class RutubeIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
        video = self._download_json(
            'http://rutube.ru/api/video/%s/?format=json' % video_id,
            video_id, 'Downloading video JSON')
@@ -114,8 +114,7 @@ class RutubeMovieIE(RutubeChannelIE):
    _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        movie_id = mobj.group('id')
+        movie_id = self._match_id(url)
        movie = self._download_json(
            self._MOVIE_TEMPLATE % movie_id, movie_id,
            'Downloading movie JSON')
--- a/youtube_dl/extractor/screencast.py
+++ b/youtube_dl/extractor/screencast.py
@@ -1,14 +1,14 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
+from ..compat import (
    compat_parse_qs,
    compat_urllib_request,
 )
+from ..utils import (
+    ExtractorError,
+)


 class ScreencastIE(InfoExtractor):
@@ -57,8 +57,7 @@ class ScreencastIE(InfoExtractor):
    ]

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        video_url = self._html_search_regex(
--- a/youtube_dl/extractor/shared.py
+++ b/youtube_dl/extractor/shared.py
@@ -4,10 +4,12 @@ import re
 import base64

 from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse,
+    compat_urllib_request,
+)
 from ..utils import (
    ExtractorError,
-    compat_urllib_request,
-    compat_urllib_parse,
    int_or_none,
 )

@@ -26,26 +28,30 @@ class SharedIE(InfoExtractor):
    }

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)

-        page = self._download_webpage(url, video_id)
+        if '>File does not exist<' in webpage:
+            raise ExtractorError(
+                'Video %s does not exist' % video_id, expected=True)

-        if re.search(r'>File does not exist<', page) is not None:
-            raise ExtractorError('Video %s does not exist' % video_id, expected=True)
-
-        download_form = dict(re.findall(r'<input type="hidden" name="([^"]+)" value="([^"]*)"', page))
-
-        request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(download_form))
+        download_form = dict(re.findall(
+            r'<input type="hidden" name="([^"]+)" value="([^"]*)"', webpage))
+        request = compat_urllib_request.Request(
+            url, compat_urllib_parse.urlencode(download_form))
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')

-        video_page = self._download_webpage(request, video_id, 'Downloading video page')
+        video_page = self._download_webpage(
+            request, video_id, 'Downloading video page')

-        video_url = self._html_search_regex(r'data-url="([^"]+)"', video_page, 'video URL')
-        title = base64.b64decode(self._html_search_meta('full:title', page, 'title')).decode('utf-8')
-        filesize = int_or_none(self._html_search_meta('full:size', page, 'file size', fatal=False))
+        video_url = self._html_search_regex(
+            r'data-url="([^"]+)"', video_page, 'video URL')
+        title = base64.b64decode(self._html_search_meta(
+            'full:title', webpage, 'title')).decode('utf-8')
+        filesize = int_or_none(self._html_search_meta(
+            'full:size', webpage, 'file size', fatal=False))
        thumbnail = self._html_search_regex(
-            r'data-poster="([^"]+)"', video_page, 'thumbnail', fatal=False, default=None)
+            r'data-poster="([^"]+)"', video_page, 'thumbnail', default=None)

        return {
            'id': video_id,
--- a/youtube_dl/extractor/sharesix.py
+++ b/youtube_dl/extractor/sharesix.py
@@ -4,9 +4,11 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_parse,
    compat_urllib_request,
+)
+from ..utils import (
    parse_duration,
 )

--- a/youtube_dl/extractor/sina.py
+++ b/youtube_dl/extractor/sina.py
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
-from ..utils import (
+from ..compat import (
    compat_urllib_request,
    compat_urllib_parse,
 )
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Philipp Hagemeister	051c46256b	release 2014.12.13	2014-12-13 23:13:48 +01:00
Philipp Hagemeister	d5524947b5	Merge remote-tracking branch 'fstirlitz/master'	2014-12-13 23:05:41 +01:00
Philipp Hagemeister	74f91c4af7	Merge branch 'master' of github.com:rg3/youtube-dl	2014-12-13 23:05:28 +01:00
Philipp Hagemeister	da4d4191a9	Merge branch 'master' of github.com:rg3/youtube-dl	2014-12-13 23:05:22 +01:00
Sergey M․	2564300e55	Credit @Mortal for restudy (#4463 )	2014-12-14 03:42:42 +06:00
Sergey M․	cb0713d2c9	Merge branch 'Mortal-restudy'	2014-12-14 03:41:17 +06:00
Sergey M․	ac265bef1e	[restudy] Simplify and extract all formats	2014-12-14 03:41:00 +06:00
Mathias Rav	4a0132c570	[Restudy] Add new extractor for restudy.dk	2014-12-13 22:25:32 +01:00
Sergey M․	1fa174692a	[bandcamp:album] Make path optional (Closes #4461 )	2014-12-14 02:00:54 +06:00
Sergey M․	04c9544187	[bbccouk] Fix vpid warning	2014-12-13 18:47:34 +06:00
Sergey M․	8085fc15cc	[adultswim] Improve segment duration extraction	2014-12-13 18:42:29 +06:00
Philipp Hagemeister	2f15832f56	Merge pull request #3927 from qrtt1/master apply ratelimit to f4m	2014-12-13 12:59:12 +01:00
Jaime Marquínez Ferrándiz	1557ed153c	[test_unicode_literals] Import from test.helper	2014-12-13 12:45:09 +01:00
Philipp Hagemeister	a6620ac28d	[orf] Modernize	2014-12-13 12:41:38 +01:00
Philipp Hagemeister	89e36657cc	[keek] remove unused import	2014-12-13 12:36:46 +01:00
Philipp Hagemeister	7129bed51b	[keek] Modernize and extract uploader	2014-12-13 12:35:45 +01:00
Philipp Hagemeister	1cc79574fc	Fix imports and general cleanup · Import from compat what comes from compat. Yes, some names are available in utils too, but that's an implementation detail. · Use _match_id consistently whenever possible · Fix some outdated tests · Use consistent valid URL (always match the whole protocol, no ^ at start required) · Use modern test definitions	2014-12-13 12:35:45 +01:00
Philipp Hagemeister	20e35880bf	[streamcz] Update extractor	2014-12-13 12:35:45 +01:00
Philipp Hagemeister	5e1912cfc1	[5min] Remove helper method and modernize Previously, other extractor would go call a private(!) helper method. Instead, just hardcode the 5min:video_id format - it's not if that would ever change.	2014-12-13 12:35:45 +01:00
Jaime Marquínez Ferrándiz	293f0f39ce	[utils] make_HTTPS_handler: Remove try/except block that would always raise an exception This code is only run for Python < 3.4, where context.load_default_certs doesn't exist	2014-12-12 23:43:25 +01:00
Jaime Marquínez Ferrándiz	0db261ba56	[utils] make_HTTPS_handler: Use ssl.create_default_context in Python 2.7.9 The new features in the ssl module have been backported from 3.4, see https://docs.python.org/dev/whatsnew/2.7.html#pep-466-network-security-enhancements-for-python-2-7	2014-12-12 23:35:17 +01:00
felix	7668a2c5cb	[comcarcoff] add webpage_url datum	2014-12-12 23:20:34 +01:00
Jaime Marquínez Ferrándiz	26c06f0c51	[youtube:playlist] Remove unused property	2014-12-12 22:26:50 +01:00
Jaime Marquínez Ferrándiz	23d3608c6b	[youtube:channel] Fix extraction (fixes #4435 ) It uses now the same pagination system as playlists	2014-12-12 22:23:54 +01:00
Philipp Hagemeister	baa7081d68	[urort] Update to new multi-format protocol	2014-12-12 20:55:18 +01:00
Philipp Hagemeister	19bf2b4e88	[comcarcoff] Add unicode_literals declaration	2014-12-12 20:37:58 +01:00
Philipp Hagemeister	6a1b20de2a	[urort] Modernize	2014-12-12 20:37:28 +01:00
Philipp Hagemeister	3c864e930d	[comcarcoff] Adapt c62159ea91a04ef82560472b254aef1cc9f70a11	2014-12-12 20:35:17 +01:00
Philipp Hagemeister	dc5596ff54	[comcarcoff] (#4454 )	2014-12-12 20:32:02 +01:00
Philipp Hagemeister	46d9760f5e	Merge remote-tracking branch 'fstirlitz/master'	2014-12-12 20:17:26 +01:00
Philipp Hagemeister	90d71d3f08	[ooyala] Remove test md5sums	2014-12-12 20:12:51 +01:00
Philipp Hagemeister	e9404524cc	[ninegag] Test for additional properties	2014-12-12 20:10:15 +01:00
felix	dc65a213fd	comediansincarsgettingcoffee.com support	2014-12-12 19:58:44 +01:00
Philipp Hagemeister	4237ba10dc	[pornotube] Adapt to new interface	2014-12-12 19:44:25 +01:00
Naglis Jonaitis	c3f3b29b92	[rtp] Add new extractor (Closes #4382 )	2014-12-12 20:22:24 +02:00
Philipp Hagemeister	1c985da0ca	release 2014.12.12.7	2014-12-12 18:25:58 +01:00
Philipp Hagemeister	7a60322abf	release 2014.12.12.6	2014-12-12 17:52:50 +01:00
Sergey M․	07bc9a3530	[nowvideo] Add .li domain (Closes #4453 )	2014-12-12 22:44:16 +06:00
Philipp Hagemeister	a099965bad	release 2014.12.12.5	2014-12-12 17:40:27 +01:00
Philipp Hagemeister	146323a7f8	[groupon] Add extractor (Fixes #4386 )	2014-12-12 17:39:33 +01:00
Philipp Hagemeister	57e086dcea	[ebaumsworld] Modernize	2014-12-12 17:24:05 +01:00
Ching Yi, Chan	b1c3a49fff	apply ratelimit to f4m	2014-10-12 08:32:26 +08:00