release 2014.09.04

[youtube] Move cache into its own module
Merge branch 'peugeot-tnaflix'
2014-09-04 01:30:43 +02:00 · 2014-09-03 17:29:19 +02:00 · 2014-09-03 21:08:50 +07:00 · 2014-09-03 21:08:36 +07:00 · 2014-09-03 21:07:18 +07:00 · 2014-09-03 21:03:36 +07:00
18 changed files with 458 additions and 164 deletions
--- a/test/test_cache.py
+++ b/test/test_cache.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import shutil
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+from test.helper import FakeYDL
+from youtube_dl.cache import Cache
+
+
+def _is_empty(d):
+    return not bool(os.listdir(d))
+
+
+def _mkdir(d):
+    if not os.path.exists(d):
+        os.mkdir(d)
+
+
+class TestCache(unittest.TestCase):
+    def setUp(self):
+        TEST_DIR = os.path.dirname(os.path.abspath(__file__))
+        TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata')
+        _mkdir(TESTDATA_DIR)
+        self.test_dir = os.path.join(TESTDATA_DIR, 'cache_test')
+        self.tearDown()
+
+    def tearDown(self):
+        if os.path.exists(self.test_dir):
+            shutil.rmtree(self.test_dir)
+
+    def test_cache(self):
+        ydl = FakeYDL({
+            'cachedir': self.test_dir,
+        })
+        c = Cache(ydl)
+        obj = {'x': 1, 'y': ['ä', '\\a', True]}
+        self.assertEqual(c.load('test_cache', 'k'), None)
+        c.store('test_cache', 'k', obj)
+        self.assertEqual(c.load('test_cache', 'k2'), None)
+        self.assertFalse(_is_empty(self.test_dir))
+        self.assertEqual(c.load('test_cache', 'k'), obj)
+        self.assertEqual(c.load('test_cache', 'y'), None)
+        self.assertEqual(c.load('test_cache2', 'k'), None)
+        c.remove()
+        self.assertFalse(os.path.exists(self.test_dir))
+        self.assertEqual(c.load('test_cache', 'k'), None)
+
+
+if __name__ == '__main__':
+    unittest.main()
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -57,6 +57,7 @@ from .utils import (
    YoutubeDLHandler,
    prepend_extension,
 )
+from .cache import Cache
 from .extractor import get_info_extractor, gen_extractors
 from .downloader import get_suitable_downloader
 from .postprocessor import FFmpegMergerPP
@@ -133,7 +134,7 @@ class YoutubeDL(object):
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
-                       None to disable filesystem cache.
+                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
@@ -195,6 +196,7 @@ class YoutubeDL(object):
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = params
+        self.cache = Cache(self)

        if params.get('bidi_workaround', False):
            try:
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -84,7 +84,6 @@ import optparse
 import os
 import random
 import shlex
-import shutil
 import sys


@@ -96,7 +95,6 @@ from .utils import (
    decodeOption,
    get_term_width,
    DownloadError,
-    get_cachedir,
    MaxDownloadsReached,
    preferredencoding,
    read_batch_urls,
@@ -518,10 +516,10 @@ def parseOpts(overrideArguments=None):
    filesystem.add_option('--cookies',
            dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
    filesystem.add_option(
-        '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
+        '--cache-dir', dest='cachedir', default=None, metavar='DIR',
        help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
    filesystem.add_option(
-        '--no-cache-dir', action='store_const', const=None, dest='cachedir',
+        '--no-cache-dir', action='store_const', const=False, dest='cachedir',
        help='Disable filesystem caching')
    filesystem.add_option(
        '--rm-cache-dir', action='store_true', dest='rm_cachedir',
@@ -872,20 +870,7 @@ def _real_main(argv=None):

        # Remove cache dir
        if opts.rm_cachedir:
-            if opts.cachedir is None:
-                ydl.to_screen(u'No cache dir specified (Did you combine --no-cache-dir and --rm-cache-dir?)')
-            else:
-                if ('.cache' not in opts.cachedir) or ('youtube-dl' not in opts.cachedir):
-                    ydl.to_screen(u'Not removing directory %s - this does not look like a cache dir' % opts.cachedir)
-                    retcode = 141
-                else:
-                    ydl.to_screen(
-                        u'Removing cache dir %s .' % opts.cachedir,
-                        skip_eol=True)
-                    if os.path.exists(opts.cachedir):
-                        ydl.to_screen(u'.', skip_eol=True)
-                        shutil.rmtree(opts.cachedir)
-                    ydl.to_screen(u'.')
+            ydl.cache.remove()

        # Maybe do nothing
        if (len(all_urls) < 1) and (opts.load_info_filename is None):
--- a/youtube_dl/cache.py
+++ b/youtube_dl/cache.py
@@ -0,0 +1,93 @@
+from __future__ import unicode_literals
+
+import errno
+import io
+import json
+import os
+import re
+import shutil
+import traceback
+
+from .utils import (
+    write_json_file,
+)
+
+
+class Cache(object):
+    def __init__(self, ydl):
+        self._ydl = ydl
+
+    def _get_root_dir(self):
+        res = self._ydl.params.get('cachedir')
+        if res is None:
+            cache_root = os.environ.get('XDG_CACHE_HOME', '~/.cache')
+            res = os.path.join(cache_root, 'youtube-dl')
+        return os.path.expanduser(res)
+
+    def _get_cache_fn(self, section, key, dtype):
+        assert re.match(r'^[a-zA-Z0-9_-]+$', section)
+        assert re.match(r'^[a-zA-Z0-9_-]+$', key)
+        return os.path.join(
+            self._get_root_dir(), section, '%s.%s' % (key, dtype))
+
+    @property
+    def enabled(self):
+        return self._ydl.params.get('cachedir') is not False
+
+    def store(self, section, key, data, dtype='json'):
+        assert dtype in ('json',)
+
+        if not self.enabled:
+            return
+
+        fn = self._get_cache_fn(section, key, dtype)
+        try:
+            try:
+                os.makedirs(os.path.dirname(fn))
+            except OSError as ose:
+                if ose.errno != errno.EEXIST:
+                    raise
+            write_json_file(data, fn)
+        except Exception:
+            tb = traceback.format_exc()
+            self._ydl.report_warning(
+                'Writing cache to %r failed: %s' % (fn, tb))
+
+    def load(self, section, key, dtype='json', default=None):
+        assert dtype in ('json',)
+
+        if not self.enabled:
+            return default
+
+        cache_fn = self._get_cache_fn(section, key, dtype)
+        try:
+            try:
+                with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
+                    return json.load(cachef)
+            except ValueError:
+                try:
+                    file_size = os.path.getsize(cache_fn)
+                except (OSError, IOError) as oe:
+                    file_size = str(oe)
+                self._ydl.report_warning(
+                    'Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
+        except IOError:
+            pass  # No cache available
+
+        return default
+
+    def remove(self):
+        if not self.enabled:
+            self._ydl.to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)')
+            return
+
+        cachedir = self._get_root_dir()
+        if not any((term in cachedir) for term in ('cache', 'tmp')):
+            raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir)
+
+        self._ydl.to_screen(
+            'Removing cache dir %s .' % cachedir, skip_eol=True)
+        if os.path.exists(cachedir):
+            self._ydl.to_screen('.', skip_eol=True)
+            shutil.rmtree(cachedir)
+        self._ydl.to_screen('.')
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -86,7 +86,7 @@ from .ellentv import (
    EllenTVClipsIE,
 )
 from .elpais import ElPaisIE
-from .empflix import EmpflixIE
+from .empflix import EMPFlixIE
 from .engadget import EngadgetIE
 from .eporner import EpornerIE
 from .escapist import EscapistIE
@@ -235,6 +235,7 @@ from .niconico import NiconicoIE
 from .ninegag import NineGagIE
 from .noco import NocoIE
 from .normalboots import NormalbootsIE
+from .nosvideo import NosVideoIE
 from .novamov import NovaMovIE
 from .nowness import NownessIE
 from .nowvideo import NowVideoIE
@@ -347,6 +348,7 @@ from .theplatform import ThePlatformIE
 from .thisav import ThisAVIE
 from .tinypic import TinyPicIE
 from .tlc import TlcIE, TlcDeIE
+from .tnaflix import TNAFlixIE
 from .toutv import TouTvIE
 from .toypics import ToypicsUserIE, ToypicsIE
 from .traileraddict import TrailerAddictIE
--- a/youtube_dl/extractor/beeg.py
+++ b/youtube_dl/extractor/beeg.py
@@ -27,8 +27,16 @@ class BeegIE(InfoExtractor):

        webpage = self._download_webpage(url, video_id)

-        video_url = self._html_search_regex(
-            r"'480p'\s*:\s*'([^']+)'", webpage, 'video URL')
+        quality_arr = self._search_regex(
+            r'(?s)var\s+qualityArr\s*=\s*{\s*(.+?)\s*}', webpage, 'quality formats')
+
+        formats = [{
+            'url': fmt[1],
+            'format_id': fmt[0],
+            'height': int(fmt[0][:-1]),
+        } for fmt in re.findall(r"'([^']+)'\s*:\s*'([^']+)'", quality_arr)]
+
+        self._sort_formats(formats)

        title = self._html_search_regex(
            r'<title>([^<]+)\s*-\s*beeg\.?</title>', webpage, 'title')
@@ -48,10 +56,10 @@ class BeegIE(InfoExtractor):

        return {
            'id': video_id,
-            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'categories': categories,
+            'formats': formats,
            'age_limit': 18,
        }
--- a/youtube_dl/extractor/drtuber.py
+++ b/youtube_dl/extractor/drtuber.py
@@ -3,17 +3,22 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..utils import str_to_int


 class DrTuberIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?drtuber\.com/video/(?P<id>\d+)/(?P<title_dash>[\w-]+)'
+    _VALID_URL = r'https?://(?:www\.)?drtuber\.com/video/(?P<id>\d+)/(?P<display_id>[\w-]+)'
    _TEST = {
        'url': 'http://www.drtuber.com/video/1740434/hot-perky-blonde-naked-golf',
        'md5': '93e680cf2536ad0dfb7e74d94a89facd',
        'info_dict': {
            'id': '1740434',
+            'display_id': 'hot-perky-blonde-naked-golf',
            'ext': 'mp4',
            'title': 'Hot Perky Blonde Naked Golf',
+            'like_count': int,
+            'dislike_count': int,
+            'comment_count': int,
            'categories': list,  # NSFW
            'thumbnail': 're:https?://.*\.jpg$',
            'age_limit': 18,
@@ -23,8 +28,9 @@ class DrTuberIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
+        display_id = mobj.group('display_id')

-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(url, display_id)

        video_url = self._html_search_regex(
            r'<source src="([^"]+)"', webpage, 'video URL')
@@ -36,15 +42,29 @@ class DrTuberIE(InfoExtractor):
            r'poster="([^"]+)"',
            webpage, 'thumbnail', fatal=False)

+        like_count = str_to_int(self._html_search_regex(
+            r'<span id="rate_likes">\s*<img[^>]+>\s*<span>([\d,\.]+)</span>',
+            webpage, 'like count', fatal=False))
+        dislike_count = str_to_int(self._html_search_regex(
+            r'<span id="rate_dislikes">\s*<img[^>]+>\s*<span>([\d,\.]+)</span>',
+            webpage, 'dislike count', fatal=False))
+        comment_count = str_to_int(self._html_search_regex(
+            r'<span class="comments_count">([\d,\.]+)</span>',
+            webpage, 'comment count', fatal=False))
+
        cats_str = self._html_search_regex(
            r'<meta name="keywords" content="([^"]+)"', webpage, 'categories', fatal=False)
        categories = None if cats_str is None else cats_str.split(' ')

        return {
            'id': video_id,
+            'display_id': display_id,
            'url': video_url,
            'title': title,
            'thumbnail': thumbnail,
+            'like_count': like_count,
+            'dislike_count': dislike_count,
+            'comment_count': comment_count,
            'categories': categories,
            'age_limit': self._rta_search(webpage),
        }
--- a/youtube_dl/extractor/empflix.py
+++ b/youtube_dl/extractor/empflix.py
@@ -1,58 +1,25 @@
 from __future__ import unicode_literals

-import re
-
-from .common import InfoExtractor
-from ..utils import fix_xml_ampersands
+from .tnaflix import TNAFlixIE


-class EmpflixIE(InfoExtractor):
-    _VALID_URL = r'^https?://www\.empflix\.com/videos/.*?-(?P<id>[0-9]+)\.html'
+class EMPFlixIE(TNAFlixIE):
+    _VALID_URL = r'^https?://www\.empflix\.com/videos/(?P<display_id>[0-9a-zA-Z-]+)-(?P<id>[0-9]+)\.html'
+
+    _TITLE_REGEX = r'name="title" value="(?P<title>[^"]*)"'
+    _DESCRIPTION_REGEX = r'name="description" value="([^"]*)"'
+    _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
+
    _TEST = {
        'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
        'md5': 'b1bc15b6412d33902d6e5952035fcabc',
        'info_dict': {
            'id': '33051',
+            'display_id': 'Amateur-Finger-Fuck',
            'ext': 'mp4',
            'title': 'Amateur Finger Fuck',
            'description': 'Amateur solo finger fucking.',
+            'thumbnail': 're:https?://.*\.jpg$',
            'age_limit': 18,
        }
    }
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
-        webpage = self._download_webpage(url, video_id)
-        age_limit = self._rta_search(webpage)
-
-        video_title = self._html_search_regex(
-            r'name="title" value="(?P<title>[^"]*)"', webpage, 'title')
-        video_description = self._html_search_regex(
-            r'name="description" value="([^"]*)"', webpage, 'description', fatal=False)
-
-        cfg_url = self._html_search_regex(
-            r'flashvars\.config = escape\("([^"]+)"',
-            webpage, 'flashvars.config')
-
-        cfg_xml = self._download_xml(
-            cfg_url, video_id, note='Downloading metadata',
-            transform_source=fix_xml_ampersands)
-
-        formats = [
-            {
-                'url': item.find('videoLink').text,
-                'format_id': item.find('res').text,
-            } for item in cfg_xml.findall('./quality/item')
-        ]
-        thumbnail = cfg_xml.find('./startThumb').text
-
-        return {
-            'id': video_id,
-            'title': video_title,
-            'description': video_description,
-            'thumbnail': thumbnail,
-            'formats': formats,
-            'age_limit': age_limit,
-        }
--- a/youtube_dl/extractor/eporner.py
+++ b/youtube_dl/extractor/eporner.py
@@ -11,12 +11,13 @@ from ..utils import (


 class EpornerIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\d+)/(?P<title_dash>[\w-]+)/?'
+    _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\d+)/(?P<display_id>[\w-]+)'
    _TEST = {
        'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
        'md5': '3b427ae4b9d60619106de3185c2987cd',
        'info_dict': {
            'id': '95008',
+            'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video',
            'ext': 'flv',
            'title': 'Infamous Tiffany Teen Strip Tease Video',
            'duration': 194,
@@ -28,7 +29,9 @@ class EpornerIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
-        webpage = self._download_webpage(url, video_id)
+        display_id = mobj.group('display_id')
+
+        webpage = self._download_webpage(url, display_id)
        title = self._html_search_regex(
            r'<title>(.*?) - EPORNER', webpage, 'title')

@@ -37,9 +40,22 @@ class EpornerIE(InfoExtractor):
            webpage, 'redirect_code')
        redirect_url = 'http://www.eporner.com/config5/%s/%s' % (video_id, redirect_code)
        player_code = self._download_webpage(
-            redirect_url, video_id, note='Downloading player config')
-        video_url = self._html_search_regex(
-            r'file: "(.*?)",', player_code, 'video_url')
+            redirect_url, display_id, note='Downloading player config')
+
+        sources = self._search_regex(
+            r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', player_code, 'sources')
+
+        formats = []
+        for video_url, format_id in re.findall(r'file\s*:\s*"([^"]+)",\s*label\s*:\s*"([^"]+)"', sources):
+            fmt = {
+                'url': video_url,
+                'format_id': format_id,
+            }
+            m = re.search(r'^(\d+)', format_id)
+            if m:
+                fmt['height'] = int(m.group(1))
+            formats.append(fmt)
+        self._sort_formats(formats)

        duration = parse_duration(self._search_regex(
            r'class="mbtim">([0-9:]+)</div>', webpage, 'duration',
@@ -50,9 +66,10 @@ class EpornerIE(InfoExtractor):

        return {
            'id': video_id,
-            'url': video_url,
+            'display_id': display_id,
            'title': title,
            'duration': duration,
            'view_count': view_count,
+            'formats': formats,
            'age_limit': self._rta_search(webpage),
        }
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -79,7 +79,8 @@ class FacebookIE(InfoExtractor):

            check_form = {
                'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'),
-                'h': self._search_regex(r'name="h" value="(\w*?)"', login_results, 'h'),
+                'h': self._search_regex(
+                    r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h'),
                'name_action_selected': 'dont_save',
            }
            check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -366,7 +366,22 @@ class GenericIE(InfoExtractor):
                'extract_flat': False,
                'skip_download': True,
            }
-        }
+        },
+        # MLB embed
+        {
+            'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
+            'md5': '96f09a37e44da40dd083e12d9a683327',
+            'info_dict': {
+                'id': '33322633',
+                'ext': 'mp4',
+                'title': 'Ump changes call to ball',
+                'description': 'md5:71c11215384298a172a6dcb4c2e20685',
+                'duration': 48,
+                'timestamp': 1401537900,
+                'upload_date': '20140531',
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+        },
    ]

    def report_download_webpage(self, video_id):
@@ -809,6 +824,12 @@ class GenericIE(InfoExtractor):
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'SBS')

+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>https?://m\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
+            webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'MLB')
+
        # Start with something easy: JW Player in SWFObject
        found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
        if not found:
--- a/youtube_dl/extractor/mlb.py
+++ b/youtube_dl/extractor/mlb.py
@@ -11,7 +11,7 @@ from ..utils import (


 class MLBIE(InfoExtractor):
-    _VALID_URL = r'https?://m\.mlb\.com/(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v(?P<id>n?\d+)'
+    _VALID_URL = r'https?://m\.mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|shared/video/embed/embed\.html\?.*?\bcontent_id=)(?P<id>n?\d+)'
    _TESTS = [
        {
            'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
@@ -69,6 +69,10 @@ class MLBIE(InfoExtractor):
                'thumbnail': 're:^https?://.*\.jpg$',
            },
        },
+        {
+            'url': 'http://m.mlb.com/shared/video/embed/embed.html?content_id=35692085&topic_id=6479266&width=400&height=224&property=mlb',
+            'only_matching': True,
+        },
    ]

    def _real_extract(self, url):
--- a/youtube_dl/extractor/nosvideo.py
+++ b/youtube_dl/extractor/nosvideo.py
@@ -0,0 +1,65 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+    compat_urllib_request,
+    xpath_with_ns,
+)
+
+_x = lambda p: xpath_with_ns(p, {'xspf': 'http://xspf.org/ns/0/'})
+_find = lambda el, p: el.find(_x(p)).text.strip()
+
+
+class NosVideoIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?nosvideo\.com/' + \
+                 r'(?:embed/|\?v=)(?P<id>[A-Za-z0-9]{12})/?'
+    _PLAYLIST_URL = 'http://nosvideo.com/xml/{xml_id:s}.xml'
+    _TEST = {
+        'url': 'http://nosvideo.com/?v=drlp6s40kg54',
+        'md5': '4b4ac54c6ad5d70ab88f2c2c6ccec71c',
+        'info_dict': {
+            'id': 'drlp6s40kg54',
+            'ext': 'mp4',
+            'title': 'big_buck_bunny_480p_surround-fix.avi.mp4',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        fields = {
+            'id': video_id,
+            'op': 'download1',
+            'method_free': 'Continue to Video',
+        }
+        post = compat_urllib_parse.urlencode(fields).encode('ascii')
+        req = compat_urllib_request.Request(url, post)
+        req.add_header('Content-type', 'application/x-www-form-urlencoded')
+        webpage = self._download_webpage(req, video_id,
+                                         'Downloading download page')
+        xml_id = self._search_regex(r'php\|([^\|]+)\|', webpage, 'XML ID')
+        playlist_url = self._PLAYLIST_URL.format(xml_id=xml_id)
+        playlist = self._download_xml(playlist_url, video_id)
+
+        track = playlist.find(_x('.//xspf:track'))
+        title = _find(track, './xspf:title')
+        url = _find(track, './xspf:file')
+        thumbnail = _find(track, './xspf:image')
+
+        formats = [{
+            'format_id': 'sd',
+            'url': url,
+        }]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/tnaflix.py
+++ b/youtube_dl/extractor/tnaflix.py
@@ -0,0 +1,84 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    fix_xml_ampersands,
+)
+
+
+class TNAFlixIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/(?P<cat_id>[\w-]+)/(?P<display_id>[\w-]+)/video(?P<id>\d+)'
+
+    _TITLE_REGEX = None
+    _DESCRIPTION_REGEX = r'<h3 itemprop="description">([^<]+)</h3>'
+    _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
+
+    _TEST = {
+        'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
+        'md5': 'ecf3498417d09216374fc5907f9c6ec0',
+        'info_dict': {
+            'id': '553878',
+            'display_id': 'Carmella-Decesare-striptease',
+            'ext': 'mp4',
+            'title': 'Carmella Decesare - striptease',
+            'description': '',
+            'thumbnail': 're:https?://.*\.jpg$',
+            'duration': 91,
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        display_id = mobj.group('display_id')
+
+        webpage = self._download_webpage(url, display_id)
+
+        title = self._html_search_regex(
+            self._TITLE_REGEX, webpage, 'title') if self._TITLE_REGEX else self._og_search_title(webpage)
+        description = self._html_search_regex(
+            self._DESCRIPTION_REGEX, webpage, 'description', fatal=False, default='')
+
+        age_limit = self._rta_search(webpage)
+
+        duration = self._html_search_meta('duration', webpage, 'duration', default=None)
+        if duration:
+            duration = parse_duration(duration[1:])
+
+        cfg_url = self._html_search_regex(
+            self._CONFIG_REGEX, webpage, 'flashvars.config')
+
+        cfg_xml = self._download_xml(
+            cfg_url, display_id, note='Downloading metadata',
+            transform_source=fix_xml_ampersands)
+
+        thumbnail = cfg_xml.find('./startThumb').text
+
+        formats = []
+        for item in cfg_xml.findall('./quality/item'):
+            video_url = re.sub(r'speed=\d+', 'speed=', item.find('videoLink').text)
+            format_id = item.find('res').text
+            fmt = {
+                'url': video_url,
+                'format_id': format_id,
+            }
+            m = re.search(r'^(\d+)', format_id)
+            if m:
+                fmt['height'] = int(m.group(1))
+            formats.append(fmt)
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'age_limit': age_limit,
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/tvigle.py
+++ b/youtube_dl/extractor/tvigle.py
@@ -5,80 +5,82 @@ import re

 from .common import InfoExtractor
 from ..utils import (
-    unified_strdate,
-    clean_html,
-    int_or_none,
+    float_or_none,
+    str_to_int,
 )


 class TvigleIE(InfoExtractor):
    IE_NAME = 'tvigle'
    IE_DESC = 'Интернет-телевидение Tvigle.ru'
-    _VALID_URL = r'http://(?:www\.)?tvigle\.ru/category/.+?[\?&]v(?:ideo)?=(?P<id>\d+)'
+    _VALID_URL = r'http://(?:www\.)?tvigle\.ru/(?:[^/]+/)+(?P<display_id>[^/]+)/$'

    _TESTS = [
        {
-            'url': 'http://www.tvigle.ru/category/cinema/1608/?video=503081',
-            'md5': '09afba4616666249f087efc6dcf83cb3',
+            'url': 'http://www.tvigle.ru/video/brat-2/',
+            'md5': '72cb7eab33e54314e1790da402d3c9c3',
            'info_dict': {
-                'id': '503081',
-                'ext': 'flv',
+                'id': '5119390',
+                'display_id': 'brat-2',
+                'ext': 'mp4',
                'title': 'Брат 2 ',
-                'description': 'md5:f5a42970f50648cee3d7ad740f3ae769',
-                'upload_date': '20110919',
+                'description': 'md5:5751f4fe345a58e1692585c361294bd8',
+                'duration': 7356.369,
+                'age_limit': 0,
            },
        },
        {
-            'url': 'http://www.tvigle.ru/category/men/vysotskiy_vospominaniya02/?flt=196&v=676433',
-            'md5': 'e7efe5350dd5011d0de6550b53c3ba7b',
+            'url': 'http://www.tvigle.ru/video/vladimir-vysotskii/vedushchii-teleprogrammy-60-minut-ssha-o-vladimire-vysotskom/',
+            'md5': 'd9012d7c7c598fe7a11d7fb46dc1f574',
            'info_dict': {
-                'id': '676433',
-                'ext': 'flv',
+                'id': '5142516',
+                'ext': 'mp4',
                'title': 'Ведущий телепрограммы «60 минут» (США) о Владимире Высоцком',
                'description': 'md5:027f7dc872948f14c96d19b4178428a4',
-                'upload_date': '20121218',
+                'duration': 186.080,
+                'age_limit': 0,
            },
        },
    ]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        display_id = mobj.group('display_id')

-        video_data = self._download_xml(
-            'http://www.tvigle.ru/xml/single.php?obj=%s' % video_id, video_id, 'Downloading video XML')
+        webpage = self._download_webpage(url, display_id)

-        video = video_data.find('./video')
+        video_id = self._html_search_regex(
+            r'<li class="video-preview current_playing" id="(\d+)">', webpage, 'video id')

-        title = video.get('name')
-        description = video.get('anons')
-        if description:
-            description = clean_html(description)
-        thumbnail = video_data.get('img')
-        upload_date = unified_strdate(video.get('date'))
-        like_count = int_or_none(video.get('vtp'))
+        video_data = self._download_json(
+            'http://cloud.tvigle.ru/api/play/video/%s/' % video_id, display_id)
+
+        item = video_data['playlist']['items'][0]
+
+        title = item['title']
+        description = item['description']
+        thumbnail = item['thumbnail']
+        duration = float_or_none(item['durationMilliseconds'], 1000)
+        age_limit = str_to_int(item['ageRestrictions'])

        formats = []
-        for num, (format_id, format_note) in enumerate([['low_file', 'SQ'], ['file', 'HQ'], ['hd', 'HD 720']]):
-            video_url = video.get(format_id)
-            if not video_url:
-                continue
-            formats.append({
-                'url': video_url,
-                'format_id': format_id,
-                'format_note': format_note,
-                'quality': num,
-            })
-
+        for vcodec, fmts in item['videos'].items():
+            for quality, video_url in fmts.items():
+                formats.append({
+                    'url': video_url,
+                    'format_id': '%s-%s' % (vcodec, quality),
+                    'vcodec': vcodec,
+                    'height': int(quality[:-1]),
+                })
        self._sort_formats(formats)

        return {
            'id': video_id,
+            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
-            'upload_date': upload_date,
-            'like_count': like_count,
-            'age_limit': 18,
+            'duration': duration,
+            'age_limit': age_limit,
            'formats': formats,
        }
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1,7 +1,5 @@
 # coding: utf-8

-import errno
-import io
 import itertools
 import json
 import os.path
@@ -21,7 +19,6 @@ from ..utils import (
    compat_str,

    clean_html,
-    get_cachedir,
    get_element_by_id,
    get_element_by_attribute,
    ExtractorError,
@@ -30,7 +27,6 @@ from ..utils import (
    unescapeHTML,
    unified_strdate,
    orderedSet,
-    write_json_file,
    uppercase_escape,
 )

@@ -435,26 +431,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        assert os.path.basename(func_id) == func_id
-        cache_dir = get_cachedir(self._downloader.params)

-        cache_enabled = cache_dir is not None
-        if cache_enabled:
-            cache_fn = os.path.join(os.path.expanduser(cache_dir),
-                                    u'youtube-sigfuncs',
-                                    func_id + '.json')
-            try:
-                with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
-                    cache_spec = json.load(cachef)
-                return lambda s: u''.join(s[i] for i in cache_spec)
-            except IOError:
-                pass  # No cache available
-            except ValueError:
-                try:
-                    file_size = os.path.getsize(cache_fn)
-                except (OSError, IOError) as oe:
-                    file_size = str(oe)
-                self._downloader.report_warning(
-                    u'Cache %s failed (%s)' % (cache_fn, file_size))
+        cache_spec = self._downloader.cache.load(u'youtube-sigfuncs', func_id)
+        if cache_spec is not None:
+            return lambda s: u''.join(s[i] for i in cache_spec)

        if player_type == 'js':
            code = self._download_webpage(
@@ -472,22 +452,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
        else:
            assert False, 'Invalid player type %r' % player_type

-        if cache_enabled:
-            try:
-                test_string = u''.join(map(compat_chr, range(len(example_sig))))
-                cache_res = res(test_string)
-                cache_spec = [ord(c) for c in cache_res]
-                try:
-                    os.makedirs(os.path.dirname(cache_fn))
-                except OSError as ose:
-                    if ose.errno != errno.EEXIST:
-                        raise
-                write_json_file(cache_spec, cache_fn)
-            except Exception:
-                tb = traceback.format_exc()
-                self._downloader.report_warning(
-                    u'Writing cache to %r failed: %s' % (cache_fn, tb))
+        if cache_spec is None:
+            test_string = u''.join(map(compat_chr, range(len(example_sig))))
+            cache_res = res(test_string)
+            cache_spec = [ord(c) for c in cache_res]

+        self._downloader.cache.store(u'youtube-sigfuncs', func_id, cache_spec)
        return res

    def _print_sig_code(self, func, example_sig):
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1076,12 +1076,6 @@ def intlist_to_bytes(xs):
        return bytes(xs)


-def get_cachedir(params={}):
-    cache_root = os.environ.get('XDG_CACHE_HOME',
-                                os.path.expanduser('~/.cache'))
-    return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
-
-
 # Cross-platform file locking
 if sys.platform == 'win32':
    import ctypes.wintypes
@@ -1321,7 +1315,7 @@ def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if int_str is None:
        return None
-    int_str = re.sub(r'[,\.]', u'', int_str)
+    int_str = re.sub(r'[,\.\+]', u'', int_str)
    return int(int_str)


@@ -1336,7 +1330,7 @@ def parse_duration(s):
    s = s.strip()

    m = re.match(
-        r'(?:(?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$', s)
+        r'(?i)(?:(?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$', s)
    if not m:
        return None
    res = int(m.group('secs'))
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2014.09.01.2'
+__version__ = '2014.09.04'
Author	SHA1	Message	Date
Philipp Hagemeister	10710ae386	release 2014.09.04	2014-09-04 01:30:43 +02:00
Philipp Hagemeister	a0e07d3161	[youtube] Move cache into its own module	2014-09-03 17:29:19 +02:00
Sergey M․	88fc294f7f	Merge branch 'peugeot-tnaflix'	2014-09-03 21:08:50 +07:00
Sergey M․	a232bb9551	[empflix] Rewrite in terms of tnaflix	2014-09-03 21:08:36 +07:00
Sergey M․	eb833b7f5a	[tnaflix] Improve and make generic	2014-09-03 21:07:18 +07:00
Sergey M․	f164038b79	[utils] Make parse_duration case insensitive	2014-09-03 21:03:36 +07:00
Sergey M․	f7a361c4f1	Merge branch 'tnaflix' of https://github.com/peugeot/youtube-dl into peugeot-tnaflix	2014-09-03 20:11:49 +07:00
Sergey M․	884ae74785	[tvigle] Adapt to the new API	2014-09-03 19:59:36 +07:00
peugeot	1dba4a2185	Add support for TNAFlix	2014-09-03 14:10:06 +02:00
Philipp Hagemeister	7d4d5f25ed	[facebook] Fix login (Fixes #3667)	2014-09-03 09:50:10 +02:00
Sergey M․	33422c056d	[drtuber] Add display_id to test	2014-09-02 21:40:03 +07:00
Sergey M․	a7862a1bc8	[eporner] Extract all formats	2014-09-02 21:39:22 +07:00
Sergey M․	3baa62e8d1	[beeg] Extract all formats	2014-09-02 20:54:00 +07:00
Sergey M․	1bf8cf5c2c	[drtuber] Extract display_id	2014-09-02 20:39:16 +07:00
Sergey M․	eade1d7eab	[drtuber] Extract counters	2014-09-02 20:36:26 +07:00
Sergey M․	1a94ff6865	[mlb] Add support for embedded videos (Closes #3653)	2014-09-02 20:19:28 +07:00
Philipp Hagemeister	b47ed50aaf	[nosvideo] Remove determine_ext usage (#3655)	2014-09-02 00:17:04 +02:00
Philipp Hagemeister	1b8477729a	Merge remote-tracking branch 'naglis/nosvideo'	2014-09-02 00:14:58 +02:00
Naglis Jonaitis	49fa38adf2	[nosvideo] Add new extractor	2014-09-01 23:47:14 +03:00