release 2013.02.02

Better help for new options
Disable Stanford OC test for now, and enable escapist
2013-02-02 14:45:38 +01:00 · 2013-02-02 14:44:22 +01:00 · 2013-02-02 14:40:41 +01:00 · 2013-02-02 13:51:54 +01:00 · 2013-02-01 18:23:20 +01:00 · 2013-02-01 18:09:34 +01:00
12 changed files with 225 additions and 202 deletions
--- a/Makefile
+++ b/Makefile
@@ -7,6 +7,7 @@ PREFIX=/usr/local
 BINDIR=$(PREFIX)/bin
 MANDIR=$(PREFIX)/man
 SYSCONFDIR=/etc
+PYTHON=/usr/bin/env python

 install: youtube-dl youtube-dl.1 youtube-dl.bash-completion
 	install -d $(DESTDIR)$(BINDIR)
@@ -27,7 +28,7 @@ tar: youtube-dl.tar.gz
 youtube-dl: youtube_dl/*.py
 	zip --quiet youtube-dl youtube_dl/*.py
 	zip --quiet --junk-paths youtube-dl youtube_dl/__main__.py
-	echo '#!/usr/bin/env python' > youtube-dl
+	echo '#!$(PYTHON)' > youtube-dl
 	cat youtube-dl.zip >> youtube-dl
 	rm youtube-dl.zip
 	chmod a+x youtube-dl
--- a/README.md
+++ b/README.md
@@ -38,6 +38,10 @@ which means you can modify it, redistribute it or use it however you like.
    --reject-title REGEX     skip download for matching titles (regex or
                             caseless sub-string)
    --max-downloads NUMBER   Abort after downloading NUMBER files
+    --min-filesize SIZE      Do not download any videos smaller than SIZE (e.g.
+                             50k or 44.6m)
+    --max-filesize SIZE      Do not download any videos larger than SIZE (e.g.
+                             50k or 44.6m)

 ## Filesystem Options:
    -t, --title              use title in file name
--- a/devscripts/release.sh
+++ b/devscripts/release.sh
@@ -25,7 +25,7 @@ make clean
 nosetests --with-coverage --cover-package=youtube_dl --cover-html test || exit 1

 echo "\n### Changing version in version.py..."
-sed -i~ "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
+sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py

 echo "\n### Committing CHANGELOG README.md and youtube_dl/version.py..."
 make README.md
@@ -81,6 +81,9 @@ ROOT=$(pwd)
    git push "$ROOT" gh-pages
    git push "$ORIGIN_URL" gh-pages
 )
-rm -r build
+rm -rf build
+
+echo "Uploading to PyPi ..."
+pip sdist upload

 echo "\n### DONE!"
--- a/setup.py
+++ b/setup.py
@@ -2,10 +2,14 @@
 # -*- coding: utf-8 -*-

 from __future__ import print_function
-from distutils.core import setup
 import pkg_resources
 import sys

+try:
+    from setuptools import setup
+except ImportError:
+    from distutils.core import setup
+
 try:
    import py2exe
    """This will create an exe that needs Microsoft Visual C++ 2008 Redistributable Package"""
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -98,7 +98,7 @@ def generator(test_case):

            for tc in test_cases:
                if not test_case.get('params', {}).get('skip_download', False):
-                    self.assertTrue(os.path.exists(tc['file']))
+                    self.assertTrue(os.path.exists(tc['file']), msg='Missing file ' + tc['file'])
                    self.assertTrue(tc['file'] in finished_hook_called)
                self.assertTrue(os.path.exists(tc['file'] + '.info.json'))
                if 'md5' in tc:
@@ -107,11 +107,7 @@ def generator(test_case):
                with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof:
                    info_dict = json.load(infof)
                for (info_field, value) in tc.get('info_dict', {}).items():
-                    if value.startswith('md5:'):
-                        md5_info_value = hashlib.md5(info_dict.get(info_field, '')).hexdigest()
-                        self.assertEqual(value[3:], md5_info_value)
-                    else:
-                        self.assertEqual(value, info_dict.get(info_field))
+                    self.assertEqual(value, info_dict.get(info_field))
        finally:
            for tc in test_cases:
                _try_rm(tc['file'])
--- a/test/tests.json
+++ b/test/tests.json
@@ -76,7 +76,8 @@
    "name": "StanfordOpenClassroom",
    "md5":  "544a9468546059d4e80d76265b0443b8",
    "url":  "http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100",
-    "file":  "PracticalUnix_intro-environment.mp4"
+    "file":  "PracticalUnix_intro-environment.mp4",
+    "skip": "Currently offline"
  },
  {
    "name": "XNXX",
@@ -113,8 +114,7 @@
    "name": "Escapist",
    "url": "http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate",
    "file": "6618-Breaking-Down-Baldurs-Gate.flv",
-    "md5": "c6793dbda81388f4264c1ba18684a74d",
-    "skip": "Fails with timeout on Travis"
+    "md5": "c6793dbda81388f4264c1ba18684a74d"
  },
  {
    "name": "GooglePlus",
@@ -181,37 +181,12 @@
  },
  {
    "name": "ComedyCentral",
-    "url": "http://www.thedailyshow.com/full-episodes/thu-december-13-2012-kristen-stewart",
-    "playlist": [
-      {
-        "file": "422204.mp4",
-        "md5": "7a7abe068b31ff03e7b8a37596e72380",
-        "info_dict": {
-            "title": "thedailyshow-thu-december-13-2012-kristen-stewart part 1"
-        }
-      },
-      {
-        "file": "422205.mp4",
-        "md5": "30552b7274c94dbb933f64600eadddd2",
-        "info_dict": {
-            "title": "thedailyshow-thu-december-13-2012-kristen-stewart part 2"
-        }
-      },
-      {
-        "file": "422206.mp4",
-        "md5": "1f4c0664b352cb8e8fe85d5da4fbee91",
-        "info_dict": {
-            "title": "thedailyshow-thu-december-13-2012-kristen-stewart part 3"
-        }
-      },
-      {
-        "file": "422207.mp4",
-        "md5": "f61ee8a4e6bd1308438e03badad78554",
-        "info_dict": {
-            "title": "thedailyshow-thu-december-13-2012-kristen-stewart part 4"
-        }
-      }
-    ]
+    "url": "http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart",
+    "file": "422212.mp4",
+    "md5": "4e2f5cb088a83cd8cdb7756132f9739d",
+    "info_dict": {
+        "title": "thedailyshow-kristen-stewart part 1"
+    }
  },
  {
    "name": "RBMARadio",
@@ -225,5 +200,80 @@
        "uploader_id": "ford-lopatin",
        "location": "Spain"
    }
+  },
+  {
+    "name": "Facebook",
+    "url": "https://www.facebook.com/photo.php?v=120708114770723",
+    "file": "120708114770723.mp4",
+    "md5": "48975a41ccc4b7a581abd68651c1a5a8",
+    "info_dict": {
+      "title": "PEOPLE ARE AWESOME 2013",
+      "duration": 279
+    }
+  },
+  {
+    "name": "EightTracks",
+    "url": "http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
+    "playlist": [
+      {
+        "file": "11885610.m4a",
+        "md5": "96ce57f24389fc8734ce47f4c1abcc55",
+        "info_dict": {
+          "title": "youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
+          "uploader_id": "ytdl"
+        }
+      },
+      {
+        "file": "11885608.m4a",
+        "md5": "4ab26f05c1f7291ea460a3920be8021f",
+        "info_dict": {
+          "title": "youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
+          "uploader_id": "ytdl"
+
+        }
+      },
+      {
+        "file": "11885679.m4a",
+        "md5": "d30b5b5f74217410f4689605c35d1fd7",
+        "info_dict": {
+          "title": "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad"
+        }
+      },
+      {
+        "file": "11885680.m4a",
+        "md5": "4eb0a669317cd725f6bbd336a29f923a",
+        "info_dict": {
+          "title": "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad"
+        }
+      },
+      {
+        "file": "11885682.m4a",
+        "md5": "1893e872e263a2705558d1d319ad19e8",
+        "info_dict": {
+          "title": "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad"
+        }
+      },
+      {
+        "file": "11885683.m4a",
+        "md5": "b673c46f47a216ab1741ae8836af5899",
+        "info_dict": {
+          "title": "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad"
+        }
+      },
+      {
+        "file": "11885684.m4a",
+        "md5": "1d74534e95df54986da7f5abf7d842b7",
+        "info_dict": {
+          "title": "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad"
+        }
+      },
+      {
+        "file": "11885685.m4a",
+        "md5": "f081f47af8f6ae782ed131d38b9cd1c0",
+        "info_dict": {
+          "title": "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad"
+        }
+      }
+    ]
  }
 ]
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -82,6 +82,8 @@ class FileDownloader(object):
    subtitleslang:     Language of the subtitles to download
    test:              Download only first bytes to test the downloader.
    keepvideo:         Keep the video file after post-processing
+    min_filesize:      Skip files smaller than this size
+    max_filesize:      Skip files larger than this size
    """

    params = None
@@ -712,6 +714,15 @@ class FileDownloader(object):
        data_len = data.info().get('Content-length', None)
        if data_len is not None:
            data_len = int(data_len) + resume_len
+            min_data_len = self.params.get("min_filesize", None)
+            max_data_len =  self.params.get("max_filesize", None)
+            if min_data_len is not None and data_len < min_data_len:
+                self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
+                return False
+            if max_data_len is not None and data_len > max_data_len:
+                self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
+                return False
+
        data_len_str = self.format_bytes(data_len)
        byte_counter = 0 + resume_len
        block_size = self.params.get('buffersize', 1024)
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -5,6 +5,7 @@ from __future__ import absolute_import

 import base64
 import datetime
+import itertools
 import netrc
 import os
 import re
@@ -1980,62 +1981,14 @@ class DepositFilesIE(InfoExtractor):
 class FacebookIE(InfoExtractor):
    """Information Extractor for Facebook"""

-    _WORKING = False
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
    _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
    _NETRC_MACHINE = 'facebook'
-    _available_formats = ['video', 'highqual', 'lowqual']
-    _video_extensions = {
-        'video': 'mp4',
-        'highqual': 'mp4',
-        'lowqual': 'mp4',
-    }
    IE_NAME = u'facebook'

-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
-    def _reporter(self, message):
-        """Add header and report message."""
-        self._downloader.to_screen(u'[facebook] %s' % message)
-
    def report_login(self):
        """Report attempt to log in."""
-        self._reporter(u'Logging in')
-
-    def report_video_webpage_download(self, video_id):
-        """Report attempt to download video webpage."""
-        self._reporter(u'%s: Downloading video webpage' % video_id)
-
-    def report_information_extraction(self, video_id):
-        """Report attempt to extract video information."""
-        self._reporter(u'%s: Extracting video information' % video_id)
-
-    def _parse_page(self, video_webpage):
-        """Extract video information from page"""
-        # General data
-        data = {'title': r'\("video_title", "(.*?)"\)',
-            'description': r'<div class="datawrap">(.*?)</div>',
-            'owner': r'\("video_owner_name", "(.*?)"\)',
-            'thumbnail':  r'\("thumb_url", "(?P<THUMB>.*?)"\)',
-            }
-        video_info = {}
-        for piece in data.keys():
-            mobj = re.search(data[piece], video_webpage)
-            if mobj is not None:
-                video_info[piece] = compat_urllib_parse.unquote_plus(mobj.group(1).decode("unicode_escape"))
-
-        # Video urls
-        video_urls = {}
-        for fmt in self._available_formats:
-            mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
-            if mobj is not None:
-                # URL is in a Javascript segment inside an escaped Unicode format within
-                # the generally utf-8 page
-                video_urls[fmt] = compat_urllib_parse.unquote_plus(mobj.group(1).decode("unicode_escape"))
-        video_info['video_urls'] = video_urls
-
-        return video_info
+        self._downloader.to_screen(u'[%s] Logging in' % self.IE_NAME)

    def _real_initialize(self):
        if self._downloader is None:
@@ -2088,100 +2041,35 @@ class FacebookIE(InfoExtractor):
            return
        video_id = mobj.group('ID')

-        # Get video webpage
-        self.report_video_webpage_download(video_id)
-        request = compat_urllib_request.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
-        try:
-            page = compat_urllib_request.urlopen(request)
-            video_webpage = page.read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
-            return
+        url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
+        webpage = self._download_webpage(url, video_id)

-        # Start extracting information
-        self.report_information_extraction(video_id)
+        BEFORE = '[["allowFullScreen","true"],["allowScriptAccess","always"],["salign","tl"],["scale","noscale"],["wmode","opaque"]].forEach(function(param) {swf.addParam(param[0], param[1]);});\n'
+        AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
+        m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
+        if not m:
+            raise ExtractorError(u'Cannot parse data')
+        data = dict(json.loads(m.group(1)))
+        params_raw = compat_urllib_parse.unquote(data['params'])
+        params = json.loads(params_raw)
+        video_url = params['hd_src']
+        video_duration = int(params['video_duration'])

-        # Extract information
-        video_info = self._parse_page(video_webpage)
+        m = re.search('<h2 class="uiHeaderTitle">([^<]+)</h2>', webpage)
+        if not m:
+            raise ExtractorError(u'Cannot find title in webpage')
+        video_title = unescapeHTML(m.group(1))

-        # uploader
-        if 'owner' not in video_info:
-            self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
-            return
-        video_uploader = video_info['owner']
+        info = {
+            'id': video_id,
+            'title': video_title,
+            'url': video_url,
+            'ext': 'mp4',
+            'duration': video_duration,
+            'thumbnail': params['thumbnail_src'],
+        }
+        return [info]

-        # title
-        if 'title' not in video_info:
-            self._downloader.trouble(u'ERROR: unable to extract video title')
-            return
-        video_title = video_info['title']
-        video_title = video_title.decode('utf-8')
-
-        # thumbnail image
-        if 'thumbnail' not in video_info:
-            self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
-            video_thumbnail = ''
-        else:
-            video_thumbnail = video_info['thumbnail']
-
-        # upload date
-        upload_date = None
-        if 'upload_date' in video_info:
-            upload_time = video_info['upload_date']
-            timetuple = email.utils.parsedate_tz(upload_time)
-            if timetuple is not None:
-                try:
-                    upload_date = time.strftime('%Y%m%d', timetuple[0:9])
-                except:
-                    pass
-
-        # description
-        video_description = video_info.get('description', 'No description available.')
-
-        url_map = video_info['video_urls']
-        if url_map:
-            # Decide which formats to download
-            req_format = self._downloader.params.get('format', None)
-            format_limit = self._downloader.params.get('format_limit', None)
-
-            if format_limit is not None and format_limit in self._available_formats:
-                format_list = self._available_formats[self._available_formats.index(format_limit):]
-            else:
-                format_list = self._available_formats
-            existing_formats = [x for x in format_list if x in url_map]
-            if len(existing_formats) == 0:
-                self._downloader.trouble(u'ERROR: no known formats available for video')
-                return
-            if req_format is None:
-                video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
-            elif req_format == 'worst':
-                video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
-            elif req_format == '-1':
-                video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
-            else:
-                # Specific format
-                if req_format not in url_map:
-                    self._downloader.trouble(u'ERROR: requested format not available')
-                    return
-                video_url_list = [(req_format, url_map[req_format])] # Specific format
-
-        results = []
-        for format_param, video_real_url in video_url_list:
-            # Extension
-            video_extension = self._video_extensions.get(format_param, 'mp4')
-
-            results.append({
-                'id':       video_id.decode('utf-8'),
-                'url':      video_real_url.decode('utf-8'),
-                'uploader': video_uploader.decode('utf-8'),
-                'upload_date':  upload_date,
-                'title':    video_title,
-                'ext':      video_extension.decode('utf-8'),
-                'format':   (format_param is None and u'NA' or format_param.decode('utf-8')),
-                'thumbnail':    video_thumbnail.decode('utf-8'),
-                'description':  video_description.decode('utf-8'),
-            })
-        return results

 class BlipTVIE(InfoExtractor):
    """Information extractor for blip.tv"""
@@ -2983,8 +2871,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
-            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
-            return
+            raise ExtractorError(u'Invalid URL: %s' % url)

        if mobj.group('course') and mobj.group('video'): # A specific video
            course = mobj.group('course')
@@ -3021,12 +2908,9 @@ class StanfordOpenClassroomIE(InfoExtractor):
                'upload_date': None,
            }

-            self.report_download_webpage(info['id'])
-            try:
-                coursepage = compat_urllib_request.urlopen(url).read()
-            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-                self._downloader.trouble(u'ERROR: unable to download course info page: ' + compat_str(err))
-                return
+            coursepage = self._download_webpage(url, info['id'],
+                                        note='Downloading course info page',
+                                        errnote='Unable to download course info page')

            m = re.search('<h1>([^<]+)</h1>', coursepage)
            if m:
@@ -3050,7 +2934,6 @@ class StanfordOpenClassroomIE(InfoExtractor):
                assert entry['type'] == 'reference'
                results += self.extract(entry['url'])
            return results
-
        else: # Root page
            info = {
                'id': 'Stanford OpenClassroom',
@@ -3541,10 +3424,12 @@ class JustinTVIE(InfoExtractor):
                video_extension = os.path.splitext(video_url)[1][1:]
                video_date = re.sub('-', '', clip['start_time'][:10])
                video_uploader_id = clip.get('user_id', clip.get('channel_id'))
+                video_id = clip['id']
+                video_title = clip.get('title', video_id)
                info.append({
-                    'id': clip['id'],
+                    'id': video_id,
                    'url': video_url,
-                    'title': clip['title'],
+                    'title': video_title,
                    'uploader': clip.get('channel_name', video_uploader_id),
                    'uploader_id': video_uploader_id,
                    'upload_date': video_date,
@@ -3925,8 +3810,6 @@ class PornotubeIE(InfoExtractor):

        return [info]

-
-
 class YouJizzIE(InfoExtractor):
    """Information extractor for youjizz.com."""
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+).html$'
@@ -3973,6 +3856,49 @@ class YouJizzIE(InfoExtractor):

        return [info]

+class EightTracksIE(InfoExtractor):
+    IE_NAME = '8tracks'
+    _VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            raise ExtractorError(u'Invalid URL: %s' % url)
+        playlist_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        m = re.search(r"new TRAX.Mix\((.*?)\);\n*\s*TRAX.initSearchAutocomplete\('#search'\);", webpage, flags=re.DOTALL)
+        if not m:
+            raise ExtractorError(u'Cannot find trax information')
+        json_like = m.group(1)
+        data = json.loads(json_like)
+
+        session = str(random.randint(0, 1000000000))
+        mix_id = data['id']
+        track_count = data['tracks_count']
+        first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
+        next_url = first_url
+        res = []
+        for i in itertools.count():
+            api_json = self._download_webpage(next_url, playlist_id,
+                note=u'Downloading song information %s/%s' % (str(i+1), track_count),
+                errnote=u'Failed to download song information')
+            api_data = json.loads(api_json)
+            track_data = api_data[u'set']['track']
+            info = {
+                'id': track_data['id'],
+                'url': track_data['track_file_stream_url'],
+                'title': track_data['performer'] + u' - ' + track_data['name'],
+                'raw_title': track_data['name'],
+                'uploader_id': data['user']['login'],
+                'ext': 'm4a',
+            }
+            res.append(info)
+            if api_data['set']['at_last_track']:
+                break
+            next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
+        return res

 def gen_extractors():
    """ Return a list of an instance of every supported extractor.
@@ -4019,6 +3945,7 @@ def gen_extractors():
        SteamIE(),
        UstreamIE(),
        RBMARadioIE(),
+        EightTracksIE(),
        GenericIE()
    ]

--- a/youtube_dl/PostProcessor.py
+++ b/youtube_dl/PostProcessor.py
@@ -143,10 +143,10 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):

        more_opts = []
        if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
-            if self._preferredcodec == 'm4a' and filecodec == 'aac':
+            if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
                # Lossless, but in another container
                acodec = 'copy'
-                extension = self._preferredcodec
+                extension = 'm4a'
                more_opts = [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc']
            elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']:
                # Lossless if possible
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -150,6 +150,9 @@ def parseOpts():
    selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
    selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
    selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)
+    selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)
+    selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None)
+

    authentication.add_option('-u', '--username',
            dest='username', metavar='USERNAME', help='account username')
@@ -286,10 +289,13 @@ def _real_main():
    else:
        try:
            jar = compat_cookiejar.MozillaCookieJar(opts.cookiefile)
-            if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
+            if os.access(opts.cookiefile, os.R_OK):
                jar.load()
        except (IOError, OSError) as err:
-            sys.exit(u'ERROR: unable to open cookie file')
+            if opts.verbose:
+                traceback.print_exc()
+            sys.stderr.write(u'ERROR: unable to open cookie file\n')
+            sys.exit(101)
    # Set user agent
    if opts.user_agent is not None:
        std_headers['User-Agent'] = opts.user_agent
@@ -349,6 +355,16 @@ def _real_main():
        if numeric_limit is None:
            parser.error(u'invalid rate limit specified')
        opts.ratelimit = numeric_limit
+    if opts.min_filesize is not None:
+        numeric_limit = FileDownloader.parse_bytes(opts.min_filesize)
+        if numeric_limit is None:
+            parser.error(u'invalid min_filesize specified')
+        opts.min_filesize = numeric_limit
+    if opts.max_filesize is not None:
+        numeric_limit = FileDownloader.parse_bytes(opts.max_filesize)
+        if numeric_limit is None:
+            parser.error(u'invalid max_filesize specified')
+        opts.max_filesize = numeric_limit
    if opts.retries is not None:
        try:
            opts.retries = int(opts.retries)
@@ -438,6 +454,8 @@ def _real_main():
        'verbose': opts.verbose,
        'test': opts.test,
        'keepvideo': opts.keepvideo,
+        'min_filesize': opts.min_filesize,
+        'max_filesize': opts.max_filesize
        })

    if opts.verbose:
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -280,6 +280,12 @@ class AttrParser(compat_html_parser.HTMLParser):
            lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
        lines[-1] = lines[-1][:self.result[2][1]]
        return '\n'.join(lines).strip()
+# Hack for https://github.com/rg3/youtube-dl/issues/662
+if sys.version_info < (2, 7, 3):
+    AttrParser.parse_endtag = (lambda self, i:
+        i + len("</scr'+'ipt>")
+        if self.rawdata[i:].startswith("</scr'+'ipt>")
+        else compat_html_parser.HTMLParser.parse_endtag(self, i))

 def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
@@ -409,7 +415,10 @@ def encodeFilename(s):
        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
        return s
    else:
-        return s.encode(sys.getfilesystemencoding(), 'ignore')
+        encoding = sys.getfilesystemencoding()
+        if encoding is None:
+            encoding = 'utf-8'
+        return s.encode(encoding, 'ignore')


 class ExtractorError(Exception):
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@

-__version__ = '2013.01.13'
+__version__ = '2013.02.02'
Author	SHA1	Message	Date
Philipp Hagemeister	085c8b75a6	release 2013.02.02	2013-02-02 14:45:38 +01:00
Philipp Hagemeister	dbf2ba3d61	Better help for new options	2013-02-02 14:44:22 +01:00
Philipp Hagemeister	b47bbac393	Disable Stanford OC test for now, and enable escapist	2013-02-02 14:40:41 +01:00
Philipp Hagemeister	229cac754a	Improve cookie error handling	2013-02-02 13:51:54 +01:00
Philipp Hagemeister	0e33684194	Switch to m4a by default (Closes #240 )	2013-02-01 18:23:20 +01:00
Jeff Crouse	9e982f9e4e	Added "min-filesize" and "max-filesize" options	2013-02-01 18:09:34 +01:00
Philipp Hagemeister	c7a725cfad	Merge remote-tracking branch 'dcoppa/master'	2013-02-01 18:05:42 +01:00
Philipp Hagemeister	450a30cae8	Add PyPi upload to release script	2013-02-01 18:01:53 +01:00
Philipp Hagemeister	9cd5e4fce8	release 2013.02.01	2013-02-01 17:57:32 +01:00
Philipp Hagemeister	edba5137b8	Fix Facebook IE	2013-02-01 17:56:22 +01:00
Philipp Hagemeister	233a22960a	Switch ComedyCentral test to a permanent URL (They delete full episodes older than a month)	2013-02-01 17:46:03 +01:00
Philipp Hagemeister	3b024e17af	Work around buggy HTML Parser in Python < 2.7.3 (Closes #662 )	2013-02-01 17:29:50 +01:00
David Coppa	a32b573ccb	Try setuptools first, then fallback to distutils.core	2013-01-30 15:31:38 +01:00
Philipp Hagemeister	ec71c13ab8	release 2013.01.28	2013-01-27 18:33:58 +01:00
Philipp Hagemeister	f0bad2b026	Fix Stanford (Closes #653 )	2013-01-27 15:23:26 +01:00
Philipp Hagemeister	25580f3251	8tracks: Ignore hashes	2013-01-27 04:15:12 +01:00
Philipp Hagemeister	da4de959df	8tracks: Better default titles	2013-01-27 04:05:53 +01:00
Philipp Hagemeister	d0d51a8afa	8tracks: Include performer as uploader	2013-01-27 03:27:46 +01:00
Philipp Hagemeister	c67598c3e1	Remove space before shebang	2013-01-27 03:07:07 +01:00
Philipp Hagemeister	811d253bc2	Merge remote-tracking branch 'jaimeMF/makefilePythonversion'	2013-01-27 03:06:32 +01:00
Philipp Hagemeister	c3a1642ead	release 2013.01.27	2013-01-27 03:03:02 +01:00
Philipp Hagemeister	ccf65f9dee	8tracks IE (Closes #652 )	2013-01-27 03:01:23 +01:00
Philipp Hagemeister	b954070d70	Fix Facebook (Closes #375 )	2013-01-25 16:54:48 +01:00
Philipp Hagemeister	30e9f4496b	Drop md5: spec for now (unused and breaks int values)	2013-01-25 16:54:25 +01:00
Jaime Marquínez Ferrándiz	271d3fbdaa	Option in makefile to select python interpreter	2013-01-25 15:11:03 +01:00
Philipp Hagemeister	6df40dcbe0	Guard against sys.getfilesystemencoding() == None (#503 )	2013-01-20 01:48:05 +01:00
Philipp Hagemeister	97f194c1fb	twitch.tv: Use id as title if no title is present (Closes #638 )	2013-01-16 09:55:45 +01:00
Philipp Hagemeister	4da769ccca	Do not backup version.py (under version control and frankly, not that complex)	2013-01-12 23:04:46 +01:00
Philipp Hagemeister	253d96f2e2	Force build removal	2013-01-12 22:25:54 +01:00