Compare commits


23 Commits

Author SHA1 Message Date
Philipp Hagemeister
74fdba620d release 2013.01.08 2013-01-08 10:29:53 +01:00
Philipp Hagemeister
dc1c479a6f Merge pull request #621 from atomizer/master
justin.tv tweaks
2013-01-08 00:57:46 -08:00
atomizer
119d536e07 Merge branch 'my-origin/master' 2013-01-07 17:03:58 +04:00
atomizer
fa1bf9c653 justin.tv tweaks
- download all parts of a broadcast, fixes #614
- set "uploader" variable to channel_name if available
- catch api errors even if http status is 200
2013-01-07 16:59:39 +04:00
Philipp Hagemeister
814eed0ea1 Fix tar target (--exclude-vcs is not supported everywhere, and reading . while writing to it can fail randomly) 2013-01-07 12:48:07 +01:00
Philipp Hagemeister
0aa3068e9e Do not check in test_coverage 2013-01-06 23:38:36 +01:00
Philipp Hagemeister
db2d6124b1 correct quoting 2013-01-06 23:14:56 +01:00
Philipp Hagemeister
039dc61bd2 Simplify Makefile 2013-01-06 23:02:31 +01:00
Philipp Hagemeister
4b879984ea release 2013.01.06 2013-01-06 22:52:04 +01:00
Philipp Hagemeister
55e286ba55 read -n is bash-specific 2013-01-06 22:50:20 +01:00
Philipp Hagemeister
9314810243 fix ComedyCentral IE in Python3 2013-01-06 21:36:01 +01:00
Philipp Hagemeister
7717ae19fa Add tests for ComedyCentral IE 2013-01-06 21:35:20 +01:00
Philipp Hagemeister
32635ec685 Switch comedycentral IE to http downloads 2013-01-06 21:26:31 +01:00
Philipp Hagemeister
20759b340a Disable travis irc notifications
travis is much too verbose for that, with random IEs constantly failing
2013-01-04 00:34:02 +01:00
Philipp Hagemeister
8e5f761870 Merge pull request #617 from jaimeMF/steamIE
[steamIE]Allow downloading videos with other characters in their titles
2013-01-03 15:16:27 -08:00
Jaime Marquínez Ferrándiz
26714799c9 steamIE remove the HTMLparser object 2013-01-03 23:56:02 +01:00
Jaime Marquínez Ferrándiz
5e9d042d8f steamIE follow @phihag suggestions 2013-01-03 23:51:48 +01:00
Jaime Marquínez Ferrándiz
9cf98a2bcc Allow downloading videos with other characters in their titles
Especially html entities
2013-01-03 21:17:35 +01:00
Philipp Hagemeister
f5ebb61495 Support page URL in RTMP downloads 2013-01-03 20:26:38 +01:00
Philipp Hagemeister
431d88dd31 Also generate SHA2-256 2013-01-03 19:49:06 +01:00
Philipp Hagemeister
876f1a86af Also publish hashsums 2013-01-03 19:18:55 +01:00
Philipp Hagemeister
01951dda7a Make ExtractorError usable for other causes 2013-01-03 15:39:55 +01:00
Filippo Valsorda
6e3dba168b release.sh edits based on 2013.01.02 experience 2013-01-02 23:40:24 +01:00
9 changed files with 122 additions and 83 deletions

View File

@@ -1,17 +0,0 @@
updates_key.pem
*.pyc
*.pyo
youtube-dl.exe
wine-py2exe/
py2exe.log
*.kate-swp
build/
dist/
MANIFEST
*.DS_Store
youtube-dl.tar.gz
.coverage
cover/
__pycache__/
.git/
*~

View File

@@ -8,7 +8,7 @@ notifications:
email:
- filippo.valsorda@gmail.com
- phihag@phihag.de
irc:
channels:
- "irc.freenode.org#youtube-dl"
skip_join: true
# irc:
# channels:
# - "irc.freenode.org#youtube-dl"
# skip_join: true

View File

@@ -20,7 +20,9 @@ test:
#nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose --processes 4 test
nosetests --verbose test
.PHONY: all clean install test
tar: youtube-dl.tar.gz
.PHONY: all clean install test tar
youtube-dl: youtube_dl/*.py
zip --quiet youtube-dl youtube_dl/*.py
@@ -43,5 +45,16 @@ youtube-dl.bash-completion: youtube_dl/*.py devscripts/bash-completion.in
python devscripts/bash-completion.py
youtube-dl.tar.gz: all
tar -cvzf youtube-dl.tar.gz -s "|^./|./youtube-dl/|" \
--exclude-from=".tarignore" -- .
@tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
--exclude '*.DS_Store' \
--exclude '*.kate-swp' \
--exclude '*.pyc' \
--exclude '*.pyo' \
--exclude '*~' \
--exclude '__pycache' \
--exclude '.git' \
-- \
bin devscripts test youtube_dl \
CHANGELOG LICENSE README.md README.txt \
MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \
youtube-dl
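
The rewritten tar rule replaces --exclude-vcs and the .tarignore file with explicit --exclude patterns, a --transform prefix and a fixed file list, which sidesteps both the portability problem and the risk of tar reading the directory it is writing into. Purely as an illustration (this script is not part of the repository), the same packaging logic could be expressed with Python's tarfile module:

    import tarfile

    # Suffixes and path components excluded by the Makefile rule above.
    EXCLUDED_SUFFIXES = ('.DS_Store', '.kate-swp', '.pyc', '.pyo', '~')
    EXCLUDED_PARTS = ('__pycache__', '.git')

    def exclude_filter(tarinfo):
        """Return None to drop a member, mirroring the --exclude options."""
        if tarinfo.name.endswith(EXCLUDED_SUFFIXES):
            return None
        if any(part in EXCLUDED_PARTS for part in tarinfo.name.split('/')):
            return None
        tarinfo.uid = tarinfo.gid = 0        # --owner 0 --group 0
        tarinfo.uname = tarinfo.gname = ''
        return tarinfo

    members = ['bin', 'devscripts', 'test', 'youtube_dl',
               'CHANGELOG', 'LICENSE', 'README.md', 'README.txt',
               'MANIFEST.in', 'youtube-dl.1', 'youtube-dl.bash-completion',
               'setup.py', 'youtube-dl']

    with tarfile.open('youtube-dl.tar.gz', 'w:gz') as tar:
        for path in members:
            # --transform "s|^|youtube-dl/|": prefix every member with youtube-dl/
            tar.add(path, arcname='youtube-dl/' + path, filter=exclude_filter)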

View File

@@ -1,13 +1,17 @@
#!/bin/sh
#!/bin/bash
# IMPORTANT: the following assumptions are made
# * you did --set-upstream
# * the GH repo is on the origin remote
# * the gh-pages branch is named so locally
# * the git config user.signingkey is properly set
# You will need
# pip install coverage nose rsa
# TODO
# release notes
# make hash on local files
set -e
if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi
@@ -34,7 +38,9 @@ git show "$version"
read -p "Is it good, can I push? (y/n) " -n 1
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
echo
git push
MASTER=$(git rev-parse --abbrev-ref HEAD)
git push origin $MASTER:master
git push origin "$version"
echo "\n### OK, now it is time to build the binaries..."
REV=$(git rev-parse HEAD)
@@ -44,15 +50,19 @@ wget "http://jeromelaheurte.net:8142/download/rg3/youtube-dl/youtube-dl.exe?rev=
mkdir -p "update_staging/$version"
mv youtube-dl youtube-dl.exe "update_staging/$version"
mv youtube-dl.tar.gz "update_staging/$version/youtube-dl-$version.tar.gz"
RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
(cd update_staging/$version/ && md5sum $RELEASE_FILES > MD5SUMS)
(cd update_staging/$version/ && sha1sum $RELEASE_FILES > SHA1SUMS)
(cd update_staging/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS)
(cd update_staging/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
git checkout HEAD -- youtube-dl youtube-dl.exe
echo "\n### Signing and uploading the new binaries to youtube-dl.org..."
for f in update_staging/$version/*; do gpg --detach-sig "$f"; done
for f in $RELEASE_FILES; do gpg --detach-sig "update_staging/$version/$f"; done
scp -r "update_staging/$version" ytdl@youtube-dl.org:html/downloads/
rm -r update_staging
echo "\n### Now switching to gh-pages..."
MASTER=$(git rev-parse --abbrev-ref HEAD)
git checkout gh-pages
git checkout "$MASTER" -- devscripts/gh-pages/
git reset devscripts/gh-pages/
@@ -60,15 +70,14 @@ devscripts/gh-pages/add-version.py $version
devscripts/gh-pages/sign-versions.py < updates_key.pem
devscripts/gh-pages/generate-download.py
devscripts/gh-pages/update-copyright.py
rm -r test_coverage
mv cover test_coverage
git add *.html *.html.in update test_coverage
git add *.html *.html.in update
git commit -m "release $version"
git show HEAD
read -p "Is it good, can I push? (y/n) " -n 1
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
echo
git push
git push origin gh-pages
echo "\n### DONE!"
rm -r devscripts
git checkout $MASTER
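
release.sh now publishes MD5SUMS, SHA1SUMS, SHA2-256SUMS and SHA2-512SUMS alongside the binaries and signs each release file individually. A minimal, hypothetical Python equivalent of the checksum step (the staging directory and version are invented for the example):

    import hashlib
    import os

    version = '2013.01.08'
    staging = 'update_staging/%s' % version   # hypothetical staging directory
    release_files = ['youtube-dl', 'youtube-dl.exe',
                     'youtube-dl-%s.tar.gz' % version]

    # One SUMS file per digest, matching the sha*sum calls in the script above.
    digests = {'MD5SUMS': hashlib.md5, 'SHA1SUMS': hashlib.sha1,
               'SHA2-256SUMS': hashlib.sha256, 'SHA2-512SUMS': hashlib.sha512}

    for sums_name, make_hash in digests.items():
        lines = []
        for fname in release_files:
            h = make_hash()
            with open(os.path.join(staging, fname), 'rb') as f:
                for chunk in iter(lambda: f.read(65536), b''):
                    h.update(chunk)
            lines.append('%s  %s' % (h.hexdigest(), fname))  # "<digest>  <file>"
        with open(os.path.join(staging, sums_name), 'w') as out:
            out.write('\n'.join(lines) + '\n')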

View File

@@ -160,5 +160,39 @@
"params": {
"skip_download": true
}
},
{
"name": "ComedyCentral",
"url": "http://www.thedailyshow.com/full-episodes/thu-december-13-2012-kristen-stewart",
"playlist": [
{
"file": "422204.mp4",
"md5": "7a7abe068b31ff03e7b8a37596e72380",
"info_dict": {
"title": "thedailyshow-thu-december-13-2012-kristen-stewart part 1"
}
},
{
"file": "422205.mp4",
"md5": "30552b7274c94dbb933f64600eadddd2",
"info_dict": {
"title": "thedailyshow-thu-december-13-2012-kristen-stewart part 2"
}
},
{
"file": "422206.mp4",
"md5": "1f4c0664b352cb8e8fe85d5da4fbee91",
"info_dict": {
"title": "thedailyshow-thu-december-13-2012-kristen-stewart part 3"
}
},
{
"file": "422207.mp4",
"md5": "f61ee8a4e6bd1308438e03badad78554",
"info_dict": {
"title": "thedailyshow-thu-december-13-2012-kristen-stewart part 4"
}
}
]
}
]
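
The new ComedyCentral entry is a multi-part test: instead of a single file/md5 pair it carries a "playlist" whose items each name an expected file, checksum and title. A hedged sketch of how such an entry can be verified once the parts are downloaded (the helper below is illustrative, not the project's actual test harness, and the fixture path is assumed):

    import hashlib
    import json

    def file_md5(path):
        """Full-file MD5, for comparison against the fixture's "md5" field."""
        h = hashlib.md5()
        with open(path, 'rb') as f:
            for chunk in iter(lambda: f.read(65536), b''):
                h.update(chunk)
        return h.hexdigest()

    with open('test/tests.json') as f:        # assumed fixture location
        tests = json.load(f)

    entry = next(t for t in tests if t.get('name') == 'ComedyCentral')
    for part in entry['playlist']:
        assert file_md5(part['file']) == part['md5'], part['file']
        print('%s ok: %s' % (part['file'], part['info_dict']['title']))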

View File

@@ -216,12 +216,15 @@ class FileDownloader(object):
Depending on if the downloader has been configured to ignore
download errors or not, this method may throw an exception or
not when errors are found, after printing the message.
tb, if given, is additional traceback information.
"""
if message is not None:
self.to_stderr(message)
if self.params.get('verbose'):
if tb is None:
tb = u''.join(traceback.format_list(traceback.extract_stack()))
tb_data = traceback.format_list(traceback.extract_stack())
tb = u''.join(tb_data)
self.to_stderr(tb)
if not self.params.get('ignoreerrors', False):
raise DownloadError(message)
@@ -497,7 +500,7 @@ class FileDownloader(object):
try:
videos = ie.extract(url)
except ExtractorError as de: # An error we somewhat expected
self.trouble(u'ERROR: ' + compat_str(de), compat_str(u''.join(traceback.format_tb(de.traceback))))
self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback())
break
except Exception as e:
if self.params.get('ignoreerrors', False):
@@ -534,7 +537,7 @@ class FileDownloader(object):
if info is None:
break
def _download_with_rtmpdump(self, filename, url, player_url):
def _download_with_rtmpdump(self, filename, url, player_url, page_url):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
@@ -548,7 +551,11 @@ class FileDownloader(object):
# Download using rtmpdump. rtmpdump returns exit code 2 when
# the connection was interrumpted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK.
basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
if player_url is not None:
basic_args += ['-W', player_url]
if page_url is not None:
basic_args += ['--pageUrl', page_url]
args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
if self.params.get('verbose', False):
try:
@@ -581,7 +588,6 @@ class FileDownloader(object):
def _do_download(self, filename, info_dict):
url = info_dict['url']
player_url = info_dict.get('player_url', None)
# Check file already present
if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
@@ -590,7 +596,9 @@ class FileDownloader(object):
# Attempt to download using rtmpdump
if url.startswith('rtmp'):
return self._download_with_rtmpdump(filename, url, player_url)
return self._download_with_rtmpdump(filename, url,
info_dict.get('player_url', None),
info_dict.get('page_url', None))
tmpfilename = self.temp_name(filename)
stream = None
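
_download_with_rtmpdump now takes both player_url and page_url (pulled from the info dict with .get, so either may be None) and only appends the matching rtmpdump options when a value is present. A standalone sketch of that argument-building pattern, with invented example values:

    def build_rtmpdump_args(url, tmpfilename, player_url=None, page_url=None,
                            continuedl=False):
        """Mirror the option handling above: optional flags only when known."""
        args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
        if player_url is not None:
            args += ['-W', player_url]        # SWF player URL
        if page_url is not None:
            args += ['--pageUrl', page_url]   # page embedding the stream
        if continuedl:
            args += ['-e', '-k', '1']         # resume flags used above
        return args

    # Invented example values:
    print(build_rtmpdump_args('rtmp://example.com/app/stream', 'clip.flv.part',
                              page_url='http://example.com/watch/123'))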

View File

@@ -117,7 +117,7 @@ class InfoExtractor(object):
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
if errnote is None:
errnote = u'Unable to download webpage'
raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)))
raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2])
class YoutubeIE(InfoExtractor):
@@ -2333,7 +2333,6 @@ class ComedyCentralIE(InfoExtractor):
(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))))
$"""
IE_NAME = u'comedycentral'
_available_formats = ['3500', '2200', '1700', '1200', '750', '400']
@@ -2361,16 +2360,12 @@ class ComedyCentralIE(InfoExtractor):
def report_extraction(self, episode_id):
self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
def report_config_download(self, episode_id):
self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id)
def report_config_download(self, episode_id, media_id):
self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration for %s' % (episode_id, media_id))
def report_index_download(self, episode_id):
self._downloader.to_screen(u'[comedycentral] %s: Downloading show index' % episode_id)
def report_player_url(self, episode_id):
self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)
def _print_formats(self, formats):
print('Available formats:')
for x in formats:
@@ -2409,6 +2404,7 @@ class ComedyCentralIE(InfoExtractor):
try:
htmlHandle = compat_urllib_request.urlopen(req)
html = htmlHandle.read()
webpage = html.decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
return
@@ -2423,29 +2419,20 @@ class ComedyCentralIE(InfoExtractor):
return
epTitle = mobj.group('episode')
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', html)
mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
if len(mMovieParams) == 0:
# The Colbert Report embeds the information in a without
# a URL prefix; so extract the alternate reference
# and then add the URL prefix manually.
altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', html)
altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', webpage)
if len(altMovieParams) == 0:
self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
return
else:
mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
playerUrl_raw = mMovieParams[0][0]
self.report_player_url(epTitle)
try:
urlHandle = compat_urllib_request.urlopen(playerUrl_raw)
playerUrl = urlHandle.geturl()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to find out player URL: ' + compat_str(err))
return
uri = mMovieParams[0][1]
indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
self.report_index_download(epTitle)
@@ -2459,7 +2446,7 @@ class ComedyCentralIE(InfoExtractor):
idoc = xml.etree.ElementTree.fromstring(indexXml)
itemEls = idoc.findall('.//item')
for itemEl in itemEls:
for partNum,itemEl in enumerate(itemEls):
mediaId = itemEl.findall('./guid')[0].text
shortMediaId = mediaId.split(':')[-1]
showId = mediaId.split(':')[-2].replace('.com', '')
@@ -2469,7 +2456,7 @@ class ComedyCentralIE(InfoExtractor):
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
compat_urllib_parse.urlencode({'uri': mediaId}))
configReq = compat_urllib_request.Request(configUrl)
self.report_config_download(epTitle)
self.report_config_download(epTitle, shortMediaId)
try:
configXml = compat_urllib_request.urlopen(configReq).read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
@@ -2491,7 +2478,7 @@ class ComedyCentralIE(InfoExtractor):
return
# For now, just pick the highest bitrate
format,video_url = turls[-1]
format,rtmp_video_url = turls[-1]
# Get the format arg from the arg stream
req_format = self._downloader.params.get('format', None)
@@ -2499,18 +2486,16 @@ class ComedyCentralIE(InfoExtractor):
# Select format if we can find one
for f,v in turls:
if f == req_format:
format, video_url = f, v
format, rtmp_video_url = f, v
break
# Patch to download from alternative CDN, which does not
# break on current RTMPDump builds
broken_cdn = "rtmpe://viacomccstrmfs.fplive.net/viacomccstrm/gsp.comedystor/"
better_cdn = "rtmpe://cp10740.edgefcs.net/ondemand/mtvnorigin/gsp.comedystor/"
m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url)
if not m:
raise ExtractorError(u'Cannot transform RTMP url')
base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
video_url = base + m.group('finalid')
if video_url.startswith(broken_cdn):
video_url = video_url.replace(broken_cdn, better_cdn)
effTitle = showId + u'-' + epTitle
effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1)
info = {
'id': shortMediaId,
'url': video_url,
@@ -2521,9 +2506,7 @@ class ComedyCentralIE(InfoExtractor):
'format': format,
'thumbnail': None,
'description': officialTitle,
'player_url': None #playerUrl
}
results.append(info)
return results
@@ -2603,7 +2586,6 @@ class EscapistIE(InfoExtractor):
return [info]
class CollegeHumorIE(InfoExtractor):
"""Information extractor for collegehumor.com"""
@@ -3542,17 +3524,23 @@ class JustinTVIE(InfoExtractor):
return
response = json.loads(webpage)
if type(response) != list:
error_text = response.get('error', 'unknown error')
self._downloader.trouble(u'ERROR: Justin.tv API: %s' % error_text)
return
info = []
for clip in response:
video_url = clip['video_file_url']
if video_url:
video_extension = os.path.splitext(video_url)[1][1:]
video_date = re.sub('-', '', clip['created_on'][:10])
video_date = re.sub('-', '', clip['start_time'][:10])
video_uploader_id = clip.get('user_id', clip.get('channel_id'))
info.append({
'id': clip['id'],
'url': video_url,
'title': clip['title'],
'uploader': clip.get('user_id', clip.get('channel_id')),
'uploader': clip.get('channel_name', video_uploader_id),
'uploader_id': video_uploader_id,
'upload_date': video_date,
'ext': video_extension,
})
@@ -3571,7 +3559,7 @@ class JustinTVIE(InfoExtractor):
paged = True
api += '/channel/archives/%s.json'
else:
api += '/clip/show/%s.json'
api += '/broadcast/by_archive/%s.json'
api = api % (video_id,)
self.report_extraction(video_id)
@@ -3694,8 +3682,8 @@ class SteamIE(InfoExtractor):
videourl = 'http://store.steampowered.com/video/%s/' % gameID
webpage = self._download_webpage(videourl, gameID)
mweb = re.finditer(urlRE, webpage)
namesRE = r'<span class=\"title\">(?P<videoName>[\w:/\.\?=\+\s-]+)</span>'
titles = list(re.finditer(namesRE, webpage))
namesRE = r'<span class="title">(?P<videoName>.+?)</span>'
titles = re.finditer(namesRE, webpage)
videos = []
for vid,vtitle in zip(mweb,titles):
video_id = vid.group('videoID')
@@ -3707,7 +3695,7 @@ class SteamIE(InfoExtractor):
'id':video_id,
'url':video_url,
'ext': 'flv',
'title': title
'title': unescapeHTML(title)
}
videos.append(info)
return videos
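
Among the extractor changes above, ComedyCentral now rewrites the RTMP URL onto an HTTP base instead of swapping between RTMP CDNs, and appends a part number to each playlist item's title. A small sketch of the URL rewrite in isolation (the sample input URL is invented):

    import re

    def rtmp_to_http(rtmp_video_url):
        """Graft the gsp.comedystor/... path onto the HTTP base used above."""
        m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\.comedystor/.*)$',
                     rtmp_video_url)
        if not m:
            raise ValueError('Cannot transform RTMP url')
        base = ('http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264'
                '+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/')
        return base + m.group('finalid')

    # Invented example input:
    print(rtmp_to_http('rtmpe://viacomccstrmfs.fplive.net/viacomccstrm/'
                       'gsp.comedystor/com/dailyshow/example_part1.mp4'))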

View File

@@ -8,6 +8,7 @@ import locale
import os
import re
import sys
import traceback
import zlib
import email.utils
import json
@@ -414,12 +415,15 @@ def encodeFilename(s):
class ExtractorError(Exception):
"""Error during info extraction."""
def __init__(self, msg, tb=None):
""" tb is the original traceback (so that it can be printed out) """
""" tb, if given, is the original traceback (so that it can be printed out). """
super(ExtractorError, self).__init__(msg)
if tb is None:
tb = sys.exc_info()[2]
self.traceback = tb
def format_traceback(self):
if self.traceback is None:
return None
return u''.join(traceback.format_tb(self.traceback))
class DownloadError(Exception):
"""Download Error exception.

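ExtractorError now captures sys.exc_info()[2] when no traceback is passed and exposes format_traceback(), which FileDownloader.trouble() prints in verbose mode. A minimal usage sketch (the failing helper is hypothetical):

    from youtube_dl.utils import ExtractorError

    def fetch_configuration():
        # Hypothetical helper standing in for a network call that fails.
        raise IOError('connection reset')

    try:
        try:
            fetch_configuration()
        except Exception as err:
            # No tb argument: __init__ falls back to sys.exc_info()[2], so the
            # IOError's traceback travels with the ExtractorError.
            raise ExtractorError(u'Unable to download configuration: %s' % err)
    except ExtractorError as e:
        print(e.format_traceback() or u'(no traceback recorded)')
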
View File

@@ -1,2 +1,2 @@
__version__ = '2013.01.02'
__version__ = '2013.01.08'