Compare commits
12 Commits
2013.07.19
...
2013.07.23
Author | SHA1 | Date | |
---|---|---|---|
|
a7af0ebaf5 | ||
|
67ae7b4760 | ||
|
de48addae2 | ||
|
ddbfd0f0c5 | ||
|
d7ae0639b4 | ||
|
0382435990 | ||
|
b390d85d95 | ||
|
8e5e059d7d | ||
|
2b1b511f6b | ||
|
233ad24ecf | ||
|
c4949c50f9 | ||
|
b6ef402905 |
@@ -9,6 +9,7 @@ notifications:
|
||||
- filippo.valsorda@gmail.com
|
||||
- phihag@phihag.de
|
||||
- jaime.marquinez.ferrandiz+travis@gmail.com
|
||||
- yasoob.khld@gmail.com
|
||||
# irc:
|
||||
# channels:
|
||||
# - "irc.freenode.org#youtube-dl"
|
||||
|
@@ -19,6 +19,7 @@ from .dreisat import DreiSatIE
|
||||
from .ehow import EHowIE
|
||||
from .eighttracks import EightTracksIE
|
||||
from .escapist import EscapistIE
|
||||
from .exfm import ExfmIE
|
||||
from .facebook import FacebookIE
|
||||
from .flickr import FlickrIE
|
||||
from .freesound import FreesoundIE
|
||||
@@ -91,6 +92,7 @@ from .youtube import (
|
||||
YoutubeChannelIE,
|
||||
YoutubeShowIE,
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeRecommendedIE,
|
||||
)
|
||||
from .zdf import ZDFIE
|
||||
|
||||
|
@@ -1,6 +1,8 @@
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import determine_ext
|
||||
|
||||
|
||||
class BreakIE(InfoExtractor):
|
||||
@@ -17,17 +19,20 @@ class BreakIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group(1).split("-")[-1]
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_url = re.search(r"videoPath: '(.+?)',",webpage).group(1)
|
||||
key = re.search(r"icon: '(.+?)',",webpage).group(1)
|
||||
final_url = str(video_url)+"?"+str(key)
|
||||
thumbnail_url = re.search(r"thumbnailURL: '(.+?)'",webpage).group(1)
|
||||
title = re.search(r"sVidTitle: '(.+)',",webpage).group(1)
|
||||
ext = video_url.split('.')[-1]
|
||||
embed_url = 'http://www.break.com/embed/%s' % video_id
|
||||
webpage = self._download_webpage(embed_url, video_id)
|
||||
info_json = self._search_regex(r'var embedVars = ({.*?});', webpage,
|
||||
u'info json', flags=re.DOTALL)
|
||||
info = json.loads(info_json)
|
||||
video_url = info['videoUri']
|
||||
m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url)
|
||||
if m_youtube is not None:
|
||||
return self.url_result(m_youtube.group(1), 'Youtube')
|
||||
final_url = video_url + '?' + info['AuthToken']
|
||||
return [{
|
||||
'id': video_id,
|
||||
'url': final_url,
|
||||
'ext': ext,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail_url,
|
||||
'ext': determine_ext(final_url),
|
||||
'title': info['contentName'],
|
||||
'thumbnail': info['thumbUri'],
|
||||
}]
|
||||
|
@@ -1,26 +1,26 @@
|
||||
import re
|
||||
import socket
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_http_client,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_request,
|
||||
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class CollegeHumorIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
|
||||
|
||||
def report_manifest(self, video_id):
|
||||
"""Report information extraction."""
|
||||
self.to_screen(u'%s: Downloading XML manifest' % video_id)
|
||||
_TEST = {
|
||||
u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
|
||||
u'file': u'6902724.mp4',
|
||||
u'md5': u'1264c12ad95dca142a9f0bf7968105a0',
|
||||
u'info_dict': {
|
||||
u'title': u'Comic-Con Cosplay Catastrophe',
|
||||
u'description': u'Fans get creative this year at San Diego. Too creative. And yes, that\'s really Joss Whedon.',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -36,10 +36,9 @@ class CollegeHumorIE(InfoExtractor):
|
||||
|
||||
self.report_extraction(video_id)
|
||||
xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
|
||||
try:
|
||||
metaXml = compat_urllib_request.urlopen(xmlUrl).read()
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
|
||||
metaXml = self._download_webpage(xmlUrl, video_id,
|
||||
u'Downloading info XML',
|
||||
u'Unable to download video info XML')
|
||||
|
||||
mdoc = xml.etree.ElementTree.fromstring(metaXml)
|
||||
try:
|
||||
@@ -52,11 +51,9 @@ class CollegeHumorIE(InfoExtractor):
|
||||
raise ExtractorError(u'Invalid metadata XML file')
|
||||
|
||||
manifest_url += '?hdcore=2.10.3'
|
||||
self.report_manifest(video_id)
|
||||
try:
|
||||
manifestXml = compat_urllib_request.urlopen(manifest_url).read()
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
|
||||
manifestXml = self._download_webpage(manifest_url, video_id,
|
||||
u'Downloading XML manifest',
|
||||
u'Unable to download video info XML')
|
||||
|
||||
adoc = xml.etree.ElementTree.fromstring(manifestXml)
|
||||
try:
|
||||
@@ -66,9 +63,8 @@ class CollegeHumorIE(InfoExtractor):
|
||||
except IndexError as err:
|
||||
raise ExtractorError(u'Invalid manifest file')
|
||||
|
||||
url_pr = compat_urllib_parse_urlparse(manifest_url)
|
||||
url = url_pr.scheme + '://' + url_pr.netloc + '/z' + video_id[:-2] + '/' + node_id + 'Seg1-Frag1'
|
||||
url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
|
||||
|
||||
info['url'] = url
|
||||
info['ext'] = 'f4f'
|
||||
info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
|
||||
info['ext'] = 'mp4'
|
||||
return [info]
|
||||
|
@@ -24,7 +24,9 @@ class ComedyCentralIE(InfoExtractor):
|
||||
(full-episodes/(?P<episode>.*)|
|
||||
(?P<clip>
|
||||
(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|
||||
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))))
|
||||
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))|
|
||||
(?P<interview>
|
||||
extended-interviews/(?P<interID>[0-9]+)/playlist_tds_extended_(?P<interview_title>.*?)/.*?)))
|
||||
$"""
|
||||
_TEST = {
|
||||
u'url': u'http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart',
|
||||
@@ -87,6 +89,9 @@ class ComedyCentralIE(InfoExtractor):
|
||||
else:
|
||||
epTitle = mobj.group('cntitle')
|
||||
dlNewest = False
|
||||
elif mobj.group('interview'):
|
||||
epTitle = mobj.group('interview_title')
|
||||
dlNewest = False
|
||||
else:
|
||||
dlNewest = not mobj.group('episode')
|
||||
if dlNewest:
|
||||
|
42
youtube_dl/extractor/exfm.py
Normal file
42
youtube_dl/extractor/exfm.py
Normal file
@@ -0,0 +1,42 @@
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class ExfmIE(InfoExtractor):
    """Information extractor for ex.fm song pages.

    The song metadata is fetched as JSON from the ex.fm v3 API.  When the
    media URL reported by the API is a SoundCloud stream URL, a client id
    is appended to it as a query string.
    """
    IE_NAME = u'exfm'
    IE_DESC = u'ex.fm'
    # Both schemes are accepted; the single group captures the song id.
    _VALID_URL = r'(?:https?://)?(?:www\.)?ex\.fm/song/([^/]+)'
    # Every dot is escaped so that only the literal host name matches
    # (an unescaped "." would accept any character in its place).
    _SOUNDCLOUD_URL_ = r'(?:https?://)?(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream'
    # Client id appended to SoundCloud stream URLs.
    _SOUNDCLOUD_CLIENT_ID = u'b45b1aa10f1ac2941910a7f0d10f8e28'
    _TEST = {
        u'url': u'http://ex.fm/song/1bgtzg',
        u'file': u'1bgtzg.mp3',
        u'md5': u'8a7967a3fef10e59a1d6f86240fd41cf',
        u'info_dict': {
            u"title": u"We Can't Stop",
            u"uploader": u"Miley Cyrus",
            u'thumbnail': u'http://i1.sndcdn.com/artworks-000049666230-w9i7ef-t500x500.jpg?9d68d37'
        }
    }

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the song at *url*.

        Raises KeyError if the API response lacks the 'song' entry or one
        of its 'url', 'title', 'artist' or 'loved_count' fields.
        """
        mobj = re.match(self._VALID_URL, url)
        song_id = mobj.group(1)
        info_url = "http://ex.fm/api/v3/song/%s" % song_id
        webpage = self._download_webpage(info_url, song_id)
        song = json.loads(webpage)['song']

        song_url = song['url']
        m_soundcloud = re.match(self._SOUNDCLOUD_URL_, song_url)
        if m_soundcloud is not None:
            # Keep only the matched ".../stream" prefix (anything after it
            # is dropped) and append the client id.
            song_url = m_soundcloud.group() + "?client_id=" + self._SOUNDCLOUD_CLIENT_ID

        # The artwork is optional metadata: a missing or null 'image' entry
        # yields a None thumbnail instead of aborting the extraction.
        thumbnail = (song.get('image') or {}).get('large')

        return [{
            'id': song_id,
            'url': song_url,
            'ext': 'mp3',
            'title': song['title'],
            'thumbnail': thumbnail,
            'uploader': song['artist'],
            # NOTE(review): the API's "loved_count" is reported as the view
            # count; confirm that this mapping is intended.
            'view_count': song['loved_count'],
        }]
|
@@ -898,12 +898,12 @@ class YoutubeShowIE(InfoExtractor):
|
||||
return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
|
||||
|
||||
|
||||
class YoutubeSubscriptionsIE(YoutubeIE):
|
||||
"""It's a subclass of YoutubeIE because we need to login"""
|
||||
IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
|
||||
_VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
|
||||
IE_NAME = u'youtube:subscriptions'
|
||||
_FEED_TEMPLATE = 'http://www.youtube.com/feed_ajax?action_load_system_feed=1&feed_name=subscriptions&paging=%s'
|
||||
class YoutubeFeedsInfoExtractor(YoutubeIE):
|
||||
"""
|
||||
Base class for extractors that fetch info from
|
||||
http://www.youtube.com/feed_ajax
|
||||
Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
|
||||
"""
|
||||
_PAGING_STEP = 30
|
||||
|
||||
# Overwrite YoutubeIE properties we don't want
|
||||
@@ -912,18 +912,27 @@ class YoutubeSubscriptionsIE(YoutubeIE):
|
||||
def suitable(cls, url):
|
||||
return re.match(cls._VALID_URL, url) is not None
|
||||
|
||||
@property
|
||||
def _FEED_TEMPLATE(self):
|
||||
return 'http://www.youtube.com/feed_ajax?action_load_system_feed=1&feed_name=%s&paging=%%s' % self._FEED_NAME
|
||||
|
||||
@property
|
||||
def IE_NAME(self):
|
||||
return u'youtube:%s' % self._FEED_NAME
|
||||
|
||||
def _real_initialize(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
raise ExtractorError(u'No login info available, needed for downloading the Youtube subscriptions.', expected=True)
|
||||
super(YoutubeSubscriptionsIE, self)._real_initialize()
|
||||
super(YoutubeFeedsInfoExtractor, self)._real_initialize()
|
||||
|
||||
def _real_extract(self, url):
|
||||
feed_entries = []
|
||||
# The step argument is available only in 2.7 or higher
|
||||
for i in itertools.count(0):
|
||||
paging = i*self._PAGING_STEP
|
||||
info = self._download_webpage(self._FEED_TEMPLATE % paging, 'feed',
|
||||
info = self._download_webpage(self._FEED_TEMPLATE % paging,
|
||||
u'%s feed' % self._FEED_NAME,
|
||||
u'Downloading page %s' % i)
|
||||
info = json.loads(info)
|
||||
feed_html = info['feed_html']
|
||||
@@ -932,4 +941,16 @@ class YoutubeSubscriptionsIE(YoutubeIE):
|
||||
feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
|
||||
if info['paging'] is None:
|
||||
break
|
||||
return self.playlist_result(feed_entries, playlist_title='Youtube Subscriptions')
|
||||
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
|
||||
|
||||
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'subscriptions' feed.

    Matches the subscriptions feed URL as well as the ":ytsubs" /
    ":ytsubscriptions" keywords.
    """
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
|
||||
|
||||
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor configured for the 'recommended' feed.

    Matches the recommended feed URL as well as the ":ytrec" /
    ":ytrecommended" keywords.
    """
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
|
||||
|
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2013.07.19'
|
||||
__version__ = '2013.07.23'
|
||||
|
Reference in New Issue
Block a user