Compare commits
88 Commits
2014.08.21
...
2014.08.24
Author | SHA1 | Date | |
---|---|---|---|
|
a71d1414eb | ||
|
423817c468 | ||
|
51ed9fce09 | ||
|
d43aeb1d00 | ||
|
4d805e063c | ||
|
24e5e24166 | ||
|
4d54ef20a2 | ||
|
54036b3991 | ||
|
e5402ac120 | ||
|
f56f8399c7 | ||
|
cf0c5fa3a1 | ||
|
8c2ccefae6 | ||
|
1f8b6af773 | ||
|
8f9b683eeb | ||
|
b5f4775b38 | ||
|
01d906ffe9 | ||
|
614582bcc4 | ||
|
e1ab5000b2 | ||
|
a5ed3e571e | ||
|
10eaeb20c5 | ||
|
fa8deaf38b | ||
|
6857590059 | ||
|
a3db22ebdf | ||
|
c8e9a235d9 | ||
|
30b871b0ca | ||
|
eb9da9b732 | ||
|
d769be6c96 | ||
|
a54bda3ae2 | ||
|
00558d9414 | ||
|
49f3c16543 | ||
|
2ef6fcb5d8 | ||
|
38fc045253 | ||
|
af1fd929c6 | ||
|
b7b04c9234 | ||
|
bc0bb6fd30 | ||
|
430826c9d4 | ||
|
68909f0c4e | ||
|
9d048a17d8 | ||
|
492641d10a | ||
|
2b9faf5542 | ||
|
ed2d6a1960 | ||
|
be843678b1 | ||
|
c71dfccc98 | ||
|
1a9ccac7c1 | ||
|
e330d59abb | ||
|
394df6d7d0 | ||
|
218f754940 | ||
|
a053c3493a | ||
|
50b294aab8 | ||
|
756b046f3e | ||
|
388ac0b18a | ||
|
ad06434bd3 | ||
|
bd9820c937 | ||
|
deda8ac376 | ||
|
e05f693942 | ||
|
b27295d2ab | ||
|
ace52c5713 | ||
|
e62e150f64 | ||
|
c44c0a775d | ||
|
5fcf2dbed0 | ||
|
91dff03217 | ||
|
a200f4cee2 | ||
|
ea6e8d5454 | ||
|
83d35817f5 | ||
|
76beff70a8 | ||
|
61882bf7c6 | ||
|
cab317a680 | ||
|
73159f99cc | ||
|
c15235cd07 | ||
|
12c3ec3382 | ||
|
55db73efdf | ||
|
af40ac054a | ||
|
a36819731b | ||
|
181c8655c7 | ||
|
3b95347bb6 | ||
|
1ce464aba9 | ||
|
6994e70651 | ||
|
c3f0b12b0f | ||
|
27ace98f51 | ||
|
a00d73c8c8 | ||
|
7e660ac113 | ||
|
37e3cbe22e | ||
|
610134730a | ||
|
212a5e28ba | ||
|
ee1a7032d5 | ||
|
7ed806d241 | ||
|
dd06c95e43 | ||
|
3442b30ab2 |
102
README.md
102
README.md
@@ -17,6 +17,14 @@ If you do not have curl, you can alternatively use a recent wget:
|
||||
|
||||
Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29).
|
||||
|
||||
OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/).
|
||||
|
||||
brew install youtube-dl
|
||||
|
||||
You can also use pip:
|
||||
|
||||
sudo pip install youtube-dl
|
||||
|
||||
Alternatively, refer to the developer instructions below for how to check out and work with the git repository. For further options, including PGP signatures, see https://rg3.github.io/youtube-dl/download.html .
|
||||
|
||||
# DESCRIPTION
|
||||
@@ -303,10 +311,12 @@ The current default template is `%(title)s-%(id)s.%(ext)s`.
|
||||
|
||||
In some cases, you don't want special characters such as 中, spaces, or & in the filename, for example when transferring the downloaded file to a Windows system or passing the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
|
||||
|
||||
$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc
|
||||
youtube-dl test video ''_ä↭𝕐.mp4 # All kinds of weird characters
|
||||
$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
|
||||
youtube-dl_test_video_.mp4 # A simple file name
|
||||
```bash
|
||||
$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc
|
||||
youtube-dl test video ''_ä↭𝕐.mp4 # All kinds of weird characters
|
||||
$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
|
||||
youtube-dl_test_video_.mp4 # A simple file name
|
||||
```
|
||||
|
||||
# VIDEO SELECTION
|
||||
|
||||
@@ -317,14 +327,16 @@ Videos can be filtered by their upload date using the options `--date`, `--dateb
|
||||
|
||||
Examples:
|
||||
|
||||
# Download only the videos uploaded in the last 6 months
|
||||
$ youtube-dl --dateafter now-6months
|
||||
```bash
|
||||
# Download only the videos uploaded in the last 6 months
|
||||
$ youtube-dl --dateafter now-6months
|
||||
|
||||
# Download only the videos uploaded on January 1, 1970
|
||||
$ youtube-dl --date 19700101
|
||||
# Download only the videos uploaded on January 1, 1970
|
||||
$ youtube-dl --date 19700101
|
||||
|
||||
$ # will only download the videos uploaded in the 200x decade
|
||||
$ youtube-dl --dateafter 20000101 --datebefore 20091231
|
||||
$ # will only download the videos uploaded in the 200x decade
|
||||
$ youtube-dl --dateafter 20000101 --datebefore 20091231
|
||||
```
|
||||
|
||||
# FAQ
|
||||
|
||||
@@ -399,49 +411,49 @@ If you want to add support for a new site, you can follow this quick list (assum
|
||||
2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git`
|
||||
3. Start a new git branch with `cd youtube-dl; git checkout -b yourextractor`
|
||||
4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
|
||||
```python
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
import re
|
||||
from .common import InfoExtractor
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class YourExtractorIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://yourextractor.com/watch/42',
|
||||
'md5': 'TODO: md5 sum of the first 10KiB of the video file',
|
||||
'info_dict': {
|
||||
'id': '42',
|
||||
'ext': 'mp4',
|
||||
'title': 'Video title goes here',
|
||||
# TODO more properties, either as:
|
||||
# * A value
|
||||
# * MD5 checksum; start the string with md5:
|
||||
# * A regular expression; start the string with re:
|
||||
# * Any Python type (for example int or float)
|
||||
}
|
||||
|
||||
class YourExtractorIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://yourextractor.com/watch/42',
|
||||
'md5': 'TODO: md5 sum of the first 10KiB of the video file',
|
||||
'info_dict': {
|
||||
'id': '42',
|
||||
'ext': 'mp4',
|
||||
'title': 'Video title goes here',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
# TODO more properties, either as:
|
||||
# * A value
|
||||
# * MD5 checksum; start the string with md5:
|
||||
# * A regular expression; start the string with re:
|
||||
# * Any Python type (for example int or float)
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
# TODO more code goes here, for example ...
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
# TODO more properties (see youtube_dl/extractor/common.py)
|
||||
}
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
# TODO more code goes here, for example ...
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
# TODO more properties (see youtube_dl/extractor/common.py)
|
||||
}
|
||||
```
|
||||
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done.
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want.
|
||||
8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501).
|
||||
9. When the tests pass, [add](https://www.kernel.org/pub/software/scm/git/docs/git-add.html) the new files and [commit](https://www.kernel.org/pub/software/scm/git/docs/git-commit.html) them and [push](https://www.kernel.org/pub/software/scm/git/docs/git-push.html) the result, like this:
|
||||
|
@@ -221,7 +221,7 @@ class TestFormatSelection(unittest.TestCase):
|
||||
'138', '137', '248', '136', '247', '135', '246',
|
||||
'245', '244', '134', '243', '133', '242', '160',
|
||||
# Dash audio
|
||||
'141', '172', '140', '139', '171',
|
||||
'141', '172', '140', '171', '139',
|
||||
]
|
||||
|
||||
for f1id, f2id in zip(order, order[1:]):
|
||||
|
@@ -480,7 +480,10 @@ class YoutubeDL(object):
|
||||
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
|
||||
age_limit = self.params.get('age_limit')
|
||||
if age_limit is not None:
|
||||
if age_limit < info_dict.get('age_limit', 0):
|
||||
actual_age_limit = info_dict.get('age_limit')
|
||||
if actual_age_limit is None:
|
||||
actual_age_limit = 0
|
||||
if age_limit < actual_age_limit:
|
||||
return 'Skipping "' + title + '" because it is age restricted'
|
||||
if self.in_download_archive(info_dict):
|
||||
return '%s has already been recorded in archive' % video_title
|
||||
|
@@ -70,6 +70,8 @@ __authors__ = (
|
||||
'David Fabijan',
|
||||
'Sebastian Haas',
|
||||
'Alexander Kirk',
|
||||
'Erik Johnson',
|
||||
'Keith Beckman',
|
||||
)
|
||||
|
||||
__license__ = 'Public Domain'
|
||||
|
@@ -27,8 +27,16 @@ class HttpFD(FileDownloader):
|
||||
headers['Youtubedl-user-agent'] = info_dict['user_agent']
|
||||
if 'http_referer' in info_dict:
|
||||
headers['Referer'] = info_dict['http_referer']
|
||||
basic_request = compat_urllib_request.Request(url, None, headers)
|
||||
request = compat_urllib_request.Request(url, None, headers)
|
||||
add_headers = info_dict.get('http_headers')
|
||||
if add_headers:
|
||||
headers.update(add_headers)
|
||||
data = info_dict.get('http_post_data')
|
||||
http_method = info_dict.get('http_method')
|
||||
basic_request = compat_urllib_request.Request(url, data, headers)
|
||||
request = compat_urllib_request.Request(url, data, headers)
|
||||
if http_method is not None:
|
||||
basic_request.get_method = lambda: http_method
|
||||
request.get_method = lambda: http_method
|
||||
|
||||
is_test = self.params.get('test', False)
|
||||
|
||||
|
@@ -69,6 +69,7 @@ from .dfb import DFBIE
|
||||
from .dotsub import DotsubIE
|
||||
from .dreisat import DreiSatIE
|
||||
from .drtv import DRTVIE
|
||||
from .dump import DumpIE
|
||||
from .defense import DefenseGouvFrIE
|
||||
from .discovery import DiscoveryIE
|
||||
from .divxstage import DivxStageIE
|
||||
@@ -87,6 +88,7 @@ from .engadget import EngadgetIE
|
||||
from .escapist import EscapistIE
|
||||
from .everyonesmixtape import EveryonesMixtapeIE
|
||||
from .exfm import ExfmIE
|
||||
from .expotv import ExpoTVIE
|
||||
from .extremetube import ExtremeTubeIE
|
||||
from .facebook import FacebookIE
|
||||
from .faz import FazIE
|
||||
@@ -125,6 +127,7 @@ from .googleplus import GooglePlusIE
|
||||
from .googlesearch import GoogleSearchIE
|
||||
from .gorillavid import GorillaVidIE
|
||||
from .goshgay import GoshgayIE
|
||||
from .grooveshark import GroovesharkIE
|
||||
from .hark import HarkIE
|
||||
from .helsinki import HelsinkiIE
|
||||
from .hentaistigma import HentaiStigmaIE
|
||||
@@ -182,6 +185,7 @@ from .malemotion import MalemotionIE
|
||||
from .mdr import MDRIE
|
||||
from .metacafe import MetacafeIE
|
||||
from .metacritic import MetacriticIE
|
||||
from .ministrygrid import MinistryGridIE
|
||||
from .mit import TechTVMITIE, MITIE, OCWMITIE
|
||||
from .mitele import MiTeleIE
|
||||
from .mixcloud import MixcloudIE
|
||||
@@ -193,6 +197,7 @@ from .mooshare import MooshareIE
|
||||
from .morningstar import MorningstarIE
|
||||
from .motherless import MotherlessIE
|
||||
from .motorsport import MotorsportIE
|
||||
from .movieclips import MovieClipsIE
|
||||
from .moviezine import MoviezineIE
|
||||
from .movshare import MovShareIE
|
||||
from .mtv import (
|
||||
@@ -239,8 +244,10 @@ from .orf import (
|
||||
ORFFM4IE,
|
||||
)
|
||||
from .parliamentliveuk import ParliamentLiveUKIE
|
||||
from .patreon import PatreonIE
|
||||
from .pbs import PBSIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .playfm import PlayFMIE
|
||||
from .playvid import PlayvidIE
|
||||
from .podomatic import PodomaticIE
|
||||
from .pornhd import PornHdIE
|
||||
@@ -261,7 +268,7 @@ from .rtbf import RTBFIE
|
||||
from .rtlnl import RtlXlIE
|
||||
from .rtlnow import RTLnowIE
|
||||
from .rts import RTSIE
|
||||
from .rtve import RTVEALaCartaIE
|
||||
from .rtve import RTVEALaCartaIE, RTVELiveIE
|
||||
from .ruhd import RUHDIE
|
||||
from .rutube import (
|
||||
RutubeIE,
|
||||
@@ -272,6 +279,7 @@ from .rutube import (
|
||||
from .rutv import RUTVIE
|
||||
from .sapo import SapoIE
|
||||
from .savefrom import SaveFromIE
|
||||
from .sbs import SBSIE
|
||||
from .scivee import SciVeeIE
|
||||
from .screencast import ScreencastIE
|
||||
from .servingsys import ServingSysIE
|
||||
@@ -384,6 +392,7 @@ from .vuclip import VuClipIE
|
||||
from .vulture import VultureIE
|
||||
from .washingtonpost import WashingtonPostIE
|
||||
from .wat import WatIE
|
||||
from .wayofthemaster import WayOfTheMasterIE
|
||||
from .wdr import (
|
||||
WDRIE,
|
||||
WDRMobileIE,
|
||||
|
@@ -1,5 +1,7 @@
|
||||
#coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -13,13 +15,14 @@ class AparatIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.aparat.com/v/wP8On',
|
||||
u'file': u'wP8On.mp4',
|
||||
u'md5': u'6714e0af7e0d875c5a39c4dc4ab46ad1',
|
||||
u'info_dict': {
|
||||
u"title": u"تیم گلکسی 11 - زومیت",
|
||||
'url': 'http://www.aparat.com/v/wP8On',
|
||||
'md5': '6714e0af7e0d875c5a39c4dc4ab46ad1',
|
||||
'info_dict': {
|
||||
'id': 'wP8On',
|
||||
'ext': 'mp4',
|
||||
'title': 'تیم گلکسی 11 - زومیت',
|
||||
},
|
||||
#u'skip': u'Extremely unreliable',
|
||||
# 'skip': 'Extremely unreliable',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -29,8 +32,8 @@ class AparatIE(InfoExtractor):
|
||||
# Note: There is an easier-to-parse configuration at
|
||||
# http://www.aparat.com/video/video/config/videohash/%video_id
|
||||
# but the URL in there does not work
|
||||
embed_url = (u'http://www.aparat.com/video/video/embed/videohash/' +
|
||||
video_id + u'/vt/frame')
|
||||
embed_url = ('http://www.aparat.com/video/video/embed/videohash/' +
|
||||
video_id + '/vt/frame')
|
||||
webpage = self._download_webpage(embed_url, video_id)
|
||||
|
||||
video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage)
|
||||
|
@@ -177,16 +177,26 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
# It also uses the arte_vp_url url from the webpage to extract the information
|
||||
class ArteTVCreativeIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:creative'
|
||||
_VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de)/magazine?/(?P<id>.+)'
|
||||
_VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de)/(?:magazine?/)?(?P<id>[^?#]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
|
||||
'info_dict': {
|
||||
'id': '050489-002',
|
||||
'id': '72176',
|
||||
'ext': 'mp4',
|
||||
'title': 'Agentur Amateur / Agence Amateur #2 : Corporate Design',
|
||||
'title': 'Folge 2 - Corporate Design',
|
||||
'upload_date': '20131004',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://creative.arte.tv/fr/Monty-Python-Reunion',
|
||||
'info_dict': {
|
||||
'id': '160676',
|
||||
'ext': 'mp4',
|
||||
'title': 'Monty Python live (mostly)',
|
||||
'description': 'Événement ! Quarante-cinq ans après leurs premiers succès, les légendaires Monty Python remontent sur scène.\n',
|
||||
'upload_date': '20140805',
|
||||
}
|
||||
}]
|
||||
|
||||
|
||||
class ArteTVFutureIE(ArteTVPlus7IE):
|
||||
|
@@ -15,7 +15,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class BlipTVIE(SubtitlesInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+]+)))'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_TESTS]+)))'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -49,6 +49,21 @@ class BlipTVIE(SubtitlesInfoExtractor):
|
||||
'uploader_id': '792887',
|
||||
'duration': 279,
|
||||
}
|
||||
},
|
||||
{
|
||||
# https://bugzilla.redhat.com/show_bug.cgi?id=967465
|
||||
'url': 'http://a.blip.tv/api.swf#h6Uag5KbVwI',
|
||||
'md5': '314e87b1ebe7a48fcbfdd51b791ce5a6',
|
||||
'info_dict': {
|
||||
'id': '6573122',
|
||||
'ext': 'mov',
|
||||
'upload_date': '20130520',
|
||||
'description': 'Two hapless space marines argue over what to do when they realize they have an astronomically huge problem on their hands.',
|
||||
'title': 'Red vs. Blue Season 11 Trailer',
|
||||
'timestamp': 1369029609,
|
||||
'uploader': 'redvsblue',
|
||||
'uploader_id': '792887',
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
@@ -150,7 +165,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
|
||||
|
||||
|
||||
class BlipTVUserIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$'
|
||||
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'
|
||||
_PAGE_SIZE = 12
|
||||
IE_NAME = 'blip.tv:user'
|
||||
|
||||
|
@@ -154,12 +154,14 @@ class BrightcoveIE(InfoExtractor):
|
||||
def _extract_brightcove_urls(cls, webpage):
|
||||
"""Return a list of all Brightcove URLs from the webpage """
|
||||
|
||||
url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
|
||||
url_m = re.search(
|
||||
r'<meta\s+property="og:video"\s+content="(https?://(?:secure|c)\.brightcove.com/[^"]+)"',
|
||||
webpage)
|
||||
if url_m:
|
||||
url = unescapeHTML(url_m.group(1))
|
||||
# Some sites don't add it, we can't download with this url, for example:
|
||||
# http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
|
||||
if 'playerKey' in url:
|
||||
if 'playerKey' in url or 'videoId' in url:
|
||||
return [url]
|
||||
|
||||
matches = re.findall(
|
||||
@@ -188,9 +190,13 @@ class BrightcoveIE(InfoExtractor):
|
||||
referer = smuggled_data.get('Referer', url)
|
||||
return self._get_video_info(
|
||||
videoPlayer[0], query_str, query, referer=referer)
|
||||
else:
|
||||
elif 'playerKey' in query:
|
||||
player_key = query['playerKey']
|
||||
return self._get_playlist_info(player_key[0])
|
||||
else:
|
||||
raise ExtractorError(
|
||||
'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
|
||||
expected=True)
|
||||
|
||||
def _get_video_info(self, video_id, query_str, query, referer=None):
|
||||
request_url = self._FEDERATED_URL_TEMPLATE % query_str
|
||||
@@ -202,6 +208,13 @@ class BrightcoveIE(InfoExtractor):
|
||||
req.add_header('Referer', referer)
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
error_msg = self._html_search_regex(
|
||||
r"<h1>We're sorry.</h1>\s*<p>(.*?)</p>", webpage,
|
||||
'error message', default=None)
|
||||
if error_msg is not None:
|
||||
raise ExtractorError(
|
||||
'brightcove said: %s' % error_msg, expected=True)
|
||||
|
||||
self.report_extraction(video_id)
|
||||
info = self._search_regex(r'var experienceJSON = ({.*});', webpage, 'json')
|
||||
info = json.loads(info)['data']
|
||||
|
@@ -84,6 +84,12 @@ class InfoExtractor(object):
|
||||
format, irrespective of the file format.
|
||||
-1 for default (order by other properties),
|
||||
-2 or smaller for less than default.
|
||||
* http_referer HTTP Referer header value to set.
|
||||
* http_method HTTP method to use for the download.
|
||||
* http_headers A dictionary of additional HTTP headers
|
||||
to add to the request.
|
||||
* http_post_data Additional data to send with a POST
|
||||
request.
|
||||
url: Final video URL.
|
||||
ext: Video filename extension.
|
||||
format: The video format, defaults to ext (used for --get-format)
|
||||
@@ -463,8 +469,9 @@ class InfoExtractor(object):
|
||||
return self._og_search_property('title', html, **kargs)
|
||||
|
||||
def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
|
||||
regexes = self._og_regexes('video')
|
||||
if secure: regexes = self._og_regexes('video:secure_url') + regexes
|
||||
regexes = self._og_regexes('video') + self._og_regexes('video:url')
|
||||
if secure:
|
||||
regexes = self._og_regexes('video:secure_url') + regexes
|
||||
return self._html_search_regex(regexes, html, name, **kargs)
|
||||
|
||||
def _og_search_url(self, html, **kargs):
|
||||
|
39
youtube_dl/extractor/dump.py
Normal file
39
youtube_dl/extractor/dump.py
Normal file
@@ -0,0 +1,39 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class DumpIE(InfoExtractor):
    """Information extractor for video pages on dump.com.

    The video ID is the path component captured by ``_VALID_URL``; the media
    URL is read from the ``s1.addVariable("file", ...)`` call in the page.
    """

    _VALID_URL = r'^https?://(?:www\.)?dump\.com/(?P<id>[a-zA-Z0-9]+)/'

    _TEST = {
        'url': 'http://www.dump.com/oneus/',
        'md5': 'ad71704d1e67dfd9e81e3e8b42d69d99',
        'info_dict': {
            'id': 'oneus',
            'ext': 'flv',
            'title': "He's one of us.",
            # Raw string: "\." is meant as a regex escape, not a string escape.
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }

    def _real_extract(self, url):
        """Download the page at *url* and return its info dictionary.

        The returned dict has the keys ``id``, ``title``, ``url`` and
        ``thumbnail``.
        """
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')

        webpage = self._download_webpage(url, video_id)
        # The dot is escaped so that only the literal "s1.addVariable" call
        # matches, not any character in that position.
        video_url = self._search_regex(
            r's1\.addVariable\("file",\s*"([^"]+)"', webpage, 'video URL')

        thumb = self._og_search_thumbnail(webpage)
        title = self._search_regex(r'<b>([^"]+)</b>', webpage, 'title')

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'thumbnail': thumb,
        }
|
@@ -1,19 +1,21 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import determine_ext
|
||||
|
||||
|
||||
class EbaumsWorldIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.ebaumsworld\.com/video/watch/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.ebaumsworld.com/video/watch/83367677/',
|
||||
u'file': u'83367677.mp4',
|
||||
u'info_dict': {
|
||||
u'title': u'A Giant Python Opens The Door',
|
||||
u'description': u'This is how nightmares start...',
|
||||
u'uploader': u'jihadpizza',
|
||||
'url': 'http://www.ebaumsworld.com/video/watch/83367677/',
|
||||
'info_dict': {
|
||||
'id': '83367677',
|
||||
'ext': 'mp4',
|
||||
'title': 'A Giant Python Opens The Door',
|
||||
'description': 'This is how nightmares start...',
|
||||
'uploader': 'jihadpizza',
|
||||
},
|
||||
}
|
||||
|
||||
@@ -28,7 +30,6 @@ class EbaumsWorldIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'title': config.find('title').text,
|
||||
'url': video_url,
|
||||
'ext': determine_ext(video_url),
|
||||
'description': config.find('description').text,
|
||||
'thumbnail': config.find('image').text,
|
||||
'uploader': config.find('username').text,
|
||||
|
@@ -36,7 +36,7 @@ class EscapistIE(InfoExtractor):
|
||||
r'<meta name="description" content="([^"]*)"',
|
||||
webpage, 'description', fatal=False)
|
||||
|
||||
playerUrl = self._og_search_video_url(webpage, name=u'player URL')
|
||||
playerUrl = self._og_search_video_url(webpage, name='player URL')
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<meta name="title" content="([^"]*)"',
|
||||
|
73
youtube_dl/extractor/expotv.py
Normal file
73
youtube_dl/extractor/expotv.py
Normal file
@@ -0,0 +1,73 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class ExpoTVIE(InfoExtractor):
    """Information extractor for video pages on www.expotv.com.

    The page supplies a player key; together with the numeric video ID it
    forms the URL of a JSON configuration that lists the media sources.
    """

    _VALID_URL = r'https?://www\.expotv\.com/videos/[^?#]*/(?P<id>[0-9]+)($|[?#])'
    _TEST = {
        'url': 'http://www.expotv.com/videos/reviews/1/24/LinneCardscom/17561',
        'md5': '2985e6d7a392b2f7a05e0ca350fe41d0',
        'info_dict': {
            'id': '17561',
            'ext': 'mp4',
            'upload_date': '20060212',
            'title': 'My Favorite Online Scrapbook Store',
            'view_count': int,
            'description': 'You\'ll find most everything you need at this virtual store front.',
            'uploader': 'Anna T.',
            # Raw string: "\." is meant as a regex escape, not a string escape.
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Extract formats and metadata for the video at *url*.

        Returns a dict with the keys ``id``, ``formats``, ``title``,
        ``description``, ``view_count``, ``thumbnail``, ``uploader`` and
        ``upload_date``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        player_key = self._search_regex(
            r'<param name="playerKey" value="([^"]+)"', webpage, 'player key')
        config_url = 'http://client.expotv.com/video/config/%s/%s' % (
            video_id, player_key)
        config = self._download_json(
            config_url, video_id,
            note='Downloading video configuration')

        # One format per entry of the configuration's "sources" list; the
        # extension is taken from the "filename=" part of the source URL.
        formats = [{
            'url': fcfg['file'],
            'height': int_or_none(fcfg.get('height')),
            'format_note': fcfg.get('label'),
            'ext': self._search_regex(
                r'filename=.*\.([a-z0-9_A-Z]+)&', fcfg['file'],
                'file extension', default=None),
        } for fcfg in config['sources']]
        self._sort_formats(formats)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = config.get('image')

        # The play counter is optional metadata, like uploader and upload
        # date below, so a page without it must not abort the extraction.
        view_count = int_or_none(self._search_regex(
            r'<h5>Plays: ([0-9]+)</h5>', webpage, 'view counts',
            fatal=False))
        uploader = self._search_regex(
            r'<div class="reviewer">\s*<img alt="([^"]+)"', webpage, 'uploader',
            fatal=False)

        # NOTE(review): the non-fatal search may yield no value; only hand a
        # real match to unified_strdate -- confirm whether it tolerates None.
        upload_date_str = self._search_regex(
            r'<h5>Reviewed on ([0-9/.]+)</h5>', webpage, 'upload date',
            fatal=False)
        upload_date = (
            unified_strdate(upload_date_str) if upload_date_str else None)

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': description,
            'view_count': view_count,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'upload_date': upload_date,
        }
|
@@ -15,11 +15,14 @@ from ..utils import (
|
||||
compat_xml_parse_error,
|
||||
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
HEADRequest,
|
||||
orderedSet,
|
||||
parse_xml,
|
||||
smuggle_url,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
unsmuggle_url,
|
||||
url_basename,
|
||||
)
|
||||
from .brightcove import BrightcoveIE
|
||||
@@ -289,6 +292,58 @@ class GenericIE(InfoExtractor):
|
||||
'description': 'Mario\'s life in the fast lane has never looked so good.',
|
||||
},
|
||||
},
|
||||
# YouTube embed via <data-embed-url="">
|
||||
{
|
||||
'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
|
||||
'info_dict': {
|
||||
'id': 'jpSGZsgga_I',
|
||||
'ext': 'mp4',
|
||||
'title': 'Asphalt 8: Airborne - Launch Trailer',
|
||||
'uploader': 'Gameloft',
|
||||
'uploader_id': 'gameloft',
|
||||
'upload_date': '20130821',
|
||||
'description': 'md5:87bd95f13d8be3e7da87a5f2c443106a',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
# Camtasia studio
|
||||
{
|
||||
'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
|
||||
'playlist': [{
|
||||
'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
|
||||
'info_dict': {
|
||||
'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
|
||||
'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
|
||||
'ext': 'flv',
|
||||
'duration': 2235.90,
|
||||
}
|
||||
}, {
|
||||
'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
|
||||
'info_dict': {
|
||||
'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
|
||||
'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
|
||||
'ext': 'flv',
|
||||
'duration': 2235.93,
|
||||
}
|
||||
}],
|
||||
'info_dict': {
|
||||
'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
|
||||
}
|
||||
},
|
||||
# Flowplayer
|
||||
{
|
||||
'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
|
||||
'md5': '9d65602bf31c6e20014319c7d07fba27',
|
||||
'info_dict': {
|
||||
'id': '5123ea6d5e5a7',
|
||||
'ext': 'mp4',
|
||||
'age_limit': 18,
|
||||
'uploader': 'www.handjobhub.com',
|
||||
'title': 'Busty Blonde Siri Tit Fuck While Wank at Handjob Hub',
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def report_download_webpage(self, video_id):
|
||||
@@ -372,6 +427,43 @@ class GenericIE(InfoExtractor):
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
def _extract_camtasia(self, url, video_id, webpage):
    """Build a playlist from a Camtasia project referenced by the page.

    The page is searched for a ``csConfigFile`` variable; the XML
    configuration it points to is downloaded and every child of its
    ``playlist/array/fileset`` node that has a ``uri`` becomes one entry.

    Returns None if no camtasia video can be found, otherwise a dict of
    ``_type`` 'playlist' with ``entries`` and ``title``.
    """

    cfg_path = self._search_regex(
        r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
        webpage, 'camtasia configuration file', default=None)
    if cfg_path is None:
        return None

    title = self._html_search_meta('DC.title', webpage, fatal=True)

    # The configuration path may be relative to the page URL.
    camtasia_url = compat_urlparse.urljoin(url, cfg_path)
    camtasia_cfg = self._download_xml(
        camtasia_url, video_id,
        note='Downloading camtasia configuration',
        errnote='Failed to download camtasia configuration')
    fileset_node = camtasia_cfg.find('./playlist/array/fileset')
    if fileset_node is None:
        # A configuration without a fileset holds nothing to download.
        return None

    entries = []
    # Iterate the element itself: Element.getchildren() no longer exists
    # in Python 3.9+, while direct iteration works on every version.
    for n in fileset_node:
        url_n = n.find('./uri')
        if url_n is None:
            continue

        # The duration is optional; a missing node must not crash.
        duration_n = n.find('./duration')
        duration = (
            float_or_none(duration_n.text) if duration_n is not None
            else None)

        entries.append({
            # File name of the URI without directory and extension
            'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
            'title': '%s - %s' % (title, n.tag),
            'url': compat_urlparse.urljoin(url, url_n.text),
            'duration': duration,
        })

    return {
        '_type': 'playlist',
        'entries': entries,
        'title': title,
    }
|
||||
|
||||
def _real_extract(self, url):
|
||||
if url.startswith('//'):
|
||||
return {
|
||||
@@ -408,7 +500,14 @@ class GenericIE(InfoExtractor):
|
||||
else:
|
||||
assert ':' in default_search
|
||||
return self.url_result(default_search + url)
|
||||
video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
|
||||
|
||||
url, smuggled_data = unsmuggle_url(url)
|
||||
force_videoid = None
|
||||
if smuggled_data and 'force_videoid' in smuggled_data:
|
||||
force_videoid = smuggled_data['force_videoid']
|
||||
video_id = force_videoid
|
||||
else:
|
||||
video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0]
|
||||
|
||||
self.to_screen('%s: Requesting header' % video_id)
|
||||
|
||||
@@ -419,6 +518,9 @@ class GenericIE(InfoExtractor):
|
||||
new_url = response.geturl()
|
||||
if url != new_url:
|
||||
self.report_following_redirect(new_url)
|
||||
if force_videoid:
|
||||
new_url = smuggle_url(
|
||||
new_url, {'force_videoid': force_videoid})
|
||||
return self.url_result(new_url)
|
||||
|
||||
# Check for direct link to a video
|
||||
@@ -460,6 +562,11 @@ class GenericIE(InfoExtractor):
|
||||
except compat_xml_parse_error:
|
||||
pass
|
||||
|
||||
# Is it a Camtasia project?
|
||||
camtasia_res = self._extract_camtasia(url, video_id, webpage)
|
||||
if camtasia_res is not None:
|
||||
return camtasia_res
|
||||
|
||||
# Sometimes embedded video player is hidden behind percent encoding
|
||||
# (e.g. https://github.com/rg3/youtube-dl/issues/2448)
|
||||
# Unescaping the whole page allows to handle those cases in a generic way
|
||||
@@ -475,10 +582,26 @@ class GenericIE(InfoExtractor):
|
||||
r'(?s)<title>(.*?)</title>', webpage, 'video title',
|
||||
default='video')
|
||||
|
||||
# Try to detect age limit automatically
|
||||
age_limit = self._rta_search(webpage)
|
||||
# And then there are the jokers who advertise that they use RTA,
|
||||
# but actually don't.
|
||||
AGE_LIMIT_MARKERS = [
|
||||
r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
|
||||
]
|
||||
if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
|
||||
age_limit = 18
|
||||
|
||||
# video uploader is domain name
|
||||
video_uploader = self._search_regex(
|
||||
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
|
||||
|
||||
# Helper method
|
||||
def _playlist_from_matches(matches, getter, ie=None):
|
||||
urlrs = orderedSet(self.url_result(getter(m), ie) for m in matches)
|
||||
return self.playlist_result(
|
||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||
|
||||
# Look for BrightCove:
|
||||
bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
|
||||
if bc_urls:
|
||||
@@ -514,6 +637,7 @@ class GenericIE(InfoExtractor):
|
||||
matches = re.findall(r'''(?x)
|
||||
(?:
|
||||
<iframe[^>]+?src=|
|
||||
data-video-url=|
|
||||
<embed[^>]+?src=|
|
||||
embedSWF\(?:\s*
|
||||
)
|
||||
@@ -522,19 +646,15 @@ class GenericIE(InfoExtractor):
|
||||
(?:embed|v)/.+?)
|
||||
\1''', webpage)
|
||||
if matches:
|
||||
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
|
||||
for tuppl in matches]
|
||||
return self.playlist_result(
|
||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||
return _playlist_from_matches(
|
||||
matches, lambda m: unescapeHTML(m[1]), ie='Youtube')
|
||||
|
||||
# Look for embedded Dailymotion player
|
||||
matches = re.findall(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
|
||||
if matches:
|
||||
urlrs = [self.url_result(unescapeHTML(tuppl[1]))
|
||||
for tuppl in matches]
|
||||
return self.playlist_result(
|
||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||
return _playlist_from_matches(
|
||||
matches, lambda m: unescapeHTML(m[1]))
|
||||
|
||||
# Look for embedded Wistia player
|
||||
match = re.search(
|
||||
@@ -553,7 +673,7 @@ class GenericIE(InfoExtractor):
|
||||
mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
|
||||
if mobj:
|
||||
return self.url_result('http://blip.tv/a/a-'+mobj.group(1), 'BlipTV')
|
||||
mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9]+)', webpage)
|
||||
mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
|
||||
if mobj:
|
||||
return self.url_result(mobj.group(1), 'BlipTV')
|
||||
|
||||
@@ -648,10 +768,8 @@ class GenericIE(InfoExtractor):
|
||||
# Look for funnyordie embed
|
||||
matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
|
||||
if matches:
|
||||
urlrs = [self.url_result(unescapeHTML(eurl), 'FunnyOrDie')
|
||||
for eurl in matches]
|
||||
return self.playlist_result(
|
||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||
return _playlist_from_matches(
|
||||
matches, getter=unescapeHTML, ie='FunnyOrDie')
|
||||
|
||||
# Look for embedded RUTV player
|
||||
rutv_url = RUTVIE._extract_url(webpage)
|
||||
@@ -713,6 +831,13 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'Yahoo')
|
||||
|
||||
# Look for embedded sbs.com.au player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)sbs\.com\.au/ondemand/video/single/.+?)\1',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'SBS')
|
||||
|
||||
# Start with something easy: JW Player in SWFObject
|
||||
found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||
if not found:
|
||||
@@ -730,6 +855,15 @@ class GenericIE(InfoExtractor):
|
||||
if not found:
|
||||
# Broaden the findall a little bit: JWPlayer JS loader
|
||||
found = re.findall(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
|
||||
if not found:
|
||||
# Flow player
|
||||
found = re.findall(r'''(?xs)
|
||||
flowplayer\("[^"]+",\s*
|
||||
\{[^}]+?\}\s*,
|
||||
\s*{[^}]+? ["']?clip["']?\s*:\s*\{\s*
|
||||
["']?url["']?\s*:\s*["']([^"']+)["']
|
||||
''', webpage)
|
||||
assert found
|
||||
if not found:
|
||||
# Try to find twitter cards info
|
||||
found = re.findall(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
|
||||
@@ -739,7 +873,12 @@ class GenericIE(InfoExtractor):
|
||||
m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
|
||||
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
|
||||
if m_video_type is not None:
|
||||
found = re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
|
||||
def check_video(vurl):
|
||||
vpath = compat_urlparse.urlparse(vurl).path
|
||||
return '.' in vpath and not vpath.endswith('.swf')
|
||||
found = list(filter(
|
||||
check_video,
|
||||
re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)))
|
||||
if not found:
|
||||
# HTML5 video
|
||||
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage)
|
||||
@@ -776,6 +915,7 @@ class GenericIE(InfoExtractor):
|
||||
'url': video_url,
|
||||
'uploader': video_uploader,
|
||||
'title': video_title,
|
||||
'age_limit': age_limit,
|
||||
})
|
||||
|
||||
if len(entries) == 1:
|
||||
|
190
youtube_dl/extractor/grooveshark.py
Normal file
190
youtube_dl/extractor/grooveshark.py
Normal file
@@ -0,0 +1,190 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import time
|
||||
import math
|
||||
import os.path
|
||||
import re
|
||||
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, compat_urllib_request, compat_html_parser
|
||||
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
|
||||
class GroovesharkHtmlParser(compat_html_parser.HTMLParser):
    """Collect the ``<object>`` elements of an HTML document.

    Each closed ``<object>`` element is recorded in ``self.objects`` as a
    dict ``{'attrs': {...}, 'params': [{...}, ...]}`` where ``attrs`` holds
    the attributes of the ``<object>`` tag itself and ``params`` holds one
    attribute dict per ``<param>`` tag seen while that object was open.
    """

    def __init__(self):
        # The object currently being parsed, or None when outside <object>.
        self._current_object = None
        # Completed objects, in document order.
        self.objects = []
        compat_html_parser.HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Open a new record on ``<object>``; attach ``<param>`` to the open one."""
        attrs = dict(attrs)
        if tag == 'object':
            self._current_object = {'attrs': attrs, 'params': []}
        elif tag == 'param' and self._current_object is not None:
            # A <param> outside of any <object> has nothing to attach to;
            # ignore it instead of failing on None['params'].
            self._current_object['params'].append(attrs)

    def handle_endtag(self, tag):
        """Store the open record when its ``</object>`` is reached."""
        # Only record an object that was actually opened: a stray </object>
        # must not put None into the result list.
        if tag == 'object' and self._current_object is not None:
            self.objects.append(self._current_object)
            self._current_object = None

    @classmethod
    def extract_object_tags(cls, html):
        """Parse ``html`` and return the list of collected object records."""
        p = cls()
        p.feed(html)
        p.close()
        return p.objects
|
||||
|
||||
|
||||
class GroovesharkIE(InfoExtractor):
    """Information extractor for single songs on grooveshark.com.

    The song is addressed by the URL fragment (``#!/s/<name>/<token>``).
    Extraction requests the player page, then asks ``/preload.php`` for
    bootstrap data and for the track metadata, and finally describes a POST
    request to ``/stream.php`` on the host named in the metadata.
    """

    _VALID_URL = r'https?://(www\.)?grooveshark\.com/#!/s/([^/]+)/([^/]+)'
    _TEST = {
        'url': 'http://grooveshark.com/#!/s/Jolene+Tenth+Key+Remix+Ft+Will+Sessions/6SS1DW?src=5',
        'md5': '7ecf8aefa59d6b2098517e1baa530023',
        'info_dict': {
            'id': '6SS1DW',
            'title': 'Jolene (Tenth Key Remix ft. Will Sessions)',
            'ext': 'mp3',
            'duration': 227,
        }
    }

    # Switches for the two optional preparatory requests in _real_extract.
    do_playerpage_request = True
    do_bootstrap_request = True

    def _parse_target(self, target):
        """Split a song URL into ``(parsed_uri, song_hash, token)``.

        ``song_hash`` is the URL fragment without its first character and
        without anything after a ``?``; ``token`` is its last path component.
        """
        uri = compat_urlparse.urlparse(target)
        song_hash = uri.fragment[1:].split('?')[0]
        token = os.path.basename(song_hash.rstrip('/'))
        return (uri, song_hash, token)

    def _build_bootstrap_url(self, target):
        """Return ``(url, token)`` for the preload.php bootstrap request."""
        (uri, song_hash, token) = self._parse_target(target)
        query = 'getCommunicationToken=1&hash=%s&%d' % (compat_urllib_parse.quote(song_hash, safe=''), self.ts)
        return (compat_urlparse.urlunparse((uri.scheme, uri.netloc, '/preload.php', None, query, None)), token)

    def _build_meta_url(self, target):
        """Return ``(url, token)`` for the preload.php metadata request."""
        (uri, song_hash, token) = self._parse_target(target)
        query = 'hash=%s&%d' % (compat_urllib_parse.quote(song_hash, safe=''), self.ts)
        return (compat_urlparse.urlunparse((uri.scheme, uri.netloc, '/preload.php', None, query, None)), token)

    def _build_stream_url(self, meta):
        """Return the stream.php URL on the host given in ``meta['streamKey']['ip']``."""
        return compat_urlparse.urlunparse(('http', meta['streamKey']['ip'], '/stream.php', None, None, None))

    def _build_swf_referer(self, target, obj):
        """Return the URL of the player object (its ``data`` attribute) on the target host."""
        (uri, _, _) = self._parse_target(target)
        return compat_urlparse.urlunparse((uri.scheme, uri.netloc, obj['attrs']['data'], None, None, None))

    def _transform_bootstrap(self, js):
        """Reduce the bootstrap response to the value of its first assignment.

        Everything from the first line starting with ``try {`` is dropped;
        of the remainder, the text after the first ``' = '`` is returned
        without surrounding whitespace and trailing semicolons.
        """
        return re.split(r'(?m)^\s*try\s*{', js)[0] \
                 .split(' = ', 1)[1].strip().rstrip(';')

    def _transform_meta(self, js):
        """Return the right-hand side of the assignment on the first line of ``js``."""
        # Split on the first '=' only, so that a payload which itself
        # contains '=' is not truncated.
        return js.split('\n')[0].split('=', 1)[1].rstrip(';')

    def _get_meta(self, target):
        """Download the track metadata and return its ``getStreamKeyWithSong`` entry.

        Raises ExtractorError when that entry is missing or is None.
        """
        (meta_url, token) = self._build_meta_url(target)
        self.to_screen('Metadata URL: %s' % meta_url)

        headers = {'Referer': compat_urlparse.urldefrag(target)[0]}
        req = compat_urllib_request.Request(meta_url, headers=headers)
        res = self._download_json(req, token,
                                  transform_source=self._transform_meta)

        if 'getStreamKeyWithSong' not in res:
            raise ExtractorError(
                'Metadata not found. URL may be malformed, or Grooveshark API may have changed.')

        if res['getStreamKeyWithSong'] is None:
            raise ExtractorError(
                'Metadata download failed, probably due to Grooveshark anti-abuse throttling. Wait at least an hour before retrying from this IP.',
                expected=True)

        return res['getStreamKeyWithSong']

    def _get_bootstrap(self, target):
        """Download the player bootstrap data (non-fatal) and return the result."""
        (bootstrap_url, token) = self._build_bootstrap_url(target)

        headers = {'Referer': compat_urlparse.urldefrag(target)[0]}
        req = compat_urllib_request.Request(bootstrap_url, headers=headers)
        res = self._download_json(req, token, fatal=False,
                                  note='Downloading player bootstrap data',
                                  errnote='Unable to download player bootstrap data',
                                  transform_source=self._transform_bootstrap)
        return res

    def _get_playerpage(self, target):
        """Download the player page and return ``(webpage, player_objects)``.

        ``player_objects`` is the list of ``<object>`` records whose ``id``
        attribute is ``jsPlayerEmbed``, or None when no page was obtained.
        Raises ExtractorError when the page shows an error heading.
        """
        (_, _, token) = self._parse_target(target)

        webpage = self._download_webpage(
            target, token,
            note='Downloading player page',
            errnote='Unable to download player page',
            fatal=False)

        # The download is non-fatal; treat any falsy result as "no page".
        # NOTE(review): assumes the helper returns None/False on failure -
        # confirm against InfoExtractor._download_webpage.
        if not webpage:
            return (webpage, None)

        # Search (for example German) error message
        error_msg = self._html_search_regex(
            r'<div id="content">\s*<h2>(.*?)</h2>', webpage,
            'error message', default=None)
        if error_msg is not None:
            error_msg = error_msg.replace('\n', ' ')
            raise ExtractorError('Grooveshark said: %s' % error_msg)

        o = GroovesharkHtmlParser.extract_object_tags(webpage)
        # Use .get(): <object> tags without an id attribute must simply be
        # skipped rather than raise KeyError.
        return (webpage, [x for x in o if x and x['attrs'].get('id') == 'jsPlayerEmbed'])

    def _real_initialize(self):
        self.ts = int(time.time() * 1000)  # timestamp in millis

    def _real_extract(self, url):
        """Return the info dict describing the POST request that streams the song."""
        (_, _, token) = self._parse_target(url)

        # 1. Fill cookiejar by making a request to the player page
        swf_referer = None
        if self.do_playerpage_request:
            (_, player_objs) = self._get_playerpage(url)
            # Truthiness check: the list may be empty when the page holds
            # no matching player object.
            if player_objs:
                swf_referer = self._build_swf_referer(url, player_objs[0])
                self.to_screen('SWF Referer: %s' % swf_referer)

        # 2. Ask preload.php for swf bootstrap data to better mimic webapp
        if self.do_bootstrap_request:
            bootstrap = self._get_bootstrap(url)
            # The bootstrap download is non-fatal, so only report the token
            # when the data actually arrived.
            if isinstance(bootstrap, dict) and 'getCommunicationToken' in bootstrap:
                self.to_screen('CommunicationToken: %s' % bootstrap['getCommunicationToken'])

        # 3. Ask preload.php for track metadata.
        meta = self._get_meta(url)

        # 4. Construct stream request for track.
        stream_url = self._build_stream_url(meta)
        # uSecs is divided by 10**6 and rounded up to whole seconds.
        duration = int(math.ceil(float(meta['streamKey']['uSecs']) / 1000000))
        post_dict = {'streamKey': meta['streamKey']['streamKey']}
        post_data = compat_urllib_parse.urlencode(post_dict).encode('utf-8')
        headers = {
            'Content-Length': len(post_data),
            'Content-Type': 'application/x-www-form-urlencoded'
        }
        if swf_referer is not None:
            headers['Referer'] = swf_referer

        return {
            'id': token,
            'title': meta['song']['Name'],
            'http_method': 'POST',
            'url': stream_url,
            'ext': 'mp3',
            'format': 'mp3 audio',
            'duration': duration,
            'http_post_data': post_data,
            'http_headers': headers,
        }
|
@@ -9,6 +9,7 @@ from ..utils import (
|
||||
compat_urllib_request,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -83,6 +84,21 @@ class MetacafeIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# Movieclips.com video
|
||||
{
|
||||
'url': 'http://www.metacafe.com/watch/mv-Wy7ZU/my_week_with_marilyn_do_you_love_me/',
|
||||
'info_dict': {
|
||||
'id': 'mv-Wy7ZU',
|
||||
'ext': 'mp4',
|
||||
'title': 'My Week with Marilyn - Do You Love Me?',
|
||||
'description': 'From the movie My Week with Marilyn - Colin (Eddie Redmayne) professes his love to Marilyn (Michelle Williams) and gets her to promise to return to set and finish the movie.',
|
||||
'uploader': 'movie_trailers',
|
||||
'duration': 176,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'requires rtmpdump',
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def report_disclaimer(self):
|
||||
@@ -134,6 +150,7 @@ class MetacafeIE(InfoExtractor):
|
||||
|
||||
# Extract URL, uploader and title from webpage
|
||||
self.report_extraction(video_id)
|
||||
video_url = None
|
||||
mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
|
||||
if mobj is not None:
|
||||
mediaURL = compat_urllib_parse.unquote(mobj.group(1))
|
||||
@@ -146,16 +163,17 @@ class MetacafeIE(InfoExtractor):
|
||||
else:
|
||||
gdaKey = mobj.group(1)
|
||||
video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
|
||||
else:
|
||||
if video_url is None:
|
||||
mobj = re.search(r'<video src="([^"]+)"', webpage)
|
||||
if mobj:
|
||||
video_url = mobj.group(1)
|
||||
video_ext = 'mp4'
|
||||
else:
|
||||
mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
|
||||
if mobj is None:
|
||||
raise ExtractorError('Unable to extract media URL')
|
||||
vardict = compat_parse_qs(mobj.group(1))
|
||||
if video_url is None:
|
||||
flashvars = self._search_regex(
|
||||
r' name="flashvars" value="(.*?)"', webpage, 'flashvars',
|
||||
default=None)
|
||||
if flashvars:
|
||||
vardict = compat_parse_qs(flashvars)
|
||||
if 'mediaData' not in vardict:
|
||||
raise ExtractorError('Unable to extract media URL')
|
||||
mobj = re.search(
|
||||
@@ -165,26 +183,68 @@ class MetacafeIE(InfoExtractor):
|
||||
mediaURL = mobj.group('mediaURL').replace('\\/', '/')
|
||||
video_url = '%s?__gda__=%s' % (mediaURL, mobj.group('key'))
|
||||
video_ext = determine_ext(video_url)
|
||||
if video_url is None:
|
||||
player_url = self._search_regex(
|
||||
r"swfobject\.embedSWF\('([^']+)'",
|
||||
webpage, 'config URL', default=None)
|
||||
if player_url:
|
||||
config_url = self._search_regex(
|
||||
r'config=(.+)$', player_url, 'config URL')
|
||||
config_doc = self._download_xml(
|
||||
config_url, video_id,
|
||||
note='Downloading video config')
|
||||
smil_url = config_doc.find('.//properties').attrib['smil_file']
|
||||
smil_doc = self._download_xml(
|
||||
smil_url, video_id,
|
||||
note='Downloading SMIL document')
|
||||
base_url = smil_doc.find('./head/meta').attrib['base']
|
||||
video_url = []
|
||||
for vn in smil_doc.findall('.//video'):
|
||||
br = int(vn.attrib['system-bitrate'])
|
||||
play_path = vn.attrib['src']
|
||||
video_url.append({
|
||||
'format_id': 'smil-%d' % br,
|
||||
'url': base_url,
|
||||
'play_path': play_path,
|
||||
'page_url': url,
|
||||
'player_url': player_url,
|
||||
'ext': play_path.partition(':')[0],
|
||||
})
|
||||
|
||||
video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, 'title')
|
||||
if video_url is None:
|
||||
raise ExtractorError('Unsupported video type')
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'(?im)<title>(.*) - Video</title>', webpage, 'title')
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
video_uploader = self._html_search_regex(
|
||||
r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
|
||||
webpage, 'uploader nickname', fatal=False)
|
||||
duration = int_or_none(
|
||||
self._html_search_meta('video:duration', webpage))
|
||||
|
||||
if re.search(r'"contentRating":"restricted"', webpage) is not None:
|
||||
age_limit = 18
|
||||
age_limit = (
|
||||
18
|
||||
if re.search(r'"contentRating":"restricted"', webpage)
|
||||
else 0)
|
||||
|
||||
if isinstance(video_url, list):
|
||||
formats = video_url
|
||||
else:
|
||||
age_limit = 0
|
||||
formats = [{
|
||||
'url': video_url,
|
||||
'ext': video_ext,
|
||||
}]
|
||||
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'description': description,
|
||||
'uploader': video_uploader,
|
||||
'title': video_title,
|
||||
'thumbnail':thumbnail,
|
||||
'ext': video_ext,
|
||||
'thumbnail': thumbnail,
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
'duration': duration,
|
||||
}
|
||||
|
57
youtube_dl/extractor/ministrygrid.py
Normal file
57
youtube_dl/extractor/ministrygrid.py
Normal file
@@ -0,0 +1,57 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class MinistryGridIE(InfoExtractor):
    """Information extractor for www.ministrygrid.com pages.

    The page lists its portlets in a ``Liferay.Portlet.list`` array; each
    portlet is rendered separately and the first one containing an
    ``<iframe>`` is handed on as a URL result.
    """

    # The dot before "com" is escaped so that only the real host name matches.
    _VALID_URL = r'https?://www\.ministrygrid\.com/([^/?#]*/)*(?P<id>[^/#?]+)/?(?:$|[?#])'

    _TEST = {
        'url': 'http://www.ministrygrid.com/training-viewer/-/training/t4g-2014-conference/the-gospel-by-numbers-4/the-gospel-by-numbers',
        'md5': '844be0d2a1340422759c2a9101bab017',
        'info_dict': {
            'id': '3453494717001',
            'ext': 'mp4',
            'title': 'The Gospel by Numbers',
            'description': 'Coming soon from T4G 2014!',
            'uploader': 'LifeWay Christian Resources (MG)',
        },
    }

    def _real_extract(self, url):
        """Return a ``url`` result pointing at the first portlet iframe.

        Raises ExtractorError when no portlet contains an iframe.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        portlets_json = self._search_regex(
            r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list')
        portlets = json.loads(portlets_json)
        pl_id = self._search_regex(
            r'<!--\s*p_l_id - ([0-9]+)<br>', webpage, 'p_l_id')

        for i, portlet in enumerate(portlets):
            portlet_url = 'http://www.ministrygrid.com/c/portal/render_portlet?p_l_id=%s&p_p_id=%s' % (pl_id, portlet)
            portlet_code = self._download_webpage(
                portlet_url, video_id,
                note='Looking in portlet %s (%d/%d)' % (portlet, i + 1, len(portlets)),
                fatal=False)
            # The download is non-fatal: when nothing usable came back, move
            # on to the next portlet instead of regex-searching a non-string.
            if not portlet_code:
                continue
            video_iframe_url = self._search_regex(
                r'<iframe.*?src="([^"]+)"', portlet_code, 'video iframe',
                default=None)
            if video_iframe_url:
                # Pass this page's id along with the iframe URL.
                surl = smuggle_url(
                    video_iframe_url, {'force_videoid': video_id})
                return {
                    '_type': 'url',
                    'id': video_id,
                    'url': surl,
                }

        raise ExtractorError('Could not find video iframe in any portlets')
|
78
youtube_dl/extractor/movieclips.py
Normal file
78
youtube_dl/extractor/movieclips.py
Normal file
@@ -0,0 +1,78 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
compat_str,
|
||||
clean_html,
|
||||
)
|
||||
|
||||
|
||||
class MovieClipsIE(InfoExtractor):
    """Information extractor for movieclips.com clips.

    Reads the player config XML for the clip, follows its ``smil_file``
    attribute to a SMIL document and builds one format per ``<video>``
    node found under ``./body/switch``.
    """

    _VALID_URL = r'https?://movieclips\.com/(?P<id>[\da-zA-Z]+)(?:-(?P<display_id>[\da-z-]+))?'
    _TEST = {
        'url': 'http://movieclips.com/Wy7ZU-my-week-with-marilyn-movie-do-you-love-me/',
        'info_dict': {
            'id': 'Wy7ZU',
            'display_id': 'my-week-with-marilyn-movie-do-you-love-me',
            'ext': 'mp4',
            'title': 'My Week with Marilyn - Do You Love Me?',
            'description': 'md5:e86795bd332fe3cff461e7c8dc542acb',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the clip addressed by ``url``.

        Raises ExtractorError (expected) when the config's
        ``country-region`` node has the text ``false``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')
        # Identifier shown in progress messages.
        show_id = display_id or video_id

        config = self._download_xml(
            'http://config.movieclips.com/player/config/%s' % video_id,
            show_id, 'Downloading player config')

        # Guard against a config without the node instead of failing with
        # AttributeError on None.
        region_node = config.find('./country-region')
        if region_node is not None and region_node.text == 'false':
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, config.findtext('./region_alert')), expected=True)

        properties = config.find('./video/properties')
        smil_file = properties.attrib['smil_file']

        smil = self._download_xml(smil_file, show_id, 'Downloading SMIL')
        base_url = smil.find('./head/meta').attrib['base']

        formats = []
        for video in smil.findall('./body/switch/video'):
            # Floor division keeps vbr an integer on Python 2 and Python 3
            # alike ('/' yields a float on Python 3).
            vbr = int(video.attrib['system-bitrate']) // 1000
            src = video.attrib['src']
            formats.append({
                'url': base_url,
                'play_path': src,
                # The extension is the part of src before the first ':'.
                'ext': src.split(':')[0],
                'vbr': vbr,
                'format_id': '%dk' % vbr,
            })

        self._sort_formats(formats)

        title = '%s - %s' % (properties.attrib['clip_movie_title'], properties.attrib['clip_title'])
        description = clean_html(compat_str(properties.attrib['clip_description']))
        thumbnail = properties.attrib['image']
        categories = properties.attrib['clip_categories'].split(',')

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'categories': categories,
            'formats': formats,
        }
|
@@ -38,7 +38,7 @@ class NuvidIE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
request, video_id, 'Downloading %s page' % format_id)
|
||||
video_url = self._html_search_regex(
|
||||
r'<a href="([^"]+)"\s*>Continue to watch video', webpage, '%s video URL' % format_id, fatal=False)
|
||||
r'<a\s+href="([^"]+)"\s+class="b_link">', webpage, '%s video URL' % format_id, fatal=False)
|
||||
if not video_url:
|
||||
continue
|
||||
formats.append({
|
||||
@@ -49,19 +49,24 @@ class NuvidIE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
'http://m.nuvid.com/video/%s' % video_id, video_id, 'Downloading video page')
|
||||
title = self._html_search_regex(
|
||||
r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>', webpage, 'title').strip()
|
||||
thumbnail = self._html_search_regex(
|
||||
r'href="(/thumbs/[^"]+)"[^>]*data-link_type="thumbs"',
|
||||
webpage, 'thumbnail URL', fatal=False)
|
||||
[r'<span title="([^"]+)">',
|
||||
r'<div class="thumb-holder video">\s*<h5[^>]*>([^<]+)</h5>'], webpage, 'title').strip()
|
||||
thumbnails = [
|
||||
{
|
||||
'url': thumb_url,
|
||||
} for thumb_url in re.findall(r'<img src="([^"]+)" alt="" />', webpage)
|
||||
]
|
||||
thumbnail = thumbnails[0]['url'] if thumbnails else None
|
||||
duration = parse_duration(self._html_search_regex(
|
||||
r'Length:\s*<span>(\d{2}:\d{2})</span>',webpage, 'duration', fatal=False))
|
||||
r'<i class="fa fa-clock-o"></i>\s*(\d{2}:\d{2})', webpage, 'duration', fatal=False))
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'Added:\s*<span>(\d{4}-\d{2}-\d{2})</span>', webpage, 'upload date', fatal=False))
|
||||
r'<i class="fa fa-user"></i>\s*(\d{4}-\d{2}-\d{2})', webpage, 'upload date', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': 'http://m.nuvid.com%s' % thumbnail,
|
||||
'thumbnails': thumbnails,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'upload_date': upload_date,
|
||||
'age_limit': 18,
|
||||
|
100
youtube_dl/extractor/patreon.py
Normal file
100
youtube_dl/extractor/patreon.py
Normal file
@@ -0,0 +1,100 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
)
|
||||
|
||||
|
||||
class PatreonIE(InfoExtractor):
    """Information extractor for patreon.com creation pages (``creation?hid=...``).

    The media URL is taken from the attachment link on the page when there
    is one, otherwise from the first entry of the page's ``jPlayerPlaylist``
    definition.
    """

    # The id stops at '&' or '#', so further query parameters or a fragment
    # do not end up in the video id.
    _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=([^&#]+)'
    _TESTS = [
        {
            'url': 'http://www.patreon.com/creation?hid=743933',
            'md5': 'e25505eec1053a6e6813b8ed369875cc',
            'info_dict': {
                'id': '743933',
                'ext': 'mp3',
                'title': 'Episode 166: David Smalley of Dogma Debate',
                'uploader': 'Cognitive Dissonance Podcast',
                'thumbnail': 're:^https?://.*$',
            },
        },
        {
            'url': 'http://www.patreon.com/creation?hid=754133',
            'md5': '3eb09345bf44bf60451b8b0b81759d0a',
            'info_dict': {
                'id': '754133',
                'ext': 'mp3',
                'title': 'CD 167 Extra',
                'uploader': 'Cognitive Dissonance Podcast',
                'thumbnail': 're:^https?://.*$',
            },
        },
    ]

    # Currently Patreon exposes download URL via hidden CSS, so login is not
    # needed. Keeping this commented for when this inevitably changes.
    '''
    def _login(self):
        (username, password) = self._get_login_info()
        if username is None:
            return

        login_form = {
            'redirectUrl': 'http://www.patreon.com/',
            'email': username,
            'password': password,
        }

        request = compat_urllib_request.Request(
            'https://www.patreon.com/processLogin',
            compat_urllib_parse.urlencode(login_form).encode('utf-8')
        )
        login_page = self._download_webpage(request, None, note='Logging in as %s' % username)

        if re.search(r'onLoginFailed', login_page):
            raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)

    def _real_initialize(self):
        self._login()
    '''

    def _real_extract(self, url):
        """Return the info dict (always ``ext`` ``mp3``) for the creation at ``url``."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)

        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage).strip()

        attach_fn = self._html_search_regex(
            r'<div class="attach"><a target="_blank" href="([^"]+)">',
            webpage, 'attachment URL', default=None)
        if attach_fn is not None:
            # Attachment link found: the href is appended to the site root.
            video_url = 'http://www.patreon.com' + attach_fn
            thumbnail = self._og_search_thumbnail(webpage)
            uploader = self._html_search_regex(
                r'<strong>(.*?)</strong> is creating', webpage, 'uploader')
        else:
            # No attachment: read the first entry of the jPlayer playlist,
            # a JavaScript array literal converted to JSON before parsing.
            playlist_js = self._search_regex(
                r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])',
                webpage, 'playlist JSON')
            playlist_json = js_to_json(playlist_js)
            playlist = json.loads(playlist_json)
            data = playlist[0]
            video_url = self._proto_relative_url(data['mp3'])
            thumbnail = self._proto_relative_url(data.get('cover'))
            uploader = data.get('artist')

        return {
            'id': video_id,
            'url': video_url,
            'ext': 'mp3',
            'title': title,
            'uploader': uploader,
            'thumbnail': thumbnail,
        }
|
@@ -54,6 +54,18 @@ class PBSIE(InfoExtractor):
|
||||
'duration': 801,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.pbs.org/wnet/gperf/dudamel-conducts-verdi-requiem-hollywood-bowl-full-episode/3374/',
|
||||
'md5': 'c62859342be2a0358d6c9eb306595978',
|
||||
'info_dict': {
|
||||
'id': '2365297708',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:68d87ef760660eb564455eb30ca464fe',
|
||||
'title': 'Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
|
||||
'duration': 6559,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def _extract_ids(self, url):
|
||||
@@ -75,7 +87,7 @@ class PBSIE(InfoExtractor):
|
||||
return media_id, presumptive_id
|
||||
|
||||
url = self._search_regex(
|
||||
r'<iframe\s+id=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
|
||||
r'<iframe\s+(?:class|id)=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
|
||||
webpage, 'player URL')
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
|
82
youtube_dl/extractor/playfm.py
Normal file
82
youtube_dl/extractor/playfm.py
Normal file
@@ -0,0 +1,82 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class PlayFMIE(InfoExtractor):
    """Information extractor for play.fm recordings.

    The URL ends in an 8-digit upload date followed by a 6-digit recording
    id. The recording's XML description is requested with a POST to
    ``/flexRead/recording`` and the stream URL is assembled from its fields.
    """

    IE_NAME = 'play.fm'
    _VALID_URL = r'https?://(?:www\.)?play\.fm/[^?#]*(?P<upload_date>[0-9]{8})(?P<id>[0-9]{6})(?:$|[?#])'

    _TEST = {
        'url': 'http://www.play.fm/recording/leipzigelectronicmusicbatofarparis_fr20140712137220',
        'md5': 'c505f8307825a245d0c7ad1850001f22',
        'info_dict': {
            'id': '137220',
            'ext': 'mp3',
            'title': 'LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12',
            'uploader': 'Sven Tasnadi',
            'uploader_id': 'sventasnadi',
            'duration': 5627.428,
            'upload_date': '20140712',
            'view_count': int,
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the recording addressed by ``url``.

        Raises ExtractorError when the response contains an ``<error>`` node.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        upload_date = mobj.group('upload_date')

        # The request body must be bytes: on Python 3 a str body is rejected
        # by urllib when the request is sent.
        rec_data = compat_urllib_parse.urlencode(
            {'rec_id': video_id}).encode('utf-8')
        req = compat_urllib_request.Request(
            'http://www.play.fm/flexRead/recording', data=rec_data)
        req.add_header('Content-Type', 'application/x-www-form-urlencoded')
        rec_doc = self._download_xml(req, video_id)

        error_node = rec_doc.find('./error')
        if error_node is not None:
            raise ExtractorError('An error occured: %s (code %s)' % (
                error_node.text, rec_doc.find('./status').text))

        recording = rec_doc.find('./recording')
        title = recording.find('./title').text
        view_count = int_or_none(recording.find('./stats/playcount').text)
        # The duration value is passed through float_or_none with scale=1000.
        duration = float_or_none(recording.find('./duration').text, scale=1000)
        thumbnail = recording.find('./image').text

        artist = recording.find('./artists/artist')
        uploader = artist.find('./name').text
        uploader_id = artist.find('./slug').text

        video_url = '%s//%s/%s/%s/offset/0/sh/%s/rec/%s/jingle/%s/loc/%s' % (
            'http:', recording.find('./url').text,
            recording.find('./_class').text, recording.find('./file_id').text,
            rec_doc.find('./uuid').text, video_id,
            rec_doc.find('./jingle/file_id').text,
            'http%3A%2F%2Fwww.play.fm%2Fplayer',
        )

        return {
            'id': video_id,
            'url': video_url,
            'ext': 'mp3',
            'filesize': int_or_none(recording.find('./size').text),
            'title': title,
            'upload_date': upload_date,
            'view_count': view_count,
            'duration': duration,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': uploader_id,
        }
|
@@ -1,3 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -9,15 +11,16 @@ from ..utils import (
|
||||
|
||||
|
||||
class PornotubeIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$'
|
||||
_TEST = {
|
||||
u'url': u'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing',
|
||||
u'file': u'1689755.flv',
|
||||
u'md5': u'374dd6dcedd24234453b295209aa69b6',
|
||||
u'info_dict': {
|
||||
u"upload_date": u"20090708",
|
||||
u"title": u"Marilyn-Monroe-Bathing",
|
||||
u"age_limit": 18
|
||||
'url': 'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing',
|
||||
'md5': '374dd6dcedd24234453b295209aa69b6',
|
||||
'info_dict': {
|
||||
'id': '1689755',
|
||||
'ext': 'flv',
|
||||
'upload_date': '20090708',
|
||||
'title': 'Marilyn-Monroe-Bathing',
|
||||
'age_limit': 18
|
||||
}
|
||||
}
|
||||
|
||||
@@ -32,22 +35,22 @@ class PornotubeIE(InfoExtractor):
|
||||
|
||||
# Get the video URL
|
||||
VIDEO_URL_RE = r'url: "(?P<url>http://video[0-9].pornotube.com/.+\.flv)",'
|
||||
video_url = self._search_regex(VIDEO_URL_RE, webpage, u'video url')
|
||||
video_url = self._search_regex(VIDEO_URL_RE, webpage, 'video url')
|
||||
video_url = compat_urllib_parse.unquote(video_url)
|
||||
|
||||
#Get the uploaded date
|
||||
VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
|
||||
upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, u'upload date', fatal=False)
|
||||
if upload_date: upload_date = unified_strdate(upload_date)
|
||||
upload_date = self._html_search_regex(VIDEO_UPLOADED_RE, webpage, 'upload date', fatal=False)
|
||||
if upload_date:
|
||||
upload_date = unified_strdate(upload_date)
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
||||
info = {'id': video_id,
|
||||
'url': video_url,
|
||||
'uploader': None,
|
||||
'upload_date': upload_date,
|
||||
'title': video_title,
|
||||
'ext': 'flv',
|
||||
'format': 'flv',
|
||||
'age_limit': age_limit}
|
||||
|
||||
return [info]
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'upload_date': upload_date,
|
||||
'title': video_title,
|
||||
'ext': 'flv',
|
||||
'format': 'flv',
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
|
@@ -35,7 +35,6 @@ class RtlXlIE(InfoExtractor):
|
||||
info = self._download_json(
|
||||
'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
|
||||
uuid)
|
||||
meta = info['meta']
|
||||
material = info['material'][0]
|
||||
episode_info = info['episodes'][0]
|
||||
|
||||
|
@@ -1,21 +1,66 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import base64
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
struct_unpack,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
def _decrypt_url(png):
    """Recover the URL hidden in the tEXt chunk of a base64-encoded PNG.

    After dropping NUL bytes, the chunk payload has the form
    ``<alphabet data>#<url data>``.  Both parts are interleaved with filler
    characters that are skipped following a repeating pattern.  Every
    character of the URL is stored as a two-digit decimal index into the
    decoded alphabet.

    png -- the base64-encoded PNG file contents
    Returns the decoded URL as a string.
    Raises ValueError if the data has no tEXt chunk or no '#' separator.
    """
    encrypted_data = base64.b64decode(png)
    text_index = encrypted_data.find(b'tEXt')
    if text_index < 4:
        # find() yields -1 when absent; a negative offset would silently
        # read unrelated bytes from the end of the data
        raise ValueError('No tEXt chunk found in PNG data')

    # The 4-byte big-endian payload length directly precedes the chunk type
    length = struct_unpack('!I', encrypted_data[text_index - 4:text_index])[0]
    payload_start = text_index + 4
    # Use bytearray to get integers when iterating in both python 2.x and 3.x
    data = bytearray(encrypted_data[payload_start:payload_start + length])
    data = [chr(b) for b in data if b != 0]
    hash_index = data.index('#')
    alphabet_data = data[:hash_index]
    url_data = data[hash_index + 1:]

    # Keep one character, then skip 1, 2, 3, 0, 1, 2, 3, 0, ... characters
    alphabet = []
    step = 0
    skip = 0
    for char in alphabet_data:
        if skip == 0:
            alphabet.append(char)
            step = (step + 1) % 4
            skip = step
        else:
            skip -= 1

    # Each URL character is a tens digit, some filler characters, then a
    # units digit; the number of filler characters cycles with the count of
    # characters decoded so far.
    url_chars = []
    expecting_tens = True
    skip = 3
    decoded = 1
    index = 0
    for digit in url_data:
        if expecting_tens:
            index = int(digit) * 10
            expecting_tens = False
        elif skip == 0:
            index += int(digit)
            url_chars.append(alphabet[index])
            skip = (decoded + 3) % 4
            expecting_tens = True
            decoded += 1
        else:
            skip -= 1

    return ''.join(url_chars)
|
||||
|
||||
|
||||
|
||||
class RTVEALaCartaIE(InfoExtractor):
|
||||
IE_NAME = 'rtve.es:alacarta'
|
||||
IE_DESC = 'RTVE a la carta'
|
||||
_VALID_URL = r'http://www\.rtve\.es/alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
|
||||
'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43',
|
||||
'info_dict': {
|
||||
@@ -23,48 +68,15 @@ class RTVEALaCartaIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
|
||||
},
|
||||
}
|
||||
|
||||
def _decrypt_url(self, png):
|
||||
encrypted_data = base64.b64decode(png)
|
||||
text_index = encrypted_data.find(b'tEXt')
|
||||
text_chunk = encrypted_data[text_index-4:]
|
||||
length = struct_unpack('!I', text_chunk[:4])[0]
|
||||
# Use bytearray to get integers when iterating in both python 2.x and 3.x
|
||||
data = bytearray(text_chunk[8:8+length])
|
||||
data = [chr(b) for b in data if b != 0]
|
||||
hash_index = data.index('#')
|
||||
alphabet_data = data[:hash_index]
|
||||
url_data = data[hash_index+1:]
|
||||
|
||||
alphabet = []
|
||||
e = 0
|
||||
d = 0
|
||||
for l in alphabet_data:
|
||||
if d == 0:
|
||||
alphabet.append(l)
|
||||
d = e = (e + 1) % 4
|
||||
else:
|
||||
d -= 1
|
||||
url = ''
|
||||
f = 0
|
||||
e = 3
|
||||
b = 1
|
||||
for letter in url_data:
|
||||
if f == 0:
|
||||
l = int(letter)*10
|
||||
f = 1
|
||||
else:
|
||||
if e == 0:
|
||||
l += int(letter)
|
||||
url += alphabet[l]
|
||||
e = (b + 3) % 4
|
||||
f = 0
|
||||
b += 1
|
||||
else:
|
||||
e -= 1
|
||||
|
||||
return url
|
||||
}, {
|
||||
'note': 'Live stream',
|
||||
'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
|
||||
'info_dict': {
|
||||
'id': '1694255',
|
||||
'ext': 'flv',
|
||||
'title': 'TODO',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -74,11 +86,57 @@ class RTVEALaCartaIE(InfoExtractor):
|
||||
video_id)['page']['items'][0]
|
||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
|
||||
png = self._download_webpage(png_url, video_id, 'Downloading url information')
|
||||
video_url = self._decrypt_url(png)
|
||||
video_url = _decrypt_url(png)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info['title'],
|
||||
'url': video_url,
|
||||
'thumbnail': info['image'],
|
||||
'thumbnail': info.get('image'),
|
||||
'page_url': url,
|
||||
}
|
||||
|
||||
|
||||
class RTVELiveIE(InfoExtractor):
    """Extractor for the live stream pages of rtve.es."""

    IE_NAME = 'rtve.es:live'
    IE_DESC = 'RTVE.es live streams'
    _VALID_URL = r'http://www\.rtve\.es/(?:deportes/directo|noticias|television)/(?P<id>[a-zA-Z0-9-]+)'

    _TESTS = [{
        'url': 'http://www.rtve.es/noticias/directo-la-1/',
        'info_dict': {
            'id': 'directo-la-1',
            'ext': 'flv',
            'title': 're:^La 1 de TVE [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$',
        },
        'params': {
            'skip_download': 'live stream',
        }
    }]

    def _real_extract(self, url):
        """Return the info dict for the live stream page at url.

        The title is the page's OpenGraph title without a trailing
        ' en directo', followed by the UTC time at which extraction began.
        """
        match = re.match(self._VALID_URL, url)
        # Taken before any download so the title reflects the start time
        started_at = time.gmtime()
        video_id = match.group('id')

        webpage = self._download_webpage(url, video_id)
        player_url = self._search_regex(
            r'<param name="movie" value="([^"]+)"/>', webpage, 'player URL')

        base_title = remove_end(self._og_search_title(webpage), ' en directo')
        title = base_title + ' ' + time.strftime('%Y-%m-%dZ%H%M%S', started_at)

        # The stream URL is hidden in a PNG named after the internal player id
        vidplayer_id = self._search_regex(
            r' id="vidplayer([0-9]+)"', webpage, 'internal video ID')
        png = self._download_webpage(
            'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id,
            video_id, 'Downloading url information')

        return {
            'id': video_id,
            'ext': 'flv',
            'title': title,
            'url': _decrypt_url(png),
            'app': 'rtve-live-live?ovpfv=2.1.2',
            'player_url': player_url,
            'rtmp_live': True,
        }
|
||||
|
56
youtube_dl/extractor/sbs.py
Normal file
56
youtube_dl/extractor/sbs.py
Normal file
@@ -0,0 +1,56 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class SBSIE(InfoExtractor):
    """Extractor for single on-demand video pages on sbs.com.au.

    The page embeds a JavaScript object of release URLs; one of them is
    returned as a url_transparent result together with the title,
    description and thumbnail scraped from the page.
    """
    IE_DESC = 'sbs.com.au'
    _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/ondemand/video/single/(?P<id>[0-9]+)/'

    _TESTS = [{
        # Original URL is handled by the generic IE which finds the iframe:
        # http://www.sbs.com.au/thefeed/blog/2014/08/21/dingo-conservation
        'url': 'http://www.sbs.com.au/ondemand/video/single/320403011771/?source=drupal&vertical=thefeed',
        'md5': '3150cf278965eeabb5b4cea1c963fe0a',
        'info_dict': {
            'id': '320403011771',
            'ext': 'flv',
            'title': 'Dingo Conservation',
            'description': 'Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. This family run a dingo conservation park to prevent their extinction',
            # Raw string: '\.' is not a valid escape in a normal literal
            'thumbnail': r're:http://.*\.jpg',
        },
        'add_ies': ['generic'],
    }]

    def _real_extract(self, url):
        """Return a url_transparent result for the video page at url."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        # The field name is used in the error message when the regex fails,
        # so it must not be empty.
        release_urls_json = js_to_json(self._search_regex(
            r'(?s)playerParams\.releaseUrls\s*=\s*(\{.*?\n\});\n',
            webpage, 'release URLs'))
        release_urls = json.loads(release_urls_json)
        # Prefer the 'progressive' entry and fall back to 'standard'
        theplatform_url = (
            release_urls.get('progressive') or release_urls.get('standard'))

        title = remove_end(self._og_search_title(webpage), ' (The Feed)')
        description = self._html_search_meta('description', webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': theplatform_url,

            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }
|
@@ -44,7 +44,7 @@ class VodlockerIE(InfoExtractor):
|
||||
req, video_id, 'Downloading video page')
|
||||
|
||||
title = self._search_regex(
|
||||
r'id="file_title".*?>\s*(.*?)\s*<span', webpage, 'title')
|
||||
r'id="file_title".*?>\s*(.*?)\s*<(?:br|span)', webpage, 'title')
|
||||
thumbnail = self._search_regex(
|
||||
r'image:\s*"(http[^\"]+)",', webpage, 'thumbnail')
|
||||
url = self._search_regex(
|
||||
|
@@ -2,27 +2,30 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class WatIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.wat\.tv/.*-(?P<shortID>.*?)_.*?\.html'
|
||||
_VALID_URL = r'http://www\.wat\.tv/video/(?P<display_id>.*)-(?P<short_id>.*?)_.*?\.html'
|
||||
IE_NAME = 'wat.tv'
|
||||
_TEST = {
|
||||
'url': 'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html',
|
||||
'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
|
||||
'md5': 'ce70e9223945ed26a8056d413ca55dc9',
|
||||
'info_dict': {
|
||||
'id': '10631273',
|
||||
'id': '11713067',
|
||||
'display_id': 'soupe-figues-l-orange-aux-epices',
|
||||
'ext': 'mp4',
|
||||
'title': 'World War Z - Philadelphia VOST',
|
||||
'description': 'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr',
|
||||
},
|
||||
'params': {
|
||||
# Sometimes wat serves the whole file with the --test option
|
||||
'skip_download': True,
|
||||
'title': 'Soupe de figues à l\'orange et aux épices',
|
||||
'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.',
|
||||
'upload_date': '20140819',
|
||||
'duration': 120,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -36,13 +39,20 @@ class WatIE(InfoExtractor):
|
||||
def real_id_for_chapter(chapter):
|
||||
return chapter['tc_start'].split('-')[0]
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
short_id = mobj.group('shortID')
|
||||
webpage = self._download_webpage(url, short_id)
|
||||
short_id = mobj.group('short_id')
|
||||
display_id = mobj.group('display_id')
|
||||
webpage = self._download_webpage(url, display_id or short_id)
|
||||
real_id = self._search_regex(r'xtpage = ".*-(.*?)";', webpage, 'real id')
|
||||
|
||||
video_info = self.download_video_info(real_id)
|
||||
|
||||
if video_info.get('geolock'):
|
||||
raise ExtractorError('This content is not available in your area', expected=True)
|
||||
|
||||
chapters = video_info['chapters']
|
||||
first_chapter = chapters[0]
|
||||
files = video_info['files']
|
||||
first_file = files[0]
|
||||
|
||||
if real_id_for_chapter(first_chapter) != real_id:
|
||||
self.to_screen('Multipart video detected')
|
||||
@@ -61,12 +71,45 @@ class WatIE(InfoExtractor):
|
||||
upload_date = unified_strdate(first_chapter['date_diffusion'])
|
||||
# Otherwise we can continue and extract just one part, we have to use
|
||||
# the short id for getting the video url
|
||||
|
||||
formats = [{
|
||||
'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
|
||||
'format_id': 'Mobile',
|
||||
}]
|
||||
|
||||
fmts = [('SD', 'web')]
|
||||
if first_file.get('hasHD'):
|
||||
fmts.append(('HD', 'webhd'))
|
||||
|
||||
def compute_token(param):
|
||||
timestamp = '%08x' % int(time.time())
|
||||
magic = '9b673b13fa4682ed14c3cfa5af5310274b514c4133e9b3a81e6e3aba009l2564'
|
||||
return '%s/%s' % (hashlib.md5((magic + param + timestamp).encode('ascii')).hexdigest(), timestamp)
|
||||
|
||||
for fmt in fmts:
|
||||
webid = '/%s/%s' % (fmt[1], real_id)
|
||||
video_url = self._download_webpage(
|
||||
'http://www.wat.tv/get%s?token=%s&getURL=1' % (webid, compute_token(webid)),
|
||||
real_id,
|
||||
'Downloding %s video URL' % fmt[0],
|
||||
'Failed to download %s video URL' % fmt[0],
|
||||
False)
|
||||
if not video_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'format_id': fmt[0],
|
||||
})
|
||||
|
||||
return {
|
||||
'id': real_id,
|
||||
'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
|
||||
'display_id': display_id,
|
||||
'title': first_chapter['title'],
|
||||
'thumbnail': first_chapter['preview'],
|
||||
'description': first_chapter['description'],
|
||||
'view_count': video_info['views'],
|
||||
'upload_date': upload_date,
|
||||
'duration': first_file['duration'],
|
||||
'formats': formats,
|
||||
}
|
||||
|
52
youtube_dl/extractor/wayofthemaster.py
Normal file
52
youtube_dl/extractor/wayofthemaster.py
Normal file
@@ -0,0 +1,52 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class WayOfTheMasterIE(InfoExtractor):
    """Extractor for video pages on wayofthemaster.com."""

    _VALID_URL = r'https?://www\.wayofthemaster\.com/([^/?#]*/)*(?P<id>[^/?#]+)\.s?html(?:$|[?#])'

    _TEST = {
        'url': 'http://www.wayofthemaster.com/hbks.shtml',
        'md5': '5316b57487ada8480606a93cb3d18d24',
        'info_dict': {
            'id': 'hbks',
            'ext': 'mp4',
            'title': 'Intelligent Design vs. Evolution',
        },
    }

    def _real_extract(self, url):
        """Return the info dict (low and high quality formats) for url."""
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        # Prefer the alt text of the title image, else use the page <title>
        title = self._search_regex(
            r'<img src="images/title_[^"]+".*?alt="([^"]+)"',
            webpage, 'title', default=None)
        if title is None:
            title = self._html_search_regex(
                r'<title>(.*?)</title>', webpage, 'page title')

        url_base = self._search_regex(
            r'<param\s+name="?movie"?\s+value=".*?/wotm_videoplayer_highlow[0-9]*\.swf\?vid=([^"]+)"',
            webpage, 'URL base')

        # Both variants share the base URL and differ only in the suffix
        variants = (
            ('low', 1, '_low.mp4'),
            ('high', 2, '_high.mp4'),
        )
        formats = [
            {
                'format_id': format_id,
                'quality': quality,
                'url': url_base + suffix,
            }
            for format_id, quality, suffix in variants
        ]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
        }
|
@@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
class XHamsterIE(InfoExtractor):
|
||||
"""Information Extractor for xHamster"""
|
||||
_VALID_URL = r'http://(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
|
||||
_VALID_URL = r'http://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
|
||||
|
@@ -225,7 +225,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
|
||||
# Dash webm audio
|
||||
'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50},
|
||||
'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
|
||||
'172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
|
||||
|
||||
# RTMP (unnamed)
|
||||
@@ -508,6 +508,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
sub_lang_list = {}
|
||||
for l in lang_list:
|
||||
lang = l[1]
|
||||
if lang in sub_lang_list:
|
||||
continue
|
||||
params = compat_urllib_parse.urlencode({
|
||||
'lang': lang,
|
||||
'v': video_id,
|
||||
|
@@ -24,6 +24,7 @@ import socket
|
||||
import struct
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import traceback
|
||||
import xml.etree.ElementTree
|
||||
import zlib
|
||||
@@ -228,18 +229,42 @@ else:
|
||||
assert type(s) == type(u'')
|
||||
print(s)
|
||||
|
||||
# In Python 2.x, json.dump expects a bytestream.
|
||||
# In Python 3.x, it writes to a character stream
|
||||
if sys.version_info < (3,0):
|
||||
def write_json_file(obj, fn):
|
||||
with open(fn, 'wb') as f:
|
||||
json.dump(obj, f)
|
||||
else:
|
||||
def write_json_file(obj, fn):
|
||||
with open(fn, 'w', encoding='utf-8') as f:
|
||||
json.dump(obj, f)
|
||||
|
||||
if sys.version_info >= (2,7):
|
||||
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically

    The JSON is first written to a temporary file created next to fn, and
    that file is then renamed to fn.  If anything fails, the temporary file
    is removed again and the exception is re-raised.
    """

    args = {
        'suffix': '.tmp',
        'prefix': os.path.basename(fn) + '.',
        # Same directory as the target, so the rename does not have to
        # cross directories
        'dir': os.path.dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**args)

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # On Windows, os.rename refuses to overwrite an existing file,
            # so the old file has to go first (at the cost of atomicity).
            try:
                os.unlink(fn)
            except OSError:
                pass
        os.rename(tf.name, fn)
    except BaseException:
        # Clean up the temporary file on any failure, including interrupts
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
|
||||
|
||||
|
||||
if sys.version_info >= (2, 7):
|
||||
def find_xpath_attr(node, xpath, key, val):
|
||||
""" Find the xpath xpath[@key=val] """
|
||||
assert re.match(r'^[a-zA-Z-]+$', key)
|
||||
@@ -830,6 +855,7 @@ def unified_strdate(date_str):
|
||||
'%Y/%m/%d',
|
||||
'%d.%m.%Y',
|
||||
'%d/%m/%Y',
|
||||
'%d/%m/%y',
|
||||
'%Y/%m/%d %H:%M:%S',
|
||||
'%Y-%m-%d %H:%M:%S',
|
||||
'%d.%m.%Y %H:%M',
|
||||
@@ -1260,6 +1286,12 @@ def remove_start(s, start):
|
||||
return s
|
||||
|
||||
|
||||
def remove_end(s, end):
    """ Return s without the suffix end; s is returned unchanged if it does not end with end """
    # An empty suffix must be a no-op: s[:-0] is s[:0], i.e. the empty string
    if end and s.endswith(end):
        return s[:-len(end)]
    return s
|
||||
|
||||
|
||||
def url_basename(url):
|
||||
path = compat_urlparse.urlparse(url).path
|
||||
return path.strip(u'/').split(u'/')[-1]
|
||||
@@ -1449,6 +1481,34 @@ def strip_jsonp(code):
|
||||
return re.sub(r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?\s*$', r'\1', code)
|
||||
|
||||
|
||||
def js_to_json(code):
    """ Convert a JavaScript object literal into a JSON string

    Handles the differences this regex-based approach can see:
    * keys that are unquoted identifiers or single-quoted strings,
    * single-quoted string values that directly follow a key,
    * a trailing comma before a closing ] or }.

    This is not a JavaScript parser; anything else (for instance
    single-quoted strings inside arrays) is passed through unchanged.
    """
    def to_json_string(token):
        """ Re-quote a single-quoted token with double quotes """
        if not token.startswith("'"):
            return token
        # Escape double quotes that are not already escaped
        body = re.sub(r'(?<!\\)"', lambda m: '\\"', token[1:-1])
        return '"%s"' % body

    def fix_kv(m):
        key = m.group(2)
        if key.startswith("'"):
            key = to_json_string(key)
        elif not key.startswith('"'):
            key = '"%s"' % key
        return m.group(1) + key + m.group(3) + to_json_string(m.group(4))

    # An opening [ or { after the colon is only looked at, not consumed, so
    # that it can still introduce the first key of a nested object.
    res = re.sub(r'''(?x)
        ([{,]\s*)
        ("[^"]*"|\'[^\']*\'|[a-zA-Z0-9_$]+)
        (:\s*)
        ([0-9.]+|true|false|null|"[^"]*"|\'[^\']*\'|(?=[\[{]))
        ''', fix_kv, code)
    res = re.sub(r',(\s*[\]}])', lambda m: m.group(1), res)
    return res
|
||||
|
||||
|
||||
def qualities(quality_ids):
|
||||
""" Get a numeric quality value out of a list of possible values """
|
||||
def q(qid):
|
||||
|
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2014.08.21.1'
|
||||
__version__ = '2014.08.24.4'
|
||||
|
Reference in New Issue
Block a user