Compare commits
123 Commits
2014.08.02
...
2014.08.23
Author | SHA1 | Date | |
---|---|---|---|
49f3c16543 | |||
2ef6fcb5d8 | |||
38fc045253 | |||
af1fd929c6 | |||
b7b04c9234 | |||
bc0bb6fd30 | |||
430826c9d4 | |||
68909f0c4e | |||
9d048a17d8 | |||
492641d10a | |||
2b9faf5542 | |||
ed2d6a1960 | |||
be843678b1 | |||
c71dfccc98 | |||
1a9ccac7c1 | |||
e330d59abb | |||
394df6d7d0 | |||
218f754940 | |||
a053c3493a | |||
50b294aab8 | |||
756b046f3e | |||
388ac0b18a | |||
ad06434bd3 | |||
bd9820c937 | |||
deda8ac376 | |||
e05f693942 | |||
b27295d2ab | |||
ace52c5713 | |||
e62e150f64 | |||
c44c0a775d | |||
5fcf2dbed0 | |||
91dff03217 | |||
a200f4cee2 | |||
ea6e8d5454 | |||
83d35817f5 | |||
76beff70a8 | |||
61882bf7c6 | |||
cab317a680 | |||
73159f99cc | |||
c15235cd07 | |||
12c3ec3382 | |||
55db73efdf | |||
af40ac054a | |||
a36819731b | |||
181c8655c7 | |||
3b95347bb6 | |||
3b88ee9a7d | |||
55c49908d2 | |||
db9b0b67b7 | |||
35f76e0061 | |||
3f338cd6de | |||
1d01f26ab1 | |||
266c71f971 | |||
e8ee972c6e | |||
f83dda12ad | |||
696d49815e | |||
fe556f1b0c | |||
d5638d974f | |||
938dd254e5 | |||
6493f5d704 | |||
cd6b48365e | |||
4d9bd478f9 | |||
c1d293cfa6 | |||
49807b4ac6 | |||
c990bb3633 | |||
af8322d2f9 | |||
df866e7f2a | |||
664718ff63 | |||
3258263371 | |||
3cfafc4a9b | |||
6f600ff5d6 | |||
90e075da3a | |||
9572013de9 | |||
3a5beb0ca1 | |||
a6da7b6b96 | |||
173a7026d5 | |||
40a90862f4 | |||
511c4325dc | |||
85a699246a | |||
4dc5286e13 | |||
c767dc74b8 | |||
56ca04f662 | |||
eb3680123a | |||
f5273890ee | |||
c7a088a816 | |||
fb17b60811 | |||
1e58804260 | |||
31bf213032 | |||
1cccc41ddc | |||
a91cf27767 | |||
64d02399d8 | |||
5961017202 | |||
d9760fd43c | |||
d42b2d2985 | |||
cccfab6412 | |||
4665664c92 | |||
0adc996bc3 | |||
b42a2a720b | |||
37edd7dd4a | |||
f87b3500c5 | |||
66420a2db4 | |||
6b8492a782 | |||
6de0595eb8 | |||
e48a2c646d | |||
0f831a1a92 | |||
1ce464aba9 | |||
6994e70651 | |||
3e510af38d | |||
5ecd7b0a92 | |||
a229909fa6 | |||
548f31d99c | |||
78b296b0ff | |||
be79b07907 | |||
5537dce84d | |||
493987fefe | |||
c3f0b12b0f | |||
27ace98f51 | |||
a00d73c8c8 | |||
7e660ac113 | |||
37e3cbe22e | |||
610134730a | |||
212a5e28ba | |||
3442b30ab2 |
8
Makefile
8
Makefile
@ -6,10 +6,10 @@ clean:
|
|||||||
cleanall: clean
|
cleanall: clean
|
||||||
rm -f youtube-dl youtube-dl.exe
|
rm -f youtube-dl youtube-dl.exe
|
||||||
|
|
||||||
PREFIX=/usr/local
|
PREFIX ?= /usr/local
|
||||||
BINDIR=$(PREFIX)/bin
|
BINDIR ?= $(PREFIX)/bin
|
||||||
MANDIR=$(PREFIX)/man
|
MANDIR ?= $(PREFIX)/man
|
||||||
PYTHON=/usr/bin/env python
|
PYTHON ?= /usr/bin/env python
|
||||||
|
|
||||||
# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
|
# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
|
||||||
ifeq ($(PREFIX),/usr)
|
ifeq ($(PREFIX),/usr)
|
||||||
|
100
README.md
100
README.md
@ -17,6 +17,14 @@ If you do not have curl, you can alternatively use a recent wget:
|
|||||||
|
|
||||||
Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29).
|
Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29).
|
||||||
|
|
||||||
|
OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/).
|
||||||
|
|
||||||
|
brew install youtube-dl
|
||||||
|
|
||||||
|
You can also use pip:
|
||||||
|
|
||||||
|
sudo pip install youtube-dl
|
||||||
|
|
||||||
Alternatively, refer to the developer instructions below for how to check out and work with the git repository. For further options, including PGP signatures, see https://rg3.github.io/youtube-dl/download.html .
|
Alternatively, refer to the developer instructions below for how to check out and work with the git repository. For further options, including PGP signatures, see https://rg3.github.io/youtube-dl/download.html .
|
||||||
|
|
||||||
# DESCRIPTION
|
# DESCRIPTION
|
||||||
@ -303,10 +311,12 @@ The current default template is `%(title)s-%(id)s.%(ext)s`.
|
|||||||
|
|
||||||
In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
|
In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
|
||||||
|
|
||||||
$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc
|
```bash
|
||||||
youtube-dl test video ''_ä↭𝕐.mp4 # All kinds of weird characters
|
$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc
|
||||||
$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
|
youtube-dl test video ''_ä↭𝕐.mp4 # All kinds of weird characters
|
||||||
youtube-dl_test_video_.mp4 # A simple file name
|
$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
|
||||||
|
youtube-dl_test_video_.mp4 # A simple file name
|
||||||
|
```
|
||||||
|
|
||||||
# VIDEO SELECTION
|
# VIDEO SELECTION
|
||||||
|
|
||||||
@ -317,14 +327,16 @@ Videos can be filtered by their upload date using the options `--date`, `--dateb
|
|||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
|
|
||||||
# Download only the videos uploaded in the last 6 months
|
```bash
|
||||||
$ youtube-dl --dateafter now-6months
|
# Download only the videos uploaded in the last 6 months
|
||||||
|
$ youtube-dl --dateafter now-6months
|
||||||
|
|
||||||
# Download only the videos uploaded on January 1, 1970
|
# Download only the videos uploaded on January 1, 1970
|
||||||
$ youtube-dl --date 19700101
|
$ youtube-dl --date 19700101
|
||||||
|
|
||||||
$ # will only download the videos uploaded in the 200x decade
|
$ # will only download the videos uploaded in the 200x decade
|
||||||
$ youtube-dl --dateafter 20000101 --datebefore 20091231
|
$ youtube-dl --dateafter 20000101 --datebefore 20091231
|
||||||
|
```
|
||||||
|
|
||||||
# FAQ
|
# FAQ
|
||||||
|
|
||||||
@ -399,49 +411,49 @@ If you want to add support for a new site, you can follow this quick list (assum
|
|||||||
2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git`
|
2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git`
|
||||||
3. Start a new git branch with `cd youtube-dl; git checkout -b yourextractor`
|
3. Start a new git branch with `cd youtube-dl; git checkout -b yourextractor`
|
||||||
4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
|
4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
|
||||||
|
```python
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
# coding: utf-8
|
import re
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import re
|
from .common import InfoExtractor
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
|
|
||||||
|
|
||||||
class YourExtractorIE(InfoExtractor):
|
class YourExtractorIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://yourextractor.com/watch/42',
|
'url': 'http://yourextractor.com/watch/42',
|
||||||
'md5': 'TODO: md5 sum of the first 10KiB of the video file',
|
'md5': 'TODO: md5 sum of the first 10KiB of the video file',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '42',
|
'id': '42',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Video title goes here',
|
'title': 'Video title goes here',
|
||||||
# TODO more properties, either as:
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
# * A value
|
# TODO more properties, either as:
|
||||||
# * MD5 checksum; start the string with md5:
|
# * A value
|
||||||
# * A regular expression; start the string with re:
|
# * MD5 checksum; start the string with md5:
|
||||||
# * Any Python type (for example int or float)
|
# * A regular expression; start the string with re:
|
||||||
}
|
# * Any Python type (for example int or float)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
# TODO more code goes here, for example ...
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
# TODO more properties (see youtube_dl/extractor/common.py)
|
|
||||||
}
|
|
||||||
|
|
||||||
|
# TODO more code goes here, for example ...
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
# TODO more properties (see youtube_dl/extractor/common.py)
|
||||||
|
}
|
||||||
|
```
|
||||||
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
|
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
|
||||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done.
|
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
|
||||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want.
|
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want.
|
||||||
8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501).
|
8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501).
|
||||||
9. When the tests pass, [add](https://www.kernel.org/pub/software/scm/git/docs/git-add.html) the new files and [commit](https://www.kernel.org/pub/software/scm/git/docs/git-commit.html) them and [push](https://www.kernel.org/pub/software/scm/git/docs/git-push.html) the result, like this:
|
9. When the tests pass, [add](https://www.kernel.org/pub/software/scm/git/docs/git-add.html) the new files and [commit](https://www.kernel.org/pub/software/scm/git/docs/git-commit.html) them and [push](https://www.kernel.org/pub/software/scm/git/docs/git-push.html) the result, like this:
|
||||||
|
@ -117,8 +117,9 @@ def expect_info_dict(self, expected_dict, got_dict):
|
|||||||
u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
|
u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
|
||||||
|
|
||||||
# Check for the presence of mandatory fields
|
# Check for the presence of mandatory fields
|
||||||
for key in ('id', 'url', 'title', 'ext'):
|
if got_dict.get('_type') != 'playlist':
|
||||||
self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
|
for key in ('id', 'url', 'title', 'ext'):
|
||||||
|
self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
|
||||||
# Check for mandatory fields that are automatically set by YoutubeDL
|
# Check for mandatory fields that are automatically set by YoutubeDL
|
||||||
for key in ['webpage_url', 'extractor', 'extractor_key']:
|
for key in ['webpage_url', 'extractor', 'extractor_key']:
|
||||||
self.assertTrue(got_dict.get(key), u'Missing field: %s' % key)
|
self.assertTrue(got_dict.get(key), u'Missing field: %s' % key)
|
||||||
|
@ -221,7 +221,7 @@ class TestFormatSelection(unittest.TestCase):
|
|||||||
'138', '137', '248', '136', '247', '135', '246',
|
'138', '137', '248', '136', '247', '135', '246',
|
||||||
'245', '244', '134', '243', '133', '242', '160',
|
'245', '244', '134', '243', '133', '242', '160',
|
||||||
# Dash audio
|
# Dash audio
|
||||||
'141', '172', '140', '139', '171',
|
'141', '172', '140', '171', '139',
|
||||||
]
|
]
|
||||||
|
|
||||||
for f1id, f2id in zip(order, order[1:]):
|
for f1id, f2id in zip(order, order[1:]):
|
||||||
|
@ -99,6 +99,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
|
|
||||||
def test_facebook_matching(self):
|
def test_facebook_matching(self):
|
||||||
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
|
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
|
||||||
|
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793'))
|
||||||
|
|
||||||
def test_no_duplicates(self):
|
def test_no_duplicates(self):
|
||||||
ies = gen_extractors()
|
ies = gen_extractors()
|
||||||
|
@ -63,15 +63,21 @@ def generator(test_case):
|
|||||||
def test_template(self):
|
def test_template(self):
|
||||||
ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
|
ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
|
||||||
other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])]
|
other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])]
|
||||||
|
is_playlist = any(k.startswith('playlist') for k in test_case)
|
||||||
|
test_cases = test_case.get(
|
||||||
|
'playlist', [] if is_playlist else [test_case])
|
||||||
|
|
||||||
def print_skipping(reason):
|
def print_skipping(reason):
|
||||||
print('Skipping %s: %s' % (test_case['name'], reason))
|
print('Skipping %s: %s' % (test_case['name'], reason))
|
||||||
if not ie.working():
|
if not ie.working():
|
||||||
print_skipping('IE marked as not _WORKING')
|
print_skipping('IE marked as not _WORKING')
|
||||||
return
|
return
|
||||||
if 'playlist' not in test_case:
|
|
||||||
info_dict = test_case.get('info_dict', {})
|
for tc in test_cases:
|
||||||
if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
|
info_dict = tc.get('info_dict', {})
|
||||||
|
if not tc.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
|
||||||
raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')
|
raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')
|
||||||
|
|
||||||
if 'skip' in test_case:
|
if 'skip' in test_case:
|
||||||
print_skipping(test_case['skip'])
|
print_skipping(test_case['skip'])
|
||||||
return
|
return
|
||||||
@ -81,6 +87,9 @@ def generator(test_case):
|
|||||||
return
|
return
|
||||||
|
|
||||||
params = get_params(test_case.get('params', {}))
|
params = get_params(test_case.get('params', {}))
|
||||||
|
if is_playlist and 'playlist' not in test_case:
|
||||||
|
params.setdefault('extract_flat', True)
|
||||||
|
params.setdefault('skip_download', True)
|
||||||
|
|
||||||
ydl = YoutubeDL(params)
|
ydl = YoutubeDL(params)
|
||||||
ydl.add_default_info_extractors()
|
ydl.add_default_info_extractors()
|
||||||
@ -93,7 +102,6 @@ def generator(test_case):
|
|||||||
def get_tc_filename(tc):
|
def get_tc_filename(tc):
|
||||||
return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
|
return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
|
||||||
|
|
||||||
test_cases = test_case.get('playlist', [test_case])
|
|
||||||
def try_rm_tcs_files():
|
def try_rm_tcs_files():
|
||||||
for tc in test_cases:
|
for tc in test_cases:
|
||||||
tc_filename = get_tc_filename(tc)
|
tc_filename = get_tc_filename(tc)
|
||||||
@ -105,7 +113,10 @@ def generator(test_case):
|
|||||||
try_num = 1
|
try_num = 1
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
ydl.download([test_case['url']])
|
# We're not using .download here since that is just a shim
|
||||||
|
# for outside error handling, and returns the exit code
|
||||||
|
# instead of the result dict.
|
||||||
|
res_dict = ydl.extract_info(test_case['url'])
|
||||||
except (DownloadError, ExtractorError) as err:
|
except (DownloadError, ExtractorError) as err:
|
||||||
# Check if the exception is not a network related one
|
# Check if the exception is not a network related one
|
||||||
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
|
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
|
||||||
@ -121,6 +132,17 @@ def generator(test_case):
|
|||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
if is_playlist:
|
||||||
|
self.assertEqual(res_dict['_type'], 'playlist')
|
||||||
|
expect_info_dict(self, test_case.get('info_dict', {}), res_dict)
|
||||||
|
if 'playlist_mincount' in test_case:
|
||||||
|
self.assertGreaterEqual(
|
||||||
|
len(res_dict['entries']),
|
||||||
|
test_case['playlist_mincount'],
|
||||||
|
'Expected at least %d in playlist %s, but got only %d' % (
|
||||||
|
test_case['playlist_mincount'], test_case['url'],
|
||||||
|
len(res_dict['entries'])))
|
||||||
|
|
||||||
for tc in test_cases:
|
for tc in test_cases:
|
||||||
tc_filename = get_tc_filename(tc)
|
tc_filename = get_tc_filename(tc)
|
||||||
if not test_case.get('params', {}).get('skip_download', False):
|
if not test_case.get('params', {}).get('skip_download', False):
|
||||||
|
@ -1,6 +1,17 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
|
|
||||||
|
## DEPRECATED FILE!
|
||||||
|
# Add new tests to the extractors themselves, like this:
|
||||||
|
# _TEST = {
|
||||||
|
# 'url': 'http://example.com/playlist/42',
|
||||||
|
# 'playlist_mincount': 99,
|
||||||
|
# 'info_dict': {
|
||||||
|
# 'id': '42',
|
||||||
|
# 'title': 'Playlist number forty-two',
|
||||||
|
# }
|
||||||
|
# }
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
# Allow direct execution
|
# Allow direct execution
|
||||||
|
@ -280,7 +280,7 @@ class TestUtil(unittest.TestCase):
|
|||||||
d = json.loads(stripped)
|
d = json.loads(stripped)
|
||||||
self.assertEqual(d, [{"id": "532cb", "x": 3}])
|
self.assertEqual(d, [{"id": "532cb", "x": 3}])
|
||||||
|
|
||||||
def test_uppercase_escpae(self):
|
def test_uppercase_escape(self):
|
||||||
self.assertEqual(uppercase_escape(u'aä'), u'aä')
|
self.assertEqual(uppercase_escape(u'aä'), u'aä')
|
||||||
self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐')
|
self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐')
|
||||||
|
|
||||||
|
@ -162,6 +162,7 @@ class YoutubeDL(object):
|
|||||||
default_search: Prepend this string if an input url is not valid.
|
default_search: Prepend this string if an input url is not valid.
|
||||||
'auto' for elaborate guessing
|
'auto' for elaborate guessing
|
||||||
encoding: Use this encoding instead of the system-specified.
|
encoding: Use this encoding instead of the system-specified.
|
||||||
|
extract_flat: Do not resolve URLs, return the immediate result.
|
||||||
|
|
||||||
The following parameters are not used by YoutubeDL itself, they are used by
|
The following parameters are not used by YoutubeDL itself, they are used by
|
||||||
the FileDownloader:
|
the FileDownloader:
|
||||||
@ -479,7 +480,10 @@ class YoutubeDL(object):
|
|||||||
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
|
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
|
||||||
age_limit = self.params.get('age_limit')
|
age_limit = self.params.get('age_limit')
|
||||||
if age_limit is not None:
|
if age_limit is not None:
|
||||||
if age_limit < info_dict.get('age_limit', 0):
|
actual_age_limit = info_dict.get('age_limit')
|
||||||
|
if actual_age_limit is None:
|
||||||
|
actual_age_limit = 0
|
||||||
|
if age_limit < actual_age_limit:
|
||||||
return 'Skipping "' + title + '" because it is age restricted'
|
return 'Skipping "' + title + '" because it is age restricted'
|
||||||
if self.in_download_archive(info_dict):
|
if self.in_download_archive(info_dict):
|
||||||
return '%s has already been recorded in archive' % video_title
|
return '%s has already been recorded in archive' % video_title
|
||||||
@ -558,7 +562,12 @@ class YoutubeDL(object):
|
|||||||
Returns the resolved ie_result.
|
Returns the resolved ie_result.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
|
result_type = ie_result.get('_type', 'video')
|
||||||
|
|
||||||
|
if self.params.get('extract_flat', False):
|
||||||
|
if result_type in ('url', 'url_transparent'):
|
||||||
|
return ie_result
|
||||||
|
|
||||||
if result_type == 'video':
|
if result_type == 'video':
|
||||||
self.add_extra_info(ie_result, extra_info)
|
self.add_extra_info(ie_result, extra_info)
|
||||||
return self.process_video_result(ie_result, download=download)
|
return self.process_video_result(ie_result, download=download)
|
||||||
|
@ -66,6 +66,11 @@ __authors__ = (
|
|||||||
'Naglis Jonaitis',
|
'Naglis Jonaitis',
|
||||||
'Charles Chen',
|
'Charles Chen',
|
||||||
'Hassaan Ali',
|
'Hassaan Ali',
|
||||||
|
'Dobrosław Żybort',
|
||||||
|
'David Fabijan',
|
||||||
|
'Sebastian Haas',
|
||||||
|
'Alexander Kirk',
|
||||||
|
'Erik Johnson',
|
||||||
)
|
)
|
||||||
|
|
||||||
__license__ = 'Public Domain'
|
__license__ = 'Public Domain'
|
||||||
|
@ -292,7 +292,7 @@ class FileDownloader(object):
|
|||||||
|
|
||||||
def real_download(self, filename, info_dict):
|
def real_download(self, filename, info_dict):
|
||||||
"""Real download process. Redefine in subclasses."""
|
"""Real download process. Redefine in subclasses."""
|
||||||
raise NotImplementedError(u'This method must be implemented by sublcasses')
|
raise NotImplementedError(u'This method must be implemented by subclasses')
|
||||||
|
|
||||||
def _hook_progress(self, status):
|
def _hook_progress(self, status):
|
||||||
for ph in self._progress_hooks:
|
for ph in self._progress_hooks:
|
||||||
|
@ -69,6 +69,7 @@ from .dfb import DFBIE
|
|||||||
from .dotsub import DotsubIE
|
from .dotsub import DotsubIE
|
||||||
from .dreisat import DreiSatIE
|
from .dreisat import DreiSatIE
|
||||||
from .drtv import DRTVIE
|
from .drtv import DRTVIE
|
||||||
|
from .dump import DumpIE
|
||||||
from .defense import DefenseGouvFrIE
|
from .defense import DefenseGouvFrIE
|
||||||
from .discovery import DiscoveryIE
|
from .discovery import DiscoveryIE
|
||||||
from .divxstage import DivxStageIE
|
from .divxstage import DivxStageIE
|
||||||
@ -77,6 +78,10 @@ from .ebaumsworld import EbaumsWorldIE
|
|||||||
from .ehow import EHowIE
|
from .ehow import EHowIE
|
||||||
from .eighttracks import EightTracksIE
|
from .eighttracks import EightTracksIE
|
||||||
from .eitb import EitbIE
|
from .eitb import EitbIE
|
||||||
|
from .ellentv import (
|
||||||
|
EllenTVIE,
|
||||||
|
EllenTVClipsIE,
|
||||||
|
)
|
||||||
from .elpais import ElPaisIE
|
from .elpais import ElPaisIE
|
||||||
from .empflix import EmpflixIE
|
from .empflix import EmpflixIE
|
||||||
from .engadget import EngadgetIE
|
from .engadget import EngadgetIE
|
||||||
@ -126,6 +131,7 @@ from .helsinki import HelsinkiIE
|
|||||||
from .hentaistigma import HentaiStigmaIE
|
from .hentaistigma import HentaiStigmaIE
|
||||||
from .hotnewhiphop import HotNewHipHopIE
|
from .hotnewhiphop import HotNewHipHopIE
|
||||||
from .howcast import HowcastIE
|
from .howcast import HowcastIE
|
||||||
|
from .howstuffworks import HowStuffWorksIE
|
||||||
from .huffpost import HuffPostIE
|
from .huffpost import HuffPostIE
|
||||||
from .hypem import HypemIE
|
from .hypem import HypemIE
|
||||||
from .iconosquare import IconosquareIE
|
from .iconosquare import IconosquareIE
|
||||||
@ -146,6 +152,7 @@ from .ivi import (
|
|||||||
from .izlesene import IzleseneIE
|
from .izlesene import IzleseneIE
|
||||||
from .jadorecettepub import JadoreCettePubIE
|
from .jadorecettepub import JadoreCettePubIE
|
||||||
from .jeuxvideo import JeuxVideoIE
|
from .jeuxvideo import JeuxVideoIE
|
||||||
|
from .jove import JoveIE
|
||||||
from .jukebox import JukeboxIE
|
from .jukebox import JukeboxIE
|
||||||
from .justintv import JustinTVIE
|
from .justintv import JustinTVIE
|
||||||
from .jpopsukitv import JpopsukiIE
|
from .jpopsukitv import JpopsukiIE
|
||||||
@ -177,14 +184,17 @@ from .mdr import MDRIE
|
|||||||
from .metacafe import MetacafeIE
|
from .metacafe import MetacafeIE
|
||||||
from .metacritic import MetacriticIE
|
from .metacritic import MetacriticIE
|
||||||
from .mit import TechTVMITIE, MITIE, OCWMITIE
|
from .mit import TechTVMITIE, MITIE, OCWMITIE
|
||||||
|
from .mitele import MiTeleIE
|
||||||
from .mixcloud import MixcloudIE
|
from .mixcloud import MixcloudIE
|
||||||
from .mlb import MLBIE
|
from .mlb import MLBIE
|
||||||
from .mpora import MporaIE
|
from .mpora import MporaIE
|
||||||
from .mofosex import MofosexIE
|
from .mofosex import MofosexIE
|
||||||
|
from .mojvideo import MojvideoIE
|
||||||
from .mooshare import MooshareIE
|
from .mooshare import MooshareIE
|
||||||
from .morningstar import MorningstarIE
|
from .morningstar import MorningstarIE
|
||||||
from .motherless import MotherlessIE
|
from .motherless import MotherlessIE
|
||||||
from .motorsport import MotorsportIE
|
from .motorsport import MotorsportIE
|
||||||
|
from .movieclips import MovieClipsIE
|
||||||
from .moviezine import MoviezineIE
|
from .moviezine import MoviezineIE
|
||||||
from .movshare import MovShareIE
|
from .movshare import MovShareIE
|
||||||
from .mtv import (
|
from .mtv import (
|
||||||
@ -224,12 +234,17 @@ from .nrk import (
|
|||||||
from .ntv import NTVIE
|
from .ntv import NTVIE
|
||||||
from .nytimes import NYTimesIE
|
from .nytimes import NYTimesIE
|
||||||
from .nuvid import NuvidIE
|
from .nuvid import NuvidIE
|
||||||
from .oe1 import OE1IE
|
|
||||||
from .ooyala import OoyalaIE
|
from .ooyala import OoyalaIE
|
||||||
from .orf import ORFIE
|
from .orf import (
|
||||||
|
ORFTVthekIE,
|
||||||
|
ORFOE1IE,
|
||||||
|
ORFFM4IE,
|
||||||
|
)
|
||||||
from .parliamentliveuk import ParliamentLiveUKIE
|
from .parliamentliveuk import ParliamentLiveUKIE
|
||||||
|
from .patreon import PatreonIE
|
||||||
from .pbs import PBSIE
|
from .pbs import PBSIE
|
||||||
from .photobucket import PhotobucketIE
|
from .photobucket import PhotobucketIE
|
||||||
|
from .playfm import PlayFMIE
|
||||||
from .playvid import PlayvidIE
|
from .playvid import PlayvidIE
|
||||||
from .podomatic import PodomaticIE
|
from .podomatic import PodomaticIE
|
||||||
from .pornhd import PornHdIE
|
from .pornhd import PornHdIE
|
||||||
@ -247,9 +262,10 @@ from .ro220 import Ro220IE
|
|||||||
from .rottentomatoes import RottenTomatoesIE
|
from .rottentomatoes import RottenTomatoesIE
|
||||||
from .roxwel import RoxwelIE
|
from .roxwel import RoxwelIE
|
||||||
from .rtbf import RTBFIE
|
from .rtbf import RTBFIE
|
||||||
|
from .rtlnl import RtlXlIE
|
||||||
from .rtlnow import RTLnowIE
|
from .rtlnow import RTLnowIE
|
||||||
from .rts import RTSIE
|
from .rts import RTSIE
|
||||||
from .rtve import RTVEALaCartaIE
|
from .rtve import RTVEALaCartaIE, RTVELiveIE
|
||||||
from .ruhd import RUHDIE
|
from .ruhd import RUHDIE
|
||||||
from .rutube import (
|
from .rutube import (
|
||||||
RutubeIE,
|
RutubeIE,
|
||||||
@ -260,6 +276,7 @@ from .rutube import (
|
|||||||
from .rutv import RUTVIE
|
from .rutv import RUTVIE
|
||||||
from .sapo import SapoIE
|
from .sapo import SapoIE
|
||||||
from .savefrom import SaveFromIE
|
from .savefrom import SaveFromIE
|
||||||
|
from .sbs import SBSIE
|
||||||
from .scivee import SciVeeIE
|
from .scivee import SciVeeIE
|
||||||
from .screencast import ScreencastIE
|
from .screencast import ScreencastIE
|
||||||
from .servingsys import ServingSysIE
|
from .servingsys import ServingSysIE
|
||||||
@ -327,7 +344,7 @@ from .tutv import TutvIE
|
|||||||
from .tvigle import TvigleIE
|
from .tvigle import TvigleIE
|
||||||
from .tvp import TvpIE
|
from .tvp import TvpIE
|
||||||
from .tvplay import TVPlayIE
|
from .tvplay import TVPlayIE
|
||||||
from.ubu import UbuIE
|
from .ubu import UbuIE
|
||||||
from .udemy import (
|
from .udemy import (
|
||||||
UdemyIE,
|
UdemyIE,
|
||||||
UdemyCourseIE
|
UdemyCourseIE
|
||||||
@ -383,6 +400,7 @@ from .wistia import WistiaIE
|
|||||||
from .worldstarhiphop import WorldStarHipHopIE
|
from .worldstarhiphop import WorldStarHipHopIE
|
||||||
from .wrzuta import WrzutaIE
|
from .wrzuta import WrzutaIE
|
||||||
from .xbef import XBefIE
|
from .xbef import XBefIE
|
||||||
|
from .xboxclips import XboxClipsIE
|
||||||
from .xhamster import XHamsterIE
|
from .xhamster import XHamsterIE
|
||||||
from .xnxx import XNXXIE
|
from .xnxx import XNXXIE
|
||||||
from .xvideos import XVideosIE
|
from .xvideos import XVideosIE
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
#coding: utf-8
|
#coding: utf-8
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -13,13 +15,14 @@ class AparatIE(InfoExtractor):
|
|||||||
_VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
|
_VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.aparat.com/v/wP8On',
|
'url': 'http://www.aparat.com/v/wP8On',
|
||||||
u'file': u'wP8On.mp4',
|
'md5': '6714e0af7e0d875c5a39c4dc4ab46ad1',
|
||||||
u'md5': u'6714e0af7e0d875c5a39c4dc4ab46ad1',
|
'info_dict': {
|
||||||
u'info_dict': {
|
'id': 'wP8On',
|
||||||
u"title": u"تیم گلکسی 11 - زومیت",
|
'ext': 'mp4',
|
||||||
|
'title': 'تیم گلکسی 11 - زومیت',
|
||||||
},
|
},
|
||||||
#u'skip': u'Extremely unreliable',
|
# 'skip': 'Extremely unreliable',
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -29,8 +32,8 @@ class AparatIE(InfoExtractor):
|
|||||||
# Note: There is an easier-to-parse configuration at
|
# Note: There is an easier-to-parse configuration at
|
||||||
# http://www.aparat.com/video/video/config/videohash/%video_id
|
# http://www.aparat.com/video/video/config/videohash/%video_id
|
||||||
# but the URL in there does not work
|
# but the URL in there does not work
|
||||||
embed_url = (u'http://www.aparat.com/video/video/embed/videohash/' +
|
embed_url = ('http://www.aparat.com/video/video/embed/videohash/' +
|
||||||
video_id + u'/vt/frame')
|
video_id + '/vt/frame')
|
||||||
webpage = self._download_webpage(embed_url, video_id)
|
webpage = self._download_webpage(embed_url, video_id)
|
||||||
|
|
||||||
video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage)
|
video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage)
|
||||||
|
@ -6,6 +6,7 @@ import json
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
|
int_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -110,8 +111,8 @@ class AppleTrailersIE(InfoExtractor):
|
|||||||
formats.append({
|
formats.append({
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
'format': format['type'],
|
'format': format['type'],
|
||||||
'width': format['width'],
|
'width': int_or_none(format['width']),
|
||||||
'height': int(format['height']),
|
'height': int_or_none(format['height']),
|
||||||
})
|
})
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
@ -51,6 +51,9 @@ class ARDIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
|
||||||
|
raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
|
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
|
||||||
r'<meta name="dcterms.title" content="(.*?)"/>',
|
r'<meta name="dcterms.title" content="(.*?)"/>',
|
||||||
|
@ -109,15 +109,19 @@ class ArteTVPlus7IE(InfoExtractor):
|
|||||||
regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
|
regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
|
||||||
return any(re.match(r, f['versionCode']) for r in regexes)
|
return any(re.match(r, f['versionCode']) for r in regexes)
|
||||||
# Some formats may not be in the same language as the url
|
# Some formats may not be in the same language as the url
|
||||||
|
# TODO: Might want not to drop videos that does not match requested language
|
||||||
|
# but to process those formats with lower precedence
|
||||||
formats = filter(_match_lang, all_formats)
|
formats = filter(_match_lang, all_formats)
|
||||||
formats = list(formats) # in python3 filter returns an iterator
|
formats = list(formats) # in python3 filter returns an iterator
|
||||||
if not formats:
|
if not formats:
|
||||||
# Some videos are only available in the 'Originalversion'
|
# Some videos are only available in the 'Originalversion'
|
||||||
# they aren't tagged as being in French or German
|
# they aren't tagged as being in French or German
|
||||||
if all(f['versionCode'] == 'VO' or f['versionCode'] == 'VA' for f in all_formats):
|
# Sometimes there are neither videos of requested lang code
|
||||||
formats = all_formats
|
# nor original version videos available
|
||||||
else:
|
# For such cases we just take all_formats as is
|
||||||
raise ExtractorError(u'The formats list is empty')
|
formats = all_formats
|
||||||
|
if not formats:
|
||||||
|
raise ExtractorError('The formats list is empty')
|
||||||
|
|
||||||
if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
|
if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
|
||||||
def sort_key(f):
|
def sort_key(f):
|
||||||
|
@ -463,8 +463,9 @@ class InfoExtractor(object):
|
|||||||
return self._og_search_property('title', html, **kargs)
|
return self._og_search_property('title', html, **kargs)
|
||||||
|
|
||||||
def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
|
def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
|
||||||
regexes = self._og_regexes('video')
|
regexes = self._og_regexes('video') + self._og_regexes('video:url')
|
||||||
if secure: regexes = self._og_regexes('video:secure_url') + regexes
|
if secure:
|
||||||
|
regexes = self._og_regexes('video:secure_url') + regexes
|
||||||
return self._html_search_regex(regexes, html, name, **kargs)
|
return self._html_search_regex(regexes, html, name, **kargs)
|
||||||
|
|
||||||
def _og_search_url(self, html, **kargs):
|
def _og_search_url(self, html, **kargs):
|
||||||
|
@ -30,7 +30,7 @@ class DFBIE(InfoExtractor):
|
|||||||
video_id)
|
video_id)
|
||||||
video_info = player_info.find('video')
|
video_info = player_info.find('video')
|
||||||
|
|
||||||
f4m_info = self._download_xml(video_info.find('url').text, video_id)
|
f4m_info = self._download_xml(self._proto_relative_url(video_info.find('url').text.strip()), video_id)
|
||||||
token_el = f4m_info.find('token')
|
token_el = f4m_info.find('token')
|
||||||
manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
|
manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
|
||||||
|
|
||||||
|
39
youtube_dl/extractor/dump.py
Normal file
39
youtube_dl/extractor/dump.py
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class DumpIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'^https?://(?:www\.)?dump\.com/(?P<id>[a-zA-Z0-9]+)/'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.dump.com/oneus/',
|
||||||
|
'md5': 'ad71704d1e67dfd9e81e3e8b42d69d99',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'oneus',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': "He's one of us.",
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
m = re.match(self._VALID_URL, url)
|
||||||
|
video_id = m.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
video_url = self._search_regex(
|
||||||
|
r's1.addVariable\("file",\s*"([^"]+)"', webpage, 'video URL')
|
||||||
|
|
||||||
|
thumb = self._og_search_thumbnail(webpage)
|
||||||
|
title = self._search_regex(r'<b>([^"]+)</b>', webpage, 'title')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'url': video_url,
|
||||||
|
'thumbnail': thumb,
|
||||||
|
}
|
79
youtube_dl/extractor/ellentv.py
Normal file
79
youtube_dl/extractor/ellentv.py
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
parse_iso8601,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class EllenTVIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?ellentv\.com/videos/(?P<id>[a-z0-9_-]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.ellentv.com/videos/0-7jqrsr18/',
|
||||||
|
'md5': 'e4af06f3bf0d5f471921a18db5764642',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '0-7jqrsr18',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'What\'s Wrong with These Photos? A Whole Lot',
|
||||||
|
'timestamp': 1406876400,
|
||||||
|
'upload_date': '20140801',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
timestamp = parse_iso8601(self._search_regex(
|
||||||
|
r'<span class="publish-date"><time datetime="([^"]+)">',
|
||||||
|
webpage, 'timestamp'))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': self._og_search_title(webpage),
|
||||||
|
'url': self._html_search_meta('VideoURL', webpage, 'url'),
|
||||||
|
'timestamp': timestamp,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class EllenTVClipsIE(InfoExtractor):
|
||||||
|
IE_NAME = 'EllenTV:clips'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?ellentv\.com/episodes/(?P<id>[a-z0-9_-]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.ellentv.com/episodes/meryl-streep-vanessa-hudgens/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'meryl-streep-vanessa-hudgens',
|
||||||
|
'title': 'Meryl Streep, Vanessa Hudgens',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 9,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
playlist_id = mobj.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
playlist = self._extract_playlist(webpage)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'id': playlist_id,
|
||||||
|
'title': self._og_search_title(webpage),
|
||||||
|
'entries': self._extract_entries(playlist)
|
||||||
|
}
|
||||||
|
|
||||||
|
def _extract_playlist(self, webpage):
|
||||||
|
json_string = self._search_regex(r'playerView.addClips\(\[\{(.*?)\}\]\);', webpage, 'json')
|
||||||
|
try:
|
||||||
|
return json.loads("[{" + json_string + "}]")
|
||||||
|
except ValueError as ve:
|
||||||
|
raise ExtractorError('Failed to download JSON', cause=ve)
|
||||||
|
|
||||||
|
def _extract_entries(self, playlist):
|
||||||
|
return [self.url_result(item['url'], 'EllenTV') for item in playlist]
|
@ -36,7 +36,7 @@ class EscapistIE(InfoExtractor):
|
|||||||
r'<meta name="description" content="([^"]*)"',
|
r'<meta name="description" content="([^"]*)"',
|
||||||
webpage, 'description', fatal=False)
|
webpage, 'description', fatal=False)
|
||||||
|
|
||||||
playerUrl = self._og_search_video_url(webpage, name=u'player URL')
|
playerUrl = self._og_search_video_url(webpage, name='player URL')
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<meta name="title" content="([^"]*)"',
|
r'<meta name="title" content="([^"]*)"',
|
||||||
|
@ -20,7 +20,7 @@ from ..utils import (
|
|||||||
class FacebookIE(InfoExtractor):
|
class FacebookIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://(?:\w+\.)?facebook\.com/
|
https?://(?:\w+\.)?facebook\.com/
|
||||||
(?:[^#?]*\#!/)?
|
(?:[^#]*?\#!/)?
|
||||||
(?:video/video\.php|photo\.php|video/embed)\?(?:.*?)
|
(?:video/video\.php|photo\.php|video/embed)\?(?:.*?)
|
||||||
(?:v|video_id)=(?P<id>[0-9]+)
|
(?:v|video_id)=(?P<id>[0-9]+)
|
||||||
(?:.*)'''
|
(?:.*)'''
|
||||||
|
@ -42,7 +42,6 @@ class FiredriveIE(InfoExtractor):
|
|||||||
fields = dict(re.findall(r'''(?x)<input\s+
|
fields = dict(re.findall(r'''(?x)<input\s+
|
||||||
type="hidden"\s+
|
type="hidden"\s+
|
||||||
name="([^"]+)"\s+
|
name="([^"]+)"\s+
|
||||||
(?:id="[^"]+"\s+)?
|
|
||||||
value="([^"]*)"
|
value="([^"]*)"
|
||||||
''', webpage))
|
''', webpage))
|
||||||
|
|
||||||
@ -66,7 +65,7 @@ class FiredriveIE(InfoExtractor):
|
|||||||
ext = self._search_regex(r'type:\s?\'([^\']+)\',',
|
ext = self._search_regex(r'type:\s?\'([^\']+)\',',
|
||||||
webpage, 'extension', fatal=False)
|
webpage, 'extension', fatal=False)
|
||||||
video_url = self._search_regex(
|
video_url = self._search_regex(
|
||||||
r'file:\s?\'(http[^\']+)\',', webpage, 'file url')
|
r'file:\s?loadURL\(\'(http[^\']+)\'\),', webpage, 'file url')
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'format_id': 'sd',
|
'format_id': 'sd',
|
||||||
|
@ -16,6 +16,7 @@ from ..utils import (
|
|||||||
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
HEADRequest,
|
HEADRequest,
|
||||||
|
orderedSet,
|
||||||
parse_xml,
|
parse_xml,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
@ -289,6 +290,22 @@ class GenericIE(InfoExtractor):
|
|||||||
'description': 'Mario\'s life in the fast lane has never looked so good.',
|
'description': 'Mario\'s life in the fast lane has never looked so good.',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# YouTube embed via <data-embed-url="">
|
||||||
|
{
|
||||||
|
'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'jpSGZsgga_I',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Asphalt 8: Airborne - Launch Trailer',
|
||||||
|
'uploader': 'Gameloft',
|
||||||
|
'uploader_id': 'gameloft',
|
||||||
|
'upload_date': '20130821',
|
||||||
|
'description': 'md5:87bd95f13d8be3e7da87a5f2c443106a',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def report_download_webpage(self, video_id):
|
def report_download_webpage(self, video_id):
|
||||||
@ -479,6 +496,12 @@ class GenericIE(InfoExtractor):
|
|||||||
video_uploader = self._search_regex(
|
video_uploader = self._search_regex(
|
||||||
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
|
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
|
||||||
|
|
||||||
|
# Helper method
|
||||||
|
def _playlist_from_matches(matches, getter, ie=None):
|
||||||
|
urlrs = orderedSet(self.url_result(getter(m), ie) for m in matches)
|
||||||
|
return self.playlist_result(
|
||||||
|
urlrs, playlist_id=video_id, playlist_title=video_title)
|
||||||
|
|
||||||
# Look for BrightCove:
|
# Look for BrightCove:
|
||||||
bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
|
bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
|
||||||
if bc_urls:
|
if bc_urls:
|
||||||
@ -514,6 +537,7 @@ class GenericIE(InfoExtractor):
|
|||||||
matches = re.findall(r'''(?x)
|
matches = re.findall(r'''(?x)
|
||||||
(?:
|
(?:
|
||||||
<iframe[^>]+?src=|
|
<iframe[^>]+?src=|
|
||||||
|
data-video-url=|
|
||||||
<embed[^>]+?src=|
|
<embed[^>]+?src=|
|
||||||
embedSWF\(?:\s*
|
embedSWF\(?:\s*
|
||||||
)
|
)
|
||||||
@ -522,19 +546,15 @@ class GenericIE(InfoExtractor):
|
|||||||
(?:embed|v)/.+?)
|
(?:embed|v)/.+?)
|
||||||
\1''', webpage)
|
\1''', webpage)
|
||||||
if matches:
|
if matches:
|
||||||
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
|
return _playlist_from_matches(
|
||||||
for tuppl in matches]
|
matches, lambda m: unescapeHTML(m[1]), ie='Youtube')
|
||||||
return self.playlist_result(
|
|
||||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
|
||||||
|
|
||||||
# Look for embedded Dailymotion player
|
# Look for embedded Dailymotion player
|
||||||
matches = re.findall(
|
matches = re.findall(
|
||||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
|
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage)
|
||||||
if matches:
|
if matches:
|
||||||
urlrs = [self.url_result(unescapeHTML(tuppl[1]))
|
return _playlist_from_matches(
|
||||||
for tuppl in matches]
|
matches, lambda m: unescapeHTML(m[1]))
|
||||||
return self.playlist_result(
|
|
||||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
|
||||||
|
|
||||||
# Look for embedded Wistia player
|
# Look for embedded Wistia player
|
||||||
match = re.search(
|
match = re.search(
|
||||||
@ -648,10 +668,8 @@ class GenericIE(InfoExtractor):
|
|||||||
# Look for funnyordie embed
|
# Look for funnyordie embed
|
||||||
matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
|
matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
|
||||||
if matches:
|
if matches:
|
||||||
urlrs = [self.url_result(unescapeHTML(eurl), 'FunnyOrDie')
|
return _playlist_from_matches(
|
||||||
for eurl in matches]
|
matches, getter=unescapeHTML, ie='FunnyOrDie')
|
||||||
return self.playlist_result(
|
|
||||||
urlrs, playlist_id=video_id, playlist_title=video_title)
|
|
||||||
|
|
||||||
# Look for embedded RUTV player
|
# Look for embedded RUTV player
|
||||||
rutv_url = RUTVIE._extract_url(webpage)
|
rutv_url = RUTVIE._extract_url(webpage)
|
||||||
@ -706,6 +724,20 @@ class GenericIE(InfoExtractor):
|
|||||||
url = unescapeHTML(mobj.group('url'))
|
url = unescapeHTML(mobj.group('url'))
|
||||||
return self.url_result(url, ie='MTVServicesEmbedded')
|
return self.url_result(url, ie='MTVServicesEmbedded')
|
||||||
|
|
||||||
|
# Look for embedded yahoo player
|
||||||
|
mobj = re.search(
|
||||||
|
r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
|
||||||
|
webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result(mobj.group('url'), 'Yahoo')
|
||||||
|
|
||||||
|
# Look for embedded sbs.com.au player
|
||||||
|
mobj = re.search(
|
||||||
|
r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)sbs\.com\.au/ondemand/video/single/.+?)\1',
|
||||||
|
webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result(mobj.group('url'), 'SBS')
|
||||||
|
|
||||||
# Start with something easy: JW Player in SWFObject
|
# Start with something easy: JW Player in SWFObject
|
||||||
found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||||
if not found:
|
if not found:
|
||||||
|
134
youtube_dl/extractor/howstuffworks.py
Normal file
134
youtube_dl/extractor/howstuffworks.py
Normal file
@ -0,0 +1,134 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
import random
|
||||||
|
import string
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import find_xpath_attr
|
||||||
|
|
||||||
|
|
||||||
|
class HowStuffWorksIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*\d+-(?P<id>.+?)-video\.htm'
|
||||||
|
_TESTS = [
|
||||||
|
{
|
||||||
|
'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '450221',
|
||||||
|
'display_id': 'cool-jobs-iditarod-musher',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Cool Jobs - Iditarod Musher',
|
||||||
|
'description': 'md5:82bb58438a88027b8186a1fccb365f90',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# md5 is not consistent
|
||||||
|
'skip_download': True
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://adventure.howstuffworks.com/39516-deadliest-catch-jakes-farewell-pots-video.htm',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '553470',
|
||||||
|
'display_id': 'deadliest-catch-jakes-farewell-pots',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Deadliest Catch: Jake\'s Farewell Pots',
|
||||||
|
'description': 'md5:9632c346d5e43ee238028c9cefd8dbbc',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# md5 is not consistent
|
||||||
|
'skip_download': True
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://entertainment.howstuffworks.com/arts/2706-sword-swallowing-1-by-dan-meyer-video.htm',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '440011',
|
||||||
|
'display_id': 'sword-swallowing-1-by-dan-meyer',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Sword Swallowing #1 by Dan Meyer',
|
||||||
|
'description': 'md5:b2409e88172913e2e7d3d1159b0ef735',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# md5 is not consistent
|
||||||
|
'skip_download': True
|
||||||
|
}
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
display_id = mobj.group('id')
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
content_id = self._search_regex(r'var siteSectionId="(\d+)";', webpage, 'content id')
|
||||||
|
|
||||||
|
mp4 = self._search_regex(
|
||||||
|
r'''(?xs)var\s+clip\s*=\s*{\s*
|
||||||
|
.+?\s*
|
||||||
|
content_id\s*:\s*%s\s*,\s*
|
||||||
|
.+?\s*
|
||||||
|
mp4\s*:\s*\[(.*?),?\]\s*
|
||||||
|
};\s*
|
||||||
|
videoData\.push\(clip\);''' % content_id,
|
||||||
|
webpage, 'mp4', fatal=False, default=None)
|
||||||
|
|
||||||
|
smil = self._download_xml(
|
||||||
|
'http://services.media.howstuffworks.com/videos/%s/smil-service.smil' % content_id,
|
||||||
|
content_id, 'Downloading video SMIL')
|
||||||
|
|
||||||
|
http_base = find_xpath_attr(
|
||||||
|
smil,
|
||||||
|
'./{0}head/{0}meta'.format('{http://www.w3.org/2001/SMIL20/Language}'),
|
||||||
|
'name',
|
||||||
|
'httpBase').get('content')
|
||||||
|
|
||||||
|
def random_string(str_len=0):
|
||||||
|
return ''.join([random.choice(string.ascii_uppercase) for _ in range(str_len)])
|
||||||
|
|
||||||
|
URL_SUFFIX = '?v=2.11.3&fp=LNX 11,2,202,356&r=%s&g=%s' % (random_string(5), random_string(12))
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
|
||||||
|
if mp4:
|
||||||
|
for video in json.loads('[%s]' % mp4):
|
||||||
|
bitrate = video['bitrate']
|
||||||
|
fmt = {
|
||||||
|
'url': video['src'].replace('http://pmd.video.howstuffworks.com', http_base) + URL_SUFFIX,
|
||||||
|
'format_id': bitrate,
|
||||||
|
}
|
||||||
|
m = re.search(r'(?P<vbr>\d+)[Kk]', bitrate)
|
||||||
|
if m:
|
||||||
|
fmt['vbr'] = int(m.group('vbr'))
|
||||||
|
formats.append(fmt)
|
||||||
|
else:
|
||||||
|
for video in smil.findall(
|
||||||
|
'.//{0}body/{0}switch/{0}video'.format('{http://www.w3.org/2001/SMIL20/Language}')):
|
||||||
|
vbr = int(video.attrib['system-bitrate']) / 1000
|
||||||
|
formats.append({
|
||||||
|
'url': '%s/%s%s' % (http_base, video.attrib['src'], URL_SUFFIX),
|
||||||
|
'format_id': '%dk' % vbr,
|
||||||
|
'vbr': vbr,
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
title = self._og_search_title(webpage)
|
||||||
|
TITLE_SUFFIX = ' : HowStuffWorks'
|
||||||
|
if title.endswith(TITLE_SUFFIX):
|
||||||
|
title = title[:-len(TITLE_SUFFIX)]
|
||||||
|
|
||||||
|
description = self._og_search_description(webpage)
|
||||||
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': content_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
80
youtube_dl/extractor/jove.py
Normal file
80
youtube_dl/extractor/jove.py
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
unified_strdate
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class JoveIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?jove\.com/video/(?P<id>[0-9]+)'
|
||||||
|
_CHAPTERS_URL = 'http://www.jove.com/video-chapters?videoid={video_id:}'
|
||||||
|
_TESTS = [
|
||||||
|
{
|
||||||
|
'url': 'http://www.jove.com/video/2744/electrode-positioning-montage-transcranial-direct-current',
|
||||||
|
'md5': '93723888d82dbd6ba8b3d7d0cd65dd2b',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2744',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Electrode Positioning and Montage in Transcranial Direct Current Stimulation',
|
||||||
|
'description': 'md5:015dd4509649c0908bc27f049e0262c6',
|
||||||
|
'thumbnail': 're:^https?://.*\.png$',
|
||||||
|
'upload_date': '20110523',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.jove.com/video/51796/culturing-caenorhabditis-elegans-axenic-liquid-media-creation',
|
||||||
|
'md5': '914aeb356f416811d911996434811beb',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '51796',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Culturing Caenorhabditis elegans in Axenic Liquid Media and Creation of Transgenic Worms by Microparticle Bombardment',
|
||||||
|
'description': 'md5:35ff029261900583970c4023b70f1dc9',
|
||||||
|
'thumbnail': 're:^https?://.*\.png$',
|
||||||
|
'upload_date': '20140802',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
chapters_id = self._html_search_regex(
|
||||||
|
r'/video-chapters\?videoid=([0-9]+)', webpage, 'chapters id')
|
||||||
|
|
||||||
|
chapters_xml = self._download_xml(
|
||||||
|
self._CHAPTERS_URL.format(video_id=chapters_id),
|
||||||
|
video_id, note='Downloading chapters XML',
|
||||||
|
errnote='Failed to download chapters XML')
|
||||||
|
|
||||||
|
video_url = chapters_xml.attrib.get('video')
|
||||||
|
if not video_url:
|
||||||
|
raise ExtractorError('Failed to get the video URL')
|
||||||
|
|
||||||
|
title = self._html_search_meta('citation_title', webpage, 'title')
|
||||||
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
description = self._html_search_regex(
|
||||||
|
r'<div id="section_body_summary"><p class="jove_content">(.+?)</p>',
|
||||||
|
webpage, 'description', fatal=False)
|
||||||
|
publish_date = unified_strdate(self._html_search_meta(
|
||||||
|
'citation_publication_date', webpage, 'publish date', fatal=False))
|
||||||
|
comment_count = self._html_search_regex(
|
||||||
|
r'<meta name="num_comments" content="(\d+) Comments?"',
|
||||||
|
webpage, 'comment count', fatal=False)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'url': video_url,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'description': description,
|
||||||
|
'upload_date': publish_date,
|
||||||
|
'comment_count': comment_count,
|
||||||
|
}
|
@ -111,17 +111,28 @@ class LivestreamIE(InfoExtractor):
|
|||||||
event_name = mobj.group('event_name')
|
event_name = mobj.group('event_name')
|
||||||
webpage = self._download_webpage(url, video_id or event_name)
|
webpage = self._download_webpage(url, video_id or event_name)
|
||||||
|
|
||||||
if video_id is None:
|
og_video = self._og_search_video_url(webpage, 'player url', fatal=False, default=None)
|
||||||
# This is an event page:
|
if og_video is None:
|
||||||
config_json = self._search_regex(
|
config_json = self._search_regex(
|
||||||
r'window.config = ({.*?});', webpage, 'window config')
|
r'window.config = ({.*?});', webpage, 'window config')
|
||||||
info = json.loads(config_json)['event']
|
info = json.loads(config_json)['event']
|
||||||
|
|
||||||
|
def is_relevant(vdata, vid):
|
||||||
|
result = vdata['type'] == 'video'
|
||||||
|
if video_id is not None:
|
||||||
|
result = result and compat_str(vdata['data']['id']) == vid
|
||||||
|
return result
|
||||||
|
|
||||||
videos = [self._extract_video_info(video_data['data'])
|
videos = [self._extract_video_info(video_data['data'])
|
||||||
for video_data in info['feed']['data']
|
for video_data in info['feed']['data']
|
||||||
if video_data['type'] == 'video']
|
if is_relevant(video_data, video_id)]
|
||||||
return self.playlist_result(videos, info['id'], info['full_name'])
|
if video_id is None:
|
||||||
|
# This is an event page:
|
||||||
|
return self.playlist_result(videos, info['id'], info['full_name'])
|
||||||
|
else:
|
||||||
|
if videos:
|
||||||
|
return videos[0]
|
||||||
else:
|
else:
|
||||||
og_video = self._og_search_video_url(webpage, 'player url')
|
|
||||||
query_str = compat_urllib_parse_urlparse(og_video).query
|
query_str = compat_urllib_parse_urlparse(og_video).query
|
||||||
query = compat_urlparse.parse_qs(query_str)
|
query = compat_urlparse.parse_qs(query_str)
|
||||||
api_url = query['play_url'][0].replace('.smil', '')
|
api_url = query['play_url'][0].replace('.smil', '')
|
||||||
|
@ -9,6 +9,7 @@ from ..utils import (
|
|||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -83,6 +84,21 @@ class MetacafeIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# Movieclips.com video
|
||||||
|
{
|
||||||
|
'url': 'http://www.metacafe.com/watch/mv-Wy7ZU/my_week_with_marilyn_do_you_love_me/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'mv-Wy7ZU',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'My Week with Marilyn - Do You Love Me?',
|
||||||
|
'description': 'From the movie My Week with Marilyn - Colin (Eddie Redmayne) professes his love to Marilyn (Michelle Williams) and gets her to promise to return to set and finish the movie.',
|
||||||
|
'uploader': 'movie_trailers',
|
||||||
|
'duration': 176,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': 'requires rtmpdump',
|
||||||
|
}
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def report_disclaimer(self):
|
def report_disclaimer(self):
|
||||||
@ -134,6 +150,7 @@ class MetacafeIE(InfoExtractor):
|
|||||||
|
|
||||||
# Extract URL, uploader and title from webpage
|
# Extract URL, uploader and title from webpage
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
video_url = None
|
||||||
mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
|
mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
mediaURL = compat_urllib_parse.unquote(mobj.group(1))
|
mediaURL = compat_urllib_parse.unquote(mobj.group(1))
|
||||||
@ -146,16 +163,17 @@ class MetacafeIE(InfoExtractor):
|
|||||||
else:
|
else:
|
||||||
gdaKey = mobj.group(1)
|
gdaKey = mobj.group(1)
|
||||||
video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
|
video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
|
||||||
else:
|
if video_url is None:
|
||||||
mobj = re.search(r'<video src="([^"]+)"', webpage)
|
mobj = re.search(r'<video src="([^"]+)"', webpage)
|
||||||
if mobj:
|
if mobj:
|
||||||
video_url = mobj.group(1)
|
video_url = mobj.group(1)
|
||||||
video_ext = 'mp4'
|
video_ext = 'mp4'
|
||||||
else:
|
if video_url is None:
|
||||||
mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
|
flashvars = self._search_regex(
|
||||||
if mobj is None:
|
r' name="flashvars" value="(.*?)"', webpage, 'flashvars',
|
||||||
raise ExtractorError('Unable to extract media URL')
|
default=None)
|
||||||
vardict = compat_parse_qs(mobj.group(1))
|
if flashvars:
|
||||||
|
vardict = compat_parse_qs(flashvars)
|
||||||
if 'mediaData' not in vardict:
|
if 'mediaData' not in vardict:
|
||||||
raise ExtractorError('Unable to extract media URL')
|
raise ExtractorError('Unable to extract media URL')
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
@ -165,26 +183,68 @@ class MetacafeIE(InfoExtractor):
|
|||||||
mediaURL = mobj.group('mediaURL').replace('\\/', '/')
|
mediaURL = mobj.group('mediaURL').replace('\\/', '/')
|
||||||
video_url = '%s?__gda__=%s' % (mediaURL, mobj.group('key'))
|
video_url = '%s?__gda__=%s' % (mediaURL, mobj.group('key'))
|
||||||
video_ext = determine_ext(video_url)
|
video_ext = determine_ext(video_url)
|
||||||
|
if video_url is None:
|
||||||
|
player_url = self._search_regex(
|
||||||
|
r"swfobject\.embedSWF\('([^']+)'",
|
||||||
|
webpage, 'config URL', default=None)
|
||||||
|
if player_url:
|
||||||
|
config_url = self._search_regex(
|
||||||
|
r'config=(.+)$', player_url, 'config URL')
|
||||||
|
config_doc = self._download_xml(
|
||||||
|
config_url, video_id,
|
||||||
|
note='Downloading video config')
|
||||||
|
smil_url = config_doc.find('.//properties').attrib['smil_file']
|
||||||
|
smil_doc = self._download_xml(
|
||||||
|
smil_url, video_id,
|
||||||
|
note='Downloading SMIL document')
|
||||||
|
base_url = smil_doc.find('./head/meta').attrib['base']
|
||||||
|
video_url = []
|
||||||
|
for vn in smil_doc.findall('.//video'):
|
||||||
|
br = int(vn.attrib['system-bitrate'])
|
||||||
|
play_path = vn.attrib['src']
|
||||||
|
video_url.append({
|
||||||
|
'format_id': 'smil-%d' % br,
|
||||||
|
'url': base_url,
|
||||||
|
'play_path': play_path,
|
||||||
|
'page_url': url,
|
||||||
|
'player_url': player_url,
|
||||||
|
'ext': play_path.partition(':')[0],
|
||||||
|
})
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, 'title')
|
if video_url is None:
|
||||||
|
raise ExtractorError('Unsupported video type')
|
||||||
|
|
||||||
|
video_title = self._html_search_regex(
|
||||||
|
r'(?im)<title>(.*) - Video</title>', webpage, 'title')
|
||||||
description = self._og_search_description(webpage)
|
description = self._og_search_description(webpage)
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
video_uploader = self._html_search_regex(
|
video_uploader = self._html_search_regex(
|
||||||
r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
|
r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
|
||||||
webpage, 'uploader nickname', fatal=False)
|
webpage, 'uploader nickname', fatal=False)
|
||||||
|
duration = int_or_none(
|
||||||
|
self._html_search_meta('video:duration', webpage))
|
||||||
|
|
||||||
if re.search(r'"contentRating":"restricted"', webpage) is not None:
|
age_limit = (
|
||||||
age_limit = 18
|
18
|
||||||
|
if re.search(r'"contentRating":"restricted"', webpage)
|
||||||
|
else 0)
|
||||||
|
|
||||||
|
if isinstance(video_url, list):
|
||||||
|
formats = video_url
|
||||||
else:
|
else:
|
||||||
age_limit = 0
|
formats = [{
|
||||||
|
'url': video_url,
|
||||||
|
'ext': video_ext,
|
||||||
|
}]
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
|
||||||
'description': description,
|
'description': description,
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'thumbnail':thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'ext': video_ext,
|
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
|
'formats': formats,
|
||||||
|
'duration': duration,
|
||||||
}
|
}
|
||||||
|
60
youtube_dl/extractor/mitele.py
Normal file
60
youtube_dl/extractor/mitele.py
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_parse,
|
||||||
|
get_element_by_attribute,
|
||||||
|
parse_duration,
|
||||||
|
strip_jsonp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MiTeleIE(InfoExtractor):
    """Extractor for episode pages on mitele.es.

    The episode page carries an ``MSV.embedData[...]`` JavaScript object.
    Its ``flashvars.host`` entry is downloaded as XML to obtain the
    metadata, and the media link found there is sent to token.mitele.es
    to obtain the final (tokenized) media URL.
    """

    IE_NAME = 'mitele.es'
    _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<episode>[^/]+)/'

    _TEST = {
        'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
        'md5': '6a75fe9d0d3275bead0cb683c616fddb',
        'info_dict': {
            'id': '0fce117d',
            'ext': 'mp4',
            'title': 'Programa 144 - Tor, la web invisible',
            'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
            'display_id': 'programa-144',
            'duration': 2913,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the episode page at *url*."""
        episode = re.match(self._VALID_URL, url).group('episode')
        webpage = self._download_webpage(url, episode)

        # The embed data is written with single quotes; they are swapped for
        # double quotes before the text is handed to the JSON parser.
        raw_embed_data = self._search_regex(
            r'MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data',
            flags=re.DOTALL)
        embed_data = json.loads(raw_embed_data.replace('\'', '"'))

        info_doc = self._download_xml(embed_data['flashvars']['host'], episode)
        info_el = info_doc.find('./video/info')

        # The link from the XML is only an identifier for the token service;
        # the playable URL is the 'tokenizedUrl' field of its answer.
        token_query = compat_urllib_parse.urlencode(
            {'id': info_el.find('videoUrl/link').text})
        token_info = self._download_json(
            'http://token.mitele.es/?' + token_query, episode,
            transform_source=strip_jsonp)

        return {
            'id': embed_data['videoId'],
            'display_id': episode,
            'title': info_el.find('title').text,
            'url': token_info['tokenizedUrl'],
            'description': get_element_by_attribute('class', 'text', webpage),
            'thumbnail': info_el.find('thumb').text,
            'duration': parse_duration(info_el.find('duration').text),
        }
|
58
youtube_dl/extractor/mojvideo.py
Normal file
58
youtube_dl/extractor/mojvideo.py
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
parse_duration,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MojvideoIE(InfoExtractor):
    """Extractor for mojvideo.com video pages.

    All metadata is read from the site's ``playerapi.php`` response with
    regular expressions instead of an XML parser (the response is noted
    below as malformed XML).
    """

    _VALID_URL = r'https?://(?:www\.)?mojvideo\.com/video-(?P<display_id>[^/]+)/(?P<id>[a-f0-9]+)'
    _TEST = {
        'url': 'http://www.mojvideo.com/video-v-avtu-pred-mano-rdecelaska-alfi-nipic/3d1ed4497707730b2906',
        'md5': 'f7fd662cc8ce2be107b0d4f2c0483ae7',
        'info_dict': {
            'id': '3d1ed4497707730b2906',
            'display_id': 'v-avtu-pred-mano-rdecelaska-alfi-nipic',
            'ext': 'mp4',
            'title': 'V avtu pred mano rdečelaska - Alfi Nipič',
            # Raw string: '\.' is not a valid escape in a plain literal.
            'thumbnail': r're:^http://.*\.jpg$',
            'duration': 242,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video page at *url*.

        Raises ExtractorError (expected) when the player API response
        contains ``<error>true</error>``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        # XML is malformed
        playerapi = self._download_webpage(
            'http://www.mojvideo.com/playerapi.php?v=%s&t=1' % video_id, display_id)

        if '<error>true</error>' in playerapi:
            # The description is optional (fatal=False); the error is raised
            # whether or not one was found.
            error_desc = self._html_search_regex(
                r'<errordesc>([^<]*)</errordesc>', playerapi, 'error description', fatal=False)
            raise ExtractorError('%s said: %s' % (self.IE_NAME, error_desc), expected=True)

        title = self._html_search_regex(
            r'<title>([^<]+)</title>', playerapi, 'title')
        video_url = self._html_search_regex(
            r'<file>([^<]+)</file>', playerapi, 'video URL')
        thumbnail = self._html_search_regex(
            r'<preview>([^<]+)</preview>', playerapi, 'thumbnail', fatal=False)
        duration = parse_duration(self._html_search_regex(
            r'<duration>([^<]+)</duration>', playerapi, 'duration', fatal=False))

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
        }
|
78
youtube_dl/extractor/movieclips.py
Normal file
78
youtube_dl/extractor/movieclips.py
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
compat_str,
|
||||||
|
clean_html,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MovieClipsIE(InfoExtractor):
    """Extractor for movieclips.com clip pages.

    The player config XML names a SMIL file; every ``<video>`` entry of the
    SMIL ``<switch>`` becomes one format sharing the SMIL base URL.
    """

    _VALID_URL = r'https?://movieclips\.com/(?P<id>[\da-zA-Z]+)(?:-(?P<display_id>[\da-z-]+))?'
    _TEST = {
        'url': 'http://movieclips.com/Wy7ZU-my-week-with-marilyn-movie-do-you-love-me/',
        'info_dict': {
            'id': 'Wy7ZU',
            'display_id': 'my-week-with-marilyn-movie-do-you-love-me',
            'ext': 'mp4',
            'title': 'My Week with Marilyn - Do You Love Me?',
            'description': 'md5:e86795bd332fe3cff461e7c8dc542acb',
            # Raw string: '\.' is not a valid escape in a plain literal.
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the clip page at *url*.

        Raises ExtractorError (expected) when the config reports
        ``country-region`` as ``'false'``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')
        # The display id is optional in the URL; fall back to the video id
        # for progress messages.
        show_id = display_id or video_id

        config = self._download_xml(
            'http://config.movieclips.com/player/config/%s' % video_id,
            show_id, 'Downloading player config')

        if config.find('./country-region').text == 'false':
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, config.find('./region_alert').text), expected=True)

        properties = config.find('./video/properties')
        smil_file = properties.attrib['smil_file']

        smil = self._download_xml(smil_file, show_id, 'Downloading SMIL')
        base_url = smil.find('./head/meta').attrib['base']

        formats = []
        for video in smil.findall('./body/switch/video'):
            # Floor division keeps the bitrate an integer on Python 2 and 3
            # alike ('/' would yield a float on Python 3).
            vbr = int(video.attrib['system-bitrate']) // 1000
            src = video.attrib['src']
            formats.append({
                'url': base_url,
                'play_path': src,
                # The part of src before the first ':' is used as extension.
                'ext': src.split(':')[0],
                'vbr': vbr,
                'format_id': '%dk' % vbr,
            })

        self._sort_formats(formats)

        title = '%s - %s' % (properties.attrib['clip_movie_title'], properties.attrib['clip_title'])
        description = clean_html(compat_str(properties.attrib['clip_description']))
        thumbnail = properties.attrib['image']
        categories = properties.attrib['clip_categories'].split(',')

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'categories': categories,
            'formats': formats,
        }
|
@ -1,3 +1,4 @@
|
|||||||
|
# encoding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
@ -8,19 +9,34 @@ from ..utils import ExtractorError
|
|||||||
|
|
||||||
|
|
||||||
class NownessIE(InfoExtractor):
|
class NownessIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?nowness\.com/[^?#]*?/(?P<id>[0-9]+)/(?P<slug>[^/]+?)(?:$|[?#])'
|
_VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/[^?#]*?/(?P<id>[0-9]+)/(?P<slug>[^/]+?)(?:$|[?#])'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [
|
||||||
'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation',
|
{
|
||||||
'md5': '068bc0202558c2e391924cb8cc470676',
|
'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation',
|
||||||
'info_dict': {
|
'md5': '068bc0202558c2e391924cb8cc470676',
|
||||||
'id': '2520295746001',
|
'info_dict': {
|
||||||
'ext': 'mp4',
|
'id': '2520295746001',
|
||||||
'description': 'Candor: The Art of Gesticulation',
|
'ext': 'mp4',
|
||||||
'uploader': 'Nowness',
|
'title': 'Candor: The Art of Gesticulation',
|
||||||
'title': 'Candor: The Art of Gesticulation',
|
'description': 'Candor: The Art of Gesticulation',
|
||||||
}
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
}
|
'uploader': 'Nowness',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://cn.nowness.com/day/2014/8/7/4069/kasper-bj-rke-ft-jaakko-eino-kalevi--tnr',
|
||||||
|
'md5': 'e79cf125e387216f86b2e0a5b5c63aa3',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3716354522001',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
|
||||||
|
'description': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
|
'uploader': 'Nowness',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
@ -38,7 +38,7 @@ class NuvidIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
request, video_id, 'Downloading %s page' % format_id)
|
request, video_id, 'Downloading %s page' % format_id)
|
||||||
video_url = self._html_search_regex(
|
video_url = self._html_search_regex(
|
||||||
r'<a href="([^"]+)"\s*>Continue to watch video', webpage, '%s video URL' % format_id, fatal=False)
|
r'<a\s+href="([^"]+)"\s+class="b_link">', webpage, '%s video URL' % format_id, fatal=False)
|
||||||
if not video_url:
|
if not video_url:
|
||||||
continue
|
continue
|
||||||
formats.append({
|
formats.append({
|
||||||
@ -49,19 +49,24 @@ class NuvidIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'http://m.nuvid.com/video/%s' % video_id, video_id, 'Downloading video page')
|
'http://m.nuvid.com/video/%s' % video_id, video_id, 'Downloading video page')
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>', webpage, 'title').strip()
|
[r'<span title="([^"]+)">',
|
||||||
thumbnail = self._html_search_regex(
|
r'<div class="thumb-holder video">\s*<h5[^>]*>([^<]+)</h5>'], webpage, 'title').strip()
|
||||||
r'href="(/thumbs/[^"]+)"[^>]*data-link_type="thumbs"',
|
thumbnails = [
|
||||||
webpage, 'thumbnail URL', fatal=False)
|
{
|
||||||
|
'url': thumb_url,
|
||||||
|
} for thumb_url in re.findall(r'<img src="([^"]+)" alt="" />', webpage)
|
||||||
|
]
|
||||||
|
thumbnail = thumbnails[0]['url'] if thumbnails else None
|
||||||
duration = parse_duration(self._html_search_regex(
|
duration = parse_duration(self._html_search_regex(
|
||||||
r'Length:\s*<span>(\d{2}:\d{2})</span>',webpage, 'duration', fatal=False))
|
r'<i class="fa fa-clock-o"></i>\s*(\d{2}:\d{2})', webpage, 'duration', fatal=False))
|
||||||
upload_date = unified_strdate(self._html_search_regex(
|
upload_date = unified_strdate(self._html_search_regex(
|
||||||
r'Added:\s*<span>(\d{4}-\d{2}-\d{2})</span>', webpage, 'upload date', fatal=False))
|
r'<i class="fa fa-user"></i>\s*(\d{4}-\d{2}-\d{2})', webpage, 'upload date', fatal=False))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': 'http://m.nuvid.com%s' % thumbnail,
|
'thumbnails': thumbnails,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
|
@ -1,40 +0,0 @@
|
|||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import calendar
|
|
||||||
import datetime
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
|
|
||||||
# audios on oe1.orf.at are only available for 7 days, so we can't
|
|
||||||
# add tests.
|
|
||||||
|
|
||||||
|
|
||||||
class OE1IE(InfoExtractor):
|
|
||||||
IE_DESC = 'oe1.orf.at'
|
|
||||||
_VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
show_id = mobj.group('id')
|
|
||||||
|
|
||||||
data = self._download_json(
|
|
||||||
'http://oe1.orf.at/programm/%s/konsole' % show_id,
|
|
||||||
show_id
|
|
||||||
)
|
|
||||||
|
|
||||||
timestamp = datetime.datetime.strptime('%s %s' % (
|
|
||||||
data['item']['day_label'],
|
|
||||||
data['item']['time']
|
|
||||||
), '%d.%m.%Y %H:%M')
|
|
||||||
unix_timestamp = calendar.timegm(timestamp.utctimetuple())
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': show_id,
|
|
||||||
'title': data['item']['title'],
|
|
||||||
'url': data['item']['url_stream'],
|
|
||||||
'ext': 'mp3',
|
|
||||||
'description': data['item'].get('info'),
|
|
||||||
'timestamp': unix_timestamp
|
|
||||||
}
|
|
@ -3,23 +3,38 @@ import re
|
|||||||
import json
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import unescapeHTML
|
from ..utils import (
|
||||||
|
unescapeHTML,
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class OoyalaIE(InfoExtractor):
|
class OoyalaIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(&|$)'
|
_VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(&|$)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [
|
||||||
# From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
|
{
|
||||||
'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
|
# From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
|
||||||
'md5': '3f5cceb3a7bf461d6c29dc466cf8033c',
|
'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
|
||||||
'info_dict': {
|
'md5': '3f5cceb3a7bf461d6c29dc466cf8033c',
|
||||||
'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
|
'info_dict': {
|
||||||
'ext': 'mp4',
|
'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
|
||||||
'title': 'Explaining Data Recovery from Hard Drives and SSDs',
|
'ext': 'mp4',
|
||||||
'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
|
'title': 'Explaining Data Recovery from Hard Drives and SSDs',
|
||||||
|
'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# Only available for ipad
|
||||||
|
'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
|
||||||
|
'md5': '4b9754921fddb68106e48c142e2a01e6',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Simulation Overview - Levels of Simulation',
|
||||||
|
'description': '',
|
||||||
|
},
|
||||||
},
|
},
|
||||||
}
|
]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _url_for_embed_code(embed_code):
|
def _url_for_embed_code(embed_code):
|
||||||
@ -47,13 +62,30 @@ class OoyalaIE(InfoExtractor):
|
|||||||
player = self._download_webpage(player_url, embedCode)
|
player = self._download_webpage(player_url, embedCode)
|
||||||
mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
|
mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
|
||||||
player, 'mobile player url')
|
player, 'mobile player url')
|
||||||
mobile_player = self._download_webpage(mobile_url, embedCode)
|
# Looks like some videos are only available for particular devices
|
||||||
videos_info = self._search_regex(
|
# (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0
|
||||||
r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
|
# is only available for ipad)
|
||||||
mobile_player, 'info').replace('\\"','"')
|
# Working around with fetching URLs for all the devices found starting with 'unknown'
|
||||||
videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"','"')
|
# until we succeed or eventually fail for each device.
|
||||||
|
devices = re.findall(r'device\s*=\s*"([^"]+)";', player)
|
||||||
|
devices.remove('unknown')
|
||||||
|
devices.insert(0, 'unknown')
|
||||||
|
for device in devices:
|
||||||
|
mobile_player = self._download_webpage(
|
||||||
|
'%s&device=%s' % (mobile_url, device), embedCode,
|
||||||
|
'Downloading mobile player JS for %s device' % device)
|
||||||
|
videos_info = self._search_regex(
|
||||||
|
r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
|
||||||
|
mobile_player, 'info', fatal=False, default=None)
|
||||||
|
if videos_info:
|
||||||
|
break
|
||||||
|
if not videos_info:
|
||||||
|
raise ExtractorError('Unable to extract info')
|
||||||
|
videos_info = videos_info.replace('\\"', '"')
|
||||||
|
videos_more_info = self._search_regex(
|
||||||
|
r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"')
|
||||||
videos_info = json.loads(videos_info)
|
videos_info = json.loads(videos_info)
|
||||||
videos_more_info =json.loads(videos_more_info)
|
videos_more_info = json.loads(videos_more_info)
|
||||||
|
|
||||||
if videos_more_info.get('lineup'):
|
if videos_more_info.get('lineup'):
|
||||||
videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]
|
videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]
|
||||||
|
@ -3,6 +3,8 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
import calendar
|
||||||
|
import datetime
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -12,7 +14,9 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class ORFIE(InfoExtractor):
|
class ORFTVthekIE(InfoExtractor):
|
||||||
|
IE_NAME = 'orf:tvthek'
|
||||||
|
IE_DESC = 'ORF TVthek'
|
||||||
_VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)'
|
_VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
@ -105,3 +109,73 @@ class ORFIE(InfoExtractor):
|
|||||||
'entries': entries,
|
'entries': entries,
|
||||||
'id': playlist_id,
|
'id': playlist_id,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Audios on ORF radio are only available for 7 days, so we can't add tests.
|
||||||
|
|
||||||
|
|
||||||
|
class ORFOE1IE(InfoExtractor):
    """Extractor for programme pages on oe1.orf.at."""

    IE_NAME = 'orf:oe1'
    IE_DESC = 'Radio Österreich 1'
    _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'

    def _real_extract(self, url):
        """Return the info dict for the programme at *url*.

        Metadata comes from the programme's ``konsole`` JSON document.
        """
        show_id = re.match(self._VALID_URL, url).group('id')

        konsole_url = 'http://oe1.orf.at/programm/%s/konsole' % show_id
        data = self._download_json(konsole_url, show_id)

        # Day and time are separate fields; join them and parse them with a
        # single format string.
        broadcast_start = '%s %s' % (
            data['item']['day_label'], data['item']['time'])
        parsed_start = datetime.datetime.strptime(
            broadcast_start, '%d.%m.%Y %H:%M')
        # timegm() interprets the time tuple as UTC.
        unix_timestamp = calendar.timegm(parsed_start.utctimetuple())

        return {
            'id': show_id,
            'title': data['item']['title'],
            'url': data['item']['url_stream'],
            'ext': 'mp3',
            'description': data['item'].get('info'),
            'timestamp': unix_timestamp
        }
|
||||||
|
|
||||||
|
|
||||||
|
class ORFFM4IE(InfoExtractor):
    """Extractor for "7 Tage" broadcast pages on fm4.orf.at.

    Each broadcast is returned as a playlist with one entry per stream
    listed in the audio API response.
    """

    # The original assigned IE_DESC twice, which silently discarded
    # 'orf:fm4'; it is the extractor name, as for the sibling ORF classes.
    IE_NAME = 'orf:fm4'
    IE_DESC = 'radio FM4'
    _VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P<date>[0-9]+)/(?P<show>\w+)'

    def _real_extract(self, url):
        """Return a playlist info dict for the broadcast at *url*."""
        mobj = re.match(self._VALID_URL, url)
        show_date = mobj.group('date')
        show_id = mobj.group('show')

        data = self._download_json(
            'http://audioapi.orf.at/fm4/json/2.0/broadcasts/%s/4%s' % (show_date, show_id),
            show_id
        )

        def extract_entry_dict(info, title, subtitle):
            """Build the entry dict for one stream of the broadcast."""
            return {
                'id': info['loopStreamId'].replace('.mp3', ''),
                'url': 'http://loopstream01.apa.at/?channel=fm4&id=%s' % info['loopStreamId'],
                'title': title,
                'description': subtitle,
                # 'start' and 'end' are divided by 1000 before use
                # (presumably milliseconds -> seconds; confirm against API).
                'duration': (info['end'] - info['start']) / 1000,
                'timestamp': info['start'] / 1000,
                'ext': 'mp3'
            }

        entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']]

        return {
            '_type': 'playlist',
            'id': show_id,
            'title': data['title'],
            'description': data['subtitle'],
            'entries': entries
        }
|
100
youtube_dl/extractor/patreon.py
Normal file
100
youtube_dl/extractor/patreon.py
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
js_to_json,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PatreonIE(InfoExtractor):
    """Extractor for patreon.com creation pages (``creation?hid=...``).

    The media URL is taken from an attachment link when the page has one,
    otherwise from the first entry of the page's jPlayer playlist.
    """

    _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(.+)'
    _TESTS = [
        {
            'url': 'http://www.patreon.com/creation?hid=743933',
            'md5': 'e25505eec1053a6e6813b8ed369875cc',
            'info_dict': {
                'id': '743933',
                'ext': 'mp3',
                'title': 'Episode 166: David Smalley of Dogma Debate',
                'uploader': 'Cognitive Dissonance Podcast',
                'thumbnail': 're:^https?://.*$',
            },
        },
        {
            'url': 'http://www.patreon.com/creation?hid=754133',
            'md5': '3eb09345bf44bf60451b8b0b81759d0a',
            'info_dict': {
                'id': '754133',
                'ext': 'mp3',
                'title': 'CD 167 Extra',
                'uploader': 'Cognitive Dissonance Podcast',
                'thumbnail': 're:^https?://.*$',
            },
        },
    ]

    # Currently Patreon exposes download URL via hidden CSS, so login is not
    # needed. Keeping this commented for when this inevitably changes.
    '''
    def _login(self):
        (username, password) = self._get_login_info()
        if username is None:
            return

        login_form = {
            'redirectUrl': 'http://www.patreon.com/',
            'email': username,
            'password': password,
        }

        request = compat_urllib_request.Request(
            'https://www.patreon.com/processLogin',
            compat_urllib_parse.urlencode(login_form).encode('utf-8')
        )
        login_page = self._download_webpage(request, None, note='Logging in as %s' % username)

        if re.search(r'onLoginFailed', login_page):
            raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)

    def _real_initialize(self):
        self._login()
    '''

    def _real_extract(self, url):
        """Return the info dict for the creation page at *url*."""
        video_id = re.match(self._VALID_URL, url).group(1)

        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage).strip()

        attachment_path = self._html_search_regex(
            r'<div class="attach"><a target="_blank" href="([^"]+)">',
            webpage, 'attachment URL', default=None)

        if attachment_path is None:
            # No attachment link: use the first item of the jPlayer playlist
            # embedded in the page's JavaScript.
            playlist_js = self._search_regex(
                r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])',
                webpage, 'playlist JSON')
            first_track = json.loads(js_to_json(playlist_js))[0]
            video_url = self._proto_relative_url(first_track['mp3'])
            thumbnail = self._proto_relative_url(first_track.get('cover'))
            uploader = first_track.get('artist')
        else:
            # The captured href is appended directly to the site origin.
            video_url = 'http://www.patreon.com' + attachment_path
            thumbnail = self._og_search_thumbnail(webpage)
            uploader = self._html_search_regex(
                r'<strong>(.*?)</strong> is creating', webpage, 'uploader')

        return {
            'id': video_id,
            'url': video_url,
            'ext': 'mp3',
            'title': title,
            'uploader': uploader,
            'thumbnail': thumbnail,
        }
|
@ -20,17 +20,53 @@ class PBSIE(InfoExtractor):
|
|||||||
)
|
)
|
||||||
'''
|
'''
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [
|
||||||
'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
|
{
|
||||||
'md5': 'ce1888486f0908d555a8093cac9a7362',
|
'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
|
||||||
'info_dict': {
|
'md5': 'ce1888486f0908d555a8093cac9a7362',
|
||||||
'id': '2365006249',
|
'info_dict': {
|
||||||
'ext': 'mp4',
|
'id': '2365006249',
|
||||||
'title': 'A More Perfect Union',
|
'ext': 'mp4',
|
||||||
'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
|
'title': 'A More Perfect Union',
|
||||||
'duration': 3190,
|
'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
|
||||||
|
'duration': 3190,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
}
|
{
|
||||||
|
'url': 'http://www.pbs.org/wgbh/pages/frontline/losing-iraq/',
|
||||||
|
'md5': '143c98aa54a346738a3d78f54c925321',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2365297690',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Losing Iraq',
|
||||||
|
'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
|
||||||
|
'duration': 5050,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.pbs.org/newshour/bb/education-jan-june12-cyberschools_02-23/',
|
||||||
|
'md5': 'b19856d7f5351b17a5ab1dc6a64be633',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2201174722',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Cyber Schools Gain Popularity, but Quality Questions Persist',
|
||||||
|
'description': 'md5:5871c15cba347c1b3d28ac47a73c7c28',
|
||||||
|
'duration': 801,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://www.pbs.org/wnet/gperf/dudamel-conducts-verdi-requiem-hollywood-bowl-full-episode/3374/',
|
||||||
|
'md5': 'c62859342be2a0358d6c9eb306595978',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2365297708',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'description': 'md5:68d87ef760660eb564455eb30ca464fe',
|
||||||
|
'title': 'Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full',
|
||||||
|
'duration': 6559,
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
def _extract_ids(self, url):
|
def _extract_ids(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
@ -40,15 +76,18 @@ class PBSIE(InfoExtractor):
|
|||||||
if presumptive_id:
|
if presumptive_id:
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
# frontline video embed
|
MEDIA_ID_REGEXES = [
|
||||||
|
r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", # frontline video embed
|
||||||
|
r'class="coveplayerid">([^<]+)<', # coveplayer
|
||||||
|
]
|
||||||
|
|
||||||
media_id = self._search_regex(
|
media_id = self._search_regex(
|
||||||
r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'",
|
MEDIA_ID_REGEXES, webpage, 'media ID', fatal=False, default=None)
|
||||||
webpage, 'frontline video ID', fatal=False, default=None)
|
|
||||||
if media_id:
|
if media_id:
|
||||||
return media_id, presumptive_id
|
return media_id, presumptive_id
|
||||||
|
|
||||||
url = self._search_regex(
|
url = self._search_regex(
|
||||||
r'<iframe\s+id=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
|
r'<iframe\s+(?:class|id)=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
|
||||||
webpage, 'player URL')
|
webpage, 'player URL')
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
|
||||||
|
82
youtube_dl/extractor/playfm.py
Normal file
82
youtube_dl/extractor/playfm.py
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urllib_request,
|
||||||
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PlayFMIE(InfoExtractor):
|
||||||
|
IE_NAME = 'play.fm'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?play\.fm/[^?#]*(?P<upload_date>[0-9]{8})(?P<id>[0-9]{6})(?:$|[?#])'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.play.fm/recording/leipzigelectronicmusicbatofarparis_fr20140712137220',
|
||||||
|
'md5': 'c505f8307825a245d0c7ad1850001f22',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '137220',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12',
|
||||||
|
'uploader': 'Sven Tasnadi',
|
||||||
|
'uploader_id': 'sventasnadi',
|
||||||
|
'duration': 5627.428,
|
||||||
|
'upload_date': '20140712',
|
||||||
|
'view_count': int,
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg$',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
upload_date = mobj.group('upload_date')
|
||||||
|
|
||||||
|
rec_data = compat_urllib_parse.urlencode({'rec_id': video_id})
|
||||||
|
req = compat_urllib_request.Request(
|
||||||
|
'http://www.play.fm/flexRead/recording', data=rec_data)
|
||||||
|
req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||||
|
rec_doc = self._download_xml(req, video_id)
|
||||||
|
|
||||||
|
error_node = rec_doc.find('./error')
|
||||||
|
if error_node is not None:
|
||||||
|
raise ExtractorError('An error occured: %s (code %s)' % (
|
||||||
|
error_node.text, rec_doc.find('./status').text))
|
||||||
|
|
||||||
|
recording = rec_doc.find('./recording')
|
||||||
|
title = recording.find('./title').text
|
||||||
|
view_count = int_or_none(recording.find('./stats/playcount').text)
|
||||||
|
duration = float_or_none(recording.find('./duration').text, scale=1000)
|
||||||
|
thumbnail = recording.find('./image').text
|
||||||
|
|
||||||
|
artist = recording.find('./artists/artist')
|
||||||
|
uploader = artist.find('./name').text
|
||||||
|
uploader_id = artist.find('./slug').text
|
||||||
|
|
||||||
|
video_url = '%s//%s/%s/%s/offset/0/sh/%s/rec/%s/jingle/%s/loc/%s' % (
|
||||||
|
'http:', recording.find('./url').text,
|
||||||
|
recording.find('./_class').text, recording.find('./file_id').text,
|
||||||
|
rec_doc.find('./uuid').text, video_id,
|
||||||
|
rec_doc.find('./jingle/file_id').text,
|
||||||
|
'http%3A%2F%2Fwww.play.fm%2Fplayer',
|
||||||
|
)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'url': video_url,
|
||||||
|
'ext': 'mp3',
|
||||||
|
'filesize': int_or_none(recording.find('./size').text),
|
||||||
|
'title': title,
|
||||||
|
'upload_date': upload_date,
|
||||||
|
'view_count': view_count,
|
||||||
|
'duration': duration,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'uploader': uploader,
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
}
|
@ -1,23 +1,23 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import time
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import strip_jsonp
|
from ..utils import str_or_none
|
||||||
|
|
||||||
|
|
||||||
class ReverbNationIE(InfoExtractor):
|
class ReverbNationIE(InfoExtractor):
|
||||||
_VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
|
_VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
|
'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
|
||||||
'file': '16965047.mp3',
|
|
||||||
'md5': '3da12ebca28c67c111a7f8b262d3f7a7',
|
'md5': '3da12ebca28c67c111a7f8b262d3f7a7',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
"id": "16965047",
|
||||||
|
"ext": "mp3",
|
||||||
"title": "MONA LISA",
|
"title": "MONA LISA",
|
||||||
"uploader": "ALKILADOS",
|
"uploader": "ALKILADOS",
|
||||||
"uploader_id": 216429,
|
"uploader_id": "216429",
|
||||||
"thumbnail": "//gp1.wac.edgecastcdn.net/802892/production_public/Photo/13761700/image/1366002176_AVATAR_MONA_LISA.jpg"
|
"thumbnail": "re:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$"
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@ -26,10 +26,8 @@ class ReverbNationIE(InfoExtractor):
|
|||||||
song_id = mobj.group('id')
|
song_id = mobj.group('id')
|
||||||
|
|
||||||
api_res = self._download_json(
|
api_res = self._download_json(
|
||||||
'https://api.reverbnation.com/song/%s?callback=api_response_5&_=%d'
|
'https://api.reverbnation.com/song/%s' % song_id,
|
||||||
% (song_id, int(time.time() * 1000)),
|
|
||||||
song_id,
|
song_id,
|
||||||
transform_source=strip_jsonp,
|
|
||||||
note='Downloading information of song %s' % song_id
|
note='Downloading information of song %s' % song_id
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -38,8 +36,9 @@ class ReverbNationIE(InfoExtractor):
|
|||||||
'title': api_res.get('name'),
|
'title': api_res.get('name'),
|
||||||
'url': api_res.get('url'),
|
'url': api_res.get('url'),
|
||||||
'uploader': api_res.get('artist', {}).get('name'),
|
'uploader': api_res.get('artist', {}).get('name'),
|
||||||
'uploader_id': api_res.get('artist', {}).get('id'),
|
'uploader_id': str_or_none(api_res.get('artist', {}).get('id')),
|
||||||
'thumbnail': api_res.get('image', api_res.get('thumbnail')),
|
'thumbnail': self._proto_relative_url(
|
||||||
|
api_res.get('image', api_res.get('thumbnail'))),
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'vcodec': 'none',
|
'vcodec': 'none',
|
||||||
}
|
}
|
||||||
|
51
youtube_dl/extractor/rtlnl.py
Normal file
51
youtube_dl/extractor/rtlnl.py
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class RtlXlIE(InfoExtractor):
|
||||||
|
IE_NAME = 'rtlxl.nl'
|
||||||
|
_VALID_URL = r'https?://www\.rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6e4203a6-0a5e-3596-8424-c599a59e0677',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'RTL Nieuws - Laat',
|
||||||
|
'description': 'Dagelijks het laatste nieuws uit binnen- en '
|
||||||
|
'buitenland. Voor nog meer nieuws kunt u ook gebruikmaken van '
|
||||||
|
'onze mobiele apps.',
|
||||||
|
'timestamp': 1408051800,
|
||||||
|
'upload_date': '20140814',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# We download the first bytes of the first fragment, it can't be
|
||||||
|
# processed by the f4m downloader beacuse it isn't complete
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
uuid = mobj.group('uuid')
|
||||||
|
|
||||||
|
info = self._download_json(
|
||||||
|
'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
|
||||||
|
uuid)
|
||||||
|
material = info['material'][0]
|
||||||
|
episode_info = info['episodes'][0]
|
||||||
|
|
||||||
|
f4m_url = 'http://manifest.us.rtl.nl' + material['videopath']
|
||||||
|
progname = info['abstracts'][0]['name']
|
||||||
|
subtitle = material['title'] or info['episodes'][0]['name']
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': uuid,
|
||||||
|
'title': '%s - %s' % (progname, subtitle),
|
||||||
|
'formats': self._extract_f4m_formats(f4m_url, uuid),
|
||||||
|
'timestamp': material['original_date'],
|
||||||
|
'description': episode_info['synopsis'],
|
||||||
|
}
|
@ -1,21 +1,66 @@
|
|||||||
# encoding: utf-8
|
# encoding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
import base64
|
import base64
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
struct_unpack,
|
struct_unpack,
|
||||||
|
remove_end,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _decrypt_url(png):
|
||||||
|
encrypted_data = base64.b64decode(png)
|
||||||
|
text_index = encrypted_data.find(b'tEXt')
|
||||||
|
text_chunk = encrypted_data[text_index - 4:]
|
||||||
|
length = struct_unpack('!I', text_chunk[:4])[0]
|
||||||
|
# Use bytearray to get integers when iterating in both python 2.x and 3.x
|
||||||
|
data = bytearray(text_chunk[8:8 + length])
|
||||||
|
data = [chr(b) for b in data if b != 0]
|
||||||
|
hash_index = data.index('#')
|
||||||
|
alphabet_data = data[:hash_index]
|
||||||
|
url_data = data[hash_index + 1:]
|
||||||
|
|
||||||
|
alphabet = []
|
||||||
|
e = 0
|
||||||
|
d = 0
|
||||||
|
for l in alphabet_data:
|
||||||
|
if d == 0:
|
||||||
|
alphabet.append(l)
|
||||||
|
d = e = (e + 1) % 4
|
||||||
|
else:
|
||||||
|
d -= 1
|
||||||
|
url = ''
|
||||||
|
f = 0
|
||||||
|
e = 3
|
||||||
|
b = 1
|
||||||
|
for letter in url_data:
|
||||||
|
if f == 0:
|
||||||
|
l = int(letter) * 10
|
||||||
|
f = 1
|
||||||
|
else:
|
||||||
|
if e == 0:
|
||||||
|
l += int(letter)
|
||||||
|
url += alphabet[l]
|
||||||
|
e = (b + 3) % 4
|
||||||
|
f = 0
|
||||||
|
b += 1
|
||||||
|
else:
|
||||||
|
e -= 1
|
||||||
|
|
||||||
|
return url
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class RTVEALaCartaIE(InfoExtractor):
|
class RTVEALaCartaIE(InfoExtractor):
|
||||||
IE_NAME = 'rtve.es:alacarta'
|
IE_NAME = 'rtve.es:alacarta'
|
||||||
IE_DESC = 'RTVE a la carta'
|
IE_DESC = 'RTVE a la carta'
|
||||||
_VALID_URL = r'http://www\.rtve\.es/alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'
|
_VALID_URL = r'http://www\.rtve\.es/alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
|
'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
|
||||||
'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43',
|
'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -23,48 +68,15 @@ class RTVEALaCartaIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
|
'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'note': 'Live stream',
|
||||||
def _decrypt_url(self, png):
|
'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
|
||||||
encrypted_data = base64.b64decode(png)
|
'info_dict': {
|
||||||
text_index = encrypted_data.find(b'tEXt')
|
'id': '1694255',
|
||||||
text_chunk = encrypted_data[text_index-4:]
|
'ext': 'flv',
|
||||||
length = struct_unpack('!I', text_chunk[:4])[0]
|
'title': 'TODO',
|
||||||
# Use bytearray to get integers when iterating in both python 2.x and 3.x
|
}
|
||||||
data = bytearray(text_chunk[8:8+length])
|
}]
|
||||||
data = [chr(b) for b in data if b != 0]
|
|
||||||
hash_index = data.index('#')
|
|
||||||
alphabet_data = data[:hash_index]
|
|
||||||
url_data = data[hash_index+1:]
|
|
||||||
|
|
||||||
alphabet = []
|
|
||||||
e = 0
|
|
||||||
d = 0
|
|
||||||
for l in alphabet_data:
|
|
||||||
if d == 0:
|
|
||||||
alphabet.append(l)
|
|
||||||
d = e = (e + 1) % 4
|
|
||||||
else:
|
|
||||||
d -= 1
|
|
||||||
url = ''
|
|
||||||
f = 0
|
|
||||||
e = 3
|
|
||||||
b = 1
|
|
||||||
for letter in url_data:
|
|
||||||
if f == 0:
|
|
||||||
l = int(letter)*10
|
|
||||||
f = 1
|
|
||||||
else:
|
|
||||||
if e == 0:
|
|
||||||
l += int(letter)
|
|
||||||
url += alphabet[l]
|
|
||||||
e = (b + 3) % 4
|
|
||||||
f = 0
|
|
||||||
b += 1
|
|
||||||
else:
|
|
||||||
e -= 1
|
|
||||||
|
|
||||||
return url
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
@ -74,11 +86,57 @@ class RTVEALaCartaIE(InfoExtractor):
|
|||||||
video_id)['page']['items'][0]
|
video_id)['page']['items'][0]
|
||||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
|
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
|
||||||
png = self._download_webpage(png_url, video_id, 'Downloading url information')
|
png = self._download_webpage(png_url, video_id, 'Downloading url information')
|
||||||
video_url = self._decrypt_url(png)
|
video_url = _decrypt_url(png)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': info['title'],
|
'title': info['title'],
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'thumbnail': info['image'],
|
'thumbnail': info.get('image'),
|
||||||
|
'page_url': url,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class RTVELiveIE(InfoExtractor):
|
||||||
|
IE_NAME = 'rtve.es:live'
|
||||||
|
IE_DESC = 'RTVE.es live streams'
|
||||||
|
_VALID_URL = r'http://www\.rtve\.es/(?:deportes/directo|noticias|television)/(?P<id>[a-zA-Z0-9-]+)'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.rtve.es/noticias/directo-la-1/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'directo-la-1',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 're:^La 1 de TVE [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': 'live stream',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
start_time = time.gmtime()
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
player_url = self._search_regex(
|
||||||
|
r'<param name="movie" value="([^"]+)"/>', webpage, 'player URL')
|
||||||
|
title = remove_end(self._og_search_title(webpage), ' en directo')
|
||||||
|
title += ' ' + time.strftime('%Y-%m-%dZ%H%M%S', start_time)
|
||||||
|
|
||||||
|
vidplayer_id = self._search_regex(
|
||||||
|
r' id="vidplayer([0-9]+)"', webpage, 'internal video ID')
|
||||||
|
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id
|
||||||
|
png = self._download_webpage(png_url, video_id, 'Downloading url information')
|
||||||
|
video_url = _decrypt_url(png)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': title,
|
||||||
|
'url': video_url,
|
||||||
|
'app': 'rtve-live-live?ovpfv=2.1.2',
|
||||||
|
'player_url': player_url,
|
||||||
|
'rtmp_live': True,
|
||||||
}
|
}
|
||||||
|
56
youtube_dl/extractor/sbs.py
Normal file
56
youtube_dl/extractor/sbs.py
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
js_to_json,
|
||||||
|
remove_end,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class SBSIE(InfoExtractor):
|
||||||
|
IE_DESC = 'sbs.com.au'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/ondemand/video/single/(?P<id>[0-9]+)/'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
# Original URL is handled by the generic IE which finds the iframe:
|
||||||
|
# http://www.sbs.com.au/thefeed/blog/2014/08/21/dingo-conservation
|
||||||
|
'url': 'http://www.sbs.com.au/ondemand/video/single/320403011771/?source=drupal&vertical=thefeed',
|
||||||
|
'md5': '3150cf278965eeabb5b4cea1c963fe0a',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '320403011771',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Dingo Conservation',
|
||||||
|
'description': 'Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. This family run a dingo conservation park to prevent their extinction',
|
||||||
|
'thumbnail': 're:http://.*\.jpg',
|
||||||
|
},
|
||||||
|
'add_ies': ['generic'],
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
release_urls_json = js_to_json(self._search_regex(
|
||||||
|
r'(?s)playerParams\.releaseUrls\s*=\s*(\{.*?\n\});\n',
|
||||||
|
webpage, ''))
|
||||||
|
release_urls = json.loads(release_urls_json)
|
||||||
|
theplatform_url = (
|
||||||
|
release_urls.get('progressive') or release_urls.get('standard'))
|
||||||
|
|
||||||
|
title = remove_end(self._og_search_title(webpage), ' (The Feed)')
|
||||||
|
description = self._html_search_meta('description', webpage)
|
||||||
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'id': video_id,
|
||||||
|
'url': theplatform_url,
|
||||||
|
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
}
|
@ -17,11 +17,11 @@ class SharedIE(InfoExtractor):
|
|||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://shared.sx/0060718775',
|
'url': 'http://shared.sx/0060718775',
|
||||||
'md5': '53e1c58fc3e777ae1dfe9e57ba2f9c72',
|
'md5': '106fefed92a8a2adb8c98e6a0652f49b',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '0060718775',
|
'id': '0060718775',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Big Buck Bunny Trailer',
|
'title': 'Bmp4',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -37,7 +37,7 @@ class TeamcocoIE(InfoExtractor):
|
|||||||
video_id = mobj.group("video_id")
|
video_id = mobj.group("video_id")
|
||||||
if not video_id:
|
if not video_id:
|
||||||
video_id = self._html_search_regex(
|
video_id = self._html_search_regex(
|
||||||
r'<article class="video" data-id="(\d+?)"',
|
r'data-node-id="(\d+?)"',
|
||||||
webpage, 'video id')
|
webpage, 'video id')
|
||||||
|
|
||||||
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
|
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
|
||||||
|
@ -121,6 +121,21 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
'videopassword': 'youtube-dl',
|
'videopassword': 'youtube-dl',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://vimeo.com/channels/keypeele/75629013',
|
||||||
|
'md5': '2f86a05afe9d7abc0b9126d229bbe15d',
|
||||||
|
'note': 'Video is freely available via original URL '
|
||||||
|
'and protected with password when accessed via http://vimeo.com/75629013',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '75629013',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Key & Peele: Terrorist Interrogation',
|
||||||
|
'description': 'md5:8678b246399b070816b12313e8b4eb5c',
|
||||||
|
'uploader_id': 'atencio',
|
||||||
|
'uploader': 'Peter Atencio',
|
||||||
|
'duration': 187,
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://vimeo.com/76979871',
|
'url': 'http://vimeo.com/76979871',
|
||||||
'md5': '3363dd6ffebe3784d56f4132317fd446',
|
'md5': '3363dd6ffebe3784d56f4132317fd446',
|
||||||
@ -196,8 +211,6 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
if mobj.group('pro') or mobj.group('player'):
|
if mobj.group('pro') or mobj.group('player'):
|
||||||
url = 'http://player.vimeo.com/video/' + video_id
|
url = 'http://player.vimeo.com/video/' + video_id
|
||||||
else:
|
|
||||||
url = 'https://vimeo.com/' + video_id
|
|
||||||
|
|
||||||
# Retrieve video webpage to extract further information
|
# Retrieve video webpage to extract further information
|
||||||
request = compat_urllib_request.Request(url, None, headers)
|
request = compat_urllib_request.Request(url, None, headers)
|
||||||
@ -263,7 +276,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
if video_thumbnail is None:
|
if video_thumbnail is None:
|
||||||
video_thumbs = config["video"].get("thumbs")
|
video_thumbs = config["video"].get("thumbs")
|
||||||
if video_thumbs and isinstance(video_thumbs, dict):
|
if video_thumbs and isinstance(video_thumbs, dict):
|
||||||
_, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in video_thumbs.items())[-1]
|
_, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1]
|
||||||
|
|
||||||
# Extract video description
|
# Extract video description
|
||||||
video_description = None
|
video_description = None
|
||||||
|
@ -44,7 +44,7 @@ class VodlockerIE(InfoExtractor):
|
|||||||
req, video_id, 'Downloading video page')
|
req, video_id, 'Downloading video page')
|
||||||
|
|
||||||
title = self._search_regex(
|
title = self._search_regex(
|
||||||
r'id="file_title".*?>\s*(.*?)\s*<span', webpage, 'title')
|
r'id="file_title".*?>\s*(.*?)\s*<(?:br|span)', webpage, 'title')
|
||||||
thumbnail = self._search_regex(
|
thumbnail = self._search_regex(
|
||||||
r'image:\s*"(http[^\"]+)",', webpage, 'thumbnail')
|
r'image:\s*"(http[^\"]+)",', webpage, 'thumbnail')
|
||||||
url = self._search_regex(
|
url = self._search_regex(
|
||||||
|
@ -1,10 +1,12 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import int_or_none
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
compat_str,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class VubeIE(InfoExtractor):
|
class VubeIE(InfoExtractor):
|
||||||
@ -29,6 +31,7 @@ class VubeIE(InfoExtractor):
|
|||||||
'like_count': int,
|
'like_count': int,
|
||||||
'dislike_count': int,
|
'dislike_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
|
'categories': ['pop', 'music', 'cover', 'singing', 'jessie j', 'price tag', 'chiara grispo'],
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -47,6 +50,7 @@ class VubeIE(InfoExtractor):
|
|||||||
'like_count': int,
|
'like_count': int,
|
||||||
'dislike_count': int,
|
'dislike_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
|
'categories': ['seraina', 'jessica', 'krewella', 'alive'],
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://vube.com/vote/Siren+Gene/0nmsMY5vEq?n=2&t=s',
|
'url': 'http://vube.com/vote/Siren+Gene/0nmsMY5vEq?n=2&t=s',
|
||||||
@ -56,13 +60,15 @@ class VubeIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Frozen - Let It Go Cover by Siren Gene',
|
'title': 'Frozen - Let It Go Cover by Siren Gene',
|
||||||
'description': 'My rendition of "Let It Go" originally sung by Idina Menzel.',
|
'description': 'My rendition of "Let It Go" originally sung by Idina Menzel.',
|
||||||
'uploader': 'Siren Gene',
|
|
||||||
'uploader_id': 'Siren',
|
|
||||||
'thumbnail': 're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/10283ab622a-86c9-4681-51f2-30d1f65774af\.jpg$',
|
'thumbnail': 're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/10283ab622a-86c9-4681-51f2-30d1f65774af\.jpg$',
|
||||||
|
'uploader': 'Siren',
|
||||||
|
'timestamp': 1395448018,
|
||||||
|
'upload_date': '20140322',
|
||||||
'duration': 221.788,
|
'duration': 221.788,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'dislike_count': int,
|
'dislike_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
|
'categories': ['let it go', 'cover', 'idina menzel', 'frozen', 'singing', 'disney', 'siren gene'],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
@ -71,47 +77,40 @@ class VubeIE(InfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
video = self._download_json(
|
||||||
data_json = self._search_regex(
|
'http://vube.com/t-api/v1/video/%s' % video_id, video_id, 'Downloading video JSON')
|
||||||
r'(?s)window\["(?:tapiVideoData|vubeOriginalVideoData)"\]\s*=\s*(\{.*?\n});\n',
|
|
||||||
webpage, 'video data'
|
|
||||||
)
|
|
||||||
data = json.loads(data_json)
|
|
||||||
video = (
|
|
||||||
data.get('video') or
|
|
||||||
data)
|
|
||||||
assert isinstance(video, dict)
|
|
||||||
|
|
||||||
public_id = video['public_id']
|
public_id = video['public_id']
|
||||||
|
|
||||||
formats = [
|
formats = []
|
||||||
{
|
|
||||||
'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (fmt['media_resolution_id'], public_id),
|
for media in video['media'].get('video', []) + video['media'].get('audio', []):
|
||||||
'height': int(fmt['height']),
|
if media['transcoding_status'] != 'processed':
|
||||||
'abr': int(fmt['audio_bitrate']),
|
continue
|
||||||
'vbr': int(fmt['video_bitrate']),
|
fmt = {
|
||||||
'format_id': fmt['media_resolution_id']
|
'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (media['media_resolution_id'], public_id),
|
||||||
} for fmt in video['mtm'] if fmt['transcoding_status'] == 'processed'
|
'abr': int(media['audio_bitrate']),
|
||||||
]
|
'format_id': compat_str(media['media_resolution_id']),
|
||||||
|
}
|
||||||
|
vbr = int(media['video_bitrate'])
|
||||||
|
if vbr:
|
||||||
|
fmt.update({
|
||||||
|
'vbr': vbr,
|
||||||
|
'height': int(media['height']),
|
||||||
|
})
|
||||||
|
formats.append(fmt)
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = video['title']
|
title = video['title']
|
||||||
description = video.get('description')
|
description = video.get('description')
|
||||||
thumbnail = self._proto_relative_url(
|
thumbnail = self._proto_relative_url(video.get('thumbnail_src'), scheme='http:')
|
||||||
video.get('thumbnail') or video.get('thumbnail_src'),
|
uploader = video.get('user_alias') or video.get('channel')
|
||||||
scheme='http:')
|
|
||||||
uploader = data.get('user', {}).get('channel', {}).get('name') or video.get('user_alias')
|
|
||||||
uploader_id = data.get('user', {}).get('name')
|
|
||||||
timestamp = int_or_none(video.get('upload_time'))
|
timestamp = int_or_none(video.get('upload_time'))
|
||||||
duration = video['duration']
|
duration = video['duration']
|
||||||
view_count = video.get('raw_view_count')
|
view_count = video.get('raw_view_count')
|
||||||
like_count = video.get('rlikes')
|
like_count = video.get('total_likes')
|
||||||
if like_count is None:
|
dislike_count = video.get('total_hates')
|
||||||
like_count = video.get('total_likes')
|
|
||||||
dislike_count = video.get('rhates')
|
|
||||||
if dislike_count is None:
|
|
||||||
dislike_count = video.get('total_hates')
|
|
||||||
|
|
||||||
comments = video.get('comments')
|
comments = video.get('comments')
|
||||||
comment_count = None
|
comment_count = None
|
||||||
@ -124,6 +123,8 @@ class VubeIE(InfoExtractor):
|
|||||||
else:
|
else:
|
||||||
comment_count = len(comments)
|
comment_count = len(comments)
|
||||||
|
|
||||||
|
categories = [tag['text'] for tag in video['tags']]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
@ -131,11 +132,11 @@ class VubeIE(InfoExtractor):
|
|||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'uploader_id': uploader_id,
|
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'like_count': like_count,
|
'like_count': like_count,
|
||||||
'dislike_count': dislike_count,
|
'dislike_count': dislike_count,
|
||||||
'comment_count': comment_count,
|
'comment_count': comment_count,
|
||||||
|
'categories': categories,
|
||||||
}
|
}
|
||||||
|
57
youtube_dl/extractor/xboxclips.py
Normal file
57
youtube_dl/extractor/xboxclips.py
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
parse_iso8601,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class XboxClipsIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?xboxclips\.com/video\.php\?.*vid=(?P<id>[\w-]{36})'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325',
|
||||||
|
'md5': 'fbe1ec805e920aeb8eced3c3e657df5d',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '074a69a9-5faf-46aa-b93b-9909c1720325',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Iabdulelah playing Upload Studio',
|
||||||
|
'filesize_approx': 28101836.8,
|
||||||
|
'timestamp': 1407388500,
|
||||||
|
'upload_date': '20140807',
|
||||||
|
'duration': 56,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
video_url = self._html_search_regex(
|
||||||
|
r'>Link: <a href="([^"]+)">', webpage, 'video URL')
|
||||||
|
title = self._html_search_regex(
|
||||||
|
r'<title>XboxClips \| ([^<]+)</title>', webpage, 'title')
|
||||||
|
timestamp = parse_iso8601(self._html_search_regex(
|
||||||
|
r'>Recorded: ([^<]+)<', webpage, 'upload date', fatal=False))
|
||||||
|
filesize = float_or_none(self._html_search_regex(
|
||||||
|
r'>Size: ([\d\.]+)MB<', webpage, 'file size', fatal=False), invscale=1024 * 1024)
|
||||||
|
duration = int_or_none(self._html_search_regex(
|
||||||
|
r'>Duration: (\d+) Seconds<', webpage, 'duration', fatal=False))
|
||||||
|
view_count = int_or_none(self._html_search_regex(
|
||||||
|
r'>Views: (\d+)<', webpage, 'view count', fatal=False))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'url': video_url,
|
||||||
|
'title': title,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'filesize_approx': filesize,
|
||||||
|
'duration': duration,
|
||||||
|
'view_count': view_count,
|
||||||
|
}
|
@ -14,7 +14,7 @@ from ..utils import (
|
|||||||
|
|
||||||
class XHamsterIE(InfoExtractor):
|
class XHamsterIE(InfoExtractor):
|
||||||
"""Information Extractor for xHamster"""
|
"""Information Extractor for xHamster"""
|
||||||
_VALID_URL = r'http://(?:www\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
|
_VALID_URL = r'http://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
|
'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
|
||||||
|
@ -15,7 +15,7 @@ from ..utils import (
|
|||||||
|
|
||||||
class YahooIE(InfoExtractor):
|
class YahooIE(InfoExtractor):
|
||||||
IE_DESC = 'Yahoo screen and movies'
|
IE_DESC = 'Yahoo screen and movies'
|
||||||
_VALID_URL = r'https?://(?:screen|movies)\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html'
|
_VALID_URL = r'(?P<url>https?://(?:screen|movies)\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html)'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
|
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
|
||||||
@ -46,12 +46,23 @@ class YahooIE(InfoExtractor):
|
|||||||
'title': 'The World Loves Spider-Man',
|
'title': 'The World Loves Spider-Man',
|
||||||
'description': '''People all over the world are celebrating the release of \"The Amazing Spider-Man 2.\" We're taking a look at the enthusiastic response Spider-Man has received from viewers all over the world.''',
|
'description': '''People all over the world are celebrating the release of \"The Amazing Spider-Man 2.\" We're taking a look at the enthusiastic response Spider-Man has received from viewers all over the world.''',
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed',
|
||||||
|
'md5': '60e8ac193d8fb71997caa8fce54c6460',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': "Yahoo Saves 'Community'",
|
||||||
|
'description': 'md5:4d4145af2fd3de00cbb6c1d664105053',
|
||||||
|
}
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
url = mobj.group('url')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
items_json = self._search_regex(
|
items_json = self._search_regex(
|
||||||
|
@ -225,7 +225,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||||
|
|
||||||
# Dash webm audio
|
# Dash webm audio
|
||||||
'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50},
|
'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
|
||||||
'172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
|
'172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
|
||||||
|
|
||||||
# RTMP (unnamed)
|
# RTMP (unnamed)
|
||||||
@ -374,6 +374,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
return lambda s: u''.join(s[i] for i in cache_spec)
|
return lambda s: u''.join(s[i] for i in cache_spec)
|
||||||
except IOError:
|
except IOError:
|
||||||
pass # No cache available
|
pass # No cache available
|
||||||
|
except ValueError:
|
||||||
|
try:
|
||||||
|
file_size = os.path.getsize(cache_fn)
|
||||||
|
except (OSError, IOError) as oe:
|
||||||
|
file_size = str(oe)
|
||||||
|
self._downloader.report_warning(
|
||||||
|
u'Cache %s failed (%s)' % (cache_fn, file_size))
|
||||||
|
|
||||||
if player_type == 'js':
|
if player_type == 'js':
|
||||||
code = self._download_webpage(
|
code = self._download_webpage(
|
||||||
@ -501,6 +508,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
sub_lang_list = {}
|
sub_lang_list = {}
|
||||||
for l in lang_list:
|
for l in lang_list:
|
||||||
lang = l[1]
|
lang = l[1]
|
||||||
|
if lang in sub_lang_list:
|
||||||
|
continue
|
||||||
params = compat_urllib_parse.urlencode({
|
params = compat_urllib_parse.urlencode({
|
||||||
'lang': lang,
|
'lang': lang,
|
||||||
'v': video_id,
|
'v': video_id,
|
||||||
|
@ -24,6 +24,7 @@ import socket
|
|||||||
import struct
|
import struct
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
import tempfile
|
||||||
import traceback
|
import traceback
|
||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
import zlib
|
import zlib
|
||||||
@ -228,18 +229,42 @@ else:
|
|||||||
assert type(s) == type(u'')
|
assert type(s) == type(u'')
|
||||||
print(s)
|
print(s)
|
||||||
|
|
||||||
# In Python 2.x, json.dump expects a bytestream.
|
|
||||||
# In Python 3.x, it writes to a character stream
|
|
||||||
if sys.version_info < (3,0):
|
|
||||||
def write_json_file(obj, fn):
|
|
||||||
with open(fn, 'wb') as f:
|
|
||||||
json.dump(obj, f)
|
|
||||||
else:
|
|
||||||
def write_json_file(obj, fn):
|
|
||||||
with open(fn, 'w', encoding='utf-8') as f:
|
|
||||||
json.dump(obj, f)
|
|
||||||
|
|
||||||
if sys.version_info >= (2,7):
|
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically

    The JSON is first written to a temporary file created in the same
    directory as fn and then renamed over fn, so readers never observe a
    partially written file.  If anything fails, the temporary file is
    removed and the original exception is re-raised.
    """

    args = {
        'suffix': '.tmp',
        'prefix': os.path.basename(fn) + '.',
        # Same directory as the target so the final rename does not have to
        # cross file systems.
        'dir': os.path.dirname(fn),
        # The file must survive close() because it is renamed afterwards.
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**args)

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # On Windows os.rename refuses to overwrite an existing file,
            # so the previous version has to be removed first.
            try:
                os.unlink(fn)
            except OSError:
                pass
        os.rename(tf.name, fn)
    except BaseException:
        # Clean up the temporary file on any failure (including
        # KeyboardInterrupt), then propagate the original error.
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
|
||||||
|
|
||||||
|
|
||||||
|
if sys.version_info >= (2, 7):
|
||||||
def find_xpath_attr(node, xpath, key, val):
|
def find_xpath_attr(node, xpath, key, val):
|
||||||
""" Find the xpath xpath[@key=val] """
|
""" Find the xpath xpath[@key=val] """
|
||||||
assert re.match(r'^[a-zA-Z-]+$', key)
|
assert re.match(r'^[a-zA-Z-]+$', key)
|
||||||
@ -827,6 +852,7 @@ def unified_strdate(date_str):
|
|||||||
'%b %dnd %Y %I:%M%p',
|
'%b %dnd %Y %I:%M%p',
|
||||||
'%b %dth %Y %I:%M%p',
|
'%b %dth %Y %I:%M%p',
|
||||||
'%Y-%m-%d',
|
'%Y-%m-%d',
|
||||||
|
'%Y/%m/%d',
|
||||||
'%d.%m.%Y',
|
'%d.%m.%Y',
|
||||||
'%d/%m/%Y',
|
'%d/%m/%Y',
|
||||||
'%Y/%m/%d %H:%M:%S',
|
'%Y/%m/%d %H:%M:%S',
|
||||||
@ -1259,6 +1285,12 @@ def remove_start(s, start):
|
|||||||
return s
|
return s
|
||||||
|
|
||||||
|
|
||||||
|
def remove_end(s, end):
    """ Return s without the trailing string end, if s ends with it

    s is returned unchanged when it does not end with end, and also when end
    is empty: every string "ends with" the empty string, and slicing with
    s[:-0] would otherwise wipe out the whole value.
    """
    if end and s.endswith(end):
        return s[:-len(end)]
    return s
|
||||||
|
|
||||||
|
|
||||||
def url_basename(url):
|
def url_basename(url):
|
||||||
path = compat_urlparse.urlparse(url).path
|
path = compat_urlparse.urlparse(url).path
|
||||||
return path.strip(u'/').split(u'/')[-1]
|
return path.strip(u'/').split(u'/')[-1]
|
||||||
@ -1273,9 +1305,15 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
|
|||||||
if get_attr:
|
if get_attr:
|
||||||
if v is not None:
|
if v is not None:
|
||||||
v = getattr(v, get_attr, None)
|
v = getattr(v, get_attr, None)
|
||||||
|
if v == '':
|
||||||
|
v = None
|
||||||
return default if v is None else (int(v) * invscale // scale)
|
return default if v is None else (int(v) * invscale // scale)
|
||||||
|
|
||||||
|
|
||||||
|
def str_or_none(v, default=None):
    """ Convert v with compat_str, or return default when v is None """
    if v is None:
        return default
    return compat_str(v)
|
||||||
|
|
||||||
|
|
||||||
def str_to_int(int_str):
|
def str_to_int(int_str):
|
||||||
if int_str is None:
|
if int_str is None:
|
||||||
return None
|
return None
|
||||||
@ -1442,6 +1480,34 @@ def strip_jsonp(code):
|
|||||||
return re.sub(r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?\s*$', r'\1', code)
|
return re.sub(r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?\s*$', r'\1', code)
|
||||||
|
|
||||||
|
|
||||||
|
def js_to_json(code):
    """ Convert a simple JavaScript object literal into JSON text

    This is a regex based, best-effort conversion, not a JavaScript parser.
    For every "key: value" pair that follows a "{" or "," it
      * wraps bare keys in double quotes,
      * turns single-quoted keys and values into double-quoted strings,
        escaping any double quote they contain,
    and afterwards drops a trailing comma in front of "]" or "}".
    Values that are arrays or objects are left to be handled by the
    following matches.
    """
    def requote(token):
        # Only single-quoted tokens need work; numbers, literals, already
        # double-quoted strings and the empty look-ahead match pass through.
        if not token.startswith("'"):
            return token
        # Escape bare double quotes, but keep existing backslash escapes
        # (including an already escaped \") exactly as they are.
        escaped = re.sub(
            r'\\.|"',
            lambda m: '\\"' if m.group(0) == '"' else m.group(0),
            token[1:-1])
        return '"%s"' % escaped

    def fix_kv(m):
        key = m.group(2)
        if key.startswith("'") or key.startswith('"'):
            key = requote(key)
        else:
            key = '"%s"' % key
        return m.group(1) + key + m.group(3) + requote(m.group(4))

    # The opening bracket of a nested array/object is only looked at, not
    # consumed: it has to stay available as the "{" that introduces the
    # first key of the nested object.
    res = re.sub(r'''(?x)
        ([{,]\s*)
        ("[^"]*"|\'[^\']*\'|[a-zA-Z0-9_$]+)
        (:\s*)
        (-?[0-9.]+|true|false|null|"[^"]*"|\'[^\']*\'|(?=[\[{]))
        ''', fix_kv, code)
    # JSON does not allow a trailing comma before a closing bracket.
    res = re.sub(r',(\s*[\]}])', lambda m: m.group(1), res)
    return res
|
||||||
|
|
||||||
|
|
||||||
def qualities(quality_ids):
|
def qualities(quality_ids):
|
||||||
""" Get a numeric quality value out of a list of possible values """
|
""" Get a numeric quality value out of a list of possible values """
|
||||||
def q(qid):
|
def q(qid):
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2014.08.02.1'
|
__version__ = '2014.08.23'
|
||||||
|
Reference in New Issue
Block a user