Compare commits
45 commits: 2014.11.09 ... 2014.11.13

Commits (SHA1):
69ede8ef81, 609a61e3e6, bf951c5e29, af63fed7d8, 68d1d41c03, 3deed1e91a, 11b28e93d3, c3d582985f, 4c0924bb24, 3fa5bb3802,
c47ec62b83, e4bdb37ec6, 3e6e4999ca, 0e15e725a0, 437f68d868, d91d124081, 2d42905b68, cbe71cb41d, 894dd8682e, 9e05d039e0,
bbd5f2de5e, 73689dafbf, 4b50ba0989, 5ccaddf5b1, 0b201a3134, ffe38646ca, b703ab4d7f, c6afed48ff, 732c848c14, 9d2a4dae90,
7009a9047a, 498942f187, 28465df1ff, ef89dba58f, 13ba3a6461, 8f6ec4bbe6, c295490830, eb4cb42a02, 7a8cbc72b2, 2774852c2f,
bbcc21efd1, 60526d6bcb, 3898c8a7b2, b868c972d1, 982a58d049
AUTHORS (+2)
@@ -80,3 +80,5 @@ Damon Timm
 winwon
 Xavier Beynon
 Gabriel Schubiner
+xantares
+Jan Matějka
README.md
@@ -507,7 +507,7 @@ If you want to add support for a new site, you can follow this quick list (assum
 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will be then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
 7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want.
 8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501).
-9. When the tests pass, [add](https://www.kernel.org/pub/software/scm/git/docs/git-add.html) the new files and [commit](https://www.kernel.org/pub/software/scm/git/docs/git-commit.html) them and [push](https://www.kernel.org/pub/software/scm/git/docs/git-push.html) the result, like this:
+9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this:
 
         $ git add youtube_dl/extractor/__init__.py
         $ git add youtube_dl/extractor/yourextractor.py
test/helper.py
@@ -145,7 +145,8 @@ def expect_info_dict(self, expected_dict, got_dict):
         info_dict_str = ''.join(
             ' %s: %s,\n' % (_repr(k), _repr(v))
             for k, v in test_info_dict.items())
-        write_string('\n"info_dict": {\n' + info_dict_str + '}\n', out=sys.stderr)
+        write_string(
+            '\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr)
         self.assertFalse(
             missing_keys,
             'Missing keys in test definition: %s' % (
test/test_utils.py
@@ -16,6 +16,7 @@ import json
 import xml.etree.ElementTree
 
 from youtube_dl.utils import (
+    clean_html,
     DateRange,
     encodeFilename,
     find_xpath_attr,
@@ -45,6 +46,7 @@ from youtube_dl.utils import (
     escape_url,
     js_to_json,
     get_filesystem_encoding,
+    intlist_to_bytes,
 )
 
 
@@ -282,6 +284,10 @@ class TestUtil(unittest.TestCase):
         d = json.loads(stripped)
         self.assertEqual(d, [{"id": "532cb", "x": 3}])
 
+        stripped = strip_jsonp('parseMetadata({"STATUS":"OK"})\n\n\n//epc')
+        d = json.loads(stripped)
+        self.assertEqual(d, {'STATUS': 'OK'})
+
     def test_uppercase_escape(self):
         self.assertEqual(uppercase_escape('aä'), 'aä')
         self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
@@ -345,5 +351,14 @@ class TestUtil(unittest.TestCase):
         on = js_to_json('{"abc": true}')
         self.assertEqual(json.loads(on), {'abc': True})
 
+    def test_clean_html(self):
+        self.assertEqual(clean_html('a:\nb'), 'a: b')
+        self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
+
+    def test_intlist_to_bytes(self):
+        self.assertEqual(
+            intlist_to_bytes([0, 1, 127, 128, 255]),
+            b'\x00\x01\x7f\x80\xff')
+
 if __name__ == '__main__':
     unittest.main()
test/test_youtube_signature.py
@@ -14,7 +14,7 @@ import re
 import string
 
 from youtube_dl.extractor import YoutubeIE
-from youtube_dl.utils import compat_str, compat_urlretrieve
+from youtube_dl.compat import compat_str, compat_urlretrieve
 
 _TESTS = [
     (
youtube_dl/YoutubeDL.py
@@ -837,14 +837,14 @@ class YoutubeDL(object):
                 format_1, format_2 = rf.split('+')
                 formats_info = (self.select_format(format_1, formats),
                                 self.select_format(format_2, formats))
+                if all(formats_info):
                     # The first format must contain the video and the
                     # second the audio
                     if formats_info[0].get('vcodec') == 'none':
-                        self.report_error('The first format must contain '
-                                          'the video, try using "-f %s+%s"' %
-                                          (format_2, format_1))
+                        self.report_error('The first format must '
+                                          'contain the video, try using '
+                                          '"-f %s+%s"' % (format_2, format_1))
                         return
-                if all(formats_info):
                     selected_format = {
                         'requested_formats': formats_info,
                         'format': rf,
@@ -1306,11 +1306,13 @@ class YoutubeDL(object):
             self.report_warning(
                 'Your Python is broken! Update to a newer and supported version')
 
+        stdout_encoding = getattr(
+            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
         encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
-               sys.stdout.encoding,
+               stdout_encoding,
                self.get_encoding()))
         write_string(encoding_str, encoding=None)
 
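The getattr fallback above is there so the debug-encoding header does not crash when sys.stdout has been replaced by an object without an encoding attribute. A minimal sketch of that behaviour; the FakeStdout class is purely illustrative, not part of the change:

    class FakeStdout(object):
        """Illustrative stand-in for a wrapped/replaced sys.stdout without .encoding."""
        def write(self, s):
            pass

    stdout_encoding = getattr(
        FakeStdout(), 'encoding', 'missing (%s)' % type(FakeStdout()).__name__)
    assert stdout_encoding == 'missing (FakeStdout)'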
youtube_dl/extractor/__init__.py
@@ -127,6 +127,7 @@ from .francetv import (
 )
 from .freesound import FreesoundIE
 from .freespeech import FreespeechIE
+from .freevideo import FreeVideoIE
 from .funnyordie import FunnyOrDieIE
 from .gamekings import GamekingsIE
 from .gameone import (
@@ -141,6 +142,7 @@ from .generic import GenericIE
 from .glide import GlideIE
 from .globo import GloboIE
 from .godtube import GodTubeIE
+from .goldenmoustache import GoldenMoustacheIE
 from .golem import GolemIE
 from .googleplus import GooglePlusIE
 from .googlesearch import GoogleSearchIE
@@ -323,6 +325,7 @@ from .sbs import SBSIE
 from .scivee import SciVeeIE
 from .screencast import ScreencastIE
 from .servingsys import ServingSysIE
+from .sexu import SexuIE
 from .sexykarma import SexyKarmaIE
 from .shared import SharedIE
 from .sharesix import ShareSixIE
youtube_dl/extractor/abc.py
@@ -11,13 +11,13 @@ class ABCIE(InfoExtractor):
     _VALID_URL = r'http://www\.abc\.net\.au/news/[^/]+/[^/]+/(?P<id>\d+)'
 
     _TEST = {
-        'url': 'http://www.abc.net.au/news/2014-07-25/bringing-asylum-seekers-to-australia-would-give/5624716',
-        'md5': 'dad6f8ad011a70d9ddf887ce6d5d0742',
+        'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
+        'md5': 'cb3dd03b18455a661071ee1e28344d9f',
         'info_dict': {
-            'id': '5624716',
+            'id': '5868334',
             'ext': 'mp4',
-            'title': 'Bringing asylum seekers to Australia would give them right to asylum claims: professor',
-            'description': 'md5:ba36fa5e27e5c9251fd929d339aea4af',
+            'title': 'Australia to help staff Ebola treatment centre in Sierra Leone',
+            'description': 'md5:809ad29c67a05f54eb41f2a105693a67',
         },
     }
 
youtube_dl/extractor/allocine.py
@@ -22,7 +22,7 @@ class AllocineIE(InfoExtractor):
             'id': '19546517',
             'ext': 'mp4',
             'title': 'Astérix - Le Domaine des Dieux Teaser VF',
-            'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
+            'description': 'md5:abcd09ce503c6560512c14ebfdb720d2',
             'thumbnail': 're:http://.*\.jpg',
         },
     }, {
youtube_dl/extractor/bandcamp.py
@@ -110,20 +110,25 @@ class BandcampAlbumIE(InfoExtractor):
         'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
         'playlist': [
             {
-                'file': '1353101989.mp3',
                 'md5': '39bc1eded3476e927c724321ddf116cf',
                 'info_dict': {
+                    'id': '1353101989',
+                    'ext': 'mp3',
                     'title': 'Intro',
                 }
             },
             {
-                'file': '38097443.mp3',
                 'md5': '1a2c32e2691474643e912cc6cd4bffaa',
                 'info_dict': {
+                    'id': '38097443',
+                    'ext': 'mp3',
                     'title': 'Kero One - Keep It Alive (Blazo remix)',
                 }
             },
         ],
+        'info_dict': {
+            'title': 'Jazz Format Mixtape vol.1',
+        },
         'params': {
             'playlistend': 2
         },
youtube_dl/extractor/byutv.py
@@ -10,12 +10,12 @@ from ..utils import ExtractorError
 class BYUtvIE(InfoExtractor):
     _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
     _TEST = {
-        'url': 'http://www.byutv.org/watch/44e80f7b-e3ba-43ba-8c51-b1fd96c94a79/granite-flats-talking',
+        'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
         'info_dict': {
-            'id': 'granite-flats-talking',
+            'id': 'studio-c-season-5-episode-5',
             'ext': 'mp4',
-            'description': 'md5:4e9a7ce60f209a33eca0ac65b4918e1c',
-            'title': 'Talking',
+            'description': 'md5:5438d33774b6bdc662f9485a340401cc',
+            'title': 'Season 5 Episode 5',
             'thumbnail': 're:^https?://.*promo.*'
         },
         'params': {
youtube_dl/extractor/dailymotion.py
@@ -94,7 +94,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
 
         # It may just embed a vevo video:
         m_vevo = re.search(
-            r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?videoId=(?P<id>[\w]*)',
+            r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)',
             webpage)
         if m_vevo is not None:
             vevo_id = m_vevo.group('id')
youtube_dl/extractor/freevideo.py (new file, 38 lines)
@@ -0,0 +1,38 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class FreeVideoIE(InfoExtractor):
+    _VALID_URL = r'^http://www.freevideo.cz/vase-videa/(?P<id>[^.]+)\.html(?:$|[?#])'
+
+    _TEST = {
+        'url': 'http://www.freevideo.cz/vase-videa/vysukany-zadecek-22033.html',
+        'info_dict': {
+            'id': 'vysukany-zadecek-22033',
+            'ext': 'mp4',
+            "title": "vysukany-zadecek-22033",
+            "age_limit": 18,
+        },
+        'skip': 'Blocked outside .cz',
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage, handle = self._download_webpage_handle(url, video_id)
+        if '//www.czechav.com/' in handle.geturl():
+            raise ExtractorError(
+                'Access to freevideo is blocked from your location',
+                expected=True)
+
+        video_url = self._search_regex(
+            r'\s+url: "(http://[a-z0-9-]+.cdn.freevideo.cz/stream/.*?/video.mp4)"',
+            webpage, 'video URL')
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': video_id,
+            'age_limit': 18,
+        }
youtube_dl/extractor/funnyordie.py
@@ -21,7 +21,6 @@ class FunnyOrDieIE(InfoExtractor):
         },
     }, {
         'url': 'http://www.funnyordie.com/embed/e402820827',
-        'md5': '29f4c5e5a61ca39dfd7e8348a75d0aad',
         'info_dict': {
             'id': 'e402820827',
             'ext': 'mp4',
youtube_dl/extractor/generic.py
@@ -434,7 +434,17 @@ class GenericIE(InfoExtractor):
                 'title': 'Chet Chat 171 - Oct 29, 2014',
                 'upload_date': '20141029',
             }
+        },
+        # Livestream embed
+        {
+            'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
+            'info_dict': {
+                'id': '67864563',
+                'ext': 'flv',
+                'upload_date': '20141112',
+                'title': 'Rosetta #CometLanding webcast HL 10',
             }
+        },
     ]
 
     def report_following_redirect(self, new_url):
@@ -916,6 +926,12 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
 
+        mobj = re.search(
+            r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
+            webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'Livestream')
+
         def check_video(vurl):
             vpath = compat_urlparse.urlparse(vurl).path
             vext = determine_ext(vpath)
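A quick sanity check of the new Livestream iframe detection, reusing the player URL from the livestream.py test added in this same changeset; the surrounding iframe markup is an assumption for illustration only:

    import re

    LIVESTREAM_IFRAME_RE = (
        r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"')
    sample = ('<iframe src="https://new.livestream.com/accounts/362/events/3557232'
              '/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640">'
              '</iframe>')
    mobj = re.search(LIVESTREAM_IFRAME_RE, sample)
    assert mobj is not None and '/videos/67864563/player' in mobj.group('url')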
youtube_dl/extractor/goldenmoustache.py (new file, 48 lines)
@@ -0,0 +1,48 @@
+from __future__ import unicode_literals
+
+import re
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    int_or_none,
+)
+
+
+class GoldenMoustacheIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?goldenmoustache\.com/(?P<display_id>[\w-]+)-(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.goldenmoustache.com/suricate-le-poker-3700/',
+        'md5': '0f904432fa07da5054d6c8beb5efb51a',
+        'info_dict': {
+            'id': '3700',
+            'ext': 'mp4',
+            'title': 'Suricate - Le Poker',
+            'description': 'md5:3d1f242f44f8c8cb0a106f1fd08e5dc9',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'view_count': int,
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        video_url = self._html_search_regex(
+            r'data-src-type="mp4" data-src="([^"]+)"', webpage, 'video URL')
+        title = self._html_search_regex(
+            r'<title>(.*?) - Golden Moustache</title>', webpage, 'title')
+        thumbnail = self._og_search_thumbnail(webpage)
+        description = self._og_search_description(webpage)
+        view_count = int_or_none(self._html_search_regex(
+            r'<strong>([0-9]+)</strong>\s*VUES</span>',
+            webpage, 'view count', fatal=False))
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'ext': 'mp4',
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'view_count': view_count,
+        }
youtube_dl/extractor/livestream.py
@@ -18,7 +18,7 @@ from ..utils import (
 
 class LivestreamIE(InfoExtractor):
     IE_NAME = 'livestream'
-    _VALID_URL = r'http://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
+    _VALID_URL = r'https?://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>[0-9]+)(?:/player)?)?/?(?:$|[?#])'
     _TESTS = [{
         'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
         'md5': '53274c76ba7754fb0e8d072716f2292b',
@@ -37,6 +37,9 @@ class LivestreamIE(InfoExtractor):
             'title': 'TEDCity2.0 (English)',
         },
         'playlist_mincount': 4,
+    }, {
+        'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640',
+        'only_matching': True,
     }]
 
     def _parse_smil(self, video_id, smil_url):
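The relaxed _VALID_URL now also accepts https, a /player suffix and a query string; a short sketch, with both the pattern and the URL taken straight from this diff:

    import re

    _VALID_URL = (r'https?://new\.livestream\.com/.*?/(?P<event_name>.*?)'
                  r'(/videos/(?P<id>[0-9]+)(?:/player)?)?/?(?:$|[?#])')
    url = ('https://new.livestream.com/accounts/362/events/3557232'
           '/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640')
    mobj = re.match(_VALID_URL, url)
    assert mobj is not None and mobj.group('id') == '67864563'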
youtube_dl/extractor/mtv.py
@@ -33,7 +33,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
         m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url)
         if not m:
             return rtmp_video_url
-        base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
+        base = 'http://viacommtvstrmfs.fplive.net/'
         return base + m.group('finalid')
 
     def _get_feed_url(self, uri):
youtube_dl/extractor/myspass.py
@@ -13,9 +13,10 @@ class MySpassIE(InfoExtractor):
     _VALID_URL = r'http://www\.myspass\.de/.*'
     _TEST = {
         'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
-        'file': '11741.mp4',
         'md5': '0b49f4844a068f8b33f4b7c88405862b',
         'info_dict': {
+            'id': '11741',
+            'ext': 'mp4',
             "description": "Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?",
             "title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2",
         },
youtube_dl/extractor/npo.py
@@ -7,6 +7,7 @@ from ..utils import (
     unified_strdate,
     parse_duration,
     qualities,
+    strip_jsonp,
     url_basename,
 )
 
@@ -63,7 +64,7 @@ class NPOIE(InfoExtractor):
             'http://e.omroep.nl/metadata/aflevering/%s' % video_id,
             video_id,
             # We have to remove the javascript callback
-            transform_source=lambda j: re.sub(r'parseMetadata\((.*?)\);\n//.*$', r'\1', j)
+            transform_source=strip_jsonp,
         )
         token_page = self._download_webpage(
             'http://ida.omroep.nl/npoplayer/i.js',
youtube_dl/extractor/sexu.py (new file, 61 lines)
@@ -0,0 +1,61 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class SexuIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?sexu\.com/(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://sexu.com/961791/',
+        'md5': 'ff615aca9691053c94f8f10d96cd7884',
+        'info_dict': {
+            'id': '961791',
+            'ext': 'mp4',
+            'title': 'md5:4d05a19a5fc049a63dbbaf05fb71d91b',
+            'description': 'md5:c5ed8625eb386855d5a7967bd7b77a54',
+            'categories': list,  # NSFW
+            'thumbnail': 're:https?://.*\.jpg$',
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        quality_arr = self._search_regex(
+            r'sources:\s*\[([^\]]+)\]', webpage, 'forrmat string')
+        formats = [{
+            'url': fmt[0].replace('\\', ''),
+            'format_id': fmt[1],
+            'height': int(fmt[1][:3]),
+        } for fmt in re.findall(r'"file":"([^"]+)","label":"([^"]+)"', quality_arr)]
+        self._sort_formats(formats)
+
+        title = self._html_search_regex(
+            r'<title>([^<]+)\s*-\s*Sexu\.Com</title>', webpage, 'title')
+
+        description = self._html_search_meta(
+            'description', webpage, 'description')
+
+        thumbnail = self._html_search_regex(
+            r'image:\s*"([^"]+)"',
+            webpage, 'thumbnail', fatal=False)
+
+        categories_str = self._html_search_meta(
+            'keywords', webpage, 'categories')
+        categories = (
+            None if categories_str is None
+            else categories_str.split(','))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'categories': categories,
+            'formats': formats,
+            'age_limit': 18,
+        }
youtube_dl/extractor/spiegel.py
@@ -4,11 +4,11 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import compat_urlparse
+from ..compat import compat_urlparse
 
 
 class SpiegelIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
+    _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed)?(?:\.html)?(?:#.*)?$'
     _TESTS = [{
         'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
         'md5': '2c2754212136f35fb4b19767d242f66e',
@@ -29,16 +29,24 @@ class SpiegelIE(InfoExtractor):
             'description': 'md5:c2322b65e58f385a820c10fa03b2d088',
             'duration': 983,
         },
+    }, {
+        'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html',
+        'md5': 'd8eeca6bfc8f1cd6f490eb1f44695d51',
+        'info_dict': {
+            'id': '1519126',
+            'ext': 'mp4',
+            'description': 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.',
+            'title': 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"',
+        }
     }]
 
     def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
-        video_id = m.group('videoID')
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
 
-        title = self._html_search_regex(
-            r'<div class="module-title">(.*?)</div>', webpage, 'title')
+        title = re.sub(r'\s+', ' ', self._html_search_regex(
+            r'(?s)<(?:h1|div) class="module-title"[^>]*>(.*?)</(?:h1|div)>',
+            webpage, 'title'))
         description = self._html_search_meta('description', webpage, 'description')
 
         base_url = self._search_regex(
@@ -79,7 +87,7 @@ class SpiegelArticleIE(InfoExtractor):
     _VALID_URL = 'https?://www\.spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html'
     IE_NAME = 'Spiegel:Article'
     IE_DESC = 'Articles on spiegel.de'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html',
         'info_dict': {
             'id': '1516455',
@@ -87,20 +95,34 @@ class SpiegelArticleIE(InfoExtractor):
             'title': 'Faszination Badminton: Nennt es bloß nicht Federball',
             'description': 're:^Patrick Kämnitz gehört.{100,}',
         },
-    }
+    }, {
+        'url': 'http://www.spiegel.de/wissenschaft/weltall/astronaut-alexander-gerst-antwortet-spiegel-online-lesern-a-989876.html',
+        'info_dict': {
+
+        },
+        'playlist_count': 6,
+    }]
 
     def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
-        video_id = m.group('id')
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
 
+        # Single video on top of the page
         video_link = self._search_regex(
             r'<a href="([^"]+)" onclick="return spOpenVideo\(this,', webpage,
-            'video page URL')
-        video_url = compat_urlparse.urljoin(
-            self.http_scheme() + '//spiegel.de/', video_link)
-
-        return {
-            '_type': 'url',
-            'url': video_url,
-        }
+            'video page URL', default=None)
+        if video_link:
+            video_url = compat_urlparse.urljoin(
+                self.http_scheme() + '//spiegel.de/', video_link)
+            return self.url_result(video_url)
+
+        # Multiple embedded videos
+        embeds = re.findall(
+            r'<div class="vid_holder[0-9]+.*?</div>\s*.*?url\s*=\s*"([^"]+)"',
+            webpage)
+        entries = [
+            self.url_result(compat_urlparse.urljoin(
+                self.http_scheme() + '//spiegel.de/', embed_path))
+            for embed_path in embeds
+        ]
+        return self.playlist_result(entries)
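The broadened title regex plus the re.sub(r'\s+', ' ', ...) wrapper is meant to cope with multi-line <h1 class="module-title"> markup; a small sketch against made-up markup, where only the pattern comes from the diff:

    import re

    webpage = '<h1 class="module-title">Fragen an\n   Astronaut Alexander Gerst</h1>'
    title = re.sub(r'\s+', ' ', re.search(
        r'(?s)<(?:h1|div) class="module-title"[^>]*>(.*?)</(?:h1|div)>',
        webpage).group(1))
    assert title == 'Fragen an Astronaut Alexander Gerst'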
youtube_dl/extractor/ted.py
@@ -38,6 +38,7 @@ class TEDIE(SubtitlesInfoExtractor):
                 'actively fooling us.'),
             'uploader': 'Dan Dennett',
             'width': 854,
+            'duration': 1308,
         }
     }, {
         'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms',
@@ -57,6 +58,7 @@ class TEDIE(SubtitlesInfoExtractor):
             'title': 'Be passionate. Be courageous. Be your best.',
             'uploader': 'Gabby Giffords and Mark Kelly',
             'description': 'md5:5174aed4d0f16021b704120360f72b92',
+            'duration': 1128,
         },
     }, {
         'url': 'http://www.ted.com/playlists/who_are_the_hackers',
@@ -178,6 +180,7 @@ class TEDIE(SubtitlesInfoExtractor):
             'description': self._og_search_description(webpage),
             'subtitles': video_subtitles,
             'formats': formats,
+            'duration': talk_info.get('duration'),
         }
 
     def _get_available_subtitles(self, video_id, talk_info):
youtube_dl/extractor/tvplay.py
@@ -4,9 +4,9 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
     ExtractorError,
-    compat_str,
     parse_iso8601,
     qualities,
 )
@@ -176,8 +176,7 @@ class TVPlayIE(InfoExtractor):
     ]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         video = self._download_json(
             'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON')
@@ -208,6 +207,10 @@ class TVPlayIE(InfoExtractor):
                     'app': m.group('app'),
                     'play_path': m.group('playpath'),
                 })
+            elif video_url.endswith('.f4m'):
+                formats.extend(self._extract_f4m_formats(
+                    video_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', video_id))
+                continue
             else:
                 fmt.update({
                     'url': video_url,
youtube_dl/extractor/wrzuta.py
@@ -27,15 +27,15 @@ class WrzutaIE(InfoExtractor):
             'description': 'md5:7fb5ef3c21c5893375fda51d9b15d9cd',
         },
     }, {
-        'url': 'http://w729.wrzuta.pl/audio/9oXJqdcndqv/david_guetta_amp_showtek_ft._vassy_-_bad',
-        'md5': '1e546a18e1c22ac6e9adce17b8961ff5',
+        'url': 'http://jolka85.wrzuta.pl/audio/063jOPX5ue2/liber_natalia_szroeder_-_teraz_ty',
+        'md5': 'bc78077859bea7bcfe4295d7d7fc9025',
         'info_dict': {
-            'id': '9oXJqdcndqv',
+            'id': '063jOPX5ue2',
             'ext': 'ogg',
-            'title': 'David Guetta & Showtek ft. Vassy - Bad',
-            'duration': 270,
-            'uploader_id': 'w729',
-            'description': 'md5:4628f01c666bbaaecefa83476cfa794a',
+            'title': 'Liber & Natalia Szroeder - Teraz Ty',
+            'duration': 203,
+            'uploader_id': 'jolka85',
+            'description': 'md5:2d2b6340f9188c8c4cd891580e481096',
         },
     }]
 
@@ -49,16 +49,17 @@ class WrzutaIE(InfoExtractor):
 
         quality = qualities(['SD', 'MQ', 'HQ', 'HD'])
 
-        audio_table = {'flv': 'mp3', 'webm': 'ogg', 'mp3': 'mp3'}
+        audio_table = {'flv': 'mp3', 'webm': 'ogg', '???': 'mp3'}
 
         embedpage = self._download_json('http://www.wrzuta.pl/npp/embed/%s/%s' % (uploader, video_id), video_id)
 
         formats = []
         for media in embedpage['url']:
+            fmt = media['type'].split('@')[0]
             if typ == 'audio':
-                ext = audio_table[media['type'].split('@')[0]]
+                ext = audio_table.get(fmt, fmt)
             else:
-                ext = media['type'].split('@')[0]
+                ext = fmt
 
             formats.append({
                 'format_id': '%s_%s' % (ext, media['quality'].lower()),
youtube_dl/extractor/youjizz.py
@@ -9,40 +9,30 @@ from ..utils import (
 
 
 class YouJizzIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+)\.html$'
+    _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/[^/#?]+-(?P<id>[0-9]+)\.html(?:$|[?#])'
     _TEST = {
         'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
-        'file': '2189178.flv',
         'md5': '07e15fa469ba384c7693fd246905547c',
         'info_dict': {
+            'id': '2189178',
+            'ext': 'flv',
            "title": "Zeichentrick 1",
            "age_limit": 18,
         }
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-
-        video_id = mobj.group('videoid')
-
-        # Get webpage content
+        video_id = self._match_id(url)
+
         webpage = self._download_webpage(url, video_id)
 
         age_limit = self._rta_search(webpage)
-
-        # Get the video title
-        video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
-                                              webpage, 'title').strip()
-
-        # Get the embed page
-        result = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', webpage)
-        if result is None:
-            raise ExtractorError('ERROR: unable to extract embed page')
-
-        embed_page_url = result.group(0).strip()
-        video_id = result.group('videoid')
-
-        webpage = self._download_webpage(embed_page_url, video_id)
+        video_title = self._html_search_regex(
+            r'<title>\s*(.*)\s*</title>', webpage, 'title')
+
+        embed_page_url = self._search_regex(
+            r'(https?://www.youjizz.com/videos/embed/[0-9]+)',
+            webpage, 'embed page')
+        webpage = self._download_webpage(
+            embed_page_url, video_id, note='downloading embed page')
 
         # Get the video URL
         m_playlist = re.search(r'so.addVariable\("playlist", ?"(?P<playlist>.+?)"\);', webpage)
youtube_dl/extractor/youtube.py
@@ -510,7 +510,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
     def _parse_sig_js(self, jscode):
         funcname = self._search_regex(
-            r'signature=([$a-zA-Z]+)', jscode,
+            r'\.sig\|\|([a-zA-Z0-9]+)\(', jscode,
            'Initial JS player signature function name')
 
         jsi = JSInterpreter(jscode)
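The new signature-function regex keys off a `.sig||name(` pattern instead of `signature=`; a hedged illustration, where the jscode fragment is invented and only the regex comes from the diff:

    import re

    jscode = 'var h = c.sig||aB(c.s);'  # invented fragment shaped like newer player code
    funcname = re.search(r'\.sig\|\|([a-zA-Z0-9]+)\(', jscode).group(1)
    assert funcname == 'aB'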
youtube_dl/utils.py
@@ -843,10 +843,7 @@ def bytes_to_intlist(bs):
 def intlist_to_bytes(xs):
     if not xs:
         return b''
-    if isinstance(chr(0), bytes): # Python 2
-        return ''.join([chr(x) for x in xs])
-    else:
-        return bytes(xs)
+    return struct.pack('%dB' % len(xs), *xs)
 
 
 # Cross-platform file locking
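The simplified intlist_to_bytes behaves the same on Python 2 and 3 by packing each integer as an unsigned byte; the expected output below is the one asserted by the new test_intlist_to_bytes in test/test_utils.py:

    import struct

    def intlist_to_bytes(xs):
        # Pack every int in xs as one unsigned byte ('B'); b'' for an empty list.
        if not xs:
            return b''
        return struct.pack('%dB' % len(xs), *xs)

    assert intlist_to_bytes([0, 1, 127, 128, 255]) == b'\x00\x01\x7f\x80\xff'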
youtube_dl/utils.py
@@ -1334,7 +1331,8 @@ def parse_age_limit(s):
 
 
 def strip_jsonp(code):
-    return re.sub(r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?\s*$', r'\1', code)
+    return re.sub(
+        r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
 
 
 def js_to_json(code):
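The extended strip_jsonp pattern also drops trailing //-comments after the JSONP wrapper, which the NPO metadata responses apparently carry (see the npo.py change above, which now delegates to strip_jsonp); the sample input is the one used by the new test in test/test_utils.py:

    import json
    import re

    def strip_jsonp(code):
        # Strip 'callback( ... );' wrappers, including any trailing // comment lines.
        return re.sub(
            r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)

    stripped = strip_jsonp('parseMetadata({"STATUS":"OK"})\n\n\n//epc')
    assert json.loads(stripped) == {'STATUS': 'OK'}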
youtube_dl/version.py
@@ -1,2 +1,2 @@
 
-__version__ = '2014.11.09'
+__version__ = '2014.11.13.3'