Compare commits
69 Commits
2016.04.13
...
2016.04.19
Author | SHA1 | Date | |
---|---|---|---|
|
81215d5652 | ||
|
241a318f27 | ||
|
4fdf082375 | ||
|
1b6182d8f7 | ||
|
7bab22a402 | ||
|
0f97fb4d00 | ||
|
b1cf58f48f | ||
|
3014b0ae83 | ||
|
b9f2fdd37f | ||
|
bbb3f730bb | ||
|
d868f43c58 | ||
|
21525bb8ca | ||
|
d8f103159f | ||
|
663ee5f0a9 | ||
|
b6b950bf58 | ||
|
11e60fcad8 | ||
|
c23533a100 | ||
|
0dafea02e6 | ||
|
5d6360c3b7 | ||
|
5e5c30c3fd | ||
|
9154c87fc4 | ||
|
ef0e4e7bc0 | ||
|
67d46a3f90 | ||
|
bec47a0748 | ||
|
36b7d9dbfa | ||
|
8c65e4a527 | ||
|
6ad2ef8b7c | ||
|
00b426d66d | ||
|
0de968b584 | ||
|
0841d5013c | ||
|
a71fca8577 | ||
|
ee94e7e66d | ||
|
759e37c9e6 | ||
|
ae65567102 | ||
|
c394b4f4cb | ||
|
260c7036ba | ||
|
f74197a074 | ||
|
f3a58d46bf | ||
|
b6612c9b11 | ||
|
7e176effb2 | ||
|
4a252cc2d2 | ||
|
f0ec61b525 | ||
|
66d40ae3a5 | ||
|
e6da9240d4 | ||
|
dd91dfcd67 | ||
|
c773082692 | ||
|
9c250931f5 | ||
|
56f1750049 | ||
|
f2159c9815 | ||
|
b0cf2e7c1b | ||
|
74b47d00c3 | ||
|
8cb57bab8e | ||
|
e1bf277e19 | ||
|
ce599d5a7e | ||
|
9e28538726 | ||
|
404284132c | ||
|
5565be9dd9 | ||
|
b3a9474ad1 | ||
|
86475d59b1 | ||
|
73d93f948e | ||
|
f5d8743e0a | ||
|
d1c4e4ba15 | ||
|
f141fefab7 | ||
|
8334637f4a | ||
|
b8f67449ec | ||
|
6d67169509 | ||
|
dcaf00fb3e | ||
|
f896e1ccef | ||
|
c96eca426b |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.13*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.13**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.19*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.19**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.04.13
|
||||
[debug] youtube-dl version 2016.04.19
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
@@ -50,6 +50,7 @@
|
||||
- **arte.tv:ddc**
|
||||
- **arte.tv:embed**
|
||||
- **arte.tv:future**
|
||||
- **arte.tv:info**
|
||||
- **arte.tv:magazine**
|
||||
- **AtresPlayer**
|
||||
- **ATTTechChannel**
|
||||
@@ -350,13 +351,15 @@
|
||||
- **miomio.tv**
|
||||
- **MiTele**: mitele.es
|
||||
- **mixcloud**
|
||||
- **mixcloud:playlist**
|
||||
- **mixcloud:stream**
|
||||
- **mixcloud:user**
|
||||
- **MLB**
|
||||
- **Mnet**
|
||||
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
|
||||
- **Mofosex**
|
||||
- **Mojvideo**
|
||||
- **Moniker**: allmyvideos.net and vidspot.net
|
||||
- **mooshare**: Mooshare.biz
|
||||
- **Morningstar**: morningstar.com
|
||||
- **Motherless**
|
||||
- **Motorsport**: motorsport.com
|
||||
@@ -393,7 +396,6 @@
|
||||
- **ndr:embed:base**
|
||||
- **NDTV**
|
||||
- **NerdCubedFeed**
|
||||
- **Nerdist**
|
||||
- **netease:album**: 网易云音乐 - 专辑
|
||||
- **netease:djradio**: 网易云音乐 - 电台
|
||||
- **netease:mv**: 网易云音乐 - MV
|
||||
@@ -609,6 +611,7 @@
|
||||
- **Tagesschau**
|
||||
- **Tapely**
|
||||
- **Tass**
|
||||
- **TDSLifeway**
|
||||
- **teachertube**: teachertube.com videos
|
||||
- **teachertube:user:collection**: teachertube.com user and collection videos
|
||||
- **TeachingChannel**
|
||||
@@ -625,7 +628,6 @@
|
||||
- **TeleTask**
|
||||
- **TF1**
|
||||
- **TheIntercept**
|
||||
- **TheOnion**
|
||||
- **ThePlatform**
|
||||
- **ThePlatformFeed**
|
||||
- **TheScene**
|
||||
@@ -684,7 +686,6 @@
|
||||
- **twitter**
|
||||
- **twitter:amplify**
|
||||
- **twitter:card**
|
||||
- **Ubu**
|
||||
- **udemy**
|
||||
- **udemy:course**
|
||||
- **UDNEmbed**: 聯合影音
|
||||
@@ -754,7 +755,6 @@
|
||||
- **Walla**
|
||||
- **WashingtonPost**
|
||||
- **wat.tv**
|
||||
- **WayOfTheMaster**
|
||||
- **WDR**
|
||||
- **wdr:mobile**
|
||||
- **WDRMaus**: Sendung mit der Maus
|
||||
|
@@ -44,7 +44,7 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/watch?v=W01L70IGBgE&index=2&list=RDOQpdSVF_k_w')
|
||||
entries = result['entries']
|
||||
self.assertTrue(len(entries) >= 20)
|
||||
self.assertTrue(len(entries) >= 50)
|
||||
original_video = entries[0]
|
||||
self.assertEqual(original_video['id'], 'OQpdSVF_k_w')
|
||||
|
||||
|
@@ -225,7 +225,7 @@ class FFmpegFD(ExternalFD):
|
||||
|
||||
args += ['-i', url, '-c', 'copy']
|
||||
if protocol == 'm3u8':
|
||||
if self.params.get('hls_use_mpegts', False):
|
||||
if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
|
||||
args += ['-f', 'mpegts']
|
||||
else:
|
||||
args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
|
||||
|
@@ -27,6 +27,8 @@ class RtspFD(FileDownloader):
|
||||
self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install any.')
|
||||
return False
|
||||
|
||||
self._debug_cmd(args)
|
||||
|
||||
retval = subprocess.call(args)
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
|
@@ -12,9 +12,10 @@ from ..utils import (
|
||||
|
||||
class AolIE(InfoExtractor):
|
||||
IE_NAME = 'on.aol.com'
|
||||
_VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/video/.*-)(?P<id>[^/?-]+)'
|
||||
_VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/.*-)(?P<id>[^/?-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# video with 5min ID
|
||||
'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
|
||||
'md5': '18ef68f48740e86ae94b98da815eec42',
|
||||
'info_dict': {
|
||||
@@ -31,6 +32,7 @@ class AolIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# video with vidible ID
|
||||
'url': 'http://on.aol.com/video/netflix-is-raising-rates-5707d6b8e4b090497b04f706?context=PC:homepage:PL1944:1460189336183',
|
||||
'info_dict': {
|
||||
'id': '5707d6b8e4b090497b04f706',
|
||||
@@ -45,6 +47,12 @@ class AolIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://on.aol.com/partners/abc-551438d309eab105804dbfe8/sneak-peek-was-haley-really-framed-570eaebee4b0448640a5c944',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://on.aol.com/shows/park-bench-shw518173474-559a1b9be4b0c3bfad3357a7?context=SH:SHW518173474:PL4327:1460619712763',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -210,7 +210,7 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
# It also uses the arte_vp_url url from the webpage to extract the information
|
||||
class ArteTVCreativeIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:creative'
|
||||
_VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de|en|es)/(?:magazine?/)?(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design',
|
||||
@@ -229,9 +229,27 @@ class ArteTVCreativeIE(ArteTVPlus7IE):
|
||||
'description': 'Événement ! Quarante-cinq ans après leurs premiers succès, les légendaires Monty Python remontent sur scène.\n',
|
||||
'upload_date': '20140805',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://creative.arte.tv/de/episode/agentur-amateur-4-der-erste-kunde',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class ArteTVInfoIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:info'
|
||||
_VALID_URL = r'https?://info\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://info.arte.tv/fr/service-civique-un-cache-misere',
|
||||
'info_dict': {
|
||||
'id': '067528-000-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'Service civique, un cache misère ?',
|
||||
'upload_date': '20160403',
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
class ArteTVFutureIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:future'
|
||||
_VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'
|
||||
|
@@ -30,14 +30,14 @@ class AudiomackIE(InfoExtractor):
|
||||
# audiomack wrapper around soundcloud song
|
||||
{
|
||||
'add_ie': ['Soundcloud'],
|
||||
'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare',
|
||||
'url': 'http://www.audiomack.com/song/hip-hop-daily/black-mamba-freestyle',
|
||||
'info_dict': {
|
||||
'id': '172419696',
|
||||
'id': '258901379',
|
||||
'ext': 'mp3',
|
||||
'description': 'md5:1fc3272ed7a635cce5be1568c2822997',
|
||||
'title': 'Young Thug ft Lil Wayne - Take Kare',
|
||||
'uploader': 'Young Thug World',
|
||||
'upload_date': '20141016',
|
||||
'description': 'mamba day freestyle for the legend Kobe Bryant ',
|
||||
'title': 'Black Mamba Freestyle [Prod. By Danny Wolf]',
|
||||
'uploader': 'ILOVEMAKONNEN',
|
||||
'upload_date': '20160414',
|
||||
}
|
||||
},
|
||||
]
|
||||
|
@@ -671,6 +671,7 @@ class BBCIE(BBCCoUkIE):
|
||||
'info_dict': {
|
||||
'id': '34475836',
|
||||
'title': 'Jurgen Klopp: Furious football from a witty and winning coach',
|
||||
'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
|
@@ -340,7 +340,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
ext = 'flv'
|
||||
if ext is None:
|
||||
ext = determine_ext(url)
|
||||
tbr = int_or_none(rend.get('encodingRate'), 1000),
|
||||
tbr = int_or_none(rend.get('encodingRate'), 1000)
|
||||
a_format = {
|
||||
'format_id': 'http%s' % ('-%s' % tbr if tbr else ''),
|
||||
'url': url,
|
||||
|
@@ -5,7 +5,6 @@ from ..utils import (
|
||||
xpath_text,
|
||||
xpath_element,
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
)
|
||||
|
||||
@@ -64,7 +63,7 @@ class CBSIE(CBSBaseIE):
|
||||
'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?manifest=m3u&mbr=true'
|
||||
TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
@@ -84,11 +83,11 @@ class CBSIE(CBSBaseIE):
|
||||
pid = xpath_text(item, 'pid')
|
||||
if not pid:
|
||||
continue
|
||||
try:
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
self.TP_RELEASE_URL_TEMPLATE % pid, content_id, 'Downloading %s SMIL data' % pid)
|
||||
except ExtractorError:
|
||||
continue
|
||||
tp_release_url = self.TP_RELEASE_URL_TEMPLATE % pid
|
||||
if '.m3u8' in xpath_text(item, 'contentUrl', default=''):
|
||||
tp_release_url += '&manifest=m3u'
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
tp_release_url, content_id, 'Downloading %s SMIL data' % pid)
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
self._sort_formats(formats)
|
||||
|
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@@ -55,8 +56,13 @@ class EaglePlatformIE(InfoExtractor):
|
||||
raise ExtractorError(' '.join(response['errors']), expected=True)
|
||||
|
||||
def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'):
|
||||
response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note)
|
||||
self._handle_error(response)
|
||||
try:
|
||||
response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note)
|
||||
except ExtractorError as ee:
|
||||
if isinstance(ee.cause, compat_HTTPError):
|
||||
response = self._parse_json(ee.cause.read().decode('utf-8'), video_id)
|
||||
self._handle_error(response)
|
||||
raise
|
||||
return response
|
||||
|
||||
def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'):
|
||||
|
@@ -46,6 +46,7 @@ from .arte import (
|
||||
ArteTVPlus7IE,
|
||||
ArteTVCreativeIE,
|
||||
ArteTVConcertIE,
|
||||
ArteTVInfoIE,
|
||||
ArteTVFutureIE,
|
||||
ArteTVCinemaIE,
|
||||
ArteTVDDCIE,
|
||||
@@ -411,7 +412,12 @@ from .minoto import MinotoIE
|
||||
from .miomio import MioMioIE
|
||||
from .mit import TechTVMITIE, MITIE, OCWMITIE
|
||||
from .mitele import MiTeleIE
|
||||
from .mixcloud import MixcloudIE
|
||||
from .mixcloud import (
|
||||
MixcloudIE,
|
||||
MixcloudUserIE,
|
||||
MixcloudPlaylistIE,
|
||||
MixcloudStreamIE,
|
||||
)
|
||||
from .mlb import MLBIE
|
||||
from .mnet import MnetIE
|
||||
from .mpora import MporaIE
|
||||
@@ -419,7 +425,6 @@ from .moevideo import MoeVideoIE
|
||||
from .mofosex import MofosexIE
|
||||
from .mojvideo import MojvideoIE
|
||||
from .moniker import MonikerIE
|
||||
from .mooshare import MooshareIE
|
||||
from .morningstar import MorningstarIE
|
||||
from .motherless import MotherlessIE
|
||||
from .motorsport import MotorsportIE
|
||||
@@ -464,7 +469,6 @@ from .ndr import (
|
||||
from .ndtv import NDTVIE
|
||||
from .netzkino import NetzkinoIE
|
||||
from .nerdcubed import NerdCubedFeedIE
|
||||
from .nerdist import NerdistIE
|
||||
from .neteasemusic import (
|
||||
NetEaseMusicIE,
|
||||
NetEaseMusicAlbumIE,
|
||||
@@ -729,6 +733,7 @@ from .sztvhu import SztvHuIE
|
||||
from .tagesschau import TagesschauIE
|
||||
from .tapely import TapelyIE
|
||||
from .tass import TassIE
|
||||
from .tdslifeway import TDSLifewayIE
|
||||
from .teachertube import (
|
||||
TeacherTubeIE,
|
||||
TeacherTubeUserIE,
|
||||
@@ -746,7 +751,6 @@ from .teletask import TeleTaskIE
|
||||
from .testurl import TestURLIE
|
||||
from .tf1 import TF1IE
|
||||
from .theintercept import TheInterceptIE
|
||||
from .theonion import TheOnionIE
|
||||
from .theplatform import (
|
||||
ThePlatformIE,
|
||||
ThePlatformFeedIE,
|
||||
@@ -831,7 +835,6 @@ from .twitter import (
|
||||
TwitterIE,
|
||||
TwitterAmplifyIE,
|
||||
)
|
||||
from .ubu import UbuIE
|
||||
from .udemy import (
|
||||
UdemyIE,
|
||||
UdemyCourseIE
|
||||
@@ -916,7 +919,6 @@ from .vulture import VultureIE
|
||||
from .walla import WallaIE
|
||||
from .washingtonpost import WashingtonPostIE
|
||||
from .wat import WatIE
|
||||
from .wayofthemaster import WayOfTheMasterIE
|
||||
from .wdr import (
|
||||
WDRIE,
|
||||
WDRMobileIE,
|
||||
|
@@ -7,7 +7,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class GazetaIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:(?:main|\d{4}/\d{2}/\d{2})/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)'
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:main/)*(?:\d{4}/\d{2}/\d{2}/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml',
|
||||
'md5': 'd49c9bdc6e5a7888f27475dc215ee789',
|
||||
@@ -18,9 +18,22 @@ class GazetaIE(InfoExtractor):
|
||||
'description': 'md5:38617526050bd17b234728e7f9620a71',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
},
|
||||
'skip': 'video not found',
|
||||
}, {
|
||||
'url': 'http://www.gazeta.ru/lifestyle/video/2015/03/08/master-klass_krasivoi_byt._delaem_vesennii_makiyazh.shtml',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.gazeta.ru/video/main/main/2015/06/22/platit_ili_ne_platit_po_isku_yukosa.shtml',
|
||||
'info_dict': {
|
||||
'id': '252048',
|
||||
'ext': 'mp4',
|
||||
'title': '"Если по иску ЮКОСа придется платить, это будет большой удар по бюджету"',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['EaglePlatform'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -60,6 +60,7 @@ from .googledrive import GoogleDriveIE
|
||||
from .jwplatform import JWPlatformIE
|
||||
from .digiteka import DigitekaIE
|
||||
from .instagram import InstagramIE
|
||||
from .liveleak import LiveLeakIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -104,7 +105,8 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True, # infinite live stream
|
||||
},
|
||||
'expected_warnings': [
|
||||
r'501.*Not Implemented'
|
||||
r'501.*Not Implemented',
|
||||
r'400.*Bad Request',
|
||||
],
|
||||
},
|
||||
# Direct link with incorrect MIME type
|
||||
@@ -1140,6 +1142,18 @@ class GenericIE(InfoExtractor):
|
||||
'upload_date': '20160409',
|
||||
},
|
||||
},
|
||||
# LiveLeak embed
|
||||
{
|
||||
'url': 'http://www.wykop.pl/link/3088787/',
|
||||
'md5': 'ace83b9ed19b21f68e1b50e844fdf95d',
|
||||
'info_dict': {
|
||||
'id': '874_1459135191',
|
||||
'ext': 'mp4',
|
||||
'title': 'Man shows poor quality of new apartment building',
|
||||
'description': 'The wall is like a sand pile.',
|
||||
'uploader': 'Lake8737',
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def report_following_redirect(self, new_url):
|
||||
@@ -1942,7 +1956,13 @@ class GenericIE(InfoExtractor):
|
||||
# Look for Instagram embeds
|
||||
instagram_embed_url = InstagramIE._extract_embed_url(webpage)
|
||||
if instagram_embed_url is not None:
|
||||
return self.url_result(instagram_embed_url, InstagramIE.ie_key())
|
||||
return self.url_result(
|
||||
self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
|
||||
|
||||
# Look for LiveLeak embeds
|
||||
liveleak_url = LiveLeakIE._extract_url(webpage)
|
||||
if liveleak_url:
|
||||
return self.url_result(liveleak_url, 'LiveLeak')
|
||||
|
||||
def check_video(vurl):
|
||||
if YoutubeIE.suitable(vurl):
|
||||
|
@@ -4,6 +4,7 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
parse_duration,
|
||||
unified_strdate,
|
||||
)
|
||||
@@ -29,7 +30,12 @@ class HuffPostIE(InfoExtractor):
|
||||
'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more. ',
|
||||
'duration': 1549,
|
||||
'upload_date': '20140124',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 404: Not Found'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -45,7 +51,7 @@ class HuffPostIE(InfoExtractor):
|
||||
description = data.get('description')
|
||||
|
||||
thumbnails = []
|
||||
for url in data['images'].values():
|
||||
for url in filter(None, data['images'].values()):
|
||||
m = re.match('.*-([0-9]+x[0-9]+)\.', url)
|
||||
if not m:
|
||||
continue
|
||||
@@ -54,13 +60,25 @@ class HuffPostIE(InfoExtractor):
|
||||
'resolution': m.group(1),
|
||||
})
|
||||
|
||||
formats = [{
|
||||
'format': key,
|
||||
'format_id': key.replace('/', '.'),
|
||||
'ext': 'mp4',
|
||||
'url': url,
|
||||
'vcodec': 'none' if key.startswith('audio/') else None,
|
||||
} for key, url in data.get('sources', {}).get('live', {}).items()]
|
||||
formats = []
|
||||
sources = data.get('sources', {})
|
||||
live_sources = list(sources.get('live', {}).items()) + list(sources.get('live_again', {}).items())
|
||||
for key, url in live_sources:
|
||||
ext = determine_ext(url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
url + '?hdcore=2.9.5', video_id, f4m_id='hds', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format': key,
|
||||
'format_id': key.replace('/', '.'),
|
||||
'ext': 'mp4',
|
||||
'url': url,
|
||||
'vcodec': 'none' if key.startswith('audio/') else None,
|
||||
})
|
||||
|
||||
if not formats and data.get('fivemin_id'):
|
||||
return self.url_result('5min:%s' % data['fivemin_id'])
|
||||
|
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class InstagramIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?instagram\.com/p/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/p/(?P<id>[^/?#&]+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
||||
'md5': '0d2da106a9d2631273e192b372806516',
|
||||
@@ -38,10 +38,19 @@ class InstagramIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://instagram.com/p/-Cmh1cukG2/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://instagram.com/p/9o6LshA7zy/embed/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_embed_url(webpage):
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1',
|
||||
webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
||||
blockquote_el = get_element_by_attribute(
|
||||
'class', 'instagram-media', webpage)
|
||||
if blockquote_el is None:
|
||||
@@ -53,7 +62,9 @@ class InstagramIE(InfoExtractor):
|
||||
return mobj.group('link')
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
url = mobj.group('url')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
|
||||
|
@@ -165,7 +165,7 @@ class IqiyiIE(InfoExtractor):
|
||||
IE_NAME = 'iqiyi'
|
||||
IE_DESC = '爱奇艺'
|
||||
|
||||
_VALID_URL = r'https?://(?:[^.]+\.)?iqiyi\.com/.+\.html'
|
||||
_VALID_URL = r'https?://(?:(?:[^.]+\.)?iqiyi\.com|www\.pps\.tv)/.+\.html'
|
||||
|
||||
_NETRC_MACHINE = 'iqiyi'
|
||||
|
||||
@@ -273,6 +273,9 @@ class IqiyiIE(InfoExtractor):
|
||||
'title': '灌篮高手 国语版',
|
||||
},
|
||||
'playlist_count': 101,
|
||||
}, {
|
||||
'url': 'http://www.pps.tv/w_19rrbav0ph.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_FORMATS_MAP = [
|
||||
|
@@ -2,39 +2,63 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote_plus
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
)
|
||||
|
||||
|
||||
class KaraoketvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://karaoketv\.co\.il/\?container=songs&id=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'http://www.karaoketv.co.il/[^/]+/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://karaoketv.co.il/?container=songs&id=171568',
|
||||
'url': 'http://www.karaoketv.co.il/%D7%A9%D7%99%D7%A8%D7%99_%D7%A7%D7%A8%D7%99%D7%95%D7%A7%D7%99/58356/%D7%90%D7%99%D7%96%D7%95%D7%9F',
|
||||
'info_dict': {
|
||||
'id': '171568',
|
||||
'ext': 'mp4',
|
||||
'title': 'אל העולם שלך - רותם כהן - שרים קריוקי',
|
||||
'id': '58356',
|
||||
'ext': 'flv',
|
||||
'title': 'קריוקי של איזון',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
api_page_url = self._search_regex(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>https?://www\.karaoke\.co\.il/api_play\.php\?.+?)\1',
|
||||
webpage, 'API play URL', group='url')
|
||||
|
||||
page_video_url = self._og_search_video_url(webpage, video_id)
|
||||
config_json = compat_urllib_parse_unquote_plus(self._search_regex(
|
||||
r'config=(.*)', page_video_url, 'configuration'))
|
||||
api_page = self._download_webpage(api_page_url, video_id)
|
||||
video_cdn_url = self._search_regex(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>https?://www\.video-cdn\.com/embed/iframe/.+?)\1',
|
||||
api_page, 'video cdn URL', group='url')
|
||||
|
||||
urls_info_json = self._download_json(
|
||||
config_json, video_id, 'Downloading configuration',
|
||||
transform_source=js_to_json)
|
||||
video_cdn = self._download_webpage(video_cdn_url, video_id)
|
||||
play_path = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+options\s*=\s*({.+?});', video_cdn, 'options'),
|
||||
video_id)['clip']['url']
|
||||
|
||||
url = urls_info_json['playlist'][0]['url']
|
||||
settings = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+settings\s*=\s*({.+?});', video_cdn, 'servers', default='{}'),
|
||||
video_id, fatal=False) or {}
|
||||
|
||||
servers = settings.get('servers')
|
||||
if not servers or not isinstance(servers, list):
|
||||
servers = ('wowzail.video-cdn.com:80/vodcdn', )
|
||||
|
||||
formats = [{
|
||||
'url': 'rtmp://%s' % server if not server.startswith('rtmp') else server,
|
||||
'play_path': play_path,
|
||||
'app': 'vodcdn',
|
||||
'page_url': video_cdn_url,
|
||||
'player_url': 'http://www.video-cdn.com/assets/flowplayer/flowplayer.commercial-3.2.18.swf',
|
||||
'rtmp_real_time': True,
|
||||
'ext': 'flv',
|
||||
} for server in servers]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'url': url,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -52,9 +52,12 @@ class KarriereVideosIE(InfoExtractor):
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'/config/video/(.+?)\.xml', webpage, 'video id')
|
||||
# Server returns malformed headers
|
||||
# Force Accept-Encoding: * to prevent gzipped results
|
||||
playlist = self._download_xml(
|
||||
'http://www.karrierevideos.at/player-playlist.xml.php?p=%s' % video_id,
|
||||
video_id, transform_source=fix_xml_ampersands)
|
||||
video_id, transform_source=fix_xml_ampersands,
|
||||
headers={'Accept-Encoding': '*'})
|
||||
|
||||
NS_MAP = {
|
||||
'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'
|
||||
|
@@ -268,7 +268,7 @@ class KuwoCategoryIE(InfoExtractor):
|
||||
'title': '八十年代精选',
|
||||
'description': '这些都是属于八十年代的回忆!',
|
||||
},
|
||||
'playlist_count': 30,
|
||||
'playlist_count': 24,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -63,6 +63,7 @@ class Laola1TvIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'This live stream has already finished.',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -74,6 +75,9 @@ class Laola1TvIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
if 'Dieser Livestream ist bereits beendet.' in webpage:
|
||||
raise ExtractorError('This live stream has already finished.', expected=True)
|
||||
|
||||
iframe_url = self._search_regex(
|
||||
r'<iframe[^>]*?id="videoplayer"[^>]*?src="([^"]+)"',
|
||||
webpage, 'iframe url')
|
||||
|
@@ -6,6 +6,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
determine_protocol,
|
||||
parse_duration,
|
||||
int_or_none,
|
||||
)
|
||||
@@ -18,10 +19,14 @@ class Lecture2GoIE(InfoExtractor):
|
||||
'md5': 'ac02b570883020d208d405d5a3fd2f7f',
|
||||
'info_dict': {
|
||||
'id': '17473',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': '2 - Endliche Automaten und reguläre Sprachen',
|
||||
'creator': 'Frank Heitmann',
|
||||
'duration': 5220,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -32,14 +37,18 @@ class Lecture2GoIE(InfoExtractor):
|
||||
title = self._html_search_regex(r'<em[^>]+class="title">(.+)</em>', webpage, 'title')
|
||||
|
||||
formats = []
|
||||
for url in set(re.findall(r'"src","([^"]+)"', webpage)):
|
||||
for url in set(re.findall(r'var\s+playerUri\d+\s*=\s*"([^"]+)"', webpage)):
|
||||
ext = determine_ext(url)
|
||||
protocol = determine_protocol({'url': url})
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(url, video_id))
|
||||
formats.extend(self._extract_f4m_formats(url, video_id, f4m_id='hds'))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(url, video_id))
|
||||
formats.extend(self._extract_m3u8_formats(url, video_id, ext='mp4', m3u8_id='hls'))
|
||||
else:
|
||||
if protocol == 'rtmp':
|
||||
continue # XXX: currently broken
|
||||
formats.append({
|
||||
'format_id': protocol,
|
||||
'url': url,
|
||||
})
|
||||
|
||||
|
@@ -53,6 +53,14 @@ class LiveLeakIE(InfoExtractor):
|
||||
}
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src="https?://(?:\w+\.)?liveleak\.com/ll_embed\?(?:.*?)i=(?P<id>[\w_]+)(?:.*)',
|
||||
webpage)
|
||||
if mobj:
|
||||
return 'http://www.liveleak.com/view?i=%s' % mobj.group('id')
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
@@ -49,8 +49,8 @@ class MDRIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Beutolomäus und der geheime Weihnachtswunsch',
|
||||
'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
|
||||
'timestamp': 1419047100,
|
||||
'upload_date': '20141220',
|
||||
'timestamp': 1450950000,
|
||||
'upload_date': '20151224',
|
||||
'duration': 4628,
|
||||
'uploader': 'KIKA',
|
||||
},
|
||||
@@ -71,8 +71,8 @@ class MDRIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
data_url = self._search_regex(
|
||||
r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P<url>\\?/.+/(?:video|audio)-?[0-9]+-avCustom\.xml)\1',
|
||||
webpage, 'data url', default=None, group='url').replace('\/', '/')
|
||||
r'(?:dataURL|playerXml(?:["\'])?)\s*:\s*(["\'])(?P<url>.+/(?:video|audio)-?[0-9]+-avCustom\.xml)\1',
|
||||
webpage, 'data url', group='url').replace('\/', '/')
|
||||
|
||||
doc = self._download_xml(
|
||||
compat_urlparse.urljoin(url, data_url), video_id)
|
||||
|
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
class MetacriticIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.metacritic\.com/.+?/trailers/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
|
||||
'info_dict': {
|
||||
'id': '3698222',
|
||||
@@ -20,7 +20,17 @@ class MetacriticIE(InfoExtractor):
|
||||
'description': 'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
|
||||
'duration': 221,
|
||||
},
|
||||
}
|
||||
'skip': 'Not providing trailers anymore',
|
||||
}, {
|
||||
'url': 'http://www.metacritic.com/game/playstation-4/tales-from-the-borderlands-a-telltale-game-series/trailers/5740315',
|
||||
'info_dict': {
|
||||
'id': '5740315',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tales from the Borderlands - Finale: The Vault of the Traveler',
|
||||
'description': 'In the final episode of the season, all hell breaks loose. Jack is now in control of Helios\' systems, and he\'s ready to reclaim his rightful place as king of Hyperion (with or without you).',
|
||||
'duration': 114,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@@ -1,8 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@@ -20,21 +17,28 @@ class MinistryGridIE(InfoExtractor):
|
||||
'id': '3453494717001',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Gospel by Numbers',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'upload_date': '20140410',
|
||||
'description': 'Coming soon from T4G 2014!',
|
||||
'uploader': 'LifeWay Christian Resources (MG)',
|
||||
'uploader_id': '2034960640001',
|
||||
'timestamp': 1397145591,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['TDSLifeway'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
portlets_json = self._search_regex(
|
||||
r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list')
|
||||
portlets = json.loads(portlets_json)
|
||||
portlets = self._parse_json(self._search_regex(
|
||||
r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list'),
|
||||
video_id)
|
||||
pl_id = self._search_regex(
|
||||
r'<!--\s*p_l_id - ([0-9]+)<br>', webpage, 'p_l_id')
|
||||
r'getPlid:function\(\){return"(\d+)"}', webpage, 'p_l_id')
|
||||
|
||||
for i, portlet in enumerate(portlets):
|
||||
portlet_url = 'http://www.ministrygrid.com/c/portal/render_portlet?p_l_id=%s&p_p_id=%s' % (pl_id, portlet)
|
||||
@@ -46,12 +50,8 @@ class MinistryGridIE(InfoExtractor):
|
||||
r'<iframe.*?src="([^"]+)"', portlet_code, 'video iframe',
|
||||
default=None)
|
||||
if video_iframe_url:
|
||||
surl = smuggle_url(
|
||||
video_iframe_url, {'force_videoid': video_id})
|
||||
return {
|
||||
'_type': 'url',
|
||||
'id': video_id,
|
||||
'url': surl,
|
||||
}
|
||||
return self.url_result(
|
||||
smuggle_url(video_iframe_url, {'force_videoid': video_id}),
|
||||
video_id=video_id)
|
||||
|
||||
raise ExtractorError('Could not find video iframe in any portlets')
|
||||
|
@@ -1,27 +1,35 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import functools
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..compat import (
|
||||
compat_chr,
|
||||
compat_ord,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
NO_DEFAULT,
|
||||
OnDemandPagedList,
|
||||
parse_count,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class MixcloudIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/([^/]+)'
|
||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)'
|
||||
IE_NAME = 'mixcloud'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
|
||||
'info_dict': {
|
||||
'id': 'dholbach-cryptkeeper',
|
||||
'ext': 'mp3',
|
||||
'ext': 'm4a',
|
||||
'title': 'Cryptkeeper',
|
||||
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
|
||||
'uploader': 'Daniel Holbach',
|
||||
@@ -39,22 +47,22 @@ class MixcloudIE(InfoExtractor):
|
||||
'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
|
||||
'uploader': 'Gilles Peterson Worldwide',
|
||||
'uploader_id': 'gillespeterson',
|
||||
'thumbnail': 're:https?://.*/images/',
|
||||
'thumbnail': 're:https?://.*',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _check_url(self, url, track_id, ext):
    """Probe ``url`` with a HEAD request and report whether it worked.

    Only the outcome of the request matters here, so a HEAD request is
    issued instead of downloading the whole file.

    Returns True when ``_request_webpage`` completes without raising
    ExtractorError, False otherwise. ``ext`` is only used in the progress
    note.
    """
    probe = HEADRequest(url)
    note = 'Trying %s URL' % ext
    try:
        self._request_webpage(probe, track_id, note)
    except ExtractorError:
        return False
    return True
|
||||
# See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js
|
||||
@staticmethod
def _decrypt_play_info(play_info):
    """Decode the obfuscated play-info string into text.

    ``play_info`` is base64-decoded, then every resulting byte is XORed
    with the matching character of a fixed key, the key being repeated
    as often as needed. The XORed code points are joined into one string.
    """
    KEY = 'pleasedontdownloadourmusictheartistswontgetpaid'

    cipher = base64.b64decode(play_info.encode('ascii'))
    key_length = len(KEY)

    decoded = []
    for position, symbol in enumerate(cipher):
        key_symbol = KEY[position % key_length]
        decoded.append(
            compat_chr(compat_ord(symbol) ^ compat_ord(key_symbol)))
    return ''.join(decoded)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -68,19 +76,15 @@ class MixcloudIE(InfoExtractor):
|
||||
r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)',
|
||||
webpage, 'error message', default=None)
|
||||
|
||||
preview_url = self._search_regex(
|
||||
r'\s(?:data-preview-url|m-preview)="([^"]+)"',
|
||||
webpage, 'preview url', default=None if message else NO_DEFAULT)
|
||||
encrypted_play_info = self._search_regex(
|
||||
r'm-play-info="([^"]+)"', webpage, 'play info')
|
||||
play_info = self._parse_json(
|
||||
self._decrypt_play_info(encrypted_play_info), track_id)
|
||||
|
||||
if message:
|
||||
if message and 'stream_url' not in play_info:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
|
||||
|
||||
song_url = re.sub(r'audiocdn(\d+)', r'stream\1', preview_url)
|
||||
song_url = song_url.replace('/previews/', '/c/originals/')
|
||||
if not self._check_url(song_url, track_id, 'mp3'):
|
||||
song_url = song_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
|
||||
if not self._check_url(song_url, track_id, 'm4a'):
|
||||
raise ExtractorError('Unable to extract track url')
|
||||
song_url = play_info['stream_url']
|
||||
|
||||
PREFIX = (
|
||||
r'm-play-on-spacebar[^>]+'
|
||||
@@ -115,3 +119,201 @@ class MixcloudIE(InfoExtractor):
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
}
|
||||
|
||||
|
||||
class MixcloudPlaylistBaseIE(InfoExtractor):
    """Helpers shared by the Mixcloud user, playlist and stream extractors."""

    # Page size handed to OnDemandPagedList by the subclasses.
    _PAGE_SIZE = 24

    def _find_urls_in_page(self, page):
        """Yield one url_result (keyed to MixcloudIE) per play button in ``page``."""
        for relative_url in re.findall(r'm-play-button m-url="(?P<url>[^"]+)"', page):
            # The attribute value is cleaned with clean_html and resolved
            # against the site root before being handed on.
            track_url = compat_urlparse.urljoin(
                'https://www.mixcloud.com', clean_html(relative_url))
            yield self.url_result(track_url, MixcloudIE.ie_key())

    def _fetch_tracks_page(self, path, video_id, page_name, current_page, real_page_number=None):
        """Download one AJAX list page below ``path`` and return its content.

        ``current_page`` is zero-based and is only used for the progress
        note and as the fallback page number (``current_page + 1``) when
        ``real_page_number`` is not given.
        """
        if not real_page_number:
            real_page_number = current_page + 1

        page_url = 'https://www.mixcloud.com/%s/' % path
        request_query = {
            'page': real_page_number,
            'list': 'main',
            '_ajax': '1',
        }
        return self._download_webpage(
            page_url, video_id,
            note='Download %s (page %d)' % (page_name, current_page + 1),
            errnote='Unable to download %s' % page_name,
            query=request_query,
            headers={'X-Requested-With': 'XMLHttpRequest'})

    def _tracks_page_func(self, page, video_id, page_name, current_page):
        """Generator: fetch list page ``current_page`` and yield its entries.

        Note that ``page`` is the URL path passed through to
        ``_fetch_tracks_page``, not page content.
        """
        page_content = self._fetch_tracks_page(
            page, video_id, page_name, current_page)

        for entry in self._find_urls_in_page(page_content):
            yield entry

    def _get_user_description(self, page_content):
        """Return the "description-text" block of the page (non-fatal lookup)."""
        description_re = r'<div[^>]+class="description-text"[^>]*>(.+?)</div>'
        return self._html_search_regex(
            description_re, page_content, 'user description', fatal=False)
|
||||
|
||||
|
||||
class MixcloudUserIE(MixcloudPlaylistBaseIE):
    """Playlist extractor for a Mixcloud user's uploads, favorites or listens."""

    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/(?P<type>uploads|favorites|listens)?/?$'
    IE_NAME = 'mixcloud:user'

    _TESTS = [{
        'url': 'http://www.mixcloud.com/dholbach/',
        'info_dict': {
            'id': 'dholbach_uploads',
            'title': 'Daniel Holbach (uploads)',
            'description': 'md5:327af72d1efeb404a8216c27240d1370',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'http://www.mixcloud.com/dholbach/uploads/',
        'info_dict': {
            'id': 'dholbach_uploads',
            'title': 'Daniel Holbach (uploads)',
            'description': 'md5:327af72d1efeb404a8216c27240d1370',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'http://www.mixcloud.com/dholbach/favorites/',
        'info_dict': {
            'id': 'dholbach_favorites',
            'title': 'Daniel Holbach (favorites)',
            'description': 'md5:327af72d1efeb404a8216c27240d1370',
        },
        'params': {
            'playlist_items': '1-100',
        },
        'playlist_mincount': 100,
    }, {
        'url': 'http://www.mixcloud.com/dholbach/listens/',
        'info_dict': {
            'id': 'dholbach_listens',
            'title': 'Daniel Holbach (listens)',
            'description': 'md5:327af72d1efeb404a8216c27240d1370',
        },
        'params': {
            'playlist_items': '1-100',
        },
        'playlist_mincount': 100,
    }]

    def _real_extract(self, url):
        """Build a lazily paged playlist for the requested user list."""
        user_id, list_type = re.match(self._VALID_URL, url).group('user', 'type')

        # A bare profile URL carries no list type; fall back to the uploads.
        list_type = 'uploads' if list_type is None else list_type

        video_id = '%s_%s' % (user_id, list_type)

        profile_url = 'https://www.mixcloud.com/%s/' % user_id
        profile = self._download_webpage(
            profile_url, video_id,
            note='Downloading user profile',
            errnote='Unable to download user profile')

        username = self._og_search_title(profile)
        description = self._get_user_description(profile)

        # Pages are only requested when OnDemandPagedList asks for them.
        fetch_page = functools.partial(
            self._tracks_page_func,
            '%s/%s' % (user_id, list_type), video_id, 'list of %s' % list_type)
        entries = OnDemandPagedList(fetch_page, self._PAGE_SIZE, use_cache=True)

        playlist_title = '%s (%s)' % (username, list_type)
        return self.playlist_result(
            entries, video_id, playlist_title, description)
|
||||
|
||||
|
||||
class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
    """Playlist extractor for ``/<user>/playlists/<playlist>/`` URLs."""

    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'
    IE_NAME = 'mixcloud:playlist'

    _TESTS = [{
        'url': 'https://www.mixcloud.com/RedBullThre3style/playlists/tokyo-finalists-2015/',
        'info_dict': {
            'id': 'RedBullThre3style_tokyo-finalists-2015',
            'title': 'National Champions 2015',
            'description': 'md5:6ff5fb01ac76a31abc9b3939c16243a3',
        },
        'playlist_mincount': 16,
    }, {
        'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
        'info_dict': {
            'id': 'maxvibes_jazzcat-on-ness-radio',
            'title': 'Jazzcat on Ness Radio',
            'description': 'md5:7bbbf0d6359a0b8cda85224be0f8f263',
        },
        'playlist_mincount': 23
    }]

    def _real_extract(self, url):
        """Read title/description from the playlist page and page the tracks lazily."""
        user_id, playlist_id = re.match(
            self._VALID_URL, url).group('user', 'playlist')
        video_id = '%s_%s' % (user_id, playlist_id)

        # The playlist page itself is requested under the user id.
        playlist_page = self._download_webpage(
            url, user_id,
            note='Downloading playlist page',
            errnote='Unable to download playlist page')

        description = self._get_user_description(playlist_page)
        title_re = r'<span[^>]+class="[^"]*list-playlist-title[^"]*"[^>]*>(.*?)</span>'
        playlist_title = self._html_search_regex(
            title_re, playlist_page, 'playlist title')

        tracks_path = '%s/playlists/%s' % (user_id, playlist_id)
        fetch_page = functools.partial(
            self._tracks_page_func, tracks_path, video_id, 'tracklist')
        entries = OnDemandPagedList(fetch_page, self._PAGE_SIZE)

        return self.playlist_result(entries, video_id, playlist_title, description)
|
||||
|
||||
|
||||
class MixcloudStreamIE(MixcloudPlaylistBaseIE):
    """Playlist extractor for ``/<user>/stream/`` URLs."""

    _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/stream/?$'
    IE_NAME = 'mixcloud:stream'

    _TEST = {
        'url': 'https://www.mixcloud.com/FirstEar/stream/',
        'info_dict': {
            'id': 'FirstEar',
            'title': 'First Ear',
            'description': 'Curators of good music\nfirstearmusic.com',
        },
        'playlist_mincount': 192,
    }

    def _real_extract(self, url):
        """Collect every stream entry by following the "next page" links."""
        user_id = self._match_id(url)

        webpage = self._download_webpage(url, user_id)

        entries = []

        def _handle_page(page):
            # Harvest the entries of this page and return the URL of the
            # following page, or None when the page advertises none.
            entries.extend(self._find_urls_in_page(page))
            return self._search_regex(
                r'm-next-page-url="([^"]+)"', page,
                'next page URL', default=None)

        next_page_url = _handle_page(webpage)
        prev_page_url = None
        idx = 0

        # Stop when there is no next page or when a page points back at the
        # URL that was just fetched.
        while next_page_url and next_page_url != prev_page_url:
            prev_page_url = next_page_url
            # The page number to request is taken from the link itself;
            # ``idx`` only feeds the progress note.
            current_page = int(self._search_regex(
                r'\?page=(\d+)', next_page_url, 'next page number'))

            tracks_page = self._fetch_tracks_page(
                '%s/stream' % user_id, user_id, 'stream', idx,
                real_page_number=current_page)
            next_page_url = _handle_page(tracks_page)
            idx += 1

        username = self._og_search_title(webpage)
        description = self._get_user_description(webpage)

        return self.playlist_result(entries, user_id, username, description)
|
||||
|
@@ -1,110 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class MooshareIE(InfoExtractor):
    """Extractor for mooshare.biz video pages."""

    IE_NAME = 'mooshare'
    IE_DESC = 'Mooshare.biz'
    _VALID_URL = r'https?://(?:www\.)?mooshare\.biz/(?P<id>[\da-z]{12})'

    _TESTS = [
        {
            'url': 'http://mooshare.biz/8dqtk4bjbp8g',
            'md5': '4e14f9562928aecd2e42c6f341c8feba',
            'info_dict': {
                'id': '8dqtk4bjbp8g',
                'ext': 'mp4',
                'title': 'Comedy Football 2011 - (part 1-2)',
                'duration': 893,
            },
        },
        {
            'url': 'http://mooshare.biz/aipjtoc4g95j',
            'info_dict': {
                'id': 'aipjtoc4g95j',
                'ext': 'mp4',
                'title': 'Orange Caramel Dashing Through the Snow',
                'duration': 212,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            }
        }
    ]

    def _real_extract(self, url):
        """Submit the download form and collect SD, HD and RTMP formats."""
        video_id = self._match_id(url)
        landing_page = self._download_webpage(url, video_id, 'Downloading page')

        if re.search(r'>Video Not Found or Deleted<', landing_page):
            raise ExtractorError(
                'Video %s does not exist' % video_id, expected=True)

        hash_key = self._html_search_regex(
            r'<input type="hidden" name="hash" value="([^"]+)">',
            landing_page, 'hash')
        title = self._html_search_regex(
            r'(?m)<div class="blockTitle">\s*<h2>Watch ([^<]+)</h2>',
            landing_page, 'title')

        # The video page is obtained by POSTing the hidden form fields back
        # to the same URL.
        post_data = urlencode_postdata({
            'op': 'download1',
            'id': video_id,
            'hash': hash_key,
        })
        request = sanitized_Request(
            'http://mooshare.biz/%s' % video_id, post_data)
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')

        # NOTE(review): presumably the site enforces a delay before the form
        # may be submitted - confirm.
        self._sleep(5, video_id)

        video_page = self._download_webpage(
            request, video_id, 'Downloading video page')

        thumbnail = self._html_search_regex(
            r'image:\s*"([^"]+)",', video_page, 'thumbnail', fatal=False)
        duration_str = self._html_search_regex(
            r'duration:\s*"(\d+)",', video_page, 'duration', fatal=False)
        duration = None
        if duration_str is not None:
            duration = int(duration_str)

        formats = []

        # SD video
        sd_match = re.search(
            r'(?m)file:\s*"(?P<url>[^"]+)",\s*provider:', video_page)
        if sd_match:
            formats.append({
                'url': sd_match.group('url'),
                'format_id': 'sd',
                'format': 'SD',
            })

        # HD video
        hd_match = re.search(
            r'\'hd-2\': { file: \'(?P<url>[^\']+)\' },', video_page)
        if hd_match:
            formats.append({
                'url': hd_match.group('url'),
                'format_id': 'hd',
                'format': 'HD',
            })

        # rtmp video
        rtmp_match = re.search(
            r'(?m)file: "(?P<playpath>[^"]+)",\s*streamer: "(?P<rtmpurl>rtmp://[^"]+)",',
            video_page)
        if rtmp_match:
            formats.append({
                'url': rtmp_match.group('rtmpurl'),
                'play_path': rtmp_match.group('playpath'),
                'rtmp_live': False,
                'ext': 'mp4',
                'format_id': 'rtmp',
                'format': 'HD',
            })

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }
|
@@ -1,17 +1,21 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
)
|
||||
|
||||
|
||||
class MusicPlayOnIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:.+?\.)?musicplayon\.com/play(?:-touch)?\?(?:v|pl=100&play)=(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:.+?\.)?musicplayon\.com/play(?:-touch)?\?(?:v|pl=\d+&play)=(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://en.musicplayon.com/play?v=433377',
|
||||
'md5': '00cdcdea1726abdf500d1e7fd6dd59bb',
|
||||
'info_dict': {
|
||||
'id': '433377',
|
||||
'ext': 'mp4',
|
||||
@@ -20,15 +24,16 @@ class MusicPlayOnIE(InfoExtractor):
|
||||
'duration': 342,
|
||||
'uploader': 'ultrafish',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://en.musicplayon.com/play?pl=102&play=442629',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_URL_TEMPLATE = 'http://en.musicplayon.com/play?v=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
url = self._URL_TEMPLATE % video_id
|
||||
|
||||
page = self._download_webpage(url, video_id)
|
||||
|
||||
@@ -40,28 +45,14 @@ class MusicPlayOnIE(InfoExtractor):
|
||||
uploader = self._html_search_regex(
|
||||
r'<div>by <a href="[^"]+" class="purple">([^<]+)</a></div>', page, 'uploader', fatal=False)
|
||||
|
||||
formats = [
|
||||
{
|
||||
'url': 'http://media0-eu-nl.musicplayon.com/stream-mobile?id=%s&type=.mp4' % video_id,
|
||||
'ext': 'mp4',
|
||||
}
|
||||
]
|
||||
|
||||
manifest = self._download_webpage(
|
||||
'http://en.musicplayon.com/manifest.m3u8?v=%s' % video_id, video_id, 'Downloading manifest')
|
||||
|
||||
for entry in manifest.split('#')[1:]:
|
||||
if entry.startswith('EXT-X-STREAM-INF:'):
|
||||
meta, url, _ = entry.split('\n')
|
||||
params = dict(param.split('=') for param in meta.split(',')[1:])
|
||||
formats.append({
|
||||
'url': url,
|
||||
'ext': 'mp4',
|
||||
'tbr': int(params['BANDWIDTH']),
|
||||
'width': int(params['RESOLUTION'].split('x')[1]),
|
||||
'height': int(params['RESOLUTION'].split('x')[-1]),
|
||||
'format_note': params['NAME'].replace('"', '').strip(),
|
||||
})
|
||||
sources = self._parse_json(
|
||||
self._search_regex(r'setup\[\'_sources\'\]\s*=\s*([^;]+);', page, 'video sources'),
|
||||
video_id, transform_source=js_to_json)
|
||||
formats = [{
|
||||
'url': compat_urlparse.urljoin(url, source['src']),
|
||||
'ext': mimetype2ext(source.get('type')),
|
||||
'format_note': source.get('data-res'),
|
||||
} for source in sources]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -1,80 +0,0 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
parse_iso8601,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class NerdistIE(InfoExtractor):
    """Extractor for nerdist.com ``/vepisode/`` pages."""

    _VALID_URL = r'https?://(?:www\.)?nerdist\.com/vepisode/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://www.nerdist.com/vepisode/exclusive-which-dc-characters-w',
        'md5': '3698ed582931b90d9e81e02e26e89f23',
        'info_dict': {
            'display_id': 'exclusive-which-dc-characters-w',
            'id': 'RPHpvJyr',
            'ext': 'mp4',
            'title': 'Your TEEN TITANS Revealed! Who\'s on the show?',
            'thumbnail': 're:^https?://.*/thumbs/.*\.jpg$',
            'description': 'Exclusive: Find out which DC Comics superheroes will star in TEEN TITANS Live-Action TV Show on Nerdist News with Jessica Chobot!',
            'uploader': 'Eric Diaz',
            'upload_date': '20150202',
            'timestamp': 1422892808,
        }
    }

    def _real_extract(self, url):
        """Resolve the player id from the page, then read its XML feed."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Verbose-mode regex: the line break and indentation inside the
        # pattern are ignored by the regex engine.
        video_id = self._search_regex(
            r'''(?x)<script\s+(?:type="text/javascript"\s+)?
                src="https?://content\.nerdist\.com/players/([a-zA-Z0-9_]+)-''',
            webpage, 'video ID')
        published = self._html_search_meta(
            'shareaholic:article_published_time', webpage, 'upload date')
        timestamp = parse_iso8601(published)
        uploader = self._html_search_meta(
            'shareaholic:article_author_name', webpage, 'article author')

        feed = self._download_xml(
            'http://content.nerdist.com/jw6/%s.xml' % video_id, video_id)
        item = feed.find('.//item')
        title = xpath_text(item, './title', fatal=True)
        description = xpath_text(item, './description')
        thumbnail = xpath_text(
            item, './{http://rss.jwpcdn.com/}image', 'thumbnail')

        formats = []
        for source in item.findall('./{http://rss.jwpcdn.com/}source'):
            source_url = source.attrib['file']
            ext = determine_ext(source_url)
            if ext == 'm3u8':
                hls_formats = self._extract_m3u8_formats(
                    source_url, video_id, entry_protocol='m3u8_native',
                    ext='mp4', preference=0)
                formats.extend(hls_formats)
            elif ext == 'smil':
                smil_formats = self._extract_smil_formats(
                    source_url, video_id, fatal=False)
                formats.extend(smil_formats)
            else:
                # Anything else is used as a direct media URL.
                formats.append({
                    'format_id': ext,
                    'url': source_url,
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'formats': formats,
            'uploader': uploader,
        }
|
@@ -7,8 +7,8 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class NewgroundsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?newgrounds\.com/audio/listen/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:audio/listen|portal/view)/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.newgrounds.com/audio/listen/549479',
|
||||
'md5': 'fe6033d297591288fa1c1f780386f07a',
|
||||
'info_dict': {
|
||||
@@ -17,7 +17,16 @@ class NewgroundsIE(InfoExtractor):
|
||||
'title': 'B7 - BusMode',
|
||||
'uploader': 'Burn7',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.newgrounds.com/portal/view/673111',
|
||||
'md5': '3394735822aab2478c31b1004fe5e5bc',
|
||||
'info_dict': {
|
||||
'id': '673111',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dancin',
|
||||
'uploader': 'Squirrelman82',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -25,9 +34,11 @@ class NewgroundsIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, music_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r',"name":"([^"]+)",', webpage, 'music title')
|
||||
r'<title>([^>]+)</title>', webpage, 'title')
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
r',"artist":"([^"]+)",', webpage, 'music uploader')
|
||||
[r',"artist":"([^"]+)",', r'[\'"]owner[\'"]\s*:\s*[\'"]([^\'"]+)[\'"],'],
|
||||
webpage, 'uploader')
|
||||
|
||||
music_url_json_string = self._html_search_regex(
|
||||
r'({"url":"[^"]+"),', webpage, 'music url') + '}'
|
||||
|
@@ -4,7 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import determine_ext
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class OnionStudiosIE(InfoExtractor):
|
||||
@@ -17,7 +20,7 @@ class OnionStudiosIE(InfoExtractor):
|
||||
'id': '2937',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hannibal charges forward, stops for a cocktail',
|
||||
'description': 'md5:545299bda6abf87e5ec666548c6a9448',
|
||||
'description': 'md5:e786add7f280b7f0fe237b64cc73df76',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'The A.V. Club',
|
||||
'uploader_id': 'TheAVClub',
|
||||
@@ -42,9 +45,19 @@ class OnionStudiosIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for src in re.findall(r'<source[^>]+src="([^"]+)"', webpage):
|
||||
if determine_ext(src) != 'm3u8': # m3u8 always results in 403
|
||||
ext = determine_ext(src)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
height = int_or_none(self._search_regex(
|
||||
r'/(\d+)\.%s' % ext, src, 'height', default=None))
|
||||
formats.append({
|
||||
'format_id': ext + ('-%sp' % height if height else ''),
|
||||
'url': src,
|
||||
'height': height,
|
||||
'ext': ext,
|
||||
'preference': 1,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@@ -40,7 +40,7 @@ class Puls4IE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
error_message = self._html_search_regex(
|
||||
r'<div class="message-error">(.+?)</div>',
|
||||
r'<div[^>]+class="message-error"[^>]*>(.+?)</div>',
|
||||
webpage, 'error message', default=None)
|
||||
if error_message:
|
||||
raise ExtractorError(
|
||||
|
@@ -6,6 +6,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
@@ -94,19 +95,32 @@ class SportBoxEmbedIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
hls = self._search_regex(
|
||||
r"sportboxPlayer\.jwplayer_common_params\.file\s*=\s*['\"]([^'\"]+)['\"]",
|
||||
webpage, 'hls file')
|
||||
formats = []
|
||||
|
||||
def cleanup_js(code):
|
||||
# desktop_advert_config contains complex Javascripts and we don't need it
|
||||
return js_to_json(re.sub(r'desktop_advert_config.*', '', code))
|
||||
|
||||
jwplayer_data = self._parse_json(self._search_regex(
|
||||
r'(?s)player\.setup\(({.+?})\);', webpage, 'jwplayer settings'), video_id,
|
||||
transform_source=cleanup_js)
|
||||
|
||||
hls_url = jwplayer_data.get('hls_url')
|
||||
if hls_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
hls_url, video_id, ext='mp4', m3u8_id='hls'))
|
||||
|
||||
rtsp_url = jwplayer_data.get('rtsp_url')
|
||||
if rtsp_url:
|
||||
formats.append({
|
||||
'url': rtsp_url,
|
||||
'format_id': 'rtsp',
|
||||
})
|
||||
|
||||
formats = self._extract_m3u8_formats(hls, video_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._search_regex(
|
||||
r'sportboxPlayer\.node_title\s*=\s*"([^"]+)"', webpage, 'title')
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
r'sportboxPlayer\.jwplayer_common_params\.image\s*=\s*"([^"]+)"',
|
||||
webpage, 'thumbnail', default=None)
|
||||
title = jwplayer_data['node_title']
|
||||
thumbnail = jwplayer_data.get('image_url')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
33
youtube_dl/extractor/tdslifeway.py
Normal file
33
youtube_dl/extractor/tdslifeway.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class TDSLifewayIE(InfoExtractor):
    """Extractor for tds.lifeway.com course pages.

    The numeric id in the URL is used as a Brightcove video id and the
    actual extraction is delegated to the BrightcoveNew extractor.
    """

    _VALID_URL = r'https?://tds\.lifeway\.com/v1/trainingdeliverysystem/courses/(?P<id>\d+)/index\.html'

    _TEST = {
        # From http://www.ministrygrid.com/training-viewer/-/training/t4g-2014-conference/the-gospel-by-numbers-4/the-gospel-by-numbers
        # The query string must keep a literal "&registration=": rendering
        # "&reg" as an HTML entity turns it into a non-ASCII character and
        # merges two parameters.
        'url': 'http://tds.lifeway.com/v1/trainingdeliverysystem/courses/3453494717001/index.html?externalRegistration=AssetId%7C34F466F1-78F3-4619-B2AB-A8EFFA55E9E9%21InstanceId%7C0%21UserId%7Caaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa&grouping=http%3A%2F%2Flifeway.com%2Fvideo%2F3453494717001&activity_id=http%3A%2F%2Flifeway.com%2Fvideo%2F3453494717001&content_endpoint=http%3A%2F%2Ftds.lifeway.com%2Fv1%2Ftrainingdeliverysystem%2FScormEngineInterface%2FTCAPI%2Fcontent%2F&actor=%7B%22name%22%3A%5B%22Guest%20Guest%22%5D%2C%22account%22%3A%5B%7B%22accountServiceHomePage%22%3A%22http%3A%2F%2Fscorm.lifeway.com%2F%22%2C%22accountName%22%3A%22aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa%22%7D%5D%2C%22objectType%22%3A%22Agent%22%7D&content_token=462a50b2-b6f9-4970-99b1-930882c499fb&registration=93d6ec8e-7f7b-4ed3-bbc8-a857913c0b2a&externalConfiguration=access%7CFREE%21adLength%7C-1%21assignOrgId%7C4AE36F78-299A-425D-91EF-E14A899B725F%21assignOrgParentId%7C%21courseId%7C%21isAnonymous%7Cfalse%21previewAsset%7Cfalse%21previewLength%7C-1%21previewMode%7Cfalse%21royalty%7CFREE%21sessionId%7C671422F9-8E79-48D4-9C2C-4EE6111EA1CD%21trackId%7C&auth=Basic%20OjhmZjk5MDBmLTBlYTMtNDJhYS04YjFlLWE4MWQ3NGNkOGRjYw%3D%3D&endpoint=http%3A%2F%2Ftds.lifeway.com%2Fv1%2Ftrainingdeliverysystem%2FScormEngineInterface%2FTCAPI%2F',
        'info_dict': {
            'id': '3453494717001',
            'ext': 'mp4',
            'title': 'The Gospel by Numbers',
            'thumbnail': 're:^https?://.*\.jpg',
            'upload_date': '20140410',
            'description': 'Coming soon from T4G 2014!',
            'uploader_id': '2034960640001',
            'timestamp': 1397145591,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['BrightcoveNew'],
    }

    # Brightcove player page; %s receives the id matched from the course URL.
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/2034960640001/default_default/index.html?videoId=%s'

    def _real_extract(self, url):
        """Return a url_result pointing BrightcoveNew at the matching player page."""
        brightcove_id = self._match_id(url)
        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
|
@@ -1,63 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class TheOnionIE(InfoExtractor):
    """Extractor for theonion.com ``/video/`` pages."""

    _VALID_URL = r'https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<id>[0-9]+)/?'
    _TEST = {
        'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/',
        'md5': '19eaa9a39cf9b9804d982e654dc791ee',
        'info_dict': {
            'id': '2133',
            'ext': 'mp4',
            'title': 'Man Wearing M&M Jacket Apparently Made In God\'s Image',
            'description': 'md5:cc12448686b5600baae9261d3e180910',
            'thumbnail': 're:^https?://.*\.jpg\?\d+$',
        }
    }

    def _real_extract(self, url):
        """Build formats from the page's ``<source>`` tags, keyed by MIME type."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        video_id = self._search_regex(
            r'"videoId":\s(\d+),', webpage, 'video ID')
        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        # MIME type -> (format_id, preference) for directly playable sources.
        direct_formats = {
            'video/mp4': ('mp4_sd', 1),
            'video/webm': ('webm_sd', 0),
        }

        formats = []
        for src, type_ in re.findall(
                r'<source src="([^"]+)" type="([^"]+)"', webpage):
            if type_ in direct_formats:
                format_id, preference = direct_formats[type_]
                formats.append({
                    'format_id': format_id,
                    'preference': preference,
                    'url': src,
                })
            elif type_ == 'application/x-mpegURL':
                formats.extend(
                    self._extract_m3u8_formats(src, display_id, preference=-1))
            else:
                # Unknown source types are reported and skipped.
                self.report_warning(
                    'Encountered unexpected format: %s' % type_)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'description': description,
        }
|
@@ -50,8 +50,6 @@ class ThePlatformBaseIE(OnceIE):
|
||||
else:
|
||||
formats.append(_format)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = self._parse_smil_subtitles(meta, default_ns)
|
||||
|
||||
return formats, subtitles
|
||||
@@ -241,6 +239,7 @@ class ThePlatformIE(ThePlatformBaseIE):
|
||||
smil_url = self._sign_url(smil_url, sig['key'], sig['secret'])
|
||||
|
||||
formats, subtitles = self._extract_theplatform_smil(smil_url, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
ret = self.get_metadata(path, video_id)
|
||||
combined_subtitles = self._merge_subtitles(ret.get('subtitles', {}), subtitles)
|
||||
|
@@ -65,6 +65,9 @@ class TudouIE(InfoExtractor):
|
||||
if quality:
|
||||
info_url += '&hd' + quality
|
||||
xml_data = self._download_xml(info_url, video_id, 'Opening the info XML page')
|
||||
error = xml_data.attrib.get('error')
|
||||
if error is not None:
|
||||
raise ExtractorError('Tudou said: %s' % error, expected=True)
|
||||
final_url = xml_data.text
|
||||
return final_url
|
||||
|
||||
|
@@ -260,6 +260,17 @@ class TwitterIE(InfoExtractor):
|
||||
'upload_date': '20140615',
|
||||
},
|
||||
'add_ie': ['Vine'],
|
||||
}, {
|
||||
'url': 'https://twitter.com/captainamerica/status/719944021058060289',
|
||||
# md5 constantly changes
|
||||
'info_dict': {
|
||||
'id': '719944021058060289',
|
||||
'ext': 'mp4',
|
||||
'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
|
||||
'description': 'Captain America on Twitter: "@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI"',
|
||||
'uploader_id': 'captainamerica',
|
||||
'uploader': 'Captain America',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -284,17 +295,6 @@ class TwitterIE(InfoExtractor):
|
||||
'title': username + ' - ' + title,
|
||||
}
|
||||
|
||||
card_id = self._search_regex(
|
||||
r'["\']/i/cards/tfw/v1/(\d+)', webpage, 'twitter card url', default=None)
|
||||
if card_id:
|
||||
card_url = 'https://twitter.com/i/cards/tfw/v1/' + card_id
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'TwitterCard',
|
||||
'url': card_url,
|
||||
})
|
||||
return info
|
||||
|
||||
mobj = re.search(r'''(?x)
|
||||
<video[^>]+class="animated-gif"(?P<more_info>[^>]+)>\s*
|
||||
<source[^>]+video-src="(?P<url>[^"]+)"
|
||||
|
@@ -1,57 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class UbuIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ubu\.com/film/(?P<id>[\da-z_-]+)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://ubu.com/film/her_noise.html',
|
||||
'md5': '138d5652618bf0f03878978db9bef1ee',
|
||||
'info_dict': {
|
||||
'id': 'her_noise',
|
||||
'ext': 'm4v',
|
||||
'title': 'Her Noise - The Making Of (2007)',
|
||||
'duration': 3600,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>.+?Film & Video: ([^<]+)</title>', webpage, 'title')
|
||||
|
||||
duration = int_or_none(self._html_search_regex(
|
||||
r'Duration: (\d+) minutes', webpage, 'duration', fatal=False),
|
||||
invscale=60)
|
||||
|
||||
formats = []
|
||||
FORMAT_REGEXES = [
|
||||
('sq', r"'flashvars'\s*,\s*'file=([^']+)'"),
|
||||
('hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"'),
|
||||
]
|
||||
preference = qualities([fid for fid, _ in FORMAT_REGEXES])
|
||||
for format_id, format_regex in FORMAT_REGEXES:
|
||||
m = re.search(format_regex, webpage)
|
||||
if m:
|
||||
formats.append({
|
||||
'url': m.group(1),
|
||||
'format_id': format_id,
|
||||
'preference': preference(format_id),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
@@ -41,6 +41,12 @@ class UstreamIE(InfoExtractor):
|
||||
'uploader': 'sportscanadatv',
|
||||
},
|
||||
'skip': 'This Pro Broadcaster has chosen to remove this video from the ustream.tv site.',
|
||||
}, {
|
||||
'url': 'http://www.ustream.tv/embed/10299409',
|
||||
'info_dict': {
|
||||
'id': '10299409',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -55,10 +61,12 @@ class UstreamIE(InfoExtractor):
|
||||
if m.group('type') == 'embed':
|
||||
video_id = m.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
desktop_video_id = self._html_search_regex(
|
||||
r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id')
|
||||
desktop_url = 'http://www.ustream.tv/recorded/' + desktop_video_id
|
||||
return self.url_result(desktop_url, 'Ustream')
|
||||
content_video_ids = self._parse_json(self._search_regex(
|
||||
r'ustream\.vars\.offAirContentVideoIds=([^;]+);', webpage,
|
||||
'content video IDs'), video_id)
|
||||
return self.playlist_result(
|
||||
map(lambda u: self.url_result('http://www.ustream.tv/recorded/' + u, 'Ustream'), content_video_ids),
|
||||
video_id)
|
||||
|
||||
params = self._download_json(
|
||||
'https://api.ustream.tv/videos/%s.json' % video_id, video_id)
|
||||
|
@@ -2,11 +2,19 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_parse_qs,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
remove_start,
|
||||
)
|
||||
|
||||
|
||||
class Varzesh3IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?video\.varzesh3\.com/(?:[^/]+/)+(?P<id>[^/]+)/?'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://video.varzesh3.com/germany/bundesliga/5-%D9%88%D8%A7%DA%A9%D9%86%D8%B4-%D8%A8%D8%B1%D8%AA%D8%B1-%D8%AF%D8%B1%D9%88%D8%A7%D8%B2%D9%87%E2%80%8C%D8%A8%D8%A7%D9%86%D8%A7%D9%86%D8%9B%D9%87%D9%81%D8%AA%D9%87-26-%D8%A8%D9%88%D9%86%D8%AF%D8%B3/',
|
||||
'md5': '2a933874cb7dce4366075281eb49e855',
|
||||
'info_dict': {
|
||||
@@ -15,8 +23,19 @@ class Varzesh3IE(InfoExtractor):
|
||||
'title': '۵ واکنش برتر دروازهبانان؛هفته ۲۶ بوندسلیگا',
|
||||
'description': 'فصل ۲۰۱۵-۲۰۱۴',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}
|
||||
},
|
||||
'skip': 'HTTP 404 Error',
|
||||
}, {
|
||||
'url': 'http://video.varzesh3.com/video/112785/%D8%AF%D9%84%D9%87-%D8%B9%D9%84%DB%8C%D8%9B-%D8%B3%D8%AA%D8%A7%D8%B1%D9%87-%D9%86%D9%88%D8%B8%D9%87%D9%88%D8%B1-%D9%84%DB%8C%DA%AF-%D8%A8%D8%B1%D8%AA%D8%B1-%D8%AC%D8%B2%DB%8C%D8%B1%D9%87',
|
||||
'md5': '841b7cd3afbc76e61708d94e53a4a4e7',
|
||||
'info_dict': {
|
||||
'id': '112785',
|
||||
'ext': 'mp4',
|
||||
'title': 'دله علی؛ ستاره نوظهور لیگ برتر جزیره',
|
||||
'description': 'فوتبال 120',
|
||||
},
|
||||
'expected_warnings': ['description'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
@@ -26,15 +45,30 @@ class Varzesh3IE(InfoExtractor):
|
||||
video_url = self._search_regex(
|
||||
r'<source[^>]+src="([^"]+)"', webpage, 'video url')
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
title = remove_start(self._html_search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title'), 'ویدیو ورزش 3 | ')
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div class="matn">(.+?)</div>',
|
||||
webpage, 'description', fatal=False)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
webpage, 'description', default=None)
|
||||
if description is None:
|
||||
description = clean_html(self._html_search_meta('description', webpage))
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage, default=None)
|
||||
if thumbnail is None:
|
||||
fb_sharer_url = self._search_regex(
|
||||
r'<a[^>]+href="(https?://www\.facebook\.com/sharer/sharer\.php?[^"]+)"',
|
||||
webpage, 'facebook sharer URL', fatal=False)
|
||||
sharer_params = compat_parse_qs(compat_urllib_parse_urlparse(fb_sharer_url).query)
|
||||
thumbnail = sharer_params.get('p[images][0]', [None])[0]
|
||||
|
||||
video_id = self._search_regex(
|
||||
r"<link[^>]+rel='(?:canonical|shortlink)'[^>]+href='/\?p=([^']+)'",
|
||||
webpage, display_id, default=display_id)
|
||||
webpage, display_id, default=None)
|
||||
if video_id is None:
|
||||
video_id = self._search_regex(
|
||||
'var\s+VideoId\s*=\s*(\d+);', webpage, 'video id',
|
||||
default=display_id)
|
||||
|
||||
return {
|
||||
'url': video_url,
|
||||
|
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
@@ -14,13 +13,21 @@ class ViceIE(InfoExtractor):
|
||||
'url': 'http://www.vice.com/video/cowboy-capitalists-part-1',
|
||||
'info_dict': {
|
||||
'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
|
||||
'duration': 725.983,
|
||||
},
|
||||
'params': {
|
||||
# Requires ffmpeg (m3u8 manifest)
|
||||
'skip_download': True,
|
||||
}, {
|
||||
'url': 'http://www.vice.com/video/how-to-hack-a-car',
|
||||
'md5': '6fb2989a3fed069fb8eab3401fc2d3c9',
|
||||
'info_dict': {
|
||||
'id': '3jstaBeXgAs',
|
||||
'ext': 'mp4',
|
||||
'title': 'How to Hack a Car: Phreaked Out (Episode 2)',
|
||||
'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30',
|
||||
'uploader_id': 'MotherboardTV',
|
||||
'uploader': 'Motherboard',
|
||||
'upload_date': '20140529',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
|
||||
@@ -39,11 +46,14 @@ class ViceIE(InfoExtractor):
|
||||
try:
|
||||
embed_code = self._search_regex(
|
||||
r'embedCode=([^&\'"]+)', webpage,
|
||||
'ooyala embed code')
|
||||
ooyala_url = OoyalaIE._url_for_embed_code(embed_code)
|
||||
'ooyala embed code', default=None)
|
||||
if embed_code:
|
||||
return self.url_result('ooyala:%s' % embed_code, 'Ooyala')
|
||||
youtube_id = self._search_regex(
|
||||
r'data-youtube-id="([^"]+)"', webpage, 'youtube id')
|
||||
return self.url_result(youtube_id, 'Youtube')
|
||||
except ExtractorError:
|
||||
raise ExtractorError('The page doesn\'t contain a video', expected=True)
|
||||
return self.url_result(ooyala_url, ie='Ooyala')
|
||||
|
||||
|
||||
class ViceShowIE(InfoExtractor):
|
||||
|
@@ -81,7 +81,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
\.
|
||||
)?
|
||||
vimeo(?P<pro>pro)?\.com/
|
||||
(?!channels/[^/?#]+/?(?:$|[?#])|(?:album|ondemand)/)
|
||||
(?!channels/[^/?#]+/?(?:$|[?#])|[^/]+/review/|(?:album|ondemand)/)
|
||||
(?:.*?/)?
|
||||
(?:
|
||||
(?:
|
||||
@@ -90,6 +90,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
)?
|
||||
(?:videos?/)?
|
||||
(?P<id>[0-9]+)
|
||||
(?:/[\da-f]+)?
|
||||
/?(?:[?&].*)?(?:[#].*)?$
|
||||
'''
|
||||
IE_NAME = 'vimeo'
|
||||
@@ -232,6 +233,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'url': 'https://vimeo.com/7809605',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://vimeo.com/160743502/abd0e13fb4',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
|
@@ -1,52 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class WayOfTheMasterIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.wayofthemaster\.com/([^/?#]*/)*(?P<id>[^/?#]+)\.s?html(?:$|[?#])'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.wayofthemaster.com/hbks.shtml',
|
||||
'md5': '5316b57487ada8480606a93cb3d18d24',
|
||||
'info_dict': {
|
||||
'id': 'hbks',
|
||||
'ext': 'mp4',
|
||||
'title': 'Intelligent Design vs. Evolution',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._search_regex(
|
||||
r'<img src="images/title_[^"]+".*?alt="([^"]+)"',
|
||||
webpage, 'title', default=None)
|
||||
if title is None:
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.*?)</title>', webpage, 'page title')
|
||||
|
||||
url_base = self._search_regex(
|
||||
r'<param\s+name="?movie"?\s+value=".*?/wotm_videoplayer_highlow[0-9]*\.swf\?vid=([^"]+)"',
|
||||
webpage, 'URL base')
|
||||
formats = [{
|
||||
'format_id': 'low',
|
||||
'quality': 1,
|
||||
'url': url_base + '_low.mp4',
|
||||
}, {
|
||||
'format_id': 'high',
|
||||
'quality': 2,
|
||||
'url': url_base + '_high.mp4',
|
||||
}]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
}
|
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
class XboxClipsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?xboxclips\.com/(?:video\.php\?.*vid=|[^/]+/)(?P<id>[\w-]{36})'
|
||||
_TEST = {
|
||||
'url': 'https://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325',
|
||||
'url': 'http://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325',
|
||||
'md5': 'fbe1ec805e920aeb8eced3c3e657df5d',
|
||||
'info_dict': {
|
||||
'id': '074a69a9-5faf-46aa-b93b-9909c1720325',
|
||||
|
@@ -1818,20 +1818,32 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
||||
def _extract_mix(self, playlist_id):
|
||||
# The mixes are generated from a single video
|
||||
# the id of the playlist is just 'RD' + video_id
|
||||
url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
|
||||
webpage = self._download_webpage(
|
||||
url, playlist_id, 'Downloading Youtube mix')
|
||||
ids = []
|
||||
last_id = playlist_id[-11:]
|
||||
for n in itertools.count(1):
|
||||
url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
|
||||
webpage = self._download_webpage(
|
||||
url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
|
||||
new_ids = orderedSet(re.findall(
|
||||
r'''(?xs)data-video-username=".*?".*?
|
||||
href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
|
||||
webpage))
|
||||
# Fetch new pages until all the videos are repeated, it seems that
|
||||
# there are always 51 unique videos.
|
||||
new_ids = [_id for _id in new_ids if _id not in ids]
|
||||
if not new_ids:
|
||||
break
|
||||
ids.extend(new_ids)
|
||||
last_id = ids[-1]
|
||||
|
||||
url_results = self._ids_to_results(ids)
|
||||
|
||||
search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
|
||||
title_span = (
|
||||
search_title('playlist-title') or
|
||||
search_title('title long-title') or
|
||||
search_title('title'))
|
||||
title = clean_html(title_span)
|
||||
ids = orderedSet(re.findall(
|
||||
r'''(?xs)data-video-username=".*?".*?
|
||||
href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
|
||||
webpage))
|
||||
url_results = self._ids_to_results(ids)
|
||||
|
||||
return self.playlist_result(url_results, playlist_id, title)
|
||||
|
||||
@@ -1987,8 +1999,8 @@ class YoutubeUserIE(YoutubeChannelIE):
|
||||
def suitable(cls, url):
|
||||
# Don't return True if the url can be extracted with other youtube
|
||||
# extractor, the regex would is too permissive and it would match.
|
||||
other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
|
||||
if any(ie.suitable(url) for ie in other_ies):
|
||||
other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
|
||||
if any(ie.suitable(url) for ie in other_yt_ies):
|
||||
return False
|
||||
else:
|
||||
return super(YoutubeUserIE, cls).suitable(url)
|
||||
|
@@ -175,7 +175,8 @@ class FFmpegPostProcessor(PostProcessor):
|
||||
# Always use 'file:' because the filename may contain ':' (ffmpeg
|
||||
# interprets that as a protocol) or can start with '-' (-- is broken in
|
||||
# ffmpeg, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details)
|
||||
return 'file:' + fn
|
||||
# Also leave '-' intact in order not to break streaming to stdout.
|
||||
return 'file:' + fn if fn != '-' else fn
|
||||
|
||||
|
||||
class FFmpegExtractAudioPP(FFmpegPostProcessor):
|
||||
|
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2016.04.13'
|
||||
__version__ = '2016.04.19'
|
||||
|
Reference in New Issue
Block a user